From 3e7832e1a77c1c58caede11c89b6ad033784a4e3 Mon Sep 17 00:00:00 2001
From: water111 <48171810+water111@users.noreply.github.com>
Date: Sat, 18 Jun 2022 15:07:40 -0400
Subject: [PATCH 01/17] [custom levels] add 3rdparty library for gltf files
 (#1481)

---
 CMakeLists.txt                          |    2 +
 game/graphics/display.h                 |   18 +-
 third-party/tiny_gltf/CMakeLists.txt    |    9 +
 third-party/tiny_gltf/stb_image.h       | 7530 ++++++++++++++++++++++
 third-party/tiny_gltf/stb_image_write.h | 1621 +++++
 third-party/tiny_gltf/tiny_gltf.cpp     |    4 +
 third-party/tiny_gltf/tiny_gltf.h       | 7748 +++++++++++++++++++++++
 tools/CMakeLists.txt                    |    1 +
 tools/build_level/CMakeLists.txt        |    4 +
 tools/build_level/main.cpp              |    3 +
 10 files changed, 16931 insertions(+), 9 deletions(-)
 create mode 100644 third-party/tiny_gltf/CMakeLists.txt
 create mode 100644 third-party/tiny_gltf/stb_image.h
 create mode 100644 third-party/tiny_gltf/stb_image_write.h
 create mode 100644 third-party/tiny_gltf/tiny_gltf.cpp
 create mode 100644 third-party/tiny_gltf/tiny_gltf.h
 create mode 100644 tools/build_level/CMakeLists.txt
 create mode 100644 tools/build_level/main.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e8c2e3c65e..5ac40a0be8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -162,6 +162,8 @@ add_subdirectory(third-party/lzokay EXCLUDE_FROM_ALL)
 
 # build format library
 add_subdirectory(third-party/fmt EXCLUDE_FROM_ALL)
+add_subdirectory(third-party/tiny_gltf EXCLUDE_FROM_ALL)
+
 # discord rich presence
 include_directories(third-party/discord-rpc/include)
 add_subdirectory(third-party/discord-rpc EXCLUDE_FROM_ALL)
diff --git a/game/graphics/display.h b/game/graphics/display.h
index 6c66795892..f0943b8573 100644
--- a/game/graphics/display.h
+++ b/game/graphics/display.h
@@ -40,15 +40,15 @@ class GfxDisplay {
  public:
   virtual ~GfxDisplay() {}
 
-  virtual void* get_window() const __NYI_DEF;
-  virtual void set_size(int w, int h) __NYI_DEF;
-  virtual void update_fullscreen(GfxDisplayMode mode, int screen) __NYI_DEF;
-  virtual void get_scale(float* x, float* y) __NYI_DEF;
-  virtual void get_screen_size(int vmode_idx, s32* w, s32* h, s32* c) __NYI_DEF;
-  virtual void get_position(int* x, int* y) __NYI_DEF;
-  virtual void get_size(int* w, int* h) __NYI_DEF;
-  virtual GfxDisplayMode get_fullscreen() __NYI_DEF;
-  virtual void render() __NYI_DEF;
+  virtual void* get_window() const = 0;
+  virtual void set_size(int w, int h) = 0;
+  virtual void update_fullscreen(GfxDisplayMode mode, int screen) = 0;
+  virtual void get_scale(float* x, float* y) = 0;
+  virtual void get_screen_size(int vmode_idx, s32* w, s32* h, s32* c) = 0;
+  virtual void get_position(int* x, int* y) = 0;
+  virtual void get_size(int* w, int* h) = 0;
+  virtual GfxDisplayMode get_fullscreen() = 0;
+  virtual void render() = 0;
   bool is_active() const { return get_window() != nullptr; }
   void set_title(const char* title);
   const char* title() const { return m_title; }
diff --git a/third-party/tiny_gltf/CMakeLists.txt b/third-party/tiny_gltf/CMakeLists.txt
new file mode 100644
index 0000000000..eb0f926d45
--- /dev/null
+++ b/third-party/tiny_gltf/CMakeLists.txt
@@ -0,0 +1,9 @@
+if (UNIX)
+    set(CMAKE_CXX_FLAGS "-O3")
+else ()
+    set(CMAKE_CXX_FLAGS "/EHsc")
+endif (UNIX)
+
+include_directories(../)
+add_library(tiny_gltf tiny_gltf.cpp)
+
diff --git a/third-party/tiny_gltf/stb_image.h b/third-party/tiny_gltf/stb_image.h
new file mode 100644
index 0000000000..db2b1bcdf4
--- /dev/null
+++ b/third-party/tiny_gltf/stb_image.h
@@ -0,0 +1,7530 @@
+/* stb_image - v2.21 - public domain image loader - http://nothings.org/stb
+                                  no warranty implied; use at your own risk
+
+   Do this:
+      #define STB_IMAGE_IMPLEMENTATION
+   before you include this file in *one* C or C++ file to create the implementation.
+
+   // i.e. it should look like this:
+   #include ...
+   #include ...
+   #include ...
+   #define STB_IMAGE_IMPLEMENTATION
+   #include "stb_image.h"
+
+   You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
+   And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
+
+
+   QUICK NOTES:
+      Primarily of interest to game developers and other people who can
+          avoid problematic images and only need the trivial interface
+
+      JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
+      PNG 1/2/4/8/16-bit-per-channel
+
+      TGA (not sure what subset, if a subset)
+      BMP non-1bpp, non-RLE
+      PSD (composited view only, no extra channels, 8/16 bit-per-channel)
+
+      GIF (*comp always reports as 4-channel)
+      HDR (radiance rgbE format)
+      PIC (Softimage PIC)
+      PNM (PPM and PGM binary only)
+
+      Animated GIF still needs a proper API, but here's one way to do it:
+          http://gist.github.com/urraka/685d9a6340b26b830d49
+
+      - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
+      - decode from arbitrary I/O callbacks
+      - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
+
+   Full documentation under "DOCUMENTATION" below.
+
+
+LICENSE
+
+  See end of file for license information.
+
+RECENT REVISION HISTORY:
+
+      2.21  (2019-02-25) fix typo in comment
+      2.20  (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
+      2.19  (2018-02-11) fix warning
+      2.18  (2018-01-30) fix warnings
+      2.17  (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings
+      2.16  (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
+      2.15  (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
+      2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
+      2.13  (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
+      2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
+      2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
+                         RGB-format JPEG; remove white matting in PSD;
+                         allocate large structures on the stack;
+                         correct channel count for PNG & BMP
+      2.10  (2016-01-22) avoid warning introduced in 2.09
+      2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
+
+   See end of file for full revision history.
+
+
+ ============================    Contributors    =========================
+
+ Image formats                          Extensions, features
+    Sean Barrett (jpeg, png, bmp)          Jetro Lauha (stbi_info)
+    Nicolas Schulz (hdr, psd)              Martin "SpartanJ" Golini (stbi_info)
+    Jonathan Dummer (tga)                  James "moose2000" Brown (iPhone PNG)
+    Jean-Marc Lienher (gif)                Ben "Disch" Wenger (io callbacks)
+    Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG)
+    Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip)
+    Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD)
+    github:urraka (animated gif)           Junggon Kim (PNM comments)
+    Christopher Forseth (animated gif)     Daniel Gibson (16-bit TGA)
+                                           socks-the-fox (16-bit PNG)
+                                           Jeremy Sawicki (handle all ImageNet JPGs)
+ Optimizations & bugfixes                  Mikhail Morozov (1-bit BMP)
+    Fabian "ryg" Giesen                    Anael Seghezzi (is-16-bit query)
+    Arseny Kapoulkine
+    John-Mark Allen
+    Carmelo J Fdez-Aguera
+
+ Bug & warning fixes
+    Marc LeBlanc            David Woo          Guillaume George   Martins Mozeiko
+    Christpher Lloyd        Jerry Jansson      Joseph Thomson     Phil Jordan
+    Dave Moore              Roy Eltham         Hayaki Saito       Nathan Reed
+    Won Chun                Luke Graham        Johan Duparc       Nick Verigakis
+    the Horde3D community   Thomas Ruf         Ronny Chevalier    github:rlyeh
+    Janez Zemva             John Bartholomew   Michal Cichon      github:romigrou
+    Jonathan Blow           Ken Hamada         Tero Hanninen      github:svdijk
+    Laurent Gomila          Cort Stratton      Sergio Gonzalez    github:snagar
+    Aruelien Pocheville     Thibault Reuille   Cass Everitt       github:Zelex
+    Ryamond Barbiero        Paul Du Bois       Engin Manap        github:grim210
+    Aldo Culquicondor       Philipp Wiesemann  Dale Weiler        github:sammyhw
+    Oriol Ferrer Mesia      Josh Tobin         Matthew Gregan     github:phprus
+    Julian Raschke          Gregory Mullen     Baldur Karlsson    github:poppolopoppo
+    Christian Floisand      Kevin Schmidt      JR Smith           github:darealshinji
+    Blazej Dariusz Roszkowski                                     github:Michaelangel007
+*/
+
+#ifndef STBI_INCLUDE_STB_IMAGE_H
+#define STBI_INCLUDE_STB_IMAGE_H
+
+// DOCUMENTATION
+//
+// Limitations:
+//    - no 12-bit-per-channel JPEG
+//    - no JPEGs with arithmetic coding
+//    - GIF always returns *comp=4
+//
+// Basic usage (see HDR discussion below for HDR usage):
+//    int x,y,n;
+//    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
+//    // ... process data if not NULL ...
+//    // ... x = width, y = height, n = # 8-bit components per pixel ...
+//    // ... replace '0' with '1'..'4' to force that many components per pixel
+//    // ... but 'n' will always be the number that it would have been if you said 0
+//    stbi_image_free(data)
+//
+// Standard parameters:
+//    int *x                 -- outputs image width in pixels
+//    int *y                 -- outputs image height in pixels
+//    int *channels_in_file  -- outputs # of image components in image file
+//    int desired_channels   -- if non-zero, # of image components requested in result
+//
+// The return value from an image loader is an 'unsigned char *' which points
+// to the pixel data, or NULL on an allocation failure or if the image is
+// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
+// with each pixel consisting of N interleaved 8-bit components; the first
+// pixel pointed to is top-left-most in the image. There is no padding between
+// image scanlines or between pixels, regardless of format. The number of
+// components N is 'desired_channels' if desired_channels is non-zero, or
+// *channels_in_file otherwise. If desired_channels is non-zero,
+// *channels_in_file has the number of components that _would_ have been
+// output otherwise. E.g. if you set desired_channels to 4, you will always
+// get RGBA output, but you can check *channels_in_file to see if it's trivially
+// opaque because e.g. there were only 3 channels in the source image.
+//
+// An output image with N components has the following components interleaved
+// in this order in each pixel:
+//
+//     N=#comp     components
+//       1           grey
+//       2           grey, alpha
+//       3           red, green, blue
+//       4           red, green, blue, alpha
+//
+// If image loading fails for any reason, the return value will be NULL,
+// and *x, *y, *channels_in_file will be unchanged. The function
+// stbi_failure_reason() can be queried for an extremely brief, end-user
+// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS
+// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
+// more user-friendly ones.
+//
+// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
+//
+// ===========================================================================
+//
+// UNICODE:
+//
+//   If compiling for Windows and you wish to use Unicode filenames, compile
+//   with
+//       #define STBI_WINDOWS_UTF8
+//   and pass utf8-encoded filenames. Call stbi_convert_wchar_to_utf8 to convert
+//   Windows wchar_t filenames to utf8.
+//
+// ===========================================================================
+//
+// Philosophy
+//
+// stb libraries are designed with the following priorities:
+//
+//    1. easy to use
+//    2. easy to maintain
+//    3. good performance
+//
+// Sometimes I let "good performance" creep up in priority over "easy to maintain",
+// and for best performance I may provide less-easy-to-use APIs that give higher
+// performance, in addition to the easy-to-use ones. Nevertheless, it's important
+// to keep in mind that from the standpoint of you, a client of this library,
+// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
+//
+// Some secondary priorities arise directly from the first two, some of which
+// provide more explicit reasons why performance can't be emphasized.
+//
+//    - Portable ("ease of use")
+//    - Small source code footprint ("easy to maintain")
+//    - No dependencies ("ease of use")
+//
+// ===========================================================================
+//
+// I/O callbacks
+//
+// I/O callbacks allow you to read from arbitrary sources, like packaged
+// files or some other source. Data read from callbacks are processed
+// through a small internal buffer (currently 128 bytes) to try to reduce
+// overhead.
+//
+// The three functions you must define are "read" (reads some bytes of data),
+// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
+//
+// ===========================================================================
+//
+// SIMD support
+//
+// The JPEG decoder will try to automatically use SIMD kernels on x86 when
+// supported by the compiler. For ARM Neon support, you must explicitly
+// request it.
+//
+// (The old do-it-yourself SIMD API is no longer supported in the current
+// code.)
+//
+// On x86, SSE2 is used automatically when available: MSVC builds decide with a
+// run-time cpuid test, while GCC/Clang builds use it only when the compiler has
+// SSE2 enabled; otherwise the generic C versions are the fall-back. On ARM, the
+// typical path is separate builds for NEON and non-NEON devices (at least for
+// iOS and Android), so NEON is a build flag: define STBI_NEON to get NEON loops.
+//
+// If for some reason you do not want to use any of SIMD code, or if
+// you have issues compiling it, you can disable it entirely by
+// defining STBI_NO_SIMD.
+//
+// ===========================================================================
+//
+// HDR image support   (disable by defining STBI_NO_HDR)
+//
+// stb_image supports loading HDR images in general, and currently the Radiance
+// .HDR file format specifically. You can still load any file through the existing
+// interface; if you attempt to load an HDR file, it will be automatically remapped
+// to LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
+// both of these constants can be reconfigured through this interface:
+//
+//     stbi_hdr_to_ldr_gamma(2.2f);
+//     stbi_hdr_to_ldr_scale(1.0f);
+//
+// (note, do not use _inverse_ constants; stb_image will invert them
+// appropriately).
+//
+// Additionally, there is a new, parallel interface for loading files as
+// (linear) floats to preserve the full dynamic range:
+//
+//    float *data = stbi_loadf(filename, &x, &y, &n, 0);
+//
+// If you load LDR images through this interface, those images will
+// be promoted to floating point values, run through the inverse of
+// constants corresponding to the above:
+//
+//     stbi_ldr_to_hdr_scale(1.0f);
+//     stbi_ldr_to_hdr_gamma(2.2f);
+//
+// Finally, given a filename (or an open file or memory block--see header
+// file for details) containing image data, you can query for the "most
+// appropriate" interface to use (that is, whether the image is HDR or
+// not), using:
+//
+//     stbi_is_hdr(char const *filename);
+//
+// ===========================================================================
+//
+// iPhone PNG support:
+//
+// By default we convert iphone-formatted PNGs back to RGB, even though
+// they are internally encoded differently. You can disable this conversion
+// by calling stbi_convert_iphone_png_to_rgb(0), in which case
+// you will always just get the native iphone "format" through (which
+// is BGR stored in RGB).
+//
+// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
+// pixel to remove any premultiplied alpha *only* if the image file explicitly
+// says there's premultiplied data (currently only happens in iPhone images,
+// and only if iPhone convert-to-rgb processing is on).
+//
+// ===========================================================================
+//
+// ADDITIONAL CONFIGURATION
+//
+//  - You can suppress implementation of any of the decoders to reduce
+//    your code footprint by #defining one or more of the following
+//    symbols before creating the implementation.
+//
+//        STBI_NO_JPEG
+//        STBI_NO_PNG
+//        STBI_NO_BMP
+//        STBI_NO_PSD
+//        STBI_NO_TGA
+//        STBI_NO_GIF
+//        STBI_NO_HDR
+//        STBI_NO_PIC
+//        STBI_NO_PNM   (.ppm and .pgm)
+//
+//  - You can request *only* certain decoders and suppress all other ones
+//    (this will be more forward-compatible, as addition of new decoders
+//    doesn't require you to disable them explicitly):
+//
+//        STBI_ONLY_JPEG
+//        STBI_ONLY_PNG
+//        STBI_ONLY_BMP
+//        STBI_ONLY_PSD
+//        STBI_ONLY_TGA
+//        STBI_ONLY_GIF
+//        STBI_ONLY_HDR
+//        STBI_ONLY_PIC
+//        STBI_ONLY_PNM   (.ppm and .pgm)
+//
+//   - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
+//     want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
+//
+
+
+#ifndef STBI_NO_STDIO
+#include <stdio.h>
+#endif // STBI_NO_STDIO
+
+#define STBI_VERSION 1
+
+enum
+{
+  STBI_default = 0, // only used for desired_channels
+
+  STBI_grey       = 1,
+  STBI_grey_alpha = 2,
+  STBI_rgb        = 3,
+  STBI_rgb_alpha  = 4
+};
+
+#include <stdlib.h>
+typedef unsigned char stbi_uc;
+typedef unsigned short stbi_us;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef STB_IMAGE_STATIC
+#define STBIDEF static
+#else
+#define STBIDEF extern
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// PRIMARY API - works on images of any type
+//
+
+//
+// load image by filename, open file, or memory buffer
+//
+
+typedef struct
+{
+  int      (*read)  (void *user,char *data,int size);   // fill 'data' with 'size' bytes.  return number of bytes actually read
+  void     (*skip)  (void *user,int n);                 // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
+  int      (*eof)   (void *user);                       // returns nonzero if we are at end of file/data
+} stbi_io_callbacks;
+
+////////////////////////////////////
+//
+// 8-bits-per-channel interface
+//
+
+STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *channels_in_file, int desired_channels);
+
+#ifndef STBI_NO_STDIO
+STBIDEF stbi_uc *stbi_load            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF stbi_uc *stbi_load_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
+// for stbi_load_from_file, file pointer is left pointing immediately after image
+#endif
+
+#ifndef STBI_NO_GIF
+STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
+#endif
+
+#ifdef STBI_WINDOWS_UTF8
+STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input);
+#endif
+
+////////////////////////////////////
+//
+// 16-bits-per-channel interface
+//
+
+STBIDEF stbi_us *stbi_load_16_from_memory   (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
+
+#ifndef STBI_NO_STDIO
+STBIDEF stbi_us *stbi_load_16          (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
+#endif
+
+////////////////////////////////////
+//
+// float-per-channel interface
+//
+#ifndef STBI_NO_LINEAR
+STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y,  int *channels_in_file, int desired_channels);
+
+#ifndef STBI_NO_STDIO
+STBIDEF float *stbi_loadf            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
+STBIDEF float *stbi_loadf_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
+#endif
+#endif
+
+#ifndef STBI_NO_HDR
+STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma);
+STBIDEF void   stbi_hdr_to_ldr_scale(float scale);
+#endif // STBI_NO_HDR
+
+#ifndef STBI_NO_LINEAR
+STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma);
+STBIDEF void   stbi_ldr_to_hdr_scale(float scale);
+#endif // STBI_NO_LINEAR
+
+// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
+STBIDEF int    stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
+STBIDEF int    stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
+#ifndef STBI_NO_STDIO
+STBIDEF int      stbi_is_hdr          (char const *filename);
+STBIDEF int      stbi_is_hdr_from_file(FILE *f);
+#endif // STBI_NO_STDIO
+
+
+// get a VERY brief reason for failure
+// NOT THREADSAFE
+STBIDEF const char *stbi_failure_reason  (void);
+
+// free the loaded image -- this is just free()
+STBIDEF void     stbi_image_free      (void *retval_from_stbi_load);
+
+// get image dimensions & components without fully decoding
+STBIDEF int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
+STBIDEF int      stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
+STBIDEF int      stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len);
+STBIDEF int      stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user);
+
+#ifndef STBI_NO_STDIO
+STBIDEF int      stbi_info               (char const *filename,     int *x, int *y, int *comp);
+STBIDEF int      stbi_info_from_file     (FILE *f,                  int *x, int *y, int *comp);
+STBIDEF int      stbi_is_16_bit          (char const *filename);
+STBIDEF int      stbi_is_16_bit_from_file(FILE *f);
+#endif
+
+
+
+// for image formats that explicitly notate that they have premultiplied alpha,
+// we just return the colors as stored in the file. set this flag to force
+// unpremultiplication. results are undefined if the unpremultiply overflows.
+STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
+
+// indicate whether we should process iphone images back to canonical format,
+// or just pass them through "as-is"
+STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
+
+// flip the image vertically, so the first pixel in the output array is the bottom left
+STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
+
+// ZLIB client - used by PNG, available for other purposes
+
+STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
+STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
+STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
+STBIDEF int   stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
+
+STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
+STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+//
+//
+////   end header file   /////////////////////////////////////////////////////
+#endif // STBI_INCLUDE_STB_IMAGE_H
+
+#ifdef STB_IMAGE_IMPLEMENTATION
+
+#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
+  || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
+  || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
+  || defined(STBI_ONLY_ZLIB)
+   #ifndef STBI_ONLY_JPEG
+   #define STBI_NO_JPEG
+   #endif
+   #ifndef STBI_ONLY_PNG
+   #define STBI_NO_PNG
+   #endif
+   #ifndef STBI_ONLY_BMP
+   #define STBI_NO_BMP
+   #endif
+   #ifndef STBI_ONLY_PSD
+   #define STBI_NO_PSD
+   #endif
+   #ifndef STBI_ONLY_TGA
+   #define STBI_NO_TGA
+   #endif
+   #ifndef STBI_ONLY_GIF
+   #define STBI_NO_GIF
+   #endif
+   #ifndef STBI_ONLY_HDR
+   #define STBI_NO_HDR
+   #endif
+   #ifndef STBI_ONLY_PIC
+   #define STBI_NO_PIC
+   #endif
+   #ifndef STBI_ONLY_PNM
+   #define STBI_NO_PNM
+   #endif
+#endif
+
+#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
+#define STBI_NO_ZLIB
+#endif
+
+
+#include <stdarg.h>
+#include <stddef.h> // ptrdiff_t on osx
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
+#include <math.h>  // ldexp, pow
+#endif
+
+#ifndef STBI_NO_STDIO
+#include <stdio.h>
+#endif
+
+#ifndef STBI_ASSERT
+#include <assert.h>
+#define STBI_ASSERT(x) assert(x)
+#endif
+
+#ifdef __cplusplus
+#define STBI_EXTERN extern "C"
+#else
+#define STBI_EXTERN extern
+#endif
+
+
+#ifndef _MSC_VER
+   #ifdef __cplusplus
+   #define stbi_inline inline
+   #else
+   #define stbi_inline
+   #endif
+#else
+   #define stbi_inline __forceinline
+#endif
+
+
+#ifdef _MSC_VER
+typedef unsigned short stbi__uint16;
+typedef   signed short stbi__int16;
+typedef unsigned int   stbi__uint32;
+typedef   signed int   stbi__int32;
+#else
+#include <stdint.h>
+typedef uint16_t stbi__uint16;
+typedef int16_t  stbi__int16;
+typedef uint32_t stbi__uint32;
+typedef int32_t  stbi__int32;
+#endif
+
+// should produce compiler error if size is wrong
+typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
+
+#ifdef _MSC_VER
+#define STBI_NOTUSED(v)  (void)(v)
+#else
+#define STBI_NOTUSED(v)  (void)sizeof(v)
+#endif
+
+#ifdef _MSC_VER
+#define STBI_HAS_LROTL
+#endif
+
+#ifdef STBI_HAS_LROTL
+   #define stbi_lrot(x,y)  _lrotl(x,y)
+#else
+   #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (32 - (y))))
+#endif
+
+#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
+// ok
+#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
+// ok
+#else
+#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
+#endif
+
+#ifndef STBI_MALLOC
+#define STBI_MALLOC(sz)           malloc(sz)
+#define STBI_REALLOC(p,newsz)     realloc(p,newsz)
+#define STBI_FREE(p)              free(p)
+#endif
+
+#ifndef STBI_REALLOC_SIZED
+#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
+#endif
+
+// x86/x64 detection
+#if defined(__x86_64__) || defined(_M_X64)
+#define STBI__X64_TARGET
+#elif defined(__i386) || defined(_M_IX86)
+#define STBI__X86_TARGET
+#endif
+
+#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
+// gcc doesn't support sse2 intrinsics unless you compile with -msse2,
+// which in turn means it gets to use SSE2 everywhere. This is unfortunate,
+// but previous attempts to provide the SSE2 functions with runtime
+// detection caused numerous issues. The way architecture extensions are
+// exposed in GCC/Clang is, sadly, not really suited for one-file libs.
+// New behavior: if compiled with -msse2, we use SSE2 without any
+// detection; if not, we don't use it at all.
+#define STBI_NO_SIMD
+#endif
+
+#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
+// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
+//
+// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
+// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
+// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
+// simultaneously enabling "-mstackrealign".
+//
+// See https://github.com/nothings/stb/issues/81 for more information.
+//
+// So default to no SSE2 on 32-bit MinGW. If you've read this far and added
+// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
+#define STBI_NO_SIMD
+#endif
+
+#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
+#define STBI_SSE2
+#include <emmintrin.h>
+
+#ifdef _MSC_VER
+
+#if _MSC_VER >= 1400  // not VC6
+#include <intrin.h> // __cpuid
+static int stbi__cpuid3(void)
+{
+   int info[4];
+   __cpuid(info,1);
+   return info[3];
+}
+#else
+static int stbi__cpuid3(void)
+{
+   int res;
+   __asm {
+      mov  eax,1
+      cpuid
+      mov  res,edx
+   }
+   return res;
+}
+#endif
+
+#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
+
+#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
+static int stbi__sse2_available(void)
+{
+   int info3 = stbi__cpuid3();
+   return ((info3 >> 26) & 1) != 0;
+}
+#endif
+
+#else // assume GCC-style if not VC++
+#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
+
+#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
+static int stbi__sse2_available(void)
+{
+   // If we're even attempting to compile this on GCC/Clang, that means
+   // -msse2 is on, which means the compiler is allowed to use SSE2
+   // instructions at will, and so are we.
+   return 1;
+}
+#endif
+
+#endif
+#endif
+
+// ARM NEON
+#if defined(STBI_NO_SIMD) && defined(STBI_NEON)
+#undef STBI_NEON
+#endif
+
+#ifdef STBI_NEON
+#include <arm_neon.h>
+// assume GCC or Clang on ARM targets
+#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
+#endif
+
+#ifndef STBI_SIMD_ALIGN
+#define STBI_SIMD_ALIGN(type, name) type name
+#endif
+
+///////////////////////////////////////////////
+//
+//  stbi__context struct and start_xxx functions
+
+// stbi__context structure is our basic context used by all images, so it
+// contains all the IO context, plus some basic image information
+typedef struct
+{
+   stbi__uint32 img_x, img_y;
+   int img_n, img_out_n;
+
+   stbi_io_callbacks io;
+   void *io_user_data;
+
+   int read_from_callbacks;
+   int buflen;
+   stbi_uc buffer_start[128];
+
+   stbi_uc *img_buffer, *img_buffer_end;
+   stbi_uc *img_buffer_original, *img_buffer_original_end;
+} stbi__context;
+
+
+static void stbi__refill_buffer(stbi__context *s);
+
+// initialize a memory-decode context: decodes straight from the caller's bytes [buffer, buffer+len), with no read callback
+static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
+{
+   s->io.read = NULL;
+   s->read_from_callbacks = 0;
+   s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
+   s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
+}
+
+// initialize a callback-based context: copies *c, targets the internal buffer_start array, and calls stbi__refill_buffer once
+static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
+{
+   s->io = *c;
+   s->io_user_data = user;
+   s->buflen = sizeof(s->buffer_start);
+   s->read_from_callbacks = 1;
+   s->img_buffer_original = s->buffer_start;
+   stbi__refill_buffer(s);
+   s->img_buffer_original_end = s->img_buffer_end;
+}
+
+#ifndef STBI_NO_STDIO
+
+static int stbi__stdio_read(void *user, char *data, int size)
+{
+   return (int) fread(data,1,size,(FILE*) user);
+}
+
+static void stbi__stdio_skip(void *user, int n) // io.skip adapter: move the FILE* position by n bytes
+{
+   fseek((FILE*) user, n, SEEK_CUR); // the fseek result is not checked
+}
+
+static int stbi__stdio_eof(void *user) // io.eof adapter: nonzero once no further bytes can be read from the FILE*
+{
+   return feof((FILE*) user) || ferror((FILE*) user); // a read error never sets the EOF flag; report it as end of data so callers stop polling
+}
+
+static stbi_io_callbacks stbi__stdio_callbacks = // callback table that stbi__start_file passes to stbi__start_callbacks
+{
+   stbi__stdio_read,
+   stbi__stdio_skip,
+   stbi__stdio_eof,
+};
+
+static void stbi__start_file(stbi__context *s, FILE *f) // initialize a context that reads from an already-open FILE*
+{
+   stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f); // the FILE* travels as the callbacks' user pointer
+}
+
+//static void stop_file(stbi__context *s) { }
+
+#endif // !STBI_NO_STDIO
+
+static void stbi__rewind(stbi__context *s) // reset the read cursor to the window saved when the context was started
+{
+   // conceptually rewind SHOULD rewind to the beginning of the stream,
+   // but we just rewind to the beginning of the initial buffer, because
+   // we only use it after doing 'test', which only ever looks at at most 92 bytes
+   s->img_buffer = s->img_buffer_original;
+   s->img_buffer_end = s->img_buffer_original_end; // no callback is invoked; the underlying stream position is untouched
+}
+
+enum // values for stbi__result_info.channel_order
+{
+   STBI_ORDER_RGB,
+   STBI_ORDER_BGR
+};
+
+typedef struct // per-load metadata, reset by stbi__load_main before a loader runs
+{
+   int bits_per_channel; // 8 by default; the postprocess wrappers otherwise expect 16
+   int num_channels; // zeroed by stbi__load_main; not read anywhere in this section
+   int channel_order; // STBI_ORDER_RGB or STBI_ORDER_BGR
+} stbi__result_info;
+
+#ifndef STBI_NO_JPEG // per-format entry points, defined later in the file; each group disappears when its STBI_NO_xxx macro is set
+static int      stbi__jpeg_test(stbi__context *s); // _test: nonzero makes stbi__load_main pick this format
+static void    *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); // _load: decode, NULL on failure
+static int      stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp); // _info: presumably reports x/y/comp without a full decode -- confirm at the definition
+#endif
+
+#ifndef STBI_NO_PNG
+static int      stbi__png_test(stbi__context *s);
+static void    *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
+static int      stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
+static int      stbi__png_is16(stbi__context *s); // presumably tests for 16-bit samples -- confirm at the definition
+#endif
+
+#ifndef STBI_NO_BMP
+static int      stbi__bmp_test(stbi__context *s);
+static void    *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
+static int      stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+#ifndef STBI_NO_TGA
+static int      stbi__tga_test(stbi__context *s);
+static void    *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
+static int      stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+#ifndef STBI_NO_PSD
+static int      stbi__psd_test(stbi__context *s);
+static void    *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc); // only loader that also takes the requested bits per channel
+static int      stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
+static int      stbi__psd_is16(stbi__context *s);
+#endif
+
+#ifndef STBI_NO_HDR
+static int      stbi__hdr_test(stbi__context *s);
+static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); // returns float samples, unlike the other loaders
+static int      stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+#ifndef STBI_NO_PIC
+static int      stbi__pic_test(stbi__context *s);
+static void    *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
+static int      stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+#ifndef STBI_NO_GIF
+static int      stbi__gif_test(stbi__context *s);
+static void    *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
+static void    *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp); // multi-frame entry used by stbi_load_gif_from_memory
+static int      stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+#ifndef STBI_NO_PNM
+static int      stbi__pnm_test(stbi__context *s);
+static void    *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
+static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
+#endif
+
+// this is not threadsafe: a single file-scope pointer holds the most recent failure message
+static const char *stbi__g_failure_reason;
+
+STBIDEF const char *stbi_failure_reason(void) // return the message stored by the most recent stbi__err call
+{
+   return stbi__g_failure_reason; // the variable has no explicit initializer, so this is NULL until something fails
+}
+
+static int stbi__err(const char *str) // record str as the failure reason; a two-argument macro of the same name wraps this further down
+{
+   stbi__g_failure_reason = str; // the pointer is stored, not the text
+   return 0; // always 0, so callers can write "return stbi__err(...)"
+}
+
+static void *stbi__malloc(size_t size) // single funnel for allocations through the STBI_MALLOC macro
+{
+    return STBI_MALLOC(size);
+}
+
+// stb_image uses ints pervasively, including for offset calculations.
+// therefore the largest decoded image size we can support with the
+// current code, even on 64-bit targets, is INT_MAX. this is not a
+// significant limitation for the intended use case.
+//
+// we do, however, need to make sure our size calculations don't
+// overflow. hence a few helper functions for size calculations that
+// multiply integers together, making sure that they're non-negative
+// and no overflow occurs.
+
+// return 1 if the sum is valid, 0 on overflow.
+// negative terms are considered invalid.
+static int stbi__addsizes_valid(int a, int b)
+{
+   if (b < 0) return 0;
+   // now 0 <= b <= INT_MAX, hence also
+   // 0 <= INT_MAX - b <= INT_MAX.
+   // And "a + b <= INT_MAX" (which might overflow) is the
+   // same as a <= INT_MAX - b (no overflow)
+   return a <= INT_MAX - b; // note: only b is sign-checked in this function; a is not
+}
+
+// returns 1 if the product is valid, 0 on overflow.
+// negative factors are considered invalid.
+static int stbi__mul2sizes_valid(int a, int b)
+{
+   if (a < 0 || b < 0) return 0;
+   if (b == 0) return 1; // mul-by-0 is always safe
+   // portable way to check for no overflows in a*b
+   return a <= INT_MAX/b; // b > 0 here, so the division is safe
+}
+
+// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow
+static int stbi__mad2sizes_valid(int a, int b, int add)
+{
+   return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add); // && short-circuits, so a*b is only evaluated once it is known not to overflow
+}
+
+// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow
+static int stbi__mad3sizes_valid(int a, int b, int c, int add)
+{
+   return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && // each partial product is validated before it is used
+      stbi__addsizes_valid(a*b*c, add);
+}
+
+// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) // compiled out only when both macros are defined
+static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
+{
+   return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && // each partial product is validated before it is used
+      stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add);
+}
+#endif
+
+// mallocs with size overflow checking: allocate a*b + add bytes, or return NULL if that size is invalid
+static void *stbi__malloc_mad2(int a, int b, int add)
+{
+   if (!stbi__mad2sizes_valid(a, b, add)) return NULL; // no failure reason is recorded here; callers report it
+   return stbi__malloc(a*b + add);
+}
+
+static void *stbi__malloc_mad3(int a, int b, int c, int add) // allocate a*b*c + add bytes with overflow checking
+{
+   if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL; // invalid size looks the same as allocation failure to the caller
+   return stbi__malloc(a*b*c + add);
+}
+
+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) // same guard as stbi__mad4sizes_valid
+static void *stbi__malloc_mad4(int a, int b, int c, int d, int add) // allocate a*b*c*d + add bytes with overflow checking
+{
+   if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL; // invalid size looks the same as allocation failure to the caller
+   return stbi__malloc(a*b*c*d + add);
+}
+#endif
+
+// stbi__err - error; from here on the name is a two-argument macro taking (short tag, longer message)
+// stbi__errpf - error returning pointer to float
+// stbi__errpuc - error returning pointer to unsigned char
+
+#ifdef STBI_NO_FAILURE_STRINGS // neither message is recorded
+   #define stbi__err(x,y)  0
+#elif defined(STBI_FAILURE_USERMSG) // record the second message
+   #define stbi__err(x,y)  stbi__err(y)
+#else // default: record the first message
+   #define stbi__err(x,y)  stbi__err(x)
+#endif
+
+#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL)) // always NULL; stbi__err is evaluated only for its side effect
+#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL)) // always NULL; stbi__err is evaluated only for its side effect
+
+STBIDEF void stbi_image_free(void *retval_from_stbi_load) // release a buffer returned by one of the stbi_load* functions
+{
+   STBI_FREE(retval_from_stbi_load); // goes through the STBI_FREE macro, the counterpart of STBI_MALLOC
+}
+
+#ifndef STBI_NO_LINEAR
+static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp); // defined below; releases data and returns a new float buffer
+#endif
+
+#ifndef STBI_NO_HDR
+static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp); // defined below; releases data and returns a new byte buffer
+#endif
+
+static int stbi__vertically_flip_on_load = 0; // file-scope setting read by the postprocess wrappers; off by default
+
+STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip) // nonzero makes subsequent loads return rows in reversed order
+{
+    stbi__vertically_flip_on_load = flag_true_if_should_flip;
+}
+
+static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) // try each enabled format in order and decode with the first whose test accepts the stream
+{
+   memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
+   ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
+   ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
+   ri->num_channels = 0;
+
+   #ifndef STBI_NO_JPEG
+   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
+   #endif
+   #ifndef STBI_NO_PNG
+   if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp, ri);
+   #endif
+   #ifndef STBI_NO_BMP
+   if (stbi__bmp_test(s))  return stbi__bmp_load(s,x,y,comp,req_comp, ri);
+   #endif
+   #ifndef STBI_NO_GIF
+   if (stbi__gif_test(s))  return stbi__gif_load(s,x,y,comp,req_comp, ri);
+   #endif
+   #ifndef STBI_NO_PSD
+   if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc); // the only loader that receives bpc
+   #endif
+   #ifndef STBI_NO_PIC
+   if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp, ri);
+   #endif
+   #ifndef STBI_NO_PNM
+   if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp, ri);
+   #endif
+
+   #ifndef STBI_NO_HDR
+   if (stbi__hdr_test(s)) {
+      float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
+      return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); // float samples are converted to bytes; a NULL hdr comes back as NULL
+   }
+   #endif
+
+   #ifndef STBI_NO_TGA
+   // test tga last because it's a crappy test!
+   if (stbi__tga_test(s))
+      return stbi__tga_load(s,x,y,comp,req_comp, ri);
+   #endif
+
+   return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt"); // no test matched: record the reason and return NULL
+}
+
+static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels) // narrow a 16-bit image to 8 bits per channel; orig is released on success and on failure
+{
+   int i;
+   int img_len = w * h * channels; // total number of samples
+   stbi_uc *reduced;
+
+   reduced = (stbi_uc *) stbi__malloc(img_len);
+   if (reduced == NULL) { STBI_FREE(orig); return stbi__errpuc("outofmem", "Out of memory"); } // free the input too: the caller overwrites its only pointer to it with our NULL
+
+   for (i = 0; i < img_len; ++i)
+      reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top byte of each 16-bit sample is sufficient approx of 16->8 bit scaling
+
+   STBI_FREE(orig);
+   return reduced;
+}
+
+static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels) // widen an 8-bit image to 16 bits per channel; orig is released on success and on failure
+{
+   int i;
+   int img_len = w * h * channels; // total number of samples
+   stbi__uint16 *enlarged;
+
+   enlarged = (stbi__uint16 *) stbi__malloc_mad2(img_len, 2, 0); // overflow-checked: doubling img_len may not fit in an int
+   if (enlarged == NULL) { STBI_FREE(orig); return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); } // free the input too: the caller overwrites its only pointer to it with our NULL
+
+   for (i = 0; i < img_len; ++i)
+      enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
+
+   STBI_FREE(orig);
+   return enlarged;
+}
+
+static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel) // reverse the row order of an image in place
+{
+   int row;
+   size_t bytes_per_row = (size_t)w * bytes_per_pixel;
+   stbi_uc temp[2048]; // fixed scratch buffer; rows longer than this are swapped in several pieces
+   stbi_uc *bytes = (stbi_uc *)image;
+
+   for (row = 0; row < (h>>1); row++) { // only the top half iterates; the middle row of an odd-height image stays put
+      stbi_uc *row0 = bytes + row*bytes_per_row;
+      stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row; // mirror row counted from the bottom
+      // swap row0 with row1
+      size_t bytes_left = bytes_per_row;
+      while (bytes_left) {
+         size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp); // chunk size capped at the scratch buffer
+         memcpy(temp, row0, bytes_copy);
+         memcpy(row0, row1, bytes_copy);
+         memcpy(row1, temp, bytes_copy);
+         row0 += bytes_copy;
+         row1 += bytes_copy;
+         bytes_left -= bytes_copy;
+      }
+   }
+}
+
+#ifndef STBI_NO_GIF // only used by stbi_load_gif_from_memory in this section
+static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel) // flip each of z consecutive w*h images independently
+{
+   int slice;
+   int slice_size = w * h * bytes_per_pixel; // byte distance from one slice to the next
+
+   stbi_uc *bytes = (stbi_uc *)image;
+   for (slice = 0; slice < z; ++slice) {
+      stbi__vertical_flip(bytes, w, h, bytes_per_pixel);
+      bytes += slice_size;
+   }
+}
+#endif
+
+static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) // decode, then guarantee 8-bit samples and apply the optional vertical flip
+{
+   stbi__result_info ri;
+   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8); // 8 = bits per channel requested
+
+   if (result == NULL)
+      return NULL;
+
+   if (ri.bits_per_channel != 8) { // the loader produced 16-bit data: narrow it
+      STBI_ASSERT(ri.bits_per_channel == 16);
+      result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp); // NOTE(review): a NULL from the conversion is not checked before the flip below
+      ri.bits_per_channel = 8;
+   }
+
+   // @TODO: move stbi__convert_format to here
+
+   if (stbi__vertically_flip_on_load) {
+      int channels = req_comp ? req_comp : *comp; // req_comp == 0 means the file's own channel count
+      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
+   }
+
+   return (unsigned char *) result;
+}
+
+static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) // decode, then guarantee 16-bit samples and apply the optional vertical flip
+{
+   stbi__result_info ri;
+   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16); // 16 = bits per channel requested
+
+   if (result == NULL)
+      return NULL;
+
+   if (ri.bits_per_channel != 16) { // the loader produced 8-bit data: widen it
+      STBI_ASSERT(ri.bits_per_channel == 8);
+      result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp); // NOTE(review): a NULL from the conversion is not checked before the flip below
+      ri.bits_per_channel = 16;
+   }
+
+   // @TODO: move stbi__convert_format16 to here
+   // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
+
+   if (stbi__vertically_flip_on_load) {
+      int channels = req_comp ? req_comp : *comp; // req_comp == 0 means the file's own channel count
+      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
+   }
+
+   return (stbi__uint16 *) result;
+}
+
+#if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR) // needs both features enabled
+static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp) // apply the optional vertical flip to a float image in place
+{
+   if (stbi__vertically_flip_on_load && result != NULL) { // NULL results are left alone
+      int channels = req_comp ? req_comp : *comp; // req_comp == 0 means the file's own channel count
+      stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
+   }
+}
+#endif
+
+#ifndef STBI_NO_STDIO
+
+#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8) // the two Win32 conversion routines used below are declared by hand here
+STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
+STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);
+#endif
+
+#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
+STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input) // convert a NUL-terminated wide string to UTF-8 in buffer; returns WideCharToMultiByte's result (0 on failure)
+{
+	return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, bufferlen > INT_MAX ? INT_MAX : (int) bufferlen, NULL, NULL); // the API takes an int byte count: clamp instead of letting a large size_t wrap
+}
+#endif
+
+static FILE *stbi__fopen(char const *filename, char const *mode) // open a file, picking the platform-appropriate call; returns 0 on failure
+{
+   FILE *f;
+#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8) // filename and mode are UTF-8: convert to wide strings and use the wide-char open
+   wchar_t wMode[64];
+   wchar_t wFilename[1024];
+	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename))) // last argument is a count of wchar_t elements, not bytes
+      return 0;
+
+	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode))) // element count again; passing sizeof() alone would allow writes past the array
+      return 0;
+
+#if _MSC_VER >= 1400
+	if (0 != _wfopen_s(&f, wFilename, wMode))
+		f = 0;
+#else
+   f = _wfopen(wFilename, wMode);
+#endif
+
+#elif defined(_MSC_VER) && _MSC_VER >= 1400 // MSVC without the UTF-8 option: use the _s variant
+   if (0 != fopen_s(&f, filename, mode))
+      f=0;
+#else // everything else: plain fopen
+   f = fopen(filename, mode);
+#endif
+   return f;
+}
+
+
+STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp) // open filename, decode it to 8-bit pixels, close the file
+{
+   FILE *f = stbi__fopen(filename, "rb");
+   unsigned char *result;
+   if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
+   result = stbi_load_from_file(f,x,y,comp,req_comp);
+   fclose(f); // closed whether or not decoding succeeded
+   return result;
+}
+
+STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) // decode 8-bit pixels from an open FILE*; the file is not closed
+{
+   unsigned char *result;
+   stbi__context s;
+   stbi__start_file(&s,f);
+   result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
+   if (result) { // the file position is only corrected on success
+      // need to 'unget' all the characters in the IO buffer
+      fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); // step back over bytes that were buffered but not consumed
+   }
+   return result;
+}
+
+STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp) // decode 16-bit pixels from an open FILE*; the file is not closed
+{
+   stbi__uint16 *result;
+   stbi__context s;
+   stbi__start_file(&s,f);
+   result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
+   if (result) { // the file position is only corrected on success
+      // need to 'unget' all the characters in the IO buffer
+      fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); // step back over bytes that were buffered but not consumed
+   }
+   return result;
+}
+
+STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp) // open filename, decode it to 16-bit pixels, close the file
+{
+   FILE *f = stbi__fopen(filename, "rb");
+   stbi__uint16 *result;
+   if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
+   result = stbi_load_from_file_16(f,x,y,comp,req_comp);
+   fclose(f); // closed whether or not decoding succeeded
+   return result;
+}
+
+
+#endif //!STBI_NO_STDIO
+
+STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels) // decode 16-bit pixels from an in-memory encoded image
+{
+   stbi__context s;
+   stbi__start_mem(&s,buffer,len); // buffer is read in place
+   return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
+}
+
+STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels) // decode 16-bit pixels from a caller-supplied io callback set
+{
+   stbi__context s;
+   stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user); // const is cast away; the callee only copies the table
+   return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
+}
+
+STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) // decode 8-bit pixels from an in-memory encoded image
+{
+   stbi__context s;
+   stbi__start_mem(&s,buffer,len); // buffer is read in place
+   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
+}
+
+STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) // decode 8-bit pixels from a caller-supplied io callback set
+{
+   stbi__context s;
+   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); // const is cast away; the callee only copies the table
+   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
+}
+
+#ifndef STBI_NO_GIF
+STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp) // decode every frame of an in-memory GIF; *z receives the frame count
+{
+   unsigned char *result;
+   stbi__context s;
+   stbi__start_mem(&s,buffer,len);
+
+   result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);
+   if (result && stbi__vertically_flip_on_load) { // result may be NULL when decoding failed; flipping would then write through a null pointer
+      stbi__vertical_flip_slices( result, *x, *y, *z, *comp ); // NOTE(review): pixel size is taken from *comp; confirm that matches the buffer layout when req_comp != 0
+   }
+
+   return result;
+}
+#endif
+
+#ifndef STBI_NO_LINEAR
+static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp) // produce float pixels: native HDR decode when the stream is HDR, otherwise 8-bit decode converted to float
+{
+   unsigned char *data;
+   #ifndef STBI_NO_HDR
+   if (stbi__hdr_test(s)) {
+      stbi__result_info ri;
+      float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri);
+      if (hdr_data)
+         stbi__float_postprocess(hdr_data,x,y,comp,req_comp); // optional vertical flip
+      return hdr_data;
+   }
+   #endif
+   data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp); // flip (if enabled) already happens inside this call
+   if (data)
+      return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp); // releases data
+   return stbi__errpf("unknown image type", "Image not of any known type, or corrupt"); // replaces whatever reason the 8-bit path recorded
+}
+
+STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) // decode float pixels from an in-memory encoded image
+{
+   stbi__context s;
+   stbi__start_mem(&s,buffer,len); // buffer is read in place
+   return stbi__loadf_main(&s,x,y,comp,req_comp);
+}
+
+STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) // decode float pixels from a caller-supplied io callback set
+{
+   stbi__context s;
+   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); // const is cast away; the callee only copies the table
+   return stbi__loadf_main(&s,x,y,comp,req_comp);
+}
+
+#ifndef STBI_NO_STDIO
+STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) // open filename, decode it to float pixels, close the file
+{
+   float *result;
+   FILE *f = stbi__fopen(filename, "rb");
+   if (!f) return stbi__errpf("can't fopen", "Unable to open file");
+   result = stbi_loadf_from_file(f,x,y,comp,req_comp);
+   fclose(f); // closed whether or not decoding succeeded
+   return result;
+}
+
+STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) // decode float pixels from an open FILE*; the file is not closed
+{
+   stbi__context s;
+   stbi__start_file(&s,f);
+   return stbi__loadf_main(&s,x,y,comp,req_comp); // unlike stbi_load_from_file, no seek-back over unread buffered bytes is done here
+}
+#endif // !STBI_NO_STDIO
+
+#endif // !STBI_NO_LINEAR
+
+// these is-hdr-or-not functions are defined independent of whether
+// STBI_NO_LINEAR is defined, for API simplicity; if STBI_NO_HDR is defined
+// (the macro the code below actually tests), they always report false!
+
+STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len) // report whether an in-memory image passes the HDR format test
+{
+   #ifndef STBI_NO_HDR
+   stbi__context s;
+   stbi__start_mem(&s,buffer,len);
+   return stbi__hdr_test(&s);
+   #else // HDR support compiled out: always 0
+   STBI_NOTUSED(buffer);
+   STBI_NOTUSED(len);
+   return 0;
+   #endif
+}
+
+#ifndef STBI_NO_STDIO
+STBIDEF int      stbi_is_hdr          (char const *filename) // report whether the named file passes the HDR format test
+{
+   FILE *f = stbi__fopen(filename, "rb");
+   int result=0; // a file that cannot be opened reports 0
+   if (f) {
+      result = stbi_is_hdr_from_file(f);
+      fclose(f);
+   }
+   return result;
+}
+
+STBIDEF int stbi_is_hdr_from_file(FILE *f) // report whether an open FILE* passes the HDR format test, then put the file position back
+{
+   #ifndef STBI_NO_HDR
+   long pos = ftell(f); // remember where the caller was
+   int res;
+   stbi__context s;
+   stbi__start_file(&s,f);
+   res = stbi__hdr_test(&s);
+   fseek(f, pos, SEEK_SET); // restore the position saved above
+   return res;
+   #else // HDR support compiled out: always 0
+   STBI_NOTUSED(f);
+   return 0;
+   #endif
+}
+#endif // !STBI_NO_STDIO
+
+STBIDEF int      stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user) // report whether a callback-fed stream passes the HDR format test
+{
+   #ifndef STBI_NO_HDR
+   stbi__context s;
+   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); // this already reads the first chunk through the callbacks
+   return stbi__hdr_test(&s);
+   #else // HDR support compiled out: always 0
+   STBI_NOTUSED(clbk);
+   STBI_NOTUSED(user);
+   return 0;
+   #endif
+}
+
+#ifndef STBI_NO_LINEAR
+static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f; // exponent and multiplier used by stbi__ldr_to_hdr
+
+STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; } // stored as given
+STBIDEF void   stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; } // stored as given
+#endif
+
+static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f; // reciprocals ("_i") of the gamma and scale, used by stbi__hdr_to_ldr
+
+STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; } // stores the reciprocal; gamma is not checked for zero
+STBIDEF void   stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; } // stores the reciprocal; scale is not checked for zero
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Common code used by all image loaders
+//
+
+enum // scan-mode constants; not referenced in this section, presumably passed to the per-format parsers -- confirm at their use
+{
+   STBI__SCAN_load=0,
+   STBI__SCAN_type,
+   STBI__SCAN_header
+};
+
+static void stbi__refill_buffer(stbi__context *s) // pull the next chunk from io.read into buffer_start and reset the read window to it
+{
+   int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
+   if (n == 0) {
+      // at end of file, treat same as if from memory, but need to handle case
+      // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
+      s->read_from_callbacks = 0; // no further refills will be attempted
+      s->img_buffer = s->buffer_start;
+      s->img_buffer_end = s->buffer_start+1; // window holds exactly one byte...
+      *s->img_buffer = 0; // ...which is a zero
+   } else { // note: a negative n is not treated specially and also lands here
+      s->img_buffer = s->buffer_start;
+      s->img_buffer_end = s->buffer_start + n;
+   }
+}
+
+stbi_inline static stbi_uc stbi__get8(stbi__context *s) // read one byte, refilling from callbacks when needed; yields 0 once the data is exhausted
+{
+   if (s->img_buffer < s->img_buffer_end)
+      return *s->img_buffer++;
+   if (s->read_from_callbacks) {
+      stbi__refill_buffer(s);
+      return *s->img_buffer++; // after a refill the window always holds at least one byte when the read returned >= 0
+   }
+   return 0; // memory source at its end, or callbacks already hit end of stream
+}
+
+stbi_inline static int stbi__at_eof(stbi__context *s) // nonzero when no more input bytes are available
+{
+   if (s->io.read) { // callback source
+      if (!(s->io.eof)(s->io_user_data)) return 0; // the stream itself still has data
+      // if feof() is true, check if buffer = end
+      // special case: we've only got the special 0 character at the end
+      if (s->read_from_callbacks == 0) return 1;
+   }
+
+   return s->img_buffer >= s->img_buffer_end; // memory source, or stream at EOF with buffered bytes possibly left
+}
+
+static void stbi__skip(stbi__context *s, int n) // advance the read position by n bytes
+{
+   if (n < 0) { // a negative count jumps to the end of the current window
+      s->img_buffer = s->img_buffer_end;
+      return;
+   }
+   if (s->io.read) { // callback source
+      int blen = (int) (s->img_buffer_end - s->img_buffer); // bytes still buffered
+      if (blen < n) {
+         s->img_buffer = s->img_buffer_end; // drop what is buffered...
+         (s->io.skip)(s->io_user_data, n - blen); // ...and have the stream skip the remainder
+         return;
+      }
+   }
+   s->img_buffer += n; // for a memory source this is not clamped to img_buffer_end
+}
+
+static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n) // copy n bytes into buffer; returns 1 if all n were obtained, 0 otherwise
+{
+   if (s->io.read) { // callback source
+      int blen = (int) (s->img_buffer_end - s->img_buffer); // bytes still buffered
+      if (blen < n) {
+         int res, count;
+
+         memcpy(buffer, s->img_buffer, blen); // first hand over what is buffered
+
+         count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen); // then read the rest straight into the destination
+         res = (count == (n-blen));
+         s->img_buffer = s->img_buffer_end; // the window is now empty
+         return res;
+      }
+   }
+
+   if (s->img_buffer+n <= s->img_buffer_end) { // enough bytes in the current window
+      memcpy(buffer, s->img_buffer, n);
+      s->img_buffer += n;
+      return 1;
+   } else
+      return 0; // short: nothing is copied and the position is unchanged
+}
+
+static int stbi__get16be(stbi__context *s) // read a 16-bit big-endian value
+{
+   int z = stbi__get8(s); // high byte first
+   return (z << 8) + stbi__get8(s);
+}
+
+static stbi__uint32 stbi__get32be(stbi__context *s) // read a 32-bit big-endian value
+{
+   stbi__uint32 z = stbi__get16be(s); // high half first; z is unsigned, so the shift below is well defined
+   return (z << 16) + stbi__get16be(s);
+}
+
+#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) // omitted only when all three formats are disabled
+// nothing
+#else
+static int stbi__get16le(stbi__context *s) // read a 16-bit little-endian value
+{
+   int z = stbi__get8(s); // low byte first
+   return z + (stbi__get8(s) << 8);
+}
+#endif
+
+#ifndef STBI_NO_BMP
+static stbi__uint32 stbi__get32le(stbi__context *s) // read a 32-bit little-endian value
+{
+   stbi__uint32 z = stbi__get16le(s); // low half first
+   return z + ((stbi__uint32) stbi__get16le(s) << 16); // widen to unsigned before shifting: an int holding 0x8000..0xFFFF shifted by 16 would overflow into the sign bit
+}
+#endif
+
+#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 255))  // truncate int to byte without warnings: mask to the low 8 bits, then cast
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//  generic converter from built-in img_n to req_comp
+//    individual types do this automatically as much as possible (e.g. jpeg
+//    does all cases internally since it needs to colorspace convert anyway,
+//    and it never has alpha, so very few cases ). png can automatically
+//    interleave an alpha=255 channel, but falls back to this for other cases
+//
+//  assume data buffer is malloced, so malloc a new one and free that one
+//  only failure mode is malloc failing
+
+static stbi_uc stbi__compute_y(int r, int g, int b) // collapse an RGB triple to a single grey value
+{
+   return (stbi_uc) (((r*77) + (g*150) +  (29*b)) >> 8); // integer weights 77+150+29 = 256, so >> 8 renormalises
+}
+
+static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y) // convert an 8-bit image from img_n to req_comp channels; data is released unless it is returned unchanged
+{
+   int i,j;
+   unsigned char *good;
+
+   if (req_comp == img_n) return data; // nothing to do: the same buffer is handed back
+   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
+
+   good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0); // overflow-checked size
+   if (good == NULL) {
+      STBI_FREE(data); // the input is released on failure too
+      return stbi__errpuc("outofmem", "Out of memory");
+   }
+
+   for (j=0; j < (int) y; ++j) {
+      unsigned char *src  = data + j * x * img_n   ;
+      unsigned char *dest = good + j * x * req_comp;
+
+      #define STBI__COMBO(a,b)  ((a)*8+(b))
+      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
+      // convert source image with img_n components to one with req_comp components;
+      // avoid switch per pixel, so use switch per scanline and massive macros
+      switch (STBI__COMBO(img_n, req_comp)) {
+         STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=255;                                     } break;
+         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
+         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=255;                     } break;
+         STBI__CASE(2,1) { dest[0]=src[0];                                                  } break;
+         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
+         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1];                  } break;
+         STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=255;        } break;
+         STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
+         STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = 255;    } break;
+         STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
+         STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = src[3]; } break;
+         STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];                    } break;
+         default: STBI_ASSERT(0);
+      }
+      #undef STBI__CASE
+   }
+
+   STBI_FREE(data); // the converted copy replaces the input
+   return good;
+}
+
+static stbi__uint16 stbi__compute_y_16(int r, int g, int b) // 16-bit counterpart of stbi__compute_y
+{
+   return (stbi__uint16) (((r*77) + (g*150) +  (29*b)) >> 8); // same 77/150/29 weights, summing to 256
+}
+
+static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y) // convert a 16-bit image from img_n to req_comp channels; data is released unless it is returned unchanged
+{
+   int i,j;
+   stbi__uint16 *good;
+
+   if (req_comp == img_n) return data; // nothing to do: the same buffer is handed back
+   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
+
+   good = (stbi__uint16 *) stbi__malloc_mad3(req_comp * 2, x, y, 0); // overflow-checked like the 8-bit converter; req_comp*2 is bytes per output pixel (at most 8)
+   if (good == NULL) {
+      STBI_FREE(data); // the input is released on failure too
+      return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
+   }
+
+   for (j=0; j < (int) y; ++j) {
+      stbi__uint16 *src  = data + j * x * img_n   ;
+      stbi__uint16 *dest = good + j * x * req_comp;
+
+      #define STBI__COMBO(a,b)  ((a)*8+(b))
+      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
+      // convert source image with img_n components to one with req_comp components;
+      // avoid switch per pixel, so use switch per scanline and massive macros
+      switch (STBI__COMBO(img_n, req_comp)) {
+         STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=0xffff;                                     } break;
+         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
+         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=0xffff;                     } break;
+         STBI__CASE(2,1) { dest[0]=src[0];                                                     } break;
+         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
+         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1];                     } break;
+         STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=0xffff;        } break;
+         STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
+         STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = 0xffff; } break;
+         STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
+         STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = src[3]; } break;
+         STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];                       } break;
+         default: STBI_ASSERT(0);
+      }
+      #undef STBI__CASE
+   }
+
+   STBI_FREE(data); // the converted copy replaces the input
+   return good;
+}
+
+#ifndef STBI_NO_LINEAR
+static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp) // convert 8-bit pixels to float, applying gamma and scale to the colour channels; data is released
+{
+   int i,k,n;
+   float *output;
+   if (!data) return NULL; // NULL in, NULL out
+   output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
+   if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
+   // compute number of non-alpha components
+   if (comp & 1) n = comp; else n = comp-1; // odd channel count: no alpha; even: the last channel is alpha
+   for (i=0; i < x*y; ++i) {
+      for (k=0; k < n; ++k) {
+         output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
+      }
+   }
+   if (n < comp) { // alpha present: copied linearly, no gamma or scale
+      for (i=0; i < x*y; ++i) {
+         output[i*comp + n] = data[i*comp + n]/255.0f;
+      }
+   }
+   STBI_FREE(data);
+   return output;
+}
+#endif
+
+#ifndef STBI_NO_HDR
+#define stbi__float2int(x)   ((int) (x)) // plain truncating cast
+static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp) // convert float pixels to 8-bit, applying inverse scale and gamma to the colour channels; data is released
+{
+   int i,k,n;
+   stbi_uc *output;
+   if (!data) return NULL; // NULL in, NULL out
+   output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
+   if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
+   // compute number of non-alpha components
+   if (comp & 1) n = comp; else n = comp-1; // odd channel count: no alpha; even: the last channel is alpha
+   for (i=0; i < x*y; ++i) {
+      for (k=0; k < n; ++k) {
+         float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f; // + 0.5f rounds to nearest once truncated
+         if (z < 0) z = 0;
+         if (z > 255) z = 255;
+         output[i*comp + k] = (stbi_uc) stbi__float2int(z);
+      }
+      if (k < comp) { // k == n here, so this handles the alpha channel: linear, no gamma or scale
+         float z = data[i*comp+k] * 255 + 0.5f;
+         if (z < 0) z = 0;
+         if (z > 255) z = 255;
+         output[i*comp + k] = (stbi_uc) stbi__float2int(z);
+      }
+   }
+   STBI_FREE(data);
+   return output;
+}
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//  "baseline" JPEG/JFIF decoder
+//
+//    simple implementation
+//      - doesn't support delayed output of y-dimension
+//      - simple interface (only one output format: 8-bit interleaved RGB)
+//      - doesn't try to recover corrupt jpegs
+//      - doesn't allow partial loading, loading multiple at once
+//      - still fast on x86 (copying globals into locals doesn't help x86)
+//      - allocates lots of intermediate memory (full size of all components)
+//        - non-interleaved case requires this anyway
+//        - allows good upsampling (see next)
+//    high-quality
+//      - upsampled channels are bilinearly interpolated, even across blocks
+//      - quality integer IDCT derived from IJG's 'slow'
+//    performance
+//      - fast huffman; reasonable integer IDCT
+//      - some SIMD kernels for common paths on targets with SSE2/NEON
+//      - uses a lot of intermediate memory, could cache poorly
+
+#ifndef STBI_NO_JPEG
+
+// huffman decoding acceleration
+#define FAST_BITS   9  // larger handles more cases; smaller stomps less cache
+
+typedef struct
+{
+   stbi_uc  fast[1 << FAST_BITS];  // symbol index keyed by the next FAST_BITS bits; 255 marks "not accelerated"
+   // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
+   stbi__uint16 code[256];         // huffman code assigned to each symbol index
+   stbi_uc  values[256];           // value returned by the decoder for each symbol index
+   stbi_uc  size[257];             // code length in bits per symbol index; a 0 entry ends the list
+   unsigned int maxcode[18];       // per length: largest code + 1, preshifted to 16 bits; entry 17 is 0xffffffff
+   int    delta[17];   // old 'firstsymbol' - old 'firstcode'; added to a code to get its symbol index
+} stbi__huffman;
+
+typedef struct
+{
+   stbi__context *s;   // input context; entropy-coded bytes are read from it with stbi__get8
+   stbi__huffman huff_dc[4];   // presumably the DC huffman tables by table id -- confirm in the DHT parser
+   stbi__huffman huff_ac[4];   // presumably the AC huffman tables by table id -- confirm in the DHT parser
+   stbi__uint16 dequant[4][64];
+   stbi__int16 fast_ac[4][1 << FAST_BITS];
+
+// sizes for components, interleaved MCUs
+   int img_h_max, img_v_max;
+   int img_mcu_x, img_mcu_y;
+   int img_mcu_w, img_mcu_h;
+
+// definition of jpeg image component
+   struct
+   {
+      int id;
+      int h,v;
+      int tq;
+      int hd,ha;
+      int dc_pred;      // running DC value; each decoded DC difference is added to it
+
+      int x,y,w2,h2;
+      stbi_uc *data;
+      void *raw_data, *raw_coeff;
+      stbi_uc *linebuf;
+      short   *coeff;   // progressive only
+      int      coeff_w, coeff_h; // number of 8x8 coefficient blocks
+   } img_comp[4];
+
+   stbi__uint32   code_buffer; // jpeg entropy-coded buffer; valid bits sit at the most significant end
+   int            code_bits;   // number of valid bits
+   unsigned char  marker;      // marker seen while filling entropy buffer
+   int            nomore;      // flag if we saw a marker so must stop
+
+   int            progressive;
+   int            spec_start;  // first zigzag index coded by the current progressive scan
+   int            spec_end;    // last zigzag index coded by the current progressive scan
+   int            succ_high;   // 0 selects a first pass, non-zero a refinement pass
+   int            succ_low;    // bit position of the coefficient bits coded by the current scan
+   int            eob_run;     // blocks still covered by the current end-of-band run
+   int            jfif;
+   int            app14_color_transform; // Adobe APP14 tag
+   int            rgb;
+
+   int scan_n, order[4];
+   int restart_interval, todo;
+
+// kernels
+   void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
+   void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
+   stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
+} stbi__jpeg;
+
+// builds h's tables from count[0..15] (codes per length 1..16); returns 0 via stbi__err on a bad table
+static int stbi__build_huffman(stbi__huffman *h, int *count)
+{
+   int i,j,k=0;
+   unsigned int code = 0;
+   // build size list for each symbol (from JPEG spec)
+   for (i=0; i < 16; ++i)
+      for (j=0; j < count[i]; ++j) {
+         if (k >= 256) return stbi__err("bad size list","Corrupt JPEG"); // tables hold 256 symbols
+         h->size[k++] = (stbi_uc) (i+1);
+      }
+   h->size[k] = 0;
+   // compute actual symbols (from jpeg spec)
+   k = 0;
+   for(j=1; j <= 16; ++j) {
+      // compute delta to add to code to compute symbol id
+      h->delta[j] = k - code;
+      if (h->size[k] == j) {
+         while (h->size[k] == j)
+            h->code[k++] = (stbi__uint16) (code++);
+         if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG");
+      }
+      // compute largest code + 1 for this size, preshifted as needed later
+      h->maxcode[j] = code << (16-j);
+      code <<= 1;
+   }
+   h->maxcode[j] = 0xffffffff; // j == 17 here: sentinel that always ends the length search
+   // build non-spec acceleration table; 255 is flag for not-accelerated
+   memset(h->fast, 255, 1 << FAST_BITS);
+   for (i=0; i < k; ++i) {
+      int s = h->size[i];
+      if (s <= FAST_BITS) {
+         int c = h->code[i] << (FAST_BITS-s);
+         int m = 1 << (FAST_BITS-s);
+         for (j=0; j < m; ++j) {
+            h->fast[c+j] = (stbi_uc) i;
+         }
+      }
+   }
+   return 1;
+}
+
+// build a table that decodes both magnitude and value of small ACs in
+// one go. a non-zero entry packs (value * 256) + (run * 16) + (bits consumed); 0 means "not accelerated".
+static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
+{
+   int i;
+   for (i=0; i < (1 << FAST_BITS); ++i) {
+      stbi_uc fast = h->fast[i];
+      fast_ac[i] = 0;
+      if (fast < 255) {
+         int rs = h->values[fast];
+         int run = (rs >> 4) & 15;
+         int magbits = rs & 15;
+         int len = h->size[fast];
+         // only usable when the huffman code and its magnitude bits both fit inside the FAST_BITS window
+         if (magbits && len + magbits <= FAST_BITS) {
+            // magnitude code followed by receive_extend code
+            int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
+            int m = 1 << (magbits - 1);
+            if (k < m) k += (~0U << magbits) + 1; // leading bit 0 means negative: add -(1 << magbits) + 1
+            // if the result is small enough, we can fit it in fast_ac table
+            if (k >= -128 && k <= 127)
+               fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits));
+         }
+      }
+   }
+}
+
+static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
+{
+   // appends bytes below the valid bits until more than 24 are buffered; after a marker, zeros are appended
+   for (;;) {
+      unsigned int byte = 0;
+      if (!j->nomore) byte = stbi__get8(j->s);
+      if (byte == 0xff) {
+         int next;
+         do { next = stbi__get8(j->s); } while (next == 0xff); // consume fill bytes
+         // 0xff followed by 0 keeps the 0xff as data; any other follower is recorded as a marker
+         if (next != 0) { j->marker = (unsigned char) next; j->nomore = 1; return; }
+      }
+      j->code_buffer |= byte << (24 - j->code_bits);
+      j->code_bits += 8;
+      if (j->code_bits > 24) break;
+   }
+}
+
+// stbi__bmask[n] = (1 << n) - 1, i.e. a mask of the low n bits, for n = 0..16
+static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
+
+// decode a jpeg huffman value from the bitstream; returns the symbol value, or -1 on a bad or truncated code
+stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
+{
+   unsigned int temp;
+   int c,k;
+
+   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+
+   // look at the top FAST_BITS and determine what symbol ID it is,
+   // if the code is <= FAST_BITS
+   c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
+   k = h->fast[c];
+   if (k < 255) {
+      int s = h->size[k];
+      if (s > j->code_bits)
+         return -1;
+      j->code_buffer <<= s;
+      j->code_bits -= s;
+      return h->values[k];
+   }
+
+   // naive test is to shift the code_buffer down so k bits are
+   // valid, then test against maxcode. To speed this up, we've
+   // preshifted maxcode left so that it has (16-k) 0s at the
+   // end; in other words, regardless of the number of bits, it
+   // wants to be compared against something shifted to have 16;
+   // that way we don't need to shift inside the loop.
+   temp = j->code_buffer >> 16;
+   for (k=FAST_BITS+1 ; ; ++k)
+      if (temp < h->maxcode[k])
+         break;
+   if (k == 17) {
+      // error! code not found
+      j->code_bits -= 16;
+      return -1;
+   }
+
+   if (k > j->code_bits)
+      return -1;
+
+   // convert the huffman code to the symbol id
+   c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
+   if (c < 0 || c >= 256) return -1; // defensive: the id indexes size/code/values, which hold 256 symbols
+   STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
+   // convert the id to a symbol
+   j->code_bits -= k;
+   j->code_buffer <<= k;
+   return h->values[c];
+}
+
+// bias[n] = (-1<<n) + 1, for n = 0..15; added to an n-bit value whose leading bit is 0 to make it negative
+static const int stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767};
+
+// combined JPEG 'receive' and JPEG 'extend', since baseline
+// always extends everything it receives.
+stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
+{
+   unsigned int k;
+   int sgn;
+   if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
+   // sgn is meant to be all ones when the leading bit is set and 0 otherwise (relies on an arithmetic right shift)
+   sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
+   k = stbi_lrot(j->code_buffer, n); // presumably a left rotate, bringing the n bits being read to the low end
+   STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask))); // NOTE(review): admits n == 16, but stbi__jbias only has entries 0..15
+   j->code_buffer = k & ~stbi__bmask[n]; // drop the n bits just read from the buffer
+   k &= stbi__bmask[n];
+   j->code_bits -= n;
+   return k + (stbi__jbias[n] & ~sgn); // the bias is applied only when the leading bit was 0
+}
+
+// read the next n bits of the entropy buffer and return them without sign extension
+stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
+{
+   unsigned int rotated, low_mask;
+   if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
+   low_mask = stbi__bmask[n];
+   rotated = stbi_lrot(j->code_buffer, n);
+   j->code_buffer = rotated & ~low_mask;
+   j->code_bits -= n;
+   return rotated & low_mask;
+}
+
+stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
+{
+   unsigned int top_bit; // next stream bit, returned in place at bit 31 (non-zero means the bit was set)
+   if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
+   top_bit = j->code_buffer & 0x80000000;
+   j->code_buffer <<= 1;
+   j->code_bits -= 1;
+   return top_bit;
+}
+
+// given a value that's at position X in the zigzag stream,
+// where does it appear in the 8x8 matrix coded as row-major? (64 real entries plus 15 of padding)
+static const stbi_uc stbi__jpeg_dezigzag[64+15] =
+{
+    0,  1,  8, 16,  9,  2,  3, 10,
+   17, 24, 32, 25, 18, 11,  4,  5,
+   12, 19, 26, 33, 40, 48, 41, 34,
+   27, 20, 13,  6,  7, 14, 21, 28,
+   35, 42, 49, 56, 57, 50, 43, 36,
+   29, 22, 15, 23, 30, 37, 44, 51,
+   58, 59, 52, 45, 38, 31, 39, 46,
+   53, 60, 61, 54, 47, 55, 62, 63,
+   // let corrupt input sample past end: a run of up to 15 can push the index to 63+15, which maps to 63
+   63, 63, 63, 63, 63, 63, 63, 63,
+   63, 63, 63, 63, 63, 63, 63
+};
+
+// decode one 64-entry block-- the DC difference first, then AC run/size pairs, dequantized into data[]
+// in row-major order. returns 1 on success, 0 (via stbi__err) on a corrupt stream.
+static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
+{
+   int diff,dc,k;
+   int t;
+   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+   t = stbi__jpeg_huff_decode(j, hdc);
+   // t is the bit count of the DC difference; values above 15 would index past stbi__jbias in stbi__extend_receive
+   if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG");
+   // 0 all the ac values now so we can do it 32-bits at a time
+   memset(data,0,64*sizeof(data[0]));
+
+   diff = t ? stbi__extend_receive(j, t) : 0;
+   dc = j->img_comp[b].dc_pred + diff;
+   j->img_comp[b].dc_pred = dc;
+   data[0] = (short) (dc * dequant[0]);
+
+   // decode AC components, see JPEG spec
+   k = 1;
+   do {
+      unsigned int zig;
+      int c,r,s;
+      if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+      c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
+      r = fac[c];
+      if (r) { // fast-AC path
+         k += (r >> 4) & 15; // run
+         s = r & 15; // combined length
+         j->code_buffer <<= s;
+         j->code_bits -= s;
+         // decode into unzigzag'd location
+         zig = stbi__jpeg_dezigzag[k++];
+         data[zig] = (short) ((r >> 8) * dequant[zig]);
+      } else {
+         int rs = stbi__jpeg_huff_decode(j, hac);
+         if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
+         s = rs & 15;
+         r = rs >> 4;
+         if (s == 0) {
+            if (rs != 0xf0) break; // end block
+            k += 16;
+         } else {
+            k += r;
+            // decode into unzigzag'd location
+            zig = stbi__jpeg_dezigzag[k++];
+            data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
+         }
+      }
+   } while (k < 64);
+   return 1;
+}
+
+// progressive DC scan for one block: the first pass decodes the DC difference, refinement passes add one bit
+static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
+{
+   int diff,dc;
+   int t;
+   if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
+   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+   if (j->succ_high == 0) {
+      // first scan for DC coefficient, must be first
+      memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
+      t = stbi__jpeg_huff_decode(j, hdc);
+      // -1 signals a bad code; bit counts above 15 would index past stbi__jbias in stbi__extend_receive
+      if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG");
+      diff = t ? stbi__extend_receive(j, t) : 0;
+      dc = j->img_comp[b].dc_pred + diff;
+      j->img_comp[b].dc_pred = dc;
+      data[0] = (short) (dc * (1 << j->succ_low)); // multiply: left-shifting a negative dc is undefined
+   } else {
+      // refinement scan for DC coefficient
+      if (stbi__jpeg_get_bit(j))
+         data[0] += (short) (1 << j->succ_low);
+   }
+   return 1;
+}
+
+// @OPTIMIZE: store non-zigzagged during the decode passes,
+// and only de-zigzag when dequantizing
+static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
+{
+   int k;
+   if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
+
+   if (j->succ_high == 0) {
+      int shift = j->succ_low;
+      // a pending end-of-band run means this block has no coefficients in this scan
+      if (j->eob_run) {
+         --j->eob_run;
+         return 1;
+      }
+
+      k = j->spec_start;
+      do {
+         unsigned int zig;
+         int c,r,s;
+         if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+         c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
+         r = fac[c];
+         if (r) { // fast-AC path
+            k += (r >> 4) & 15; // run
+            s = r & 15; // combined length
+            j->code_buffer <<= s;
+            j->code_bits -= s;
+            zig = stbi__jpeg_dezigzag[k++];
+            data[zig] = (short) ((r >> 8) * (1 << shift)); // multiply: left-shifting a negative value is undefined
+         } else {
+            int rs = stbi__jpeg_huff_decode(j, hac);
+            if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
+            s = rs & 15;
+            r = rs >> 4;
+            if (s == 0) {
+               if (r < 15) {
+                  j->eob_run = (1 << r);
+                  if (r)
+                     j->eob_run += stbi__jpeg_get_bits(j, r);
+                  --j->eob_run;
+                  break;
+               }
+               k += 16;
+            } else {
+               k += r;
+               zig = stbi__jpeg_dezigzag[k++];
+               data[zig] = (short) (stbi__extend_receive(j,s) * (1 << shift)); // multiply: see fast path
+            }
+         }
+      } while (k <= j->spec_end);
+   } else {
+      // refinement scan for these AC coefficients
+      // each pass adds the bit (1 << succ_low) to coefficients in this band that are already non-zero
+      short bit = (short) (1 << j->succ_low);
+
+      if (j->eob_run) {
+         --j->eob_run;
+         for (k = j->spec_start; k <= j->spec_end; ++k) {
+            short *p = &data[stbi__jpeg_dezigzag[k]];
+            if (*p != 0)
+               if (stbi__jpeg_get_bit(j))
+                  if ((*p & bit)==0) {
+                     if (*p > 0)
+                        *p += bit;
+                     else
+                        *p -= bit;
+                  }
+         }
+      } else {
+         k = j->spec_start;
+         do {
+            int r,s;
+            int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
+            if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
+            s = rs & 15;
+            r = rs >> 4;
+            if (s == 0) {
+               if (r < 15) {
+                  j->eob_run = (1 << r) - 1;
+                  if (r)
+                     j->eob_run += stbi__jpeg_get_bits(j, r);
+                  r = 64; // force end of block
+               } else {
+                  // r=15 s=0 should write 16 0s, so we just do
+                  // a run of 15 0s and then write s (which is 0),
+                  // so we don't have to do anything special here
+               }
+            } else {
+               if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
+               // sign bit
+               if (stbi__jpeg_get_bit(j))
+                  s = bit;
+               else
+                  s = -bit;
+            }
+
+            // advance by r
+            while (k <= j->spec_end) {
+               short *p = &data[stbi__jpeg_dezigzag[k++]];
+               if (*p != 0) {
+                  if (stbi__jpeg_get_bit(j))
+                     if ((*p & bit)==0) {
+                        if (*p > 0)
+                           *p += bit;
+                        else
+                           *p -= bit;
+                     }
+               } else {
+                  if (r == 0) {
+                     *p = (short) s;
+                     break;
+                  }
+                  --r;
+               }
+            }
+         } while (k <= j->spec_end);
+      }
+   }
+   return 1;
+}
+
+// clamp an int to 0..255 and return it as a byte
+stbi_inline static stbi_uc stbi__clamp(int x)
+{
+   // a single unsigned compare accepts the in-range case: negative values
+   // wrap to large unsigned numbers and fail it along with values above 255
+   if ((unsigned int) x <= 255)
+      return (stbi_uc) x;
+   // out of range: saturate to the bound on the side x fell off
+   return (stbi_uc) (x < 0 ? 0 : 255);
+}
+
+#define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5)))  // constant scaled by 4096 (1<<12), 0.5 added before truncation to int
+#define stbi__fsh(x)  ((x) * 4096)                  // scale a value by the same 4096 (1<<12) factor
+// one 1D 8-point IDCT over s0..s7; declares its own locals and leaves x0..x3 / t0..t3 for the caller to combine
+// derived from jidctint -- DCT_ISLOW
+#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
+   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
+   p2 = s2;                                    \
+   p3 = s6;                                    \
+   p1 = (p2+p3) * stbi__f2f(0.5411961f);       \
+   t2 = p1 + p3*stbi__f2f(-1.847759065f);      \
+   t3 = p1 + p2*stbi__f2f( 0.765366865f);      \
+   p2 = s0;                                    \
+   p3 = s4;                                    \
+   t0 = stbi__fsh(p2+p3);                      \
+   t1 = stbi__fsh(p2-p3);                      \
+   x0 = t0+t3;                                 \
+   x3 = t0-t3;                                 \
+   x1 = t1+t2;                                 \
+   x2 = t1-t2;                                 \
+   t0 = s7;                                    \
+   t1 = s5;                                    \
+   t2 = s3;                                    \
+   t3 = s1;                                    \
+   p3 = t0+t2;                                 \
+   p4 = t1+t3;                                 \
+   p1 = t0+t3;                                 \
+   p2 = t1+t2;                                 \
+   p5 = (p3+p4)*stbi__f2f( 1.175875602f);      \
+   t0 = t0*stbi__f2f( 0.298631336f);           \
+   t1 = t1*stbi__f2f( 2.053119869f);           \
+   t2 = t2*stbi__f2f( 3.072711026f);           \
+   t3 = t3*stbi__f2f( 1.501321110f);           \
+   p1 = p5 + p1*stbi__f2f(-0.899976223f);      \
+   p2 = p5 + p2*stbi__f2f(-2.562915447f);      \
+   p3 = p3*stbi__f2f(-1.961570560f);           \
+   p4 = p4*stbi__f2f(-0.390180644f);           \
+   t3 += p1+p4;                                \
+   t2 += p2+p3;                                \
+   t1 += p2+p4;                                \
+   t0 += p1+p3;
+
+static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
+{
+   int i,val[64],*v=val;
+   stbi_uc *o;
+   short *d = data;
+   // 2D IDCT as two 1D passes: columns of data[] into val[], then rows of val[] into clamped bytes at out
+   // columns
+   for (i=0; i < 8; ++i,++d, ++v) {
+      // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
+      if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
+           && d[40]==0 && d[48]==0 && d[56]==0) {
+         //    no shortcut                 0     seconds
+         //    (1|2|3|4|5|6|7)==0          0     seconds
+         //    all separate               -0.047 seconds
+         //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
+         int dcterm = d[0]*4; // *4 matches the 2 extra bits of precision kept by the general path
+         v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
+      } else {
+         STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
+         // constants scaled things up by 1<<12; let's bring them back
+         // down, but keep 2 extra bits of precision
+         x0 += 512; x1 += 512; x2 += 512; x3 += 512;
+         v[ 0] = (x0+t3) >> 10;
+         v[56] = (x0-t3) >> 10;
+         v[ 8] = (x1+t2) >> 10;
+         v[48] = (x1-t2) >> 10;
+         v[16] = (x2+t1) >> 10;
+         v[40] = (x2-t1) >> 10;
+         v[24] = (x3+t0) >> 10;
+         v[32] = (x3-t0) >> 10;
+      }
+   }
+   // rows: one output row of 8 bytes per iteration, out_stride bytes apart
+   for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
+      // no fast case since the first 1D IDCT spread components out
+      STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
+      // constants scaled things up by 1<<12, plus we had 1<<2 from first
+      // loop, plus horizontal and vertical each scale by sqrt(8) so together
+      // we've got an extra 1<<3, so 1<<17 total we need to remove.
+      // so we want to round that, which means adding 0.5 * 1<<17,
+      // aka 65536. Also, we'll end up with -128 to 127 that we want
+      // to encode as 0..255 by adding 128, so we'll add that before the shift
+      x0 += 65536 + (128<<17);
+      x1 += 65536 + (128<<17);
+      x2 += 65536 + (128<<17);
+      x3 += 65536 + (128<<17);
+      // tried computing the shifts into temps, or'ing the temps to see
+      // if any were out of range, but that was slower
+      o[0] = stbi__clamp((x0+t3) >> 17);
+      o[7] = stbi__clamp((x0-t3) >> 17);
+      o[1] = stbi__clamp((x1+t2) >> 17);
+      o[6] = stbi__clamp((x1-t2) >> 17);
+      o[2] = stbi__clamp((x2+t1) >> 17);
+      o[5] = stbi__clamp((x2-t1) >> 17);
+      o[3] = stbi__clamp((x3+t0) >> 17);
+      o[4] = stbi__clamp((x3-t0) >> 17);
+   }
+}
+
+#ifdef STBI_SSE2
+// sse2 integer IDCT. not the fastest possible implementation but it
+// produces bit-identical results to the generic C version so it's
+// fully "transparent". same signature as stbi__idct_block.
+static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
+{
+   // This is constructed to match our regular (generic) integer IDCT exactly.
+   __m128i row0, row1, row2, row3, row4, row5, row6, row7;
+   __m128i tmp;
+
+   // dot product constant: even elems=x, odd elems=y
+   #define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))
+
+   // out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)
+   // out(1) = c1[even]*x + c1[odd]*y
+   #define dct_rot(out0,out1, x,y,c0,c1) \
+      __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
+      __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
+      __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
+      __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
+      __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
+      __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
+
+   // out = in << 12  (in 16-bit, out 32-bit)
+   #define dct_widen(out, in) \
+      __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
+      __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
+
+   // wide add
+   #define dct_wadd(out, a, b) \
+      __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
+      __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
+
+   // wide sub
+   #define dct_wsub(out, a, b) \
+      __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
+      __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
+
+   // butterfly a/b, add bias, then shift by "s" and pack
+   #define dct_bfly32o(out0, out1, a,b,bias,s) \
+      { \
+         __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
+         __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
+         dct_wadd(sum, abiased, b); \
+         dct_wsub(dif, abiased, b); \
+         out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
+         out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
+      }
+
+   // 8-bit interleave step (for transposes)
+   #define dct_interleave8(a, b) \
+      tmp = a; \
+      a = _mm_unpacklo_epi8(a, b); \
+      b = _mm_unpackhi_epi8(tmp, b)
+
+   // 16-bit interleave step (for transposes)
+   #define dct_interleave16(a, b) \
+      tmp = a; \
+      a = _mm_unpacklo_epi16(a, b); \
+      b = _mm_unpackhi_epi16(tmp, b)
+
+   #define dct_pass(bias,shift) \
+      { \
+         /* even part */ \
+         dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
+         __m128i sum04 = _mm_add_epi16(row0, row4); \
+         __m128i dif04 = _mm_sub_epi16(row0, row4); \
+         dct_widen(t0e, sum04); \
+         dct_widen(t1e, dif04); \
+         dct_wadd(x0, t0e, t3e); \
+         dct_wsub(x3, t0e, t3e); \
+         dct_wadd(x1, t1e, t2e); \
+         dct_wsub(x2, t1e, t2e); \
+         /* odd part */ \
+         dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
+         dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
+         __m128i sum17 = _mm_add_epi16(row1, row7); \
+         __m128i sum35 = _mm_add_epi16(row3, row5); \
+         dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
+         dct_wadd(x4, y0o, y4o); \
+         dct_wadd(x5, y1o, y5o); \
+         dct_wadd(x6, y2o, y5o); \
+         dct_wadd(x7, y3o, y4o); \
+         dct_bfly32o(row0,row7, x0,x7,bias,shift); \
+         dct_bfly32o(row1,row6, x1,x6,bias,shift); \
+         dct_bfly32o(row2,row5, x2,x5,bias,shift); \
+         dct_bfly32o(row3,row4, x3,x4,bias,shift); \
+      }
+
+   __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
+   __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
+   __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
+   __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
+   __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
+   __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
+   __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
+   __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));
+
+   // rounding biases in column/row passes, see stbi__idct_block for explanation.
+   __m128i bias_0 = _mm_set1_epi32(512);
+   __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));
+
+   // load (_mm_load_si128 is the aligned load, so data must be 16-byte aligned)
+   row0 = _mm_load_si128((const __m128i *) (data + 0*8));
+   row1 = _mm_load_si128((const __m128i *) (data + 1*8));
+   row2 = _mm_load_si128((const __m128i *) (data + 2*8));
+   row3 = _mm_load_si128((const __m128i *) (data + 3*8));
+   row4 = _mm_load_si128((const __m128i *) (data + 4*8));
+   row5 = _mm_load_si128((const __m128i *) (data + 5*8));
+   row6 = _mm_load_si128((const __m128i *) (data + 6*8));
+   row7 = _mm_load_si128((const __m128i *) (data + 7*8));
+
+   // column pass
+   dct_pass(bias_0, 10);
+
+   {
+      // 16bit 8x8 transpose pass 1
+      dct_interleave16(row0, row4);
+      dct_interleave16(row1, row5);
+      dct_interleave16(row2, row6);
+      dct_interleave16(row3, row7);
+
+      // transpose pass 2
+      dct_interleave16(row0, row2);
+      dct_interleave16(row1, row3);
+      dct_interleave16(row4, row6);
+      dct_interleave16(row5, row7);
+
+      // transpose pass 3
+      dct_interleave16(row0, row1);
+      dct_interleave16(row2, row3);
+      dct_interleave16(row4, row5);
+      dct_interleave16(row6, row7);
+   }
+
+   // row pass
+   dct_pass(bias_1, 17);
+
+   {
+      // pack
+      __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
+      __m128i p1 = _mm_packus_epi16(row2, row3);
+      __m128i p2 = _mm_packus_epi16(row4, row5);
+      __m128i p3 = _mm_packus_epi16(row6, row7);
+
+      // 8bit 8x8 transpose pass 1
+      dct_interleave8(p0, p2); // a0e0a1e1...
+      dct_interleave8(p1, p3); // c0g0c1g1...
+
+      // transpose pass 2
+      dct_interleave8(p0, p1); // a0c0e0g0...
+      dct_interleave8(p2, p3); // b0d0f0h0...
+
+      // transpose pass 3
+      dct_interleave8(p0, p2); // a0b0c0d0...
+      dct_interleave8(p1, p3); // a4b4c4d4...
+
+      // store: each call writes the low 8 bytes (one output row); shuffle 0x4e swaps the 64-bit halves first
+      _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
+      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
+      _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
+      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
+      _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
+      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
+      _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
+      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
+   }
+
+#undef dct_const
+#undef dct_rot
+#undef dct_widen
+#undef dct_wadd
+#undef dct_wsub
+#undef dct_bfly32o
+#undef dct_interleave8
+#undef dct_interleave16
+#undef dct_pass
+}
+
+#endif // STBI_SSE2
+
+#ifdef STBI_NEON
+
+// NEON integer IDCT. should produce bit-identical
+// results to the generic C version.
+static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
+{
+   int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
+
+   int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
+   int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
+   int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
+   int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
+   int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
+   int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
+   int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
+   int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
+   int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
+   int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
+   int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
+   int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));
+
+#define dct_long_mul(out, inq, coeff) \
+   int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
+   int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
+
+#define dct_long_mac(out, acc, inq, coeff) \
+   int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
+   int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
+
+#define dct_widen(out, inq) \
+   int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
+   int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
+
+// wide add
+#define dct_wadd(out, a, b) \
+   int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
+   int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
+
+// wide sub
+#define dct_wsub(out, a, b) \
+   int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
+   int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
+
+// butterfly a/b, then shift using "shiftop" by "s" and pack
+#define dct_bfly32o(out0,out1, a,b,shiftop,s) \
+   { \
+      dct_wadd(sum, a, b); \
+      dct_wsub(dif, a, b); \
+      out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
+      out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
+   }
+
+#define dct_pass(shiftop, shift) \
+   { \
+      /* even part */ \
+      int16x8_t sum26 = vaddq_s16(row2, row6); \
+      dct_long_mul(p1e, sum26, rot0_0); \
+      dct_long_mac(t2e, p1e, row6, rot0_1); \
+      dct_long_mac(t3e, p1e, row2, rot0_2); \
+      int16x8_t sum04 = vaddq_s16(row0, row4); \
+      int16x8_t dif04 = vsubq_s16(row0, row4); \
+      dct_widen(t0e, sum04); \
+      dct_widen(t1e, dif04); \
+      dct_wadd(x0, t0e, t3e); \
+      dct_wsub(x3, t0e, t3e); \
+      dct_wadd(x1, t1e, t2e); \
+      dct_wsub(x2, t1e, t2e); \
+      /* odd part */ \
+      int16x8_t sum15 = vaddq_s16(row1, row5); \
+      int16x8_t sum17 = vaddq_s16(row1, row7); \
+      int16x8_t sum35 = vaddq_s16(row3, row5); \
+      int16x8_t sum37 = vaddq_s16(row3, row7); \
+      int16x8_t sumodd = vaddq_s16(sum17, sum35); \
+      dct_long_mul(p5o, sumodd, rot1_0); \
+      dct_long_mac(p1o, p5o, sum17, rot1_1); \
+      dct_long_mac(p2o, p5o, sum35, rot1_2); \
+      dct_long_mul(p3o, sum37, rot2_0); \
+      dct_long_mul(p4o, sum15, rot2_1); \
+      dct_wadd(sump13o, p1o, p3o); \
+      dct_wadd(sump24o, p2o, p4o); \
+      dct_wadd(sump23o, p2o, p3o); \
+      dct_wadd(sump14o, p1o, p4o); \
+      dct_long_mac(x4, sump13o, row7, rot3_0); \
+      dct_long_mac(x5, sump24o, row5, rot3_1); \
+      dct_long_mac(x6, sump23o, row3, rot3_2); \
+      dct_long_mac(x7, sump14o, row1, rot3_3); \
+      dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
+      dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
+      dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
+      dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
+   }
+
+   // load
+   row0 = vld1q_s16(data + 0*8);
+   row1 = vld1q_s16(data + 1*8);
+   row2 = vld1q_s16(data + 2*8);
+   row3 = vld1q_s16(data + 3*8);
+   row4 = vld1q_s16(data + 4*8);
+   row5 = vld1q_s16(data + 5*8);
+   row6 = vld1q_s16(data + 6*8);
+   row7 = vld1q_s16(data + 7*8);
+
+   // add DC bias
+   row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
+
+   // column pass
+   dct_pass(vrshrn_n_s32, 10);
+
+   // 16bit 8x8 transpose
+   {
+// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
+// whether compilers actually get this is another story, sadly.
+#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
+#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
+#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }
+
+      // pass 1
+      dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
+      dct_trn16(row2, row3);
+      dct_trn16(row4, row5);
+      dct_trn16(row6, row7);
+
+      // pass 2
+      dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
+      dct_trn32(row1, row3);
+      dct_trn32(row4, row6);
+      dct_trn32(row5, row7);
+
+      // pass 3
+      dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
+      dct_trn64(row1, row5);
+      dct_trn64(row2, row6);
+      dct_trn64(row3, row7);
+
+#undef dct_trn16
+#undef dct_trn32
+#undef dct_trn64
+   }
+
+   // row pass
+   // vrshrn_n_s32 only supports shifts up to 16, we need
+   // 17. so do a non-rounding shift of 16 first then follow
+   // up with a rounding shift by 1.
+   dct_pass(vshrn_n_s32, 16);
+
+   {
+      // pack and round
+      uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
+      uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
+      uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
+      uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
+      uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
+      uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
+      uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
+      uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
+
+      // again, these can translate into one instruction, but often don't.
+#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
+#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
+#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }
+
+      // sadly can't use interleaved stores here since we only write
+      // 8 bytes to each scan line!
+
+      // 8x8 8-bit transpose pass 1
+      dct_trn8_8(p0, p1);
+      dct_trn8_8(p2, p3);
+      dct_trn8_8(p4, p5);
+      dct_trn8_8(p6, p7);
+
+      // pass 2
+      dct_trn8_16(p0, p2);
+      dct_trn8_16(p1, p3);
+      dct_trn8_16(p4, p6);
+      dct_trn8_16(p5, p7);
+
+      // pass 3
+      dct_trn8_32(p0, p4);
+      dct_trn8_32(p1, p5);
+      dct_trn8_32(p2, p6);
+      dct_trn8_32(p3, p7);
+
+      // store
+      vst1_u8(out, p0); out += out_stride;
+      vst1_u8(out, p1); out += out_stride;
+      vst1_u8(out, p2); out += out_stride;
+      vst1_u8(out, p3); out += out_stride;
+      vst1_u8(out, p4); out += out_stride;
+      vst1_u8(out, p5); out += out_stride;
+      vst1_u8(out, p6); out += out_stride;
+      vst1_u8(out, p7);
+
+#undef dct_trn8_8
+#undef dct_trn8_16
+#undef dct_trn8_32
+   }
+
+#undef dct_long_mul
+#undef dct_long_mac
+#undef dct_widen
+#undef dct_wadd
+#undef dct_wsub
+#undef dct_bfly32o
+#undef dct_pass
+}
+
+#endif // STBI_NEON
+
+#define STBI__MARKER_none  0xff
+// Return the next JPEG marker code. A pending marker cached in j->marker is
+// handed out first (and the cache cleared); otherwise one is read from the
+// stream. 0xff, which is never a valid marker value, means "no marker here".
+static stbi_uc stbi__get_marker(stbi__jpeg *j)
+{
+   stbi_uc code;
+   if (j->marker != STBI__MARKER_none) { code = j->marker; j->marker = STBI__MARKER_none; return code; }
+   if (stbi__get8(j->s) != 0xff) return STBI__MARKER_none; // a marker must start with 0xff
+   do {
+      code = stbi__get8(j->s); // repeated 0xff bytes are fill; keep consuming them
+   } while (code == 0xff);
+   return code;
+}
+
+// in each scan, we'll have scan_n components, and the order
+// of the components is specified by order[]
+#define STBI__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7) // nonzero when x is one of the eight codes 0xd0..0xd7
+
+// after a restart interval, reset the entropy decoder state and
+// the dc predictors
+static void stbi__jpeg_reset(stbi__jpeg *j)
+{
+   int c;
+   for (c=0; c < 4; ++c) j->img_comp[c].dc_pred = 0;
+   j->code_buffer = 0;
+   j->code_bits = 0;
+   j->nomore = 0;
+   j->eob_run = 0;
+   j->marker = STBI__MARKER_none;
+   // no restart interval: count down from 0x7fffffff MCUs, plenty since 1<<30 pixels is not even allowed
+   j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
+}
+
+static int stbi__parse_entropy_coded_data(stbi__jpeg *z) // decodes the blocks of one scan; returns 0 when a block fails to decode, 1 otherwise
+{
+   stbi__jpeg_reset(z);
+   if (!z->progressive) {
+      if (z->scan_n == 1) {
+         int i,j;
+         STBI_SIMD_ALIGN(short, data[64]);
+         int n = z->order[0];
+         // non-interleaved data, we just need to process one block at a time,
+         // in trivial scanline order
+         // number of blocks to do just depends on how many actual "pixels" this
+         // component has, independent of interleaved MCU blocking and such
+         int w = (z->img_comp[n].x+7) >> 3;
+         int h = (z->img_comp[n].y+7) >> 3;
+         for (j=0; j < h; ++j) {
+            for (i=0; i < w; ++i) {
+               int ha = z->img_comp[n].ha;
+               if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
+               z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); // output goes to the 8x8 tile at block (i,j) of the plane
+               // every data block is an MCU, so countdown the restart interval
+               if (--z->todo <= 0) {
+                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+                  // if it's NOT a restart, then just bail, so we get corrupt data
+                  // rather than no data
+                  if (!STBI__RESTART(z->marker)) return 1;
+                  stbi__jpeg_reset(z);
+               }
+            }
+         }
+         return 1;
+      } else { // interleaved
+         int i,j,k,x,y;
+         STBI_SIMD_ALIGN(short, data[64]);
+         for (j=0; j < z->img_mcu_y; ++j) {
+            for (i=0; i < z->img_mcu_x; ++i) {
+               // scan an interleaved mcu... process scan_n components in order
+               for (k=0; k < z->scan_n; ++k) {
+                  int n = z->order[k];
+                  // scan out an mcu's worth of this component; that's just determined
+                  // by the basic H and V specified for the component
+                  for (y=0; y < z->img_comp[n].v; ++y) {
+                     for (x=0; x < z->img_comp[n].h; ++x) {
+                        int x2 = (i*z->img_comp[n].h + x)*8; // pixel column of this block within the component plane
+                        int y2 = (j*z->img_comp[n].v + y)*8; // pixel row of this block within the component plane
+                        int ha = z->img_comp[n].ha;
+                        if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
+                        z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
+                     }
+                  }
+               }
+               // after all interleaved components, that's an interleaved MCU,
+               // so now count down the restart interval
+               if (--z->todo <= 0) {
+                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+                  if (!STBI__RESTART(z->marker)) return 1; // same policy as above: stop early but still report success
+                  stbi__jpeg_reset(z);
+               }
+            }
+         }
+         return 1;
+      }
+   } else {
+      if (z->scan_n == 1) {
+         int i,j;
+         int n = z->order[0];
+         // non-interleaved data, we just need to process one block at a time,
+         // in trivial scanline order
+         // number of blocks to do just depends on how many actual "pixels" this
+         // component has, independent of interleaved MCU blocking and such
+         int w = (z->img_comp[n].x+7) >> 3;
+         int h = (z->img_comp[n].y+7) >> 3;
+         for (j=0; j < h; ++j) {
+            for (i=0; i < w; ++i) {
+               short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); // this block's 64 coefficients in the component's coeff array
+               if (z->spec_start == 0) {
+                  if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
+                     return 0;
+               } else {
+                  int ha = z->img_comp[n].ha;
+                  if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
+                     return 0;
+               }
+               // every data block is an MCU, so countdown the restart interval
+               if (--z->todo <= 0) {
+                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+                  if (!STBI__RESTART(z->marker)) return 1;
+                  stbi__jpeg_reset(z);
+               }
+            }
+         }
+         return 1;
+      } else { // interleaved
+         int i,j,k,x,y;
+         for (j=0; j < z->img_mcu_y; ++j) {
+            for (i=0; i < z->img_mcu_x; ++i) {
+               // scan an interleaved mcu... process scan_n components in order
+               for (k=0; k < z->scan_n; ++k) {
+                  int n = z->order[k];
+                  // scan out an mcu's worth of this component; that's just determined
+                  // by the basic H and V specified for the component
+                  for (y=0; y < z->img_comp[n].v; ++y) {
+                     for (x=0; x < z->img_comp[n].h; ++x) {
+                        int x2 = (i*z->img_comp[n].h + x); // block column (in blocks, not pixels)
+                        int y2 = (j*z->img_comp[n].v + y); // block row (in blocks, not pixels)
+                        short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
+                        if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) // only the DC decoder is used on this interleaved path
+                           return 0;
+                     }
+                  }
+               }
+               // after all interleaved components, that's an interleaved MCU,
+               // so now count down the restart interval
+               if (--z->todo <= 0) {
+                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+                  if (!STBI__RESTART(z->marker)) return 1;
+                  stbi__jpeg_reset(z);
+               }
+            }
+         }
+         return 1;
+      }
+   }
+}
+
+static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
+{
+   // scale each of the 64 coefficients, in place, by the matching table entry
+   short *end = data + 64;
+   for (; data != end; ++data, ++dequant) *data *= *dequant;
+}
+
+static void stbi__jpeg_finish(stbi__jpeg *z)
+{
+   int c, bx, by;
+   if (!z->progressive) return; // nothing to do unless the image is progressive
+   // progressive: dequantize the stored coefficients of every block and run the IDCT on them
+   for (c=0; c < z->s->img_n; ++c) {
+      int blocks_w = (z->img_comp[c].x+7) >> 3;
+      int blocks_h = (z->img_comp[c].y+7) >> 3;
+      for (by=0; by < blocks_h; ++by) {
+         for (bx=0; bx < blocks_w; ++bx) {
+            short *coef = z->img_comp[c].coeff + 64 * (bx + by * z->img_comp[c].coeff_w);
+            stbi_uc *dst = z->img_comp[c].data + z->img_comp[c].w2*by*8 + bx*8;
+            stbi__jpeg_dequantize(coef, z->dequant[z->img_comp[c].tq]);
+            z->idct_block_kernel(dst, z->img_comp[c].w2, coef);
+         }
+      }
+   }
+}
+
+static int stbi__process_marker(stbi__jpeg *z, int m) // handles one DRI/DQT/DHT/APPn/COM segment for marker m; returns nonzero on success, 0 on error
+{
+   int L; // segment bytes not yet accounted for
+   switch (m) {
+      case STBI__MARKER_none: // no marker found
+         return stbi__err("expected marker","Corrupt JPEG");
+
+      case 0xDD: // DRI - specify restart interval
+         if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
+         z->restart_interval = stbi__get16be(z->s);
+         return 1;
+
+      case 0xDB: // DQT - define quantization table
+         L = stbi__get16be(z->s)-2;
+         while (L > 0) {
+            int q = stbi__get8(z->s);
+            int p = q >> 4, sixteen = (p != 0); // p: precision flag, 0 = 8-bit entries, 1 = 16-bit entries
+            int t = q & 15,i;                   // t: destination table index
+            if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
+            if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");
+
+            for (i=0; i < 64; ++i) // entries arrive in zigzag order; store them de-zigzagged
+               z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
+            L -= (sixteen ? 129 : 65);
+         }
+         return L==0; // the tables must consume the declared length exactly
+
+      case 0xC4: // DHT - define huffman table
+         L = stbi__get16be(z->s)-2;
+         while (L > 0) {
+            stbi_uc *v;
+            int sizes[16],i,n=0;
+            int q = stbi__get8(z->s);
+            int tc = q >> 4;  // table class: 0 selects huff_dc, 1 selects huff_ac
+            int th = q & 15;  // table slot
+            if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
+            for (i=0; i < 16; ++i) {
+               sizes[i] = stbi__get8(z->s);
+               n += sizes[i];
+            }
+            if (n > 256) return stbi__err("bad DHT header","Corrupt JPEG"); // symbols are 8-bit, so >256 is corrupt; unchecked, n (up to 16*255) would presumably overrun the tables built/filled below
+            if (tc == 0) {
+               if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
+               v = z->huff_dc[th].values;
+            } else {
+               if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
+               v = z->huff_ac[th].values;
+            }
+            for (i=0; i < n; ++i)
+               v[i] = stbi__get8(z->s);
+            if (tc != 0)
+               stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
+            L -= 17 + n; // 1 class/slot byte + 16 code-length counts + n symbol values
+         }
+         return L==0; // the tables must consume the declared length exactly
+   }
+
+   // check for comment block or APP blocks
+   if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
+      L = stbi__get16be(z->s);
+      if (L < 2) {
+         if (m == 0xFE)
+            return stbi__err("bad COM len","Corrupt JPEG");
+         else
+            return stbi__err("bad APP len","Corrupt JPEG");
+      }
+      L -= 2;
+
+      if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
+         static const unsigned char tag[5] = {'J','F','I','F','\0'};
+         int ok = 1;
+         int i;
+         for (i=0; i < 5; ++i) // always read all 5 bytes so L stays in sync even on a mismatch
+            if (stbi__get8(z->s) != tag[i])
+               ok = 0;
+         L -= 5;
+         if (ok)
+            z->jfif = 1;
+      } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
+         static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
+         int ok = 1;
+         int i;
+         for (i=0; i < 6; ++i)
+            if (stbi__get8(z->s) != tag[i])
+               ok = 0;
+         L -= 6;
+         if (ok) {
+            stbi__get8(z->s); // version
+            stbi__get16be(z->s); // flags0
+            stbi__get16be(z->s); // flags1
+            z->app14_color_transform = stbi__get8(z->s); // color transform
+            L -= 6;
+         }
+      }
+
+      stbi__skip(z->s, L); // whatever remains of the segment is ignored
+      return 1;
+   }
+
+   return stbi__err("unknown marker","Corrupt JPEG");
+}
+
+// after we see SOS: reads the scan header into z (scan_n, order[], per-component hd/ha, spec_start/spec_end, succ_high/succ_low); returns 1 if ok, 0 on error
+static int stbi__process_scan_header(stbi__jpeg *z)
+{
+   int i;
+   int Ls = stbi__get16be(z->s);
+   z->scan_n = stbi__get8(z->s);
+   if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
+   if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
+   for (i=0; i < z->scan_n; ++i) {
+      int id = stbi__get8(z->s), which;
+      int q = stbi__get8(z->s); // high nibble: DC table slot, low nibble: AC table slot
+      for (which = 0; which < z->s->img_n; ++which) // find the frame component carrying this id
+         if (z->img_comp[which].id == id)
+            break;
+      if (which == z->s->img_n) return stbi__err("bad SOS component","Corrupt JPEG"); // no match; record a reason like every other failure path here
+      z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
+      z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
+      z->order[i] = which;
+   }
+
+   {
+      int aa;
+      z->spec_start = stbi__get8(z->s);
+      z->spec_end   = stbi__get8(z->s); // should be 63, but might be 0
+      aa = stbi__get8(z->s);
+      z->succ_high = (aa >> 4);
+      z->succ_low  = (aa & 15);
+      if (z->progressive) {
+         if (z->spec_start > 63 || z->spec_end > 63  || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
+            return stbi__err("bad SOS", "Corrupt JPEG");
+      } else {
+         if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
+         if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
+         z->spec_end = 63; // non-progressive scans always cover all 64 coefficients, whatever the file said
+      }
+   }
+
+   return 1;
+}
+
+static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
+{
+   int c = 0; // frees the buffers of components [0, ncomp) and passes "why" straight back
+   for (; c < ncomp; ++c) {
+      if (z->img_comp[c].raw_data != NULL) {
+         STBI_FREE(z->img_comp[c].raw_data);
+         z->img_comp[c].data = NULL;
+         z->img_comp[c].raw_data = NULL;
+      }
+      if (z->img_comp[c].raw_coeff != NULL) {
+         STBI_FREE(z->img_comp[c].raw_coeff);
+         z->img_comp[c].coeff = NULL;
+         z->img_comp[c].raw_coeff = NULL;
+      }
+      if (z->img_comp[c].linebuf != NULL) {
+         STBI_FREE(z->img_comp[c].linebuf);
+         z->img_comp[c].linebuf = NULL;
+      }
+   }
+   return why;
+}
+
+static int stbi__process_frame_header(stbi__jpeg *z, int scan) // parses the SOF segment; for STBI__SCAN_load also sizes and allocates the per-component buffers. returns 1 if ok, 0 on error
+{
+   stbi__context *s = z->s;
+   int Lf,p,i,q, h_max=1,v_max=1,c;
+   Lf = stbi__get16be(s);         if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
+   p  = stbi__get8(s);            if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
+   s->img_y = stbi__get16be(s);   if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
+   s->img_x = stbi__get16be(s);   if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
+   c = stbi__get8(s);
+   if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG");
+   s->img_n = c;
+   for (i=0; i < c; ++i) {
+      z->img_comp[i].data = NULL;
+      z->img_comp[i].linebuf = NULL;
+   }
+
+   if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");
+
+   z->rgb = 0;
+   for (i=0; i < s->img_n; ++i) {
+      static const unsigned char rgb[3] = { 'R', 'G', 'B' };
+      z->img_comp[i].id = stbi__get8(s);
+      if (s->img_n == 3 && z->img_comp[i].id == rgb[i]) // count components whose ids spell 'R','G','B' in order
+         ++z->rgb;
+      q = stbi__get8(s); // high nibble: horizontal sampling factor, low nibble: vertical
+      z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
+      z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
+      z->img_comp[i].tq = stbi__get8(s);  if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
+   }
+
+   if (scan != STBI__SCAN_load) return 1; // header-only queries stop before any allocation
+
+   if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");
+
+   for (i=0; i < s->img_n; ++i) {
+      if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
+      if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
+   }
+   for (i=0; i < s->img_n; ++i) { if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG"); if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG"); } // the resamplers only handle integer subsampling ratios
+   // compute interleaved mcu info
+   z->img_h_max = h_max;
+   z->img_v_max = v_max;
+   z->img_mcu_w = h_max * 8;
+   z->img_mcu_h = v_max * 8;
+   // these sizes can't be more than 17 bits
+   z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
+   z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;
+
+   for (i=0; i < s->img_n; ++i) {
+      // number of effective pixels (e.g. for non-interleaved MCU)
+      z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
+      z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
+      // to simplify generation, we'll allocate enough memory to decode
+      // the bogus oversized data from using interleaved MCUs and their
+      // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
+      // discard the extra data until colorspace conversion
+      //
+      // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
+      // so these muls can't overflow with 32-bit ints (which we require)
+      z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
+      z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
+      z->img_comp[i].coeff = 0;
+      z->img_comp[i].raw_coeff = 0;
+      z->img_comp[i].linebuf = NULL;
+      z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15); // 15 spare bytes leave room for the alignment below
+      if (z->img_comp[i].raw_data == NULL)
+         return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
+      // align blocks for idct using mmx/sse
+      z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
+      if (z->progressive) {
+         // w2, h2 are multiples of 8 (see above)
+         z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
+         z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
+         z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
+         if (z->img_comp[i].raw_coeff == NULL)
+            return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
+         z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
+      }
+   }
+
+   return 1;
+}
+
+// use comparisons since in some cases we handle more than one case (e.g. SOF)
+#define stbi__DNL(x)         ((x) == 0xdc) // DNL marker code
+#define stbi__SOI(x)         ((x) == 0xd8) // SOI marker code
+#define stbi__EOI(x)         ((x) == 0xd9) // EOI marker code
+#define stbi__SOF(x)         ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2) // any of the three accepted SOF codes
+#define stbi__SOS(x)         ((x) == 0xda) // SOS marker code
+
+#define stbi__SOF_progressive(x)   ((x) == 0xc2) // the one SOF code that selects the progressive path
+
+static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
+{
+   int marker;
+   z->marker = STBI__MARKER_none; // start with no cached marker
+   z->app14_color_transform = -1; // valid values are 0,1,2
+   z->jfif = 0;
+   marker = stbi__get_marker(z);
+   if (!stbi__SOI(marker)) return stbi__err("no SOI","Corrupt JPEG");
+   if (scan == STBI__SCAN_type) return 1; // a type check needs nothing beyond SOI
+   // process every segment that comes before the frame header
+   for (marker = stbi__get_marker(z); !stbi__SOF(marker); ) {
+      if (!stbi__process_marker(z,marker)) return 0;
+      // some files have extra padding after their blocks, so keep scanning for a marker
+      for (;;) {
+         marker = stbi__get_marker(z);
+         if (marker != STBI__MARKER_none) break;
+         if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG");
+      }
+   }
+   z->progressive = stbi__SOF_progressive(marker);
+   // the frame header parser reports its own errors
+   return stbi__process_frame_header(z, scan) ? 1 : 0;
+}
+
+// decode image to YCbCr format; returns 1 on success, 0 on error
+static int stbi__decode_jpeg_image(stbi__jpeg *j)
+{
+   int m;
+   for (m = 0; m < 4; m++) { // clear the owning pointers first so a failed decode can be cleaned up safely
+      j->img_comp[m].raw_data = NULL;
+      j->img_comp[m].raw_coeff = NULL;
+   }
+   j->restart_interval = 0;
+   if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
+   m = stbi__get_marker(j);
+   while (!stbi__EOI(m)) { // process segments until the EOI marker
+      if (stbi__SOS(m)) {
+         if (!stbi__process_scan_header(j)) return 0;
+         if (!stbi__parse_entropy_coded_data(j)) return 0;
+         if (j->marker == STBI__MARKER_none ) {
+            // handle 0s at the end of image data from IP Kamera 9060
+            while (!stbi__at_eof(j->s)) {
+               int x = stbi__get8(j->s);
+               if (x == 255) { // take the byte after an 0xff as the pending marker
+                  j->marker = stbi__get8(j->s);
+                  break;
+               }
+            }
+            // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
+         }
+      } else if (stbi__DNL(m)) {
+         int Ld = stbi__get16be(j->s);
+         stbi__uint32 NL = stbi__get16be(j->s);
+         if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG");
+         if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG"); // must agree with the height already stored in img_y
+      } else {
+         if (!stbi__process_marker(j, m)) return 0;
+      }
+      m = stbi__get_marker(j);
+   }
+   if (j->progressive)
+      stbi__jpeg_finish(j);
+   return 1;
+}
+
+// static jfif-centered resampling (across block boundaries)
+
+typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
+                                    int w, int hs); // common signature of the row resamplers below; each returns the row to use
+
+#define stbi__div4(x) ((stbi_uc) ((x) >> 2)) // divide by 4 (callers add 2 first to round) and narrow to a byte
+
+static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
+{
+   STBI_NOTUSED(hs);
+   STBI_NOTUSED(w);
+   STBI_NOTUSED(in_far);
+   STBI_NOTUSED(out);
+   return in_near; // no resampling: the nearer input row is returned as the result, nothing is copied
+}
+
+static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
+{
+   int x = 0;
+   // vertical 2x upsampling: each output sample is (3*near + far + 2) / 4
+   for (; x < w; ++x)
+      out[x] = (stbi_uc) ((in_near[x]*3 + in_far[x] + 2) >> 2);
+   STBI_NOTUSED(hs);
+   return out;
+}
+
+static stbi_uc*  stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
+{
+   // horizontal 2x upsampling: each input sample produces two outputs, blended
+   // 3:1 with its left and its right neighbour respectively
+   int x;
+   stbi_uc *src = in_near;
+   STBI_NOTUSED(in_far);
+   STBI_NOTUSED(hs);
+   if (w == 1) {
+      // a lone sample has no neighbour to blend with, so just duplicate it
+      out[0] = out[1] = src[0];
+      return out;
+   }
+
+   // left edge: the outermost output is a plain copy
+   out[0] = src[0];
+   out[1] = stbi__div4(src[0]*3 + src[1] + 2);
+   for (x=1; x < w-1; ++x) {
+      int center = 3*src[x]+2; // term shared by both outputs; the +2 rounds the /4
+      out[x*2+0] = stbi__div4(center+src[x-1]);
+      out[x*2+1] = stbi__div4(center+src[x+1]);
+   }
+   // right edge, mirrored (x is w-1 here whenever w >= 2)
+   out[x*2+0] = stbi__div4(src[w-2]*3 + src[w-1] + 2);
+   out[x*2+1] = src[w-1];
+   return out;
+}
+
+#define stbi__div16(x) ((stbi_uc) ((x) >> 4))
+
+static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
+{
+   // 2x2 upsampling: blend the two rows 3:1 (near:far) per column, then blend
+   // neighbouring columns 3:1, i.e. weights of 9:3:3:1 out of 16
+   int x, left, here;
+   STBI_NOTUSED(hs);
+
+   if (w == 1) {
+      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
+      return out;
+   }
+
+   here = 3*in_near[0] + in_far[0];
+   out[0] = stbi__div4(here+2); // left edge: vertical blend only
+   for (x=1; x < w; ++x) {
+      left = here;
+      here = 3*in_near[x]+in_far[x];
+      out[x*2-1] = stbi__div16(3*left + here + 8);
+      out[x*2  ] = stbi__div16(3*here + left + 8);
+   }
+   out[w*2-1] = stbi__div4(here+2); // right edge: vertical blend only
+   return out;
+}
+
+#if defined(STBI_SSE2) || defined(STBI_NEON)
+static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) // SSE2/NEON variant of the 2x2 upsampler; writes 2*w samples to out and returns it
+{
+   // need to generate 2x2 samples for every one in input
+   int i=0,t0,t1;
+
+   if (w == 1) {
+      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
+      return out;
+   }
+
+   t1 = 3*in_near[0] + in_far[0];
+   // process groups of 8 pixels for as long as we can.
+   // note we can't handle the last pixel in a row in this loop
+   // because we need to handle the filter boundary conditions.
+   for (; i < ((w-1) & ~7); i += 8) {
+#if defined(STBI_SSE2)
+      // load and perform the vertical filtering pass
+      // this uses 3*x + y = 4*x + (y - x)
+      __m128i zero  = _mm_setzero_si128();
+      __m128i farb  = _mm_loadl_epi64((__m128i *) (in_far + i));
+      __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
+      __m128i farw  = _mm_unpacklo_epi8(farb, zero);
+      __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
+      __m128i diff  = _mm_sub_epi16(farw, nearw);
+      __m128i nears = _mm_slli_epi16(nearw, 2);
+      __m128i curr  = _mm_add_epi16(nears, diff); // current row
+
+      // horizontal filter works the same based on shifted vers of current
+      // row. "prev" is current row shifted right by 1 pixel; we need to
+      // insert the previous pixel value (from t1).
+      // "next" is current row shifted left by 1 pixel, with first pixel
+      // of next block of 8 pixels added in.
+      __m128i prv0 = _mm_slli_si128(curr, 2);
+      __m128i nxt0 = _mm_srli_si128(curr, 2);
+      __m128i prev = _mm_insert_epi16(prv0, t1, 0);
+      __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7); // i+8 <= w-1 inside this loop, so the read stays within the row
+
+      // horizontal filter, polyphase implementation since it's convenient:
+      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
+      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
+      // note the shared term.
+      __m128i bias  = _mm_set1_epi16(8);
+      __m128i curs = _mm_slli_epi16(curr, 2);
+      __m128i prvd = _mm_sub_epi16(prev, curr);
+      __m128i nxtd = _mm_sub_epi16(next, curr);
+      __m128i curb = _mm_add_epi16(curs, bias);
+      __m128i even = _mm_add_epi16(prvd, curb);
+      __m128i odd  = _mm_add_epi16(nxtd, curb);
+
+      // interleave even and odd pixels, then undo scaling.
+      __m128i int0 = _mm_unpacklo_epi16(even, odd);
+      __m128i int1 = _mm_unpackhi_epi16(even, odd);
+      __m128i de0  = _mm_srli_epi16(int0, 4);
+      __m128i de1  = _mm_srli_epi16(int1, 4);
+
+      // pack and write output
+      __m128i outv = _mm_packus_epi16(de0, de1);
+      _mm_storeu_si128((__m128i *) (out + i*2), outv);
+#elif defined(STBI_NEON)
+      // load and perform the vertical filtering pass
+      // this uses 3*x + y = 4*x + (y - x)
+      uint8x8_t farb  = vld1_u8(in_far + i);
+      uint8x8_t nearb = vld1_u8(in_near + i);
+      int16x8_t diff  = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
+      int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
+      int16x8_t curr  = vaddq_s16(nears, diff); // current row
+
+      // horizontal filter works the same based on shifted vers of current
+      // row. "prev" is current row shifted right by 1 pixel; we need to
+      // insert the previous pixel value (from t1).
+      // "next" is current row shifted left by 1 pixel, with first pixel
+      // of next block of 8 pixels added in.
+      int16x8_t prv0 = vextq_s16(curr, curr, 7);
+      int16x8_t nxt0 = vextq_s16(curr, curr, 1);
+      int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
+      int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7); // i+8 <= w-1 inside this loop, so the read stays within the row
+
+      // horizontal filter, polyphase implementation since it's convenient:
+      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
+      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
+      // note the shared term.
+      int16x8_t curs = vshlq_n_s16(curr, 2);
+      int16x8_t prvd = vsubq_s16(prev, curr);
+      int16x8_t nxtd = vsubq_s16(next, curr);
+      int16x8_t even = vaddq_s16(curs, prvd);
+      int16x8_t odd  = vaddq_s16(curs, nxtd);
+
+      // undo scaling and round, then store with even/odd phases interleaved
+      uint8x8x2_t o;
+      o.val[0] = vqrshrun_n_s16(even, 4);
+      o.val[1] = vqrshrun_n_s16(odd,  4);
+      vst2_u8(out + i*2, o);
+#endif
+
+      // "previous" value for next iter
+      t1 = 3*in_near[i+7] + in_far[i+7];
+   }
+
+   t0 = t1; // scalar tail: pick up at the first pixel the vector loop did not cover
+   t1 = 3*in_near[i] + in_far[i];
+   out[i*2] = stbi__div16(3*t1 + t0 + 8); // only the even output here; when i == 0, t0 == t1 and this equals stbi__div4(t1+2)
+
+   for (++i; i < w; ++i) {
+      t0 = t1;
+      t1 = 3*in_near[i]+in_far[i];
+      out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
+      out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
+   }
+   out[w*2-1] = stbi__div4(t1+2); // right edge: vertical blend only
+
+   STBI_NOTUSED(hs);
+
+   return out;
+}
+#endif
+
+static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
+{
+   int x, rep; // nearest-neighbour: every input sample is written hs times in a row
+   stbi_uc *dst = out;
+   STBI_NOTUSED(in_far);
+   for (x=0; x < w; ++x)
+      for (rep=0; rep < hs; ++rep)
+         *dst++ = in_near[x];
+   return out;
+}
+
+// this is a reduced-precision calculation of YCbCr-to-RGB introduced
+// to make sure the code produces the same results in both SIMD and scalar
+#define stbi__float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8) // round x to 12 fractional bits, then shift up to 20
+static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step) // converts count pixels; writes r,g,b,255 per pixel and advances out by step each time
+{
+   int i;
+   for (i=0; i < count; ++i) {
+      int y_fixed = (y[i] << 20) + (1<<19); // rounding
+      int r,g,b;
+      int cr = pcr[i] - 128; // chroma samples are stored with a +128 offset
+      int cb = pcb[i] - 128;
+      r = y_fixed +  cr* stbi__float2fixed(1.40200f);
+      g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); // NOTE(review): the mask drops the low 16 bits of the cb term, presumably to match the SIMD path -- confirm
+      b = y_fixed                                     +   cb* stbi__float2fixed(1.77200f);
+      r >>= 20; // back from 20 fractional bits to integers
+      g >>= 20;
+      b >>= 20;
+      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } // the unsigned compare catches both <0 and >255; then clamp
+      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
+      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
+      out[0] = (stbi_uc)r;
+      out[1] = (stbi_uc)g;
+      out[2] = (stbi_uc)b;
+      out[3] = 255; // fourth byte is always written, so out needs 4 writable bytes per pixel even when step is 3
+      out += step;
+   }
+}
+
+#if defined(STBI_SSE2) || defined(STBI_NEON)
+static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
+{ // 8 pixels per iteration when step == 4; the scalar loop at the end handles the remainder and every other step
+   int i = 0;
+
+#ifdef STBI_SSE2
+   // step == 3 is pretty ugly on the final interleave, and i'm not convinced
+   // it's useful in practice (you wouldn't use it for textures, for example).
+   // so just accelerate step == 4 case.
+   if (step == 4) {
+      // this is a fairly straightforward implementation and not super-optimized.
+      __m128i signflip  = _mm_set1_epi8(-0x80);
+      __m128i cr_const0 = _mm_set1_epi16(   (short) ( 1.40200f*4096.0f+0.5f));
+      __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
+      __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
+      __m128i cb_const1 = _mm_set1_epi16(   (short) ( 1.77200f*4096.0f+0.5f));
+      __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128);
+      __m128i xw = _mm_set1_epi16(255); // alpha channel
+
+      for (; i+7 < count; i += 8) { // only while a full group of 8 pixels remains
+         // load
+         __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
+         __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
+         __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
+         __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
+         __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128
+
+         // unpack to short (and left-shift cr, cb by 8)
+         __m128i yw  = _mm_unpacklo_epi8(y_bias, y_bytes);
+         __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
+         __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);
+
+         // color transform
+         __m128i yws = _mm_srli_epi16(yw, 4);
+         __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
+         __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
+         __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
+         __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
+         __m128i rws = _mm_add_epi16(cr0, yws);
+         __m128i gwt = _mm_add_epi16(cb0, yws);
+         __m128i bws = _mm_add_epi16(yws, cb1);
+         __m128i gws = _mm_add_epi16(gwt, cr1);
+
+         // descale
+         __m128i rw = _mm_srai_epi16(rws, 4);
+         __m128i bw = _mm_srai_epi16(bws, 4);
+         __m128i gw = _mm_srai_epi16(gws, 4);
+
+         // back to byte, set up for transpose
+         __m128i brb = _mm_packus_epi16(rw, bw);
+         __m128i gxb = _mm_packus_epi16(gw, xw);
+
+         // transpose to interleave channels
+         __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
+         __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
+         __m128i o0 = _mm_unpacklo_epi16(t0, t1);
+         __m128i o1 = _mm_unpackhi_epi16(t0, t1);
+
+         // store
+         _mm_storeu_si128((__m128i *) (out + 0), o0);
+         _mm_storeu_si128((__m128i *) (out + 16), o1);
+         out += 32; // 8 pixels * 4 bytes
+      }
+   }
+#endif
+
+#ifdef STBI_NEON
+   // in this version, step=3 support would be easy to add. but is there demand?
+   if (step == 4) {
+      // this is a fairly straightforward implementation and not super-optimized.
+      uint8x8_t signflip = vdup_n_u8(0x80);
+      int16x8_t cr_const0 = vdupq_n_s16(   (short) ( 1.40200f*4096.0f+0.5f));
+      int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
+      int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
+      int16x8_t cb_const1 = vdupq_n_s16(   (short) ( 1.77200f*4096.0f+0.5f));
+
+      for (; i+7 < count; i += 8) { // only while a full group of 8 pixels remains
+         // load
+         uint8x8_t y_bytes  = vld1_u8(y + i);
+         uint8x8_t cr_bytes = vld1_u8(pcr + i);
+         uint8x8_t cb_bytes = vld1_u8(pcb + i);
+         int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
+         int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));
+
+         // expand to s16
+         int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
+         int16x8_t crw = vshll_n_s8(cr_biased, 7);
+         int16x8_t cbw = vshll_n_s8(cb_biased, 7);
+
+         // color transform
+         int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
+         int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
+         int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
+         int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
+         int16x8_t rws = vaddq_s16(yws, cr0);
+         int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
+         int16x8_t bws = vaddq_s16(yws, cb1);
+
+         // undo scaling, round, convert to byte
+         uint8x8x4_t o;
+         o.val[0] = vqrshrun_n_s16(rws, 4);
+         o.val[1] = vqrshrun_n_s16(gws, 4);
+         o.val[2] = vqrshrun_n_s16(bws, 4);
+         o.val[3] = vdup_n_u8(255);
+
+         // store, interleaving r/g/b/a
+         vst4_u8(out, o);
+         out += 8*4;
+      }
+   }
+#endif
+
+   for (; i < count; ++i) { // scalar tail: same arithmetic as stbi__YCbCr_to_RGB_row, continuing from wherever i stopped
+      int y_fixed = (y[i] << 20) + (1<<19); // rounding
+      int r,g,b;
+      int cr = pcr[i] - 128;
+      int cb = pcb[i] - 128;
+      r = y_fixed + cr* stbi__float2fixed(1.40200f);
+      g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
+      b = y_fixed                                   +   cb* stbi__float2fixed(1.77200f);
+      r >>= 20;
+      g >>= 20;
+      b >>= 20;
+      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
+      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
+      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
+      out[0] = (stbi_uc)r;
+      out[1] = (stbi_uc)g;
+      out[2] = (stbi_uc)b;
+      out[3] = 255;
+      out += step;
+   }
+}
+#endif
+
+// set up the kernels: point the three function pointers in *j at the scalar or SIMD implementations
+static void stbi__setup_jpeg(stbi__jpeg *j)
+{
+   j->idct_block_kernel = stbi__idct_block; // scalar defaults, overridden below when SIMD is compiled in
+   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
+   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
+
+#ifdef STBI_SSE2
+   if (stbi__sse2_available()) { // SSE2 kernels are selected by a run-time check
+      j->idct_block_kernel = stbi__idct_simd;
+      j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
+      j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
+   }
+#endif
+
+#ifdef STBI_NEON
+   j->idct_block_kernel = stbi__idct_simd; // NEON kernels are selected unconditionally when compiled in
+   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
+   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
+#endif
+}
+
+// clean up the temporary component buffers (for the first j->s->img_n components)
+static void stbi__cleanup_jpeg(stbi__jpeg *j)
+{
+   stbi__free_jpeg_components(j, j->s->img_n, 0); // NOTE(review): meaning of the trailing 0 is defined by the callee, not visible here
+}
+
+typedef struct // per-component upsampling state used by load_jpeg_image
+{
+   resample_row_func resample; // row upsampler chosen from the hs/vs combination
+   stbi_uc *line0,*line1; // two source rows handed to resample as its near/far inputs
+   int hs,vs;   // expansion factor in each axis
+   int w_lores; // horizontal pixels pre-expansion
+   int ystep;   // how far through vertical expansion we are
+   int ypos;    // which pre-expansion row we're on
+} stbi__resample;
+
+// fast 0..255 * 0..255 => 0..255 rounded multiplication
+static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
+{
+   unsigned int biased = (unsigned int) (x*y) + 128u; // product plus half, so the result rounds
+   return (stbi_uc) ((biased + biased / 256u) / 256u); // two divides by 256 stand in for one divide by 255
+}
+
+static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
+{ // decodes the whole image, then upsamples and colour-converts it row by row; returns the pixel buffer or NULL on error
+   int n, decode_n, is_rgb; // n: channels written per output pixel; decode_n: components that get resampled
+   z->s->img_n = 0; // make stbi__cleanup_jpeg safe
+
+   // validate req_comp
+   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
+
+   // load a jpeg image from whichever source, but leave in YCbCr format
+   if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
+
+   // determine actual number of components to generate
+   n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
+
+   is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif)); // 3-component data that is copied through instead of converted from YCbCr
+
+   if (z->s->img_n == 3 && n < 3 && !is_rgb)
+      decode_n = 1; // grey output from YCbCr only reads component 0
+   else
+      decode_n = z->s->img_n;
+
+   // resample and color-convert
+   {
+      int k;
+      unsigned int i,j;
+      stbi_uc *output;
+      stbi_uc *coutput[4]; // per component: the upsampled row returned by its resampler
+
+      stbi__resample res_comp[4];
+
+      for (k=0; k < decode_n; ++k) {
+         stbi__resample *r = &res_comp[k];
+
+         // allocate line buffer big enough for upsampling off the edges
+         // with upsample factor of 4
+         z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
+         if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
+
+         r->hs      = z->img_h_max / z->img_comp[k].h;
+         r->vs      = z->img_v_max / z->img_comp[k].v;
+         r->ystep   = r->vs >> 1;
+         r->w_lores = (z->s->img_x + r->hs-1) / r->hs; // width divided by hs, rounded up
+         r->ypos    = 0;
+         r->line0   = r->line1 = z->img_comp[k].data;
+
+         if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
+         else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
+         else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
+         else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
+         else                               r->resample = stbi__resample_row_generic;
+      }
+
+      // this is the last allocation; no code after it returns an error
+      output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
+      if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
+
+      // now go ahead and resample
+      for (j=0; j < z->s->img_y; ++j) {
+         stbi_uc *out = output + n * z->s->img_x * j; // start of output row j
+         for (k=0; k < decode_n; ++k) {
+            stbi__resample *r = &res_comp[k];
+            int y_bot = r->ystep >= (r->vs >> 1); // selects which of line0/line1 is passed as the near row
+            coutput[k] = r->resample(z->img_comp[k].linebuf,
+                                     y_bot ? r->line1 : r->line0,
+                                     y_bot ? r->line0 : r->line1,
+                                     r->w_lores, r->hs);
+            if (++r->ystep >= r->vs) {
+               r->ystep = 0;
+               r->line0 = r->line1;
+               if (++r->ypos < z->img_comp[k].y)
+                  r->line1 += z->img_comp[k].w2; // line1 stops advancing once ypos reaches the component's row count
+            }
+         }
+         if (n >= 3) {
+            stbi_uc *y = coutput[0];
+            if (z->s->img_n == 3) {
+               if (is_rgb) {
+                  for (i=0; i < z->s->img_x; ++i) {
+                     out[0] = y[i];
+                     out[1] = coutput[1][i];
+                     out[2] = coutput[2][i];
+                     out[3] = 255;
+                     out += n;
+                  }
+               } else {
+                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
+               }
+            } else if (z->s->img_n == 4) {
+               if (z->app14_color_transform == 0) { // CMYK
+                  for (i=0; i < z->s->img_x; ++i) {
+                     stbi_uc m = coutput[3][i];
+                     out[0] = stbi__blinn_8x8(coutput[0][i], m);
+                     out[1] = stbi__blinn_8x8(coutput[1][i], m);
+                     out[2] = stbi__blinn_8x8(coutput[2][i], m);
+                     out[3] = 255;
+                     out += n;
+                  }
+               } else if (z->app14_color_transform == 2) { // YCCK
+                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
+                  for (i=0; i < z->s->img_x; ++i) {
+                     stbi_uc m = coutput[3][i];
+                     out[0] = stbi__blinn_8x8(255 - out[0], m);
+                     out[1] = stbi__blinn_8x8(255 - out[1], m);
+                     out[2] = stbi__blinn_8x8(255 - out[2], m);
+                     out += n;
+                  }
+               } else { // YCbCr + alpha?  Ignore the fourth channel for now
+                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
+               }
+            } else
+               for (i=0; i < z->s->img_x; ++i) {
+                  out[0] = out[1] = out[2] = y[i];
+                  out[3] = 255; // not used if n==3
+                  out += n;
+               }
+         } else { // n is 1 or 2: grey, optionally followed by a 255 second channel
+            if (is_rgb) {
+               if (n == 1)
+                  for (i=0; i < z->s->img_x; ++i)
+                     *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
+               else {
+                  for (i=0; i < z->s->img_x; ++i, out += 2) {
+                     out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
+                     out[1] = 255;
+                  }
+               }
+            } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
+               for (i=0; i < z->s->img_x; ++i) {
+                  stbi_uc m = coutput[3][i];
+                  stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
+                  stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
+                  stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
+                  out[0] = stbi__compute_y(r, g, b);
+                  out[1] = 255;
+                  out += n;
+               }
+            } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
+               for (i=0; i < z->s->img_x; ++i) {
+                  out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
+                  out[1] = 255;
+                  out += n;
+               }
+            } else {
+               stbi_uc *y = coutput[0];
+               if (n == 1)
+                  for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
+               else
+                  for (i=0; i < z->s->img_x; ++i) { *out++ = y[i]; *out++ = 255; }
+            }
+         }
+      }
+      stbi__cleanup_jpeg(z);
+      *out_x = z->s->img_x;
+      *out_y = z->s->img_y;
+      if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
+      return output;
+   }
+}
+
+static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
+{ // allocates a decoder state, decodes the image from s, frees the state; returns the pixels or NULL on failure
+   unsigned char* result;
+   stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
+   if (!j) return stbi__errpuc("outofmem", "Out of memory"); // j was previously dereferenced without this check
+   j->s = s; STBI_NOTUSED(ri);
+   stbi__setup_jpeg(j);
+   result = load_jpeg_image(j, x,y,comp,req_comp);
+   STBI_FREE(j);
+   return result;
+}
+
+static int stbi__jpeg_test(stbi__context *s)
+{ // returns nonzero if the header scan in STBI__SCAN_type mode succeeds; s is rewound afterwards either way
+   int r;
+   stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
+   if (!j) return stbi__err("outofmem", "Out of memory"); // j was previously dereferenced without this check
+   j->s = s; stbi__setup_jpeg(j);
+   r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
+   stbi__rewind(s);
+   STBI_FREE(j);
+   return r;
+}
+
+static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
+{
+   if (stbi__decode_jpeg_header(j, STBI__SCAN_header)) { // header accepted: fill in only what was asked for
+      if (x != NULL) *x = j->s->img_x;
+      if (y != NULL) *y = j->s->img_y;
+      if (comp != NULL) *comp = (j->s->img_n < 3) ? 1 : 3;
+      return 1;
+   }
+   stbi__rewind( j->s ); // header rejected
+   return 0;
+}
+
+static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
+{ // reports size/channel count via stbi__jpeg_info_raw using a temporary decoder state; returns 0 on failure
+   int result;
+   stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
+   if (!j) return stbi__err("outofmem", "Out of memory"); // j was previously dereferenced without this check
+   j->s = s; result = stbi__jpeg_info_raw(j, x, y, comp);
+   STBI_FREE(j);
+   return result;
+}
+#endif
+
+// public domain zlib decode    v0.2  Sean Barrett 2006-11-18
+//    simple implementation
+//      - all input must be provided in an upfront buffer
+//      - all output is written to a single output buffer (can malloc/realloc)
+//    performance
+//      - fast huffman
+
+#ifndef STBI_NO_ZLIB
+
+// fast-way is faster to check than jpeg huffman, but slow way is slower
+#define STBI__ZFAST_BITS  9 // accelerate all cases in default tables
+#define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)
+
+// zlib-style huffman encoding
+// (jpeg packs from left, zlib from right, so can't share code)
+typedef struct
+{
+   stbi__uint16 fast[1 << STBI__ZFAST_BITS]; // indexed by the next STBI__ZFAST_BITS input bits; entry is (length << 9) | symbol, 0 if absent
+   stbi__uint16 firstcode[16];   // first code of each length
+   int maxcode[17];              // per length: one past the last code, shifted up to 16 bits; [16] is a sentinel
+   stbi__uint16 firstsymbol[16]; // index into size[]/value[] of the first symbol of each length
+   stbi_uc  size[288];           // code length of each entry, stored in code order
+   stbi__uint16 value[288];      // symbol of each entry, stored in code order
+} stbi__zhuffman;
+
+stbi_inline static int stbi__bitreverse16(int n)
+{
+  int r = ((n & 0x5555) << 1) | ((n & 0xAAAA) >> 1); // exchange neighbouring bits
+  r = ((r & 0x3333) << 2) | ((r & 0xCCCC) >> 2);     // exchange bit pairs
+  r = ((r & 0x0F0F) << 4) | ((r & 0xF0F0) >> 4);     // exchange nibbles
+  r = ((r & 0x00FF) << 8) | ((r & 0xFF00) >> 8);     // exchange bytes
+  return r;
+}
+
+stbi_inline static int stbi__bit_reverse(int v, int bits)
+{
+   int flipped16, surplus = 16 - bits;
+   STBI_ASSERT(bits <= 16);
+   // reversing `bits` bits == reversing all 16 and shifting out the surplus, e.g. 11 bits drops 5
+   flipped16 = stbi__bitreverse16(v);
+   return flipped16 >> surplus;
+}
+
+static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
+{ // fills *z from num per-symbol code lengths; returns 1, or the result of stbi__err when the lengths are inconsistent
+   int i,k=0;
+   int code, next_code[16], sizes[17];
+
+   // DEFLATE spec for generating codes
+   memset(sizes, 0, sizeof(sizes));
+   memset(z->fast, 0, sizeof(z->fast));
+   for (i=0; i < num; ++i)
+      ++sizes[sizelist[i]]; // histogram: how many symbols use each code length
+   sizes[0] = 0; // length 0 entries get no code (see the `if (s)` test below)
+   for (i=1; i < 16; ++i)
+      if (sizes[i] > (1 << i)) // more codes of length i than i bits can distinguish
+         return stbi__err("bad sizes", "Corrupt PNG");
+   code = 0;
+   for (i=1; i < 16; ++i) {
+      next_code[i] = code;
+      z->firstcode[i] = (stbi__uint16) code;
+      z->firstsymbol[i] = (stbi__uint16) k;
+      code = (code + sizes[i]);
+      if (sizes[i])
+         if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG");
+      z->maxcode[i] = code << (16-i); // preshift for inner loop
+      code <<= 1;
+      k += sizes[i];
+   }
+   z->maxcode[16] = 0x10000; // sentinel
+   for (i=0; i < num; ++i) {
+      int s = sizelist[i];
+      if (s) {
+         int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s]; // slot of this symbol in size[]/value[]
+         stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i); // packed fast-table entry: length above bit 9, symbol below
+         z->size [c] = (stbi_uc     ) s;
+         z->value[c] = (stbi__uint16) i;
+         if (s <= STBI__ZFAST_BITS) {
+            int j = stbi__bit_reverse(next_code[s],s);
+            while (j < (1 << STBI__ZFAST_BITS)) { // every fast-table index whose low s bits equal the reversed code
+               z->fast[j] = fastv;
+               j += (1 << s);
+            }
+         }
+         ++next_code[s];
+      }
+   }
+   return 1;
+}
+
+// zlib-from-memory implementation for PNG reading
+//    because PNG allows splitting the zlib stream arbitrarily,
+//    and it's annoying structurally to have PNG call ZLIB call PNG,
+//    we require PNG read all the IDATs and combine them into a single
+//    memory buffer
+
+typedef struct // complete state of one zlib decode: input cursor, bit buffer, output buffer, current huffman tables
+{
+   stbi_uc *zbuffer, *zbuffer_end; // next unread input byte and one past the last input byte
+   int num_bits;             // number of valid bits currently held in code_buffer
+   stbi__uint32 code_buffer; // bit buffer; bits are consumed from the low end
+
+   char *zout;        // next output byte to write
+   char *zout_start;  // start of the output buffer
+   char *zout_end;    // one past the end of the output buffer
+   int   z_expandable; // nonzero if stbi__zexpand may reallocate the output buffer
+
+   stbi__zhuffman z_length, z_distance; // tables for literal/length symbols and for distance symbols
+} stbi__zbuf;
+
+stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
+{
+   // once the input is used up, every further read yields a zero byte
+   return (z->zbuffer < z->zbuffer_end) ? *z->zbuffer++ : (stbi_uc) 0;
+}
+
+static void stbi__fill_bits(stbi__zbuf *z)
+{ // appends input bytes above the bits already buffered until more than 24 bits are held
+   do {
+      STBI_ASSERT(z->code_buffer < (1U << z->num_bits)); // no stray bits above num_bits
+      z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits;
+      z->num_bits += 8;
+   } while (z->num_bits <= 24);
+}
+
+stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
+{
+   unsigned int low_bits;
+   if (n > z->num_bits) stbi__fill_bits(z); // top up before taking n bits
+   low_bits = z->code_buffer & ((1 << n) - 1);
+   z->num_bits -= n;
+   z->code_buffer >>= n;
+   return low_bits;
+}
+
+static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
+{ // decodes one symbol whose code is longer than STBI__ZFAST_BITS; returns the symbol, or -1 if the bits match no code
+   int b,s,k;
+   // not resolved by fast table, so compute it the slow way (jpeg approach, which requires MSbits at top)
+   k = stbi__bit_reverse(a->code_buffer, 16);
+   for (s=STBI__ZFAST_BITS+1; ; ++s) // always terminates: maxcode[16] is a sentinel above any 16-bit k
+      if (k < z->maxcode[s])
+         break;
+   if (s >= 16) return -1; // invalid code!
+   // code size is s, so:
+   b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
+   if (b < 0 || b >= (int) sizeof (z->size)) return -1; // corrupt table: index would fall outside size[]/value[]
+   if (z->size[b] != s) return -1;  // was only an assert, but corrupt data can get here
+   a->code_buffer >>= s;
+   a->num_bits -= s;
+   return z->value[b];
+}
+
+stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
+{
+   int entry, codelen;
+   if (a->num_bits < 16) stbi__fill_bits(a);
+   entry = z->fast[a->code_buffer & STBI__ZFAST_MASK];
+   // a zero entry means the fast table has no code for these bits
+   if (!entry)
+      return stbi__zhuffman_decode_slowpath(a, z);
+   codelen = entry >> 9; // entry layout: (code length << 9) | symbol
+   a->code_buffer >>= codelen;
+   a->num_bits -= codelen;
+   return entry & 511;
+}
+
+static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room for n bytes
+{ // grows the output buffer by doubling until n more bytes fit at zout; returns 1, or the result of stbi__err on failure
+   char *q;
+   int cur, limit, old_limit;
+   z->zout = zout;
+   if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
+   cur   = (int) (z->zout     - z->zout_start);
+   limit = old_limit = (int) (z->zout_end - z->zout_start);
+   if (n < 0 || n > INT_MAX - cur) return stbi__err("outofmem", "Out of memory"); // cur + n must not overflow int
+   for (limit += (limit == 0); limit - cur < n; limit *= 2) // an empty buffer starts at 1 byte: doubling 0 never ends
+      if (limit > INT_MAX / 2) return stbi__err("outofmem", "Out of memory"); // next doubling would overflow int
+   q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit); STBI_NOTUSED(old_limit);
+   if (q == NULL) return stbi__err("outofmem", "Out of memory");
+   z->zout_start = q;
+   z->zout       = q + cur; // same offset as before, in the (possibly moved) buffer
+   z->zout_end   = q + limit;
+   return 1;
+}
+
+static const int stbi__zlength_base[31] = { // base match length, indexed by (length symbol - 257)
+   3,4,5,6,7,8,9,10,11,13,
+   15,17,19,23,27,31,35,43,51,59,
+   67,83,99,115,131,163,195,227,258,0,0 };
+
+static const int stbi__zlength_extra[31]= // extra bits read and added to the base length, same index
+{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };
+
+static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, // base distance, indexed by distance symbol
+257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};
+
+static const int stbi__zdist_extra[32] = // extra bits added to the base distance; the two entries not listed are zero-initialized
+{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
+
+static int stbi__parse_huffman_block(stbi__zbuf *a)
+{ // decodes one huffman-coded block into the output buffer; returns 1 at end-of-block, or the result of stbi__err / 0 on failure
+   char *zout = a->zout; // local write cursor; stored back into a->zout on exit and around stbi__zexpand
+   for(;;) {
+      int z = stbi__zhuffman_decode(a, &a->z_length);
+      if (z < 256) { // literal byte
+         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
+         if (zout >= a->zout_end) {
+            if (!stbi__zexpand(a, zout, 1)) return 0;
+            zout = a->zout;
+         }
+         *zout++ = (char) z;
+      } else {
+         stbi_uc *p;
+         int len,dist;
+         if (z == 256) { // end-of-block symbol
+            a->zout = zout;
+            return 1;
+         }
+         if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // length symbols 286/287 never occur in valid data
+         len = stbi__zlength_base[z - 257];
+         if (stbi__zlength_extra[z - 257]) len += stbi__zreceive(a, stbi__zlength_extra[z - 257]);
+         z = stbi__zhuffman_decode(a, &a->z_distance);
+         if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG"); // distance symbols 30/31 never occur; they would give dist 0
+         dist = stbi__zdist_base[z];
+         if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
+         if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); // would reach back before the start of the output
+         if (zout + len > a->zout_end) {
+            if (!stbi__zexpand(a, zout, len)) return 0;
+            zout = a->zout;
+         }
+         p = (stbi_uc *) (zout - dist);
+         if (dist == 1) { // run of one byte; common in images.
+            stbi_uc v = *p;
+            if (len) { do *zout++ = v; while (--len); }
+         } else {
+            if (len) { do *zout++ = *p++; while (--len); } // byte-by-byte so a source overlapping the destination repeats correctly
+         }
+      }
+   }
+}
+
+static int stbi__compute_huffman_codes(stbi__zbuf *a)
+{ // reads the code-length description of a dynamic block and builds a->z_length and a->z_distance; returns 1 or 0/stbi__err
+   static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; // order in which the code-length code lengths are stored
+   stbi__zhuffman z_codelength;
+   stbi_uc lencodes[286+32+137];//padding for maximum single op
+   stbi_uc codelength_sizes[19];
+   int i,n;
+
+   int hlit  = stbi__zreceive(a,5) + 257; // number of literal/length code lengths
+   int hdist = stbi__zreceive(a,5) + 1;   // number of distance code lengths
+   int hclen = stbi__zreceive(a,4) + 4;   // number of code-length code lengths that follow
+   int ntot  = hlit + hdist;
+
+   memset(codelength_sizes, 0, sizeof(codelength_sizes));
+   for (i=0; i < hclen; ++i) {
+      int s = stbi__zreceive(a,3);
+      codelength_sizes[length_dezigzag[i]] = (stbi_uc) s;
+   }
+   if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
+
+   n = 0;
+   while (n < ntot) {
+      int c = stbi__zhuffman_decode(a, &z_codelength);
+      if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
+      if (c < 16)
+         lencodes[n++] = (stbi_uc) c; // a literal code length
+      else {
+         stbi_uc fill = 0;
+         if (c == 16) { // repeat the previous length 3..6 times
+            c = stbi__zreceive(a,2)+3;
+            if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG");
+            fill = lencodes[n-1];
+         } else if (c == 17) // 3..10 zeros
+            c = stbi__zreceive(a,3)+3;
+         else { // 11..138 zeros
+            STBI_ASSERT(c == 18);
+            c = stbi__zreceive(a,7)+11;
+         }
+         if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG"); // run would go past the declared total
+         memset(lencodes+n, fill, c);
+         n += c;
+      }
+   }
+   if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG");
+   if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
+   if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0; // distance lengths follow the literal/length ones
+   return 1;
+}
+
+static int stbi__parse_uncompressed_block(stbi__zbuf *a)
+{ // copies one stored block from input to output; returns 1, or 0/stbi__err on a bad header or short input
+   stbi_uc header[4];
+   int len,nlen,k;
+   if (a->num_bits & 7)
+      stbi__zreceive(a, a->num_bits & 7); // discard
+   // drain the bit-packed data into header
+   k = 0;
+   while (a->num_bits > 0) {
+      header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
+      a->code_buffer >>= 8;
+      a->num_bits -= 8;
+   }
+   STBI_ASSERT(a->num_bits == 0);
+   // now fill header the normal way
+   while (k < 4)
+      header[k++] = stbi__zget8(a);
+   len  = header[1] * 256 + header[0]; // little-endian 16-bit length
+   nlen = header[3] * 256 + header[2]; // must equal len with all 16 bits inverted
+   if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
+   if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG");
+   if (a->zout + len > a->zout_end)
+      if (!stbi__zexpand(a, a->zout, len)) return 0;
+   memcpy(a->zout, a->zbuffer, len);
+   a->zbuffer += len;
+   a->zout += len;
+   return 1;
+}
+
+static int stbi__parse_zlib_header(stbi__zbuf *a)
+{ // consumes and validates the two header bytes; returns 1 if acceptable, otherwise the result of stbi__err
+   int cmf   = stbi__zget8(a);
+   int cm    = cmf & 15; // low nibble of the first byte: compression method
+   /* int cinfo = cmf >> 4; */
+   int flg   = stbi__zget8(a);
+   if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
+   if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
+   if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
+   // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
+   return 1;
+}
+
+static const stbi_uc stbi__zdefault_length[288] = // code lengths of the fixed literal/length code (block type 1)
+{
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
+};
+static const stbi_uc stbi__zdefault_distance[32] = // code lengths of the fixed distance code: all 5 bits
+{
+   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
+};
+/*
+Init algorithm (for reference only; the tables above are spelled out as constants):
+{
+   int i;   // use <= to match clearly with spec
+   for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;
+   for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9;
+   for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7;
+   for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8;
+
+   for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5;
+}
+*/
+
+static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
+{ // decodes blocks until one is flagged final; returns 1 on success and 0 on any failure
+   int final, type;
+   if (parse_header)
+      if (!stbi__parse_zlib_header(a)) return 0;
+   a->num_bits = 0; // start with an empty bit buffer
+   a->code_buffer = 0;
+   do {
+      final = stbi__zreceive(a,1); // 1-bit "last block" flag
+      type = stbi__zreceive(a,2);  // 2-bit block type
+      if (type == 0) {
+         if (!stbi__parse_uncompressed_block(a)) return 0;
+      } else if (type == 3) { // rejected; note that no stbi__err message is recorded on this path
+         return 0;
+      } else {
+         if (type == 1) {
+            // use fixed code lengths
+            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , 288)) return 0;
+            if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0;
+         } else { // type 2: code lengths are read from the stream
+            if (!stbi__compute_huffman_codes(a)) return 0;
+         }
+         if (!stbi__parse_huffman_block(a)) return 0;
+      }
+   } while (!final);
+   return 1;
+}
+
+static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
+{
+   // point the output side of the state at obuf (olen bytes, empty), then decode
+   a->z_expandable = exp;
+   a->zout_start   = obuf;
+   a->zout         = a->zout_start;
+   a->zout_end     = a->zout_start + olen;
+   return stbi__parse_zlib(a, parse_header);
+}
+
+STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
+{
+   stbi__zbuf zs;
+   char *initial = (char *) stbi__malloc(initial_size);
+   if (!initial) return NULL;
+   zs.zbuffer     = (stbi_uc *) buffer;
+   zs.zbuffer_end = zs.zbuffer + len;
+   if (!stbi__do_zlib(&zs, initial, initial_size, 1, 1)) {
+      // the buffer may have been reallocated, so free what the state points at now
+      STBI_FREE(zs.zout_start);
+      return NULL;
+   }
+   if (outlen != NULL) *outlen = (int) (zs.zout - zs.zout_start);
+   return zs.zout_start;
+}
+
+STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
+{ // same as stbi_zlib_decode_malloc_guesssize with a 16384-byte initial output buffer
+   return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
+}
+
+STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
+{
+   stbi__zbuf zs;
+   char *initial = (char *) stbi__malloc(initial_size);
+   if (!initial) return NULL;
+   zs.zbuffer     = (stbi_uc *) buffer;
+   zs.zbuffer_end = zs.zbuffer + len;
+   if (!stbi__do_zlib(&zs, initial, initial_size, 1, parse_header)) {
+      // the buffer may have been reallocated, so free what the state points at now
+      STBI_FREE(zs.zout_start);
+      return NULL;
+   }
+   if (outlen != NULL) *outlen = (int) (zs.zout - zs.zout_start);
+   return zs.zout_start;
+}
+
+STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
+{
+   stbi__zbuf zs;
+   zs.zbuffer     = (stbi_uc *) ibuffer;
+   zs.zbuffer_end = zs.zbuffer + ilen;
+   // caller-owned output of olen bytes (not expandable); zlib header is parsed
+   if (!stbi__do_zlib(&zs, obuffer, olen, 0, 1))
+      return -1;
+   return (int) (zs.zout - zs.zout_start);
+}
+
+STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
+{
+   stbi__zbuf zs;
+   char *initial = (char *) stbi__malloc(16384);
+   if (!initial) return NULL;
+   zs.zbuffer     = (stbi_uc *) buffer;
+   zs.zbuffer_end = zs.zbuffer + len;
+   if (!stbi__do_zlib(&zs, initial, 16384, 1, 0)) {
+      // the buffer may have been reallocated, so free what the state points at now
+      STBI_FREE(zs.zout_start);
+      return NULL;
+   }
+   if (outlen != NULL) *outlen = (int) (zs.zout - zs.zout_start);
+   return zs.zout_start;
+}
+
+STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
+{
+   stbi__zbuf zs;
+   zs.zbuffer     = (stbi_uc *) ibuffer;
+   zs.zbuffer_end = zs.zbuffer + ilen;
+   // caller-owned output of olen bytes (not expandable); no zlib header is parsed
+   if (!stbi__do_zlib(&zs, obuffer, olen, 0, 0))
+      return -1;
+   return (int) (zs.zout - zs.zout_start);
+}
+#endif
+
+// public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
+//    simple implementation
+//      - only 8-bit samples
+//      - no CRC checking
+//      - allocates lots of intermediate memory
+//        - avoids problem of streaming data between subsystems
+//        - avoids explicit window management
+//    performance
+//      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
+
+#ifndef STBI_NO_PNG
+typedef struct // the two 32-bit fields read by stbi__get_chunk_header
+{
+   stbi__uint32 length; // first 32-bit big-endian value of the chunk header
+   stbi__uint32 type;   // second 32-bit big-endian value of the chunk header
+} stbi__pngchunk;
+
+static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
+{
+   stbi__pngchunk hdr; // filled in stream order: length first, then the type tag
+   hdr.length = stbi__get32be(s);
+   hdr.type   = stbi__get32be(s);
+   return hdr;
+}
+
+static int stbi__check_png_header(stbi__context *s)
+{
+   // Consume the stream one byte at a time, stopping at the first signature mismatch.
+   static const stbi_uc expected[8] = { 137,80,78,71,13,10,26,10 };
+   const stbi_uc *want = expected;
+   while (want != expected + 8)
+      if (stbi__get8(s) != *want++) return stbi__err("bad png sig","Not a PNG");
+   return 1;
+}
+
+typedef struct
+{
+   stbi__context *s;                 // input stream being decoded
+   stbi_uc *idata, *expanded, *out;  // gathered IDAT bytes, inflated data, decoded pixels
+   int depth;                        // bit depth read from IHDR
+} stbi__png;
+
+
+enum {
+   STBI__F_none=0,       // bytes stored as-is
+   STBI__F_sub=1,        // adds the byte one pixel to the left
+   STBI__F_up=2,         // adds the byte directly above (prior row)
+   STBI__F_avg=3,        // adds the average of left and above
+   STBI__F_paeth=4,      // adds the Paeth predictor of left, above, upper-left
+   // synthetic filters used for first scanline to avoid needing a dummy row of 0s
+   STBI__F_avg_first,    // avg with the row above treated as zero
+   STBI__F_paeth_first   // paeth with the row above treated as zero
+};
+
+static stbi_uc first_row_filter[5] = // indexed by the filter byte of row 0; yields a variant that never reads a prior row
+{
+   STBI__F_none,
+   STBI__F_sub,
+   STBI__F_none,          // "up" on the first row has nothing above to add
+   STBI__F_avg_first,
+   STBI__F_paeth_first
+};
+
+static int stbi__paeth(int a, int b, int c)
+{
+   // PNG Paeth predictor: pick whichever of a (left), b (above), c (upper-left)
+   // lies closest to the linear estimate a + b - c; ties resolve as a, then b, then c.
+   const int estimate = a + b - c;
+   const int dist_a = abs(estimate - a);
+   const int dist_b = abs(estimate - b);
+   const int dist_c = abs(estimate - c);
+   return (dist_a <= dist_b && dist_a <= dist_c) ? a : (dist_b <= dist_c ? b : c);
+}
+
+static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; // indexed by bit depth: multiplier that stretches a sample to 0..255
+
+// Reverses the per-scanline PNG filters on inflated data 'raw' and stores x*y pixels of out_n channels in a newly allocated a->out; returns 1 on success.
+static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
+{
+   int bytes = (depth == 16? 2 : 1); // bytes per sample
+   stbi__context *s = a->s;
+   stbi__uint32 i,j,stride = x*out_n*bytes; // stride: bytes per output row
+   stbi__uint32 img_len, img_width_bytes;
+   int k;
+   int img_n = s->img_n; // copy it into a local for later
+
+   int output_bytes = out_n*bytes; // bytes per output pixel
+   int filter_bytes = img_n*bytes; // bytes per input pixel; forced to 1 below when depth < 8
+   int width = x;
+
+   STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
+   a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
+   if (!a->out) return stbi__err("outofmem", "Out of memory");
+
+   if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG");
+   img_width_bytes = (((img_n * x * depth) + 7) >> 3); // packed input row size, rounded up to whole bytes
+   img_len = (img_width_bytes + 1) * y; // +1: every row starts with a filter-type byte
+
+   // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
+   // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
+   // so just check for raw_len < img_len always.
+   if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");
+
+   for (j=0; j < y; ++j) {
+      stbi_uc *cur = a->out + stride*j;
+      stbi_uc *prior;
+      int filter = *raw++; // each row leads with its filter type
+
+      if (filter > 4)
+         return stbi__err("invalid filter","Corrupt PNG");
+
+      if (depth < 8) {
+         STBI_ASSERT(img_width_bytes <= x);
+         cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
+         filter_bytes = 1;
+         width = img_width_bytes;
+      }
+      prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above
+
+      // if first row, use special filter that doesn't sample previous row
+      if (j == 0) filter = first_row_filter[filter];
+
+      // handle first byte explicitly
+      for (k=0; k < filter_bytes; ++k) {
+         switch (filter) {
+            case STBI__F_none       : cur[k] = raw[k]; break;
+            case STBI__F_sub        : cur[k] = raw[k]; break;
+            case STBI__F_up         : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
+            case STBI__F_avg        : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break;
+            case STBI__F_paeth      : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break;
+            case STBI__F_avg_first  : cur[k] = raw[k]; break;
+            case STBI__F_paeth_first: cur[k] = raw[k]; break;
+         }
+      }
+
+      if (depth == 8) {
+         if (img_n != out_n)
+            cur[img_n] = 255; // first pixel
+         raw += img_n;
+         cur += out_n;
+         prior += out_n;
+      } else if (depth == 16) {
+         if (img_n != out_n) {
+            cur[filter_bytes]   = 255; // first pixel top byte
+            cur[filter_bytes+1] = 255; // first pixel bottom byte
+         }
+         raw += filter_bytes;
+         cur += output_bytes;
+         prior += output_bytes;
+      } else {
+         raw += 1;
+         cur += 1;
+         prior += 1;
+      }
+
+      // this is a little gross, so that we don't switch per-pixel or per-component
+      if (depth < 8 || img_n == out_n) {
+         int nk = (width - 1)*filter_bytes; // bytes left in the row after the first pixel
+         #define STBI__CASE(f) \
+             case f:     \
+                for (k=0; k < nk; ++k)
+         switch (filter) {
+            // "none" filter turns into a memcpy here; make that explicit.
+            case STBI__F_none:         memcpy(cur, raw, nk); break;
+            STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break;
+            STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
+            STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break;
+            STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break;
+            STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break;
+            STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break;
+         }
+         #undef STBI__CASE
+         raw += nk;
+      } else {
+         STBI_ASSERT(img_n+1 == out_n);
+         #define STBI__CASE(f) \
+             case f:     \
+                for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
+                   for (k=0; k < filter_bytes; ++k)
+         switch (filter) {
+            STBI__CASE(STBI__F_none)         { cur[k] = raw[k]; } break;
+            STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break;
+            STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
+            STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break;
+            STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break;
+            STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break;
+            STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break;
+         }
+         #undef STBI__CASE
+
+         // the loop above sets the high byte of the pixels' alpha, but for
+         // 16 bit png files we also need the low byte set. we'll do that here.
+         if (depth == 16) {
+            cur = a->out + stride*j; // start at the beginning of the row again
+            for (i=0; i < x; ++i,cur+=output_bytes) {
+               cur[filter_bytes+1] = 255;
+            }
+         }
+      }
+   }
+
+   // we make a separate pass to expand bits to pixels; for performance,
+   // this could run two scanlines behind the above code, so it won't
+   // interfere with filtering but will still be in the cache.
+   if (depth < 8) {
+      for (j=0; j < y; ++j) {
+         stbi_uc *cur = a->out + stride*j;
+         stbi_uc *in  = a->out + stride*j + x*out_n - img_width_bytes;
+         // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
+         // png guarantees byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
+         stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
+
+         // note that the final byte might overshoot and write more data than desired.
+         // we can allocate enough data that this never writes out of memory, but it
+         // could also overwrite the next scanline. can it overwrite non-empty data
+         // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
+         // so we need to explicitly clamp the final ones
+
+         if (depth == 4) {
+            for (k=x*img_n; k >= 2; k-=2, ++in) {
+               *cur++ = scale * ((*in >> 4)       );
+               *cur++ = scale * ((*in     ) & 0x0f);
+            }
+            if (k > 0) *cur++ = scale * ((*in >> 4)       );
+         } else if (depth == 2) {
+            for (k=x*img_n; k >= 4; k-=4, ++in) {
+               *cur++ = scale * ((*in >> 6)       );
+               *cur++ = scale * ((*in >> 4) & 0x03);
+               *cur++ = scale * ((*in >> 2) & 0x03);
+               *cur++ = scale * ((*in     ) & 0x03);
+            }
+            if (k > 0) *cur++ = scale * ((*in >> 6)       );
+            if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
+            if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
+         } else if (depth == 1) {
+            for (k=x*img_n; k >= 8; k-=8, ++in) {
+               *cur++ = scale * ((*in >> 7)       );
+               *cur++ = scale * ((*in >> 6) & 0x01);
+               *cur++ = scale * ((*in >> 5) & 0x01);
+               *cur++ = scale * ((*in >> 4) & 0x01);
+               *cur++ = scale * ((*in >> 3) & 0x01);
+               *cur++ = scale * ((*in >> 2) & 0x01);
+               *cur++ = scale * ((*in >> 1) & 0x01);
+               *cur++ = scale * ((*in     ) & 0x01);
+            }
+            if (k > 0) *cur++ = scale * ((*in >> 7)       );
+            if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
+            if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
+            if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
+            if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
+            if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
+            if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
+         }
+         if (img_n != out_n) {
+            int q;
+            // insert alpha = 255
+            cur = a->out + stride*j;
+            if (img_n == 1) {
+               for (q=x-1; q >= 0; --q) { // back to front, so unexpanded samples are read before being overwritten
+                  cur[q*2+1] = 255;
+                  cur[q*2+0] = cur[q];
+               }
+            } else {
+               STBI_ASSERT(img_n == 3);
+               for (q=x-1; q >= 0; --q) {
+                  cur[q*4+3] = 255;
+                  cur[q*4+2] = cur[q*3+2];
+                  cur[q*4+1] = cur[q*3+1];
+                  cur[q*4+0] = cur[q*3+0];
+               }
+            }
+         }
+      }
+   } else if (depth == 16) {
+      // force the image data from big-endian to platform-native.
+      // this is done in a separate pass due to the decoding relying
+      // on the data being untouched, but could probably be done
+      // per-line during decode if care is taken.
+      stbi_uc *cur = a->out;
+      stbi__uint16 *cur16 = (stbi__uint16*)cur;
+
+      for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) {
+         *cur16 = (cur[0] << 8) | cur[1];
+      }
+   }
+
+   return 1;
+}
+
+static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
+{
+   // Builds a->out from inflated PNG data (de-interlacing when needed); returns 1 on success, a failure value otherwise.
+   int out_bytes = out_n * (depth == 16 ? 2 : 1); // bytes per output pixel
+   stbi_uc *final;
+   int p;
+   if (!interlaced) // whole image is a single pass
+      return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
+
+   // de-interlacing: each of the 7 passes is decoded as its own sub-image and scattered into 'final'
+   final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
+   if (!final) return stbi__err("outofmem", "Out of memory"); // never hand a NULL destination to the memcpy below
+   for (p=0; p < 7; ++p) {
+      int xorig[] = { 0,4,0,2,0,1,0 };
+      int yorig[] = { 0,0,4,0,2,0,1 };
+      int xspc[]  = { 8,8,4,4,2,2,1 };
+      int yspc[]  = { 8,8,8,4,4,2,2 };
+      int i,j,x,y;
+      // size of this pass's sub-image: pixels from the origin onward, one per xspc/yspc step, rounded up
+      x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
+      y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
+      if (x && y) {
+         stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
+         if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
+            STBI_FREE(final);
+            return 0;
+         }
+         for (j=0; j < y; ++j) {
+            for (i=0; i < x; ++i) {
+               int out_y = j*yspc[p]+yorig[p];
+               int out_x = i*xspc[p]+xorig[p];
+               memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
+                      a->out + (j*x+i)*out_bytes, out_bytes);
+            }
+         }
+         STBI_FREE(a->out);
+         image_data += img_len; // advance past the bytes this pass consumed
+         image_data_len -= img_len;
+      }
+   }
+   a->out = final;
+   return 1;
+}
+
+static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
+{
+   // Colour-key transparency for 8-bit output. The image in z->out is expected
+   // to carry an alpha channel already filled with 255; pixels whose colour
+   // equals tc end up with alpha 0. Always returns 1.
+   stbi__uint32 remaining = z->s->img_x * z->s->img_y;
+   stbi_uc *px = z->out;
+
+   STBI_ASSERT(out_n == 2 || out_n == 4);
+
+   if (out_n == 2) {
+      // gray+alpha: alpha is rewritten for every pixel
+      for (; remaining != 0; --remaining, px += 2)
+         px[1] = (px[0] == tc[0]) ? 0 : 255;
+   } else {
+      // RGBA: alpha is cleared only when all three channels match
+      for (; remaining != 0; --remaining, px += 4) {
+         if (px[0] == tc[0] && px[1] == tc[1] && px[2] == tc[2])
+            px[3] = 0;
+      }
+   }
+
+   return 1;
+}
+
+static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
+{
+   // 16-bit counterpart of the colour-key pass: z->out is viewed as 16-bit samples
+   // whose alpha is expected to be 65535 already; matching pixels get alpha 0.
+   stbi__uint16 *sample = (stbi__uint16*) z->out;
+   stbi__uint32 count = z->s->img_x * z->s->img_y;
+
+   STBI_ASSERT(out_n == 2 || out_n == 4);
+
+   if (out_n == 2) {
+      while (count--) {
+         sample[1] = (sample[0] == tc[0]) ? 0 : 65535;
+         sample += 2;
+      }
+   } else {
+      while (count--) {
+         if (sample[0] == tc[0] && sample[1] == tc[1] && sample[2] == tc[2])
+            sample[3] = 0;
+         sample += 4;
+      }
+   }
+
+   return 1;
+}
+
+static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
+{
+   // Replaces the one-byte-per-pixel index image in a->out with a freshly
+   // allocated image holding pal_img_n bytes per pixel looked up in 'palette'
+   // (4 bytes per entry). 'len' is accepted but unused.
+   stbi__uint32 px, pixel_count = a->s->img_x * a->s->img_y;
+   stbi_uc *indices = a->out;
+   stbi_uc *expanded, *dst;
+   int wants_alpha = (pal_img_n != 3); // any value other than 3 takes the 4-byte path
+
+   STBI_NOTUSED(len);
+
+   expanded = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0);
+   if (expanded == NULL) return stbi__err("outofmem", "Out of memory");
+
+   // no early exits between the allocation above and the free below, so nothing can leak
+   dst = expanded;
+   for (px=0; px < pixel_count; ++px) {
+      const stbi_uc *entry = palette + indices[px]*4;
+      dst[0] = entry[0];
+      dst[1] = entry[1];
+      dst[2] = entry[2];
+      if (wants_alpha) {
+         dst[3] = entry[3];
+         dst += 4;
+      } else {
+         dst += 3;
+      }
+   }
+
+   // swap the index image for the expanded one
+   STBI_FREE(a->out);
+   a->out = expanded;
+
+   return 1;
+}
+
+static int stbi__unpremultiply_on_load = 0; // read by stbi__de_iphone for 4-channel images
+static int stbi__de_iphone_flag = 0;        // gates the stbi__de_iphone call in the PNG loader
+
+STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
+{
+   stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply; // plain store to a file-level static
+}
+
+STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
+{
+   stbi__de_iphone_flag = flag_true_if_should_convert; // plain store to a file-level static
+}
+
+static void stbi__de_iphone(stbi__png *z)
+{
+   // Post-process for CgBI ("iPhone") PNGs: pixels arrive as BGR(A) and are
+   // rewritten in place as RGB(A); with stbi__unpremultiply_on_load set, 4-channel
+   // pixels also have their colour divided by alpha (rounded to nearest).
+   stbi__context *s = z->s;
+   stbi__uint32 remaining = s->img_x * s->img_y;
+   stbi_uc *px = z->out;
+   if (s->img_out_n == 3) {
+      // three channels: swap the outer two of each pixel
+      for (; remaining != 0; --remaining, px += 3) {
+         stbi_uc first = px[0];
+         px[0] = px[2];
+         px[2] = first;
+      }
+      return;
+   }
+   STBI_ASSERT(s->img_out_n == 4);
+   if (!stbi__unpremultiply_on_load) {
+      // four channels, alpha untouched: swap channels 0 and 2
+      for (; remaining != 0; --remaining, px += 4) {
+         stbi_uc first = px[0];
+         px[0] = px[2];
+         px[2] = first;
+      }
+      return;
+   }
+   // four channels: swap and unpremultiply; alpha 0 cannot be divided by, so only swap
+   for (; remaining != 0; --remaining, px += 4) {
+      stbi_uc alpha = px[3];
+      stbi_uc first = px[0];
+      if (alpha == 0) {
+         px[0] = px[2];
+         px[2] = first;
+      } else {
+         stbi_uc half = alpha / 2;
+         px[0] = (px[2] * 255 + half) / alpha;
+         px[1] = (px[1] * 255 + half) / alpha;
+         px[2] = ( first * 255 + half) / alpha;
+      }
+   }
+}
+
+#define STBI__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d)) // packs four chunk-name characters into one 32-bit tag, first character in the top byte
+
+static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) // chunk-by-chunk PNG reader; 'scan' (STBI__SCAN_type/header/load) sets how far it goes
+{
+   stbi_uc palette[1024], pal_img_n=0; // palette: up to 256 entries of 4 bytes each; pal_img_n != 0 marks a paletted image
+   stbi_uc has_trans=0, tc[3]={0}; // colour key taken from a non-paletted tRNS chunk (8-bit path)
+   stbi__uint16 tc16[3]; // same colour key for 16-bit images; written only by the tRNS case
+   stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0; // ioff: IDAT bytes gathered so far; idata_limit: capacity of z->idata
+   int first=1,k,interlace=0, color=0, is_iphone=0;
+   stbi__context *s = z->s;
+
+   z->expanded = NULL;
+   z->idata = NULL;
+   z->out = NULL;
+
+   if (!stbi__check_png_header(s)) return 0;
+
+   if (scan == STBI__SCAN_type) return 1;
+
+   for (;;) {
+      stbi__pngchunk c = stbi__get_chunk_header(s);
+      switch (c.type) {
+         case STBI__PNG_TYPE('C','g','B','I'):
+            is_iphone = 1; // payload ignored; only its presence is recorded
+            stbi__skip(s, c.length);
+            break;
+         case STBI__PNG_TYPE('I','H','D','R'): {
+            int comp,filter;
+            if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
+            first = 0;
+            if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
+            s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
+            s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
+            z->depth = stbi__get8(s);  if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)  return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
+            color = stbi__get8(s);  if (color > 6)         return stbi__err("bad ctype","Corrupt PNG");
+            if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype","Corrupt PNG");
+            if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
+            comp  = stbi__get8(s);  if (comp) return stbi__err("bad comp method","Corrupt PNG");
+            filter= stbi__get8(s);  if (filter) return stbi__err("bad filter method","Corrupt PNG");
+            interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
+            if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
+            if (!pal_img_n) {
+               s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); // colour bit 2 => 3 channels, bit 4 => extra alpha channel
+               if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
+               if (scan == STBI__SCAN_header) return 1;
+            } else {
+               // if paletted, then pal_n is our final components, and
+               // img_n is # components to decompress/filter.
+               s->img_n = 1;
+               if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
+               // if SCAN_header, have to scan to see if we have a tRNS
+            }
+            break;
+         }
+
+         case STBI__PNG_TYPE('P','L','T','E'):  {
+            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+            if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
+            pal_len = c.length / 3;
+            if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG");
+            for (i=0; i < pal_len; ++i) {
+               palette[i*4+0] = stbi__get8(s);
+               palette[i*4+1] = stbi__get8(s);
+               palette[i*4+2] = stbi__get8(s);
+               palette[i*4+3] = 255; // opaque unless a later tRNS chunk overrides it
+            }
+            break;
+         }
+
+         case STBI__PNG_TYPE('t','R','N','S'): {
+            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+            if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
+            if (pal_img_n) {
+               if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
+               if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
+               if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
+               pal_img_n = 4;
+               for (i=0; i < c.length; ++i)
+                  palette[i*4+3] = stbi__get8(s);
+            } else {
+               if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
+               if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
+               has_trans = 1;
+               if (z->depth == 16) {
+                  for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
+               } else {
+                  for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
+               }
+            }
+            break;
+         }
+
+         case STBI__PNG_TYPE('I','D','A','T'): {
+            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+            if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
+            if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
+            if ((int)(ioff + c.length) < (int)ioff) return 0; // guards the running IDAT size against wrapping
+            if (ioff + c.length > idata_limit) {
+               stbi__uint32 idata_limit_old = idata_limit;
+               stbi_uc *p;
+               if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
+               while (ioff + c.length > idata_limit)
+                  idata_limit *= 2; // grow geometrically until this chunk fits
+               STBI_NOTUSED(idata_limit_old);
+               p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
+               z->idata = p;
+            }
+            if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
+            ioff += c.length;
+            break;
+         }
+
+         case STBI__PNG_TYPE('I','E','N','D'): {
+            stbi__uint32 raw_len, bpl;
+            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+            if (scan != STBI__SCAN_load) return 1;
+            if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
+            // initial guess for decoded data size to avoid unnecessary reallocs
+            bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
+            raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
+            z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
+            if (z->expanded == NULL) return 0; // zlib should set error
+            STBI_FREE(z->idata); z->idata = NULL;
+            if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
+               s->img_out_n = s->img_n+1; // decode straight into a buffer with an extra alpha channel
+            else
+               s->img_out_n = s->img_n;
+            if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
+            if (has_trans) {
+               if (z->depth == 16) {
+                  if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
+               } else {
+                  if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
+               }
+            }
+            if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
+               stbi__de_iphone(z);
+            if (pal_img_n) {
+               // pal_img_n == 3 or 4
+               s->img_n = pal_img_n; // record the actual colors we had
+               s->img_out_n = pal_img_n;
+               if (req_comp >= 3) s->img_out_n = req_comp;
+               if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
+                  return 0;
+            } else if (has_trans) {
+               // non-paletted image with tRNS -> source image has (constant) alpha
+               ++s->img_n;
+            }
+            STBI_FREE(z->expanded); z->expanded = NULL;
+            return 1;
+         }
+
+         default:
+            // if critical, fail
+            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+            if ((c.type & (1 << 29)) == 0) { // bit 5 of the first type character clear (uppercase letter) => critical chunk
+               #ifndef STBI_NO_FAILURE_STRINGS
+               // not threadsafe
+               static char invalid_chunk[] = "XXXX PNG chunk not known";
+               invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
+               invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
+               invalid_chunk[2] = STBI__BYTECAST(c.type >>  8);
+               invalid_chunk[3] = STBI__BYTECAST(c.type >>  0);
+               #endif
+               return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
+            }
+            stbi__skip(s, c.length);
+            break;
+      }
+      // end of PNG chunk, read and skip CRC
+      stbi__get32be(s);
+   }
+}
+
+static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
+{
+   void *pixels = NULL;
+   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
+   if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
+      stbi__context *ctx = p->s;
+      ri->bits_per_channel = (p->depth < 8) ? 8 : p->depth; // sub-byte depths are reported as 8
+      pixels = p->out; // detach the image so the cleanup below leaves it alone
+      p->out = NULL;
+      if (req_comp && req_comp != ctx->img_out_n) {
+         // caller asked for a different channel count than was decoded
+         if (ri->bits_per_channel == 8)
+            pixels = stbi__convert_format((unsigned char *) pixels, ctx->img_out_n, req_comp, ctx->img_x, ctx->img_y);
+         else
+            pixels = stbi__convert_format16((stbi__uint16 *) pixels, ctx->img_out_n, req_comp, ctx->img_x, ctx->img_y);
+         ctx->img_out_n = req_comp;
+         if (pixels == NULL) return pixels;
+      }
+      *x = ctx->img_x;
+      *y = ctx->img_y;
+      if (n) *n = ctx->img_n;
+   }
+   // reached on parse failure and on success alike; every buffer still attached to p is released
+   STBI_FREE(p->out);      p->out      = NULL;
+   STBI_FREE(p->expanded); p->expanded = NULL;
+   STBI_FREE(p->idata);    p->idata    = NULL;
+
+   return pixels;
+}
+
+static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
+{
+   stbi__png png; // only .s is set here; stbi__parse_png_file resets the buffer pointers
+   png.s = s;
+   return stbi__do_png(&png, x, y, comp, req_comp, ri);
+}
+
+static int stbi__png_test(stbi__context *s)
+{
+   // Probe for the PNG signature, then rewind the stream regardless of the outcome.
+   int matched = stbi__check_png_header(s);
+   stbi__rewind(s);
+   return matched;
+}
+
+static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
+{
+   if (stbi__parse_png_file(p, STBI__SCAN_header, 0)) { // header-only scan
+      if (x != NULL)    *x    = p->s->img_x;
+      if (y != NULL)    *y    = p->s->img_y;
+      if (comp != NULL) *comp = p->s->img_n;
+      return 1;
+   }
+   stbi__rewind( p->s ); // failure: restore the stream position
+   return 0;
+}
+
+static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   stbi__png png; // scratch decoder state; only the stream pointer is filled in here
+   png.s = s;
+   return stbi__png_info_raw(&png, x, y, comp);
+}
+
+static int stbi__png_is16(stbi__context *s)
+{
+   // Reports whether the PNG's IHDR bit depth is 16. stbi__png_info_raw rewinds the
+   // stream itself when parsing fails; a parsed-but-not-16-bit file is rewound here.
+   stbi__png png;
+   png.s = s;
+   if (stbi__png_info_raw(&png, NULL, NULL, NULL)) {
+      if (png.depth == 16) return 1;
+      stbi__rewind(png.s);
+   }
+   return 0;
+}
+#endif
+
+// Microsoft/Windows BMP image
+
+#ifndef STBI_NO_BMP
+static int stbi__bmp_test_raw(stbi__context *s)
+{
+   // "BM" magic plus a recognised info-header size; the stream is left un-rewound.
+   int hdr_size;
+   if (stbi__get8(s) != 'B') return 0;
+   if (stbi__get8(s) != 'M') return 0;
+   stbi__get32le(s); // file size, ignored
+   stbi__get16le(s); // reserved, ignored
+   stbi__get16le(s); // reserved, ignored
+   stbi__get32le(s); // pixel data offset, ignored
+   hdr_size = stbi__get32le(s);
+   switch (hdr_size) { case 12: case 40: case 56: case 108: case 124: return 1; }
+   return 0;
+}
+
+static int stbi__bmp_test(stbi__context *s)
+{
+   int is_bmp = stbi__bmp_test_raw(s);
+   stbi__rewind(s); // restore the stream position after probing
+   return is_bmp;
+}
+
+
+// returns 0..31 for the highest set bit
+static int stbi__high_bit(unsigned int z)
+{
+   // Binary search for the top set bit: whenever the upper part of the remaining
+   // value is non-zero, shift it down and add the shift to the answer. Zero gives -1.
+   static const int step[5] = { 16, 8, 4, 2, 1 };
+   int n = 0, i;
+   if (z == 0) return -1;
+   for (i = 0; i < 5; ++i)
+      if ((z >> step[i]) != 0) { n += step[i]; z >>= step[i]; }
+   return n;
+}
+
+static int stbi__bitcount(unsigned int a)
+{
+   // parallel population count: fold neighbouring bit groups into sums of doubling width
+   unsigned int pairs   = (a & 0x55555555) + ((a >> 1) & 0x55555555);         // 2-bit sums
+   unsigned int nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333); // 4-bit sums
+   unsigned int bytes   = (nibbles + (nibbles >> 4)) & 0x0f0f0f0f;            // 8-bit sums
+   bytes += bytes >> 8;
+   return (bytes + (bytes >> 16)) & 0xff;
+}
+
+// extract an arbitrarily-aligned N-bit value (N=bits)
+// from v, and then make it 8-bits long and fractionally
+// extend it to the full 0..255 range.
+static int stbi__shiftsigned(unsigned int v, int shift, int bits)
+{
+   static const unsigned int mul_table[9] = {
+      0,
+      0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/,
+      0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/,
+   };
+   static const unsigned int shift_table[9] = {
+      0, 0,0,1,0,2,4,6,0,
+   };
+   STBI_ASSERT(bits >= 0 && bits <= 8); // must hold before bits is used as a shift amount or table index
+   if (shift < 0)
+      v <<= -shift; // negative shift: the field sits below bit 7, so move it up
+   else
+      v >>= shift;
+   STBI_ASSERT(v < 256); // v is unsigned, so only the upper bound needs checking
+   v >>= (8-bits); // keep just the top 'bits' bits of the byte
+   return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits]; // replicate the bit pattern to fill 8 bits
+}
+
+typedef struct
+{
+   int bpp, offset, hsz;                 // bits per pixel, data offset field of the file header, info-header size
+   unsigned int mr,mg,mb,ma, all_a;      // r/g/b/a channel bit masks; all_a is reset to 0 for 32-bit uncompressed files
+} stbi__bmp_data;
+
+static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info) // reads the BMP headers into s->img_x/img_y and *info; (void *) 1 on success
+{
+   int hsz;
+   if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
+   stbi__get32le(s); // discard filesize
+   stbi__get16le(s); // discard reserved
+   stbi__get16le(s); // discard reserved
+   info->offset = stbi__get32le(s);
+   info->hsz = hsz = stbi__get32le(s);
+   info->mr = info->mg = info->mb = info->ma = 0; // masks stay 0 unless one of the branches below sets them
+
+   if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
+   if (hsz == 12) {
+      s->img_x = stbi__get16le(s); // the 12-byte header stores 16-bit dimensions
+      s->img_y = stbi__get16le(s);
+   } else {
+      s->img_x = stbi__get32le(s);
+      s->img_y = stbi__get32le(s);
+   }
+   if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP"); // presumably the colour-planes field; any value but 1 is rejected
+   info->bpp = stbi__get16le(s);
+   if (hsz != 12) {
+      int compress = stbi__get32le(s);
+      if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
+      stbi__get32le(s); // discard sizeof
+      stbi__get32le(s); // discard hres
+      stbi__get32le(s); // discard vres
+      stbi__get32le(s); // discard colorsused
+      stbi__get32le(s); // discard max important
+      if (hsz == 40 || hsz == 56) {
+         if (hsz == 56) {
+            stbi__get32le(s); // hsz 56: four extra 32-bit fields are read and ignored
+            stbi__get32le(s);
+            stbi__get32le(s);
+            stbi__get32le(s);
+         }
+         if (info->bpp == 16 || info->bpp == 32) {
+            if (compress == 0) {
+               if (info->bpp == 32) {
+                  info->mr = 0xffu << 16;
+                  info->mg = 0xffu <<  8;
+                  info->mb = 0xffu <<  0;
+                  info->ma = 0xffu << 24;
+                  info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
+               } else {
+                  info->mr = 31u << 10; // default 16-bit layout: 5 bits per channel
+                  info->mg = 31u <<  5;
+                  info->mb = 31u <<  0;
+               }
+            } else if (compress == 3) { // masks are supplied by the file
+               info->mr = stbi__get32le(s);
+               info->mg = stbi__get32le(s);
+               info->mb = stbi__get32le(s);
+               // not documented, but generated by photoshop and handled by mspaint
+               if (info->mr == info->mg && info->mg == info->mb) {
+                  // ?!?!?
+                  return stbi__errpuc("bad BMP", "bad BMP");
+               }
+            } else
+               return stbi__errpuc("bad BMP", "bad BMP");
+         }
+      } else {
+         int i;
+         if (hsz != 108 && hsz != 124)
+            return stbi__errpuc("bad BMP", "bad BMP");
+         info->mr = stbi__get32le(s); // 108/124-byte headers always carry four masks
+         info->mg = stbi__get32le(s);
+         info->mb = stbi__get32le(s);
+         info->ma = stbi__get32le(s);
+         stbi__get32le(s); // discard color space
+         for (i=0; i < 12; ++i)
+            stbi__get32le(s); // discard color space parameters
+         if (hsz == 124) {
+            stbi__get32le(s); // discard rendering intent
+            stbi__get32le(s); // discard offset of profile data
+            stbi__get32le(s); // discard size of profile data
+            stbi__get32le(s); // discard reserved
+         }
+      }
+   }
+   return (void *) 1;
+}
+
+
+static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
+{
+   stbi_uc *out;
+   unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
+   stbi_uc pal[256][4];
+   int psize=0,i,j,width;
+   int flip_vertically, pad, target;
+   stbi__bmp_data info;
+   STBI_NOTUSED(ri);
+
+   info.all_a = 255;
+   if (stbi__bmp_parse_header(s, &info) == NULL)
+      return NULL; // error code already set
+
+   flip_vertically = ((int) s->img_y) > 0;
+   s->img_y = abs((int) s->img_y);
+
+   mr = info.mr;
+   mg = info.mg;
+   mb = info.mb;
+   ma = info.ma;
+   all_a = info.all_a;
+
+   if (info.hsz == 12) {
+      if (info.bpp < 24)
+         psize = (info.offset - 14 - 24) / 3;
+   } else {
+      if (info.bpp < 16)
+         psize = (info.offset - 14 - info.hsz) >> 2;
+   }
+
+   s->img_n = ma ? 4 : 3;
+   if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
+      target = req_comp;
+   else
+      target = s->img_n; // if they want monochrome, we'll post-convert
+
+   // sanity-check size
+   if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
+      return stbi__errpuc("too large", "Corrupt BMP");
+
+   out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
+   if (!out) return stbi__errpuc("outofmem", "Out of memory");
+   if (info.bpp < 16) {
+      int z=0;
+      if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
+      for (i=0; i < psize; ++i) {
+         pal[i][2] = stbi__get8(s);
+         pal[i][1] = stbi__get8(s);
+         pal[i][0] = stbi__get8(s);
+         if (info.hsz != 12) stbi__get8(s);
+         pal[i][3] = 255;
+      }
+      stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
+      if (info.bpp == 1) width = (s->img_x + 7) >> 3;
+      else if (info.bpp == 4) width = (s->img_x + 1) >> 1;
+      else if (info.bpp == 8) width = s->img_x;
+      else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
+      pad = (-width)&3;
+      if (info.bpp == 1) {
+         for (j=0; j < (int) s->img_y; ++j) {
+            int bit_offset = 7, v = stbi__get8(s);
+            for (i=0; i < (int) s->img_x; ++i) {
+               int color = (v>>bit_offset)&0x1;
+               out[z++] = pal[color][0];
+               out[z++] = pal[color][1];
+               out[z++] = pal[color][2];
+               if (target == 4) out[z++] = 255;
+               if (i+1 == (int) s->img_x) break;
+               if((--bit_offset) < 0) {
+                  bit_offset = 7;
+                  v = stbi__get8(s);
+               }
+            }
+            stbi__skip(s, pad);
+         }
+      } else {
+         for (j=0; j < (int) s->img_y; ++j) {
+            for (i=0; i < (int) s->img_x; i += 2) {
+               int v=stbi__get8(s),v2=0;
+               if (info.bpp == 4) {
+                  v2 = v & 15;
+                  v >>= 4;
+               }
+               out[z++] = pal[v][0];
+               out[z++] = pal[v][1];
+               out[z++] = pal[v][2];
+               if (target == 4) out[z++] = 255;
+               if (i+1 == (int) s->img_x) break;
+               v = (info.bpp == 8) ? stbi__get8(s) : v2;
+               out[z++] = pal[v][0];
+               out[z++] = pal[v][1];
+               out[z++] = pal[v][2];
+               if (target == 4) out[z++] = 255;
+            }
+            stbi__skip(s, pad);
+         }
+      }
+   } else {
+      int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
+      int z = 0;
+      int easy=0;
+      stbi__skip(s, info.offset - 14 - info.hsz);
+      if (info.bpp == 24) width = 3 * s->img_x;
+      else if (info.bpp == 16) width = 2*s->img_x;
+      else /* bpp = 32 and pad = 0 */ width=0;
+      pad = (-width) & 3;
+      if (info.bpp == 24) {
+         easy = 1;
+      } else if (info.bpp == 32) {
+         if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
+            easy = 2;
+      }
+      if (!easy) {
+         if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
+         // right shift amt to put high bit in position #7
+         rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
+         gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
+         bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
+         ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
+      }
+      for (j=0; j < (int) s->img_y; ++j) {
+         if (easy) {
+            for (i=0; i < (int) s->img_x; ++i) {
+               unsigned char a;
+               out[z+2] = stbi__get8(s);
+               out[z+1] = stbi__get8(s);
+               out[z+0] = stbi__get8(s);
+               z += 3;
+               a = (easy == 2 ? stbi__get8(s) : 255);
+               all_a |= a;
+               if (target == 4) out[z++] = a;
+            }
+         } else {
+            int bpp = info.bpp;
+            for (i=0; i < (int) s->img_x; ++i) {
+               stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
+               unsigned int a;
+               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
+               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
+               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
+               a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
+               all_a |= a;
+               if (target == 4) out[z++] = STBI__BYTECAST(a);
+            }
+         }
+         stbi__skip(s, pad);
+      }
+   }
+
+   // if alpha channel is all 0s, replace with all 255s
+   if (target == 4 && all_a == 0)
+      for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
+         out[i] = 255;
+
+   if (flip_vertically) {
+      stbi_uc t;
+      for (j=0; j < (int) s->img_y>>1; ++j) {
+         stbi_uc *p1 = out +      j     *s->img_x*target;
+         stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
+         for (i=0; i < (int) s->img_x*target; ++i) {
+            t = p1[i]; p1[i] = p2[i]; p2[i] = t;
+         }
+      }
+   }
+
+   if (req_comp && req_comp != target) {
+      out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
+      if (out == NULL) return out; // stbi__convert_format frees input on failure
+   }
+
+   *x = s->img_x;
+   *y = s->img_y;
+   if (comp) *comp = s->img_n;
+   return out;
+}
+#endif
+
+// Targa Truevision - TGA
+// by Jonathan Dummer
+#ifndef STBI_NO_TGA
+// Translates a TGA bit depth into a component count: STBI_grey, STBI_grey_alpha,
+// STBI_rgb, or bits_per_pixel/8 for 24 and 32 bit data; 0 means the depth is not
+// supported. Only RGB or RGBA (incl. 16bit) or grey are allowed.
+// *is_rgb16 (optional) becomes 1 for packed 15/16-bit RGB and 0 in every other case.
+static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
+{
+   // 15-bit data is always packed RGB; 16-bit data is packed RGB unless flagged as grey
+   int packed_rgb = (bits_per_pixel == 15) || (bits_per_pixel == 16 && !is_grey);
+
+   if (is_rgb16) *is_rgb16 = packed_rgb;
+   if (packed_rgb) return STBI_rgb;
+   if (bits_per_pixel == 8) return STBI_grey;
+   if (bits_per_pixel == 16) return STBI_grey_alpha; // only grey 16-bit data gets this far
+   if (bits_per_pixel == 24 || bits_per_pixel == 32) return bits_per_pixel/8;
+   return 0;
+}
+
+static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
+{   // Reads only the TGA header. On success stores width/height/component count through the non-NULL x/y/comp and returns 1; on any rejected field it rewinds s and returns 0.
+    int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp;
+    int sz, tga_colormap_type;
+    stbi__get8(s);                   // discard Offset
+    tga_colormap_type = stbi__get8(s); // colormap type
+    if( tga_colormap_type > 1 ) {
+        stbi__rewind(s);
+        return 0;      // only RGB or indexed allowed
+    }
+    tga_image_type = stbi__get8(s); // image type
+    if ( tga_colormap_type == 1 ) { // colormapped (paletted) image
+        if (tga_image_type != 1 && tga_image_type != 9) {
+            stbi__rewind(s);
+            return 0;  // a colormap is only accepted with image type 1 or 9
+        }
+        stbi__skip(s,4);       // skip index of first colormap entry and number of entries
+        sz = stbi__get8(s);    //   check bits per palette color entry
+        if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) {
+            stbi__rewind(s);
+            return 0;
+        }
+        stbi__skip(s,4);       // skip image x and y origin
+        tga_colormap_bpp = sz; // non-zero marks the image as paletted for the checks below
+    } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
+        if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) {
+            stbi__rewind(s);
+            return 0; // only RGB or grey allowed, +/- RLE
+        }
+        stbi__skip(s,9); // skip colormap specification and image x/y origin
+        tga_colormap_bpp = 0;
+    }
+    tga_w = stbi__get16le(s);
+    if( tga_w < 1 ) {
+        stbi__rewind(s);
+        return 0;   // test width
+    }
+    tga_h = stbi__get16le(s);
+    if( tga_h < 1 ) {
+        stbi__rewind(s);
+        return 0;   // test height
+    }
+    tga_bits_per_pixel = stbi__get8(s); // bits per pixel
+    stbi__get8(s); // ignore alpha bits
+    if (tga_colormap_bpp != 0) {
+        if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) {
+            // when using a colormap, tga_bits_per_pixel is the size of the indexes
+            // I don't think anything but 8 or 16bit indexes makes sense
+            stbi__rewind(s);
+            return 0;
+        }
+        tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL); // paletted: components come from the palette entry size
+    } else {
+        tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL); // types 3 and 11 are the grey ones
+    }
+    if(!tga_comp) {
+      stbi__rewind(s);
+      return 0;
+    }
+    if (x) *x = tga_w;
+    if (y) *y = tga_h;
+    if (comp) *comp = tga_comp;
+    return 1;                   // seems to have passed everything
+}
+
+static int stbi__tga_test(stbi__context *s)
+{  // Returns 1 when the header fields look like a TGA this loader accepts, 0 otherwise; the stream is rewound either way.
+   int looks_ok = 0;
+   int field, colormapped;
+   do {  // single pass; every failed check leaves through break
+      stbi__get8(s);                  //   first header byte is not inspected
+      colormapped = stbi__get8(s);    //   color type
+      if ( colormapped > 1 ) break;   //   only RGB or indexed allowed
+      field = stbi__get8(s);          //   image type
+      if ( colormapped == 1 ) {       // colormapped (paletted) image
+         if (field != 1 && field != 9) break; // colortype 1 demands image type 1 or 9
+         stbi__skip(s,4);             // skip index of first colormap entry and number of entries
+         field = stbi__get8(s);       //   check bits per palette color entry
+         if ( (field != 8) && (field != 15) && (field != 16) && (field != 24) && (field != 32) ) break;
+         stbi__skip(s,4);             // skip image x and y origin
+      } else {                        // "normal" image w/o colormap
+         if ( (field != 2) && (field != 3) && (field != 10) && (field != 11) ) break; // only RGB or grey allowed, +/- RLE
+         stbi__skip(s,9);             // skip colormap specification and image x/y origin
+      }
+      if ( stbi__get16le(s) < 1 ) break;   //   test width
+      if ( stbi__get16le(s) < 1 ) break;   //   test height
+      field = stbi__get8(s);          //   bits per pixel
+      if ( (colormapped == 1) && (field != 8) && (field != 16) ) break; // for colormapped images, bpp is size of an index
+      if ( (field != 8) && (field != 15) && (field != 16) && (field != 24) && (field != 32) ) break;
+      looks_ok = 1;                   // if we got this far, everything's good
+   } while (0);
+
+   stbi__rewind(s);
+   return looks_ok;
+}
+
+// Reads one little-endian 16-bit pixel and writes it to out[0..2] as 8-bit R, G, B.
+static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
+{
+   // three 5-bit channels: red in bits 10-14, green in bits 5-9, blue in bits 0-4
+   stbi__uint16 packed = (stbi__uint16)stbi__get16le(s);
+   int shift = 10;
+   int k;
+
+   // Storing red first yields RGB(A) order, so this data doesn't need to be swapped later.
+   for (k = 0; k < 3; ++k, shift -= 5) {
+      int level = (packed >> shift) & 31;
+      out[k] = (stbi_uc)((level * 255)/31);   // rescale 0..31 to 0..255
+   }
+
+   // Bit 15 is deliberately ignored: some people claim it might be used for alpha
+   // (possibly if an alpha-bit is set in the "image descriptor byte"), but honouring
+   // that only made 16bit test images completely translucent, so all 15 and 16bit
+   // TGAs are treated as RGB with no alpha.
+}
+
+static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
+{  // Decodes a TGA from s. Stores width/height in *x/*y and the file's component count in *comp (if non-NULL); returns the pixels (converted to req_comp components when req_comp is non-zero) or the stbi__errpuc() result on failure.
+   //   read in the TGA header stuff (the order of these reads follows the header layout)
+   int tga_offset = stbi__get8(s);
+   int tga_indexed = stbi__get8(s);
+   int tga_image_type = stbi__get8(s);
+   int tga_is_RLE = 0;
+   int tga_palette_start = stbi__get16le(s);
+   int tga_palette_len = stbi__get16le(s);
+   int tga_palette_bits = stbi__get8(s);
+   int tga_x_origin = stbi__get16le(s);
+   int tga_y_origin = stbi__get16le(s);
+   int tga_width = stbi__get16le(s);
+   int tga_height = stbi__get16le(s);
+   int tga_bits_per_pixel = stbi__get8(s);
+   int tga_comp, tga_rgb16=0;
+   int tga_inverted = stbi__get8(s);
+   // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
+   //   image data
+   unsigned char *tga_data;
+   unsigned char *tga_palette = NULL;
+   int i, j;
+   unsigned char raw_data[4] = {0};
+   int RLE_count = 0;
+   int RLE_repeating = 0;
+   int read_next_pixel = 1;
+   STBI_NOTUSED(ri);
+
+   //   do a tiny bit of preprocessing
+   if ( tga_image_type >= 8 )
+   {
+      tga_image_type -= 8;
+      tga_is_RLE = 1;
+   }
+   tga_inverted = 1 - ((tga_inverted >> 5) & 1); // rows need flipping when bit 5 of the descriptor byte is clear
+
+   //   If I'm paletted, then I'll use the number of bits from the palette
+   if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
+   else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);
+
+   if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
+      return stbi__errpuc("bad format", "Can't find out TGA pixelformat");
+
+   //   tga info
+   *x = tga_width;
+   *y = tga_height;
+   if (comp) *comp = tga_comp;
+
+   if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
+      return stbi__errpuc("too large", "Corrupt TGA");
+
+   tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
+   if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");
+
+   // skip to the data's starting position (offset usually = 0)
+   stbi__skip(s, tga_offset );
+
+   if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) {
+      for (i=0; i < tga_height; ++i) {
+         int row = tga_inverted ? tga_height -i - 1 : i;
+         stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
+         stbi__getn(s, tga_row, tga_width * tga_comp);
+      }
+   } else  {
+      //   do I need to load a palette?
+      if ( tga_indexed)
+      {
+         if (tga_palette_len == 0) { STBI_FREE(tga_data); return stbi__errpuc("bad palette", "Corrupt TGA"); } // need at least one entry: invalid indices fall back to entry 0
+         stbi__skip(s, tga_palette_start );  //   any data to skip? (offset usually = 0)
+         //   load the palette
+         tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
+         if (!tga_palette) {
+            STBI_FREE(tga_data);
+            return stbi__errpuc("outofmem", "Out of memory");
+         }
+         if (tga_rgb16) {
+            stbi_uc *pal_entry = tga_palette;
+            STBI_ASSERT(tga_comp == STBI_rgb);
+            for (i=0; i < tga_palette_len; ++i) {
+               stbi__tga_read_rgb16(s, pal_entry);
+               pal_entry += tga_comp;
+            }
+         } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
+               STBI_FREE(tga_data);
+               STBI_FREE(tga_palette);
+               return stbi__errpuc("bad palette", "Corrupt TGA");
+         }
+      }
+      //   load the data
+      for (i=0; i < tga_width * tga_height; ++i)
+      {
+         //   if I'm in RLE mode, do I need to get a RLE packet header?
+         if ( tga_is_RLE )
+         {
+            if ( RLE_count == 0 )
+            {
+               //   yep, get the next byte as a RLE command
+               int RLE_cmd = stbi__get8(s);
+               RLE_count = 1 + (RLE_cmd & 127);
+               RLE_repeating = RLE_cmd >> 7;
+               read_next_pixel = 1;
+            } else if ( !RLE_repeating )
+            {
+               read_next_pixel = 1;
+            }
+         } else
+         {
+            read_next_pixel = 1;
+         }
+         //   OK, if I need to read a pixel, do it now
+         if ( read_next_pixel )
+         {
+            //   load however much data we did have
+            if ( tga_indexed )
+            {
+               // read in index, then perform the lookup
+               int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
+               if ( pal_idx >= tga_palette_len ) {
+                  // invalid index
+                  pal_idx = 0;
+               }
+               pal_idx *= tga_comp;
+               for (j = 0; j < tga_comp; ++j) {
+                  raw_data[j] = tga_palette[pal_idx+j];
+               }
+            } else if(tga_rgb16) {
+               STBI_ASSERT(tga_comp == STBI_rgb);
+               stbi__tga_read_rgb16(s, raw_data);
+            } else {
+               //   read in the data raw
+               for (j = 0; j < tga_comp; ++j) {
+                  raw_data[j] = stbi__get8(s);
+               }
+            }
+            //   clear the reading flag for the next pixel
+            read_next_pixel = 0;
+         } // end of reading a pixel
+
+         // copy data
+         for (j = 0; j < tga_comp; ++j)
+           tga_data[i*tga_comp+j] = raw_data[j];
+
+         //   in case we're in RLE mode, keep counting down
+         --RLE_count;
+      }
+      //   do I need to invert the image?
+      if ( tga_inverted )
+      {
+         for (j = 0; j*2 < tga_height; ++j)
+         {
+            int index1 = j * tga_width * tga_comp;
+            int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
+            for (i = tga_width * tga_comp; i > 0; --i)
+            {
+               unsigned char temp = tga_data[index1];
+               tga_data[index1] = tga_data[index2];
+               tga_data[index2] = temp;
+               ++index1;
+               ++index2;
+            }
+         }
+      }
+      //   clear my palette, if I had one
+      if ( tga_palette != NULL )
+      {
+         STBI_FREE( tga_palette );
+      }
+   }
+
+   // swap RGB - if the source data was RGB16, it already is in the right order
+   if (tga_comp >= 3 && !tga_rgb16)
+   {
+      unsigned char* tga_pixel = tga_data;
+      for (i=0; i < tga_width * tga_height; ++i)
+      {
+         unsigned char temp = tga_pixel[0];
+         tga_pixel[0] = tga_pixel[2];
+         tga_pixel[2] = temp;
+         tga_pixel += tga_comp;
+      }
+   }
+
+   // convert to target component count
+   if (req_comp && req_comp != tga_comp)
+      tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);
+
+   //   the things I do to get rid of an error message, and yet keep
+   //   Microsoft's C compilers happy... [8^(
+   tga_palette_start = tga_palette_len = tga_palette_bits =
+         tga_x_origin = tga_y_origin = 0;
+   //   OK, done
+   return tga_data;
+}
+#endif
+
+// *************************************************************************************************
+// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
+
+#ifndef STBI_NO_PSD
+static int stbi__psd_test(stbi__context *s)
+{  // Checks the first four bytes for the PSD signature, then rewinds so the next reader starts from the beginning again.
+   stbi__uint32 magic = stbi__get32be(s);   // "8BPS" when read big-endian
+   stbi__rewind(s);
+   return magic == 0x38425053;
+}
+
+static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
+{
+   // Expands run-length data from s into every 4th byte starting at p until pixelCount
+   // values have been stored. Returns 1 on success, or 0 as soon as a packet would
+   // store more values than are still missing.
+   int stored = 0;
+
+   while (stored < pixelCount) {
+      int room = pixelCount - stored;
+      int code = stbi__get8(s);
+      int run, k;
+
+      if (code == 128)
+         continue;   // No-op.
+
+      if (code < 128) {
+         // Copy next code+1 bytes literally.
+         run = code + 1;
+         if (run > room) return 0; // corrupt data
+         for (k = 0; k < run; ++k, p += 4)
+            *p = stbi__get8(s);
+      } else {
+         // Next -code+1 bytes in the dest are replicated from next source byte.
+         // (Interpret code as a negative 8-bit int.)
+         stbi_uc fill;
+         run = 257 - code;
+         if (run > room) return 0; // corrupt data
+         fill = stbi__get8(s);
+         for (k = 0; k < run; ++k, p += 4)
+            *p = fill;
+      }
+
+      stored += run;
+   }
+
+   return 1;
+}
+
+static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
+{  // Decodes an RGB PSD from s into a 4-channel buffer (converted to req_comp channels when req_comp is non-zero and not 4). Stores w/h in *x/*y and 4 in *comp (if non-NULL); returns the stbi__errpuc() result on failure.
+   int pixelCount;
+   int channelCount, compression;
+   int channel, i;
+   int bitdepth;
+   int w,h;
+   stbi_uc *out;
+   STBI_NOTUSED(ri); // ri is in fact read and written further down
+
+   // Check identifier
+   if (stbi__get32be(s) != 0x38425053)   // "8BPS"
+      return stbi__errpuc("not PSD", "Corrupt PSD image");
+
+   // Check file type version.
+   if (stbi__get16be(s) != 1)
+      return stbi__errpuc("wrong version", "Unsupported version of PSD image");
+
+   // Skip 6 reserved bytes.
+   stbi__skip(s, 6 );
+
+   // Read the number of channels (R, G, B, A, etc).
+   channelCount = stbi__get16be(s);
+   if (channelCount < 0 || channelCount > 16)
+      return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");
+
+   // Read the rows and columns of the image.
+   h = stbi__get32be(s);
+   w = stbi__get32be(s);
+
+   // Make sure the depth is 8 or 16 bits.
+   bitdepth = stbi__get16be(s);
+   if (bitdepth != 8 && bitdepth != 16)
+      return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");
+
+   // Make sure the color mode is RGB.
+   // Valid options are:
+   //   0: Bitmap
+   //   1: Grayscale
+   //   2: Indexed color
+   //   3: RGB color
+   //   4: CMYK color
+   //   7: Multichannel
+   //   8: Duotone
+   //   9: Lab color
+   if (stbi__get16be(s) != 3)
+      return stbi__errpuc("wrong color format", "PSD is not in RGB color format");
+
+   // Skip the Mode Data.  (It's the palette for indexed color; other info for other modes.)
+   stbi__skip(s,stbi__get32be(s) );
+
+   // Skip the image resources.  (resolution, pen tool paths, etc)
+   stbi__skip(s, stbi__get32be(s) );
+
+   // Skip the reserved data.
+   stbi__skip(s, stbi__get32be(s) );
+
+   // Find out if the data is compressed.
+   // Known values:
+   //   0: no compression
+   //   1: RLE compressed
+   compression = stbi__get16be(s);
+   if (compression > 1)
+      return stbi__errpuc("bad compression", "PSD has an unknown compression format");
+
+   // Check size
+   if (!stbi__mad3sizes_valid(4, w, h, 0))
+      return stbi__errpuc("too large", "Corrupt PSD");
+
+   // Create the destination image.
+
+   if (!compression && bitdepth == 16 && bpc == 16) { // 16-bit output only for uncompressed 16-bit files when the caller asked for 16 bits
+      out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0);
+      ri->bits_per_channel = 16;
+   } else
+      out = (stbi_uc *) stbi__malloc(4 * w*h); // 8-bit output: 4 bytes per pixel
+
+   if (!out) return stbi__errpuc("outofmem", "Out of memory");
+   pixelCount = w*h;
+
+   // Initialize the data to zero.
+   //memset( out, 0, pixelCount * 4 );
+
+   // Finally, the image data.
+   if (compression) {
+      // RLE as used by .PSD and .TIFF
+      // Loop until you get the number of unpacked bytes you are expecting:
+      //     Read the next source byte into n.
+      //     If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
+      //     Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
+      //     Else if n is 128, noop.
+      // Endloop
+
+      // The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
+      // which we're going to just skip.
+      stbi__skip(s, h * channelCount * 2 );
+
+      // Read the RLE data by channel.
+      for (channel = 0; channel < 4; channel++) { // only the first 4 channels are decoded; NOTE(review): one byte per sample is stored here even when bitdepth is 16 - confirm
+         stbi_uc *p;
+
+         p = out+channel;
+         if (channel >= channelCount) {
+            // Fill this channel with default data.
+            for (i = 0; i < pixelCount; i++, p += 4)
+               *p = (channel == 3 ? 255 : 0);
+         } else {
+            // Read the RLE data.
+            if (!stbi__psd_decode_rle(s, p, pixelCount)) {
+               STBI_FREE(out);
+               return stbi__errpuc("corrupt", "bad RLE data");
+            }
+         }
+      }
+
+   } else {
+      // We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...)
+      // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image.
+
+      // Read the data by channel.
+      for (channel = 0; channel < 4; channel++) {
+         if (channel >= channelCount) {
+            // Fill this channel with default data.
+            if (bitdepth == 16 && bpc == 16) {
+               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
+               stbi__uint16 val = channel == 3 ? 65535 : 0;
+               for (i = 0; i < pixelCount; i++, q += 4)
+                  *q = val;
+            } else {
+               stbi_uc *p = out+channel;
+               stbi_uc val = channel == 3 ? 255 : 0;
+               for (i = 0; i < pixelCount; i++, p += 4)
+                  *p = val;
+            }
+         } else {
+            if (ri->bits_per_channel == 16) {    // output bpc
+               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
+               for (i = 0; i < pixelCount; i++, q += 4)
+                  *q = (stbi__uint16) stbi__get16be(s);
+            } else {
+               stbi_uc *p = out+channel;
+               if (bitdepth == 16) {  // input bpc
+                  for (i = 0; i < pixelCount; i++, p += 4)
+                     *p = (stbi_uc) (stbi__get16be(s) >> 8); // keep only the high byte of each 16-bit sample
+               } else {
+                  for (i = 0; i < pixelCount; i++, p += 4)
+                     *p = stbi__get8(s);
+               }
+            }
+         }
+      }
+   }
+
+   // remove weird white matte from PSD
+   if (channelCount >= 4) {
+      if (ri->bits_per_channel == 16) {
+         for (i=0; i < w*h; ++i) {
+            stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i;
+            if (pixel[3] != 0 && pixel[3] != 65535) { // only partially transparent pixels are adjusted
+               float a = pixel[3] / 65535.0f;
+               float ra = 1.0f / a;
+               float inv_a = 65535.0f * (1 - ra);
+               pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a);
+               pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a);
+               pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a);
+            }
+         }
+      } else {
+         for (i=0; i < w*h; ++i) {
+            unsigned char *pixel = out + 4*i;
+            if (pixel[3] != 0 && pixel[3] != 255) { // only partially transparent pixels are adjusted
+               float a = pixel[3] / 255.0f;
+               float ra = 1.0f / a;
+               float inv_a = 255.0f * (1 - ra);
+               pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
+               pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
+               pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
+            }
+         }
+      }
+   }
+
+   // convert to desired output format
+   if (req_comp && req_comp != 4) {
+      if (ri->bits_per_channel == 16)
+         out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h);
+      else
+         out = stbi__convert_format(out, 4, req_comp, w, h);
+      if (out == NULL) return out; // stbi__convert_format frees input on failure
+   }
+
+   if (comp) *comp = 4;
+   *y = h;
+   *x = w;
+
+   return out;
+}
+#endif
+
+// *************************************************************************************************
+// Softimage PIC loader
+// by Tom Seddon
+//
+// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
+// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
+
+#ifndef STBI_NO_PIC
+static int stbi__pic_is4(stbi__context *s,const char *str)
+{  // Returns 1 if the next four bytes of s equal str[0..3]; reading stops at the first mismatch, which returns 0.
+   const char *stop = str + 4;
+   while (str != stop)
+      if (stbi__get8(s) != (stbi_uc)*str++)
+         return 0;
+
+   return 1;
+}
+
+static int stbi__pic_test_core(stbi__context *s)
+{
+   // Accepts a stream that starts with the magic bytes 53 80 F6 34, followed by 84
+   // bytes that are read and ignored here, followed by the tag "PICT".
+   int remaining = 84;
+
+   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
+      return 0;
+
+   while (remaining-- > 0)
+      stbi__get8(s);
+
+   // stbi__pic_is4 yields exactly 0 or 1, so its result is the answer
+   return stbi__pic_is4(s,"PICT");
+}
+
+typedef struct
+{
+   stbi_uc size,type,channel; // bits per value (the loader rejects anything but 8); compression type (0 uncompressed, 1 pure RLE, 2 mixed RLE); bit mask of the channels this packet carries
+} stbi__pic_packet;
+
+static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
+{
+   // For each of the bits 0x80, 0x40, 0x20, 0x10 set in channel, read one byte from s
+   // into dest[0], dest[1], dest[2], dest[3] respectively; other slots stay untouched.
+   int slot;
+
+   for (slot = 0; slot < 4; ++slot) {
+      if (!(channel & (0x80 >> slot))) continue;
+      if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
+      dest[slot] = stbi__get8(s);
+   }
+   return dest;
+}
+
+static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
+{  // Copies src[k] to dest[k] for each k in 0..3 whose bit (0x80 >> k) is set in channel.
+   int k;
+
+   for (k = 0; k < 4; ++k) {
+      if (channel & (0x80 >> k)) dest[k] = src[k];
+   }
+}
+
+// Decode the pixel data of a PIC image into `result`.
+//
+// First reads the chain of packet descriptors (at most 10), each giving a bit
+// size, a compression type and a channel mask, then decodes every scanline
+// once per packet into `result`, which is addressed as 4 bytes per pixel
+// (rows are width*4 bytes apart).
+//
+//   s              input stream, positioned at the first packet descriptor
+//   width, height  image dimensions in pixels
+//   comp           receives 4 if any packet's channel mask has bit 0x10 set, else 3
+//   result         destination buffer; NOTE(review): assumed to hold at least
+//                  width*height*4 bytes -- this function does not check it
+//
+// Returns `result` on success, or 0 / the value of stbi__errpuc on failure.
+// The buffer is never freed here.
+static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
+{
+   int act_comp=0,num_packets=0,y,chained;
+   stbi__pic_packet packets[10];
+
+   // this will (should...) cater for even some bizarre stuff like having data
+   // for the same channel in multiple packets.
+   do {
+      stbi__pic_packet *packet;
+
+      // refuse to write past the fixed-size packet table
+      if (num_packets==sizeof(packets)/sizeof(packets[0]))
+         return stbi__errpuc("bad format","too many packets");
+
+      packet = &packets[num_packets++];
+
+      // a non-zero `chained` byte means another descriptor follows this one
+      chained = stbi__get8(s);
+      packet->size    = stbi__get8(s);
+      packet->type    = stbi__get8(s);
+      packet->channel = stbi__get8(s);
+
+      // accumulate the union of all channel masks to detect an alpha channel
+      act_comp |= packet->channel;
+
+      if (stbi__at_eof(s))          return stbi__errpuc("bad file","file too short (reading packets)");
+      if (packet->size != 8)  return stbi__errpuc("bad format","packet isn't 8bpp");
+   } while (chained);
+
+   *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
+
+   for(y=0; y<height; ++y) {
+      int packet_idx;
+
+      // every packet contributes its own channels to the same scanline
+      for(packet_idx=0; packet_idx < num_packets; ++packet_idx) {
+         stbi__pic_packet *packet = &packets[packet_idx];
+         stbi_uc *dest = result+y*width*4;
+
+         switch (packet->type) {
+            default:
+               return stbi__errpuc("bad format","packet has bad compression type");
+
+            case 0: {//uncompressed
+               int x;
+
+               for(x=0;x<width;++x, dest+=4)
+                  if (!stbi__readval(s,packet->channel,dest))
+                     return 0;
+               break;
+            }
+
+            case 1://Pure RLE
+               {
+                  int left=width, i;
+
+                  while (left>0) {
+                     stbi_uc count,value[4];
+
+                     count=stbi__get8(s);
+                     if (stbi__at_eof(s))   return stbi__errpuc("bad file","file too short (pure read count)");
+
+                     // a run longer than the rest of the row is clamped rather than rejected
+                     if (count > left)
+                        count = (stbi_uc) left;
+
+                     if (!stbi__readval(s,packet->channel,value))  return 0;
+
+                     for(i=0; i<count; ++i,dest+=4)
+                        stbi__copyval(packet->channel,dest,value);
+                     left -= count;
+                  }
+               }
+               break;
+
+            case 2: {//Mixed RLE
+               int left=width;
+               while (left>0) {
+                  int count = stbi__get8(s), i;
+                  if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (mixed read count)");
+
+                  if (count >= 128) { // Repeated
+                     stbi_uc value[4];
+
+                     // 128 is an escape: the real run length follows as a 16-bit big-endian value
+                     if (count==128)
+                        count = stbi__get16be(s);
+                     else
+                        count -= 127;
+                     if (count > left)
+                        return stbi__errpuc("bad file","scanline overrun");
+
+                     if (!stbi__readval(s,packet->channel,value))
+                        return 0;
+
+                     for(i=0;i<count;++i, dest += 4)
+                        stbi__copyval(packet->channel,dest,value);
+                  } else { // Raw
+                     // counts below 128 encode (count+1) literal pixels
+                     ++count;
+                     if (count>left) return stbi__errpuc("bad file","scanline overrun");
+
+                     for(i=0;i<count;++i, dest+=4)
+                        if (!stbi__readval(s,packet->channel,dest))
+                           return 0;
+                  }
+                  left-=count;
+               }
+               break;
+            }
+         }
+      }
+   }
+
+   return result;
+}
+
+// Load a PIC image.
+//
+// Skips the first 92 header bytes, reads the 16-bit big-endian width and
+// height, skips the ratio/fields/pad words, decodes into an RGBA buffer
+// pre-filled with 0xff and finally converts that buffer to `req_comp`
+// components (or to the detected component count when `req_comp` is 0).
+//
+//   px, py    receive the image width and height
+//   comp      optional; receives the component count found in the file
+//   req_comp  requested components per pixel, 0 meaning "as stored"
+//   ri        unused
+//
+// Returns the pixel buffer, or a null/error value on failure (truncated
+// header, dimensions too large, out of memory, or a decode error).
+static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
+{
+   stbi_uc *result;
+   int i, x,y, internal_comp;
+   STBI_NOTUSED(ri);
+
+   if (!comp) comp = &internal_comp;
+
+   for (i=0; i<92; ++i)
+      stbi__get8(s);
+
+   x = stbi__get16be(s);
+   y = stbi__get16be(s);
+   if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (pic header)");
+   if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");
+
+   stbi__get32be(s); //skip `ratio'
+   stbi__get16be(s); //skip `fields'
+   stbi__get16be(s); //skip `pad'
+
+   // intermediate buffer is RGBA
+   result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
+   // the allocation can fail; never memset through a null pointer
+   if (!result) return stbi__errpuc("outofmem", "Out of memory");
+   memset(result, 0xff, x*y*4);
+
+   if (!stbi__pic_load_core(s,x,y,comp, result)) {
+      // decoding failed: stop here so that neither a null buffer nor a
+      // possibly unset *comp reaches the format conversion below
+      STBI_FREE(result);
+      *px = x;
+      *py = y;
+      return 0;
+   }
+   *px = x;
+   *py = y;
+   if (req_comp == 0) req_comp = *comp;
+   result=stbi__convert_format(result,4,req_comp,x,y);
+
+   return result;
+}
+
+// Report whether the stream looks like a PIC file (as decided by
+// stbi__pic_test_core), rewinding the stream afterwards in every case.
+static int stbi__pic_test(stbi__context *s)
+{
+   int is_pic;
+
+   is_pic = stbi__pic_test_core(s);
+   stbi__rewind(s);
+
+   return is_pic;
+}
+#endif
+
+// *************************************************************************************************
+// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
+
+#ifndef STBI_NO_GIF
+// One entry of the GIF LZW code table. Entries form backward-linked chains:
+// following `prefix` repeatedly walks a decoded string from its end to its start.
+typedef struct
+{
+   stbi__int16 prefix;   // code of the preceding entry in the chain, or -1 for a root entry
+   stbi_uc first;        // first byte of the string this entry stands for
+   stbi_uc suffix;       // last byte of the string; used as the palette index when emitted
+} stbi__gif_lzw;
+
+// Decoder state for a GIF stream, kept across frames of an animation.
+typedef struct
+{
+   // logical screen size read from the file header
+   int w,h;
+   stbi_uc *out;                 // output buffer (always 4 components)
+   stbi_uc *background;          // The current "background" as far as a gif is concerned
+   // one byte per pixel; set to 1 for every pixel written while decoding the current frame
+   stbi_uc *history;
+   // flags/bgindex/ratio come from the header; transparent is the transparent
+   // palette index or -1; eflags is the flag byte of the last Graphic Control Extension
+   int flags, bgindex, ratio, transparent, eflags;
+   stbi_uc  pal[256][4];         // global color table
+   stbi_uc lpal[256][4];         // local color table of the current image
+   stbi__gif_lzw codes[8192];    // LZW code table
+   stbi_uc *color_table;         // points at either pal or lpal for the current image
+   // interlacing state: remaining passes and the byte distance between decoded rows
+   int parse, step;
+   int lflags;                   // flag byte of the current Image Descriptor
+   // frame rectangle and write cursor; x values are byte offsets within a row,
+   // y values are byte offsets of the row start within `out`
+   int start_x, start_y;
+   int max_x, max_y;
+   int cur_x, cur_y;
+   int line_size;                // bytes per output row (w * 4)
+   int delay;                    // frame delay in 1/1000ths of a second
+} stbi__gif;
+
+// Check for the 6-byte GIF signature ("GIF87a" or "GIF89a") at the current
+// stream position. Stops reading at the first mismatching byte and does not
+// rewind. Returns 1 on a match, 0 otherwise.
+static int stbi__gif_test_raw(stbi__context *s)
+{
+   int version;
+
+   if (stbi__get8(s) != 'G') return 0;
+   if (stbi__get8(s) != 'I') return 0;
+   if (stbi__get8(s) != 'F') return 0;
+   if (stbi__get8(s) != '8') return 0;
+
+   version = stbi__get8(s);
+   if (version != '9' && version != '7') return 0;
+
+   return stbi__get8(s) == 'a';
+}
+
+// Report whether the stream starts with a GIF signature, rewinding the
+// stream afterwards whether or not it matched.
+static int stbi__gif_test(stbi__context *s)
+{
+   int is_gif;
+
+   is_gif = stbi__gif_test_raw(s);
+   stbi__rewind(s);
+
+   return is_gif;
+}
+
+// Read `num_entries` 3-byte colors from the stream into `pal`.
+// The three bytes of each color are stored in reverse order (first byte read
+// goes to index 2); index 3 holds alpha: 0 for the entry whose index equals
+// `transp`, 255 for every other entry. Pass -1 for no transparent entry.
+static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
+{
+   int entry;
+
+   for (entry = 0; entry < num_entries; ++entry) {
+      stbi_uc *color = pal[entry];
+
+      color[2] = stbi__get8(s);
+      color[1] = stbi__get8(s);
+      color[0] = stbi__get8(s);
+      color[3] = (entry == transp) ? 0 : 255;
+   }
+}
+
+// Parse the GIF signature and the header fields that follow it into `g`.
+//
+// Fails through stbi__err when the 6-byte signature is not "GIF87a" or
+// "GIF89a". Otherwise fills in w, h, flags, bgindex and ratio, resets
+// g->transparent to -1, reports 4 components through `comp` when it is
+// non-null and, unless `is_info` is set, reads the global color table if
+// bit 0x80 of the flags announces one. Returns 1 on success.
+static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
+{
+   stbi_uc ver;
+
+   if (stbi__get8(s) != 'G') return stbi__err("not GIF", "Corrupt GIF");
+   if (stbi__get8(s) != 'I') return stbi__err("not GIF", "Corrupt GIF");
+   if (stbi__get8(s) != 'F') return stbi__err("not GIF", "Corrupt GIF");
+   if (stbi__get8(s) != '8') return stbi__err("not GIF", "Corrupt GIF");
+
+   ver = stbi__get8(s);
+   if (ver != '7' && ver != '9')
+      return stbi__err("not GIF", "Corrupt GIF");
+   if (stbi__get8(s) != 'a')
+      return stbi__err("not GIF", "Corrupt GIF");
+
+   stbi__g_failure_reason = "";
+
+   g->w = stbi__get16le(s);
+   g->h = stbi__get16le(s);
+   g->flags = stbi__get8(s);
+   g->bgindex = stbi__get8(s);
+   g->ratio = stbi__get8(s);
+   g->transparent = -1;
+
+   // can't actually tell whether it's 3 or 4 until we parse the comments
+   if (comp) *comp = 4;
+
+   // an info-only query stops before the color table
+   if (!is_info && (g->flags & 0x80))
+      stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);
+
+   return 1;
+}
+
+// Read only the GIF header and report the image dimensions.
+//
+//   x, y   optional; receive the width and height
+//   comp   optional; forwarded to stbi__gif_header
+//
+// The decoder state is large, so it is allocated on the heap for the
+// duration of the call. Returns 1 on success. Returns 0 after rewinding the
+// stream when the header is not a valid GIF header, and fails through
+// stbi__err when the temporary state cannot be allocated.
+static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
+{
+   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
+   // stbi__gif_header writes through g, so a failed allocation must stop here
+   if (!g) return stbi__err("outofmem", "Out of memory");
+   if (!stbi__gif_header(s, g, comp, 1)) {
+      STBI_FREE(g);
+      stbi__rewind( s );
+      return 0;
+   }
+   if (x) *x = g->w;
+   if (y) *y = g->h;
+   STBI_FREE(g);
+   return 1;
+}
+
+// Emit the pixels of one LZW code into g->out and advance the write cursor.
+//
+// The code table links each entry to its prefix, so the string is stored
+// back to front; recursing on the prefix first makes the pixels come out in
+// forward order. Pixels whose palette alpha is not above 128 are skipped
+// (the destination keeps its previous content), but every visited position
+// is still marked in g->history.
+static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
+{
+   stbi_uc *pixel, *color;
+   int offset;
+
+   if (g->codes[code].prefix >= 0)
+      stbi__out_gif_code(g, g->codes[code].prefix);
+
+   // already past the last row of the frame: drop the pixel
+   if (g->cur_y >= g->max_y) return;
+
+   offset = g->cur_x + g->cur_y;
+   pixel = g->out + offset;
+   g->history[offset / 4] = 1;
+
+   color = g->color_table + g->codes[code].suffix * 4;
+   if (color[3] > 128) { // don't render transparent pixels;
+      pixel[0] = color[2];
+      pixel[1] = color[1];
+      pixel[2] = color[0];
+      pixel[3] = color[3];
+   }
+   g->cur_x += 4;
+
+   // still inside the current row: nothing more to do
+   if (g->cur_x < g->max_x) return;
+
+   // wrap to the start of the next row to decode
+   g->cur_x = g->start_x;
+   g->cur_y += g->step;
+
+   // interlaced images: once a pass runs off the bottom, start the next pass
+   while (g->cur_y >= g->max_y && g->parse > 0) {
+      g->step = (1 << g->parse) * g->line_size;
+      g->cur_y = g->start_y + (g->step >> 1);
+      --g->parse;
+   }
+}
+
+// Decode the LZW-compressed raster data of one GIF image.
+//
+// Reads the minimum code size byte, then consumes the data sub-blocks bit by
+// bit, growing the code table in g->codes and emitting pixels through
+// stbi__out_gif_code.
+//
+// Returns g->out when a zero-length sub-block or the end-of-stream code is
+// reached, NULL when the minimum code size exceeds 12, and the value of
+// stbi__errpuc when the code stream is corrupt.
+static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
+{
+   stbi_uc lzw_cs;
+   stbi__int32 len, init_code;
+   stbi__uint32 first;
+   stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
+   stbi__gif_lzw *p;
+
+   lzw_cs = stbi__get8(s);
+   if (lzw_cs > 12) return NULL;
+   clear = 1 << lzw_cs;
+   first = 1;
+   codesize = lzw_cs + 1;
+   codemask = (1 << codesize) - 1;
+   bits = 0;
+   valid_bits = 0;
+   // root entries: every code below `clear` stands for the single byte of the same value
+   for (init_code = 0; init_code < clear; init_code++) {
+      g->codes[init_code].prefix = -1;
+      g->codes[init_code].first = (stbi_uc) init_code;
+      g->codes[init_code].suffix = (stbi_uc) init_code;
+   }
+
+   // support no starting clear code
+   // (the two codes after the roots are reserved: `clear` and `clear + 1`, end of stream)
+   avail = clear+2;
+   oldcode = -1;
+
+   len = 0;
+   for(;;) {
+      if (valid_bits < codesize) {
+         // not enough buffered bits for a whole code: pull in one more byte
+         if (len == 0) {
+            len = stbi__get8(s); // start new block
+            if (len == 0)
+               return g->out;
+         }
+         --len;
+         bits |= (stbi__int32) stbi__get8(s) << valid_bits;
+         valid_bits += 8;
+      } else {
+         stbi__int32 code = bits & codemask;
+         bits >>= codesize;
+         valid_bits -= codesize;
+         // @OPTIMIZE: is there some way we can accelerate the non-clear path?
+         if (code == clear) {  // clear code
+            codesize = lzw_cs + 1;
+            codemask = (1 << codesize) - 1;
+            avail = clear + 2;
+            oldcode = -1;
+            first = 0;
+         } else if (code == clear + 1) { // end of stream code
+            // skip the rest of this sub-block and any following ones up to the terminator
+            stbi__skip(s, len);
+            while ((len = stbi__get8(s)) > 0)
+               stbi__skip(s,len);
+            return g->out;
+         } else if (code <= avail) {
+            if (first) {
+               return stbi__errpuc("no clear code", "Corrupt GIF");
+            }
+
+            if (oldcode >= 0) {
+               // add a new table entry: the previous string plus one more byte
+               p = &g->codes[avail++];
+               if (avail > 8192) {
+                  return stbi__errpuc("too many codes", "Corrupt GIF");
+               }
+
+               p->prefix = (stbi__int16) oldcode;
+               p->first = g->codes[oldcode].first;
+               // when the code refers to the entry being created, its last byte
+               // is the first byte of the previous string
+               p->suffix = (code == avail) ? p->first : g->codes[code].first;
+            } else if (code == avail)
+               return stbi__errpuc("illegal code in raster", "Corrupt GIF");
+
+            stbi__out_gif_code(g, (stbi__uint16) code);
+
+            // widen the code size once the table fills the current width, up to 12 bits
+            if ((avail & codemask) == 0 && avail <= 0x0FFF) {
+               codesize++;
+               codemask = (1 << codesize) - 1;
+            }
+
+            oldcode = code;
+         } else {
+            return stbi__errpuc("illegal code in raster", "Corrupt GIF");
+         }
+      }
+   }
+}
+
+// Decode the next frame of a GIF into g->out (always 4 bytes per pixel).
+//
+// On the first call (g->out is null) the header is parsed and the output,
+// background and history buffers are allocated. On later calls the previous
+// frame is disposed of first, according to the disposal method stored in
+// g->eflags.
+//
+//   s         input stream
+//   g         decoder state, preserved between calls
+//   comp      optional; forwarded to stbi__gif_header on the first call
+//   req_comp  unused
+//   two_back  the image from two frames ago, used only for disposal method 3;
+//             may be null, in which case method 3 falls back to method 2
+//
+// Returns g->out once an image has been decoded, the stream pointer `s`
+// itself (cast) when the terminator block is reached, or a null/error value
+// on failure. If the initial allocation fails, all three buffers are
+// released and reset to null before returning.
+static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back)
+{
+   int dispose;
+   int first_frame;
+   int pi;
+   int pcount;
+   STBI_NOTUSED(req_comp);
+
+   // on first frame, any non-written pixels get the background colour (non-transparent)
+   first_frame = 0;
+   if (g->out == 0) {
+      if (!stbi__gif_header(s, g, comp,0))     return 0; // stbi__g_failure_reason set by stbi__gif_header
+      // w and h are 16-bit values, so 4*w*h may not fit in an int; reject
+      // such images before any size is computed
+      if (!stbi__mad3sizes_valid(4, g->w, g->h, 0))
+         return stbi__errpuc("too large", "GIF image is too large");
+      pcount = g->w * g->h;
+      g->out = (stbi_uc *) stbi__malloc(4 * pcount);
+      g->background = (stbi_uc *) stbi__malloc(4 * pcount);
+      g->history = (stbi_uc *) stbi__malloc(pcount);
+      // all three buffers are written below, so all three must exist
+      if (!g->out || !g->background || !g->history) {
+         STBI_FREE(g->out);
+         STBI_FREE(g->background);
+         STBI_FREE(g->history);
+         g->out = 0;
+         g->background = 0;
+         g->history = 0;
+         return stbi__errpuc("outofmem", "Out of memory");
+      }
+
+      // image is treated as "transparent" at the start - ie, nothing overwrites the current background;
+      // background colour is only used for pixels that are not rendered first frame, after that "background"
+      // color refers to the color that was there the previous frame.
+      memset( g->out, 0x00, 4 * pcount );
+      memset( g->background, 0x00, 4 * pcount ); // state of the background (starts transparent)
+      memset( g->history, 0x00, pcount );        // pixels that were affected previous frame
+      first_frame = 1;
+   } else {
+      // second frame - how do we dispose of the previous one?
+      dispose = (g->eflags & 0x1C) >> 2;
+      pcount = g->w * g->h;
+
+      if ((dispose == 3) && (two_back == 0)) {
+         dispose = 2; // if I don't have an image to revert back to, default to the old background
+      }
+
+      if (dispose == 3) { // use previous graphic
+         for (pi = 0; pi < pcount; ++pi) {
+            if (g->history[pi]) {
+               memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 );
+            }
+         }
+      } else if (dispose == 2) {
+         // restore what was changed last frame to background before that frame;
+         for (pi = 0; pi < pcount; ++pi) {
+            if (g->history[pi]) {
+               memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 );
+            }
+         }
+      } else {
+         // This is a non-disposal case either way, so just
+         // leave the pixels as is, and they will become the new background
+         // 1: do not dispose
+         // 0:  not specified.
+      }
+
+      // background is what out is after the undoing of the previous frame;
+      memcpy( g->background, g->out, 4 * g->w * g->h );
+   }
+
+   // clear my history;
+   memset( g->history, 0x00, g->w * g->h );        // pixels that were affected previous frame
+
+   for (;;) {
+      int tag = stbi__get8(s);
+      switch (tag) {
+         case 0x2C: /* Image Descriptor */
+         {
+            stbi__int32 x, y, w, h;
+            stbi_uc *o;
+
+            x = stbi__get16le(s);
+            y = stbi__get16le(s);
+            w = stbi__get16le(s);
+            h = stbi__get16le(s);
+            if (((x + w) > (g->w)) || ((y + h) > (g->h)))
+               return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
+
+            // the cursor fields are byte offsets into g->out, not pixel coordinates
+            g->line_size = g->w * 4;
+            g->start_x = x * 4;
+            g->start_y = y * g->line_size;
+            g->max_x   = g->start_x + w * 4;
+            g->max_y   = g->start_y + h * g->line_size;
+            g->cur_x   = g->start_x;
+            g->cur_y   = g->start_y;
+
+            g->lflags = stbi__get8(s);
+
+            if (g->lflags & 0x40) {
+               g->step = 8 * g->line_size; // first interlaced spacing
+               g->parse = 3;
+            } else {
+               g->step = g->line_size;
+               g->parse = 0;
+            }
+
+            // a local color table takes precedence over the global one
+            if (g->lflags & 0x80) {
+               stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
+               g->color_table = (stbi_uc *) g->lpal;
+            } else if (g->flags & 0x80) {
+               g->color_table = (stbi_uc *) g->pal;
+            } else
+               return stbi__errpuc("missing color table", "Corrupt GIF");
+
+            o = stbi__process_gif_raster(s, g);
+            if (o == NULL) return NULL;
+
+            // if this was the first frame,
+            pcount = g->w * g->h;
+            if (first_frame && (g->bgindex > 0)) {
+               // if first frame, any pixel not drawn to gets the background color
+               for (pi = 0; pi < pcount; ++pi) {
+                  if (g->history[pi] == 0) {
+                     g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be;
+                     memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 );
+                  }
+               }
+            }
+
+            return o;
+         }
+
+         case 0x21: // Extension block (graphic control, comment, ...)
+         {
+            int len;
+            int ext = stbi__get8(s);
+            if (ext == 0xF9) { // Graphic Control Extension.
+               len = stbi__get8(s);
+               if (len == 4) {
+                  g->eflags = stbi__get8(s);
+                  g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths.
+
+                  // unset old transparent
+                  if (g->transparent >= 0) {
+                     g->pal[g->transparent][3] = 255;
+                  }
+                  if (g->eflags & 0x01) {
+                     g->transparent = stbi__get8(s);
+                     if (g->transparent >= 0) {
+                        g->pal[g->transparent][3] = 0;
+                     }
+                  } else {
+                     // don't need transparent
+                     stbi__skip(s, 1);
+                     g->transparent = -1;
+                  }
+               } else {
+                  stbi__skip(s, len);
+                  break;
+               }
+            }
+            // skip the remaining sub-blocks of the extension up to its terminator
+            while ((len = stbi__get8(s)) != 0) {
+               stbi__skip(s, len);
+            }
+            break;
+         }
+
+         case 0x3B: // gif stream termination code
+            return (stbi_uc *) s; // using '1' causes warning on some compilers
+
+         default:
+            return stbi__errpuc("unknown code", "Corrupt GIF");
+      }
+   }
+}
+
+// Release everything stbi__load_gif_main has allocated so far and report an
+// out-of-memory failure. `out` may be null; `*delays` is reset to null so the
+// caller is not left holding a freed pointer.
+static void *stbi__load_gif_main_outofmem(stbi__gif *g, stbi_uc *out, int **delays)
+{
+   STBI_FREE(g->out);
+   STBI_FREE(g->history);
+   STBI_FREE(g->background);
+
+   if (out) STBI_FREE(out);
+   if (delays && *delays) {
+      STBI_FREE(*delays);
+      *delays = 0;
+   }
+   return stbi__errpuc("outofmem", "Out of memory");
+}
+
+// Load every frame of a (possibly animated) GIF into one contiguous buffer.
+//
+//   delays    optional; receives a newly allocated array holding one delay
+//             value per frame (null when no frame was decoded)
+//   x, y      receive the frame width and height
+//   z         receives the number of frames decoded
+//   comp      forwarded to the frame decoder
+//   req_comp  requested components per pixel; 0 or 4 leaves the data as RGBA
+//
+// Frames are stored back to back, each x*y*4 bytes before the final
+// conversion. Decoding stops at the stream terminator or at the first frame
+// that fails; the frames decoded up to that point are returned. Returns null
+// when no frame could be decoded or when memory runs out, and an error value
+// when the stream is not a GIF.
+static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
+{
+   if (stbi__gif_test(s)) {
+      int layers = 0;
+      stbi_uc *u = 0;
+      stbi_uc *out = 0;
+      stbi_uc *two_back = 0;
+      stbi__gif g;
+      int stride;
+      memset(&g, 0, sizeof(g));
+      if (delays) {
+         *delays = 0;
+      }
+
+      do {
+         u = stbi__gif_load_next(s, &g, comp, req_comp, two_back);
+         if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
+
+         if (u) {
+            *x = g.w;
+            *y = g.h;
+            ++layers;
+            stride = g.w * g.h * 4;
+
+            if (out) {
+               // keep the old pointer until the reallocation is known to have
+               // succeeded, otherwise the frames decoded so far would leak
+               void *tmp = STBI_REALLOC( out, layers * stride );
+               if (!tmp)
+                  return stbi__load_gif_main_outofmem(&g, out, delays);
+               out = (stbi_uc*) tmp;
+
+               if (delays) {
+                  int *new_delays = (int*) STBI_REALLOC( *delays, sizeof(int) * layers );
+                  if (!new_delays)
+                     return stbi__load_gif_main_outofmem(&g, out, delays);
+                  *delays = new_delays;
+               }
+            } else {
+               out = (stbi_uc*)stbi__malloc( layers * stride );
+               if (!out)
+                  return stbi__load_gif_main_outofmem(&g, out, delays);
+               if (delays) {
+                  *delays = (int*) stbi__malloc( layers * sizeof(int) );
+                  if (!*delays)
+                     return stbi__load_gif_main_outofmem(&g, out, delays);
+               }
+            }
+            memcpy( out + ((layers - 1) * stride), u, stride );
+            if (layers >= 2) {
+               // the frame before the one just stored; it is "two back" from
+               // the frame the next stbi__gif_load_next call will decode
+               two_back = out + (layers - 2) * stride;
+            }
+
+            if (delays) {
+               (*delays)[layers - 1U] = g.delay;
+            }
+         }
+      } while (u != 0);
+
+      // free temp buffer;
+      STBI_FREE(g.out);
+      STBI_FREE(g.history);
+      STBI_FREE(g.background);
+
+      // do the final conversion after loading everything;
+      // (skipped when no frame was decoded, so a null buffer is never converted)
+      if (out && req_comp && req_comp != 4)
+         out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h);
+
+      *z = layers;
+      return out;
+   } else {
+      return stbi__errpuc("not GIF", "Image was not as a gif type.");
+   }
+}
+
+// Load the first frame of a GIF as a single image.
+//
+//   x, y      receive the image width and height on success
+//   comp      forwarded to the frame decoder
+//   req_comp  requested components per pixel; 0 or 4 leaves the data as RGBA
+//   ri        unused
+//
+// Returns the pixel buffer, or null when no frame could be decoded. Every
+// buffer owned by the temporary decoder state is released before returning,
+// except the image buffer itself when it is handed to the caller.
+static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
+{
+   stbi_uc *u = 0;
+   stbi__gif g;
+   memset(&g, 0, sizeof(g));
+   STBI_NOTUSED(ri);
+
+   u = stbi__gif_load_next(s, &g, comp, req_comp, 0);
+   if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
+   if (u) {
+      *x = g.w;
+      *y = g.h;
+
+      // moved conversion to after successful load so that the same
+      // can be done for multiple frames.
+      if (req_comp && req_comp != 4)
+         u = stbi__convert_format(u, 4, req_comp, g.w, g.h);
+   } else if (g.out) {
+      // no frame is being returned, but the image buffer may already have
+      // been allocated while parsing; release it instead of leaking it
+      STBI_FREE(g.out);
+   }
+
+   // free buffers needed for multiple frame loading;
+   STBI_FREE(g.history);
+   STBI_FREE(g.background);
+
+   return u;
+}
+
+// Query the dimensions and component count of a GIF; forwards all arguments
+// to stbi__gif_info_raw and returns its result unchanged.
+static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   int found;
+
+   found = stbi__gif_info_raw(s, x, y, comp);
+   return found;
+}
+#endif
+
+// *************************************************************************************************
+// Radiance RGBE HDR loader
+// originally by Nicolas Schulz
+#ifndef STBI_NO_HDR
+// Compare the next bytes of the stream against `signature`, a
+// zero-terminated string. Returns 0 at the first mismatch, without
+// rewinding; returns 1 after rewinding the stream when every byte matched.
+static int stbi__hdr_test_core(stbi__context *s, const char *signature)
+{
+   const char *expected = signature;
+
+   while (*expected) {
+      if (stbi__get8(s) != *expected)
+         return 0;
+      ++expected;
+   }
+
+   stbi__rewind(s);
+   return 1;
+}
+
+// Report whether the stream starts with one of the two accepted HDR
+// signature lines, "#?RADIANCE" or "#?RGBE". The stream is rewound after
+// each attempt.
+static int stbi__hdr_test(stbi__context* s)
+{
+   int matched;
+
+   matched = stbi__hdr_test_core(s, "#?RADIANCE\n");
+   stbi__rewind(s);
+   if (matched)
+      return matched;
+
+   matched = stbi__hdr_test_core(s, "#?RGBE\n");
+   stbi__rewind(s);
+   return matched;
+}
+
+#define STBI__HDR_BUFLEN  1024
+// Read one line of the HDR header into `buffer` and return `buffer`.
+//
+// The newline is consumed but not stored. At most STBI__HDR_BUFLEN-1
+// characters are kept; the rest of an over-long line is read and discarded.
+// The result is always zero-terminated, and is empty for a blank line or
+// when the end of the input has been reached.
+static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
+{
+   int used = 0;
+   char ch = (char) stbi__get8(z);
+
+   for (;;) {
+      if (stbi__at_eof(z) || ch == '\n')
+         break;
+
+      buffer[used++] = ch;
+      if (used == STBI__HDR_BUFLEN-1) {
+         // flush to end of line
+         while (!stbi__at_eof(z) && stbi__get8(z) != '\n') {
+         }
+         break;
+      }
+
+      ch = (char) stbi__get8(z);
+   }
+
+   buffer[used] = 0;
+   return buffer;
+}
+
+// Convert one 4-byte RGBE pixel (three mantissas plus a shared exponent) to
+// `req_comp` floats in `output`.
+//
+// With 1 or 2 components the three color values are averaged into
+// output[0]; with 3 or 4 they are written separately. The extra component
+// of the 2- and 4-component layouts is set to 1. An exponent byte of 0
+// yields all-zero color. Other values of `req_comp` leave parts or all of
+// `output` untouched.
+static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
+{
+   float scale;
+
+   if (input[3] == 0) {
+      // zero exponent: black pixel
+      if (req_comp == 3 || req_comp == 4) {
+         if (req_comp == 4) output[3] = 1;
+         output[0] = output[1] = output[2] = 0;
+      } else if (req_comp == 1 || req_comp == 2) {
+         if (req_comp == 2) output[1] = 1;
+         output[0] = 0;
+      }
+      return;
+   }
+
+   // Exponent
+   scale = (float) ldexp(1.0f, input[3] - (int)(128 + 8));
+
+   if (req_comp <= 2) {
+      output[0] = (input[0] + input[1] + input[2]) * scale / 3;
+   } else {
+      output[0] = input[0] * scale;
+      output[1] = input[1] * scale;
+      output[2] = input[2] * scale;
+   }
+
+   if (req_comp == 2) output[1] = 1;
+   if (req_comp == 4) output[3] = 1;
+}
+
+// Load a Radiance RGBE (.hdr) image as floating-point pixels.
+//
+//   x, y      receive the image width and height
+//   comp      optional; receives 3
+//   req_comp  requested components per pixel, 0 meaning 3
+//   ri        unused
+//
+// The header must start with "#?RADIANCE" or "#?RGBE", contain the line
+// "FORMAT=32-bit_rle_rgbe" and be followed by a "-Y <height> +X <width>"
+// resolution line. Pixel data is either flat (4 bytes per pixel) or
+// run-length encoded per scanline, one channel at a time.
+//
+// Returns a buffer of width*height*req_comp floats, or the value of
+// stbi__errpf on failure.
+static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
+{
+   char buffer[STBI__HDR_BUFLEN];
+   char *token;
+   int valid = 0;
+   int width, height;
+   stbi_uc *scanline;
+   float *hdr_data;
+   int len;
+   unsigned char count, value;
+   int i, j, k, c1,c2, z;
+   const char *headerToken;
+   STBI_NOTUSED(ri);
+
+   // Check identifier
+   headerToken = stbi__hdr_gettoken(s,buffer);
+   if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
+      return stbi__errpf("not HDR", "Corrupt HDR image");
+
+   // Parse header
+   for(;;) {
+      token = stbi__hdr_gettoken(s,buffer);
+      if (token[0] == 0) break;
+      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
+   }
+
+   if (!valid)    return stbi__errpf("unsupported format", "Unsupported HDR format");
+
+   // Parse width and height
+   // can't use sscanf() if we're not using stdio!
+   token = stbi__hdr_gettoken(s,buffer);
+   if (strncmp(token, "-Y ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
+   token += 3;
+   height = (int) strtol(token, &token, 10);
+   while (*token == ' ') ++token;
+   if (strncmp(token, "+X ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
+   token += 3;
+   width = (int) strtol(token, NULL, 10);
+
+   *x = width;
+   *y = height;
+
+   if (comp) *comp = 3;
+   if (req_comp == 0) req_comp = 3;
+
+   if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
+      return stbi__errpf("too large", "HDR image is too large");
+
+   // Read data
+   hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
+   if (!hdr_data)
+      return stbi__errpf("outofmem", "Out of memory");
+
+   // Load image data
+   // image data is stored as some number of scanlines; widths outside
+   // [8, 32768) are always read as flat (not run-length encoded) data
+   if ( width < 8 || width >= 32768) {
+      // Read flat data
+      for (j=0; j < height; ++j) {
+         for (i=0; i < width; ++i) {
+            stbi_uc rgbe[4];
+           main_decode_loop:
+            stbi__getn(s, rgbe, 4);
+            stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
+         }
+      }
+   } else {
+      // Read RLE-encoded data
+      scanline = NULL;
+
+      for (j = 0; j < height; ++j) {
+         c1 = stbi__get8(s);
+         c2 = stbi__get8(s);
+         len = stbi__get8(s);
+         if (c1 != 2 || c2 != 2 || (len & 0x80)) {
+            // not run-length encoded, so we have to actually use THIS data as a decoded
+            // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
+            stbi_uc rgbe[4];
+            rgbe[0] = (stbi_uc) c1;
+            rgbe[1] = (stbi_uc) c2;
+            rgbe[2] = (stbi_uc) len;
+            rgbe[3] = (stbi_uc) stbi__get8(s);
+            stbi__hdr_convert(hdr_data, rgbe, req_comp);
+            // the first pixel is done; continue in the flat loop from pixel 1 of row 0
+            i = 1;
+            j = 0;
+            STBI_FREE(scanline);
+            goto main_decode_loop; // yes, this makes no sense
+         }
+         len <<= 8;
+         len |= stbi__get8(s);
+         if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
+         if (scanline == NULL) {
+            scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
+            if (!scanline) {
+               STBI_FREE(hdr_data);
+               return stbi__errpf("outofmem", "Out of memory");
+            }
+         }
+
+         // each of the 4 channels of the scanline is stored as its own RLE stream
+         for (k = 0; k < 4; ++k) {
+            int nleft;
+            i = 0;
+            while ((nleft = width - i) > 0) {
+               count = stbi__get8(s);
+               if (count > 128) {
+                  // Run
+                  value = stbi__get8(s);
+                  count -= 128;
+                  if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
+                  for (z = 0; z < count; ++z)
+                     scanline[i++ * 4 + k] = value;
+               } else {
+                  // Dump
+                  // a zero count makes no progress and would loop forever on a
+                  // crafted or truncated file, so treat it as corrupt
+                  if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
+                  for (z = 0; z < count; ++z)
+                     scanline[i++ * 4 + k] = stbi__get8(s);
+               }
+            }
+         }
+         for (i=0; i < width; ++i)
+            stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
+      }
+      if (scanline)
+         STBI_FREE(scanline);
+   }
+
+   return hdr_data;
+}
+
+// Read the header of a Radiance HDR image and report its dimensions.
+//
+//   x, y, comp   optional outputs; on success they receive the width, the
+//                height and 3
+//
+// Returns 1 on success. Returns 0 after rewinding the stream when the
+// signature is missing, when no "FORMAT=32-bit_rle_rgbe" line is found, or
+// when the resolution line is not of the form "-Y <height> +X <width>".
+static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   char line[STBI__HDR_BUFLEN];
+   char *cursor;
+   int format_ok = 0;
+   int unused;
+
+   if (!x) x = &unused;
+   if (!y) y = &unused;
+   if (!comp) comp = &unused;
+
+   if (!stbi__hdr_test(s)) {
+      stbi__rewind( s );
+      return 0;
+   }
+
+   // header lines run up to the first empty line
+   while (*(cursor = stbi__hdr_gettoken(s,line)) != 0) {
+      if (strcmp(cursor, "FORMAT=32-bit_rle_rgbe") == 0)
+         format_ok = 1;
+   }
+
+   if (!format_ok) {
+      stbi__rewind( s );
+      return 0;
+   }
+
+   // resolution line: "-Y <height> +X <width>"
+   cursor = stbi__hdr_gettoken(s,line);
+   if (strncmp(cursor, "-Y ", 3) != 0) {
+      stbi__rewind( s );
+      return 0;
+   }
+   cursor += 3;
+   *y = (int) strtol(cursor, &cursor, 10);
+
+   while (*cursor == ' ')
+      ++cursor;
+
+   if (strncmp(cursor, "+X ", 3) != 0) {
+      stbi__rewind( s );
+      return 0;
+   }
+   cursor += 3;
+   *x = (int) strtol(cursor, NULL, 10);
+
+   *comp = 3;
+   return 1;
+}
+#endif // STBI_NO_HDR
+
+#ifndef STBI_NO_BMP
+// Parse the BMP header and report the image dimensions and component count.
+// The stream is rewound whether or not parsing succeeds. Returns 1 on
+// success and 0 when the header could not be parsed; the component count is
+// 4 when the parsed header has a non-zero `ma` field and 3 otherwise.
+static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   stbi__bmp_data info;
+   void *header;
+
+   info.all_a = 255;
+   header = stbi__bmp_parse_header(s, &info);
+   stbi__rewind( s );
+
+   if (!header)
+      return 0;
+
+   if (x) *x = s->img_x;
+   if (y) *y = s->img_y;
+   if (comp) {
+      if (info.ma)
+         *comp = 4;
+      else
+         *comp = 3;
+   }
+   return 1;
+}
+#endif
+
+#ifndef STBI_NO_PSD
+// Read the PSD file header and report the image dimensions.
+//
+//   x, y, comp   optional outputs; on success they receive the width, the
+//                height and 4
+//
+// The header is accepted only with the signature 0x38425053, version 1, a
+// channel count of 0..16, a depth of 8 or 16, and a final 16-bit field equal
+// to 3. Returns 1 on success; on any failed check the stream is rewound and
+// 0 is returned (x and y may already have been written by then).
+static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   int channels, bits, unused;
+   int accepted = 0;
+
+   if (!x) x = &unused;
+   if (!y) y = &unused;
+   if (!comp) comp = &unused;
+
+   // single pass; `break` jumps to the shared failure handling below
+   do {
+      if (stbi__get32be(s) != 0x38425053) break;
+      if (stbi__get16be(s) != 1) break;
+
+      stbi__skip(s, 6);
+      channels = stbi__get16be(s);
+      if (channels < 0 || channels > 16) break;
+
+      *y = stbi__get32be(s);
+      *x = stbi__get32be(s);
+
+      bits = stbi__get16be(s);
+      if (bits != 8 && bits != 16) break;
+
+      if (stbi__get16be(s) != 3) break;
+
+      accepted = 1;
+   } while (0);
+
+   if (!accepted) {
+      stbi__rewind( s );
+      return 0;
+   }
+
+   *comp = 4;
+   return 1;
+}
+
+// Report whether the stream is a PSD file with 16 bits per channel.
+// Returns 1 when the signature, version and channel count are acceptable and
+// the depth field is 16; otherwise rewinds the stream and returns 0. The
+// stream is not rewound on success.
+static int stbi__psd_is16(stbi__context *s)
+{
+   int channels, bits;
+   int is16 = 0;
+
+   // single pass; `break` jumps to the shared failure handling below
+   do {
+      if (stbi__get32be(s) != 0x38425053) break;
+      if (stbi__get16be(s) != 1) break;
+
+      stbi__skip(s, 6);
+      channels = stbi__get16be(s);
+      if (channels < 0 || channels > 16) break;
+
+      // height and width are not needed here
+      (void) stbi__get32be(s);
+      (void) stbi__get32be(s);
+
+      bits = stbi__get16be(s);
+      if (bits != 16) break;
+
+      is16 = 1;
+   } while (0);
+
+   if (!is16) {
+      stbi__rewind( s );
+      return 0;
+   }
+   return 1;
+}
+#endif
+
+#ifndef STBI_NO_PIC
+// Read the header and packet descriptors of a PIC image and report its
+// dimensions and component count.
+//
+//   x, y, comp   optional outputs; they receive the width, the height and
+//                the component count (4 if any packet's channel mask has bit
+//                0x10 set, else 3)
+//
+// Returns 1 on success. On every failure (wrong signature, truncated file,
+// dimensions too large, more than 10 packets, packet size other than 8) the
+// stream is rewound and 0 is returned, so that the next format probe starts
+// from the beginning of the data.
+static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   int act_comp=0,num_packets=0,chained,dummy;
+   stbi__pic_packet packets[10];
+
+   if (!x) x = &dummy;
+   if (!y) y = &dummy;
+   if (!comp) comp = &dummy;
+
+   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
+      stbi__rewind(s);
+      return 0;
+   }
+
+   stbi__skip(s, 88);
+
+   *x = stbi__get16be(s);
+   *y = stbi__get16be(s);
+   if (stbi__at_eof(s)) {
+      stbi__rewind( s);
+      return 0;
+   }
+   // reject images whose pixel count would exceed 2^28
+   if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) {
+      stbi__rewind( s );
+      return 0;
+   }
+
+   stbi__skip(s, 8);
+
+   do {
+      stbi__pic_packet *packet;
+
+      // too many packets: rewind like every other failure path does
+      if (num_packets==sizeof(packets)/sizeof(packets[0])) {
+         stbi__rewind( s );
+         return 0;
+      }
+
+      packet = &packets[num_packets++];
+      // a non-zero `chained` byte means another descriptor follows this one
+      chained = stbi__get8(s);
+      packet->size    = stbi__get8(s);
+      packet->type    = stbi__get8(s);
+      packet->channel = stbi__get8(s);
+      act_comp |= packet->channel;
+
+      if (stbi__at_eof(s)) {
+          stbi__rewind( s );
+          return 0;
+      }
+      if (packet->size != 8) {
+          stbi__rewind( s );
+          return 0;
+      }
+   } while (chained);
+
+   *comp = (act_comp & 0x10 ? 4 : 3);
+
+   return 1;
+}
+#endif
+
+// *************************************************************************************************
+// Portable Gray Map and Portable Pixel Map loader
+// by Ken Miller
+//
+// PGM: http://netpbm.sourceforge.net/doc/pgm.html
+// PPM: http://netpbm.sourceforge.net/doc/ppm.html
+//
+// Known limitations:
+//    Does not support comments in the header section
+//    Does not support ASCII image data (formats P2 and P3)
+//    Does not support 16-bit-per-channel
+
+#ifndef STBI_NO_PNM
+
+// Report whether the stream starts with a binary PGM ("P5") or PPM ("P6")
+// signature. Returns 1 on a match, leaving the stream just after the two
+// signature bytes; otherwise rewinds the stream and returns 0.
+static int      stbi__pnm_test(stbi__context *s)
+{
+   char magic, kind;
+
+   magic = (char) stbi__get8(s);
+   kind = (char) stbi__get8(s);
+
+   if (magic == 'P' && (kind == '5' || kind == '6'))
+      return 1;
+
+   stbi__rewind( s );
+   return 0;
+}
+
+// Load a binary PGM/PPM image.
+//
+//   x, y      receive the image width and height
+//   comp      optional; receives the component count stored in the file
+//   req_comp  requested components per pixel, 0 meaning "as stored"
+//   ri        unused
+//
+// The header is parsed by stbi__pnm_info straight into the context's img_x,
+// img_y and img_n fields. Returns the pixel buffer; returns 0 when the header
+// is rejected, and an error value when the image is too large, memory runs
+// out, or the format conversion fails.
+static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
+{
+   stbi_uc *pixels;
+   STBI_NOTUSED(ri);
+
+   if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
+      return 0;
+
+   *x = s->img_x;
+   *y = s->img_y;
+   if (comp) *comp = s->img_n;
+
+   if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0))
+      return stbi__errpuc("too large", "PNM too large");
+
+   pixels = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0);
+   if (!pixels)
+      return stbi__errpuc("outofmem", "Out of memory");
+
+   stbi__getn(s, pixels, s->img_n * s->img_x * s->img_y);
+
+   // nothing to convert when the caller accepts the stored layout
+   if (req_comp == 0 || req_comp == s->img_n)
+      return pixels;
+
+   // stbi__convert_format frees input on failure
+   return stbi__convert_format(pixels, s->img_n, req_comp, s->img_x, s->img_y);
+}
+
+// Return 1 when `c` is a space, tab, newline, vertical tab, form feed or
+// carriage return, and 0 otherwise.
+static int      stbi__pnm_isspace(char c)
+{
+   switch (c) {
+      case ' ':
+      case '\t':
+      case '\n':
+      case '\v':
+      case '\f':
+      case '\r':
+         return 1;
+      default:
+         return 0;
+   }
+}
+
+// Advance past whitespace and '#' comments.
+//
+// `*c` holds the current character on entry and the first character that is
+// neither whitespace nor part of a comment on exit (or whatever was read
+// last when the end of the input is reached). A comment runs from '#' up to
+// the next '\n' or '\r'.
+static void     stbi__pnm_skip_whitespace(stbi__context *s, char *c)
+{
+   int in_comment;
+
+   do {
+      while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
+         *c = (char) stbi__get8(s);
+
+      in_comment = !stbi__at_eof(s) && *c == '#';
+
+      if (in_comment) {
+         while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' )
+            *c = (char) stbi__get8(s);
+      }
+   } while (in_comment);
+}
+
+// Return 1 when `c` is one of the characters '0'..'9', and 0 otherwise.
+static int      stbi__pnm_isdigit(char c)
+{
+   if (c < '0')
+      return 0;
+   return c <= '9';
+}
+
+// Parse an unsigned decimal integer.
+//
+// `*c` holds the first character to examine on entry and the first
+// character after the digits on exit. Returns 0 when `*c` is not a digit.
+//
+// Every digit is consumed even when the number is too large; the result then
+// saturates at 2147483647 instead of overflowing, which would be undefined
+// behaviour for a signed int. NOTE(review): the limit assumes a 32-bit int.
+static int      stbi__pnm_getinteger(stbi__context *s, char *c)
+{
+   int value = 0;
+
+   while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
+      int digit = *c - '0';
+
+      // value*10 + digit fits exactly when value <= (max - digit) / 10
+      if (value > (2147483647 - digit) / 10)
+         value = 2147483647;
+      else
+         value = value*10 + digit;
+
+      *c = (char) stbi__get8(s);
+   }
+
+   return value;
+}
+
+// Parse the header of a binary PGM ("P5") or PPM ("P6") file.
+//
+//   x, y, comp   optional outputs; they receive the width, the height and
+//                the component count (1 for P5, 3 for P6)
+//
+// The stream is rewound before parsing starts. Returns 1 on success. Returns
+// 0 after rewinding again when the signature does not match, and fails
+// through stbi__err when the maximum sample value is above 255.
+static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
+{
+   int max_value, unused;
+   char ch, magic, kind;
+
+   if (!x) x = &unused;
+   if (!y) y = &unused;
+   if (!comp) comp = &unused;
+
+   stbi__rewind(s);
+
+   // Get identifier
+   magic = (char) stbi__get8(s);
+   kind = (char) stbi__get8(s);
+   if (magic != 'P' || (kind != '5' && kind != '6')) {
+      stbi__rewind(s);
+      return 0;
+   }
+
+   // '5' is 1-component .pgm; '6' is 3-component .ppm
+   if (kind == '6')
+      *comp = 3;
+   else
+      *comp = 1;
+
+   ch = (char) stbi__get8(s);
+   stbi__pnm_skip_whitespace(s, &ch);
+
+   // width
+   *x = stbi__pnm_getinteger(s, &ch);
+   stbi__pnm_skip_whitespace(s, &ch);
+
+   // height
+   *y = stbi__pnm_getinteger(s, &ch);
+   stbi__pnm_skip_whitespace(s, &ch);
+
+   // maximum sample value
+   max_value = stbi__pnm_getinteger(s, &ch);
+
+   if (max_value <= 255)
+      return 1;
+
+   return stbi__err("max value > 255", "PPM image not 8-bit");
+}
+#endif
+
+static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
+{
+   // Ask each compiled-in format's *_info routine, in a fixed order, whether it recognizes the stream.
+   #if !defined(STBI_NO_JPEG)
+   if (stbi__jpeg_info(s, x, y, comp) != 0) return 1;
+   #endif
+
+   #if !defined(STBI_NO_PNG)
+   if (stbi__png_info(s, x, y, comp) != 0) return 1;
+   #endif
+
+   #if !defined(STBI_NO_GIF)
+   if (stbi__gif_info(s, x, y, comp) != 0) return 1;
+   #endif
+
+   #if !defined(STBI_NO_BMP)
+   if (stbi__bmp_info(s, x, y, comp) != 0) return 1;
+   #endif
+
+   #if !defined(STBI_NO_PSD)
+   if (stbi__psd_info(s, x, y, comp) != 0) return 1;
+   #endif
+
+   #if !defined(STBI_NO_PIC)
+   if (stbi__pic_info(s, x, y, comp) != 0) return 1;
+   #endif
+
+   #if !defined(STBI_NO_PNM)
+   if (stbi__pnm_info(s, x, y, comp) != 0) return 1;
+   #endif
+
+   #if !defined(STBI_NO_HDR)
+   if (stbi__hdr_info(s, x, y, comp) != 0) return 1;
+   #endif
+
+   // TGA is probed last because its detection test is the weakest of the lot
+   #if !defined(STBI_NO_TGA)
+   if (stbi__tga_info(s, x, y, comp) != 0) return 1;
+   #endif
+   return stbi__err("unknown image type", "Image not of any known type, or corrupt");
+}
+
+static int stbi__is_16_main(stbi__context *s)
+{
+   // Returns 1 as soon as the PNG or PSD probe (whichever are compiled in) gives a non-zero answer, else 0.
+   #if !defined(STBI_NO_PNG)
+   if (stbi__png_is16(s) != 0) return 1;
+   #endif
+   #if !defined(STBI_NO_PSD)
+   if (stbi__psd_is16(s) != 0) return 1;
+   #endif
+
+   return 0;
+}
+
+#ifndef STBI_NO_STDIO
+STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
+{   // Convenience wrapper: open `filename` in "rb" mode, run stbi_info_from_file on it, then close it.
+    int status;
+    FILE *fp = stbi__fopen(filename, "rb");
+    if (fp == NULL) return stbi__err("can't fopen", "Unable to open file");
+    status = stbi_info_from_file(fp, x, y, comp);
+    fclose(fp);
+    return status;
+}
+
+STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
+{  // Query image info from an already-open FILE, then seek back to the offset it had on entry.
+   stbi__context ctx;
+   int found;
+   long start = ftell(f);   // remember the caller's offset before anything is read
+   stbi__start_file(&ctx, f);
+   found = stbi__info_main(&ctx, x, y, comp);
+   fseek(f, start, SEEK_SET);
+   return found;
+}
+
+STBIDEF int stbi_is_16_bit(char const *filename)
+{   // Convenience wrapper: open `filename` in "rb" mode, run stbi_is_16_bit_from_file on it, then close it.
+    int status;
+    FILE *fp = stbi__fopen(filename, "rb");
+    if (fp == NULL) return stbi__err("can't fopen", "Unable to open file");
+    status = stbi_is_16_bit_from_file(fp);
+    fclose(fp);
+    return status;
+}
+
+STBIDEF int stbi_is_16_bit_from_file(FILE *f)
+{  // Run the 16-bit probe on an already-open FILE, then seek back to the offset it had on entry.
+   stbi__context ctx;
+   int is16;
+   long start = ftell(f);   // remember the caller's offset before anything is read
+   stbi__start_file(&ctx, f);
+   is16 = stbi__is_16_main(&ctx);
+   fseek(f, start, SEEK_SET);
+   return is16;
+}
+#endif // !STBI_NO_STDIO
+
+STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
+{  // Run the image-info query over the `len` bytes at `buffer`.
+   stbi__context mem_ctx;
+   stbi__start_mem(&mem_ctx, buffer, len);
+   return stbi__info_main(&mem_ctx, x, y, comp);
+}
+
+STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
+{  // Run the image-info query over a stream served by the callbacks in `c`, which are given `user`.
+   stbi__context cb_ctx;
+   stbi__start_callbacks(&cb_ctx, (stbi_io_callbacks *) c, user);
+   return stbi__info_main(&cb_ctx, x, y, comp);
+}
+
+STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len)
+{  // Run the 16-bit probe over the `len` bytes at `buffer`.
+   stbi__context mem_ctx;
+   stbi__start_mem(&mem_ctx, buffer, len);
+   return stbi__is_16_main(&mem_ctx);
+}
+
+STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user)
+{  // Run the 16-bit probe over a stream served by the callbacks in `c`, which are given `user`.
+   stbi__context cb_ctx;
+   stbi__start_callbacks(&cb_ctx, (stbi_io_callbacks *) c, user);
+   return stbi__is_16_main(&cb_ctx);
+}
+
+#endif // STB_IMAGE_IMPLEMENTATION
+
+/*
+   revision history:
+      2.20  (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
+      2.19  (2018-02-11) fix warning
+      2.18  (2018-01-30) fix warnings
+      2.17  (2018-01-29) change stbi__shiftsigned to avoid clang -O2 bug
+                         1-bit BMP
+                         *_is_16_bit api
+                         avoid warnings
+      2.16  (2017-07-23) all functions have 16-bit variants;
+                         STBI_NO_STDIO works again;
+                         compilation fixes;
+                         fix rounding in unpremultiply;
+                         optimize vertical flip;
+                         disable raw_len validation;
+                         documentation fixes
+      2.15  (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;
+                         warning fixes; disable run-time SSE detection on gcc;
+                         uniform handling of optional "return" values;
+                         thread-safe initialization of zlib tables
+      2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
+      2.13  (2016-11-29) add 16-bit API, only supported for PNG right now
+      2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
+      2.11  (2016-04-02) allocate large structures on the stack
+                         remove white matting for transparent PSD
+                         fix reported channel count for PNG & BMP
+                         re-enable SSE2 in non-gcc 64-bit
+                         support RGB-formatted JPEG
+                         read 16-bit PNGs (only as 8-bit)
+      2.10  (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
+      2.09  (2016-01-16) allow comments in PNM files
+                         16-bit-per-pixel TGA (not bit-per-component)
+                         info() for TGA could break due to .hdr handling
+                         info() for BMP now shares code instead of sloppy parse
+                         can use STBI_REALLOC_SIZED if allocator doesn't support realloc
+                         code cleanup
+      2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
+      2.07  (2015-09-13) fix compiler warnings
+                         partial animated GIF support
+                         limited 16-bpc PSD support
+                         #ifdef unused functions
+                         bug with < 92 byte PIC,PNM,HDR,TGA
+      2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value
+      2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning
+      2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit
+      2.03  (2015-04-12) extra corruption checking (mmozeiko)
+                         stbi_set_flip_vertically_on_load (nguillemot)
+                         fix NEON support; fix mingw support
+      2.02  (2015-01-19) fix incorrect assert, fix warning
+      2.01  (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
+      2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
+      2.00  (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
+                         progressive JPEG (stb)
+                         PGM/PPM support (Ken Miller)
+                         STBI_MALLOC,STBI_REALLOC,STBI_FREE
+                         GIF bugfix -- seemingly never worked
+                         STBI_NO_*, STBI_ONLY_*
+      1.48  (2014-12-14) fix incorrectly-named assert()
+      1.47  (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
+                         optimize PNG (ryg)
+                         fix bug in interlaced PNG with user-specified channel count (stb)
+      1.46  (2014-08-26)
+              fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
+      1.45  (2014-08-16)
+              fix MSVC-ARM internal compiler error by wrapping malloc
+      1.44  (2014-08-07)
+              various warning fixes from Ronny Chevalier
+      1.43  (2014-07-15)
+              fix MSVC-only compiler problem in code changed in 1.42
+      1.42  (2014-07-09)
+              don't define _CRT_SECURE_NO_WARNINGS (affects user code)
+              fixes to stbi__cleanup_jpeg path
+              added STBI_ASSERT to avoid requiring assert.h
+      1.41  (2014-06-25)
+              fix search&replace from 1.36 that messed up comments/error messages
+      1.40  (2014-06-22)
+              fix gcc struct-initialization warning
+      1.39  (2014-06-15)
+              fix to TGA optimization when req_comp != number of components in TGA;
+              fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
+              add support for BMP version 5 (more ignored fields)
+      1.38  (2014-06-06)
+              suppress MSVC warnings on integer casts truncating values
+              fix accidental rename of 'skip' field of I/O
+      1.37  (2014-06-04)
+              remove duplicate typedef
+      1.36  (2014-06-03)
+              convert to header file single-file library
+              if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
+      1.35  (2014-05-27)
+              various warnings
+              fix broken STBI_SIMD path
+              fix bug where stbi_load_from_file no longer left file pointer in correct place
+              fix broken non-easy path for 32-bit BMP (possibly never used)
+              TGA optimization by Arseny Kapoulkine
+      1.34  (unknown)
+              use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
+      1.33  (2011-07-14)
+              make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
+      1.32  (2011-07-13)
+              support for "info" function for all supported filetypes (SpartanJ)
+      1.31  (2011-06-20)
+              a few more leak fixes, bug in PNG handling (SpartanJ)
+      1.30  (2011-06-11)
+              added ability to load files via callbacks to accommodate custom input streams (Ben Wenger)
+              removed deprecated format-specific test/load functions
+              removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
+              error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
+              fix inefficiency in decoding 32-bit BMP (David Woo)
+      1.29  (2010-08-16)
+              various warning fixes from Aurelien Pocheville
+      1.28  (2010-08-01)
+              fix bug in GIF palette transparency (SpartanJ)
+      1.27  (2010-08-01)
+              cast-to-stbi_uc to fix warnings
+      1.26  (2010-07-24)
+              fix bug in file buffering for PNG reported by SpartanJ
+      1.25  (2010-07-17)
+              refix trans_data warning (Won Chun)
+      1.24  (2010-07-12)
+              perf improvements reading from files on platforms with lock-heavy fgetc()
+              minor perf improvements for jpeg
+              deprecated type-specific functions so we'll get feedback if they're needed
+              attempt to fix trans_data warning (Won Chun)
+      1.23    fixed bug in iPhone support
+      1.22  (2010-07-10)
+              removed image *writing* support
+              stbi_info support from Jetro Lauha
+              GIF support from Jean-Marc Lienher
+              iPhone PNG-extensions from James Brown
+              warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez (U+017D)emva)
+      1.21    fix use of 'stbi_uc' in header (reported by jon blow)
+      1.20    added support for Softimage PIC, by Tom Seddon
+      1.19    bug in interlaced PNG corruption check (found by ryg)
+      1.18  (2008-08-02)
+              fix a threading bug (local mutable static)
+      1.17    support interlaced PNG
+      1.16    major bugfix - stbi__convert_format converted one too many pixels
+      1.15    initialize some fields for thread safety
+      1.14    fix threadsafe conversion bug
+              header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
+      1.13    threadsafe
+      1.12    const qualifiers in the API
+      1.11    Support installable IDCT, colorspace conversion routines
+      1.10    Fixes for 64-bit (don't use "unsigned long")
+              optimized upsampling by Fabian "ryg" Giesen
+      1.09    Fix format-conversion for PSD code (bad global variables!)
+      1.08    Thatcher Ulrich's PSD code integrated by Nicolas Schulz
+      1.07    attempt to fix C++ warning/errors again
+      1.06    attempt to fix C++ warning/errors again
+      1.05    fix TGA loading to return correct *comp and use good luminance calc
+      1.04    default float alpha is 1, not 255; use 'void *' for stbi_image_free
+      1.03    bugfixes to STBI_NO_STDIO, STBI_NO_HDR
+      1.02    support for (subset of) HDR files, float interface for preferred access to them
+      1.01    fix bug: possible bug in handling right-side up bmps... not sure
+              fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
+      1.00    interface to zlib that skips zlib header
+      0.99    correct handling of alpha in palette
+      0.98    TGA loader by lonesock; dynamically add loaders (untested)
+      0.97    jpeg errors on too large a file; also catch another malloc failure
+      0.96    fix detection of invalid v value - particleman@mollyrocket forum
+      0.95    during header scan, seek to markers in case of padding
+      0.94    STBI_NO_STDIO to disable stdio usage; rename all #defines the same
+      0.93    handle jpegtran output; verbose errors
+      0.92    read 4,8,16,24,32-bit BMP files of several formats
+      0.91    output 24-bit Windows 3.0 BMP files
+      0.90    fix a few more warnings; bump version number to approach 1.0
+      0.61    bugfixes due to Marc LeBlanc, Christopher Lloyd
+      0.60    fix compiling as c++
+      0.59    fix warnings: merge Dave Moore's -Wall fixes
+      0.58    fix bug: zlib uncompressed mode len/nlen was wrong endian
+      0.57    fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
+      0.56    fix bug: zlib uncompressed mode len vs. nlen
+      0.55    fix bug: restart_interval not initialized to 0
+      0.54    allow NULL for 'int *comp'
+      0.53    fix bug in png 3->4; speedup png decoding
+      0.52    png handles req_comp=3,4 directly; minor cleanup; jpeg comments
+      0.51    obey req_comp requests, 1-component jpegs return as 1-component,
+              on 'test' only check type, not whether we support this variant
+      0.50  (2006-11-19)
+              first released version
+*/
+
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
\ No newline at end of file
diff --git a/third-party/tiny_gltf/stb_image_write.h b/third-party/tiny_gltf/stb_image_write.h
new file mode 100644
index 0000000000..c1e4f3479a
--- /dev/null
+++ b/third-party/tiny_gltf/stb_image_write.h
@@ -0,0 +1,1621 @@
+/* stb_image_write - v1.11 - public domain - http://nothings.org/stb/stb_image_write.h
+   writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015
+                                     no warranty implied; use at your own risk
+
+   Before #including,
+
+       #define STB_IMAGE_WRITE_IMPLEMENTATION
+
+   in the file that you want to have the implementation.
+
+   Will probably not work correctly with strict-aliasing optimizations.
+
+   If using a modern Microsoft Compiler, non-safe versions of CRT calls may cause
+   compilation warnings or even errors. To avoid this, also before #including,
+
+       #define STBI_MSC_SECURE_CRT
+
+ABOUT:
+
+   This header file is a library for writing images to C stdio or a callback.
+
+   The PNG output is not optimal; it is 20-50% larger than the file
+   written by a decent optimizing implementation; though providing a custom
+   zlib compress function (see STBIW_ZLIB_COMPRESS) can mitigate that.
+   This library is designed for source code compactness and simplicity,
+   not optimal image file size or run-time performance.
+
+BUILDING:
+
+   You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h.
+   You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace
+   malloc,realloc,free.
+   You can #define STBIW_MEMMOVE() to replace memmove()
+   You can #define STBIW_ZLIB_COMPRESS to use a custom zlib-style compress function
+   for PNG compression (instead of the builtin one), it must have the following signature:
+   unsigned char * my_compress(unsigned char *data, int data_len, int *out_len, int quality);
+   The returned data will be freed with STBIW_FREE() (free() by default),
+   so it must be heap allocated with STBIW_MALLOC() (malloc() by default).
+
+UNICODE:
+
+   If compiling for Windows and you wish to use Unicode filenames, compile
+   with
+       #define STBI_WINDOWS_UTF8
+   and pass utf8-encoded filenames. Call stbiw_convert_wchar_to_utf8 to convert
+   Windows wchar_t filenames to utf8.
+
+USAGE:
+
+   There are five functions, one for each image file format:
+
+     int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes);
+     int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data);
+     int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data);
+     int stbi_write_jpg(char const *filename, int w, int h, int comp, const void *data, int quality);
+     int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data);
+
+     void stbi_flip_vertically_on_write(int flag); // flag is non-zero to flip data vertically
+
+   There are also five equivalent functions that use an arbitrary write function. You are
+   expected to open/close your file-equivalent before and after calling these:
+
+     int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void  *data, int stride_in_bytes);
+     int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void  *data);
+     int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void  *data);
+     int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data);
+     int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality);
+
+   where the callback is:
+      void stbi_write_func(void *context, void *data, int size);
+
+   You can configure it with these global variables:
+      int stbi_write_tga_with_rle;             // defaults to true; set to 0 to disable RLE
+      int stbi_write_png_compression_level;    // defaults to 8; set to higher for more compression
+      int stbi_write_force_png_filter;         // defaults to -1; set to 0..5 to force a filter mode
+
+
+   You can define STBI_WRITE_NO_STDIO to disable the file variant of these
+   functions, so the library will not use stdio.h at all. However, this will
+   also disable HDR writing, because it requires stdio for formatted output.
+
+   Each function returns 0 on failure and non-0 on success.
+
+   The functions create an image file defined by the parameters. The image
+   is a rectangle of pixels stored from left-to-right, top-to-bottom.
+   Each pixel contains 'comp' channels of data stored interleaved with 8-bits
+   per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is
+   monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall.
+   The *data pointer points to the first byte of the top-left-most pixel.
+   For PNG, "stride_in_bytes" is the distance in bytes from the first byte of
+   a row of pixels to the first byte of the next row of pixels.
+
+   PNG creates output files with the same number of components as the input.
+   The BMP format expands Y to RGB in the file format and does not
+   output alpha.
+
+   PNG supports writing rectangles of data even when the bytes storing rows of
+   data are not consecutive in memory (e.g. sub-rectangles of a larger image),
+   by supplying the stride between the beginning of adjacent rows. The other
+   formats do not. (Thus you cannot write a native-format BMP through the BMP
+   writer, both because it is in BGR order and because it may have padding
+   at the end of the line.)
+
+   PNG allows you to set the deflate compression level by setting the global
+   variable 'stbi_write_png_compression_level' (it defaults to 8).
+
+   HDR expects linear float data. Since the format is always 32-bit rgb(e)
+   data, alpha (if provided) is discarded, and for monochrome data it is
+   replicated across all three channels.
+
+   TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed
+   data, set the global variable 'stbi_write_tga_with_rle' to 0.
+
+   JPEG ignores alpha channels in the input data; quality is between 1 and 100.
+   Higher quality looks better but results in a bigger image.
+   Output is always baseline JPEG (progressive JPEG is not supported).
+
+CREDITS:
+
+
+   Sean Barrett           -    PNG/BMP/TGA
+   Baldur Karlsson        -    HDR
+   Jean-Sebastien Guay    -    TGA monochrome
+   Tim Kelsey             -    misc enhancements
+   Alan Hickman           -    TGA RLE
+   Emmanuel Julien        -    initial file IO callback implementation
+   Jon Olick              -    original jo_jpeg.cpp code
+   Daniel Gibson          -    integrate JPEG, allow external zlib
+   Aarni Koskela          -    allow choosing PNG filter
+
+   bugfixes:
+      github:Chribba
+      Guillaume Chereau
+      github:jry2
+      github:romigrou
+      Sergio Gonzalez
+      Jonas Karlsson
+      Filip Wasil
+      Thatcher Ulrich
+      github:poppolopoppo
+      Patrick Boettcher
+      github:xeekworx
+      Cap Petschulat
+      Simon Rodriguez
+      Ivan Tikhonov
+      github:ignotion
+      Adam Schackart
+
+LICENSE
+
+  See end of file for license information.
+
+*/
+
+#ifndef INCLUDE_STB_IMAGE_WRITE_H
+#define INCLUDE_STB_IMAGE_WRITE_H
+
+#include <stdlib.h>
+// STBIWDEF is the linkage prefix on every public function: static, extern "C" or extern (unless predefined).
+// if STB_IMAGE_WRITE_STATIC causes problems, try defining STBIWDEF to 'inline' or 'static inline'
+#ifndef STBIWDEF
+#ifdef STB_IMAGE_WRITE_STATIC
+#define STBIWDEF  static
+#else
+#ifdef __cplusplus
+#define STBIWDEF  extern "C"
+#else
+#define STBIWDEF  extern
+#endif
+#endif
+#endif
+// Global settings; declared here only for non-static builds (they are defined in the implementation section).
+#ifndef STB_IMAGE_WRITE_STATIC  // C++ forbids static forward declarations
+extern int stbi_write_tga_with_rle;
+extern int stbi_write_png_compression_level;
+extern int stbi_write_force_png_filter;
+#endif
+// Filename-based writers; not declared when STBI_WRITE_NO_STDIO is defined.
+#ifndef STBI_WRITE_NO_STDIO
+STBIWDEF int stbi_write_png(char const *filename, int w, int h, int comp, const void  *data, int stride_in_bytes);
+STBIWDEF int stbi_write_bmp(char const *filename, int w, int h, int comp, const void  *data);
+STBIWDEF int stbi_write_tga(char const *filename, int w, int h, int comp, const void  *data);
+STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data);
+STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void  *data, int quality);
+// Note: the macro tested below is STBI_WINDOWS_UTF8 (no 'W' after STBI).
+#ifdef STBI_WINDOWS_UTF8
+STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input);
+#endif
+#endif  // !STBI_WRITE_NO_STDIO
+// Output callback type: called with the caller's context pointer and a buffer of `size` bytes to write.
+typedef void stbi_write_func(void *context, void *data, int size);
+// Callback-based writers: same image parameters as above, but output goes to `func` with `context`.
+STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void  *data, int stride_in_bytes);
+STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void  *data);
+STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void  *data);
+STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data);
+STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void  *data, int quality);
+// A non-zero argument makes subsequent writes emit the image rows in reverse vertical order.
+STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean);
+
+#endif//INCLUDE_STB_IMAGE_WRITE_H
+
+#ifdef STB_IMAGE_WRITE_IMPLEMENTATION
+
+#ifdef _WIN32
+   #ifndef _CRT_SECURE_NO_WARNINGS
+   #define _CRT_SECURE_NO_WARNINGS
+   #endif
+   #ifndef _CRT_NONSTDC_NO_DEPRECATE
+   #define _CRT_NONSTDC_NO_DEPRECATE
+   #endif
+#endif
+
+#ifndef STBI_WRITE_NO_STDIO
+#include <stdio.h>
+#endif // STBI_WRITE_NO_STDIO
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED))
+// ok: a complete custom allocator was supplied
+#elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED)
+// ok: nothing was overridden, so the defaults below apply
+#else
+#error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)."
+#endif
+// Default allocator: the C library's malloc / realloc / free.
+#ifndef STBIW_MALLOC
+#define STBIW_MALLOC(sz)        malloc(sz)
+#define STBIW_REALLOC(p,newsz)  realloc(p,newsz)
+#define STBIW_FREE(p)           free(p)
+#endif
+// Without a sized realloc, STBIW_REALLOC_SIZED drops the old size and forwards to STBIW_REALLOC.
+#ifndef STBIW_REALLOC_SIZED
+#define STBIW_REALLOC_SIZED(p,oldsz,newsz) STBIW_REALLOC(p,newsz)
+#endif
+
+// STBIW_MEMMOVE defaults to the C library's memmove.
+#ifndef STBIW_MEMMOVE
+#define STBIW_MEMMOVE(a,b,sz) memmove(a,b,sz)
+#endif
+
+// STBIW_ASSERT defaults to assert(); <assert.h> is only included when no replacement was given.
+#ifndef STBIW_ASSERT
+#include <assert.h>
+#define STBIW_ASSERT(x) assert(x)
+#endif
+// Keep only the low 8 bits of x and convert the result to unsigned char.
+#define STBIW_UCHAR(x) (unsigned char) ((x) & 0xff)
+
+#ifdef STB_IMAGE_WRITE_STATIC  // static build: the settings get internal linkage
+static int stbi__flip_vertically_on_write=0;      // set through stbi_flip_vertically_on_write()
+static int stbi_write_png_compression_level = 8;
+static int stbi_write_tga_with_rle = 1;           // non-zero: RLE enabled
+static int stbi_write_force_png_filter = -1;      // -1: no filter forced
+#else  // default build: the same settings and initial values with external linkage
+int stbi_write_png_compression_level = 8;
+int stbi__flip_vertically_on_write=0;
+int stbi_write_tga_with_rle = 1;
+int stbi_write_force_png_filter = -1;
+#endif
+
+STBIWDEF void stbi_flip_vertically_on_write(int flag)
+{  // Store `flag` in the library-wide setting; stbiw__write_pixels reverses its row direction when it is non-zero.
+   stbi__flip_vertically_on_write = flag;
+}
+
+typedef struct
+{
+   stbi_write_func *func;  // callback that receives every run of output bytes
+   void *context;          // opaque value passed back to func as its first argument
+} stbi__write_context;
+
+// Point the write context at callback `c` and at the opaque `context` value that is passed back to it.
+static void stbi__start_write_callbacks(stbi__write_context *s, stbi_write_func *c, void *context)
+{
+   s->context = context;
+   s->func = c;
+}
+
+#ifndef STBI_WRITE_NO_STDIO
+
+static void stbi__stdio_write(void *context, void *data, int size)
+{  // stbi_write_func adapter for stdio: `context` is treated as the FILE* to write to; fwrite's result is not checked.
+   fwrite(data,1,size,(FILE*) context);
+}
+
+#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)  // MSVC-only UTF-8 filename support
+#ifdef __cplusplus
+#define STBIW_EXTERN extern "C"
+#else
+#define STBIW_EXTERN extern
+#endif  // the two Win32 prototypes below are declared by hand (presumably to avoid including <windows.h>)
+STBIW_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
+STBIW_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);
+// Convert the wide string `input` to UTF-8 in `buffer` (capacity `bufferlen` bytes); returns WideCharToMultiByte's result.
+STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input)
+{
+	return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, bufferlen, NULL, NULL);
+}
+#endif
+
+static FILE *stbiw__fopen(char const *filename, char const *mode)
+{  // Open `filename` with fopen-style `mode`; returns the stream, or 0 when name conversion or the open fails.
+   FILE *f;
+#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
+   wchar_t wMode[64];
+   wchar_t wFilename[1024];
+   // MultiByteToWideChar takes the destination capacity in wide characters, not bytes;
+   // passing sizeof(array) overstated it and allowed writes past the end of these buffers.
+   if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename)))
+      return 0;
+   if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode)))
+      return 0;
+#if _MSC_VER >= 1400
+   if (0 != _wfopen_s(&f, wFilename, wMode))
+      f = 0;
+#else
+   f = _wfopen(wFilename, wMode);
+#endif
+
+#elif defined(_MSC_VER) && _MSC_VER >= 1400
+   if (0 != fopen_s(&f, filename, mode))
+      f=0;
+#else
+   f = fopen(filename, mode);
+#endif
+   return f;
+}
+
+static int stbi__start_write_file(stbi__write_context *s, const char *filename)
+{  // Open `filename` in "wb" mode and route the context's output to it; returns 0 if the open failed, else 1.
+   FILE *out = stbiw__fopen(filename, "wb");
+   stbi__start_write_callbacks(s, stbi__stdio_write, (void *) out);
+   return (out == NULL) ? 0 : 1;
+}
+
+static void stbi__end_write_file(stbi__write_context *s)
+{  // Closes the FILE* held in s->context (the value stbi__start_write_file stores there).
+   fclose((FILE *)s->context);
+}
+
+#endif // !STBI_WRITE_NO_STDIO
+
+typedef unsigned int stbiw_uint32;  // used wherever a 4-byte unsigned value is needed
+typedef int stb_image_write_test[sizeof(stbiw_uint32)==4 ? 1 : -1];  // compile-time check: the array size is -1 (an error) unless stbiw_uint32 is 4 bytes
+
+static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v)
+{
+   // Serialize the int arguments in `v` according to `fmt`: each '1', '2' or '4' consumes one
+   // argument and emits that many bytes, least-significant byte first; spaces are ignored.
+   // Any other format character trips STBIW_ASSERT and abandons the rest of the format.
+   for (; *fmt; ++fmt) {
+      unsigned char bytes[4];
+      stbiw_uint32 value;
+      int width, i;
+
+      switch (*fmt) {
+         case ' ': continue;   // separator only; consumes no argument
+         case '1': width = 1; break;
+         case '2': width = 2; break;
+         case '4': width = 4; break;
+         default:
+            STBIW_ASSERT(0);
+            return;
+      }
+
+      // split the value into `width` bytes, lowest-order byte at index 0
+      value = (stbiw_uint32) va_arg(v, int);
+      for (i = 0; i < width; ++i)
+         bytes[i] = STBIW_UCHAR(value >> (8 * i));
+
+      s->func(s->context, bytes, width);
+   }
+}
+
+static void stbiw__writef(stbi__write_context *s, const char *fmt, ...)
+{  // Variadic front end: wraps the trailing arguments in a va_list and hands them to stbiw__writefv.
+   va_list args;
+   va_start(args, fmt);
+   stbiw__writefv(s, fmt, args);
+   va_end(args);
+}
+
+static void stbiw__putc(stbi__write_context *s, unsigned char c)
+{  // Emit the single byte `c` through the context's write callback.
+   (*s->func)(s->context, &c, 1);
+}
+
+static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c)
+{  // Emit the three bytes a, b, c (in that order) with a single callback invocation.
+   unsigned char triple[3];
+   triple[0] = a; triple[1] = b; triple[2] = c;
+   s->func(s->context, triple, 3);
+}
+
+static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char *d)
+{
+   // Emit one pixel of `comp` channels read from d.
+   //   rgb_dir     +1 writes d[0],d[1],d[2]; -1 writes d[2],d[1],d[0]
+   //   write_alpha <0: alpha byte first, >0: alpha byte last, 0: no alpha byte
+   //   expand_mono nonzero: a grey value is written as three identical bytes
+   unsigned char pink[3] = { 255, 0, 255 };
+   unsigned char blended[3];
+   int c;
+
+   if (write_alpha < 0)
+      s->func(s->context, &d[comp - 1], 1);
+
+   if (comp == 1 || comp == 2) {
+      // 2 channels = mono + alpha; alpha is written separately, so same as 1-channel case
+      if (expand_mono)
+         stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp
+      else
+         s->func(s->context, d, 1);  // monochrome TGA
+   } else if (comp == 4 && !write_alpha) {
+      // composite against pink background
+      for (c = 0; c < 3; ++c)
+         blended[c] = pink[c] + ((d[c] - pink[c]) * d[3]) / 255;
+      stbiw__write3(s, blended[1 - rgb_dir], blended[1], blended[1 + rgb_dir]);
+   } else if (comp == 3 || comp == 4) {
+      // plain RGB, or RGBA whose alpha byte is emitted separately
+      stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]);
+   }
+
+   if (write_alpha > 0)
+      s->func(s->context, &d[comp - 1], 1);
+}
+
+static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono)
+{  // Write every row of the image, each followed by scanline_pad zero bytes; vdir < 0 walks the rows last-to-first.
+   stbiw_uint32 padding = 0;
+   int col, row, row_end;
+
+   if (y <= 0)
+      return;
+
+   if (stbi__flip_vertically_on_write)
+      vdir = -vdir;
+
+   // start at the last row when walking backwards, at the first row otherwise
+   row     = (vdir < 0) ? y-1 : 0;
+   row_end = (vdir < 0) ?  -1 : y;
+
+   for (; row != row_end; row += vdir) {
+      for (col = 0; col < x; ++col) {
+         unsigned char *pixel = (unsigned char *) data + (row*x+col)*comp;
+         stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, pixel);
+      }
+      // pad the scanline (scanline_pad bytes taken from a zeroed 32-bit word)
+      s->func(s->context, &padding, scanline_pad);
+   }
+}
+
+static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...)
+{  // Write a header described by fmt and the varargs, then the pixel data; returns 0 when x or y is negative, else 1.
+   va_list header_args;
+
+   if (x < 0 || y < 0)
+      return 0;
+
+   va_start(header_args, fmt);
+   stbiw__writefv(s, fmt, header_args);
+   va_end(header_args);
+   stbiw__write_pixels(s,rgb_dir,vdir,x,y,comp,data,alpha,pad, expand_mono);
+   return 1;
+}
+
+static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data)
+{  // Emit a BMP: 14-byte file header, 40-byte bitmap header, then 3-byte pixels with each row padded to a multiple of 4 bytes.
+   int row_pad = (-x*3) & 3;                 // bytes needed to round x*3 up to a multiple of 4
+   int file_size = 14+40+(x*3+row_pad)*y;    // both headers plus all padded rows
+   return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *) data,0,row_pad, "11 4 22 4" "4 44 22 444444",
+           'B', 'M', file_size, 0,0, 14+40,  // file header
+            40, x,y, 1,24, 0,0,0,0,0,0);     // bitmap header
+}
+
+STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data)
+{  // BMP output through a user callback: bind func/context to a write context, then run the BMP encoder.
+   stbi__write_context ctx;
+   stbi__start_write_callbacks(&ctx, func, context);
+   return stbi_write_bmp_core(&ctx, x, y, comp, data);
+}
+
+#ifndef STBI_WRITE_NO_STDIO
+STBIWDEF int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data)
+{  // Write a BMP to `filename`; returns 0 if the file cannot be opened, otherwise the encoder's result.
+   stbi__write_context ctx;
+   int result;
+   if (!stbi__start_write_file(&ctx, filename))
+      return 0;   // could not open the output file
+   result = stbi_write_bmp_core(&ctx, x, y, comp, data);
+   stbi__end_write_file(&ctx);
+   return result;
+}
+#endif //!STBI_WRITE_NO_STDIO
+
+static int stbi_write_tga_core(stbi__write_context *s, int x, int y, int comp, void *data)
+{  // Writes a TGA header and the pixel data through s, run-length encoded when stbi_write_tga_with_rle is nonzero; returns 0 for negative dimensions.
+   int has_alpha = (comp == 2 || comp == 4);
+   int colorbytes = has_alpha ? comp-1 : comp;
+   int format = colorbytes < 2 ? 3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3
+
+   if (y < 0 || x < 0)
+      return 0;
+
+   if (!stbi_write_tga_with_rle) {
+      return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void *) data, has_alpha, 0,
+         "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8);
+   } else {
+      int i,j,k;
+      int jend, jdir;
+
+      stbiw__writef(s, "111 221 2222 11", 0,0,format+8, 0,0,0, 0,0,x,y, (colorbytes + has_alpha) * 8, has_alpha * 8); // same header fields as the uncompressed path, with 8 added to the format code
+      // rows are visited last-to-first unless the vertical-flip flag is set
+      if (stbi__flip_vertically_on_write) {
+         j = 0;
+         jend = y;
+         jdir = 1;
+      } else {
+         j = y-1;
+         jend = -1;
+         jdir = -1;
+      }
+      for (; j != jend; j += jdir) {
+         unsigned char *row = (unsigned char *) data + j * x * comp;
+         int len;
+         // split the row into packets of `len` pixels each (len is capped at 128 by the scans below)
+         for (i = 0; i < x; i += len) {
+            unsigned char *begin = row + i * comp;
+            int diff = 1;   // nonzero: raw packet; zero: run of identical pixels
+            len = 1;
+
+            if (i < x - 1) {
+               ++len;
+               diff = memcmp(begin, row + (i + 1) * comp, comp);
+               if (diff) {
+                  const unsigned char *prev = begin; // NOTE(review): prev starts at pixel i while k starts at i+2, so pixels two apart are compared -- confirm this is intended
+                  for (k = i + 2; k < x && len < 128; ++k) {
+                     if (memcmp(prev, row + k * comp, comp)) {
+                        prev += comp;
+                        ++len;
+                     } else {
+                        --len;   // give the last pixel back so it can start the next packet
+                        break;
+                     }
+                  }
+               } else {
+                  for (k = i + 2; k < x && len < 128; ++k) {   // extend the run while pixels equal the first one
+                     if (!memcmp(begin, row + k * comp, comp)) {
+                        ++len;
+                     } else {
+                        break;
+                     }
+                  }
+               }
+            }
+            // raw packet: header len-1, then len pixels; run packet: header len-129 (mod 256), then one pixel
+            if (diff) {
+               unsigned char header = STBIW_UCHAR(len - 1);
+               s->func(s->context, &header, 1);
+               for (k = 0; k < len; ++k) {
+                  stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp);
+               }
+            } else {
+               unsigned char header = STBIW_UCHAR(len - 129);
+               s->func(s->context, &header, 1);
+               stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin);
+            }
+         }
+      }
+   }
+   return 1;
+}
+
+STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data)
+{  // TGA output through a user callback: bind func/context to a write context, then run the TGA encoder.
+   stbi__write_context ctx;
+   stbi__start_write_callbacks(&ctx, func, context);
+   return stbi_write_tga_core(&ctx, x, y, comp, (void *) data);
+}
+
+#ifndef STBI_WRITE_NO_STDIO
+STBIWDEF int stbi_write_tga(char const *filename, int x, int y, int comp, const void *data)
+{  // Write a TGA to `filename`; returns 0 if the file cannot be opened, otherwise the encoder's result.
+   stbi__write_context ctx;
+   int result;
+   if (!stbi__start_write_file(&ctx, filename))
+      return 0;   // could not open the output file
+   result = stbi_write_tga_core(&ctx, x, y, comp, (void *) data);
+   stbi__end_write_file(&ctx);
+   return result;
+}
+#endif
+
+// *************************************************************************************************
+// Radiance RGBE HDR writer
+// by Baldur Karlsson
+
+#define stbiw__max(a, b)  ((a) > (b) ? (a) : (b))   // larger of a and b; the chosen argument is evaluated twice
+
+static void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear)
+{  // Pack a linear RGB triple into 4 bytes: three mantissa bytes plus a shared exponent byte biased by 128.
+   int shared_exp;
+   float scale;
+   float brightest = stbiw__max(linear[0], stbiw__max(linear[1], linear[2]));
+
+   if (brightest < 1e-32f) {   // below the threshold the pixel is stored as all zeros
+      rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0;
+      return;
+   }
+   scale = (float) frexp(brightest, &shared_exp) * 256.0f/brightest;
+   rgbe[0] = (unsigned char)(linear[0] * scale);
+   rgbe[1] = (unsigned char)(linear[1] * scale);
+   rgbe[2] = (unsigned char)(linear[2] * scale);
+   rgbe[3] = (unsigned char)(shared_exp + 128);
+}
+
+static void stbiw__write_run_data(stbi__write_context *s, int length, unsigned char databyte)
+{  // Emit an RLE run: a count byte of length+128 followed by the value being repeated.
+   unsigned char run_header = STBIW_UCHAR(length+128);
+   STBIW_ASSERT(length+128 <= 255);
+   s->func(s->context, &run_header, 1);
+   s->func(s->context, &databyte, 1);
+}
+
+static void stbiw__write_dump_data(stbi__write_context *s, int length, unsigned char *data)
+{  // Emit a literal packet: a count byte followed by `length` raw bytes taken from data.
+   unsigned char count = STBIW_UCHAR(length);
+   STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code
+   s->func(s->context, &count, 1);
+   s->func(s->context, data, length);
+}
+
+static void stbiw__write_hdr_scanline(stbi__write_context *s, int width, int ncomp, unsigned char *scratch, float *scanline)
+{  // Encode one row of float pixels as RGBE and write it, run-length encoded unless the row is narrower than 8 or at least 32768 pixels wide.
+   unsigned char scanlineheader[4] = { 2, 2, 0, 0 };
+   unsigned char rgbe[4];
+   float linear[3];
+   int x;
+   // bytes 2 and 3 of the scanline header carry the width, high byte first
+   scanlineheader[2] = (width&0xff00)>>8;
+   scanlineheader[3] = (width&0x00ff);
+
+   /* skip RLE for images too small or large */
+   if (width < 8 || width >= 32768) {
+      for (x=0; x < width; x++) {
+         switch (ncomp) {
+            case 4: /* fallthrough */
+            case 3: linear[2] = scanline[x*ncomp + 2];
+                    linear[1] = scanline[x*ncomp + 1];
+                    linear[0] = scanline[x*ncomp + 0];
+                    break;
+            default:   // any other channel count: replicate channel 0 into R, G and B
+                    linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0];
+                    break;
+         }
+         stbiw__linear_to_rgbe(rgbe, linear);
+         s->func(s->context, rgbe, 4);
+      }
+   } else {
+      int c,r;
+      /* encode into scratch buffer */
+      for (x=0; x < width; x++) {
+         switch(ncomp) {
+            case 4: /* fallthrough */
+            case 3: linear[2] = scanline[x*ncomp + 2];
+                    linear[1] = scanline[x*ncomp + 1];
+                    linear[0] = scanline[x*ncomp + 0];
+                    break;
+            default:
+                    linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0];
+                    break;
+         }
+         stbiw__linear_to_rgbe(rgbe, linear);
+         scratch[x + width*0] = rgbe[0];   // scratch holds four planes of `width` bytes: R, G, B, E
+         scratch[x + width*1] = rgbe[1];
+         scratch[x + width*2] = rgbe[2];
+         scratch[x + width*3] = rgbe[3];
+      }
+
+      s->func(s->context, scanlineheader, 4);
+
+      /* RLE each component separately */
+      for (c=0; c < 4; c++) {
+         unsigned char *comp = &scratch[width*c];
+
+         x = 0;
+         while (x < width) {
+            // find first run
+            r = x;
+            while (r+2 < width) {
+               if (comp[r] == comp[r+1] && comp[r] == comp[r+2])   // a run needs three equal bytes in a row
+                  break;
+               ++r;
+            }
+            if (r+2 >= width)
+               r = width;   // no run found: the rest of the plane is dumped as literals
+            // dump up to first run
+            while (x < r) {
+               int len = r-x;
+               if (len > 128) len = 128;
+               stbiw__write_dump_data(s, len, &comp[x]);
+               x += len;
+            }
+            // if there's a run, output it
+            if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd
+               // find next byte after run
+               while (r < width && comp[r] == comp[x])
+                  ++r;
+               // output run up to r
+               while (x < r) {
+                  int len = r-x;
+                  if (len > 127) len = 127;
+                  stbiw__write_run_data(s, len, comp[x]);
+                  x += len;
+               }
+            }
+         }
+      }
+   }
+}
+
+static int stbi_write_hdr_core(stbi__write_context *s, int x, int y, int comp, float *data)
+{  // Write a Radiance .hdr image (text header, then one encoded scanline per row); returns 0 on bad arguments or allocation failure, else 1.
+   if (y <= 0 || x <= 0 || data == NULL)
+      return 0;
+   else {
+      // Each component is stored separately. Allocate scratch space for full output scanline.
+      unsigned char *scratch = (unsigned char *) STBIW_MALLOC(x*4);
+      int i, len;
+      char buffer[128];
+      char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n";
+      if (scratch == NULL) return 0;   // out of memory: bail out before any output is produced
+      s->func(s->context, header, sizeof(header)-1);
+#ifdef STBI_MSC_SECURE_CRT
+      len = sprintf_s(buffer, "EXPOSURE=          1.0000000000000\n\n-Y %d +X %d\n", y, x);
+#else
+      len = sprintf(buffer, "EXPOSURE=          1.0000000000000\n\n-Y %d +X %d\n", y, x);
+#endif
+      s->func(s->context, buffer, len);
+      // rows are taken last-to-first when the vertical-flip flag is set
+      for(i=0; i < y; i++)
+         stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp*x*(stbi__flip_vertically_on_write ? y-1-i : i));
+      STBIW_FREE(scratch);
+      return 1;
+   }
+}
+
+STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const float *data)
+{  // HDR output through a user callback: bind func/context to a write context, then run the HDR encoder.
+   stbi__write_context ctx;
+   stbi__start_write_callbacks(&ctx, func, context);
+   return stbi_write_hdr_core(&ctx, x, y, comp, (float *) data);
+}
+
+#ifndef STBI_WRITE_NO_STDIO
+STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data)
+{  // Write an HDR image to `filename`; returns 0 if the file cannot be opened, otherwise the encoder's result.
+   stbi__write_context ctx;
+   int result;
+   if (!stbi__start_write_file(&ctx, filename))
+      return 0;   // could not open the output file
+   result = stbi_write_hdr_core(&ctx, x, y, comp, (float *) data);
+   stbi__end_write_file(&ctx);
+   return result;
+}
+#endif // STBI_WRITE_NO_STDIO
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// PNG writer
+//
+
+#ifndef STBIW_ZLIB_COMPRESS
+// stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size()
+#define stbiw__sbraw(a) ((int *) (a) - 2)   // start of the allocation: two ints of bookkeeping sit in front of the items
+#define stbiw__sbm(a)   stbiw__sbraw(a)[0]   // allocated capacity, in items
+#define stbiw__sbn(a)   stbiw__sbraw(a)[1]   // number of items currently stored
+// growth is needed when the array is still unallocated or n more items would reach the capacity
+#define stbiw__sbneedgrow(a,n)  ((a)==0 || stbiw__sbn(a)+n >= stbiw__sbm(a))
+#define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? stbiw__sbgrow(a,n) : 0)
+#define stbiw__sbgrow(a,n)  stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a)))
+// note: these macros evaluate their array argument more than once
+#define stbiw__sbpush(a, v)      (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v))
+#define stbiw__sbcount(a)        ((a) ? stbiw__sbn(a) : 0)
+#define stbiw__sbfree(a)         ((a) ? STBIW_FREE(stbiw__sbraw(a)),0 : 0)
+
+static void *stbiw__sbgrowf(void **arr, int increment, int itemsize)
+{  // Grow (or first allocate) the stretchy buffer *arr so `increment` more items fit; updates *arr and returns it.
+   int m = *arr ? 2*stbiw__sbm(*arr)+increment : increment+1;   // new capacity: double plus increment, or increment+1 on first use
+   void *p = STBIW_REALLOC_SIZED(*arr ? stbiw__sbraw(*arr) : 0, *arr ? (stbiw__sbm(*arr)*itemsize + sizeof(int)*2) : 0, itemsize * m + sizeof(int)*2);
+   STBIW_ASSERT(p);
+   if (p) {
+      if (!*arr) ((int *) p)[1] = 0;   // fresh buffer: item count starts at zero
+      *arr = (void *) ((int *) p + 2);   // user pointer skips the two bookkeeping ints
+      stbiw__sbm(*arr) = m;
+   }
+   return *arr;   // unchanged (possibly NULL) if the reallocation failed
+}
+
+static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount)
+{  // Move every complete byte out of the bit buffer into the output array; returns the (possibly reallocated) array.
+   for (; *bitcount >= 8; *bitcount -= 8) {
+      // the lowest eight buffered bits form the next output byte
+      stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer));
+      *bitbuffer >>= 8;
+   }
+   return data;
+}
+
+static int stbiw__zlib_bitrev(int code, int codebits)
+{  // Reverse the order of the low `codebits` bits of `code`.
+   int reversed = 0;
+   for (; codebits != 0; --codebits) {
+      reversed = (reversed << 1) | (code & 1);
+      code >>= 1;
+   }
+   return reversed;
+}
+
+static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int limit)
+{  // Length of the common prefix of a and b, examining at most min(limit, 258) bytes.
+   int matched = 0;
+   while (matched < limit && matched < 258 && a[matched] == b[matched])
+      ++matched;
+   return matched;
+}
+
+static unsigned int stbiw__zhash(unsigned char *data)
+{  // Mix the three bytes data[0..2] into a 32-bit hash value.
+   stbiw_uint32 h = data[0] + (data[1] << 8) + (data[2] << 16);
+   h = h ^ (h << 3);
+   h = h + (h >> 5);
+   h = h ^ (h << 4);
+   h = h + (h >> 17);
+   h = h ^ (h << 25);
+   h = h + (h >> 6);
+   return h;
+}
+
+#define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount))   // relies on locals out, bitbuf, bitcount of the expanding function
+#define stbiw__zlib_add(code,codebits) \
+      (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush())   // append codebits bits above those already buffered, then flush whole bytes
+#define stbiw__zlib_huffa(b,c)  stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c)   // codes are bit-reversed before being appended
+// default huffman tables
+#define stbiw__zlib_huff1(n)  stbiw__zlib_huffa(0x30 + (n), 8)   // selected for n <= 143: 8-bit codes
+#define stbiw__zlib_huff2(n)  stbiw__zlib_huffa(0x190 + (n)-144, 9)   // selected for 144..255: 9-bit codes
+#define stbiw__zlib_huff3(n)  stbiw__zlib_huffa(0 + (n)-256,7)   // selected for 256..279: 7-bit codes
+#define stbiw__zlib_huff4(n)  stbiw__zlib_huffa(0xc0 + (n)-280,8)   // selected for n >= 280: 8-bit codes
+#define stbiw__zlib_huff(n)  ((n) <= 143 ? stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n))
+#define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n))
+// number of hash buckets; it is used as a mask (stbiw__ZHASH-1), so it must stay a power of two
+#define stbiw__ZHASH   16384
+
+#endif // STBIW_ZLIB_COMPRESS
+
+STBIWDEF unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality)
+{  // Compress data[0..data_len-1] into a zlib stream; returns a buffer to release with STBIW_FREE and stores its size in *out_len.
+#ifdef STBIW_ZLIB_COMPRESS
+   // user provided a zlib compress implementation, use that
+   return STBIW_ZLIB_COMPRESS(data, data_len, out_len, quality);
+#else // use builtin
+   static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 };
+   static unsigned char  lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,  4,  5,  5,  5,  5,  0 };
+   static unsigned short distc[]   = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 };
+   static unsigned char  disteb[]  = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 };
+   unsigned int bitbuf=0;
+   int i,j, bitcount=0;
+   unsigned char *out = NULL;
+   unsigned char ***hash_table = (unsigned char***) STBIW_MALLOC(stbiw__ZHASH * sizeof(char**));
+   if (hash_table == NULL)
+      return NULL;
+   if (quality < 5) quality = 5;   // quality sets how many candidates each hash bucket keeps; 5 is the minimum
+
+   stbiw__sbpush(out, 0x78);   // DEFLATE 32K window
+   stbiw__sbpush(out, 0x5e);   // FLEVEL = 1
+   stbiw__zlib_add(1,1);  // BFINAL = 1
+   stbiw__zlib_add(1,2);  // BTYPE = 1 -- fixed huffman
+
+   for (i=0; i < stbiw__ZHASH; ++i)
+      hash_table[i] = NULL;
+
+   i=0;
+   while (i < data_len-3) {
+      // hash next 3 bytes of data to be compressed
+      int h = stbiw__zhash(data+i)&(stbiw__ZHASH-1), best=3;
+      unsigned char *bestloc = 0;
+      unsigned char **hlist = hash_table[h];
+      int n = stbiw__sbcount(hlist);
+      for (j=0; j < n; ++j) {
+         if (hlist[j]-data > i-32768) { // if entry lies within window
+            int d = stbiw__zlib_countm(hlist[j], data+i, data_len-i);
+            if (d >= best) best=d,bestloc=hlist[j];
+         }
+      }
+      // when hash table entry is too long, delete half the entries
+      if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2*quality) {
+         STBIW_MEMMOVE(hash_table[h], hash_table[h]+quality, sizeof(hash_table[h][0])*quality);   // keep the second (most recently pushed) half
+         stbiw__sbn(hash_table[h]) = quality;
+      }
+      stbiw__sbpush(hash_table[h],data+i);
+
+      if (bestloc) {
+         // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal
+         h = stbiw__zhash(data+i+1)&(stbiw__ZHASH-1);
+         hlist = hash_table[h];
+         n = stbiw__sbcount(hlist);
+         for (j=0; j < n; ++j) {
+            if (hlist[j]-data > i-32767) {
+               int e = stbiw__zlib_countm(hlist[j], data+i+1, data_len-i-1);
+               if (e > best) { // if next match is better, bail on current match
+                  bestloc = NULL;
+                  break;
+               }
+            }
+         }
+      }
+
+      if (bestloc) {
+         int d = (int) (data+i - bestloc); // distance back
+         STBIW_ASSERT(d <= 32767 && best <= 258);
+         for (j=0; best > lengthc[j+1]-1; ++j);   // find the length code whose range contains best
+         stbiw__zlib_huff(j+257);
+         if (lengtheb[j]) stbiw__zlib_add(best - lengthc[j], lengtheb[j]);
+         for (j=0; d > distc[j+1]-1; ++j);   // find the distance code whose range contains d
+         stbiw__zlib_add(stbiw__zlib_bitrev(j,5),5);
+         if (disteb[j]) stbiw__zlib_add(d - distc[j], disteb[j]);
+         i += best;
+      } else {
+         stbiw__zlib_huffb(data[i]);
+         ++i;
+      }
+   }
+   // write out final bytes
+   for (;i < data_len; ++i)
+      stbiw__zlib_huffb(data[i]);
+   stbiw__zlib_huff(256); // end of block
+   // pad with 0 bits to byte boundary
+   while (bitcount)
+      stbiw__zlib_add(0,1);
+
+   for (i=0; i < stbiw__ZHASH; ++i)
+      (void) stbiw__sbfree(hash_table[i]);
+   STBIW_FREE(hash_table);
+
+   {
+      // compute adler32 on input
+      unsigned int s1=1, s2=0;
+      int blocklen = (int) (data_len % 5552);   // first block takes the remainder so every later block is exactly 5552 bytes
+      j=0;
+      while (j < data_len) {
+         for (i=0; i < blocklen; ++i) s1 += data[j+i], s2 += s1;
+         s1 %= 65521, s2 %= 65521;
+         j += blocklen;
+         blocklen = 5552;
+      }
+      stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8));   // checksum is appended as s2 then s1, high byte first
+      stbiw__sbpush(out, STBIW_UCHAR(s2));
+      stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8));
+      stbiw__sbpush(out, STBIW_UCHAR(s1));
+   }
+   *out_len = stbiw__sbn(out);
+   // make returned pointer freeable
+   STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len);
+   return (unsigned char *) stbiw__sbraw(out);
+#endif // STBIW_ZLIB_COMPRESS
+}
+
+static unsigned int stbiw__crc32(unsigned char *buffer, int len)
+{  // Checksum of buffer[0..len-1]: delegates to STBIW_CRC32 when that macro is defined, otherwise uses the 256-entry table below.
+#ifdef STBIW_CRC32
+    return STBIW_CRC32(buffer, len);
+#else
+   static unsigned int crc_table[256] =
+   {
+      0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3,
+      0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91,
+      0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
+      0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5,
+      0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B,
+      0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
+      0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F,
+      0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D,
+      0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
+      0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01,
+      0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457,
+      0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
+      0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB,
+      0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9,
+      0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
+      0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD,
+      0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683,
+      0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
+      0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7,
+      0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5,
+      0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
+      0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79,
+      0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F,
+      0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
+      0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713,
+      0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21,
+      0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
+      0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45,
+      0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB,
+      0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
+      0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF,
+      0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
+   };
+   // the register starts as all ones and is inverted again on return
+   unsigned int crc = ~0u;
+   int i;
+   for (i=0; i < len; ++i)
+      crc = (crc >> 8) ^ crc_table[buffer[i] ^ (crc & 0xff)];   // one table lookup per input byte
+   return ~crc;
+#endif
+}
+
+#define stbiw__wpng4(o,a,b,c,d) ((o)[0]=STBIW_UCHAR(a),(o)[1]=STBIW_UCHAR(b),(o)[2]=STBIW_UCHAR(c),(o)[3]=STBIW_UCHAR(d),(o)+=4)   // store four bytes at o and advance o by 4
+#define stbiw__wp32(data,v) stbiw__wpng4(data, (v)>>24,(v)>>16,(v)>>8,(v));   // 32-bit value, most significant byte first; note the macro itself ends with ';'
+#define stbiw__wptag(data,s) stbiw__wpng4(data, s[0],s[1],s[2],s[3])   // four-character chunk tag
+
+static void stbiw__wpcrc(unsigned char **data, int len)
+{  // Append the CRC of the preceding len+4 bytes (most significant byte first) and advance *data past it.
+   unsigned int checksum = stbiw__crc32((*data) - (len + 4), len + 4);
+   stbiw__wp32(*data, checksum);
+}
+
+static unsigned char stbiw__paeth(int a, int b, int c)
+{  // Paeth predictor: of a, b and c, return the one closest to a+b-c (ties prefer a, then b).
+   int estimate = a + b - c;
+   int da = abs(estimate-a), db = abs(estimate-b), dc = abs(estimate-c);
+   if (da <= db && da <= dc) return STBIW_UCHAR(a);
+   return (db <= dc) ? STBIW_UCHAR(b) : STBIW_UCHAR(c);
+}
+
+// @OPTIMIZE: provide an option that always forces left-predict or paeth predict
+static void stbiw__encode_png_line(unsigned char *pixels, int stride_bytes, int width, int height, int y, int n, int filter_type, signed char *line_buffer)
+{  // Apply filter `filter_type` (0..4) to output row y (width pixels of n bytes each) and store the width*n filtered bytes in line_buffer.
+   static int mapping[] = { 0,1,2,3,4 };
+   static int firstmap[] = { 0,1,0,5,6 };   // first row: types 2, 3, 4 are replaced by 0, 5, 6, none of which read a previous row
+   int *mymap = (y != 0) ? mapping : firstmap;
+   int i;
+   int type = mymap[filter_type];
+   unsigned char *z = pixels + stride_bytes * (stbi__flip_vertically_on_write ? height-1-y : y);
+   int signed_stride = stbi__flip_vertically_on_write ? -stride_bytes : stride_bytes;   // z[i-signed_stride] is the same byte in the previously emitted row
+
+   if (type==0) {
+      memcpy(line_buffer, z, width*n);
+      return;
+   }
+
+   // first loop isn't optimized since it's just one pixel
+   for (i = 0; i < n; ++i) {
+      switch (type) {
+         case 1: line_buffer[i] = z[i]; break;
+         case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break;
+         case 3: line_buffer[i] = z[i] - (z[i-signed_stride]>>1); break;
+         case 4: line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-signed_stride],0)); break;
+         case 5: line_buffer[i] = z[i]; break;
+         case 6: line_buffer[i] = z[i]; break;
+      }
+   }
+   switch (type) {   // remaining pixels: z[i-n] is the same channel of the pixel to the left
+      case 1: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - z[i-n]; break;
+      case 2: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - z[i-signed_stride]; break;
+      case 3: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - ((z[i-n] + z[i-signed_stride])>>1); break;
+      case 4: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-signed_stride], z[i-signed_stride-n]); break;
+      case 5: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - (z[i-n]>>1); break;
+      case 6: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0); break;
+   }
+}
+
+STBIWDEF unsigned char *stbi_write_png_to_mem(const unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len)
+{  // Encode an x-by-y image of n 8-bit channels as a complete PNG file in a new buffer (size stored in *out_len); returns 0 on allocation failure. Release the result with STBIW_FREE.
+   int force_filter = stbi_write_force_png_filter;
+   int ctype[5] = { -1, 0, 4, 2, 6 };   // value of the IHDR colour-type byte, indexed by channel count n
+   unsigned char sig[8] = { 137,80,78,71,13,10,26,10 };
+   unsigned char *out,*o, *filt, *zlib;
+   signed char *line_buffer;
+   int j,zlen;
+   // stride_bytes == 0 means the rows are tightly packed
+   if (stride_bytes == 0)
+      stride_bytes = x * n;
+
+   if (force_filter >= 5) {
+      force_filter = -1;
+   }
+   // filt holds every filtered row, each prefixed by its one-byte filter type
+   filt = (unsigned char *) STBIW_MALLOC((x*n+1) * y); if (!filt) return 0;
+   line_buffer = (signed char *) STBIW_MALLOC(x * n); if (!line_buffer) { STBIW_FREE(filt); return 0; }
+   for (j=0; j < y; ++j) {
+      int filter_type;
+      if (force_filter > -1) {
+         filter_type = force_filter;
+         stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, force_filter, line_buffer);
+      } else { // Estimate the best filter by running through all of them:
+         int best_filter = 0, best_filter_val = 0x7fffffff, est, i;
+         for (filter_type = 0; filter_type < 5; filter_type++) {
+            stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, filter_type, line_buffer);
+
+            // Estimate the entropy of the line using this filter; the less, the better.
+            est = 0;
+            for (i = 0; i < x*n; ++i) {
+               est += abs((signed char) line_buffer[i]);
+            }
+            if (est < best_filter_val) {
+               best_filter_val = est;
+               best_filter = filter_type;
+            }
+         }
+         if (filter_type != best_filter) {  // If the last iteration already got us the best filter, don't redo it
+            stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, best_filter, line_buffer);
+            filter_type = best_filter;
+         }
+      }
+      // when we get here, filter_type contains the filter type, and line_buffer contains the data
+      filt[j*(x*n+1)] = (unsigned char) filter_type;
+      STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n);
+   }
+   STBIW_FREE(line_buffer);
+   zlib = stbi_zlib_compress(filt, y*( x*n+1), &zlen, stbi_write_png_compression_level);
+   STBIW_FREE(filt);
+   if (!zlib) return 0;
+
+   // each tag requires 12 bytes of overhead
+   out = (unsigned char *) STBIW_MALLOC(8 + 12+13 + 12+zlen + 12);
+   if (!out) { STBIW_FREE(zlib); return 0; }   // do not leak the compressed stream when the output buffer cannot be allocated
+   *out_len = 8 + 12+13 + 12+zlen + 12;
+
+   o=out;
+   STBIW_MEMMOVE(o,sig,8); o+= 8;
+   stbiw__wp32(o, 13); // header length
+   stbiw__wptag(o, "IHDR");
+   stbiw__wp32(o, x);
+   stbiw__wp32(o, y);
+   *o++ = 8;
+   *o++ = STBIW_UCHAR(ctype[n]);
+   *o++ = 0;
+   *o++ = 0;
+   *o++ = 0;
+   stbiw__wpcrc(&o,13);
+
+   stbiw__wp32(o, zlen);
+   stbiw__wptag(o, "IDAT");
+   STBIW_MEMMOVE(o, zlib, zlen);
+   o += zlen;
+   STBIW_FREE(zlib);
+   stbiw__wpcrc(&o, zlen);
+
+   stbiw__wp32(o,0);
+   stbiw__wptag(o, "IEND");
+   stbiw__wpcrc(&o,0);
+
+   STBIW_ASSERT(o == out + *out_len);
+
+   return out;
+}
+
+#ifndef STBI_WRITE_NO_STDIO
+STBIWDEF int stbi_write_png(char const *filename, int x, int y, int comp, const void *data, int stride_bytes)
+{  // Encode the image as PNG and store it in `filename`; returns 1 only if encoding, opening, writing and closing all succeed, else 0.
+   FILE *f;
+   int len, ok;
+   unsigned char *png = stbi_write_png_to_mem((const unsigned char *) data, stride_bytes, x, y, comp, &len);
+   if (png == NULL) return 0;
+
+   f = stbiw__fopen(filename, "wb");
+   if (!f) { STBIW_FREE(png); return 0; }
+   ok = (fwrite(png, 1, len, f) == (size_t) len);   // a short write means the file is incomplete
+   if (fclose(f) != 0) ok = 0;                      // buffered data can still fail to reach the file at close
+   STBIW_FREE(png);
+   return ok;
+}
+#endif
+
+STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int stride_bytes)
+{  // Encode the image as PNG in memory and hand the whole file to func in one call; returns 0 if encoding failed, else 1.
+   int png_size;
+   unsigned char *png_data = stbi_write_png_to_mem((const unsigned char *) data, stride_bytes, x, y, comp, &png_size);
+   if (!png_data) return 0;
+   func(context, png_data, png_size);
+   STBIW_FREE(png_data);
+   return 1;
+}
+
+
+/* ***************************************************************************
+ *
+ * JPEG writer
+ *
+ * This is based on Jon Olick's jo_jpeg.cpp:
+ * public domain Simple, Minimalistic JPEG writer - http://www.jonolick.com/code.html
+ */
+
+static const unsigned char stbiw__jpg_ZigZag[] = { 0,1,5,6,14,15,27,28,2,4,7,13,16,26,29,42,3,8,12,17,25,30,41,43,9,11,18,
+      24,31,40,44,53,10,19,23,32,39,45,52,54,20,22,33,38,46,51,55,60,21,34,37,47,50,56,59,61,35,36,48,49,57,58,62,63 };
+
+static void stbiw__jpg_writeBits(stbi__write_context *s, int *bitBufP, int *bitCntP, const unsigned short *bs) {
+   // Append the bs[1]-bit code bs[0] to the bit accumulator, then write out every complete byte.
+   int count = *bitCntP + bs[1];
+   int acc = *bitBufP | (bs[0] << (24 - count));
+   for (; count >= 8; count -= 8) {
+      unsigned char out = (acc >> 16) & 255;
+      stbiw__putc(s, out);
+      // every emitted 255 byte is followed by a 0 byte
+      if (out == 255) {
+         stbiw__putc(s, 0);
+      }
+      acc <<= 8;
+   }
+   *bitBufP = acc;
+   *bitCntP = count;
+}
+
+static void stbiw__jpg_DCT(float *d0p, float *d1p, float *d2p, float *d3p, float *d4p, float *d5p, float *d6p, float *d7p) { // in-place 8-point transform of the eight values addressed by d0p..d7p
+   float d0 = *d0p, d1 = *d1p, d2 = *d2p, d3 = *d3p, d4 = *d4p, d5 = *d5p, d6 = *d6p, d7 = *d7p;
+   float z1, z2, z3, z4, z5, z11, z13;
+   // butterfly stage: sums and differences of mirrored inputs
+   float tmp0 = d0 + d7;
+   float tmp7 = d0 - d7;
+   float tmp1 = d1 + d6;
+   float tmp6 = d1 - d6;
+   float tmp2 = d2 + d5;
+   float tmp5 = d2 - d5;
+   float tmp3 = d3 + d4;
+   float tmp4 = d3 - d4;
+
+   // Even part
+   float tmp10 = tmp0 + tmp3;   // phase 2
+   float tmp13 = tmp0 - tmp3;
+   float tmp11 = tmp1 + tmp2;
+   float tmp12 = tmp1 - tmp2;
+
+   d0 = tmp10 + tmp11;       // phase 3
+   d4 = tmp10 - tmp11;
+
+   z1 = (tmp12 + tmp13) * 0.707106781f; // c4
+   d2 = tmp13 + z1;       // phase 5
+   d6 = tmp13 - z1;
+
+   // Odd part
+   tmp10 = tmp4 + tmp5;       // phase 2
+   tmp11 = tmp5 + tmp6;
+   tmp12 = tmp6 + tmp7;
+
+   // The rotator is modified from fig 4-8 to avoid extra negations.
+   z5 = (tmp10 - tmp12) * 0.382683433f; // c6
+   z2 = tmp10 * 0.541196100f + z5; // c2-c6
+   z4 = tmp12 * 1.306562965f + z5; // c2+c6
+   z3 = tmp11 * 0.707106781f; // c4
+
+   z11 = tmp7 + z3;      // phase 5
+   z13 = tmp7 - z3;
+
+   *d5p = z13 + z2;         // phase 6
+   *d3p = z13 - z2;
+   *d1p = z11 + z4;
+   *d7p = z11 - z4;
+   // the even outputs were computed into locals above; store them back last
+   *d0p = d0;  *d2p = d2;  *d4p = d4;  *d6p = d6;
+}
+// Splits a coefficient into the pair consumed by stbiw__jpg_writeBits: bits[1] = number of bits needed for |val| (at least 1),
+// bits[0] = the low bits[1] bits of val, where a negative val is first decremented by one.
+static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) {
+   int magnitude = val < 0 ? -val : val;
+   unsigned short nbits = 1;
+   for (magnitude >>= 1; magnitude; magnitude >>= 1) ++nbits;
+   if (val < 0) --val; // negatives are stored as (val - 1) masked to nbits
+   bits[1] = nbits;
+   bits[0] = val & ((1<<nbits)-1);
+}
+// Transforms, quantizes and Huffman-encodes one 8x8 block. CDU is overwritten by the DCT, fdtbl is the per-coefficient multiplier, DC is the DC value returned for the previous block of the same component. Returns this block's quantized DC value.
+static int stbiw__jpg_processDU(stbi__write_context *s, int *bitBuf, int *bitCnt, float *CDU, float *fdtbl, int DC, const unsigned short HTDC[256][2], const unsigned short HTAC[256][2]) {
+   const unsigned short EOB[2] = { HTAC[0x00][0], HTAC[0x00][1] }; // AC code for symbol 0x00: the rest of the block is zero
+   const unsigned short M16zeroes[2] = { HTAC[0xF0][0], HTAC[0xF0][1] }; // AC code for symbol 0xF0: emitted once per 16 skipped zeros
+   int dataOff, i, diff, end0pos;
+   int DU[64];
+
+   // DCT rows
+   for(dataOff=0; dataOff<64; dataOff+=8) {
+      stbiw__jpg_DCT(&CDU[dataOff], &CDU[dataOff+1], &CDU[dataOff+2], &CDU[dataOff+3], &CDU[dataOff+4], &CDU[dataOff+5], &CDU[dataOff+6], &CDU[dataOff+7]);
+   }
+   // DCT columns
+   for(dataOff=0; dataOff<8; ++dataOff) {
+      stbiw__jpg_DCT(&CDU[dataOff], &CDU[dataOff+8], &CDU[dataOff+16], &CDU[dataOff+24], &CDU[dataOff+32], &CDU[dataOff+40], &CDU[dataOff+48], &CDU[dataOff+56]);
+   }
+   // Quantize/descale/zigzag the coefficients
+   for(i=0; i<64; ++i) {
+      float v = CDU[i]*fdtbl[i];
+      // DU[stbiw__jpg_ZigZag[i]] = (int)(v < 0 ? ceilf(v - 0.5f) : floorf(v + 0.5f));
+      // ceilf() and floorf() are C99, not C89, but I /think/ they're not needed here anyway?
+      DU[stbiw__jpg_ZigZag[i]] = (int)(v < 0 ? v - 0.5f : v + 0.5f);
+   }
+
+   // Encode DC
+   diff = DU[0] - DC; // the DC coefficient is coded as a difference from the previous block's DC
+   if (diff == 0) {
+      stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[0]);
+   } else {
+      unsigned short bits[2];
+      stbiw__jpg_calcBits(diff, bits);
+      stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTDC[bits[1]]); // DC table is indexed by the bit length of diff
+      stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits);
+   }
+   // Encode ACs
+   end0pos = 63;
+   for(; (end0pos>0)&&(DU[end0pos]==0); --end0pos) {
+   }
+   // end0pos = first element in reverse order !=0
+   if(end0pos == 0) {
+      stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB);
+      return DU[0];
+   }
+   for(i = 1; i <= end0pos; ++i) {
+      int startpos = i;
+      int nrzeroes;
+      unsigned short bits[2];
+      for (; DU[i]==0 && i<=end0pos; ++i) { // DU[end0pos] is non-zero here, so this scan stops at or before end0pos
+      }
+      nrzeroes = i-startpos;
+      if ( nrzeroes >= 16 ) {
+         int lng = nrzeroes>>4;
+         int nrmarker;
+         for (nrmarker=1; nrmarker <= lng; ++nrmarker)
+            stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes);
+         nrzeroes &= 15; // only the remainder (< 16) goes into the run/size symbol below
+      }
+      stbiw__jpg_calcBits(DU[i], bits);
+      stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes<<4)+bits[1]]); // AC table index = (zero run << 4) + bit length
+      stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits);
+   }
+   if(end0pos != 63) {
+      stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB);
+   }
+   return DU[0];
+}
+// Writes a JPEG to `s`. data: width*height pixels of `comp` (1..4) bytes each, rows tightly packed; for comp <= 2 the first channel is used as grey. quality: 1..100, 0 selects 90. Returns 1 on success, 0 if the arguments are rejected (nothing is written in that case).
+static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) {
+   // Constants that don't pollute global namespace
+   static const unsigned char std_dc_luminance_nrcodes[] = {0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0};
+   static const unsigned char std_dc_luminance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11};
+   static const unsigned char std_ac_luminance_nrcodes[] = {0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d};
+   static const unsigned char std_ac_luminance_values[] = {
+      0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08,
+      0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28,
+      0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,
+      0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89,
+      0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,
+      0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2,
+      0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa
+   };
+   static const unsigned char std_dc_chrominance_nrcodes[] = {0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0};
+   static const unsigned char std_dc_chrominance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11};
+   static const unsigned char std_ac_chrominance_nrcodes[] = {0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77};
+   static const unsigned char std_ac_chrominance_values[] = {
+      0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,
+      0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26,
+      0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,
+      0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87,
+      0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,
+      0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,
+      0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa
+   };
+   // Huffman tables
+   static const unsigned short YDC_HT[256][2] = { {0,2},{2,3},{3,3},{4,3},{5,3},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9}};
+   static const unsigned short UVDC_HT[256][2] = { {0,2},{1,2},{2,2},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9},{1022,10},{2046,11}};
+   static const unsigned short YAC_HT[256][2] = {
+      {10,4},{0,2},{1,2},{4,3},{11,4},{26,5},{120,7},{248,8},{1014,10},{65410,16},{65411,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {12,4},{27,5},{121,7},{502,9},{2038,11},{65412,16},{65413,16},{65414,16},{65415,16},{65416,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {28,5},{249,8},{1015,10},{4084,12},{65417,16},{65418,16},{65419,16},{65420,16},{65421,16},{65422,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {58,6},{503,9},{4085,12},{65423,16},{65424,16},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {59,6},{1016,10},{65430,16},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {122,7},{2039,11},{65438,16},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {123,7},{4086,12},{65446,16},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {250,8},{4087,12},{65454,16},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {504,9},{32704,15},{65462,16},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {505,9},{65470,16},{65471,16},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {506,9},{65479,16},{65480,16},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {1017,10},{65488,16},{65489,16},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {1018,10},{65497,16},{65498,16},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {2040,11},{65506,16},{65507,16},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {65515,16},{65516,16},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {2041,11},{65525,16},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0}
+   };
+   static const unsigned short UVAC_HT[256][2] = {
+      {0,2},{1,2},{4,3},{10,4},{24,5},{25,5},{56,6},{120,7},{500,9},{1014,10},{4084,12},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {11,4},{57,6},{246,8},{501,9},{2038,11},{4085,12},{65416,16},{65417,16},{65418,16},{65419,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {26,5},{247,8},{1015,10},{4086,12},{32706,15},{65420,16},{65421,16},{65422,16},{65423,16},{65424,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {27,5},{248,8},{1016,10},{4087,12},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{65430,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {58,6},{502,9},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{65438,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {59,6},{1017,10},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{65446,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {121,7},{2039,11},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{65454,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {122,7},{2040,11},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{65462,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {249,8},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{65470,16},{65471,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {503,9},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{65479,16},{65480,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {504,9},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{65488,16},{65489,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {505,9},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{65497,16},{65498,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {506,9},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{65506,16},{65507,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {2041,11},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{65515,16},{65516,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {16352,14},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{65525,16},{0,0},{0,0},{0,0},{0,0},{0,0},
+      {1018,10},{32707,15},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0}
+   };
+   static const int YQT[] = {16,11,10,16,24,40,51,61,12,12,14,19,26,58,60,55,14,13,16,24,40,57,69,56,14,17,22,29,51,87,80,62,18,22,
+                             37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99};
+   static const int UVQT[] = {17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99,
+                              99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99};
+   static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f,
+                                 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f };
+
+   int row, col, i, k;
+   float fdtbl_Y[64], fdtbl_UV[64];
+   unsigned char YTable[64], UVTable[64];
+   // Reject missing data, bad channel counts, and dimensions that are non-positive or do not fit the two bytes written per dimension in head1 below.
+   if(!data || width <= 0 || height <= 0 || width > 0xFFFF || height > 0xFFFF || comp > 4 || comp < 1) {
+      return 0;
+   }
+
+   quality = quality ? quality : 90; // 0 selects the default quality
+   quality = quality < 1 ? 1 : quality > 100 ? 100 : quality;
+   quality = quality < 50 ? 5000 / quality : 200 - quality * 2; // from here on `quality` is a percentage applied to the base tables
+
+   for(i = 0; i < 64; ++i) {
+      int uvti, yti = (YQT[i]*quality+50)/100;
+      YTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (yti < 1 ? 1 : yti > 255 ? 255 : yti);
+      uvti = (UVQT[i]*quality+50)/100;
+      UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (uvti < 1 ? 1 : uvti > 255 ? 255 : uvti);
+   }
+   // Fold the quantizer and the row/column scale factors into one multiplier per coefficient
+   for(row = 0, k = 0; row < 8; ++row) {
+      for(col = 0; col < 8; ++col, ++k) {
+         fdtbl_Y[k]  = 1 / (YTable [stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]);
+         fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]);
+      }
+   }
+
+   // Write Headers
+   {
+      static const unsigned char head0[] = { 0xFF,0xD8,0xFF,0xE0,0,0x10,'J','F','I','F',0,1,1,0,0,1,0,1,0,0,0xFF,0xDB,0,0x84,0 };
+      static const unsigned char head2[] = { 0xFF,0xDA,0,0xC,3,1,0,2,0x11,3,0x11,0,0x3F,0 };
+      const unsigned char head1[] = { 0xFF,0xC0,0,0x11,8,(unsigned char)(height>>8),STBIW_UCHAR(height),(unsigned char)(width>>8),STBIW_UCHAR(width),
+                                      3,1,0x11,0,2,0x11,1,3,0x11,1,0xFF,0xC4,0x01,0xA2,0 };
+      s->func(s->context, (void*)head0, sizeof(head0));
+      s->func(s->context, (void*)YTable, sizeof(YTable));
+      stbiw__putc(s, 1);
+      s->func(s->context, UVTable, sizeof(UVTable));
+      s->func(s->context, (void*)head1, sizeof(head1));
+      s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1);
+      s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values));
+      stbiw__putc(s, 0x10); // HTYACinfo
+      s->func(s->context, (void*)(std_ac_luminance_nrcodes+1), sizeof(std_ac_luminance_nrcodes)-1);
+      s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values));
+      stbiw__putc(s, 1); // HTUDCinfo
+      s->func(s->context, (void*)(std_dc_chrominance_nrcodes+1), sizeof(std_dc_chrominance_nrcodes)-1);
+      s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values));
+      stbiw__putc(s, 0x11); // HTUACinfo
+      s->func(s->context, (void*)(std_ac_chrominance_nrcodes+1), sizeof(std_ac_chrominance_nrcodes)-1);
+      s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values));
+      s->func(s->context, (void*)head2, sizeof(head2));
+   }
+
+   // Encode 8x8 macroblocks
+   {
+      static const unsigned short fillBits[] = {0x7F, 7};
+      const unsigned char *imageData = (const unsigned char *)data;
+      int DCY=0, DCU=0, DCV=0;
+      int bitBuf=0, bitCnt=0;
+      // comp == 2 is grey+alpha (alpha is ignored)
+      int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0;
+      int x, y, pos;
+      for(y = 0; y < height; y += 8) {
+         for(x = 0; x < width; x += 8) {
+            float YDU[64], UDU[64], VDU[64];
+            for(row = y, pos = 0; row < y+8; ++row) {
+               // row >= height => use last input row
+               int clamped_row = (row < height) ? row : height - 1;
+               int base_p = (stbi__flip_vertically_on_write ? (height-1-clamped_row) : clamped_row)*width*comp;
+               for(col = x; col < x+8; ++col, ++pos) {
+                  float r, g, b;
+                  // if col >= width => use pixel from last input column
+                  int p = base_p + ((col < width) ? col : (width-1))*comp;
+
+                  r = imageData[p+0];
+                  g = imageData[p+ofsG];
+                  b = imageData[p+ofsB];
+                  YDU[pos]=+0.29900f*r+0.58700f*g+0.11400f*b-128;
+                  UDU[pos]=-0.16874f*r-0.33126f*g+0.50000f*b;
+                  VDU[pos]=+0.50000f*r-0.41869f*g-0.08131f*b;
+               }
+            }
+
+            DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, YDU, fdtbl_Y, DCY, YDC_HT, YAC_HT);
+            DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, UDU, fdtbl_UV, DCU, UVDC_HT, UVAC_HT);
+            DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, VDU, fdtbl_UV, DCV, UVDC_HT, UVAC_HT);
+         }
+      }
+
+      // Do the bit alignment of the EOI marker
+      stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits);
+   }
+
+   // EOI
+   stbiw__putc(s, 0xFF);
+   stbiw__putc(s, 0xD9);
+
+   return 1;
+}
+// Encodes a JPEG, routing the output through the user-supplied `func`/`context` pair. Returns whatever stbi_write_jpg_core returns (1 on success, 0 if the arguments are rejected).
+STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality)
+{
+   stbi__write_context sink;
+   stbi__start_write_callbacks(&sink, func, context);
+   return stbi_write_jpg_core(&sink, x, y, comp, data, quality);
+}
+
+
+#ifndef STBI_WRITE_NO_STDIO
+STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality)
+{
+   // Returns 0 when the output file cannot be started; otherwise the encoder's result, after the file has been closed.
+   stbi__write_context file_ctx;
+   int encoded_ok;
+   if (!stbi__start_write_file(&file_ctx,filename)) return 0;
+   encoded_ok = stbi_write_jpg_core(&file_ctx, x, y, comp, data, quality);
+   stbi__end_write_file(&file_ctx);
+   return encoded_ok;
+}
+#endif
+
+#endif // STB_IMAGE_WRITE_IMPLEMENTATION
+
+/* Revision history
+      1.10  (2019-02-07)
+             support utf8 filenames in Windows; fix warnings and platform ifdefs
+      1.09  (2018-02-11)
+             fix typo in zlib quality API, improve STB_I_W_STATIC in C++
+      1.08  (2018-01-29)
+             add stbi__flip_vertically_on_write, external zlib, zlib quality, choose PNG filter
+      1.07  (2017-07-24)
+             doc fix
+      1.06 (2017-07-23)
+             writing JPEG (using Jon Olick's code)
+      1.05   ???
+      1.04 (2017-03-03)
+             monochrome BMP expansion
+      1.03   ???
+      1.02 (2016-04-02)
+             avoid allocating large structures on the stack
+      1.01 (2016-01-16)
+             STBIW_REALLOC_SIZED: support allocators with no realloc support
+             avoid race-condition in crc initialization
+             minor compile issues
+      1.00 (2015-09-14)
+             installable file IO function
+      0.99 (2015-09-13)
+             warning fixes; TGA rle support
+      0.98 (2015-04-08)
+             added STBIW_MALLOC, STBIW_ASSERT etc
+      0.97 (2015-01-18)
+             fixed HDR asserts, rewrote HDR rle logic
+      0.96 (2015-01-17)
+             add HDR output
+             fix monochrome BMP
+      0.95 (2014-08-17)
+             add monochrome TGA output
+      0.94 (2014-05-31)
+             rename private functions to avoid conflicts with stb_image.h
+      0.93 (2014-05-27)
+             warning fixes
+      0.92 (2010-08-01)
+             casts to unsigned char to fix warnings
+      0.91 (2010-07-17)
+             first public release
+      0.90   first internal release
+*/
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
\ No newline at end of file
diff --git a/third-party/tiny_gltf/tiny_gltf.cpp b/third-party/tiny_gltf/tiny_gltf.cpp
new file mode 100644
index 0000000000..3f27915208
--- /dev/null
+++ b/third-party/tiny_gltf/tiny_gltf.cpp
@@ -0,0 +1,4 @@
+#define TINYGLTF_IMPLEMENTATION  // presumably makes tiny_gltf.h emit its function definitions in this translation unit - TODO confirm
+#define STB_IMAGE_IMPLEMENTATION  // presumably does the same for the bundled stb_image.h - TODO confirm it is included via tiny_gltf.h
+#define STB_IMAGE_WRITE_IMPLEMENTATION  // enables the implementation section of stb_image_write.h, which is guarded by this macro
+#include "tiny_gltf.h"
diff --git a/third-party/tiny_gltf/tiny_gltf.h b/third-party/tiny_gltf/tiny_gltf.h
new file mode 100644
index 0000000000..98cc4e42fc
--- /dev/null
+++ b/third-party/tiny_gltf/tiny_gltf.h
@@ -0,0 +1,7748 @@
+//
+// Header-only tiny glTF 2.0 loader and serializer.
+//
+//
+// The MIT License (MIT)
+//
+// Copyright (c) 2015 - Present Syoyo Fujita, Aurélien Chatelain and many
+// contributors.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// Version:
+//  - v2.5.0 Add SetPreserveImageChannels() option to load image data as is.
+//  - v2.4.3 Fix null object output when material has all default
+//  parameters.
+//  - v2.4.2 Decode percent-encoded URI.
+//  - v2.4.1 Fix some glTF object classes lacking the `extensions` and/or
+//  `extras` property.
+//  - v2.4.0 Experimental RapidJSON and C++14 support(Thanks to @jrkoone).
+//  - v2.3.1 Set default value of minFilter and magFilter in Sampler to -1.
+//  - v2.3.0 Modified Material representation according to glTF 2.0 schema
+//           (and introduced TextureInfo class)
+//           Change the behavior of `Value::IsNumber`. It returns true if the
+//           value is either int or real.
+//  - v2.2.0 Add loading 16bit PNG support. Add Sparse accessor support(Thanks
+//  to @Ybalrid)
+//  - v2.1.0 Add draco compression.
+//  - v2.0.1 Add comparison feature (Thanks to @Selmar).
+//  - v2.0.0 glTF 2.0!.
+//
+// Tiny glTF loader is using following third party libraries:
+//
+//  - jsonhpp: C++ JSON library.
+//  - base64: base64 decode/encode library.
+//  - stb_image: Image loading library.
+//
+#ifndef TINY_GLTF_H_
+#define TINY_GLTF_H_
+
+#include <array>
+#include <cassert>
+#include <cmath>  // std::fabs
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <limits>
+#include <map>
+#include <string>
+#include <vector>
+
+#ifndef TINYGLTF_USE_CPP14
+#include <functional>
+#endif
+
+#ifdef __ANDROID__
+#ifdef TINYGLTF_ANDROID_LOAD_FROM_ASSETS
+#include <android/asset_manager.h>
+#endif
+#endif
+// TINYGLTF_NOEXCEPT expands to nothing on GCC 4.8 and older, and to `noexcept` on every other compiler.
+#ifdef __GNUC__
+#if (__GNUC__ < 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ <= 8))
+#define TINYGLTF_NOEXCEPT
+#else
+#define TINYGLTF_NOEXCEPT noexcept
+#endif
+#else
+#define TINYGLTF_NOEXCEPT noexcept
+#endif
+// Declares a defaulted destructor, copy/move constructors and copy/move assignment operators for class `x`; the move operations carry TINYGLTF_NOEXCEPT. Meant to be placed inside the class body.
+#define DEFAULT_METHODS(x)             \
+  ~x() = default;                      \
+  x(const x &) = default;              \
+  x(x &&) TINYGLTF_NOEXCEPT = default; \
+  x &operator=(const x &) = default;   \
+  x &operator=(x &&) TINYGLTF_NOEXCEPT = default;
+
+namespace tinygltf {
+// Primitive draw modes.
+#define TINYGLTF_MODE_POINTS (0)
+#define TINYGLTF_MODE_LINE (1)
+#define TINYGLTF_MODE_LINE_LOOP (2)
+#define TINYGLTF_MODE_LINE_STRIP (3)
+#define TINYGLTF_MODE_TRIANGLES (4)
+#define TINYGLTF_MODE_TRIANGLE_STRIP (5)
+#define TINYGLTF_MODE_TRIANGLE_FAN (6)
+// Component types; GetComponentSizeInBytes() maps each of these to its size in bytes.
+#define TINYGLTF_COMPONENT_TYPE_BYTE (5120)
+#define TINYGLTF_COMPONENT_TYPE_UNSIGNED_BYTE (5121)
+#define TINYGLTF_COMPONENT_TYPE_SHORT (5122)
+#define TINYGLTF_COMPONENT_TYPE_UNSIGNED_SHORT (5123)
+#define TINYGLTF_COMPONENT_TYPE_INT (5124)
+#define TINYGLTF_COMPONENT_TYPE_UNSIGNED_INT (5125)
+#define TINYGLTF_COMPONENT_TYPE_FLOAT (5126)
+#define TINYGLTF_COMPONENT_TYPE_DOUBLE (5130) // OpenGL double type. Note that some glTF 2.0 validators do not support the double type, even though the schema seems to allow any integer value: https://github.com/KhronosGroup/glTF/blob/b9884a2fd45130b4d673dd6c8a706ee21ee5c5f7/specification/2.0/schema/accessor.schema.json#L22
+// Texture filter modes.
+#define TINYGLTF_TEXTURE_FILTER_NEAREST (9728)
+#define TINYGLTF_TEXTURE_FILTER_LINEAR (9729)
+#define TINYGLTF_TEXTURE_FILTER_NEAREST_MIPMAP_NEAREST (9984)
+#define TINYGLTF_TEXTURE_FILTER_LINEAR_MIPMAP_NEAREST (9985)
+#define TINYGLTF_TEXTURE_FILTER_NEAREST_MIPMAP_LINEAR (9986)
+#define TINYGLTF_TEXTURE_FILTER_LINEAR_MIPMAP_LINEAR (9987)
+// Texture wrap modes.
+#define TINYGLTF_TEXTURE_WRAP_REPEAT (10497)
+#define TINYGLTF_TEXTURE_WRAP_CLAMP_TO_EDGE (33071)
+#define TINYGLTF_TEXTURE_WRAP_MIRRORED_REPEAT (33648)
+
+// Redeclarations of the above for technique.parameters.
+#define TINYGLTF_PARAMETER_TYPE_BYTE (5120)
+#define TINYGLTF_PARAMETER_TYPE_UNSIGNED_BYTE (5121)
+#define TINYGLTF_PARAMETER_TYPE_SHORT (5122)
+#define TINYGLTF_PARAMETER_TYPE_UNSIGNED_SHORT (5123)
+#define TINYGLTF_PARAMETER_TYPE_INT (5124)
+#define TINYGLTF_PARAMETER_TYPE_UNSIGNED_INT (5125)
+#define TINYGLTF_PARAMETER_TYPE_FLOAT (5126)
+
+#define TINYGLTF_PARAMETER_TYPE_FLOAT_VEC2 (35664)
+#define TINYGLTF_PARAMETER_TYPE_FLOAT_VEC3 (35665)
+#define TINYGLTF_PARAMETER_TYPE_FLOAT_VEC4 (35666)
+
+#define TINYGLTF_PARAMETER_TYPE_INT_VEC2 (35667)
+#define TINYGLTF_PARAMETER_TYPE_INT_VEC3 (35668)
+#define TINYGLTF_PARAMETER_TYPE_INT_VEC4 (35669)
+
+#define TINYGLTF_PARAMETER_TYPE_BOOL (35670)
+#define TINYGLTF_PARAMETER_TYPE_BOOL_VEC2 (35671)
+#define TINYGLTF_PARAMETER_TYPE_BOOL_VEC3 (35672)
+#define TINYGLTF_PARAMETER_TYPE_BOOL_VEC4 (35673)
+
+#define TINYGLTF_PARAMETER_TYPE_FLOAT_MAT2 (35674)
+#define TINYGLTF_PARAMETER_TYPE_FLOAT_MAT3 (35675)
+#define TINYGLTF_PARAMETER_TYPE_FLOAT_MAT4 (35676)
+
+#define TINYGLTF_PARAMETER_TYPE_SAMPLER_2D (35678)
+
+// End parameter types
+// Element shape tags; GetNumComponentsInType() gives the component count for SCALAR, VEC* and MAT* (it does not handle VECTOR or MATRIX).
+#define TINYGLTF_TYPE_VEC2 (2)
+#define TINYGLTF_TYPE_VEC3 (3)
+#define TINYGLTF_TYPE_VEC4 (4)
+#define TINYGLTF_TYPE_MAT2 (32 + 2)
+#define TINYGLTF_TYPE_MAT3 (32 + 3)
+#define TINYGLTF_TYPE_MAT4 (32 + 4)
+#define TINYGLTF_TYPE_SCALAR (64 + 1)
+#define TINYGLTF_TYPE_VECTOR (64 + 4)
+#define TINYGLTF_TYPE_MATRIX (64 + 16)
+// Image file formats.
+#define TINYGLTF_IMAGE_FORMAT_JPEG (0)
+#define TINYGLTF_IMAGE_FORMAT_PNG (1)
+#define TINYGLTF_IMAGE_FORMAT_BMP (2)
+#define TINYGLTF_IMAGE_FORMAT_GIF (3)
+// Texture pixel formats.
+#define TINYGLTF_TEXTURE_FORMAT_ALPHA (6406)
+#define TINYGLTF_TEXTURE_FORMAT_RGB (6407)
+#define TINYGLTF_TEXTURE_FORMAT_RGBA (6408)
+#define TINYGLTF_TEXTURE_FORMAT_LUMINANCE (6409)
+#define TINYGLTF_TEXTURE_FORMAT_LUMINANCE_ALPHA (6410)
+
+#define TINYGLTF_TEXTURE_TARGET_TEXTURE2D (3553)
+#define TINYGLTF_TEXTURE_TYPE_UNSIGNED_BYTE (5121)
+
+#define TINYGLTF_TARGET_ARRAY_BUFFER (34962)
+#define TINYGLTF_TARGET_ELEMENT_ARRAY_BUFFER (34963)
+
+#define TINYGLTF_SHADER_TYPE_VERTEX_SHADER (35633)
+#define TINYGLTF_SHADER_TYPE_FRAGMENT_SHADER (35632)
+// Approximate comparison of doubles: TINYGLTF_DOUBLE_EQUAL(a, b) is true when |b - a| is below 1e-12.
+#define TINYGLTF_DOUBLE_EPS (1.e-12)
+#define TINYGLTF_DOUBLE_EQUAL(a, b) (std::fabs((b) - (a)) < TINYGLTF_DOUBLE_EPS)
+// NOTE(review): this is a non-inline, non-static variable definition in a header. With TINYGLTF_ANDROID_LOAD_FROM_ASSETS set, every translation unit including this header defines tinygltf::asset_manager - confirm it is only included once per binary.
+#ifdef __ANDROID__
+#ifdef TINYGLTF_ANDROID_LOAD_FROM_ASSETS
+AAssetManager *asset_manager = nullptr;
+#endif
+#endif
+// Tag stored in Value::type_ that records which kind of value a tinygltf::Value holds; the Value::Is*() predicates compare against it.
+typedef enum {
+  NULL_TYPE,
+  REAL_TYPE,
+  INT_TYPE,
+  BOOL_TYPE,
+  STRING_TYPE,
+  ARRAY_TYPE,
+  BINARY_TYPE,
+  OBJECT_TYPE
+} Type;
+// Returns the size in bytes of one component of the given TINYGLTF_COMPONENT_TYPE_* value, or -1 if the value is not one of them.
+static inline int32_t GetComponentSizeInBytes(uint32_t componentType) {
+  switch (componentType) {
+    // 8-bit components
+    case TINYGLTF_COMPONENT_TYPE_BYTE:
+    case TINYGLTF_COMPONENT_TYPE_UNSIGNED_BYTE:
+      return 1;
+    // 16-bit components
+    case TINYGLTF_COMPONENT_TYPE_SHORT:
+    case TINYGLTF_COMPONENT_TYPE_UNSIGNED_SHORT:
+      return 2;
+    // 32-bit components (integer and float)
+    case TINYGLTF_COMPONENT_TYPE_INT:
+    case TINYGLTF_COMPONENT_TYPE_UNSIGNED_INT:
+    case TINYGLTF_COMPONENT_TYPE_FLOAT:
+      return 4;
+    case TINYGLTF_COMPONENT_TYPE_DOUBLE:
+      return 8;
+    default:
+      // Unknown component type
+      return -1;
+  }
+}
+// Returns how many components one element of the given TINYGLTF_TYPE_* shape has (SCALAR, VEC2..4, MAT2..4), or -1 for any other value.
+static inline int32_t GetNumComponentsInType(uint32_t ty) {
+  switch (ty) {
+    case TINYGLTF_TYPE_SCALAR:
+      return 1;
+    case TINYGLTF_TYPE_VEC2:
+      return 2;
+    case TINYGLTF_TYPE_VEC3:
+      return 3;
+    case TINYGLTF_TYPE_VEC4:
+    case TINYGLTF_TYPE_MAT2:
+      return 4;
+    case TINYGLTF_TYPE_MAT3:
+      return 9;
+    case TINYGLTF_TYPE_MAT4:
+      return 16;
+    default:
+      // Unknown type (this includes TINYGLTF_TYPE_VECTOR and TINYGLTF_TYPE_MATRIX)
+      return -1;
+  }
+}
+
+// TODO(syoyo): Move these functions to TinyGLTF class
+bool IsDataURI(const std::string &in);
+bool DecodeDataURI(std::vector<unsigned char> *out, std::string &mime_type,
+                   const std::string &in, size_t reqBytes, bool checkSize);
+
+#ifdef __clang__
+#pragma clang diagnostic push
+// Suppress warning for : static Value null_value
+#pragma clang diagnostic ignored "-Wexit-time-destructors"
+#pragma clang diagnostic ignored "-Wpadded"
+#endif
+
+// Simple class to represent a JSON value.
+//
+// Holds one of: null, bool, int, real, string, binary blob, array or object.
+// `type_` records which payload member is meaningful.
+class Value {
+ public:
+  typedef std::vector<Value> Array;
+  typedef std::map<std::string, Value> Object;
+
+  // Null value; every payload member keeps its in-class default.
+  Value() {}
+
+  // One explicit constructor per payload kind.
+  explicit Value(bool b) : type_(BOOL_TYPE), boolean_value_(b) {}
+  // An int is mirrored into the real payload as well.
+  explicit Value(int i) : type_(INT_TYPE), int_value_(i), real_value_(i) {}
+  explicit Value(double n) : type_(REAL_TYPE), real_value_(n) {}
+
+  explicit Value(const std::string &s) : type_(STRING_TYPE), string_value_(s) {}
+  explicit Value(std::string &&s)
+      : type_(STRING_TYPE), string_value_(std::move(s)) {}
+
+  // Copies the `n` bytes starting at `p`. Range construction replaces the
+  // former resize + memcpy pair, which passed a null source pointer to memcpy
+  // when called with (nullptr, 0).
+  explicit Value(const unsigned char *p, size_t n)
+      : type_(BINARY_TYPE), binary_value_(p, p + n) {}
+  explicit Value(std::vector<unsigned char> &&v) noexcept
+      : type_(BINARY_TYPE), binary_value_(std::move(v)) {}
+
+  explicit Value(const Array &a) : type_(ARRAY_TYPE), array_value_(a) {}
+  explicit Value(Array &&a) noexcept
+      : type_(ARRAY_TYPE), array_value_(std::move(a)) {}
+
+  explicit Value(const Object &o) : type_(OBJECT_TYPE), object_value_(o) {}
+  explicit Value(Object &&o) noexcept
+      : type_(OBJECT_TYPE), object_value_(std::move(o)) {}
+
+  DEFAULT_METHODS(Value)
+
+  // Tag of the stored value, narrowed to char.
+  char Type() const { return static_cast<char>(type_); }
+
+  bool IsBool() const { return (type_ == BOOL_TYPE); }
+
+  bool IsInt() const { return (type_ == INT_TYPE); }
+
+  // True for both REAL_TYPE and INT_TYPE.
+  bool IsNumber() const { return (type_ == REAL_TYPE) || (type_ == INT_TYPE); }
+
+  bool IsReal() const { return (type_ == REAL_TYPE); }
+
+  bool IsString() const { return (type_ == STRING_TYPE); }
+
+  bool IsBinary() const { return (type_ == BINARY_TYPE); }
+
+  bool IsArray() const { return (type_ == ARRAY_TYPE); }
+
+  bool IsObject() const { return (type_ == OBJECT_TYPE); }
+
+  // Use this function if you want to have number value as double.
+  double GetNumberAsDouble() const {
+    return (type_ == INT_TYPE) ? double(int_value_) : real_value_;
+  }
+
+  // Use this function if you want to have number value as int.
+  // A real value is converted with int(); any other type yields int_value_.
+  // TODO(syoyo): Support int value larger than 32 bits
+  int GetNumberAsInt() const {
+    return (type_ == REAL_TYPE) ? int(real_value_) : int_value_;
+  }
+
+  // Typed access to the raw payload members; only the declarations live here.
+  template <typename T>
+  const T &Get() const;
+  template <typename T>
+  T &Get();
+
+  // Lookup value from an array. Asserts that this is an array and that `idx`
+  // is non-negative; an index past the end yields a shared null Value.
+  const Value &Get(int idx) const {
+    static Value null_value;
+    assert(IsArray());
+    assert(idx >= 0);
+    const size_t pos = static_cast<size_t>(idx);
+    if (pos < array_value_.size()) return array_value_[pos];
+    return null_value;
+  }
+
+  // Lookup value from a key-value pair. Asserts that this is an object; a
+  // missing key yields a shared null Value.
+  const Value &Get(const std::string &key) const {
+    static Value null_value;
+    assert(IsObject());
+    const Object::const_iterator found = object_value_.find(key);
+    if (found == object_value_.end()) return null_value;
+    return found->second;
+  }
+
+  // Number of array elements; 0 when this is not an array.
+  size_t ArrayLen() const { return IsArray() ? array_value_.size() : 0; }
+
+  // Valid only for object type; always false for any other type.
+  bool Has(const std::string &key) const {
+    return IsObject() && (object_value_.find(key) != object_value_.end());
+  }
+
+  // List keys (empty unless this is an object).
+  std::vector<std::string> Keys() const {
+    std::vector<std::string> keys;
+    if (!IsObject()) return keys;  // empty
+
+    keys.reserve(object_value_.size());
+    for (const auto &entry : object_value_) {
+      keys.push_back(entry.first);
+    }
+    return keys;
+  }
+
+  // Element count of an array, member count of an object, 0 otherwise.
+  // Reads the container sizes directly instead of copying every key into a
+  // temporary vector just to count them.
+  size_t Size() const {
+    if (IsArray()) return array_value_.size();
+    return IsObject() ? object_value_.size() : 0;
+  }
+
+  bool operator==(const tinygltf::Value &other) const;
+
+ protected:
+  // Tag selecting which payload member below is meaningful.
+  int type_ = NULL_TYPE;
+
+  int int_value_ = 0;
+  double real_value_ = 0.0;
+  std::string string_value_;
+  std::vector<unsigned char> binary_value_;
+  Array array_value_;
+  Object object_value_;
+  bool boolean_value_ = false;
+};
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+// Get<T>() specialisations: each one returns the named payload member as-is.
+#define TINYGLTF_VALUE_GET(ctype, var)            \
+  template <>                                     \
+  inline const ctype &Value::Get<ctype>() const { \
+    return var;                                   \
+  }                                               \
+  template <>                                     \
+  inline ctype &Value::Get<ctype>() {             \
+    return var;                                   \
+  }
+TINYGLTF_VALUE_GET(bool, boolean_value_)
+TINYGLTF_VALUE_GET(double, real_value_)
+TINYGLTF_VALUE_GET(int, int_value_)
+TINYGLTF_VALUE_GET(std::string, string_value_)
+TINYGLTF_VALUE_GET(std::vector<unsigned char>, binary_value_)
+TINYGLTF_VALUE_GET(Value::Array, array_value_)
+TINYGLTF_VALUE_GET(Value::Object, object_value_)
+#undef TINYGLTF_VALUE_GET
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wc++98-compat"
+#pragma clang diagnostic ignored "-Wpadded"
+#endif
+
+/// Aggregate object for representing a color
+using ColorValue = std::array<double, 4>;
+
+// === legacy interface ====
+// TODO(syoyo): Deprecate `Parameter` class.
+// Loosely typed material parameter. A bool, a number, a string, a number
+// array and a string -> double map are stored side by side; which of them
+// carries data depends on the parameter being represented.
+// (Legacy interface; see the TODO above about deprecating it.)
+struct Parameter {
+  bool bool_value = false;
+  bool has_number_value = false;
+  std::string string_value;
+  std::vector<double> number_array;
+  std::map<std::string, double> json_double_value;
+  double number_value = 0.0;
+
+  // Context sensitive methods: depending on the type of the Parameter you
+  // are accessing, these are either valid or not. The map lookups fall back
+  // to a fixed value when the key is absent.
+
+  /// Return the index of a texture if this Parameter is a texture map.
+  /// Returned value is only valid if the parameter represent a texture from a
+  /// material
+  int TextureIndex() const {
+    const auto found = json_double_value.find("index");
+    // No "index" entry is reported as -1
+    return (found == json_double_value.end()) ? -1 : int(found->second);
+  }
+
+  /// Return the index of a texture coordinate set if this Parameter is a
+  /// texture map. Returned value is only valid if the parameter represent a
+  /// texture from a material
+  int TextureTexCoord() const {
+    const auto found = json_double_value.find("texCoord");
+    // As per the spec, if texCoord is omitted, this parameter is 0
+    return (found == json_double_value.end()) ? 0 : int(found->second);
+  }
+
+  /// Return the scale of a texture if this Parameter is a normal texture map.
+  /// Returned value is only valid if the parameter represent a normal texture
+  /// from a material
+  double TextureScale() const {
+    const auto found = json_double_value.find("scale");
+    // As per the spec, if scale is omitted, this parameter is 1
+    return (found == json_double_value.end()) ? 1.0 : found->second;
+  }
+
+  /// Return the strength of a texture if this Parameter is an occlusion map.
+  /// Returned value is only valid if the parameter represent an occlusion map
+  /// from a material
+  double TextureStrength() const {
+    const auto found = json_double_value.find("strength");
+    // As per the spec, if strength is omitted, this parameter is 1
+    return (found == json_double_value.end()) ? 1.0 : found->second;
+  }
+
+  /// Material factor, like the roughness or metalness of a material
+  /// Returned value is only valid if the parameter represent a texture from a
+  /// material
+  double Factor() const { return number_value; }
+
+  /// Return the color of a material as four components.
+  /// Returned value is only valid if the parameter represent a texture from a
+  /// material.
+  ///
+  /// Components that `number_array` does not provide are reported as 1.0.
+  /// The first three entries used to be read unconditionally, which indexed
+  /// past the end of an array holding fewer than three numbers; only the
+  /// fourth entry was guarded.
+  ColorValue ColorFactor() const {
+    // Start from all-ones and overwrite only the slots that exist.
+    ColorValue rgba = {{1.0, 1.0, 1.0, 1.0}};
+    const size_t available = number_array.size();
+    for (size_t i = 0; i < rgba.size() && i < available; ++i) {
+      rgba[i] = number_array[i];
+    }
+    return rgba;
+  }
+
+  Parameter() = default;
+  DEFAULT_METHODS(Parameter)
+  bool operator==(const Parameter &) const;
+};
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpadded"
+#endif
+
+typedef std::map<std::string, Parameter> ParameterMap;
+typedef std::map<std::string, Value> ExtensionMap;
+
+struct AnimationChannel {
+  int sampler{-1};          // required
+  int target_node{-1};      // required (index of the node to target)
+  std::string target_path;  // required in ["translation", "rotation", "scale",
+  // "weights"]
+  Value extras;
+  ExtensionMap extensions;
+  ExtensionMap target_extensions;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+  std::string target_extensions_json_string;
+
+  AnimationChannel() {}  // both indices default to -1 in-class
+  DEFAULT_METHODS(AnimationChannel)
+  bool operator==(const AnimationChannel &) const;
+};
+
+struct AnimationSampler {
+  int input{-1};   // required
+  int output{-1};  // required
+  std::string interpolation = "LINEAR";  // "LINEAR", "STEP","CUBICSPLINE" or
+  // user defined string. default "LINEAR"
+  Value extras;
+  ExtensionMap extensions;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+
+  AnimationSampler() {}  // defaults come from the in-class initializers
+  DEFAULT_METHODS(AnimationSampler)
+  bool operator==(const AnimationSampler &) const;
+};
+// Animation: named collection of channels and samplers.
+struct Animation {
+  std::string name;
+  std::vector<AnimationChannel> channels;
+  std::vector<AnimationSampler> samplers;
+  Value extras;
+  ExtensionMap extensions;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+
+  Animation() = default;
+  DEFAULT_METHODS(Animation)
+  bool operator==(const Animation &) const;
+};
+
+struct Skin {
+  std::string name;
+  int inverseBindMatrices;  // required here but not in the spec
+  int skeleton;             // The index of the node used as a skeleton root
+  std::vector<int> joints;  // Indices of skeleton nodes
+
+  Value extras;
+  ExtensionMap extensions;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+
+  // Both indices start out as -1.
+  Skin()
+      : inverseBindMatrices(-1),
+        skeleton(-1) {}
+  DEFAULT_METHODS(Skin)
+  bool operator==(const Skin &) const;
+};
+
+struct Sampler {
+  std::string name;
+  // glTF 2.0 spec does not define default value for `minFilter` and
+  // `magFilter`. Set -1 in TinyGLTF(issue #186)
+  // minFilter: optional. -1 = no filter defined. ["NEAREST", "LINEAR",
+  // "NEAREST_MIPMAP_NEAREST", "LINEAR_MIPMAP_NEAREST",
+  // "NEAREST_MIPMAP_LINEAR", "LINEAR_MIPMAP_LINEAR"]
+  int minFilter = -1;
+
+  // magFilter: optional. -1 = no filter defined. ["NEAREST", "LINEAR"]
+  int magFilter = -1;
+
+  // ["CLAMP_TO_EDGE", "MIRRORED_REPEAT", "REPEAT"], default "REPEAT"
+  int wrapS = TINYGLTF_TEXTURE_WRAP_REPEAT;
+
+  // ["CLAMP_TO_EDGE", "MIRRORED_REPEAT", "REPEAT"], default "REPEAT"
+  int wrapT = TINYGLTF_TEXTURE_WRAP_REPEAT;
+
+  // int wrapR = TINYGLTF_TEXTURE_WRAP_REPEAT;  // TinyGLTF extension. currently
+  // not used.
+
+  Value extras;
+  ExtensionMap extensions;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+
+  // The in-class initializers above already supply every default.
+  Sampler() {}
+  DEFAULT_METHODS(Sampler)
+  bool operator==(const Sampler &) const;
+};
+
+struct Image {
+  std::string name;
+  int width;
+  int height;
+  int component;
+  int bits;        // bit depth per channel. 8(byte), 16 or 32.
+  int pixel_type;  // pixel type(TINYGLTF_COMPONENT_TYPE_***). usually
+  // UBYTE(bits = 8) or USHORT(bits = 16)
+  std::vector<unsigned char> image;
+  int bufferView;        // (required if no uri)
+  std::string mimeType;  // (required if no uri) ["image/jpeg", "image/png",
+  // "image/bmp", "image/gif"]
+  std::string uri;       // (required if no mimeType) uri is not decoded(e.g.
+  // whitespace may be represented as %20)
+  Value extras;
+  ExtensionMap extensions;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+
+  // When this flag is true, data is stored to `image` in as-is format(e.g. jpeg
+  // compressed for "image/jpeg" mime) This feature is good if you use custom
+  // image loader function. (e.g. delayed decoding of images for faster glTF
+  // parsing) Default parser for Image does not provide as-is loading feature at
+  // the moment. (You can manipulate this by providing your own LoadImageData
+  // function)
+  bool as_is;
+
+  Image()
+      : width(-1),
+        height(-1),
+        component(-1),
+        bits(-1),
+        pixel_type(-1),
+        bufferView(-1),
+        as_is(false) {}
+  DEFAULT_METHODS(Image)
+
+  bool operator==(const Image &) const;
+};
+
+struct Texture {
+  std::string name;
+
+  int sampler{-1};
+  int source{-1};
+  Value extras;
+  ExtensionMap extensions;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+
+  Texture() {}  // sampler and source default to -1 in-class
+  DEFAULT_METHODS(Texture)
+
+  bool operator==(const Texture &) const;
+};
+
+struct TextureInfo {
+  int index{-1};    // required.
+  int texCoord{0};  // The set index of texture's TEXCOORD attribute used for
+  // texture coordinate mapping.
+
+  Value extras;
+  ExtensionMap extensions;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+
+  TextureInfo() {}  // defaults come from the in-class initializers
+  DEFAULT_METHODS(TextureInfo)
+  bool operator==(const TextureInfo &) const;
+};
+
+struct NormalTextureInfo {
+  int index{-1};      // required
+  int texCoord{0};    // The set index of texture's TEXCOORD attribute used for
+  // texture coordinate mapping.
+  double scale{1.0};  // scaledNormal = normalize((<sampled normal texture value>
+  // * 2.0 - 1.0) * vec3(<normal scale>, <normal scale>, 1.0))
+
+  Value extras;
+  ExtensionMap extensions;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+
+  NormalTextureInfo() {}  // defaults come from the in-class initializers
+  DEFAULT_METHODS(NormalTextureInfo)
+  bool operator==(const NormalTextureInfo &) const;
+};
+
+struct OcclusionTextureInfo {
+  int index{-1};         // required
+  int texCoord{0};       // The set index of texture's TEXCOORD attribute used
+  // for texture coordinate mapping.
+  double strength{1.0};  // occludedColor = lerp(color, color * <sampled
+  // occlusion texture value>, <occlusion strength>)
+
+  Value extras;
+  ExtensionMap extensions;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+
+  OcclusionTextureInfo() {}  // defaults come from the in-class initializers
+  DEFAULT_METHODS(OcclusionTextureInfo)
+  bool operator==(const OcclusionTextureInfo &) const;
+};
+
+// pbrMetallicRoughness class defined in glTF 2.0 spec.
+struct PbrMetallicRoughness {
+  // len = 4. default [1,1,1,1]
+  std::vector<double> baseColorFactor{1.0, 1.0, 1.0, 1.0};
+  TextureInfo baseColorTexture;
+  double metallicFactor{1.0};   // default 1
+  double roughnessFactor{1.0};  // default 1
+  TextureInfo metallicRoughnessTexture;
+
+  Value extras;
+  ExtensionMap extensions;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+
+  // The defaults ([1,1,1,1], 1 and 1) are supplied by the in-class
+  // initializers above, which leaves the constructor with nothing to do.
+  PbrMetallicRoughness() {}
+  DEFAULT_METHODS(PbrMetallicRoughness)
+  bool operator==(const PbrMetallicRoughness &) const;
+};
+
+// Each extension should be stored in a ParameterMap.
+// members not in the values could be included in the ParameterMap
+// to keep a single material model
+struct Material {
+  std::string name;
+
+  std::vector<double> emissiveFactor;  // length 3, [0, 0, 0]; starts empty here
+  std::string alphaMode = "OPAQUE";    // default "OPAQUE"
+  double alphaCutoff{0.5};             // default 0.5
+  bool doubleSided{false};             // default false;
+
+  PbrMetallicRoughness pbrMetallicRoughness;
+
+  NormalTextureInfo normalTexture;
+  OcclusionTextureInfo occlusionTexture;
+  TextureInfo emissiveTexture;
+
+  // For backward compatibility
+  // TODO(syoyo): Remove `values` and `additionalValues` in the next release.
+  ParameterMap values;
+  ParameterMap additionalValues;
+
+  ExtensionMap extensions;
+  Value extras;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+
+  Material() {}  // defaults come from the in-class initializers
+  DEFAULT_METHODS(Material)
+
+  bool operator==(const Material &) const;
+};
+
+struct BufferView {
+  std::string name;
+  int buffer{-1};        // Required
+  size_t byteOffset{0};  // minimum 0, default 0
+  size_t byteLength{0};  // required, minimum 1. 0 = invalid
+
+  // minimum 4, maximum 252 (multiple of 4), default 0 = understood to be
+  // tightly packed
+  size_t byteStride{0};
+
+  // ["ARRAY_BUFFER", "ELEMENT_ARRAY_BUFFER"] for vertex indices or attribs.
+  // Could be 0 for other data
+  int target{0};
+  Value extras;
+  ExtensionMap extensions;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+
+  bool dracoDecoded{false};  // Flag indicating this has been draco decoded
+
+  // Every default is already stated by the in-class initializers above, so
+  // the constructor does not repeat them.
+  BufferView() {}
+  DEFAULT_METHODS(BufferView)
+  bool operator==(const BufferView &) const;
+};
+
+struct Accessor {
+  int bufferView;  // optional in spec but required here since sparse accessor
+  // are not supported
+  std::string name;
+  size_t byteOffset;
+  bool normalized;    // optional.
+  int componentType;  // (required) One of TINYGLTF_COMPONENT_TYPE_***
+  size_t count;       // required
+  int type;           // (required) One of TINYGLTF_TYPE_***   ..
+  Value extras;
+  ExtensionMap extensions;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+
+  // optional. integer value is promoted to double
+  std::vector<double> minValues;
+  std::vector<double> maxValues;
+
+  struct {
+    int count;
+    bool isSparse;
+    struct {
+      int byteOffset;
+      int bufferView;
+      int componentType;  // a TINYGLTF_COMPONENT_TYPE_ value
+    } indices;
+    struct {
+      int bufferView;
+      int byteOffset;
+    } values;
+  } sparse;
+
+  ///
+  /// Utility function to compute byteStride for a given bufferView object.
+  /// Returns -1 upon invalid glTF value or parameter configuration, including
+  /// a stride that does not fit in an int.
+  int ByteStride(const BufferView &bufferViewObject) const {
+    // Both branches below need the size of one component.
+    const int componentSizeInBytes =
+        GetComponentSizeInBytes(static_cast<uint32_t>(componentType));
+    if (componentSizeInBytes <= 0) {
+      return -1;
+    }
+
+    if (bufferViewObject.byteStride == 0) {
+      // Assume data is tightly packed.
+      const int numComponents =
+          GetNumComponentsInType(static_cast<uint32_t>(type));
+      if (numComponents <= 0) {
+        return -1;
+      }
+      return componentSizeInBytes * numComponents;
+    }
+
+    // Check if byteStride is a multiple of the size of the accessor's
+    // component type.
+    if ((bufferViewObject.byteStride % uint32_t(componentSizeInBytes)) != 0) {
+      return -1;
+    }
+    // Reject a stride that does not survive the narrowing to int instead of
+    // handing back a truncated value.
+    const int stride = static_cast<int>(bufferViewObject.byteStride);
+    if (stride < 0 ||
+        static_cast<size_t>(stride) != bufferViewObject.byteStride) {
+      return -1;
+    }
+    return stride;
+  }
+
+  // `sparse()` value-initializes the whole sparse record; previously only
+  // `isSparse` was written and the other fields were copied uninitialized.
+  Accessor()
+      : bufferView(-1),
+        byteOffset(0),
+        normalized(false),
+        componentType(-1),
+        count(0),
+        type(-1),
+        sparse() {}
+  DEFAULT_METHODS(Accessor)
+  bool operator==(const tinygltf::Accessor &) const;
+};
+
+struct PerspectiveCamera {
+  double aspectRatio;  // min > 0
+  double yfov;         // required. min > 0
+  double zfar;         // min > 0
+  double znear;        // required. min > 0
+
+  // All four fields start at 0.0.
+  PerspectiveCamera()
+      : aspectRatio(0.0),
+        yfov(0.0),
+        zfar(0.0),  // 0 = use infinite projection matrix
+        znear(0.0) {}
+  DEFAULT_METHODS(PerspectiveCamera)
+  bool operator==(const PerspectiveCamera &) const;
+
+  ExtensionMap extensions;
+  Value extras;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+};
+
+struct OrthographicCamera {
+  double xmag{0.0};   // required. must not be zero.
+  double ymag{0.0};   // required. must not be zero.
+  double zfar{0.0};   // required. `zfar` must be greater than `znear`.
+  double znear{0.0};  // required
+
+  OrthographicCamera() {}  // every field starts at 0.0 in-class
+  DEFAULT_METHODS(OrthographicCamera)
+  bool operator==(const OrthographicCamera &) const;
+
+  ExtensionMap extensions;
+  Value extras;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+};
+// Camera: presumably `type` selects between `perspective` and `orthographic`.
+struct Camera {
+  std::string type;  // required. "perspective" or "orthographic"
+  std::string name;
+
+  PerspectiveCamera perspective;
+  OrthographicCamera orthographic;
+
+  Camera() {}
+  DEFAULT_METHODS(Camera)
+  bool operator==(const Camera &) const;
+
+  ExtensionMap extensions;
+  Value extras;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+};
+
+struct Primitive {
+  std::map<std::string, int> attributes;  // (required) A dictionary object of
+  // integer, where each integer
+  // is the index of the accessor
+  // containing an attribute.
+  int material;  // The index of the material to apply to this primitive
+  // when rendering.
+  int indices;   // The index of the accessor that contains the indices.
+  int mode;      // one of TINYGLTF_MODE_***
+  std::vector<std::map<std::string, int> > targets;  // array of morph targets,
+  // where each target is a dict with attributes in ["POSITION, "NORMAL",
+  // "TANGENT"] pointing
+  // to their corresponding accessors
+  ExtensionMap extensions;
+  Value extras;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+
+  // material, indices and mode all start out as -1.
+  Primitive()
+      : material(-1),
+        indices(-1),
+        mode(-1) {}
+  DEFAULT_METHODS(Primitive)
+  bool operator==(const Primitive &) const;
+};
+// Mesh: named list of primitives plus morph-target weights.
+struct Mesh {
+  std::string name;
+  std::vector<Primitive> primitives;
+  std::vector<double> weights;  // weights to be applied to the Morph Targets
+  ExtensionMap extensions;
+  Value extras;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+
+  Mesh() = default;
+  DEFAULT_METHODS(Mesh)
+  bool operator==(const Mesh &) const;
+};
+
+class Node {
+ public:
+  Node() {}  // camera, skin and mesh default to -1 in-class
+
+  DEFAULT_METHODS(Node)
+
+  bool operator==(const Node &) const;
+
+  int camera{-1};  // the index of the camera referenced by this node
+
+  std::string name;
+  int skin{-1};
+  int mesh{-1};
+  std::vector<int> children;
+  std::vector<double> rotation;     // length must be 0 or 4
+  std::vector<double> scale;        // length must be 0 or 3
+  std::vector<double> translation;  // length must be 0 or 3
+  std::vector<double> matrix;       // length must be 0 or 16
+  std::vector<double> weights;  // The weights of the instantiated Morph Target
+
+  ExtensionMap extensions;
+  Value extras;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+};
+// Buffer: raw byte storage (`data`) together with its name and uri.
+struct Buffer {
+  std::string name;
+  std::vector<unsigned char> data;
+  std::string
+      uri;  // considered as required here but not in the spec (need to clarify)
+  // uri is not decoded(e.g. whitespace may be represented as %20)
+  Value extras;
+  ExtensionMap extensions;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+
+  Buffer() = default;
+  DEFAULT_METHODS(Buffer)
+  bool operator==(const Buffer &) const;
+};
+// Asset metadata; `version` defaults to "2.0".
+struct Asset {
+  std::string version = "2.0";  // required
+  std::string generator;
+  std::string minVersion;
+  std::string copyright;
+  ExtensionMap extensions;
+  Value extras;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+
+  Asset() = default;
+  DEFAULT_METHODS(Asset)
+  bool operator==(const Asset &) const;
+};
+// Scene: a name plus `nodes` (presumably indices into Model::nodes).
+struct Scene {
+  std::string name;
+  std::vector<int> nodes;
+
+  ExtensionMap extensions;
+  Value extras;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+
+  Scene() = default;
+  DEFAULT_METHODS(Scene)
+  bool operator==(const Scene &) const;
+};
+
+struct SpotLight {
+  double innerConeAngle{0.0};
+  double outerConeAngle{0.7853981634};
+
+  SpotLight() {}  // cone angles default in-class
+  DEFAULT_METHODS(SpotLight)
+  bool operator==(const SpotLight &) const;
+
+  ExtensionMap extensions;
+  Value extras;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+};
+
+struct Light {
+  std::string name;
+  std::vector<double> color;
+  double intensity{1.0};
+  std::string type;
+  double range{0.0};  // 0.0 = infinite
+  SpotLight spot;
+
+  Light() {}  // intensity and range already default in-class
+  DEFAULT_METHODS(Light)
+
+  bool operator==(const Light &) const;
+
+  ExtensionMap extensions;
+  Value extras;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+};
+// Model: top-level container holding one vector per kind of glTF object.
+class Model {
+ public:
+  Model() = default;
+  DEFAULT_METHODS(Model)
+
+  bool operator==(const Model &) const;
+
+  std::vector<Accessor> accessors;
+  std::vector<Animation> animations;
+  std::vector<Buffer> buffers;
+  std::vector<BufferView> bufferViews;
+  std::vector<Material> materials;
+  std::vector<Mesh> meshes;
+  std::vector<Node> nodes;
+  std::vector<Texture> textures;
+  std::vector<Image> images;
+  std::vector<Skin> skins;
+  std::vector<Sampler> samplers;
+  std::vector<Camera> cameras;
+  std::vector<Scene> scenes;
+  std::vector<Light> lights;
+
+  int defaultScene = -1;
+  std::vector<std::string> extensionsUsed;
+  std::vector<std::string> extensionsRequired;
+
+  Asset asset;
+
+  Value extras;
+  ExtensionMap extensions;
+
+  // Filled when SetStoreOriginalJSONForExtrasAndExtensions is enabled.
+  std::string extras_json_string;
+  std::string extensions_json_string;
+};
+// Bit flags for the `check_sections` argument of the TinyGLTF load functions.
+enum SectionCheck {
+  NO_REQUIRE = 0x00,
+  REQUIRE_VERSION = 0x01,
+  REQUIRE_SCENE = 0x02,
+  REQUIRE_SCENES = 0x04,
+  REQUIRE_NODES = 0x08,
+  REQUIRE_ACCESSORS = 0x10,
+  REQUIRE_BUFFERS = 0x20,
+  REQUIRE_BUFFER_VIEWS = 0x40,
+  REQUIRE_ALL = 0x7f  // union of all REQUIRE_* bits above
+};
+
+///
+/// LoadImageDataFunction type. Signature for custom image loading callbacks.
+///
+typedef bool (*LoadImageDataFunction)(Image *, const int, std::string *,
+                                      std::string *, int, int,
+                                      const unsigned char *, int,
+                                      void *user_pointer);
+
+///
+/// WriteImageDataFunction type. Signature for custom image writing callbacks.
+///
+typedef bool (*WriteImageDataFunction)(const std::string *, const std::string *,
+                                       Image *, bool, void *);
+
+#ifndef TINYGLTF_NO_STB_IMAGE
+// Declaration of default image loader callback
+bool LoadImageData(Image *image, const int image_idx, std::string *err,
+                   std::string *warn, int req_width, int req_height,
+                   const unsigned char *bytes, int size, void *);
+#endif
+
+#ifndef TINYGLTF_NO_STB_IMAGE_WRITE
+// Declaration of default image writer callback
+bool WriteImageData(const std::string *basepath, const std::string *filename,
+                    Image *image, bool embedImages, void *);
+#endif
+
+///
+/// FileExistsFunction type. Signature for custom filesystem callbacks.
+///
+typedef bool (*FileExistsFunction)(const std::string &abs_filename, void *);
+
+///
+/// ExpandFilePathFunction type. Signature for custom filesystem callbacks.
+///
+typedef std::string (*ExpandFilePathFunction)(const std::string &, void *);
+
+///
+/// ReadWholeFileFunction type. Signature for custom filesystem callbacks.
+///
+typedef bool (*ReadWholeFileFunction)(std::vector<unsigned char> *,
+                                      std::string *, const std::string &,
+                                      void *);
+
+///
+/// WriteWholeFileFunction type. Signature for custom filesystem callbacks.
+///
+typedef bool (*WriteWholeFileFunction)(std::string *, const std::string &,
+                                       const std::vector<unsigned char> &,
+                                       void *);
+
+///
+/// A structure containing all required filesystem callbacks and a pointer to
+/// their user data.
+/// No member has an in-class default; fill in every field before use.
+struct FsCallbacks {
+  FileExistsFunction FileExists;
+  ExpandFilePathFunction ExpandFilePath;
+  ReadWholeFileFunction ReadWholeFile;
+  WriteWholeFileFunction WriteWholeFile;
+
+  void *user_data;  // An argument that is passed to all fs callbacks
+};
+
+#ifndef TINYGLTF_NO_FS
+// Declaration of default filesystem callbacks
+
+bool FileExists(const std::string &abs_filename, void *);
+
+///
+/// Expand file path(e.g. `~` to home directory on posix, `%APPDATA%` to
+/// `C:\\Users\\tinygltf\\AppData`)
+///
+/// @param[in] filepath File path string. Assume UTF-8
+/// @param[in] userdata User data. Set to `nullptr` if you don't need it.
+///
+std::string ExpandFilePath(const std::string &filepath, void *userdata);
+
+bool ReadWholeFile(std::vector<unsigned char> *out, std::string *err,
+                   const std::string &filepath, void *);
+
+bool WriteWholeFile(std::string *err, const std::string &filepath,
+                    const std::vector<unsigned char> &contents, void *);
+#endif
+
+///
+/// glTF Parser/Serializer context.
+///
+class TinyGLTF {
+ public:
+#ifdef __clang__
+  #pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wc++98-compat"
+#endif
+  // Uses the same values as the default member initializers declared below.
+  TinyGLTF() : bin_data_(nullptr), bin_size_(0), is_binary_(false) {}
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+  ~TinyGLTF() {}
+
+  ///
+  /// Loads glTF ASCII asset from a file.
+  /// Sets a warning message to `warn`, e.g. when it fails to load assets.
+  /// Returns false and sets the error string to `err` if there's an error.
+  ///
+  bool LoadASCIIFromFile(Model *model, std::string *err, std::string *warn,
+                         const std::string &filename,
+                         unsigned int check_sections = REQUIRE_VERSION);
+
+  ///
+  /// Loads glTF ASCII asset from string(memory).
+  /// `length` = strlen(str);
+  /// Sets a warning message to `warn`, e.g. when it fails to load assets.
+  /// Returns false and sets the error string to `err` if there's an error.
+  ///
+  bool LoadASCIIFromString(Model *model, std::string *err, std::string *warn,
+                           const char *str, const unsigned int length,
+                           const std::string &base_dir,
+                           unsigned int check_sections = REQUIRE_VERSION);
+
+  ///
+  /// Loads glTF binary asset from a file.
+  /// Sets a warning message to `warn`, e.g. when it fails to load assets.
+  /// Returns false and sets the error string to `err` if there's an error.
+  ///
+  bool LoadBinaryFromFile(Model *model, std::string *err, std::string *warn,
+                          const std::string &filename,
+                          unsigned int check_sections = REQUIRE_VERSION);
+
+  ///
+  /// Loads glTF binary asset from memory.
+  /// `length` = number of bytes available at `bytes`;
+  /// Sets a warning message to `warn`, e.g. when it fails to load assets.
+  /// Returns false and sets the error string to `err` if there's an error.
+  ///
+  bool LoadBinaryFromMemory(Model *model, std::string *err, std::string *warn,
+                            const unsigned char *bytes,
+                            const unsigned int length,
+                            const std::string &base_dir = "",
+                            unsigned int check_sections = REQUIRE_VERSION);
+
+  ///
+  /// Write glTF to stream, buffers and images will be embedded
+  ///
+  bool WriteGltfSceneToStream(Model *model, std::ostream &stream,
+                              bool prettyPrint, bool writeBinary);
+
+  ///
+  /// Write glTF to file.
+  ///
+  bool WriteGltfSceneToFile(Model *model, const std::string &filename,
+                            bool embedImages, bool embedBuffers,
+                            bool prettyPrint, bool writeBinary);
+
+  ///
+  /// Set callback to use for loading image data
+  ///
+  void SetImageLoader(LoadImageDataFunction LoadImageData, void *user_data);
+
+  ///
+  /// Unset(remove) callback of loading image data
+  ///
+  void RemoveImageLoader();
+
+  ///
+  /// Set callback to use for writing image data
+  ///
+  void SetImageWriter(WriteImageDataFunction WriteImageData, void *user_data);
+
+  ///
+  /// Set callbacks to use for filesystem (fs) access and their user data
+  ///
+  void SetFsCallbacks(FsCallbacks callbacks);
+
+  ///
+  /// Set serializing default values(default = false).
+  /// When true, default values are force serialized to .glTF.
+  /// This may be helpful if you want to serialize a full description of glTF
+  /// data.
+  ///
+  /// TODO(LTE): Supply parsing option as function arguments to
+  /// `LoadASCIIFromFile()` and others, not by a class method
+  ///
+  void SetSerializeDefaultValues(const bool enabled) {
+    serialize_default_values_ = enabled;
+  }
+
+  bool GetSerializeDefaultValues() const { return serialize_default_values_; }
+
+  ///
+  /// Store original JSON string for `extras` and `extensions`.
+  /// This feature is useful when the user wants to reconstruct a custom data
+  /// structure from the JSON string.
+  ///
+  void SetStoreOriginalJSONForExtrasAndExtensions(const bool enabled) {
+    store_original_json_for_extras_and_extensions_ = enabled;
+  }
+
+  bool GetStoreOriginalJSONForExtrasAndExtensions() const {
+    return store_original_json_for_extras_and_extensions_;
+  }
+
+  ///
+  /// Specify whether to preserve image channels when loading images or not.
+  /// (Not effective when the user supplies their own LoadImageData callback)
+  ///
+  void SetPreserveImageChannels(bool onoff) {
+    preserve_image_channels_ = onoff;
+  }
+
+  bool GetPreserveImageChannels() const { return preserve_image_channels_; }
+
+ private:
+  ///
+  /// Loads glTF asset from string(memory).
+  /// `length` = strlen(str);
+  /// Sets a warning message to `warn`, e.g. when it fails to load assets.
+  /// Returns false and sets the error string to `err` if there's an error.
+  ///
+  bool LoadFromString(Model *model, std::string *err, std::string *warn,
+                      const char *str, const unsigned int length,
+                      const std::string &base_dir, unsigned int check_sections);
+
+  const unsigned char *bin_data_ = nullptr;
+  size_t bin_size_ = 0;
+  bool is_binary_ = false;
+
+  bool serialize_default_values_ = false;  ///< Serialize default values?
+
+  bool store_original_json_for_extras_and_extensions_ = false;
+
+  bool preserve_image_channels_ = false;  /// Default false(expand channels to
+  /// RGBA) for backward compatibility.
+
+  FsCallbacks fs = {  // built-in callbacks, or all null with TINYGLTF_NO_FS
+#ifndef TINYGLTF_NO_FS
+      &tinygltf::FileExists, &tinygltf::ExpandFilePath,
+      &tinygltf::ReadWholeFile, &tinygltf::WriteWholeFile,
+
+      nullptr  // Fs callback user data
+#else
+      nullptr, nullptr, nullptr, nullptr,
+
+      nullptr  // Fs callback user data
+#endif
+  };
+
+  LoadImageDataFunction LoadImageData =  // null when TINYGLTF_NO_STB_IMAGE
+#ifndef TINYGLTF_NO_STB_IMAGE
+      &tinygltf::LoadImageData;
+#else
+  nullptr;
+#endif
+  void *load_image_user_data_{nullptr};
+  bool user_image_loader_{false};
+
+  WriteImageDataFunction WriteImageData =  // null when TINYGLTF_NO_STB_IMAGE_WRITE
+#ifndef TINYGLTF_NO_STB_IMAGE_WRITE
+      &tinygltf::WriteImageData;
+#else
+  nullptr;
+#endif
+  void *write_image_user_data_{nullptr};
+};
+
+#ifdef __clang__
+#pragma clang diagnostic pop  // -Wpadded
+#endif
+
+}  // namespace tinygltf
+
+#endif  // TINY_GLTF_H_
+
+#if defined(TINYGLTF_IMPLEMENTATION) || defined(__INTELLISENSE__)
+#include <algorithm>
+//#include <cassert>
+#ifndef TINYGLTF_NO_FS
+#include <cstdio>
+#include <fstream>
+#endif
+#include <sstream>
+
+#ifdef __clang__
+// Disable some warnings for external files.
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wfloat-equal"
+#pragma clang diagnostic ignored "-Wexit-time-destructors"
+#pragma clang diagnostic ignored "-Wconversion"
+#pragma clang diagnostic ignored "-Wold-style-cast"
+#pragma clang diagnostic ignored "-Wglobal-constructors"
+#if __has_warning("-Wreserved-id-macro")
+#pragma clang diagnostic ignored "-Wreserved-id-macro"
+#endif
+#pragma clang diagnostic ignored "-Wdisabled-macro-expansion"
+#pragma clang diagnostic ignored "-Wpadded"
+#pragma clang diagnostic ignored "-Wc++98-compat"
+#pragma clang diagnostic ignored "-Wc++98-compat-pedantic"
+#pragma clang diagnostic ignored "-Wdocumentation-unknown-command"
+#pragma clang diagnostic ignored "-Wswitch-enum"
+#pragma clang diagnostic ignored "-Wimplicit-fallthrough"
+#pragma clang diagnostic ignored "-Wweak-vtables"
+#pragma clang diagnostic ignored "-Wcovered-switch-default"
+#if __has_warning("-Wdouble-promotion")
+#pragma clang diagnostic ignored "-Wdouble-promotion"
+#endif
+#if __has_warning("-Wcomma")
+#pragma clang diagnostic ignored "-Wcomma"
+#endif
+#if __has_warning("-Wzero-as-null-pointer-constant")
+#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
+#endif
+#if __has_warning("-Wcast-qual")
+#pragma clang diagnostic ignored "-Wcast-qual"
+#endif
+#if __has_warning("-Wmissing-variable-declarations")
+#pragma clang diagnostic ignored "-Wmissing-variable-declarations"
+#endif
+#if __has_warning("-Wmissing-prototypes")
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#endif
+#if __has_warning("-Wcast-align")
+#pragma clang diagnostic ignored "-Wcast-align"
+#endif
+#if __has_warning("-Wnewline-eof")
+#pragma clang diagnostic ignored "-Wnewline-eof"
+#endif
+#if __has_warning("-Wunused-parameter")
+#pragma clang diagnostic ignored "-Wunused-parameter"
+#endif
+#if __has_warning("-Wmismatched-tags")
+#pragma clang diagnostic ignored "-Wmismatched-tags"
+#endif
+#if __has_warning("-Wextra-semi-stmt")
+#pragma clang diagnostic ignored "-Wextra-semi-stmt"
+#endif
+#endif
+
+// Disable GCC warnings
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wtype-limits"
+#endif  // __GNUC__
+
+#ifndef TINYGLTF_NO_INCLUDE_JSON
+#ifndef TINYGLTF_USE_RAPIDJSON
+#include "json.hpp"
+#else
+#ifndef TINYGLTF_NO_INCLUDE_RAPIDJSON
+#include "document.h"
+#include "prettywriter.h"
+#include "rapidjson.h"
+#include "stringbuffer.h"
+#include "writer.h"
+#endif
+#endif
+#endif
+
+#ifdef TINYGLTF_ENABLE_DRACO
+#include "draco/compression/decode.h"
+#include "draco/core/decoder_buffer.h"
+#endif
+
+#ifndef TINYGLTF_NO_STB_IMAGE
+#ifndef TINYGLTF_NO_INCLUDE_STB_IMAGE
+#include "stb_image.h"
+#endif
+#endif
+
+#ifndef TINYGLTF_NO_STB_IMAGE_WRITE
+#ifndef TINYGLTF_NO_INCLUDE_STB_IMAGE_WRITE
+#include "stb_image_write.h"
+#endif
+#endif
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
+#ifdef _WIN32
+
+// issue 143.
+// Define NOMINMAX to avoid min/max defines,
+// but undef it after included windows.h
+#ifndef NOMINMAX
+#define TINYGLTF_INTERNAL_NOMINMAX
+#define NOMINMAX
+#endif
+
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#define TINYGLTF_INTERNAL_WIN32_LEAN_AND_MEAN
+#endif
+#include <windows.h>  // include API for expanding a file path
+
+#ifdef TINYGLTF_INTERNAL_WIN32_LEAN_AND_MEAN
+#undef WIN32_LEAN_AND_MEAN
+#endif
+
+#if defined(TINYGLTF_INTERNAL_NOMINMAX)
+#undef NOMINMAX
+#endif
+
+#if defined(__GLIBCXX__)  // mingw
+
+#include <fcntl.h>  // _O_RDONLY
+
+#include <ext/stdio_filebuf.h>  // fstream (all sorts of IO stuff) + stdio_filebuf (=streambuf)
+
+#endif
+
+#elif !defined(__ANDROID__) && !defined(__OpenBSD__)
+#include <wordexp.h>
+#endif
+
+#if defined(__sparcv9) || defined(__powerpc__)
+// Big endian
+#else
+#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU
+#define TINYGLTF_LITTLE_ENDIAN 1
+#endif
+#endif
+
+namespace {
+#ifdef TINYGLTF_USE_RAPIDJSON
+
+#ifdef TINYGLTF_USE_RAPIDJSON_CRTALLOCATOR
+// This uses the RapidJSON CRTAllocator.  It is thread safe and multiple
+// documents may be active at once.
+using json =
+    rapidjson::GenericValue<rapidjson::UTF8<>, rapidjson::CrtAllocator>;
+using json_const_iterator = json::ConstMemberIterator;
+using json_const_array_iterator = json const *;
+using JsonDocument =
+    rapidjson::GenericDocument<rapidjson::UTF8<>, rapidjson::CrtAllocator>;
+rapidjson::CrtAllocator s_CrtAllocator;  // stateless and thread safe
+rapidjson::CrtAllocator &GetAllocator() { return s_CrtAllocator; }
+#else
+// This uses the default RapidJSON MemoryPoolAllocator.  It is very fast, but
+// not thread safe. Only a single JsonDocument may be active at any one time,
+// meaning only a single gltf load/save can be active any one time.
+using json = rapidjson::Value;
+using json_const_iterator = json::ConstMemberIterator;
+using json_const_array_iterator = json const *;
+rapidjson::Document *s_pActiveDocument = nullptr;
+rapidjson::Document::AllocatorType &GetAllocator() {
+  assert(s_pActiveDocument);  // Root json node must be JsonDocument type
+  return s_pActiveDocument->GetAllocator();
+}
+
+#ifdef __clang__
+#pragma clang diagnostic push
+// Suppress JsonDocument(JsonDocument &&rhs) noexcept
+#pragma clang diagnostic ignored "-Wunused-member-function"
+#endif
+
+struct JsonDocument : public rapidjson::Document {  // tracks the active doc
+  JsonDocument() {
+    assert(s_pActiveDocument ==
+           nullptr);  // When using default allocator, only one document can be
+                      // active at a time, if you need multiple active at once,
+                      // define TINYGLTF_USE_RAPIDJSON_CRTALLOCATOR
+    s_pActiveDocument = this;  // GetAllocator() now resolves to this document
+  }
+  JsonDocument(const JsonDocument &) = delete;
+  JsonDocument(JsonDocument &&rhs) noexcept
+      : rapidjson::Document(std::move(rhs)) {
+    s_pActiveDocument = this;  // the moved-to object takes over as active
+    rhs.isNil = true;  // moved-from object must not reset the active pointer
+  }
+  ~JsonDocument() {
+    if (!isNil) {  // only a live (not moved-from) document releases the slot
+      s_pActiveDocument = nullptr;
+    }
+  }
+
+ private:
+  bool isNil = false;  // true once this object has been moved from
+};
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+#endif  // TINYGLTF_USE_RAPIDJSON_CRTALLOCATOR
+
+#else
+using nlohmann::json;
+using json_const_iterator = json::const_iterator;
+using json_const_array_iterator = json_const_iterator;
+using JsonDocument = json;
+#endif
+
+void JsonParse(JsonDocument &doc, const char *str, size_t length,
+               bool throwExc = false) {  // parses `length` chars of `str` into doc
+#ifdef TINYGLTF_USE_RAPIDJSON
+  (void)throwExc;  // not used on the RapidJSON path
+  doc.Parse(str, length);
+#else
+  doc = json::parse(str, str + length, nullptr, throwExc);  // no callback
+#endif
+}
+}  // namespace
+
+#ifdef __APPLE__
+#include "TargetConditionals.h"
+#endif
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wc++98-compat"
+#endif
+
+namespace tinygltf {
+
+///
+/// Internal LoadImageDataOption struct.
+/// This struct is passed through `user_pointer` in LoadImageData.
+/// The struct is not passed when the user supply their own LoadImageData
+/// callbacks.
+///
+struct LoadImageDataOption {
+  // true: preserve image channels(e.g. load as RGB image if the image has RGB
+  // channels) default `false`(channels are expanded to RGBA for backward
+  // compatibility).
+  bool preserve_channels{false};
+};
+
+// Deep, recursive equality for Value (reals via TINYGLTF_DOUBLE_EQUAL).
+static bool Equals(const tinygltf::Value &one, const tinygltf::Value &other) {
+  if (one.Type() != other.Type()) return false;  // kinds must match first
+
+  switch (one.Type()) {
+    case NULL_TYPE:
+      return true;
+    case BOOL_TYPE:
+      return one.Get<bool>() == other.Get<bool>();
+    case REAL_TYPE:
+      return TINYGLTF_DOUBLE_EQUAL(one.Get<double>(), other.Get<double>());
+    case INT_TYPE:
+      return one.Get<int>() == other.Get<int>();
+    case OBJECT_TYPE: {
+      // Bind by const reference so neither map is copied per comparison.
+      const auto &oneObj = one.Get<tinygltf::Value::Object>();
+      const auto &otherObj = other.Get<tinygltf::Value::Object>();
+      if (oneObj.size() != otherObj.size()) return false;
+      for (const auto &entry : oneObj) {
+        const auto otherIt = otherObj.find(entry.first);
+        if (otherIt == otherObj.end()) return false;  // key missing in other
+        if (!Equals(entry.second, otherIt->second)) return false;
+      }
+      return true;
+    }
+    case ARRAY_TYPE: {
+      if (one.Size() != other.Size()) return false;
+      for (int i = 0; i < int(one.Size()); ++i)
+        if (!Equals(one.Get(i), other.Get(i))) return false;
+      return true;
+    }
+    case STRING_TYPE:
+      return one.Get<std::string>() == other.Get<std::string>();
+    case BINARY_TYPE:
+      return one.Get<std::vector<unsigned char> >() ==
+             other.Get<std::vector<unsigned char> >();
+    default: {
+      // unhandled type
+      return false;
+    }
+  }
+}
+
+// Element-wise comparison of two double vectors via TINYGLTF_DOUBLE_EQUAL.
+static bool Equals(const std::vector<double> &one,
+                   const std::vector<double> &other) {
+  if (one.size() != other.size()) return false;  // lengths must agree first
+  for (size_t idx = 0; idx < one.size(); ++idx) {  // no narrowing to int
+    if (!TINYGLTF_DOUBLE_EQUAL(one[idx], other[idx])) return false;
+  }
+  return true;  // same length and every pair compared equal
+}
+
+bool Accessor::operator==(const Accessor &other) const {
+  return this->bufferView == other.bufferView &&
+         this->byteOffset == other.byteOffset &&
+         this->componentType == other.componentType &&
+         this->count == other.count && this->extensions == other.extensions &&
+         this->extras == other.extras &&
+         Equals(this->maxValues, other.maxValues) &&
+         Equals(this->minValues, other.minValues) && this->name == other.name &&
+         this->normalized == other.normalized && this->type == other.type;
+}
+bool Animation::operator==(const Animation &other) const {
+  return this->channels == other.channels &&
+         this->extensions == other.extensions && this->extras == other.extras &&
+         this->name == other.name && this->samplers == other.samplers;
+}
+bool AnimationChannel::operator==(const AnimationChannel &other) const {
+  return this->extensions == other.extensions && this->extras == other.extras &&
+         this->target_node == other.target_node &&
+         this->target_path == other.target_path &&
+         this->sampler == other.sampler;
+}
+bool AnimationSampler::operator==(const AnimationSampler &other) const {
+  return this->extras == other.extras && this->extensions == other.extensions &&
+         this->input == other.input &&
+         this->interpolation == other.interpolation &&
+         this->output == other.output;
+}
+bool Asset::operator==(const Asset &other) const {
+  return this->copyright == other.copyright &&
+         this->extensions == other.extensions && this->extras == other.extras &&
+         this->generator == other.generator &&
+         this->minVersion == other.minVersion && this->version == other.version;
+}
+bool Buffer::operator==(const Buffer &other) const {
+  return this->data == other.data && this->extensions == other.extensions &&
+         this->extras == other.extras && this->name == other.name &&
+         this->uri == other.uri;
+}
+bool BufferView::operator==(const BufferView &other) const {
+  return this->buffer == other.buffer && this->byteLength == other.byteLength &&
+         this->byteOffset == other.byteOffset &&
+         this->byteStride == other.byteStride && this->name == other.name &&
+         this->target == other.target && this->extensions == other.extensions &&
+         this->extras == other.extras &&
+         this->dracoDecoded == other.dracoDecoded;
+}
+bool Camera::operator==(const Camera &other) const {
+  return this->name == other.name && this->extensions == other.extensions &&
+         this->extras == other.extras &&
+         this->orthographic == other.orthographic &&
+         this->perspective == other.perspective && this->type == other.type;
+}
+bool Image::operator==(const Image &other) const {
+  return this->bufferView == other.bufferView &&
+         this->component == other.component &&
+         this->extensions == other.extensions && this->extras == other.extras &&
+         this->height == other.height && this->image == other.image &&
+         this->mimeType == other.mimeType && this->name == other.name &&
+         this->uri == other.uri && this->width == other.width;
+}
+bool Light::operator==(const Light &other) const {  // color, name, type only
+  return Equals(this->color, other.color) && this->name == other.name &&
+         this->type == other.type;
+}  // NOTE(review): confirm no other Light members should take part here
+bool Material::operator==(const Material &other) const {
+  return (this->pbrMetallicRoughness == other.pbrMetallicRoughness) &&
+         (this->normalTexture == other.normalTexture) &&
+         (this->occlusionTexture == other.occlusionTexture) &&
+         (this->emissiveTexture == other.emissiveTexture) &&
+         Equals(this->emissiveFactor, other.emissiveFactor) &&
+         (this->alphaMode == other.alphaMode) &&
+         TINYGLTF_DOUBLE_EQUAL(this->alphaCutoff, other.alphaCutoff) &&
+         (this->doubleSided == other.doubleSided) &&
+         (this->extensions == other.extensions) &&
+         (this->extras == other.extras) && (this->values == other.values) &&
+         (this->additionalValues == other.additionalValues) &&
+         (this->name == other.name);
+}
+bool Mesh::operator==(const Mesh &other) const {
+  return this->extensions == other.extensions && this->extras == other.extras &&
+         this->name == other.name && Equals(this->weights, other.weights) &&
+         this->primitives == other.primitives;
+}
+bool Model::operator==(const Model &other) const {
+  return this->accessors == other.accessors &&
+         this->animations == other.animations && this->asset == other.asset &&
+         this->buffers == other.buffers &&
+         this->bufferViews == other.bufferViews &&
+         this->cameras == other.cameras &&
+         this->defaultScene == other.defaultScene &&
+         this->extensions == other.extensions &&
+         this->extensionsRequired == other.extensionsRequired &&
+         this->extensionsUsed == other.extensionsUsed &&
+         this->extras == other.extras && this->images == other.images &&
+         this->lights == other.lights && this->materials == other.materials &&
+         this->meshes == other.meshes && this->nodes == other.nodes &&
+         this->samplers == other.samplers && this->scenes == other.scenes &&
+         this->skins == other.skins && this->textures == other.textures;
+}
+bool Node::operator==(const Node &other) const {
+  return this->camera == other.camera && this->children == other.children &&
+         this->extensions == other.extensions && this->extras == other.extras &&
+         Equals(this->matrix, other.matrix) && this->mesh == other.mesh &&
+         this->name == other.name && Equals(this->rotation, other.rotation) &&
+         Equals(this->scale, other.scale) && this->skin == other.skin &&
+         Equals(this->translation, other.translation) &&
+         Equals(this->weights, other.weights);
+}
+bool SpotLight::operator==(const SpotLight &other) const {
+  return this->extensions == other.extensions && this->extras == other.extras &&
+         TINYGLTF_DOUBLE_EQUAL(this->innerConeAngle, other.innerConeAngle) &&
+         TINYGLTF_DOUBLE_EQUAL(this->outerConeAngle, other.outerConeAngle);
+}
+bool OrthographicCamera::operator==(const OrthographicCamera &other) const {
+  return this->extensions == other.extensions && this->extras == other.extras &&
+         TINYGLTF_DOUBLE_EQUAL(this->xmag, other.xmag) &&
+         TINYGLTF_DOUBLE_EQUAL(this->ymag, other.ymag) &&
+         TINYGLTF_DOUBLE_EQUAL(this->zfar, other.zfar) &&
+         TINYGLTF_DOUBLE_EQUAL(this->znear, other.znear);
+}
+bool Parameter::operator==(const Parameter &other) const {
+  if (this->bool_value != other.bool_value ||
+      this->has_number_value != other.has_number_value)
+    return false;
+  // Scalar number: compared through TINYGLTF_DOUBLE_EQUAL, not operator==.
+  if (!TINYGLTF_DOUBLE_EQUAL(this->number_value, other.number_value))
+    return false;
+  // Maps must have the same size and the same keys with matching values.
+  if (this->json_double_value.size() != other.json_double_value.size())
+    return false;
+  for (auto &it : this->json_double_value) {
+    auto otherIt = other.json_double_value.find(it.first);
+    if (otherIt == other.json_double_value.end()) return false;
+
+    if (!TINYGLTF_DOUBLE_EQUAL(it.second, otherIt->second)) return false;
+  }
+
+  if (!Equals(this->number_array, other.number_array)) return false;
+
+  if (this->string_value != other.string_value) return false;
+
+  return true;
+}
+bool PerspectiveCamera::operator==(const PerspectiveCamera &other) const {
+  return TINYGLTF_DOUBLE_EQUAL(this->aspectRatio, other.aspectRatio) &&
+         this->extensions == other.extensions && this->extras == other.extras &&
+         TINYGLTF_DOUBLE_EQUAL(this->yfov, other.yfov) &&
+         TINYGLTF_DOUBLE_EQUAL(this->zfar, other.zfar) &&
+         TINYGLTF_DOUBLE_EQUAL(this->znear, other.znear);
+}
+bool Primitive::operator==(const Primitive &other) const {
+  return this->attributes == other.attributes && this->extras == other.extras &&
+         this->indices == other.indices && this->material == other.material &&
+         this->mode == other.mode && this->targets == other.targets;
+}  // NOTE(review): no `extensions` check here, unlike sibling operators
+bool Sampler::operator==(const Sampler &other) const {
+  return this->extensions == other.extensions && this->extras == other.extras &&
+         this->magFilter == other.magFilter &&
+         this->minFilter == other.minFilter && this->name == other.name &&
+         this->wrapS == other.wrapS &&
+         this->wrapT == other.wrapT;
+         // wrapR is currently not compared; the disabled check was:
+         //this->wrapR == other.wrapR
+}
+bool Scene::operator==(const Scene &other) const {
+  return this->extensions == other.extensions && this->extras == other.extras &&
+         this->name == other.name && this->nodes == other.nodes;
+}
+bool Skin::operator==(const Skin &other) const {
+  return this->extensions == other.extensions && this->extras == other.extras &&
+         this->inverseBindMatrices == other.inverseBindMatrices &&
+         this->joints == other.joints && this->name == other.name &&
+         this->skeleton == other.skeleton;
+}
+bool Texture::operator==(const Texture &other) const {
+  return this->extensions == other.extensions && this->extras == other.extras &&
+         this->name == other.name && this->sampler == other.sampler &&
+         this->source == other.source;
+}
+bool TextureInfo::operator==(const TextureInfo &other) const {
+  return this->extensions == other.extensions && this->extras == other.extras &&
+         this->index == other.index && this->texCoord == other.texCoord;
+}
+bool NormalTextureInfo::operator==(const NormalTextureInfo &other) const {
+  return this->extensions == other.extensions && this->extras == other.extras &&
+         this->index == other.index && this->texCoord == other.texCoord &&
+         TINYGLTF_DOUBLE_EQUAL(this->scale, other.scale);
+}
+bool OcclusionTextureInfo::operator==(const OcclusionTextureInfo &other) const {
+  return this->extensions == other.extensions && this->extras == other.extras &&
+         this->index == other.index && this->texCoord == other.texCoord &&
+         TINYGLTF_DOUBLE_EQUAL(this->strength, other.strength);
+}
+bool PbrMetallicRoughness::operator==(const PbrMetallicRoughness &other) const {
+  return this->extensions == other.extensions && this->extras == other.extras &&
+         (this->baseColorTexture == other.baseColorTexture) &&
+         (this->metallicRoughnessTexture == other.metallicRoughnessTexture) &&
+         Equals(this->baseColorFactor, other.baseColorFactor) &&
+         TINYGLTF_DOUBLE_EQUAL(this->metallicFactor, other.metallicFactor) &&
+         TINYGLTF_DOUBLE_EQUAL(this->roughnessFactor, other.roughnessFactor);
+}
+bool Value::operator==(const Value &other) const {
+  return Equals(*this, other);
+}
+
+static void swap4(unsigned int *val) {  // reverses the 4 bytes of *val in place
+#ifdef TINYGLTF_LITTLE_ENDIAN
+  (void)val;  // little-endian build: the value is left untouched
+#else
+  unsigned char *bytes = reinterpret_cast<unsigned char *>(val);
+  // Exchange the outer pair and the inner pair of bytes.
+  const unsigned char outer = bytes[0];
+  const unsigned char inner = bytes[1];
+  bytes[0] = bytes[3];
+  bytes[1] = bytes[2];
+  bytes[2] = inner;
+  bytes[3] = outer;
+#endif
+}
+
+static std::string JoinPath(const std::string &path0,
+                            const std::string &path1) {
+  // Glue two path fragments together with a '/' separator.
+  // An empty path0 contributes nothing, so path1 is returned unchanged.
+  if (path0.empty()) return path1;
+
+  // Only add a separator when path0 does not already end with one.
+  // Note that a backslash is not treated as a separator here.
+  const bool hasSlash = (path0.back() == '/');
+  std::string joined(path0);
+  if (!hasSlash) joined += '/';
+  joined += path1;
+  return joined;
+}
+
+static std::string FindFile(const std::vector<std::string> &paths,
+                            const std::string &filepath, FsCallbacks *fs) {
+  // Returns the first expanded candidate `<paths[i]>/<filepath>` that exists
+  // according to the fs callbacks, or an empty string when none does.
+  const bool callbacksUsable =
+      (fs != nullptr) && (fs->ExpandFilePath != nullptr) &&
+      (fs->FileExists != nullptr);
+  if (!callbacksUsable) return std::string();  // required callback[s] missing
+
+  for (const std::string &dir : paths) {
+    const std::string candidate =
+        fs->ExpandFilePath(JoinPath(dir, filepath), fs->user_data);
+    if (fs->FileExists(candidate, fs->user_data)) return candidate;
+  }
+
+  // Nothing matched in any of the search directories.
+  return std::string();
+}
+
+static std::string GetFilePathExtension(const std::string &FileName) {
+  const std::string::size_type dot = FileName.rfind('.');  // last '.', if any
+  if (dot == std::string::npos) return std::string();  // no '.' at all
+  return FileName.substr(dot + 1);  // text after the last '.'
+}
+
+static std::string GetBaseDir(const std::string &filepath) {
+  const std::string::size_type sep = filepath.find_last_of("/\\");
+  if (sep == std::string::npos) return std::string();  // no directory part
+  return filepath.substr(0, sep);  // everything before the last separator
+}
+
+static std::string GetBaseFilename(const std::string &filepath) {
+  // Strip any leading directories; both '/' and '\\' count as separators.
+  const std::string::size_type sep = filepath.find_last_of("/\\");
+  const bool hasDir = (sep != std::string::npos);
+  return hasDir ? filepath.substr(sep + 1) : filepath;
+}
+
+std::string base64_encode(unsigned char const *, unsigned int len);
+std::string base64_decode(std::string const &s);
+
+/*
+   base64.cpp and base64.h
+
+   Copyright (C) 2004-2008 René Nyffenegger
+
+   This source code is provided 'as-is', without any express or implied
+   warranty. In no event will the author be held liable for any damages
+   arising from the use of this software.
+
+   Permission is granted to anyone to use this software for any purpose,
+   including commercial applications, and to alter it and redistribute it
+   freely, subject to the following restrictions:
+
+   1. The origin of this source code must not be misrepresented; you must not
+      claim that you wrote the original source code. If you use this source code
+      in a product, an acknowledgment in the product documentation would be
+      appreciated but is not required.
+
+   2. Altered source versions must be plainly marked as such, and must not be
+      misrepresented as being the original source code.
+
+   3. This notice may not be removed or altered from any source distribution.
+
+   René Nyffenegger rene.nyffenegger@adp-gmbh.ch
+
+*/
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wsign-conversion"
+#pragma clang diagnostic ignored "-Wconversion"
+#endif
+
+static inline bool is_base64(unsigned char c) {
+  return (c == '+') || (c == '/') || isalnum(c);  // base64 alphabet, no '='
+}
+
+std::string base64_encode(unsigned char const *bytes_to_encode,
+                          unsigned int in_len) {  // in_len bytes -> base64 text
+  std::string ret;
+  int i = 0;  // number of input bytes currently buffered in char_array_3
+  int j = 0;
+  unsigned char char_array_3[3];  // pending group of up to 3 input bytes
+  unsigned char char_array_4[4];  // the same group as 6-bit alphabet indices
+
+  const char *base64_chars =
+      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+      "abcdefghijklmnopqrstuvwxyz"
+      "0123456789+/";
+
+  while (in_len--) {
+    char_array_3[i++] = *(bytes_to_encode++);
+    if (i == 3) {  // full group: emit four output characters
+      char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
+      char_array_4[1] =
+          ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
+      char_array_4[2] =
+          ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
+      char_array_4[3] = char_array_3[2] & 0x3f;
+
+      for (i = 0; (i < 4); i++) ret += base64_chars[char_array_4[i]];
+      i = 0;
+    }
+  }
+
+  if (i) {  // 1 or 2 leftover bytes: encode a final, padded group
+    for (j = i; j < 3; j++) char_array_3[j] = '\0';  // zero-fill the gap
+
+    char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
+    char_array_4[1] =
+        ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
+    char_array_4[2] =
+        ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
+
+    for (j = 0; (j < i + 1); j++) ret += base64_chars[char_array_4[j]];  // i bytes -> i + 1 chars
+
+    while ((i++ < 3)) ret += '=';  // pad the final group to four characters
+  }
+
+  return ret;
+}
+
+std::string base64_decode(std::string const &encoded_string) {  // bytes returned in a std::string
+  int in_len = static_cast<int>(encoded_string.size());
+  int i = 0;  // number of input characters buffered in char_array_4
+  int j = 0;
+  int in_ = 0;  // read position inside encoded_string
+  unsigned char char_array_4[4], char_array_3[3];
+  std::string ret;
+
+  const std::string base64_chars =
+      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+      "abcdefghijklmnopqrstuvwxyz"
+      "0123456789+/";
+  // Decoding stops at the first '=' or at the first non-base64 character.
+  while (in_len-- && (encoded_string[in_] != '=') &&
+         is_base64(encoded_string[in_])) {
+    char_array_4[i++] = encoded_string[in_];
+    in_++;
+    if (i == 4) {  // full group: map characters to indices, emit 3 bytes
+      for (i = 0; i < 4; i++)
+        char_array_4[i] =
+            static_cast<unsigned char>(base64_chars.find(char_array_4[i]));
+
+      char_array_3[0] =
+          (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
+      char_array_3[1] =
+          ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
+      char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
+
+      for (i = 0; (i < 3); i++) ret += char_array_3[i];
+      i = 0;
+    }
+  }
+
+  if (i) {  // partial final group of i characters
+    for (j = i; j < 4; j++) char_array_4[j] = 0;  // fill the unused slots
+
+    for (j = 0; j < 4; j++)
+      char_array_4[j] =
+          static_cast<unsigned char>(base64_chars.find(char_array_4[j]));
+
+    char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
+    char_array_3[1] =
+        ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
+    char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
+
+    for (j = 0; (j < i - 1); j++) ret += char_array_3[j];  // i chars -> i - 1 bytes
+  }
+
+  return ret;
+}
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+// https://github.com/syoyo/tinygltf/issues/228
+// TODO(syoyo): Use uriparser https://uriparser.github.io/ for stricter Uri
+// decoding?
+//
+// Uri Decoding from DLIB
+// http://dlib.net/dlib/server/server_http.cpp.html
+// --- dlib begin ------------------------------------------------------------
+// Copyright (C) 2003  Davis E. King (davis@dlib.net)
+// License: Boost Software License
+// Boost Software License - Version 1.0 - August 17th, 2003
+
+// Permission is hereby granted, free of charge, to any person or organization
+// obtaining a copy of the software and accompanying documentation covered by
+// this license (the "Software") to use, reproduce, display, distribute,
+// execute, and transmit the Software, and to prepare derivative works of the
+// Software, and to permit third-parties to whom the Software is furnished to
+// do so, all subject to the following:
+// The copyright notices in the Software and this entire statement, including
+// the above license grant, this restriction and the following disclaimer,
+// must be included in all copies of the Software, in whole or in part, and
+// all derivative works of the Software, unless such copies or derivative
+// works are solely in the form of machine-executable object code generated by
+// a source language processor.
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+// DEALINGS IN THE SOFTWARE.
+//
+namespace dlib {
+
+// Converts one ASCII hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to its
+// numeric value 0..15. Any other character yields 0.
+inline unsigned char from_hex(unsigned char ch) {
+  if (ch >= '0' && ch <= '9') {
+    return static_cast<unsigned char>(ch - '0');
+  }
+  if (ch >= 'a' && ch <= 'f') {
+    return static_cast<unsigned char>(ch - 'a' + 10);
+  }
+  if (ch >= 'A' && ch <= 'F') {
+    return static_cast<unsigned char>(ch - 'A' + 10);
+  }
+  return 0;
+}
+
+// Percent-decodes `str`: every '+' becomes a space, and each "%XY" escape
+// that still has two characters after the '%' becomes the single byte
+// (from_hex(X) << 4) | from_hex(Y). All other characters, including a '%' too
+// close to the end of the string to carry two digits, are copied unchanged.
+static const std::string urldecode(const std::string &str) {
+  std::string decoded;
+  const std::string::size_type len = str.size();
+  std::string::size_type pos = 0;
+  while (pos < len) {
+    const char c = str[pos];
+    if (c == '+') {
+      decoded += ' ';
+      ++pos;
+    } else if (c == '%' && len > pos + 2) {
+      const unsigned char hi =
+          from_hex(static_cast<unsigned char>(str[pos + 1]));
+      const unsigned char lo =
+          from_hex(static_cast<unsigned char>(str[pos + 2]));
+      decoded += static_cast<char>(static_cast<unsigned char>((hi << 4) | lo));
+      pos += 3;  // skip the '%' and both hex digits
+    } else {
+      decoded += c;
+      ++pos;
+    }
+  }
+  return decoded;
+}
+
+}  // namespace dlib
+// --- dlib end --------------------------------------------------------------
+
+// Reads the external file `filename` into `*out`, looking it up with
+// FindFile() in `basedir` first and then in ".".
+//
+// Failure messages are appended to `*err` when `required` is true and to
+// `*warn` otherwise (either pointer may be null); the missing-callback
+// failure is always reported through `err`. When `checkSize` is true the file
+// must be exactly `reqBytes` long. Returns true on success. `*out` is cleared
+// once the callbacks have been validated and only receives data on success.
+static bool LoadExternalFile(std::vector<unsigned char> *out, std::string *err,
+                             std::string *warn, const std::string &filename,
+                             const std::string &basedir, bool required,
+                             size_t reqBytes, bool checkSize, FsCallbacks *fs) {
+  const bool callbacksReady = (fs != nullptr) && (fs->FileExists != nullptr) &&
+                              (fs->ExpandFilePath != nullptr) &&
+                              (fs->ReadWholeFile != nullptr);
+  if (!callbacksReady) {
+    // This is a developer error, assert() ?
+    if (err) {
+      (*err) += "FS callback[s] not set\n";
+    }
+    return false;
+  }
+
+  // Required files report through `err`, optional ones through `warn`.
+  std::string *const report = required ? err : warn;
+
+  out->clear();
+
+  std::vector<std::string> searchDirs;
+  searchDirs.push_back(basedir);
+  searchDirs.push_back(".");
+
+  const std::string resolved = FindFile(searchDirs, filename, fs);
+  if (resolved.empty() || filename.empty()) {
+    if (report) {
+      (*report) += "File not found : " + filename + "\n";
+    }
+    return false;
+  }
+
+  std::vector<unsigned char> contents;
+  std::string readError;
+  if (!fs->ReadWholeFile(&contents, &readError, resolved, fs->user_data)) {
+    if (report) {
+      (*report) += "File read error : " + resolved + " : " + readError + "\n";
+    }
+    return false;
+  }
+
+  const size_t actualBytes = contents.size();
+  if (actualBytes == 0) {
+    if (report) {
+      (*report) += "File is empty : " + resolved + "\n";
+    }
+    return false;
+  }
+
+  if (checkSize && reqBytes != actualBytes) {
+    std::stringstream ss;
+    ss << "File size mismatch : " << resolved << ", requestedBytes "
+       << reqBytes << ", but got " << actualBytes << std::endl;
+    if (report) {
+      (*report) += ss.str();
+    }
+    return false;
+  }
+
+  out->swap(contents);
+  return true;
+}
+
+// Installs `func` as the LoadImageData callback, stores `user_data` in
+// load_image_user_data_, and sets user_image_loader_ to mark that a
+// caller-supplied loader is active.
+void TinyGLTF::SetImageLoader(LoadImageDataFunction func, void *user_data) {
+  user_image_loader_ = true;
+  load_image_user_data_ = user_data;
+  LoadImageData = func;
+}
+
+// Drops a caller-supplied image loader: the callback reverts to
+// tinygltf::LoadImageData (or to nullptr when TINYGLTF_NO_STB_IMAGE is
+// defined), the stored user data pointer is cleared and user_image_loader_
+// is reset.
+void TinyGLTF::RemoveImageLoader() {
+#ifndef TINYGLTF_NO_STB_IMAGE
+  LoadImageData = &tinygltf::LoadImageData;
+#else
+  LoadImageData = nullptr;
+#endif
+
+  user_image_loader_ = false;
+  load_image_user_data_ = nullptr;
+}
+
+#ifndef TINYGLTF_NO_STB_IMAGE
+// Image decoder built on stb_image: decodes the encoded image held in
+// `bytes`/`size` and stores the pixels plus metadata in `*image`.
+//
+// `user_data`, when non-null, is read as a LoadImageDataOption. With
+// `preserve_channels` set the channel count stored in the file is kept;
+// otherwise 4 channels are requested from stb. Data that stb reports as
+// 16-bit is decoded at 16 bits per channel (pixel_type UNSIGNED_SHORT) when
+// that decode succeeds; in every other case an 8-bit decode
+// (UNSIGNED_BYTE) is attempted. A positive `req_width` / `req_height` must
+// equal the decoded size.
+//
+// Returns true on success. On failure a message is appended to `*err` (if
+// non-null) and false is returned. `warn` is unused.
+bool LoadImageData(Image *image, const int image_idx, std::string *err,
+                   std::string *warn, int req_width, int req_height,
+                   const unsigned char *bytes, int size, void *user_data) {
+  (void)warn;
+
+  LoadImageDataOption option;
+  if (user_data) {
+    option = *reinterpret_cast<LoadImageDataOption *>(user_data);
+  }
+
+  int w = 0, h = 0, comp = 0, req_comp = 0;
+
+  unsigned char *data = nullptr;
+
+  // preserve_channels true: Use channels stored in the image file.
+  // false: force 32-bit textures for common Vulkan compatibility. It appears
+  // that some GPU drivers do not support 24-bit images for Vulkan
+  req_comp = option.preserve_channels ? 0 : 4;
+  int bits = 8;
+  int pixel_type = TINYGLTF_COMPONENT_TYPE_UNSIGNED_BYTE;
+
+  // It is possible that the image we want to load is a 16bit per channel image
+  // We are going to attempt to load it as 16bit per channel, and if it worked,
+  // set the image data accordingly. We are casting the returned pointer into
+  // unsigned char, because we are representing "bytes". But we are updating
+  // the Image metadata to signal that this image uses 2 bytes (16bits) per
+  // channel:
+  if (stbi_is_16_bit_from_memory(bytes, size)) {
+    data = reinterpret_cast<unsigned char *>(
+        stbi_load_16_from_memory(bytes, size, &w, &h, &comp, req_comp));
+    if (data) {
+      bits = 16;
+      pixel_type = TINYGLTF_COMPONENT_TYPE_UNSIGNED_SHORT;
+    }
+  }
+
+  // at this point, if data is still NULL, it means that the image wasn't
+  // 16bit per channel, we are going to load it as a normal 8bit per channel
+  // image as we used to do:
+  // if image cannot be decoded, ignore parsing and keep it by its path
+  // don't break in this case
+  // FIXME we should only enter this function if the image is embedded. If
+  // image->uri references
+  // an image file, it should be left as it is. Image loading should not be
+  // mandatory (to support other formats)
+  if (!data) data = stbi_load_from_memory(bytes, size, &w, &h, &comp, req_comp);
+  if (!data) {
+    // NOTE: you can use `warn` instead of `err`
+    if (err) {
+      (*err) +=
+          "Unknown image format. STB cannot decode image data for image[" +
+          std::to_string(image_idx) + "] name = \"" + image->name + "\".\n";
+    }
+    return false;
+  }
+
+  if ((w < 1) || (h < 1)) {
+    stbi_image_free(data);
+    if (err) {
+      (*err) += "Invalid image data for image[" + std::to_string(image_idx) +
+                "] name = \"" + image->name + "\"\n";
+    }
+    return false;
+  }
+
+  if (req_width > 0) {
+    if (req_width != w) {
+      stbi_image_free(data);
+      if (err) {
+        (*err) += "Image width mismatch for image[" +
+                  std::to_string(image_idx) + "] name = \"" + image->name +
+                  "\"\n";
+      }
+      return false;
+    }
+  }
+
+  if (req_height > 0) {
+    if (req_height != h) {
+      stbi_image_free(data);
+      if (err) {
+        (*err) += "Image height mismatch. for image[" +
+                  std::to_string(image_idx) + "] name = \"" + image->name +
+                  "\"\n";
+      }
+      return false;
+    }
+  }
+
+  if (req_comp != 0) {
+    // loaded data has `req_comp` channels(components)
+    comp = req_comp;
+  }
+
+  image->width = w;
+  image->height = h;
+  image->component = comp;
+  image->bits = bits;
+  image->pixel_type = pixel_type;
+
+  // Do the size arithmetic in size_t: w * h * comp * (bits / 8) evaluated in
+  // int overflows (undefined behaviour) for large images.
+  const size_t num_bytes = static_cast<size_t>(w) * static_cast<size_t>(h) *
+                           static_cast<size_t>(comp) *
+                           static_cast<size_t>(bits / 8);
+  image->image.resize(num_bytes);
+  std::copy(data, data + num_bytes, image->image.begin());
+  stbi_image_free(data);
+
+  return true;
+}
+#endif
+
+// Installs `func` as the WriteImageData callback and stores `user_data` in
+// write_image_user_data_.
+void TinyGLTF::SetImageWriter(WriteImageDataFunction func, void *user_data) {
+  write_image_user_data_ = user_data;
+  WriteImageData = func;
+}
+
+#ifndef TINYGLTF_NO_STB_IMAGE_WRITE
+// Output callback handed to the stbi_write_*_to_func encoders: appends the
+// `size` bytes at `data` to the std::vector<unsigned char> that `context`
+// points to.
+static void WriteToMemory_stbi(void *context, void *data, int size) {
+  auto *sink = reinterpret_cast<std::vector<unsigned char> *>(context);
+  unsigned char *first = reinterpret_cast<unsigned char *>(data);
+  unsigned char *last = first + size;
+
+  sink->insert(sink->end(), first, last);
+}
+
+// Image writer built on stb_image_write. Encodes `image` according to the
+// extension of `*filename` ("png", "jpg" or "bmp") and then either
+//  - embeds the result in `image->uri` as a base64 data URI (`embedImages`
+//    true), or
+//  - writes it to JoinPath(*basepath, *filename) through the FsCallbacks
+//    passed as `fsPtr` and sets `image->uri` to `*filename`.
+//
+// Returns false when PNG output is requested for data that is not 8-bit
+// unsigned, when an encoder fails, when a file (not embedded) is requested
+// with any other extension, or when the write callback fails.
+// NOTE: with `embedImages` and an unsupported extension nothing is encoded and
+// true is still returned; likewise a null `fsPtr` / WriteWholeFile skips the
+// write but still updates the uri and returns true.
+bool WriteImageData(const std::string *basepath, const std::string *filename,
+                    Image *image, bool embedImages, void *fsPtr) {
+  const std::string extension = GetFilePathExtension(*filename);
+
+  // Write image to temporary buffer
+  std::string uriPrefix;
+  std::vector<unsigned char> encoded;
+
+  if (extension == "png") {
+    const bool eightBitUnsigned =
+        (image->bits == 8) &&
+        (image->pixel_type == TINYGLTF_COMPONENT_TYPE_UNSIGNED_BYTE);
+    if (!eightBitUnsigned) {
+      // Unsupported pixel format
+      return false;
+    }
+
+    const int ok = stbi_write_png_to_func(WriteToMemory_stbi, &encoded,
+                                          image->width, image->height,
+                                          image->component, &image->image[0], 0);
+    if (!ok) {
+      return false;
+    }
+    uriPrefix = "data:image/png;base64,";
+  } else if (extension == "jpg") {
+    const int ok = stbi_write_jpg_to_func(
+        WriteToMemory_stbi, &encoded, image->width, image->height,
+        image->component, &image->image[0], 100);
+    if (!ok) {
+      return false;
+    }
+    uriPrefix = "data:image/jpeg;base64,";
+  } else if (extension == "bmp") {
+    const int ok = stbi_write_bmp_to_func(WriteToMemory_stbi, &encoded,
+                                          image->width, image->height,
+                                          image->component, &image->image[0]);
+    if (!ok) {
+      return false;
+    }
+    uriPrefix = "data:image/bmp;base64,";
+  } else if (!embedImages) {
+    // Error: can't output requested format to file
+    return false;
+  }
+
+  if (embedImages) {
+    // Embed base64-encoded image into URI
+    if (!encoded.empty()) {
+      image->uri = uriPrefix +
+                   base64_encode(&encoded[0],
+                                 static_cast<unsigned int>(encoded.size()));
+    }
+    // Nothing was encoded: the uri is left untouched. Throw error?
+    return true;
+  }
+
+  // Write image to disc
+  FsCallbacks *fs = reinterpret_cast<FsCallbacks *>(fsPtr);
+  if ((fs != nullptr) && (fs->WriteWholeFile != nullptr)) {
+    const std::string imagefilepath = JoinPath(*basepath, *filename);
+    std::string writeError;
+    const bool written =
+        fs->WriteWholeFile(&writeError, imagefilepath, encoded, fs->user_data);
+    if (!written) {
+      // Could not write image file to disc; Throw error ?
+      return false;
+    }
+  }
+  // Missing callbacks are tolerated silently. Throw error?
+  image->uri = *filename;
+
+  return true;
+}
+#endif
+
+// Stores a copy of `callbacks` in the `fs` member.
+void TinyGLTF::SetFsCallbacks(FsCallbacks callbacks) {
+  fs = callbacks;
+}
+
+#ifdef _WIN32
+// Converts the UTF-8 string `str` to a wide string with MultiByteToWideChar:
+// the first call (null output buffer) yields the result length, the second
+// call fills a string of that length.
+static inline std::wstring UTF8ToWchar(const std::string &str) {
+  const int src_len = static_cast<int>(str.size());
+  const int wide_len =
+      MultiByteToWideChar(CP_UTF8, 0, str.data(), src_len, nullptr, 0);
+
+  std::wstring wide(wide_len, 0);
+  MultiByteToWideChar(CP_UTF8, 0, str.data(), src_len, &wide[0],
+                      static_cast<int>(wide.size()));
+  return wide;
+}
+
+// Converts the wide string `wstr` to UTF-8 with WideCharToMultiByte: the first
+// call (null output buffer) yields the result length, the second call fills a
+// string of that length.
+static inline std::string WcharToUTF8(const std::wstring &wstr) {
+  const int src_len = static_cast<int>(wstr.size());
+  const int utf8_len = WideCharToMultiByte(CP_UTF8, 0, wstr.data(), src_len,
+                                           nullptr, 0, nullptr, nullptr);
+
+  std::string utf8(utf8_len, 0);
+  WideCharToMultiByte(CP_UTF8, 0, wstr.data(), src_len, &utf8[0],
+                      static_cast<int>(utf8.size()), nullptr, nullptr);
+  return utf8;
+}
+#endif
+
+#ifndef TINYGLTF_NO_FS
+// Default implementations of filesystem functions
+
+// Default filesystem helper: reports whether `abs_filename` can be opened for
+// reading. The `void *` user-data argument is ignored.
+//
+// With TINYGLTF_ANDROID_LOAD_FROM_ASSETS the check opens the name as an asset
+// through `asset_manager` (false when no asset manager is set); otherwise the
+// file is opened with the C stdio API and closed again immediately.
+bool FileExists(const std::string &abs_filename, void *) {
+  bool ret;
+#ifdef TINYGLTF_ANDROID_LOAD_FROM_ASSETS
+  if (asset_manager) {
+    AAsset *asset = AAssetManager_open(asset_manager, abs_filename.c_str(),
+                                       AASSET_MODE_STREAMING);
+    if (!asset) {
+      return false;
+    }
+    // Only existence matters; release the handle right away.
+    AAsset_close(asset);
+    ret = true;
+  } else {
+    return false;
+  }
+#else
+#ifdef _WIN32
+#if defined(_MSC_VER) || defined(__GLIBCXX__)
+  // The UTF-8 name is converted to wide characters for the wide-char open.
+  FILE *fp = nullptr;
+  errno_t err = _wfopen_s(&fp, UTF8ToWchar(abs_filename).c_str(), L"rb");
+  if (err != 0) {
+    return false;
+  }
+#else
+  // Other Windows toolchains: the name is passed to fopen_s unconverted.
+  FILE *fp = nullptr;
+  errno_t err = fopen_s(&fp, abs_filename.c_str(), "rb");
+  if (err != 0) {
+    return false;
+  }
+#endif
+
+#else
+  FILE *fp = fopen(abs_filename.c_str(), "rb");
+#endif
+  // Shared tail for all stdio variants: a non-null handle means the file
+  // could be opened.
+  if (fp) {
+    ret = true;
+    fclose(fp);
+  } else {
+    ret = false;
+  }
+#endif
+
+  return ret;
+}
+
+// Default filesystem helper: returns `filepath` after platform-specific
+// expansion. The `void *` user-data argument is ignored.
+//
+//  - Windows: `filepath` is treated as UTF-8 and passed through
+//    ExpandEnvironmentStringsW.
+//  - iOS / Android / Emscripten / OpenBSD: returned unchanged.
+//  - Everything else: expanded with wordexp(); only the first resulting word
+//    is used. Command substitution is disabled.
+//
+// When expansion fails the input is returned unchanged; an empty input yields
+// an empty string.
+std::string ExpandFilePath(const std::string &filepath, void *) {
+#ifdef _WIN32
+  // Assume input `filepath` is encoded in UTF-8
+  const std::wstring wfilepath = UTF8ToWchar(filepath);
+  // With a null buffer the call reports the required size in wchar_t units,
+  // terminating null included; 0 means the call failed.
+  const DWORD wlen = ExpandEnvironmentStringsW(wfilepath.c_str(), nullptr, 0);
+  if (wlen == 0) {
+    return filepath;
+  }
+
+  // Zero-filled, self-releasing buffer: always null-terminated, never leaked.
+  std::vector<wchar_t> wbuf(wlen, L'\0');
+  ExpandEnvironmentStringsW(wfilepath.c_str(), wbuf.data(), wlen);
+  return WcharToUTF8(std::wstring(wbuf.data()));
+
+#else
+
+#if defined(TARGET_OS_IPHONE) || defined(TARGET_IPHONE_SIMULATOR) || \
+    defined(__ANDROID__) || defined(__EMSCRIPTEN__) || defined(__OpenBSD__)
+  // no expansion
+  return filepath;
+#else
+  if (filepath.empty()) {
+    return "";
+  }
+
+  // Quote the string to keep any spaces in filepath intact.
+  const std::string quoted_path = "\"" + filepath + "\"";
+
+  // WRDE_NOCMD makes wordexp() fail instead of running $(...) or `...`
+  // command substitutions, so a path taken from an untrusted asset cannot
+  // execute shell commands. Such paths fall back to the unexpanded input.
+  wordexp_t p;
+  if (wordexp(quoted_path.c_str(), &p, WRDE_NOCMD) != 0) {
+    return filepath;
+  }
+
+  // Use first element only.
+  std::string s = filepath;
+  if (p.we_wordc > 0 && p.we_wordv && p.we_wordv[0]) {
+    s = std::string(p.we_wordv[0]);
+  }
+  // Release the word list after every successful wordexp() call.
+  wordfree(&p);
+  return s;
+#endif
+#endif
+}
+
+// Default filesystem helper: reads the whole file `filepath` into `*out`.
+// The `void *` user-data argument is ignored.
+//
+// With TINYGLTF_ANDROID_LOAD_FROM_ASSETS the file is read as an asset through
+// `asset_manager`; otherwise it is read through a binary input stream (opened
+// from a wide-character path on Windows where the toolchain allows it).
+//
+// Returns true on success. Returns false, appending a message to `*err` when
+// `err` is non-null, if the file cannot be opened, its size cannot be
+// determined, it is empty, or the read does not deliver the full contents.
+bool ReadWholeFile(std::vector<unsigned char> *out, std::string *err,
+                   const std::string &filepath, void *) {
+#ifdef TINYGLTF_ANDROID_LOAD_FROM_ASSETS
+  if (asset_manager) {
+    AAsset *asset = AAssetManager_open(asset_manager, filepath.c_str(),
+                                       AASSET_MODE_STREAMING);
+    if (!asset) {
+      if (err) {
+        (*err) += "File open error : " + filepath + "\n";
+      }
+      return false;
+    }
+    size_t size = AAsset_getLength(asset);
+    if (size == 0) {
+      // Release the asset handle on this early exit as well.
+      AAsset_close(asset);
+      if (err) {
+        (*err) += "Invalid file size : " + filepath +
+                  " (does the path point to a directory?)";
+      }
+      return false;
+    }
+    out->resize(size);
+    AAsset_read(asset, reinterpret_cast<char *>(&out->at(0)), size);
+    AAsset_close(asset);
+    return true;
+  } else {
+    if (err) {
+      (*err) += "No asset manager specified : " + filepath + "\n";
+    }
+    return false;
+  }
+#else
+#ifdef _WIN32
+#if defined(__GLIBCXX__)  // mingw
+  int file_descriptor =
+      _wopen(UTF8ToWchar(filepath).c_str(), _O_RDONLY | _O_BINARY);
+  __gnu_cxx::stdio_filebuf<char> wfile_buf(file_descriptor, std::ios_base::in);
+  std::istream f(&wfile_buf);
+#elif defined(_MSC_VER) || defined(_LIBCPP_VERSION)
+  // For libcxx, assume _LIBCPP_HAS_OPEN_WITH_WCHAR is defined to accept
+  // `wchar_t *`
+  std::ifstream f(UTF8ToWchar(filepath).c_str(), std::ifstream::binary);
+#else
+  // Unknown compiler/runtime
+  std::ifstream f(filepath.c_str(), std::ifstream::binary);
+#endif
+#else
+  std::ifstream f(filepath.c_str(), std::ifstream::binary);
+#endif
+  if (!f) {
+    if (err) {
+      (*err) += "File open error : " + filepath + "\n";
+    }
+    return false;
+  }
+
+  // Seek to the end to learn the size, then rewind for the actual read.
+  f.seekg(0, f.end);
+  size_t sz = static_cast<size_t>(f.tellg());
+  f.seekg(0, f.beg);
+
+  // tellg() reports failure as -1, which shows up here as a value that is
+  // negative once reinterpreted as signed.
+  if (int64_t(sz) < 0) {
+    if (err) {
+      (*err) += "Invalid file size : " + filepath +
+                " (does the path point to a directory?)";
+    }
+    return false;
+  } else if (sz == 0) {
+    if (err) {
+      (*err) += "File is empty : " + filepath + "\n";
+    }
+    return false;
+  }
+
+  out->resize(sz);
+  f.read(reinterpret_cast<char *>(&out->at(0)),
+         static_cast<std::streamsize>(sz));
+  if (!f) {
+    // Short or failed read: do not hand back a partially filled buffer.
+    out->clear();
+    if (err) {
+      (*err) += "File read error : " + filepath + "\n";
+    }
+    return false;
+  }
+
+  return true;
+#endif
+}
+
+// Default filesystem helper: writes `contents` to `filepath` through a binary
+// output stream (opened from a wide-character path on Windows where the
+// toolchain allows it). The `void *` user-data argument is ignored.
+//
+// Returns true on success. Returns false, appending a message to `*err` when
+// `err` is non-null, if the file cannot be opened or the write fails. Empty
+// `contents` is valid: the file is opened and nothing is written to it.
+bool WriteWholeFile(std::string *err, const std::string &filepath,
+                    const std::vector<unsigned char> &contents, void *) {
+#ifdef _WIN32
+#if defined(__GLIBCXX__)  // mingw
+  int file_descriptor = _wopen(UTF8ToWchar(filepath).c_str(),
+                               _O_CREAT | _O_WRONLY | _O_TRUNC | _O_BINARY);
+  __gnu_cxx::stdio_filebuf<char> wfile_buf(
+      file_descriptor, std::ios_base::out | std::ios_base::binary);
+  std::ostream f(&wfile_buf);
+#elif defined(_MSC_VER)
+  std::ofstream f(UTF8ToWchar(filepath).c_str(), std::ofstream::binary);
+#else  // clang?
+  std::ofstream f(filepath.c_str(), std::ofstream::binary);
+#endif
+#else
+  std::ofstream f(filepath.c_str(), std::ofstream::binary);
+#endif
+  if (!f) {
+    if (err) {
+      (*err) += "File open error for writing : " + filepath + "\n";
+    }
+    return false;
+  }
+
+  // contents.at(0) throws std::out_of_range on an empty vector, so only touch
+  // the data when there is something to write.
+  if (!contents.empty()) {
+    f.write(reinterpret_cast<const char *>(contents.data()),
+            static_cast<std::streamsize>(contents.size()));
+  }
+  if (!f) {
+    if (err) {
+      (*err) += "File write error: " + filepath + "\n";
+    }
+    return false;
+  }
+
+  return true;
+}
+
+#endif  // TINYGLTF_NO_FS
+
+// Maps an image MIME type to the matching file extension (without a dot).
+// Returns an empty string for any MIME type other than the four listed here.
+static std::string MimeToExt(const std::string &mimeType) {
+  static const char *const kMimeToExt[][2] = {
+      {"image/jpeg", "jpg"},
+      {"image/png", "png"},
+      {"image/bmp", "bmp"},
+      {"image/gif", "gif"},
+  };
+
+  for (const auto &entry : kMimeToExt) {
+    if (mimeType == entry[0]) {
+      return entry[1];
+    }
+  }
+
+  return "";
+}
+
+// Chooses a file name for `image` and, when a writer callback is available,
+// invokes it so the callback can update the image object.
+//
+// The file name is picked as follows:
+//  - image has a uri: the base file name of that uri;
+//  - no uri but a bufferView: none (the image is left untouched);
+//  - otherwise: "<name>.<ext>" if the image has a name, else "<index>.<ext>",
+//    where <ext> comes from MimeToExt(image.mimeType) and may be empty.
+//
+// `WriteImageData` points at the callback to use; a null pointer or a null
+// callback means nothing is invoked. The callback receives `baseDir`, the
+// chosen file name, the image, `embedImages` and `user_data`; its return
+// value is ignored.
+static void UpdateImageObject(Image &image, std::string &baseDir, int index,
+                              bool embedImages,
+                              WriteImageDataFunction *WriteImageData = nullptr,
+                              void *user_data = nullptr) {
+  std::string filename;
+  std::string ext;
+  // If image has uri, use it as a filename
+  if (image.uri.size()) {
+    filename = GetBaseFilename(image.uri);
+    ext = GetFilePathExtension(filename);
+  } else if (image.bufferView != -1) {
+    // If there's no URI and the data exists in a buffer,
+    // don't change properties or write images
+  } else if (image.name.size()) {
+    ext = MimeToExt(image.mimeType);
+    // Otherwise use name as filename
+    filename = image.name + "." + ext;
+  } else {
+    ext = MimeToExt(image.mimeType);
+    // Fallback to index of image as filename
+    filename = std::to_string(index) + "." + ext;
+  }
+
+  // If callback is set, modify image data object.
+  // The outer pointer defaults to nullptr, so it has to be checked before it
+  // is dereferenced to reach the callback itself.
+  if (WriteImageData != nullptr && *WriteImageData != nullptr &&
+      !filename.empty()) {
+    (*WriteImageData)(&baseDir, &filename, &image, embedImages, user_data);
+  }
+}
+
+// Returns true when `in` starts with one of the base64 data-URI headers
+// listed below (octet-stream, jpeg, png, bmp, gif, text/plain, gltf-buffer).
+bool IsDataURI(const std::string &in) {
+  static const char *const kHeaders[] = {
+      "data:application/octet-stream;base64,",
+      "data:image/jpeg;base64,",
+      "data:image/png;base64,",
+      "data:image/bmp;base64,",
+      "data:image/gif;base64,",
+      "data:text/plain;base64,",
+      "data:application/gltf-buffer;base64,",
+  };
+
+  for (const char *header : kHeaders) {
+    // find() == 0 means the header sits at the very start of `in`.
+    if (in.find(header) == 0) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// Decodes the base64 payload of data URI `in` into `*out`.
+//
+// `in` must start with one of the headers in the table below. For the image
+// and text/plain headers `mime_type` is set to the corresponding MIME type;
+// for the octet-stream and gltf-buffer headers `mime_type` is left as passed
+// in. Returns false when no header matches, when the decoded payload is
+// empty, or when `checkSize` is true and the payload is not exactly
+// `reqBytes` long; `*out` is only written on success.
+bool DecodeDataURI(std::vector<unsigned char> *out, std::string &mime_type,
+                   const std::string &in, size_t reqBytes, bool checkSize) {
+  struct Scheme {
+    const char *header;
+    const char *mime;  // nullptr: do not touch `mime_type`
+  };
+  static const Scheme kSchemes[] = {
+      {"data:application/octet-stream;base64,", nullptr},
+      {"data:image/jpeg;base64,", "image/jpeg"},
+      {"data:image/png;base64,", "image/png"},
+      {"data:image/bmp;base64,", "image/bmp"},
+      {"data:image/gif;base64,", "image/gif"},
+      {"data:text/plain;base64,", "text/plain"},
+      {"data:application/gltf-buffer;base64,", nullptr},
+  };
+
+  std::string payload;
+  for (const Scheme &scheme : kSchemes) {
+    // Later headers are only tried while nothing has been decoded yet.
+    if (!payload.empty()) {
+      break;
+    }
+    const std::string header = scheme.header;
+    if (in.find(header) != 0) {
+      continue;
+    }
+    if (scheme.mime != nullptr) {
+      mime_type = scheme.mime;
+    }
+    payload = base64_decode(in.substr(header.size()));  // cut mime string.
+  }
+
+  // TODO(syoyo): Allow empty buffer? #229
+  if (payload.empty()) {
+    return false;
+  }
+
+  if (checkSize && payload.size() != reqBytes) {
+    return false;
+  }
+
+  out->assign(payload.begin(), payload.end());
+  return true;
+}
+
+namespace {
+// Stores the value of `o` in `val` and returns true when `o` is an integral
+// JSON number (signed or unsigned); wider values are narrowed with
+// static_cast<int>. Returns false, leaving `val` untouched, for anything else
+// (including floating-point numbers).
+bool GetInt(const json &o, int &val) {
+#ifdef TINYGLTF_USE_RAPIDJSON
+  if (o.IsDouble()) {
+    return false;
+  }
+
+  if (o.IsInt()) {
+    val = o.GetInt();
+  } else if (o.IsUint()) {
+    val = static_cast<int>(o.GetUint());
+  } else if (o.IsInt64()) {
+    val = static_cast<int>(o.GetInt64());
+  } else if (o.IsUint64()) {
+    val = static_cast<int>(o.GetUint64());
+  } else {
+    return false;
+  }
+  return true;
+#else
+  const auto kind = o.type();
+  const bool integral = (kind == json::value_t::number_integer) ||
+                        (kind == json::value_t::number_unsigned);
+  if (!integral) {
+    return false;
+  }
+
+  val = static_cast<int>(o.get<int64_t>());
+  return true;
+#endif
+}
+
+#ifdef TINYGLTF_USE_RAPIDJSON
+// RapidJSON build only: stores `o` in `val` and returns true when `o` is a
+// double; otherwise returns false and leaves `val` untouched.
+bool GetDouble(const json &o, double &val) {
+  if (!o.IsDouble()) {
+    return false;
+  }
+
+  val = o.GetDouble();
+  return true;
+}
+#endif
+
+// Stores the value of `o` in `val` as a double and returns true when `o` is
+// any JSON number; otherwise returns false and leaves `val` untouched.
+bool GetNumber(const json &o, double &val) {
+#ifdef TINYGLTF_USE_RAPIDJSON
+  if (!o.IsNumber()) {
+    return false;
+  }
+
+  val = o.GetDouble();
+  return true;
+#else
+  if (!o.is_number()) {
+    return false;
+  }
+
+  val = o.get<double>();
+  return true;
+#endif
+}
+
+// Copies the string held by `o` into `val` and returns true when `o` is a
+// JSON string; otherwise returns false and leaves `val` untouched.
+bool GetString(const json &o, std::string &val) {
+#ifdef TINYGLTF_USE_RAPIDJSON
+  if (!o.IsString()) {
+    return false;
+  }
+
+  val = o.GetString();
+  return true;
+#else
+  if (o.type() != json::value_t::string) {
+    return false;
+  }
+
+  val = o.get<std::string>();
+  return true;
+#endif
+}
+
+// Returns true when `o` is a JSON array, using the array test of whichever
+// JSON backend the build selected (RapidJSON or the other `json` type).
+bool IsArray(const json &o) {
+#ifdef TINYGLTF_USE_RAPIDJSON
+  return o.IsArray();
+#else
+  return o.is_array();
+#endif
+}
+
+// Returns the backend's begin iterator for `o`, for element iteration.
+// NOTE(review): presumably callers check IsArray(o) first - confirm.
+json_const_array_iterator ArrayBegin(const json &o) {
+#ifdef TINYGLTF_USE_RAPIDJSON
+  return o.Begin();
+#else
+  return o.begin();
+#endif
+}
+
+// Returns the backend's end iterator for `o`; pairs with ArrayBegin().
+// NOTE(review): presumably callers check IsArray(o) first - confirm.
+json_const_array_iterator ArrayEnd(const json &o) {
+#ifdef TINYGLTF_USE_RAPIDJSON
+  return o.End();
+#else
+  return o.end();
+#endif
+}
+
+// Returns true when `o` is a JSON object, using the object test of whichever
+// JSON backend the build selected (RapidJSON or the other `json` type).
+bool IsObject(const json &o) {
+#ifdef TINYGLTF_USE_RAPIDJSON
+  return o.IsObject();
+#else
+  return o.is_object();
+#endif
+}
+
+// Returns the backend's begin iterator for `o`, for member iteration.
+// NOTE(review): presumably callers check IsObject(o) first - confirm.
+json_const_iterator ObjectBegin(const json &o) {
+#ifdef TINYGLTF_USE_RAPIDJSON
+  return o.MemberBegin();
+#else
+  return o.begin();
+#endif
+}
+
+// Returns the backend's end iterator for `o`; pairs with ObjectBegin().
+// NOTE(review): presumably callers check IsObject(o) first - confirm.
+json_const_iterator ObjectEnd(const json &o) {
+#ifdef TINYGLTF_USE_RAPIDJSON
+  return o.MemberEnd();
+#else
+  return o.end();
+#endif
+}
+
+// Returns a copy of the key (member name) that object iterator `it` refers
+// to. The result is a std::string by value on purpose: making this a
+// const char* results in a pointer to a temporary when
+// TINYGLTF_USE_RAPIDJSON is off.
+std::string GetKey(json_const_iterator &it) {
+#ifdef TINYGLTF_USE_RAPIDJSON
+  return it->name.GetString();
+#else
+  return it.key().c_str();
+#endif
+}
+
+// Looks up `member` in `o`, stores the lookup result in `it` and returns true
+// when the member exists. In the RapidJSON build a non-object `o` yields
+// false without touching `it`.
+bool FindMember(const json &o, const char *member, json_const_iterator &it) {
+#ifdef TINYGLTF_USE_RAPIDJSON
+  if (o.IsObject()) {
+    it = o.FindMember(member);
+    return it != o.MemberEnd();
+  }
+  return false;
+#else
+  it = o.find(member);
+  const bool found = (it != o.end());
+  return found;
+#endif
+}
+
+// Returns a reference to the JSON value that iterator `it` refers to. The
+// reference is obtained from the iterator, so `it` must be dereferenceable.
+const json &GetValue(json_const_iterator &it) {
+#ifdef TINYGLTF_USE_RAPIDJSON
+  return it->value;
+#else
+  return it.value();
+#endif
+}
+
+// Serializes `o` to a string. `spacing` is the indent width; the default of
+// -1 requests the non-pretty writer in the RapidJSON build and is passed
+// straight to dump() otherwise.
+std::string JsonToString(const json &o, int spacing = -1) {
+#ifdef TINYGLTF_USE_RAPIDJSON
+  using namespace rapidjson;
+  StringBuffer buffer;
+  if (spacing == -1) {
+    // No indentation requested: plain writer.
+    Writer<StringBuffer> writer(buffer);
+    o.Accept(writer);
+  } else {
+    // Pretty-print with `spacing` spaces per indent level.
+    PrettyWriter<StringBuffer> writer(buffer);
+    writer.SetIndent(' ', uint32_t(spacing));
+    o.Accept(writer);
+  }
+  return buffer.GetString();
+#else
+  return o.dump(spacing);
+#endif
+}
+
+}  // namespace
+
+// Converts the JSON node `o` into a tinygltf Value and stores it in `*ret`
+// (when `ret` is non-null).
+//
+// Objects and arrays are converted recursively. Entries that convert to a
+// null Value are dropped, and an object or array with no remaining entries
+// becomes a null Value itself. Integral numbers are narrowed to int.
+//
+// Returns true when the converted Value is not of NULL_TYPE.
+static bool ParseJsonAsValue(Value *ret, const json &o) {
+  Value val{};
+#ifdef TINYGLTF_USE_RAPIDJSON
+  using rapidjson::Type;
+  switch (o.GetType()) {
+    case Type::kObjectType: {
+      Value::Object value_object;
+      for (auto it = o.MemberBegin(); it != o.MemberEnd(); ++it) {
+        Value entry;
+        ParseJsonAsValue(&entry, it->value);
+        if (entry.Type() != NULL_TYPE)
+          value_object.emplace(GetKey(it), std::move(entry));
+      }
+      if (value_object.size() > 0) val = Value(std::move(value_object));
+    } break;
+    case Type::kArrayType: {
+      Value::Array value_array;
+      value_array.reserve(o.Size());
+      for (auto it = o.Begin(); it != o.End(); ++it) {
+        Value entry;
+        ParseJsonAsValue(&entry, *it);
+        if (entry.Type() != NULL_TYPE)
+          value_array.emplace_back(std::move(entry));
+      }
+      if (value_array.size() > 0) val = Value(std::move(value_array));
+    } break;
+    case Type::kStringType:
+      val = Value(std::string(o.GetString()));
+      break;
+    case Type::kFalseType:
+    case Type::kTrueType:
+      val = Value(o.GetBool());
+      break;
+    case Type::kNumberType:
+      if (!o.IsDouble()) {
+        int i = 0;
+        GetInt(o, i);
+        val = Value(i);
+      } else {
+        double d = 0.0;
+        GetDouble(o, d);
+        val = Value(d);
+      }
+      break;
+    case Type::kNullType:
+      break;
+      // all types are covered, so no `case default`
+  }
+#else
+  switch (o.type()) {
+    case json::value_t::object: {
+      Value::Object value_object;
+      for (auto it = o.begin(); it != o.end(); it++) {
+        Value entry;
+        ParseJsonAsValue(&entry, it.value());
+        if (entry.Type() != NULL_TYPE)
+          value_object.emplace(it.key(), std::move(entry));
+      }
+      if (value_object.size() > 0) val = Value(std::move(value_object));
+    } break;
+    case json::value_t::array: {
+      Value::Array value_array;
+      value_array.reserve(o.size());
+      for (auto it = o.begin(); it != o.end(); it++) {
+        Value entry;
+        ParseJsonAsValue(&entry, it.value());
+        if (entry.Type() != NULL_TYPE)
+          value_array.emplace_back(std::move(entry));
+      }
+      if (value_array.size() > 0) val = Value(std::move(value_array));
+    } break;
+    case json::value_t::string:
+      val = Value(o.get<std::string>());
+      break;
+    case json::value_t::boolean:
+      val = Value(o.get<bool>());
+      break;
+    case json::value_t::number_integer:
+    case json::value_t::number_unsigned:
+      val = Value(static_cast<int>(o.get<int64_t>()));
+      break;
+    case json::value_t::number_float:
+      val = Value(o.get<double>());
+      break;
+    case json::value_t::null:
+    case json::value_t::discarded:
+    case json::value_t::binary:
+      // default:
+      break;
+  }
+#endif
+  // Capture the result before `val` is moved from; querying a moved-from
+  // object afterwards would rely on its unspecified state.
+  const bool isNotNull = val.Type() != NULL_TYPE;
+  if (ret) *ret = std::move(val);
+
+  return isNotNull;
+}
+
+// Converts the "extras" member of `o` into `*ret` via ParseJsonAsValue().
+// Returns false when `o` has no "extras" member or the member converts to a
+// null Value.
+static bool ParseExtrasProperty(Value *ret, const json &o) {
+  json_const_iterator extrasIt;
+  const bool hasExtras = FindMember(o, "extras", extrasIt);
+  return hasExtras && ParseJsonAsValue(ret, GetValue(extrasIt));
+}
+
+// Reads the boolean member `property` of `o` into `*ret` (when `ret` is
+// non-null). Returns true only when the member exists and is a boolean.
+// When `required` is true and `err` is non-null, a message describing the
+// failure is appended to `*err`; `parent_node`, if non-empty, is named in the
+// "missing" message.
+static bool ParseBooleanProperty(bool *ret, std::string *err, const json &o,
+                                 const std::string &property,
+                                 const bool required,
+                                 const std::string &parent_node = "") {
+  json_const_iterator member;
+  if (!FindMember(o, property.c_str(), member)) {
+    if (required && err) {
+      (*err) += "'" + property + "' property is missing";
+      if (!parent_node.empty()) {
+        (*err) += " in " + parent_node;
+      }
+      (*err) += ".\n";
+    }
+    return false;
+  }
+
+  const auto &node = GetValue(member);
+
+#ifdef TINYGLTF_USE_RAPIDJSON
+  const bool isBoolean = node.IsBool();
+  const bool boolValue = isBoolean ? node.GetBool() : false;
+#else
+  const bool isBoolean = node.is_boolean();
+  const bool boolValue = isBoolean ? node.get<bool>() : false;
+#endif
+  if (!isBoolean) {
+    if (required && err) {
+      (*err) += "'" + property + "' property is not a bool type.\n";
+    }
+    return false;
+  }
+
+  if (ret) {
+    (*ret) = boolValue;
+  }
+  return true;
+}
+
+// Reads the integer member `property` of `o` into `*ret` (when `ret` is
+// non-null). Returns true only when the member exists and GetInt() accepts
+// it. When `required` is true and `err` is non-null, a message describing the
+// failure is appended to `*err`; `parent_node`, if non-empty, is named in the
+// "missing" message.
+static bool ParseIntegerProperty(int *ret, std::string *err, const json &o,
+                                 const std::string &property,
+                                 const bool required,
+                                 const std::string &parent_node = "") {
+  json_const_iterator member;
+  if (!FindMember(o, property.c_str(), member)) {
+    if (required && err) {
+      (*err) += "'" + property + "' property is missing";
+      if (!parent_node.empty()) {
+        (*err) += " in " + parent_node;
+      }
+      (*err) += ".\n";
+    }
+    return false;
+  }
+
+  int parsed = 0;
+  if (!GetInt(GetValue(member), parsed)) {
+    if (required && err) {
+      (*err) += "'" + property + "' property is not an integer type.\n";
+    }
+    return false;
+  }
+
+  if (ret) {
+    (*ret) = parsed;
+  }
+  return true;
+}
+
+// Reads the unsigned integer member `property` of `o` into `*ret` (when `ret`
+// is non-null). Returns true only when the member exists and the JSON backend
+// classifies it as an unsigned integer. When `required` is true and `err` is
+// non-null, a message describing the failure is appended to `*err`;
+// `parent_node`, if non-empty, is named in the "missing" message.
+static bool ParseUnsignedProperty(size_t *ret, std::string *err, const json &o,
+                                  const std::string &property,
+                                  const bool required,
+                                  const std::string &parent_node = "") {
+  json_const_iterator member;
+  if (!FindMember(o, property.c_str(), member)) {
+    if (required && err) {
+      (*err) += "'" + property + "' property is missing";
+      if (!parent_node.empty()) {
+        (*err) += " in " + parent_node;
+      }
+      (*err) += ".\n";
+    }
+    return false;
+  }
+
+  const auto &node = GetValue(member);
+
+  size_t parsed = 0;
+  bool isUnsigned = false;
+#ifdef TINYGLTF_USE_RAPIDJSON
+  if (node.IsUint()) {
+    parsed = node.GetUint();
+    isUnsigned = true;
+  } else if (node.IsUint64()) {
+    parsed = node.GetUint64();
+    isUnsigned = true;
+  }
+#else
+  if (node.is_number_unsigned()) {
+    parsed = node.get<size_t>();
+    isUnsigned = true;
+  }
+#endif
+  if (!isUnsigned) {
+    if (required && err) {
+      (*err) += "'" + property + "' property is not a positive integer.\n";
+    }
+    return false;
+  }
+
+  if (ret) {
+    (*ret) = parsed;
+  }
+  return true;
+}
+
+// Reads the numeric member `property` of `o` into `*ret` as a double (when
+// `ret` is non-null). Returns true only when the member exists and
+// GetNumber() accepts it. When `required` is true and `err` is non-null, a
+// message describing the failure is appended to `*err`; `parent_node`, if
+// non-empty, is named in the "missing" message.
+static bool ParseNumberProperty(double *ret, std::string *err, const json &o,
+                                const std::string &property,
+                                const bool required,
+                                const std::string &parent_node = "") {
+  json_const_iterator member;
+  if (!FindMember(o, property.c_str(), member)) {
+    if (required && err) {
+      (*err) += "'" + property + "' property is missing";
+      if (!parent_node.empty()) {
+        (*err) += " in " + parent_node;
+      }
+      (*err) += ".\n";
+    }
+    return false;
+  }
+
+  double parsed = 0.0;
+  if (!GetNumber(GetValue(member), parsed)) {
+    if (required && err) {
+      (*err) += "'" + property + "' property is not a number type.\n";
+    }
+    return false;
+  }
+
+  if (ret) {
+    (*ret) = parsed;
+  }
+  return true;
+}
+
+// Reads `property` from JSON object `o` as an array of numbers.
+//
+// Once the member is known to be an array, any previous contents of `*ret`
+// are discarded and the elements are appended in order. Parsing stops at the
+// first element that is not a number: false is returned and `*ret` keeps the
+// elements converted so far. A null `ret` validates the input without storing
+// anything. Diagnostics are appended to `*err` only when `required` is set
+// and `err` is non-null.
+static bool ParseNumberArrayProperty(std::vector<double> *ret, std::string *err,
+                                     const json &o, const std::string &property,
+                                     bool required,
+                                     const std::string &parent_node = "") {
+  json_const_iterator it;
+  if (!FindMember(o, property.c_str(), it)) {
+    if (required && err) {
+      (*err) += "'" + property + "' property is missing";
+      if (!parent_node.empty()) {
+        (*err) += " in " + parent_node;
+      }
+      (*err) += ".\n";
+    }
+    return false;
+  }
+
+  if (!IsArray(GetValue(it))) {
+    if (required && err) {
+      (*err) += "'" + property + "' property is not an array";
+      if (!parent_node.empty()) {
+        (*err) += " in " + parent_node;
+      }
+      (*err) += ".\n";
+    }
+    return false;
+  }
+
+  if (ret) {
+    ret->clear();
+  }
+  auto end = ArrayEnd(GetValue(it));
+  for (auto i = ArrayBegin(GetValue(it)); i != end; ++i) {
+    double numberValue;
+    const bool isNumber = GetNumber(*i, numberValue);
+    if (!isNumber) {
+      if (required && err) {
+        // Emit a single, well-formed line: the sentence is terminated only
+        // after the optional " in <parent>" suffix.
+        (*err) += "'" + property + "' property is not a number";
+        if (!parent_node.empty()) {
+          (*err) += " in " + parent_node;
+        }
+        (*err) += ".\n";
+      }
+      return false;
+    }
+    if (ret) {
+      ret->push_back(numberValue);
+    }
+  }
+
+  return true;
+}
+
+// Reads `property` from JSON object `o` as an array of integers.
+//
+// Once the member is known to be an array, any previous contents of `*ret`
+// are discarded and the elements are appended in order. Parsing stops at the
+// first element GetInt() rejects: false is returned and `*ret` keeps the
+// elements converted so far. A null `ret` validates the input without storing
+// anything. Diagnostics are appended to `*err` only when `required` is set
+// and `err` is non-null.
+static bool ParseIntegerArrayProperty(std::vector<int> *ret, std::string *err,
+                                      const json &o,
+                                      const std::string &property,
+                                      bool required,
+                                      const std::string &parent_node = "") {
+  json_const_iterator it;
+  if (!FindMember(o, property.c_str(), it)) {
+    if (required && err) {
+      (*err) += "'" + property + "' property is missing";
+      if (!parent_node.empty()) {
+        (*err) += " in " + parent_node;
+      }
+      (*err) += ".\n";
+    }
+    return false;
+  }
+
+  if (!IsArray(GetValue(it))) {
+    if (required && err) {
+      (*err) += "'" + property + "' property is not an array";
+      if (!parent_node.empty()) {
+        (*err) += " in " + parent_node;
+      }
+      (*err) += ".\n";
+    }
+    return false;
+  }
+
+  if (ret) {
+    ret->clear();
+  }
+  auto end = ArrayEnd(GetValue(it));
+  for (auto i = ArrayBegin(GetValue(it)); i != end; ++i) {
+    int numberValue;
+    const bool isNumber = GetInt(*i, numberValue);
+    if (!isNumber) {
+      if (required && err) {
+        // Emit a single, well-formed line: the sentence is terminated only
+        // after the optional " in <parent>" suffix.
+        (*err) += "'" + property + "' property is not an integer type";
+        if (!parent_node.empty()) {
+          (*err) += " in " + parent_node;
+        }
+        (*err) += ".\n";
+      }
+      return false;
+    }
+    if (ret) {
+      ret->push_back(numberValue);
+    }
+  }
+
+  return true;
+}
+
+// Reads `property` from JSON object `o` as a string.
+//
+// On success the string is moved into `*ret` (when `ret` is non-null) and
+// true is returned. Returns false when the member is absent or GetString()
+// rejects it. A diagnostic is appended to `*err` only when `required` is set
+// and `err` is non-null; `parent_node` is mentioned only in the "missing"
+// diagnostic.
+static bool ParseStringProperty(
+    std::string *ret, std::string *err, const json &o,
+    const std::string &property, bool required,
+    const std::string &parent_node = std::string()) {
+  json_const_iterator member;
+  if (!FindMember(o, property.c_str(), member)) {
+    if (required && err) {
+      (*err) += "'" + property + "' property is missing";
+      if (!parent_node.empty()) {
+        (*err) += " in `" + parent_node + "'.\n";
+      } else {
+        (*err) += ".\n";
+      }
+    }
+    return false;
+  }
+
+  std::string parsed;
+  const bool isString = GetString(GetValue(member), parsed);
+  if (!isString) {
+    if (required && err) {
+      (*err) += "'" + property + "' property is not a string type.\n";
+    }
+    return false;
+  }
+
+  if (ret) {
+    (*ret) = std::move(parsed);
+  }
+  return true;
+}
+
+// Reads `property` from JSON object `o` as a dictionary mapping member names
+// to integers.
+//
+// Once the member is known to be an object, `*ret` is cleared and then filled
+// entry by entry. Parsing stops at the first value GetInt() rejects: false is
+// returned and `*ret` keeps the entries stored so far. `ret` must be non-null.
+// Diagnostics are appended to `*err` only when `required` is set and `err` is
+// non-null.
+static bool ParseStringIntegerProperty(std::map<std::string, int> *ret,
+                                       std::string *err, const json &o,
+                                       const std::string &property,
+                                       bool required,
+                                       const std::string &parent = "") {
+  json_const_iterator member;
+  if (!FindMember(o, property.c_str(), member)) {
+    if (required && err) {
+      if (parent.empty()) {
+        (*err) += "'" + property + "' property is missing.\n";
+      } else {
+        (*err) +=
+            "'" + property + "' property is missing in " + parent + ".\n";
+      }
+    }
+    return false;
+  }
+
+  const json &dict = GetValue(member);
+
+  // Only an object / dictionary is acceptable here.
+  if (!IsObject(dict)) {
+    if (required && err) {
+      (*err) += "'" + property + "' property is not an object.\n";
+    }
+    return false;
+  }
+
+  ret->clear();
+
+  json_const_iterator entry(ObjectBegin(dict));
+  json_const_iterator last(ObjectEnd(dict));
+  while (entry != last) {
+    int parsed;
+    if (!GetInt(GetValue(entry), parsed)) {
+      if (required && err) {
+        (*err) += "'" + property + "' value is not an integer type.\n";
+      }
+      return false;
+    }
+
+    // Store (or overwrite) the entry under its member name.
+    (*ret)[GetKey(entry)] = parsed;
+    ++entry;
+  }
+  return true;
+}
+
+// Reads `property` from JSON object `o` as an object and collects its numeric
+// members into `*ret` as name -> double.
+//
+// `*ret` is cleared once the member is known to be an object. Members whose
+// value GetNumber() rejects are skipped silently. Because emplace() is used,
+// the first occurrence of a name wins. `ret` must be non-null. Returns false
+// when the member is absent or is not an object. Diagnostics are appended to
+// `*err` only when `required` is set and `err` is non-null.
+static bool ParseJSONProperty(std::map<std::string, double> *ret,
+                              std::string *err, const json &o,
+                              const std::string &property, bool required) {
+  json_const_iterator it;
+  if (!FindMember(o, property.c_str(), it)) {
+    if (required && err) {
+      // The message used to end in ". \n'", which left a stray quote at the
+      // start of the next diagnostic line.
+      (*err) += "'" + property + "' property is missing.\n";
+    }
+    return false;
+  }
+
+  const json &obj = GetValue(it);
+
+  if (!IsObject(obj)) {
+    if (required && err) {
+      (*err) += "'" + property + "' property is not a JSON object.\n";
+    }
+    return false;
+  }
+
+  ret->clear();
+
+  json_const_iterator it2(ObjectBegin(obj));
+  json_const_iterator itEnd(ObjectEnd(obj));
+  for (; it2 != itEnd; ++it2) {
+    double numVal;
+    if (GetNumber(GetValue(it2), numVal)) {
+      ret->emplace(std::string(GetKey(it2)), numVal);
+    }
+  }
+
+  return true;
+}
+
+// Parses member `prop` of `o` into whichever slot of `param` matches its JSON
+// type.
+//
+// The interpretations are tried in a fixed order and the first that succeeds
+// wins: string, number array, single number (which also sets
+// `has_number_value`), object of numbers, boolean. Every attempt is made with
+// required = false, so the only diagnostic this function can add is the final
+// one below, and only when `required` is set and `err` is non-null.
+static bool ParseParameterProperty(Parameter *param, std::string *err,
+                                   const json &o, const std::string &prop,
+                                   bool required) {
+  if (ParseStringProperty(&param->string_value, err, o, prop, false)) {
+    return true;
+  }
+
+  if (ParseNumberArrayProperty(&param->number_array, err, o, prop, false)) {
+    return true;
+  }
+
+  if (ParseNumberProperty(&param->number_value, err, o, prop, false)) {
+    param->has_number_value = true;
+    return true;
+  }
+
+  if (ParseJSONProperty(&param->json_double_value, err, o, prop, false)) {
+    return true;
+  }
+
+  if (ParseBooleanProperty(&param->bool_value, err, o, prop, false)) {
+    return true;
+  }
+
+  // Nothing matched.
+  if (required && err) {
+    (*err) += "parameter must be a string or number / number array.\n";
+  }
+  return false;
+}
+
+// Converts the "extensions" member of `o` into an ExtensionMap.
+//
+// Returns false when `o` has no "extensions" member or the member is not an
+// object. Otherwise every child that is itself an object is converted with
+// ParseJsonAsValue(); children of any other type are skipped. The resulting
+// map is moved into `*ret` when `ret` is non-null. `err` is not used.
+static bool ParseExtensionsProperty(ExtensionMap *ret, std::string *err,
+                                    const json &o) {
+  (void)err;
+
+  json_const_iterator member;
+  if (!FindMember(o, "extensions", member)) {
+    return false;
+  }
+
+  auto &extObject = GetValue(member);
+  if (!IsObject(extObject)) {
+    return false;
+  }
+
+  ExtensionMap parsed;
+  json_const_iterator last = ObjectEnd(extObject);
+  for (json_const_iterator cur = ObjectBegin(extObject); cur != last; ++cur) {
+    auto &entry = GetValue(cur);
+    if (!IsObject(entry)) {
+      continue;
+    }
+
+    std::string name(GetKey(cur));
+    const bool converted = ParseJsonAsValue(&parsed[name], entry);
+    if (!converted && !name.empty()) {
+      // Fall back to an empty object so the extension entry is still of
+      // object type.
+      parsed[name] = Value{Value::Object{}};
+    }
+  }
+
+  if (ret) {
+    (*ret) = std::move(parsed);
+  }
+  return true;
+}
+
+// Fills `asset` from the glTF "asset" object `o`.
+//
+// `version` is parsed as required (a diagnostic is appended to `*err` when it
+// is absent), the other string fields as optional. The individual parse
+// results are ignored and the function always returns true. When
+// `store_original_json_for_extras_and_extensions` is set, the raw JSON text of
+// the "extensions" and "extras" members is kept as well.
+static bool ParseAsset(Asset *asset, std::string *err, const json &o,
+                       bool store_original_json_for_extras_and_extensions) {
+  ParseStringProperty(&asset->version, err, o, "version",
+                      /* required */ true, "Asset");
+  ParseStringProperty(&asset->generator, err, o, "generator",
+                      /* required */ false, "Asset");
+  ParseStringProperty(&asset->minVersion, err, o, "minVersion",
+                      /* required */ false, "Asset");
+  ParseStringProperty(&asset->copyright, err, o, "copyright",
+                      /* required */ false, "Asset");
+
+  ParseExtensionsProperty(&asset->extensions, err, o);
+
+  // The Unity exporter stores its version under extras.
+  ParseExtrasProperty(&(asset->extras), o);
+
+  if (store_original_json_for_extras_and_extensions) {
+    json_const_iterator raw;
+    if (FindMember(o, "extensions", raw)) {
+      asset->extensions_json_string = JsonToString(GetValue(raw));
+    }
+    if (FindMember(o, "extras", raw)) {
+      asset->extras_json_string = JsonToString(GetValue(raw));
+    }
+  }
+
+  return true;
+}
+
+// Parses one glTF image object `o` into `image`.
+//
+// Exactly one of `bufferView` / `uri` must be present:
+//  - `bufferView`: only the metadata (bufferView index, mimeType, width,
+//    height) is recorded here; the pixel data is loaded after this function.
+//  - `uri`: a data URI is decoded in place; any other URI is treated as an
+//    external file, remembered in `image->uri`, and loaded relative to
+//    `basedir` through `fs`. If the external file cannot be loaded a warning
+//    is appended to `*warn` and true is returned with only the URI kept.
+//
+// When image bytes were obtained they are handed to `*LoadImageData` together
+// with `load_image_user_data`, and that callback's result is returned.
+// `image_idx` is used for diagnostics and forwarded to the callback.
+// `err` / `warn` may be null.
+static bool ParseImage(Image *image, const int image_idx, std::string *err,
+                       std::string *warn, const json &o,
+                       bool store_original_json_for_extras_and_extensions,
+                       const std::string &basedir, FsCallbacks *fs,
+                       LoadImageDataFunction *LoadImageData = nullptr,
+                       void *load_image_user_data = nullptr) {
+  // A glTF image must either reference a bufferView or an image uri
+
+  // schema says oneOf [`bufferView`, `uri`]
+  // TODO(syoyo): Check the type of each parameters.
+  json_const_iterator it;
+  bool hasBufferView = FindMember(o, "bufferView", it);
+  bool hasURI = FindMember(o, "uri", it);
+
+  ParseStringProperty(&image->name, err, o, "name", false);
+
+  if (hasBufferView && hasURI) {
+    // Should not both defined.
+    if (err) {
+      (*err) +=
+          "Only one of `bufferView` or `uri` should be defined, but both are "
+          "defined for image[" +
+          std::to_string(image_idx) + "] name = \"" + image->name + "\"\n";
+    }
+    return false;
+  }
+
+  if (!hasBufferView && !hasURI) {
+    if (err) {
+      (*err) += "Neither required `bufferView` nor `uri` defined for image[" +
+                std::to_string(image_idx) + "] name = \"" + image->name +
+                "\"\n";
+    }
+    return false;
+  }
+
+  ParseExtensionsProperty(&image->extensions, err, o);
+  ParseExtrasProperty(&image->extras, o);
+
+  if (store_original_json_for_extras_and_extensions) {
+    {
+      json_const_iterator eit;
+      if (FindMember(o, "extensions", eit)) {
+        image->extensions_json_string = JsonToString(GetValue(eit));
+      }
+    }
+    {
+      json_const_iterator eit;
+      if (FindMember(o, "extras", eit)) {
+        image->extras_json_string = JsonToString(GetValue(eit));
+      }
+    }
+  }
+
+  if (hasBufferView) {
+    int bufferView = -1;
+    if (!ParseIntegerProperty(&bufferView, err, o, "bufferView", true)) {
+      if (err) {
+        (*err) += "Failed to parse `bufferView` for image[" +
+                  std::to_string(image_idx) + "] name = \"" + image->name +
+                  "\"\n";
+      }
+      return false;
+    }
+
+    std::string mime_type;
+    ParseStringProperty(&mime_type, err, o, "mimeType", false);
+
+    int width = 0;
+    ParseIntegerProperty(&width, err, o, "width", false);
+
+    int height = 0;
+    ParseIntegerProperty(&height, err, o, "height", false);
+
+    // Just only save some information here. Loading actual image data from
+    // bufferView is done after this `ParseImage` function.
+    image->bufferView = bufferView;
+    image->mimeType = mime_type;
+    image->width = width;
+    image->height = height;
+
+    return true;
+  }
+
+  // Parse URI & Load image data.
+
+  std::string uri;
+  std::string tmp_err;  // low-level message is replaced by the one below
+  if (!ParseStringProperty(&uri, &tmp_err, o, "uri", true)) {
+    if (err) {
+      (*err) += "Failed to parse `uri` for image[" + std::to_string(image_idx) +
+                "] name = \"" + image->name + "\".\n";
+    }
+    return false;
+  }
+
+  std::vector<unsigned char> img;
+
+  if (IsDataURI(uri)) {
+    if (!DecodeDataURI(&img, image->mimeType, uri, 0, false)) {
+      if (err) {
+        (*err) += "Failed to decode 'uri' for image[" +
+                  std::to_string(image_idx) + "] name = [" + image->name +
+                  "]\n";
+      }
+      return false;
+    }
+  } else {
+    // Assume external file
+    // Keep texture path (for textures that cannot be decoded)
+    image->uri = uri;
+#ifdef TINYGLTF_NO_EXTERNAL_IMAGE
+    return true;
+#endif
+    std::string decoded_uri = dlib::urldecode(uri);
+    if (!LoadExternalFile(&img, err, warn, decoded_uri, basedir,
+                          /* required */ false, /* required bytes */ 0,
+                          /* checksize */ false, fs)) {
+      if (warn) {
+        (*warn) += "Failed to load external 'uri' for image[" +
+                   std::to_string(image_idx) + "] name = [" + image->name +
+                   "]\n";
+      }
+      // If the image cannot be loaded, keep uri as image->uri.
+      return true;
+    }
+
+    if (img.empty()) {
+      if (warn) {
+        (*warn) += "Image data is empty for image[" +
+                   std::to_string(image_idx) + "] name = [" + image->name +
+                   "] \n";
+      }
+      return false;
+    }
+  }
+
+  // `LoadImageData` defaults to nullptr, so the pointer itself has to be
+  // checked before it is dereferenced to inspect the callback it points to.
+  if ((LoadImageData == nullptr) || (*LoadImageData == nullptr)) {
+    if (err) {
+      (*err) += "No LoadImageData callback specified.\n";
+    }
+    return false;
+  }
+  return (*LoadImageData)(image, image_idx, err, warn, 0, 0, &img.at(0),
+                          static_cast<int>(img.size()), load_image_user_data);
+}
+
+// Fills `texture` from the glTF texture object `o`.
+//
+// `sampler` and `source` are optional and are set to -1 when they are absent
+// or not integers. Extensions, extras and the name are parsed as well. The
+// function always returns true; `basedir` is unused.
+static bool ParseTexture(Texture *texture, std::string *err, const json &o,
+                         bool store_original_json_for_extras_and_extensions,
+                         const std::string &basedir) {
+  (void)basedir;
+
+  int samplerIndex = -1;
+  ParseIntegerProperty(&samplerIndex, err, o, "sampler", false);
+  int sourceIndex = -1;
+  ParseIntegerProperty(&sourceIndex, err, o, "source", false);
+
+  texture->sampler = samplerIndex;
+  texture->source = sourceIndex;
+
+  ParseExtensionsProperty(&texture->extensions, err, o);
+  ParseExtrasProperty(&texture->extras, o);
+
+  if (store_original_json_for_extras_and_extensions) {
+    json_const_iterator raw;
+    if (FindMember(o, "extensions", raw)) {
+      texture->extensions_json_string = JsonToString(GetValue(raw));
+    }
+    if (FindMember(o, "extras", raw)) {
+      texture->extras_json_string = JsonToString(GetValue(raw));
+    }
+  }
+
+  ParseStringProperty(&texture->name, err, o, "name", false);
+
+  return true;
+}
+
+// Fills `texinfo` from the glTF textureInfo object `o`.
+//
+// Returns false when `texinfo` is null or the required `index` member cannot
+// be parsed (a diagnostic naming "TextureInfo" is appended to `*err`).
+// `texCoord`, extensions and extras are optional.
+static bool ParseTextureInfo(
+    TextureInfo *texinfo, std::string *err, const json &o,
+    bool store_original_json_for_extras_and_extensions) {
+  if (!texinfo) {
+    return false;
+  }
+
+  const bool hasIndex = ParseIntegerProperty(
+      &texinfo->index, err, o, "index", /* required */ true, "TextureInfo");
+  if (!hasIndex) {
+    return false;
+  }
+
+  ParseIntegerProperty(&texinfo->texCoord, err, o, "texCoord", false);
+
+  ParseExtensionsProperty(&texinfo->extensions, err, o);
+  ParseExtrasProperty(&texinfo->extras, o);
+
+  if (store_original_json_for_extras_and_extensions) {
+    json_const_iterator raw;
+    if (FindMember(o, "extensions", raw)) {
+      texinfo->extensions_json_string = JsonToString(GetValue(raw));
+    }
+    if (FindMember(o, "extras", raw)) {
+      texinfo->extras_json_string = JsonToString(GetValue(raw));
+    }
+  }
+
+  return true;
+}
+
+// Fills `texinfo` from the glTF normalTextureInfo object `o`.
+//
+// Returns false when `texinfo` is null or the required `index` member cannot
+// be parsed (a diagnostic naming "NormalTextureInfo" is appended to `*err`).
+// `texCoord`, `scale`, extensions and extras are optional.
+static bool ParseNormalTextureInfo(
+    NormalTextureInfo *texinfo, std::string *err, const json &o,
+    bool store_original_json_for_extras_and_extensions) {
+  if (!texinfo) {
+    return false;
+  }
+
+  const bool hasIndex =
+      ParseIntegerProperty(&texinfo->index, err, o, "index",
+                           /* required */ true, "NormalTextureInfo");
+  if (!hasIndex) {
+    return false;
+  }
+
+  ParseIntegerProperty(&texinfo->texCoord, err, o, "texCoord", false);
+  ParseNumberProperty(&texinfo->scale, err, o, "scale", false);
+
+  ParseExtensionsProperty(&texinfo->extensions, err, o);
+  ParseExtrasProperty(&texinfo->extras, o);
+
+  if (store_original_json_for_extras_and_extensions) {
+    json_const_iterator raw;
+    if (FindMember(o, "extensions", raw)) {
+      texinfo->extensions_json_string = JsonToString(GetValue(raw));
+    }
+    if (FindMember(o, "extras", raw)) {
+      texinfo->extras_json_string = JsonToString(GetValue(raw));
+    }
+  }
+
+  return true;
+}
+
+// Fills `texinfo` from the glTF occlusionTextureInfo object `o`.
+//
+// Returns false when `texinfo` is null or the required `index` member cannot
+// be parsed (a diagnostic naming "OcclusionTextureInfo" is appended to
+// `*err`). `texCoord`, `strength`, extensions and extras are optional.
+static bool ParseOcclusionTextureInfo(
+    OcclusionTextureInfo *texinfo, std::string *err, const json &o,
+    bool store_original_json_for_extras_and_extensions) {
+  if (texinfo == nullptr) {
+    return false;
+  }
+
+  // Report the correct parent object; this previously said
+  // "NormalTextureInfo", which pointed users at the wrong material slot.
+  if (!ParseIntegerProperty(&texinfo->index, err, o, "index",
+                            /* required */ true, "OcclusionTextureInfo")) {
+    return false;
+  }
+
+  ParseIntegerProperty(&texinfo->texCoord, err, o, "texCoord", false);
+  ParseNumberProperty(&texinfo->strength, err, o, "strength", false);
+
+  ParseExtensionsProperty(&texinfo->extensions, err, o);
+  ParseExtrasProperty(&texinfo->extras, o);
+
+  if (store_original_json_for_extras_and_extensions) {
+    {
+      json_const_iterator it;
+      if (FindMember(o, "extensions", it)) {
+        texinfo->extensions_json_string = JsonToString(GetValue(it));
+      }
+    }
+    {
+      json_const_iterator it;
+      if (FindMember(o, "extras", it)) {
+        texinfo->extras_json_string = JsonToString(GetValue(it));
+      }
+    }
+  }
+
+  return true;
+}
+
+// Parses one glTF buffer object `o` into `buffer` and loads its bytes.
+//
+// `byteLength` is required. The data source is chosen as follows:
+//  - binary glTF (`is_binary`) with no `uri`: the first `byteLength` bytes of
+//    the embedded chunk `bin_data` / `bin_size` are copied;
+//  - `uri` is a data URI: it is decoded with `byteLength` as the expected
+//    size;
+//  - anything else: `uri` is URL-decoded and loaded as an external file
+//    relative to `basedir` through `fs`, with the size checked against
+//    `byteLength`.
+// Returns false (appending to `*err` when non-null) if the data cannot be
+// obtained. Name, extensions and extras are parsed afterwards.
+static bool ParseBuffer(Buffer *buffer, std::string *err, const json &o,
+                        bool store_original_json_for_extras_and_extensions,
+                        FsCallbacks *fs, const std::string &basedir,
+                        bool is_binary = false,
+                        const unsigned char *bin_data = nullptr,
+                        size_t bin_size = 0) {
+  size_t byteLength;
+  if (!ParseUnsignedProperty(&byteLength, err, o, "byteLength", true,
+                             "Buffer")) {
+    return false;
+  }
+
+  // In glTF 2.0, uri is not mandatory anymore
+  buffer->uri.clear();
+  ParseStringProperty(&buffer->uri, err, o, "uri", false, "Buffer");
+
+  // having an empty uri for a non embedded image should not be valid
+  if (!is_binary && buffer->uri.empty()) {
+    if (err) {
+      (*err) += "'uri' is missing from non binary glTF file buffer.\n";
+    }
+  }
+
+  if (is_binary && buffer->uri.empty()) {
+    // load data from (embedded) binary data
+
+    if ((bin_size == 0) || (bin_data == nullptr)) {
+      if (err) {
+        (*err) += "Invalid binary data in `Buffer'.\n";
+      }
+      return false;
+    }
+
+    if (byteLength > bin_size) {
+      if (err) {
+        std::stringstream ss;
+        ss << "Invalid `byteLength'. Must be equal or less than binary size: "
+              "`byteLength' = "
+           << byteLength << ", binary size = " << bin_size << '\n';
+        (*err) += ss.str();
+      }
+      return false;
+    }
+
+    // Read buffer data. assign() also copes with byteLength == 0, where the
+    // former resize() + data.at(0) threw std::out_of_range.
+    buffer->data.assign(bin_data, bin_data + byteLength);
+  } else if (IsDataURI(buffer->uri)) {
+    // Embedded data URI (a binary glTF may still carry one).
+    std::string mime_type;
+    if (!DecodeDataURI(&buffer->data, mime_type, buffer->uri, byteLength,
+                       true)) {
+      if (err) {
+        (*err) += "Failed to decode 'uri' : " + buffer->uri + " in Buffer\n";
+      }
+      return false;
+    }
+  } else {
+    // Assume external .bin file.
+    std::string decoded_uri = dlib::urldecode(buffer->uri);
+    if (!LoadExternalFile(&buffer->data, err, /* warn */ nullptr, decoded_uri,
+                          basedir, /* required */ true, byteLength,
+                          /* checkSize */ true, fs)) {
+      return false;
+    }
+  }
+
+  ParseStringProperty(&buffer->name, err, o, "name", false);
+
+  ParseExtensionsProperty(&buffer->extensions, err, o);
+  ParseExtrasProperty(&buffer->extras, o);
+
+  if (store_original_json_for_extras_and_extensions) {
+    {
+      json_const_iterator it;
+      if (FindMember(o, "extensions", it)) {
+        buffer->extensions_json_string = JsonToString(GetValue(it));
+      }
+    }
+    {
+      json_const_iterator it;
+      if (FindMember(o, "extras", it)) {
+        buffer->extras_json_string = JsonToString(GetValue(it));
+      }
+    }
+  }
+
+  return true;
+}
+
+// Fills `bufferView` from the glTF bufferView object `o`.
+//
+// `buffer` and `byteLength` are required; `byteOffset` and `byteStride`
+// default to 0. A `byteStride` that is not a multiple of 4 or is greater than
+// 252 is rejected. `target` is kept only when it is one of the two known
+// buffer targets and is otherwise stored as 0. Returns false (appending to
+// `*err` when non-null) on any of the failures above.
+static bool ParseBufferView(
+    BufferView *bufferView, std::string *err, const json &o,
+    bool store_original_json_for_extras_and_extensions) {
+  int buffer = -1;
+  if (!ParseIntegerProperty(&buffer, err, o, "buffer", true, "BufferView")) {
+    return false;
+  }
+
+  size_t byteOffset = 0;
+  ParseUnsignedProperty(&byteOffset, err, o, "byteOffset", false);
+
+  size_t byteLength = 1;
+  if (!ParseUnsignedProperty(&byteLength, err, o, "byteLength", true,
+                             "BufferView")) {
+    return false;
+  }
+
+  size_t byteStride = 0;
+  if (!ParseUnsignedProperty(&byteStride, err, o, "byteStride", false)) {
+    // Spec says: When byteStride of referenced bufferView is not defined, it
+    // means that accessor elements are tightly packed, i.e., effective stride
+    // equals the size of the element.
+    // We cannot determine the actual byteStride until Accessor are parsed, thus
+    // set 0(= tightly packed) here(as done in OpenGL's VertexAttribPoiner)
+    byteStride = 0;
+  }
+
+  if ((byteStride > 252) || ((byteStride % 4) != 0)) {
+    if (err) {
+      // Name both constraints: a value such as 256 is a multiple of 4 and is
+      // still rejected because of the upper bound.
+      std::stringstream ss;
+      ss << "Invalid `byteStride' value. `byteStride' must be a multiple of "
+            "4 and must not exceed 252 : "
+         << byteStride << '\n';
+
+      (*err) += ss.str();
+    }
+    return false;
+  }
+
+  int target = 0;
+  ParseIntegerProperty(&target, err, o, "target", false);
+  if ((target == TINYGLTF_TARGET_ARRAY_BUFFER) ||
+      (target == TINYGLTF_TARGET_ELEMENT_ARRAY_BUFFER)) {
+    // OK
+  } else {
+    target = 0;
+  }
+  bufferView->target = target;
+
+  ParseStringProperty(&bufferView->name, err, o, "name", false);
+
+  ParseExtensionsProperty(&bufferView->extensions, err, o);
+  ParseExtrasProperty(&bufferView->extras, o);
+
+  if (store_original_json_for_extras_and_extensions) {
+    {
+      json_const_iterator it;
+      if (FindMember(o, "extensions", it)) {
+        bufferView->extensions_json_string = JsonToString(GetValue(it));
+      }
+    }
+    {
+      json_const_iterator it;
+      if (FindMember(o, "extras", it)) {
+        bufferView->extras_json_string = JsonToString(GetValue(it));
+      }
+    }
+  }
+
+  bufferView->buffer = buffer;
+  bufferView->byteOffset = byteOffset;
+  bufferView->byteLength = byteLength;
+  bufferView->byteStride = byteStride;
+  return true;
+}
+
+// Parses the "sparse" sub-object `o` of an accessor into `accessor->sparse`.
+//
+// `accessor->sparse.isSparse` is set to true immediately, before validation.
+// `count`, `indices.bufferView`, `indices.componentType` and
+// `values.bufferView` are required; both `byteOffset` members default to 0.
+// The remaining sparse fields are written only when everything parsed.
+// Returns false on failure, appending a diagnostic to `*err` when it is
+// non-null.
+static bool ParseSparseAccessor(Accessor *accessor, std::string *err,
+                                const json &o) {
+  accessor->sparse.isSparse = true;
+
+  int count = 0;
+  if (!ParseIntegerProperty(&count, err, o, "count", true, "SparseAccessor")) {
+    return false;
+  }
+
+  json_const_iterator indices_iterator;
+  json_const_iterator values_iterator;
+  if (!FindMember(o, "indices", indices_iterator)) {
+    // `err` is optional, and diagnostics are appended so that messages
+    // collected earlier are not lost.
+    if (err) {
+      (*err) += "the sparse object of this accessor doesn't have indices\n";
+    }
+    return false;
+  }
+
+  if (!FindMember(o, "values", values_iterator)) {
+    if (err) {
+      (*err) += "the sparse object of this accessor doesn't have values\n";
+    }
+    return false;
+  }
+
+  const json &indices_obj = GetValue(indices_iterator);
+  const json &values_obj = GetValue(values_iterator);
+
+  int indices_buffer_view = 0, indices_byte_offset = 0, component_type = 0;
+  if (!ParseIntegerProperty(&indices_buffer_view, err, indices_obj,
+                            "bufferView", true, "SparseAccessor")) {
+    return false;
+  }
+  ParseIntegerProperty(&indices_byte_offset, err, indices_obj, "byteOffset",
+                       false);
+  if (!ParseIntegerProperty(&component_type, err, indices_obj, "componentType",
+                            true, "SparseAccessor")) {
+    return false;
+  }
+
+  int values_buffer_view = 0, values_byte_offset = 0;
+  if (!ParseIntegerProperty(&values_buffer_view, err, values_obj, "bufferView",
+                            true, "SparseAccessor")) {
+    return false;
+  }
+  ParseIntegerProperty(&values_byte_offset, err, values_obj, "byteOffset",
+                       false);
+
+  accessor->sparse.count = count;
+  accessor->sparse.indices.bufferView = indices_buffer_view;
+  accessor->sparse.indices.byteOffset = indices_byte_offset;
+  accessor->sparse.indices.componentType = component_type;
+  accessor->sparse.values.bufferView = values_buffer_view;
+  accessor->sparse.values.byteOffset = values_byte_offset;
+
+  return true;
+}
+
+// Fills `accessor` from the glTF accessor object `o`.
+//
+// `componentType`, `count` and `type` are required. `bufferView` defaults to
+// -1, `byteOffset` to 0 and `normalized` to false. `type` must be one of the
+// SCALAR / VECn / MATn names, and `componentType` must lie in the range
+// TINYGLTF_COMPONENT_TYPE_BYTE .. TINYGLTF_COMPONENT_TYPE_DOUBLE. When a
+// "sparse" member is present it is handed to ParseSparseAccessor() and that
+// result is returned. Returns false (appending to `*err` when non-null) on
+// failure.
+static bool ParseAccessor(Accessor *accessor, std::string *err, const json &o,
+                          bool store_original_json_for_extras_and_extensions) {
+  int viewIndex = -1;
+  ParseIntegerProperty(&viewIndex, err, o, "bufferView", false, "Accessor");
+
+  size_t offset = 0;
+  ParseUnsignedProperty(&offset, err, o, "byteOffset", false, "Accessor");
+
+  bool isNormalized = false;
+  ParseBooleanProperty(&isNormalized, err, o, "normalized", false, "Accessor");
+
+  size_t rawComponentType = 0;
+  if (!ParseUnsignedProperty(&rawComponentType, err, o, "componentType", true,
+                             "Accessor")) {
+    return false;
+  }
+
+  size_t elementCount = 0;
+  if (!ParseUnsignedProperty(&elementCount, err, o, "count", true,
+                             "Accessor")) {
+    return false;
+  }
+
+  std::string typeName;
+  if (!ParseStringProperty(&typeName, err, o, "type", true, "Accessor")) {
+    return false;
+  }
+
+  // Translate the textual element type into its TINYGLTF_TYPE_* constant.
+  if (typeName == "SCALAR") {
+    accessor->type = TINYGLTF_TYPE_SCALAR;
+  } else if (typeName == "VEC2") {
+    accessor->type = TINYGLTF_TYPE_VEC2;
+  } else if (typeName == "VEC3") {
+    accessor->type = TINYGLTF_TYPE_VEC3;
+  } else if (typeName == "VEC4") {
+    accessor->type = TINYGLTF_TYPE_VEC4;
+  } else if (typeName == "MAT2") {
+    accessor->type = TINYGLTF_TYPE_MAT2;
+  } else if (typeName == "MAT3") {
+    accessor->type = TINYGLTF_TYPE_MAT3;
+  } else if (typeName == "MAT4") {
+    accessor->type = TINYGLTF_TYPE_MAT4;
+  } else {
+    std::stringstream msg;
+    msg << "Unsupported `type` for accessor object. Got \"" << typeName
+        << "\"\n";
+    if (err) {
+      (*err) += msg.str();
+    }
+    return false;
+  }
+
+  ParseStringProperty(&accessor->name, err, o, "name", false);
+
+  accessor->minValues.clear();
+  accessor->maxValues.clear();
+  ParseNumberArrayProperty(&accessor->minValues, err, o, "min", false,
+                           "Accessor");
+  ParseNumberArrayProperty(&accessor->maxValues, err, o, "max", false,
+                           "Accessor");
+
+  accessor->count = elementCount;
+  accessor->bufferView = viewIndex;
+  accessor->byteOffset = offset;
+  accessor->normalized = isNormalized;
+
+  // Reject component types outside the known range.
+  if (rawComponentType < TINYGLTF_COMPONENT_TYPE_BYTE ||
+      rawComponentType > TINYGLTF_COMPONENT_TYPE_DOUBLE) {
+    std::stringstream msg;
+    msg << "Invalid `componentType` in accessor. Got " << rawComponentType
+        << "\n";
+    if (err) {
+      (*err) += msg.str();
+    }
+    return false;
+  }
+  accessor->componentType = static_cast<int>(rawComponentType);
+
+  ParseExtensionsProperty(&(accessor->extensions), err, o);
+  ParseExtrasProperty(&(accessor->extras), o);
+
+  if (store_original_json_for_extras_and_extensions) {
+    json_const_iterator raw;
+    if (FindMember(o, "extensions", raw)) {
+      accessor->extensions_json_string = JsonToString(GetValue(raw));
+    }
+    if (FindMember(o, "extras", raw)) {
+      accessor->extras_json_string = JsonToString(GetValue(raw));
+    }
+  }
+
+  // An optional "sparse" sub-object decides the final result.
+  json_const_iterator sparseMember;
+  if (FindMember(o, "sparse", sparseMember)) {
+    return ParseSparseAccessor(accessor, err, GetValue(sparseMember));
+  }
+
+  return true;
+}
+
+#ifdef TINYGLTF_ENABLE_DRACO
+
+// Copies the face indices of `mesh` into `outBuffer` using `componentSize`
+// bytes per index.
+//
+// For 4-byte indices `outBuffer.size()` bytes are copied in one go, starting
+// at the first index of face 0. Otherwise each face's three indices are
+// narrowed to 16 bits (componentSize == 2) or 8 bits (any other size) and
+// written at that face's offset. `outBuffer` is not resized here.
+static void DecodeIndexBuffer(draco::Mesh *mesh, size_t componentSize,
+                              std::vector<uint8_t> &outBuffer) {
+  if (componentSize == 4) {
+    assert(sizeof(mesh->face(draco::FaceIndex(0))[0]) == componentSize);
+    memcpy(outBuffer.data(), &mesh->face(draco::FaceIndex(0))[0],
+           outBuffer.size());
+    return;
+  }
+
+  const size_t bytesPerFace = componentSize * 3;
+  for (draco::FaceIndex f(0); f < mesh->num_faces(); ++f) {
+    const draco::Mesh::Face &face = mesh->face(f);
+    if (componentSize == 2) {
+      uint16_t narrowed[3] = {static_cast<uint16_t>(face[0].value()),
+                              static_cast<uint16_t>(face[1].value()),
+                              static_cast<uint16_t>(face[2].value())};
+      memcpy(outBuffer.data() + f.value() * bytesPerFace, &narrowed[0],
+             bytesPerFace);
+    } else {
+      uint8_t narrowed[3] = {static_cast<uint8_t>(face[0].value()),
+                             static_cast<uint8_t>(face[1].value()),
+                             static_cast<uint8_t>(face[2].value())};
+      memcpy(outBuffer.data() + f.value() * bytesPerFace, &narrowed[0],
+             bytesPerFace);
+    }
+  }
+}
+
+// Converts each point's value of pAttribute to T and stores the results back
+// to back in outBuffer. Fails if a conversion fails or the attribute has more
+// components than the 4-element scratch array can hold.
+template <typename T>
+static bool GetAttributeForAllPoints(draco::Mesh *mesh,
+                                     const draco::PointAttribute *pAttribute,
+                                     std::vector<uint8_t> &outBuffer) {
+  T values[4] = {0, 0, 0, 0};
+  const auto numComponents = pAttribute->num_components();
+  if (numComponents > 4) return false;  // would write past `values`
+  const size_t pointBytes = sizeof(T) * numComponents;
+  for (draco::PointIndex i(0); i < mesh->num_points(); ++i) {
+    if (!pAttribute->ConvertValue<T>(pAttribute->mapped_index(i), numComponents,
+                                     values))
+      return false;
+    memcpy(outBuffer.data() + i.value() * pointBytes, values, pointBytes);
+  }
+  return true;
+}
+
+// Runs the typed GetAttributeForAllPoints<T> overload whose T matches the
+// glTF `componentType`; yields false for a type not listed below.
+static bool GetAttributeForAllPoints(uint32_t componentType, draco::Mesh *mesh,
+                                     const draco::PointAttribute *pAttribute,
+                                     std::vector<uint8_t> &outBuffer) {
+  bool ok = false;
+  switch (componentType) {
+    case TINYGLTF_COMPONENT_TYPE_UNSIGNED_BYTE: {
+      ok = GetAttributeForAllPoints<uint8_t>(mesh, pAttribute, outBuffer);
+      break;
+    }
+    case TINYGLTF_COMPONENT_TYPE_BYTE: {
+      ok = GetAttributeForAllPoints<int8_t>(mesh, pAttribute, outBuffer);
+      break;
+    }
+    case TINYGLTF_COMPONENT_TYPE_UNSIGNED_SHORT: {
+      ok = GetAttributeForAllPoints<uint16_t>(mesh, pAttribute, outBuffer);
+      break;
+    }
+    case TINYGLTF_COMPONENT_TYPE_SHORT: {
+      ok = GetAttributeForAllPoints<int16_t>(mesh, pAttribute, outBuffer);
+      break;
+    }
+    case TINYGLTF_COMPONENT_TYPE_INT: {
+      ok = GetAttributeForAllPoints<int32_t>(mesh, pAttribute, outBuffer);
+      break;
+    }
+    case TINYGLTF_COMPONENT_TYPE_UNSIGNED_INT: {
+      ok = GetAttributeForAllPoints<uint32_t>(mesh, pAttribute, outBuffer);
+      break;
+    }
+    case TINYGLTF_COMPONENT_TYPE_FLOAT: {
+      ok = GetAttributeForAllPoints<float>(mesh, pAttribute, outBuffer);
+      break;
+    }
+    case TINYGLTF_COMPONENT_TYPE_DOUBLE: {
+      ok = GetAttributeForAllPoints<double>(mesh, pAttribute, outBuffer);
+      break;
+    }
+    default: break;  // unsupported component type: `ok` stays false
+  }
+  return ok;
+}
+
+// Decodes the Draco payload named by a primitive's KHR_draco_mesh_compression
+// extension. Decoded indices/attributes are appended to `model` as new buffers
+// and bufferViews, and the matching accessors are re-pointed at them. Returns
+// false on malformed input or decode failure; `err` is never written.
+static bool ParseDracoExtension(Primitive *primitive, Model *model,
+                                std::string *err,
+                                const Value &dracoExtensionValue) {
+  (void)err;
+  auto bufferViewValue = dracoExtensionValue.Get("bufferView");
+  if (!bufferViewValue.IsInt()) return false;
+  auto attributesValue = dracoExtensionValue.Get("attributes");
+  if (!attributesValue.IsObject()) return false;
+  auto attributesObject = attributesValue.Get<Value::Object>();
+  int bufferView = bufferViewValue.Get<int>();
+  // Every index below comes from the file, so range-check before use.
+  if (bufferView < 0 || size_t(bufferView) >= model->bufferViews.size())
+    return false;
+  BufferView &view = model->bufferViews[bufferView];
+  if (view.buffer < 0 || size_t(view.buffer) >= model->buffers.size())
+    return false;
+  Buffer &buffer = model->buffers[view.buffer];
+  if (view.byteOffset > buffer.data.size() ||
+      view.byteLength > buffer.data.size() - view.byteOffset)
+    return false;
+  if (view.dracoDecoded) return true;  // already decoded earlier
+  view.dracoDecoded = true;
+
+  // decode draco
+  draco::DecoderBuffer decoderBuffer;
+  decoderBuffer.Init(
+      reinterpret_cast<const char *>(buffer.data.data() + view.byteOffset),
+      view.byteLength);
+  draco::Decoder decoder;
+  auto decodeResult = decoder.DecodeMeshFromBuffer(&decoderBuffer);
+  if (!decodeResult.ok()) return false;
+  const std::unique_ptr<draco::Mesh> &mesh = decodeResult.value();
+
+  // create new bufferView for indices
+  if (primitive->indices >= 0) {
+    if (size_t(primitive->indices) >= model->accessors.size()) return false;
+    Accessor &indexAccessor = model->accessors[primitive->indices];
+    const int32_t componentSize =
+        GetComponentSizeInBytes(indexAccessor.componentType);
+    if (componentSize <= 0) return false;  // unknown component type
+    Buffer decodedIndexBuffer;
+    decodedIndexBuffer.data.resize(mesh->num_faces() * 3 * componentSize);
+    DecodeIndexBuffer(mesh.get(), componentSize, decodedIndexBuffer.data);
+    model->buffers.emplace_back(std::move(decodedIndexBuffer));
+    BufferView decodedIndexBufferView;
+    decodedIndexBufferView.buffer = int(model->buffers.size() - 1);
+    decodedIndexBufferView.byteLength =
+        int(mesh->num_faces() * 3 * componentSize);
+    decodedIndexBufferView.byteOffset = 0;
+    decodedIndexBufferView.byteStride = 0;
+    // Index data belongs in an ELEMENT_ARRAY_BUFFER view.
+    decodedIndexBufferView.target = TINYGLTF_TARGET_ELEMENT_ARRAY_BUFFER;
+    model->bufferViews.emplace_back(std::move(decodedIndexBufferView));
+    indexAccessor.bufferView = int(model->bufferViews.size() - 1);
+    indexAccessor.count = int(mesh->num_faces() * 3);
+  }
+
+  for (const auto &attribute : attributesObject) {
+    if (!attribute.second.IsInt()) return false;
+    auto primitiveAttribute = primitive->attributes.find(attribute.first);
+    if (primitiveAttribute == primitive->attributes.end()) return false;
+    const int accessorIndex = primitiveAttribute->second;
+    if (accessorIndex < 0 || size_t(accessorIndex) >= model->accessors.size())
+      return false;
+    Accessor &accessor = model->accessors[accessorIndex];
+    int dracoAttributeIndex = attribute.second.Get<int>();
+    const auto pAttribute = mesh->GetAttributeByUniqueId(dracoAttributeIndex);
+    if (pAttribute == nullptr) return false;  // id not present in the mesh
+    const auto componentType = accessor.componentType;
+    const int32_t componentSize = GetComponentSizeInBytes(componentType);
+    if (componentSize <= 0) return false;  // unknown component type
+    // Create a new buffer for this decoded buffer
+    Buffer decodedBuffer;
+    size_t bufferSize = size_t(mesh->num_points()) *
+                        pAttribute->num_components() * componentSize;
+    decodedBuffer.data.resize(bufferSize);
+    if (!GetAttributeForAllPoints(componentType, mesh.get(), pAttribute,
+                                  decodedBuffer.data))
+      return false;
+    model->buffers.emplace_back(std::move(decodedBuffer));
+    BufferView decodedBufferView;
+    decodedBufferView.buffer = int(model->buffers.size() - 1);
+    decodedBufferView.byteLength = bufferSize;
+    decodedBufferView.byteOffset = pAttribute->byte_offset();
+    decodedBufferView.byteStride = pAttribute->byte_stride();
+    // Vertex attribute data belongs in an ARRAY_BUFFER view.
+    decodedBufferView.target = TINYGLTF_TARGET_ARRAY_BUFFER;
+    model->bufferViews.emplace_back(std::move(decodedBufferView));
+    accessor.bufferView = int(model->bufferViews.size() - 1);
+    accessor.count = int(mesh->num_points());
+  }
+  return true;
+}
+#endif
+
+// Parses one entry of a mesh's "primitives" array into `primitive`.
+// Only "attributes" is required. "material", "mode", "indices", "targets",
+// extras and extensions are optional. When TINYGLTF_ENABLE_DRACO is defined,
+// a KHR_draco_mesh_compression extension is additionally decoded into `model`.
+static bool ParsePrimitive(Primitive *primitive, Model *model, std::string *err,
+                           const json &o,
+                           bool store_original_json_for_extras_and_extensions) {
+  // Optional integer members start from these defaults.
+  int materialIndex = -1;
+  int drawMode = TINYGLTF_MODE_TRIANGLES;
+  int indicesIndex = -1;
+  ParseIntegerProperty(&materialIndex, err, o, "material", false);
+  ParseIntegerProperty(&drawMode, err, o, "mode", false);
+  ParseIntegerProperty(&indicesIndex, err, o, "indices", false);
+  primitive->material = materialIndex;
+  primitive->mode = drawMode;
+  primitive->indices = indicesIndex;
+
+  if (!ParseStringIntegerProperty(&primitive->attributes, err, o, "attributes",
+                                  true, "Primitive")) {
+    return false;
+  }
+
+  // Look for morph targets
+  json_const_iterator targetsIt;
+  if (FindMember(o, "targets", targetsIt) && IsArray(GetValue(targetsIt))) {
+    const json &targetsArray = GetValue(targetsIt);
+    const json_const_array_iterator targetsEnd = ArrayEnd(targetsArray);
+    for (json_const_array_iterator target = ArrayBegin(targetsArray);
+         target != targetsEnd; ++target) {
+      if (!IsObject(*target)) continue;  // non-object entries are ignored
+
+      // Keep every member whose value GetInt accepts; skip the rest.
+      std::map<std::string, int> morphAttributes;
+      json_const_iterator member(ObjectBegin(*target));
+      const json_const_iterator memberEnd(ObjectEnd(*target));
+      for (; member != memberEnd; ++member) {
+        int value;
+        if (GetInt(GetValue(member), value)) {
+          morphAttributes[GetKey(member)] = value;
+        }
+      }
+      primitive->targets.emplace_back(std::move(morphAttributes));
+    }
+  }
+
+  ParseExtrasProperty(&(primitive->extras), o);
+  ParseExtensionsProperty(&primitive->extensions, err, o);
+
+  // Optionally keep the raw JSON text of "extensions" and "extras".
+  if (store_original_json_for_extras_and_extensions) {
+    json_const_iterator raw;
+    if (FindMember(o, "extensions", raw)) {
+      primitive->extensions_json_string = JsonToString(GetValue(raw));
+    }
+    if (FindMember(o, "extras", raw)) {
+      primitive->extras_json_string = JsonToString(GetValue(raw));
+    }
+  }
+
+#ifdef TINYGLTF_ENABLE_DRACO
+  // The decode result is not checked: a failed decode keeps the primitive.
+  auto dracoExtension =
+      primitive->extensions.find("KHR_draco_mesh_compression");
+  if (dracoExtension != primitive->extensions.end()) {
+    ParseDracoExtension(primitive, model, err, dracoExtension->second);
+  }
+#else
+  (void)model;
+#endif
+
+  return true;
+}
+
+// Parses a glTF mesh object: its name, primitives, morph weights, extras and
+// extensions. Primitives that fail to parse are dropped; the mesh itself
+// always parses successfully.
+static bool ParseMesh(Mesh *mesh, Model *model, std::string *err, const json &o,
+                      bool store_original_json_for_extras_and_extensions) {
+  ParseStringProperty(&mesh->name, err, o, "name", false);
+
+  mesh->primitives.clear();
+  json_const_iterator primitivesIt;
+  if (FindMember(o, "primitives", primitivesIt) &&
+      IsArray(GetValue(primitivesIt))) {
+    const json &primitivesArray = GetValue(primitivesIt);
+    const json_const_array_iterator last = ArrayEnd(primitivesArray);
+    for (json_const_array_iterator cur = ArrayBegin(primitivesArray);
+         cur != last; ++cur) {
+      Primitive parsed;
+      if (!ParsePrimitive(&parsed, model, err, *cur,
+                          store_original_json_for_extras_and_extensions)) {
+        continue;  // Only add the primitive if the parsing succeeds.
+      }
+      mesh->primitives.emplace_back(std::move(parsed));
+    }
+  }
+
+  // Should probably check if has targets and if dimensions fit
+  ParseNumberArrayProperty(&mesh->weights, err, o, "weights", false);
+
+  ParseExtensionsProperty(&mesh->extensions, err, o);
+  ParseExtrasProperty(&(mesh->extras), o);
+
+  // Optionally keep the raw JSON text of "extensions" and "extras".
+  if (store_original_json_for_extras_and_extensions) {
+    json_const_iterator raw;
+    if (FindMember(o, "extensions", raw)) {
+      mesh->extensions_json_string = JsonToString(GetValue(raw));
+    }
+    if (FindMember(o, "extras", raw)) {
+      mesh->extras_json_string = JsonToString(GetValue(raw));
+    }
+  }
+
+  return true;
+}
+
+// Parses a glTF node: name, skin/camera/mesh indices, transform, children,
+// weights, extras and extensions. Nothing is required; always returns true.
+static bool ParseNode(Node *node, std::string *err, const json &o,
+                      bool store_original_json_for_extras_and_extensions) {
+  ParseStringProperty(&node->name, err, o, "name", false);
+
+  int skinIndex = -1;
+  ParseIntegerProperty(&skinIndex, err, o, "skin", false);
+  node->skin = skinIndex;
+
+  // Matrix and T/R/S are exclusive
+  const bool hasMatrix =
+      ParseNumberArrayProperty(&node->matrix, err, o, "matrix", false);
+  if (!hasMatrix) {
+    ParseNumberArrayProperty(&node->rotation, err, o, "rotation", false);
+    ParseNumberArrayProperty(&node->scale, err, o, "scale", false);
+    ParseNumberArrayProperty(&node->translation, err, o, "translation", false);
+  }
+
+  int cameraIndex = -1;
+  ParseIntegerProperty(&cameraIndex, err, o, "camera", false);
+  node->camera = cameraIndex;
+
+  int meshIndex = -1;
+  ParseIntegerProperty(&meshIndex, err, o, "mesh", false);
+  node->mesh = meshIndex;
+
+  node->children.clear();
+  ParseIntegerArrayProperty(&node->children, err, o, "children", false);
+
+  ParseNumberArrayProperty(&node->weights, err, o, "weights", false);
+
+  ParseExtensionsProperty(&node->extensions, err, o);
+  ParseExtrasProperty(&(node->extras), o);
+
+  // Optionally keep the raw JSON text of "extensions" and "extras".
+  if (store_original_json_for_extras_and_extensions) {
+    json_const_iterator raw;
+    if (FindMember(o, "extensions", raw)) {
+      node->extensions_json_string = JsonToString(GetValue(raw));
+    }
+    if (FindMember(o, "extras", raw)) {
+      node->extras_json_string = JsonToString(GetValue(raw));
+    }
+  }
+
+  return true;
+}
+
+// Parses a material's "pbrMetallicRoughness" object into `pbr`.
+// Returns false when `pbr` is null or when "baseColorFactor" is present but
+// does not hold exactly four numbers. Everything else is optional, and the
+// results of the nested ParseTextureInfo calls are not checked.
+static bool ParsePbrMetallicRoughness(
+    PbrMetallicRoughness *pbr, std::string *err, const json &o,
+    bool store_original_json_for_extras_and_extensions) {
+  if (!pbr) {
+    return false;
+  }
+
+  // Parse into a scratch vector so `pbr` is only touched on success.
+  std::vector<double> colorFactor;
+  const bool hasColorFactor = ParseNumberArrayProperty(
+      &colorFactor, err, o, "baseColorFactor", /* required */ false);
+  if (hasColorFactor && colorFactor.size() != 4) {
+    if (err) {
+      (*err) +=
+          "Array length of `baseColorFactor` parameter in "
+          "pbrMetallicRoughness must be 4, but got " +
+          std::to_string(colorFactor.size()) + "\n";
+    }
+    return false;
+  }
+  if (hasColorFactor) {
+    pbr->baseColorFactor = colorFactor;
+  }
+
+  // Optional texture references.
+  json_const_iterator textureIt;
+  if (FindMember(o, "baseColorTexture", textureIt)) {
+    ParseTextureInfo(&pbr->baseColorTexture, err, GetValue(textureIt),
+                     store_original_json_for_extras_and_extensions);
+  }
+
+  if (FindMember(o, "metallicRoughnessTexture", textureIt)) {
+    ParseTextureInfo(&pbr->metallicRoughnessTexture, err, GetValue(textureIt),
+                     store_original_json_for_extras_and_extensions);
+  }
+
+  // Optional scalar factors.
+  ParseNumberProperty(&pbr->metallicFactor, err, o, "metallicFactor", false);
+  ParseNumberProperty(&pbr->roughnessFactor, err, o, "roughnessFactor", false);
+
+  // Generic extension and extras payloads.
+  ParseExtensionsProperty(&pbr->extensions, err, o);
+  ParseExtrasProperty(&pbr->extras, o);
+
+  // Optionally keep the raw JSON text of "extensions" and "extras".
+  if (store_original_json_for_extras_and_extensions) {
+    json_const_iterator raw;
+    if (FindMember(o, "extensions", raw)) {
+      pbr->extensions_json_string = JsonToString(GetValue(raw));
+    }
+    if (FindMember(o, "extras", raw)) {
+      pbr->extras_json_string = JsonToString(GetValue(raw));
+    }
+  }
+
+  return true;
+}
+
+// Parses a glTF material object into `material`.
+//
+// Besides the typed members (emissiveFactor, alphaMode, alphaCutoff,
+// doubleSided, pbrMetallicRoughness and the textures) the legacy `values` and
+// `additionalValues` parameter maps are rebuilt from scratch. Returns false
+// only when "emissiveFactor" is present with a length other than 3; results
+// of the nested PBR/texture parsers are not checked.
+static bool ParseMaterial(Material *material, std::string *err, const json &o,
+                          bool store_original_json_for_extras_and_extensions) {
+  ParseStringProperty(&material->name, err, o, "name", /* required */ false);
+
+  const bool hasEmissiveFactor =
+      ParseNumberArrayProperty(&material->emissiveFactor, err, o,
+                               "emissiveFactor", /* required */ false);
+  if (!hasEmissiveFactor) {
+    // fill with default values
+    material->emissiveFactor = {0.0, 0.0, 0.0};
+  } else if (material->emissiveFactor.size() != 3) {
+    if (err) {
+      (*err) +=
+          "Array length of `emissiveFactor` parameter in "
+          "material must be 3, but got " +
+          std::to_string(material->emissiveFactor.size()) + "\n";
+    }
+    return false;
+  }
+
+  ParseStringProperty(&material->alphaMode, err, o, "alphaMode",
+                      /* required */ false);
+  ParseNumberProperty(&material->alphaCutoff, err, o, "alphaCutoff",
+                      /* required */ false);
+  ParseBooleanProperty(&material->doubleSided, err, o, "doubleSided",
+                       /* required */ false);
+
+  // Typed sub-objects; each of them is optional.
+  json_const_iterator member;
+  if (FindMember(o, "pbrMetallicRoughness", member)) {
+    ParsePbrMetallicRoughness(&material->pbrMetallicRoughness, err,
+                              GetValue(member),
+                              store_original_json_for_extras_and_extensions);
+  }
+
+  if (FindMember(o, "normalTexture", member)) {
+    ParseNormalTextureInfo(&material->normalTexture, err, GetValue(member),
+                           store_original_json_for_extras_and_extensions);
+  }
+
+  if (FindMember(o, "occlusionTexture", member)) {
+    ParseOcclusionTextureInfo(&material->occlusionTexture, err,
+                              GetValue(member),
+                              store_original_json_for_extras_and_extensions);
+  }
+
+  if (FindMember(o, "emissiveTexture", member)) {
+    ParseTextureInfo(&material->emissiveTexture, err, GetValue(member),
+                     store_original_json_for_extras_and_extensions);
+  }
+
+  // Old code path. For backward compatibility, we still store material values
+  // as Parameter. This will create duplicated information for
+  // example(pbrMetallicRoughness), but should be negligible in terms of memory
+  // consumption.
+  // TODO(syoyo): Remove in the next major release.
+  material->values.clear();
+  material->additionalValues.clear();
+
+  const json_const_iterator membersEnd(ObjectEnd(o));
+  for (json_const_iterator cur(ObjectBegin(o)); cur != membersEnd; ++cur) {
+    std::string key(GetKey(cur));
+
+    if (key == "extensions" || key == "extras") {
+      // done later, skip, otherwise poorly parsed contents will be saved in the
+      // parametermap and serialized again later
+      continue;
+    }
+
+    if (key == "pbrMetallicRoughness") {
+      // Mirror each member of the PBR object into the legacy `values` map.
+      const json &pbrObject = GetValue(cur);
+      if (!IsObject(pbrObject)) {
+        continue;
+      }
+
+      json_const_iterator pbrIt(ObjectBegin(pbrObject));
+      const json_const_iterator pbrEnd(ObjectEnd(pbrObject));
+      for (; pbrIt != pbrEnd; ++pbrIt) {
+        Parameter param;
+        if (ParseParameterProperty(&param, err, pbrObject, GetKey(pbrIt),
+                                   false)) {
+          material->values.emplace(GetKey(pbrIt), std::move(param));
+        }
+      }
+      continue;
+    }
+
+    // Any other member is offered to the legacy `additionalValues` map.
+    Parameter param;
+    if (!ParseParameterProperty(&param, err, o, key, false)) {
+      continue;
+    }
+    // names of materials have already been parsed. Putting it in this map
+    // doesn't correctly reflect the glTF specification
+    if (key != "name") {
+      material->additionalValues.emplace(std::move(key), std::move(param));
+    }
+  }
+
+  material->extensions.clear();
+  ParseExtensionsProperty(&material->extensions, err, o);
+  ParseExtrasProperty(&(material->extras), o);
+
+  // Optionally keep the raw JSON text of "extensions" and "extras".
+  if (store_original_json_for_extras_and_extensions) {
+    json_const_iterator raw;
+    if (FindMember(o, "extensions", raw)) {
+      material->extensions_json_string = JsonToString(GetValue(raw));
+    }
+    if (FindMember(o, "extras", raw)) {
+      material->extras_json_string = JsonToString(GetValue(raw));
+    }
+  }
+
+  return true;
+}
+
+// Parses one entry of an animation's "channels" array into `channel`.
+// "sampler" is required; if a "target" object exists, its "node" and "path"
+// members are required as well. Without a target, target_node stays -1.
+static bool ParseAnimationChannel(
+    AnimationChannel *channel, std::string *err, const json &o,
+    bool store_original_json_for_extras_and_extensions) {
+  int samplerIndex = -1;
+  if (!ParseIntegerProperty(&samplerIndex, err, o, "sampler", true,
+                            "AnimationChannel")) {
+    if (err) {
+      (*err) += "`sampler` field is missing in animation channels\n";
+    }
+    return false;
+  }
+
+  int targetNode = -1;
+  json_const_iterator targetIt;
+  const bool hasTargetObject =
+      FindMember(o, "target", targetIt) && IsObject(GetValue(targetIt));
+  if (hasTargetObject) {
+    const json &target = GetValue(targetIt);
+    if (!ParseIntegerProperty(&targetNode, err, target, "node", true)) {
+      if (err) {
+        (*err) += "`node` field is missing in animation.channels.target\n";
+      }
+      return false;
+    }
+    if (!ParseStringProperty(&channel->target_path, err, target, "path",
+                             true)) {
+      if (err) {
+        (*err) += "`path` field is missing in animation.channels.target\n";
+      }
+      return false;
+    }
+
+    ParseExtensionsProperty(&channel->target_extensions, err, target);
+    if (store_original_json_for_extras_and_extensions) {
+      json_const_iterator raw;
+      if (FindMember(target, "extensions", raw)) {
+        channel->target_extensions_json_string = JsonToString(GetValue(raw));
+      }
+    }
+  }
+
+  channel->sampler = samplerIndex;
+  channel->target_node = targetNode;
+
+  ParseExtensionsProperty(&channel->extensions, err, o);
+  ParseExtrasProperty(&(channel->extras), o);
+
+  // Optionally keep the raw JSON text of "extensions" and "extras".
+  if (store_original_json_for_extras_and_extensions) {
+    json_const_iterator raw;
+    if (FindMember(o, "extensions", raw)) {
+      channel->extensions_json_string = JsonToString(GetValue(raw));
+    }
+    if (FindMember(o, "extras", raw)) {
+      channel->extras_json_string = JsonToString(GetValue(raw));
+    }
+  }
+
+  return true;
+}
+
+// Parses a glTF animation: channels, samplers, name, extras and extensions.
+// Channels that fail to parse are skipped, whereas a sampler without "input"
+// or "output" makes the whole animation fail (returns false).
+static bool ParseAnimation(Animation *animation, std::string *err,
+                           const json &o,
+                           bool store_original_json_for_extras_and_extensions) {
+  {
+    json_const_iterator channelsIt;
+    if (FindMember(o, "channels", channelsIt) &&
+        IsArray(GetValue(channelsIt))) {
+      json_const_array_iterator channelEnd = ArrayEnd(GetValue(channelsIt));
+      for (json_const_array_iterator i = ArrayBegin(GetValue(channelsIt));
+           i != channelEnd; ++i) {
+        AnimationChannel channel;
+        if (ParseAnimationChannel(
+                &channel, err, *i,
+                store_original_json_for_extras_and_extensions)) {
+          // Only add the channel if the parsing succeeds.
+          animation->channels.emplace_back(std::move(channel));
+        }
+      }
+    }
+  }
+
+  {
+    json_const_iterator samplerIt;
+    if (FindMember(o, "samplers", samplerIt) && IsArray(GetValue(samplerIt))) {
+      const json &sampler_array = GetValue(samplerIt);
+
+      json_const_array_iterator it = ArrayBegin(sampler_array);
+      json_const_array_iterator itEnd = ArrayEnd(sampler_array);
+
+      for (; it != itEnd; ++it) {
+        const json &s = *it;
+
+        AnimationSampler sampler;
+        int inputIndex = -1;
+        int outputIndex = -1;
+        if (!ParseIntegerProperty(&inputIndex, err, s, "input", true)) {
+          if (err) {
+            (*err) += "`input` field is missing in animation.sampler\n";
+          }
+          return false;
+        }
+        ParseStringProperty(&sampler.interpolation, err, s, "interpolation",
+                            false);
+        if (!ParseIntegerProperty(&outputIndex, err, s, "output", true)) {
+          if (err) {
+            (*err) += "`output` field is missing in animation.sampler\n";
+          }
+          return false;
+        }
+        sampler.input = inputIndex;
+        sampler.output = outputIndex;
+
+        // Sampler-level extensions/extras live on the sampler object `s`, not
+        // on the enclosing animation object `o`.
+        ParseExtensionsProperty(&(sampler.extensions), err, s);
+        ParseExtrasProperty(&(sampler.extras), s);
+        if (store_original_json_for_extras_and_extensions) {
+          json_const_iterator eit;
+          if (FindMember(s, "extensions", eit)) {
+            sampler.extensions_json_string = JsonToString(GetValue(eit));
+          }
+          if (FindMember(s, "extras", eit)) {
+            sampler.extras_json_string = JsonToString(GetValue(eit));
+          }
+        }
+
+        animation->samplers.emplace_back(std::move(sampler));
+      }
+    }
+  }
+
+  ParseStringProperty(&animation->name, err, o, "name", false);
+
+  ParseExtensionsProperty(&animation->extensions, err, o);
+  ParseExtrasProperty(&(animation->extras), o);
+
+  if (store_original_json_for_extras_and_extensions) {
+    {
+      json_const_iterator it;
+      if (FindMember(o, "extensions", it)) {
+        animation->extensions_json_string = JsonToString(GetValue(it));
+      }
+    }
+    {
+      json_const_iterator it;
+      if (FindMember(o, "extras", it)) {
+        animation->extras_json_string = JsonToString(GetValue(it));
+      }
+    }
+  }
+
+  return true;
+}
+
+// Parses a texture sampler: name, min/mag filters, S/T wrap modes, extras and
+// extensions. Filters start at -1 and wrap modes at
+// TINYGLTF_TEXTURE_WRAP_REPEAT before the optional members are read; values
+// are stored unvalidated and the function always returns true.
+static bool ParseSampler(Sampler *sampler, std::string *err, const json &o,
+                         bool store_original_json_for_extras_and_extensions) {
+  ParseStringProperty(&sampler->name, err, o, "name", false);
+
+  int minification = -1;
+  ParseIntegerProperty(&minification, err, o, "minFilter", false);
+  int magnification = -1;
+  ParseIntegerProperty(&magnification, err, o, "magFilter", false);
+  int sWrap = TINYGLTF_TEXTURE_WRAP_REPEAT;
+  ParseIntegerProperty(&sWrap, err, o, "wrapS", false);
+  int tWrap = TINYGLTF_TEXTURE_WRAP_REPEAT;
+  ParseIntegerProperty(&tWrap, err, o, "wrapT", false);
+  // "wrapR" (marked as a tinygltf extension in the original code) is
+  // currently disabled and therefore not parsed.
+
+  // TODO(syoyo): Check the value is allowed one.
+  // (e.g. we allow 9728(NEAREST), but don't allow 9727)
+
+  // Store whatever was read; no range checking happens here.
+  sampler->minFilter = minification;
+  sampler->magFilter = magnification;
+  sampler->wrapS = sWrap;
+  sampler->wrapT = tWrap;
+
+  ParseExtensionsProperty(&(sampler->extensions), err, o);
+  ParseExtrasProperty(&(sampler->extras), o);
+
+  // Optionally keep the raw JSON text of "extensions" and "extras".
+  if (store_original_json_for_extras_and_extensions) {
+    json_const_iterator raw;
+    if (FindMember(o, "extensions", raw)) {
+      sampler->extensions_json_string = JsonToString(GetValue(raw));
+    }
+    if (FindMember(o, "extras", raw)) {
+      sampler->extras_json_string = JsonToString(GetValue(raw));
+    }
+  }
+
+  return true;
+}
+
+// Parses a glTF skin: name, joints, skeleton, inverseBindMatrices, extras and
+// extensions. Returns false when "joints" cannot be parsed; "skeleton" and
+// "inverseBindMatrices" are optional and start from -1.
+static bool ParseSkin(Skin *skin, std::string *err, const json &o,
+                      bool store_original_json_for_extras_and_extensions) {
+  ParseStringProperty(&skin->name, err, o, "name", false, "Skin");
+
+  std::vector<int> joints;
+  if (!ParseIntegerArrayProperty(&joints, err, o, "joints", false, "Skin")) {
+    return false;
+  }
+  skin->joints = std::move(joints);
+
+  int skeleton = -1;
+  ParseIntegerProperty(&skeleton, err, o, "skeleton", false, "Skin");
+  skin->skeleton = skeleton;
+
+  // Optional in glTF 2.0, and the result is ignored here, so it must not be
+  // flagged as required (that would report a valid skin as incomplete).
+  int invBind = -1;
+  ParseIntegerProperty(&invBind, err, o, "inverseBindMatrices", false, "Skin");
+  skin->inverseBindMatrices = invBind;
+
+  ParseExtensionsProperty(&(skin->extensions), err, o);
+  ParseExtrasProperty(&(skin->extras), o);
+
+  if (store_original_json_for_extras_and_extensions) {
+    json_const_iterator it;
+    if (FindMember(o, "extensions", it)) {
+      skin->extensions_json_string = JsonToString(GetValue(it));
+    }
+    if (FindMember(o, "extras", it)) {
+      skin->extras_json_string = JsonToString(GetValue(it));
+    }
+  }
+
+  return true;
+}
+
+// Parses the "perspective" object of a camera. "yfov" and "znear" are
+// required; "aspectRatio" and "zfar" are optional and start from 0.0, which
+// this code treats as "invalid".
+static bool ParsePerspectiveCamera(
+    PerspectiveCamera *camera, std::string *err, const json &o,
+    bool store_original_json_for_extras_and_extensions) {
+  // Every lookup names "PerspectiveCamera" as the parent so that messages
+  // about this object point at the right camera kind.
+  double yfov = 0.0;
+  if (!ParseNumberProperty(&yfov, err, o, "yfov", true, "PerspectiveCamera")) {
+    return false;
+  }
+
+  double znear = 0.0;
+  if (!ParseNumberProperty(&znear, err, o, "znear", true,
+                           "PerspectiveCamera")) {
+    return false;
+  }
+
+  double aspectRatio = 0.0;  // = invalid
+  ParseNumberProperty(&aspectRatio, err, o, "aspectRatio", false,
+                      "PerspectiveCamera");
+
+  double zfar = 0.0;  // = invalid
+  ParseNumberProperty(&zfar, err, o, "zfar", false, "PerspectiveCamera");
+
+  camera->aspectRatio = aspectRatio;
+  camera->zfar = zfar;
+  camera->yfov = yfov;
+  camera->znear = znear;
+
+  ParseExtensionsProperty(&camera->extensions, err, o);
+  ParseExtrasProperty(&(camera->extras), o);
+
+  if (store_original_json_for_extras_and_extensions) {
+    json_const_iterator it;
+    if (FindMember(o, "extensions", it)) {
+      camera->extensions_json_string = JsonToString(GetValue(it));
+    }
+    if (FindMember(o, "extras", it)) {
+      camera->extras_json_string = JsonToString(GetValue(it));
+    }
+  }
+
+  // TODO(syoyo): Validate parameter values.
+
+  return true;
+}
+
+// Parses the "spot" object of a light: the optional inner/outer cone angles
+// plus extras and extensions. Always returns true.
+static bool ParseSpotLight(SpotLight *light, std::string *err, const json &o,
+                           bool store_original_json_for_extras_and_extensions) {
+  ParseNumberProperty(&light->innerConeAngle, err, o, "innerConeAngle", false);
+  ParseNumberProperty(&light->outerConeAngle, err, o, "outerConeAngle", false);
+
+  ParseExtensionsProperty(&light->extensions, err, o);
+  ParseExtrasProperty(&light->extras, o);
+
+  // TODO(syoyo): Validate parameter values.
+
+  // Without the flag there is no raw JSON text to keep.
+  if (!store_original_json_for_extras_and_extensions) {
+    return true;
+  }
+
+  json_const_iterator raw;
+  if (FindMember(o, "extensions", raw)) {
+    light->extensions_json_string = JsonToString(GetValue(raw));
+  }
+  if (FindMember(o, "extras", raw)) {
+    light->extras_json_string = JsonToString(GetValue(raw));
+  }
+
+  return true;
+}
+
+// Parses the "orthographic" object of a camera into `camera`.
+// "xmag", "ymag", "zfar" and "znear" are all required; the first one that
+// fails to parse makes the function return false.
+static bool ParseOrthographicCamera(
+    OrthographicCamera *camera, std::string *err, const json &o,
+    bool store_original_json_for_extras_and_extensions) {
+  double xmag = 0.0;
+  double ymag = 0.0;
+  double zfar = 0.0;
+  double znear = 0.0;
+
+  if (!ParseNumberProperty(&xmag, err, o, "xmag", true, "OrthographicCamera")) {
+    return false;
+  }
+
+  if (!ParseNumberProperty(&ymag, err, o, "ymag", true, "OrthographicCamera")) {
+    return false;
+  }
+
+  if (!ParseNumberProperty(&zfar, err, o, "zfar", true, "OrthographicCamera")) {
+    return false;
+  }
+
+  if (!ParseNumberProperty(&znear, err, o, "znear", true,
+                           "OrthographicCamera")) {
+    return false;
+  }
+
+  camera->xmag = xmag;
+  camera->ymag = ymag;
+  camera->zfar = zfar;
+  camera->znear = znear;
+
+  ParseExtensionsProperty(&camera->extensions, err, o);
+  ParseExtrasProperty(&(camera->extras), o);
+
+  // Optionally keep the raw JSON text of "extensions" and "extras".
+  if (store_original_json_for_extras_and_extensions) {
+    json_const_iterator raw;
+    if (FindMember(o, "extensions", raw)) {
+      camera->extensions_json_string = JsonToString(GetValue(raw));
+    }
+    if (FindMember(o, "extras", raw)) {
+      camera->extras_json_string = JsonToString(GetValue(raw));
+    }
+  }
+
+  // TODO(syoyo): Validate parameter values.
+
+  return true;
+}
+
+// Parses a glTF camera. "type" is required and must be "orthographic" or
+// "perspective"; the object of the same name must exist, be a JSON object and
+// parse successfully. "name", extras and extensions are optional.
+static bool ParseCamera(Camera *camera, std::string *err, const json &o,
+                        bool store_original_json_for_extras_and_extensions) {
+  if (!ParseStringProperty(&camera->type, err, o, "type", true, "Camera")) {
+    return false;
+  }
+
+  const bool isOrthographic = (camera->type == "orthographic");
+  const bool isPerspective = (camera->type == "perspective");
+  if (!isOrthographic && !isPerspective) {
+    if (err) {
+      std::stringstream ss;
+      ss << "Invalid camera type: \"" << camera->type
+         << "\". Must be \"perspective\" or \"orthographic\"" << std::endl;
+      (*err) += ss.str();
+    }
+    return false;
+  }
+
+  json_const_iterator projectionIt;
+  if (isOrthographic) {
+    if (!FindMember(o, "orthographic", projectionIt)) {
+      if (err) {
+        std::stringstream ss;
+        ss << "Orhographic camera description not found." << std::endl;
+        (*err) += ss.str();
+      }
+      return false;
+    }
+
+    const json &projection = GetValue(projectionIt);
+    if (!IsObject(projection)) {
+      if (err) {
+        std::stringstream ss;
+        ss << "\"orthographic\" is not a JSON object." << std::endl;
+        (*err) += ss.str();
+      }
+      return false;
+    }
+    if (!ParseOrthographicCamera(
+            &camera->orthographic, err, projection,
+            store_original_json_for_extras_and_extensions)) {
+      return false;
+    }
+  } else {
+    if (!FindMember(o, "perspective", projectionIt)) {
+      if (err) {
+        std::stringstream ss;
+        ss << "Perspective camera description not found." << std::endl;
+        (*err) += ss.str();
+      }
+      return false;
+    }
+
+    const json &projection = GetValue(projectionIt);
+    if (!IsObject(projection)) {
+      if (err) {
+        std::stringstream ss;
+        ss << "\"perspective\" is not a JSON object." << std::endl;
+        (*err) += ss.str();
+      }
+      return false;
+    }
+    if (!ParsePerspectiveCamera(
+            &camera->perspective, err, projection,
+            store_original_json_for_extras_and_extensions)) {
+      return false;
+    }
+  }
+
+  ParseStringProperty(&camera->name, err, o, "name", false);
+
+  ParseExtensionsProperty(&camera->extensions, err, o);
+  ParseExtrasProperty(&(camera->extras), o);
+
+  // Optionally keep the raw JSON text of "extensions" and "extras".
+  if (store_original_json_for_extras_and_extensions) {
+    json_const_iterator raw;
+    if (FindMember(o, "extensions", raw)) {
+      camera->extensions_json_string = JsonToString(GetValue(raw));
+    }
+    if (FindMember(o, "extras", raw)) {
+      camera->extras_json_string = JsonToString(GetValue(raw));
+    }
+  }
+
+  return true;
+}
+
+// Reads one light JSON object `o` into `light`.
+//
+// "type" is required. For a "spot" light the "spot" member must exist and be
+// a JSON object; it is handed to ParseSpotLight. "name", "color", "range",
+// "intensity", "extensions" and "extras" are optional. With
+// `store_original_json_for_extras_and_extensions` set, the raw JSON text of
+// "extensions" and "extras" is kept too.
+//
+// Returns false on failure, appending a message to `*err` if `err` is
+// non-null.
+static bool ParseLight(Light *light, std::string *err, const json &o,
+                       bool store_original_json_for_extras_and_extensions) {
+  const bool has_type =
+      ParseStringProperty(&light->type, err, o, "type", true);
+  if (!has_type) {
+    return false;
+  }
+
+  if (light->type.compare("spot") == 0) {
+    json_const_iterator spotMember;
+    const bool spot_present = FindMember(o, "spot", spotMember);
+    if (!spot_present) {
+      if (err != nullptr) {
+        (*err) += "Spot light description not found.\n";
+      }
+      return false;
+    }
+
+    const json &spotJson = GetValue(spotMember);
+    if (!IsObject(spotJson)) {
+      if (err != nullptr) {
+        (*err) += "\"spot\" is not a JSON object.\n";
+      }
+      return false;
+    }
+
+    const bool spot_ok =
+        ParseSpotLight(&light->spot, err, spotJson,
+                       store_original_json_for_extras_and_extensions);
+    if (!spot_ok) {
+      return false;
+    }
+  }
+
+  // Everything below is optional, so the helpers' results are not checked.
+  ParseStringProperty(&light->name, err, o, "name", false);
+  ParseNumberArrayProperty(&light->color, err, o, "color", false);
+  ParseNumberProperty(&light->range, err, o, "range", false);
+  ParseNumberProperty(&light->intensity, err, o, "intensity", false);
+  ParseExtensionsProperty(&light->extensions, err, o);
+  ParseExtrasProperty(&(light->extras), o);
+
+  if (!store_original_json_for_extras_and_extensions) {
+    return true;
+  }
+
+  json_const_iterator extensionsMember;
+  if (FindMember(o, "extensions", extensionsMember)) {
+    light->extensions_json_string = JsonToString(GetValue(extensionsMember));
+  }
+
+  json_const_iterator extrasMember;
+  if (FindMember(o, "extras", extrasMember)) {
+    light->extras_json_string = JsonToString(GetValue(extrasMember));
+  }
+
+  return true;
+}
+
+// Parses a glTF JSON document held in memory and fills `model` from it.
+//
+// model           : destination. Several containers are cleared before
+//                   parsing (see the reset block below); the rest are appended
+//                   to.
+// err, warn       : optional message sinks; either may be null.
+// json_str,
+// json_str_length : the JSON text and its length. Fewer than 4 characters is
+//                   rejected outright.
+// base_dir        : forwarded to the buffer/image/texture parsers.
+// check_sections  : bitmask of REQUIRE_* flags naming top-level members that
+//                   must be present.
+//
+// Uses the member state is_binary_/bin_data_/bin_size_ (set by the public
+// Load* entry points) when parsing buffers.
+//
+// Returns true on success. On the first hard error a message is written to
+// `*err` (when non-null) and false is returned; `model` may then be partially
+// filled.
+bool TinyGLTF::LoadFromString(Model *model, std::string *err, std::string *warn,
+                              const char *json_str,
+                              unsigned int json_str_length,
+                              const std::string &base_dir,
+                              unsigned int check_sections) {
+  if (json_str_length < 4) {
+    if (err) {
+      (*err) = "JSON string too short.\n";
+    }
+    return false;
+  }
+
+  JsonDocument v;
+
+#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || \
+     defined(_CPPUNWIND)) &&                               \
+    !defined(TINYGLTF_NOEXCEPTION)
+  try {
+    JsonParse(v, json_str, json_str_length, true);
+
+  } catch (const std::exception &e) {
+    if (err) {
+      (*err) = e.what();
+    }
+    return false;
+  }
+#else
+  {
+    JsonParse(v, json_str, json_str_length);
+
+    if (!IsObject(v)) {
+      // Assume parsing was failed.
+      if (err) {
+        (*err) = "Failed to parse JSON object\n";
+      }
+      return false;
+    }
+  }
+#endif
+
+  if (!IsObject(v)) {
+    // root is not an object.
+    if (err) {
+      (*err) = "Root element is not a JSON object\n";
+    }
+    return false;
+  }
+
+  // "asset.version" must be a string when REQUIRE_VERSION is requested.
+  {
+    bool version_found = false;
+    json_const_iterator it;
+    if (FindMember(v, "asset", it) && IsObject(GetValue(it))) {
+      auto &itObj = GetValue(it);
+      json_const_iterator version_it;
+      std::string versionStr;
+      if (FindMember(itObj, "version", version_it) &&
+          GetString(GetValue(version_it), versionStr)) {
+        version_found = true;
+      }
+    }
+    if (version_found) {
+      // OK
+    } else if (check_sections & REQUIRE_VERSION) {
+      if (err) {
+        (*err) += "\"asset\" object not found in .gltf or not an object type\n";
+      }
+      return false;
+    }
+  }
+
+  // scene is not mandatory.
+  // FIXME Maybe a better way to handle it than removing the code
+
+  // True when `_v` has a member `name` whose value is a JSON array.
+  auto IsArrayMemberPresent = [](const json &_v, const char *name) -> bool {
+    json_const_iterator it;
+    return FindMember(_v, name, it) && IsArray(GetValue(it));
+  };
+
+  {
+    if ((check_sections & REQUIRE_SCENES) &&
+        !IsArrayMemberPresent(v, "scenes")) {
+      if (err) {
+        (*err) += "\"scenes\" object not found in .gltf or not an array type\n";
+      }
+      return false;
+    }
+  }
+
+  {
+    if ((check_sections & REQUIRE_NODES) && !IsArrayMemberPresent(v, "nodes")) {
+      if (err) {
+        (*err) += "\"nodes\" object not found in .gltf\n";
+      }
+      return false;
+    }
+  }
+
+  {
+    if ((check_sections & REQUIRE_ACCESSORS) &&
+        !IsArrayMemberPresent(v, "accessors")) {
+      if (err) {
+        (*err) += "\"accessors\" object not found in .gltf\n";
+      }
+      return false;
+    }
+  }
+
+  {
+    if ((check_sections & REQUIRE_BUFFERS) &&
+        !IsArrayMemberPresent(v, "buffers")) {
+      if (err) {
+        (*err) += "\"buffers\" object not found in .gltf\n";
+      }
+      return false;
+    }
+  }
+
+  {
+    if ((check_sections & REQUIRE_BUFFER_VIEWS) &&
+        !IsArrayMemberPresent(v, "bufferViews")) {
+      if (err) {
+        (*err) += "\"bufferViews\" object not found in .gltf\n";
+      }
+      return false;
+    }
+  }
+
+  // Reset the parts of the model that are rebuilt below.
+  model->buffers.clear();
+  model->bufferViews.clear();
+  model->accessors.clear();
+  model->meshes.clear();
+  model->cameras.clear();
+  model->nodes.clear();
+  model->extensionsUsed.clear();
+  model->extensionsRequired.clear();
+  model->extensions.clear();
+  model->defaultScene = -1;
+
+  // 1. Parse Asset
+  {
+    json_const_iterator it;
+    if (FindMember(v, "asset", it) && IsObject(GetValue(it))) {
+      const json &root = GetValue(it);
+
+      ParseAsset(&model->asset, err, root,
+                 store_original_json_for_extras_and_extensions_);
+    }
+  }
+
+  // Calls `cb` on every element of the array member `member` of `_v`.
+  // A missing or non-array member counts as success; iteration stops and
+  // false is returned as soon as `cb` returns false.
+#ifdef TINYGLTF_USE_CPP14
+  auto ForEachInArray = [](const json &_v, const char *member,
+                           const auto &cb) -> bool
+#else
+  // The std::function<> implementation can be less efficient because it will
+  // allocate heap when the size of the captured lambda is above 16 bytes with
+  // clang and gcc, but it does not require C++14.
+  auto ForEachInArray = [](const json &_v, const char *member,
+                           const std::function<bool(const json &)> &cb) -> bool
+#endif
+  {
+    json_const_iterator itm;
+    if (FindMember(_v, member, itm) && IsArray(GetValue(itm))) {
+      const json &root = GetValue(itm);
+      auto it = ArrayBegin(root);
+      auto end = ArrayEnd(root);
+      for (; it != end; ++it) {
+        if (!cb(*it)) return false;
+      }
+    }
+    return true;
+  };
+
+  // 2. Parse extensionUsed
+  {
+    ForEachInArray(v, "extensionsUsed", [&](const json &o) {
+      std::string str;
+      GetString(o, str);
+      model->extensionsUsed.emplace_back(std::move(str));
+      return true;
+    });
+  }
+
+  {
+    ForEachInArray(v, "extensionsRequired", [&](const json &o) {
+      std::string str;
+      GetString(o, str);
+      model->extensionsRequired.emplace_back(std::move(str));
+      return true;
+    });
+  }
+
+  // 3. Parse Buffer
+  {
+    bool success = ForEachInArray(v, "buffers", [&](const json &o) {
+      if (!IsObject(o)) {
+        if (err) {
+          (*err) += "`buffers' does not contain an JSON object.";
+        }
+        return false;
+      }
+      Buffer buffer;
+      if (!ParseBuffer(&buffer, err, o,
+                       store_original_json_for_extras_and_extensions_, &fs,
+                       base_dir, is_binary_, bin_data_, bin_size_)) {
+        return false;
+      }
+
+      model->buffers.emplace_back(std::move(buffer));
+      return true;
+    });
+
+    if (!success) {
+      return false;
+    }
+  }
+  // 4. Parse BufferView
+  {
+    bool success = ForEachInArray(v, "bufferViews", [&](const json &o) {
+      if (!IsObject(o)) {
+        if (err) {
+          (*err) += "`bufferViews' does not contain an JSON object.";
+        }
+        return false;
+      }
+      BufferView bufferView;
+      if (!ParseBufferView(&bufferView, err, o,
+                           store_original_json_for_extras_and_extensions_)) {
+        return false;
+      }
+
+      model->bufferViews.emplace_back(std::move(bufferView));
+      return true;
+    });
+
+    if (!success) {
+      return false;
+    }
+  }
+
+  // 5. Parse Accessor
+  {
+    bool success = ForEachInArray(v, "accessors", [&](const json &o) {
+      if (!IsObject(o)) {
+        if (err) {
+          (*err) += "`accessors' does not contain an JSON object.";
+        }
+        return false;
+      }
+      Accessor accessor;
+      if (!ParseAccessor(&accessor, err, o,
+                         store_original_json_for_extras_and_extensions_)) {
+        return false;
+      }
+
+      model->accessors.emplace_back(std::move(accessor));
+      return true;
+    });
+
+    if (!success) {
+      return false;
+    }
+  }
+
+  // 6. Parse Mesh
+  {
+    bool success = ForEachInArray(v, "meshes", [&](const json &o) {
+      if (!IsObject(o)) {
+        if (err) {
+          (*err) += "`meshes' does not contain an JSON object.";
+        }
+        return false;
+      }
+      Mesh mesh;
+      if (!ParseMesh(&mesh, model, err, o,
+                     store_original_json_for_extras_and_extensions_)) {
+        return false;
+      }
+
+      model->meshes.emplace_back(std::move(mesh));
+      return true;
+    });
+
+    if (!success) {
+      return false;
+    }
+  }
+
+  // Marks the bufferView behind accessor `accessor_index` as a vertex (array)
+  // buffer. Accessor indices outside model->accessors, and bufferView values
+  // that are negative (e.g. -1 for a sparse accessor) or outside
+  // model->bufferViews, are skipped rather than indexed out of bounds.
+  auto MarkAsArrayBuffer = [model](int accessor_index) {
+    if (accessor_index < 0 ||
+        size_t(accessor_index) >= model->accessors.size()) {
+      return;
+    }
+    const int bufferView =
+        model->accessors[size_t(accessor_index)].bufferView;
+    if (bufferView < 0 || size_t(bufferView) >= model->bufferViews.size()) {
+      return;
+    }
+    model->bufferViews[size_t(bufferView)].target =
+        TINYGLTF_TARGET_ARRAY_BUFFER;
+  };
+
+  // Assign missing bufferView target types
+  // - Look for missing Mesh indices
+  // - Look for missing Mesh attributes
+  for (auto &mesh : model->meshes) {
+    for (auto &primitive : mesh.primitives) {
+      if (primitive.indices >
+          -1)  // has indices from parsing step, must be Element Array Buffer
+      {
+        if (size_t(primitive.indices) >= model->accessors.size()) {
+          if (err) {
+            (*err) += "primitive indices accessor out of bounds";
+          }
+          return false;
+        }
+
+        auto bufferView =
+            model->accessors[size_t(primitive.indices)].bufferView;
+        if (bufferView < 0 || size_t(bufferView) >= model->bufferViews.size()) {
+          if (err) {
+            (*err) += "accessor[" + std::to_string(primitive.indices) +
+                      "] invalid bufferView";
+          }
+          return false;
+        }
+
+        model->bufferViews[size_t(bufferView)].target =
+            TINYGLTF_TARGET_ELEMENT_ARRAY_BUFFER;
+        // we could optionally check if acessors' bufferView type is Scalar, as
+        // it should be
+      }
+
+      for (auto &attribute : primitive.attributes) {
+        MarkAsArrayBuffer(attribute.second);
+      }
+
+      for (auto &target : primitive.targets) {
+        for (auto &attribute : target) {
+          // bufferView could be null(-1) for sparse morph target
+          MarkAsArrayBuffer(attribute.second);
+        }
+      }
+    }
+  }
+
+  // 7. Parse Node
+  {
+    bool success = ForEachInArray(v, "nodes", [&](const json &o) {
+      if (!IsObject(o)) {
+        if (err) {
+          (*err) += "`nodes' does not contain an JSON object.";
+        }
+        return false;
+      }
+      Node node;
+      if (!ParseNode(&node, err, o,
+                     store_original_json_for_extras_and_extensions_)) {
+        return false;
+      }
+
+      model->nodes.emplace_back(std::move(node));
+      return true;
+    });
+
+    if (!success) {
+      return false;
+    }
+  }
+
+  // 8. Parse scenes.
+  {
+    bool success = ForEachInArray(v, "scenes", [&](const json &o) {
+      if (!IsObject(o)) {
+        if (err) {
+          (*err) += "`scenes' does not contain an JSON object.";
+        }
+        return false;
+      }
+      std::vector<int> nodes;
+      ParseIntegerArrayProperty(&nodes, err, o, "nodes", false);
+
+      Scene scene;
+      scene.nodes = std::move(nodes);
+
+      ParseStringProperty(&scene.name, err, o, "name", false);
+
+      ParseExtensionsProperty(&scene.extensions, err, o);
+      ParseExtrasProperty(&scene.extras, o);
+
+      if (store_original_json_for_extras_and_extensions_) {
+        {
+          json_const_iterator it;
+          if (FindMember(o, "extensions", it)) {
+            scene.extensions_json_string = JsonToString(GetValue(it));
+          }
+        }
+        {
+          json_const_iterator it;
+          if (FindMember(o, "extras", it)) {
+            scene.extras_json_string = JsonToString(GetValue(it));
+          }
+        }
+      }
+
+      model->scenes.emplace_back(std::move(scene));
+      return true;
+    });
+
+    if (!success) {
+      return false;
+    }
+  }
+
+  // 9. Parse default scenes.
+  {
+    json_const_iterator rootIt;
+    int iVal;
+    if (FindMember(v, "scene", rootIt) && GetInt(GetValue(rootIt), iVal)) {
+      model->defaultScene = iVal;
+    }
+  }
+
+  // 10. Parse Material
+  {
+    bool success = ForEachInArray(v, "materials", [&](const json &o) {
+      if (!IsObject(o)) {
+        if (err) {
+          (*err) += "`materials' does not contain an JSON object.";
+        }
+        return false;
+      }
+      Material material;
+      ParseStringProperty(&material.name, err, o, "name", false);
+
+      if (!ParseMaterial(&material, err, o,
+                         store_original_json_for_extras_and_extensions_)) {
+        return false;
+      }
+
+      model->materials.emplace_back(std::move(material));
+      return true;
+    });
+
+    if (!success) {
+      return false;
+    }
+  }
+
+  // 11. Parse Image
+  void *load_image_user_data{nullptr};
+
+  LoadImageDataOption load_image_option;
+
+  if (user_image_loader_) {
+    // Use user supplied pointer
+    load_image_user_data = load_image_user_data_;
+  } else {
+    load_image_option.preserve_channels = preserve_image_channels_;
+    load_image_user_data = reinterpret_cast<void *>(&load_image_option);
+  }
+
+  {
+    int idx = 0;
+    bool success = ForEachInArray(v, "images", [&](const json &o) {
+      if (!IsObject(o)) {
+        if (err) {
+          (*err) += "image[" + std::to_string(idx) + "] is not a JSON object.";
+        }
+        return false;
+      }
+      Image image;
+      if (!ParseImage(&image, idx, err, warn, o,
+                      store_original_json_for_extras_and_extensions_, base_dir,
+                      &fs, &this->LoadImageData, load_image_user_data)) {
+        return false;
+      }
+
+      if (image.bufferView != -1) {
+        // Load image from the buffer view.
+        if (size_t(image.bufferView) >= model->bufferViews.size()) {
+          if (err) {
+            std::stringstream ss;
+            ss << "image[" << idx << "] bufferView \"" << image.bufferView
+               << "\" not found in the scene." << std::endl;
+            (*err) += ss.str();
+          }
+          return false;
+        }
+
+        const BufferView &bufferView =
+            model->bufferViews[size_t(image.bufferView)];
+        if (size_t(bufferView.buffer) >= model->buffers.size()) {
+          if (err) {
+            std::stringstream ss;
+            ss << "image[" << idx << "] buffer \"" << bufferView.buffer
+               << "\" not found in the scene." << std::endl;
+            (*err) += ss.str();
+          }
+          return false;
+        }
+        const Buffer &buffer = model->buffers[size_t(bufferView.buffer)];
+
+        // The view must lie entirely inside its buffer; otherwise the image
+        // loader would be handed a pointer/length running past buffer.data.
+        if (bufferView.byteOffset > buffer.data.size() ||
+            bufferView.byteLength >
+                buffer.data.size() - bufferView.byteOffset) {
+          if (err) {
+            std::stringstream ss;
+            ss << "image[" << idx << "] bufferView \"" << image.bufferView
+               << "\" is out of range of its buffer." << std::endl;
+            (*err) += ss.str();
+          }
+          return false;
+        }
+
+        if (*LoadImageData == nullptr) {
+          if (err) {
+            (*err) += "No LoadImageData callback specified.\n";
+          }
+          return false;
+        }
+        bool ret = LoadImageData(
+            &image, idx, err, warn, image.width, image.height,
+            buffer.data.data() + bufferView.byteOffset,
+            static_cast<int>(bufferView.byteLength), load_image_user_data);
+        if (!ret) {
+          return false;
+        }
+      }
+
+      model->images.emplace_back(std::move(image));
+      ++idx;
+      return true;
+    });
+
+    if (!success) {
+      return false;
+    }
+  }
+
+  // 12. Parse Texture
+  {
+    bool success = ForEachInArray(v, "textures", [&](const json &o) {
+      if (!IsObject(o)) {
+        if (err) {
+          (*err) += "`textures' does not contain an JSON object.";
+        }
+        return false;
+      }
+      Texture texture;
+      if (!ParseTexture(&texture, err, o,
+                        store_original_json_for_extras_and_extensions_,
+                        base_dir)) {
+        return false;
+      }
+
+      model->textures.emplace_back(std::move(texture));
+      return true;
+    });
+
+    if (!success) {
+      return false;
+    }
+  }
+
+  // 13. Parse Animation
+  {
+    bool success = ForEachInArray(v, "animations", [&](const json &o) {
+      if (!IsObject(o)) {
+        if (err) {
+          (*err) += "`animations' does not contain an JSON object.";
+        }
+        return false;
+      }
+      Animation animation;
+      if (!ParseAnimation(&animation, err, o,
+                          store_original_json_for_extras_and_extensions_)) {
+        return false;
+      }
+
+      model->animations.emplace_back(std::move(animation));
+      return true;
+    });
+
+    if (!success) {
+      return false;
+    }
+  }
+
+  // 14. Parse Skin
+  {
+    bool success = ForEachInArray(v, "skins", [&](const json &o) {
+      if (!IsObject(o)) {
+        if (err) {
+          (*err) += "`skins' does not contain an JSON object.";
+        }
+        return false;
+      }
+      Skin skin;
+      if (!ParseSkin(&skin, err, o,
+                     store_original_json_for_extras_and_extensions_)) {
+        return false;
+      }
+
+      model->skins.emplace_back(std::move(skin));
+      return true;
+    });
+
+    if (!success) {
+      return false;
+    }
+  }
+
+  // 15. Parse Sampler
+  {
+    bool success = ForEachInArray(v, "samplers", [&](const json &o) {
+      if (!IsObject(o)) {
+        if (err) {
+          (*err) += "`samplers' does not contain an JSON object.";
+        }
+        return false;
+      }
+      Sampler sampler;
+      if (!ParseSampler(&sampler, err, o,
+                        store_original_json_for_extras_and_extensions_)) {
+        return false;
+      }
+
+      model->samplers.emplace_back(std::move(sampler));
+      return true;
+    });
+
+    if (!success) {
+      return false;
+    }
+  }
+
+  // 16. Parse Camera
+  {
+    bool success = ForEachInArray(v, "cameras", [&](const json &o) {
+      if (!IsObject(o)) {
+        if (err) {
+          (*err) += "`cameras' does not contain an JSON object.";
+        }
+        return false;
+      }
+      Camera camera;
+      if (!ParseCamera(&camera, err, o,
+                       store_original_json_for_extras_and_extensions_)) {
+        return false;
+      }
+
+      model->cameras.emplace_back(std::move(camera));
+      return true;
+    });
+
+    if (!success) {
+      return false;
+    }
+  }
+
+  // 17. Parse Extensions
+  ParseExtensionsProperty(&model->extensions, err, v);
+
+  // 18. Specific extension implementations
+  {
+    json_const_iterator rootIt;
+    if (FindMember(v, "extensions", rootIt) && IsObject(GetValue(rootIt))) {
+      const json &root = GetValue(rootIt);
+
+      json_const_iterator it(ObjectBegin(root));
+      json_const_iterator itEnd(ObjectEnd(root));
+      for (; it != itEnd; ++it) {
+        // parse KHR_lights_punctual extension
+        std::string key(GetKey(it));
+        if ((key == "KHR_lights_punctual") && IsObject(GetValue(it))) {
+          const json &object = GetValue(it);
+          json_const_iterator itLight;
+          if (FindMember(object, "lights", itLight)) {
+            const json &lights = GetValue(itLight);
+            if (!IsArray(lights)) {
+              continue;
+            }
+
+            auto arrayIt(ArrayBegin(lights));
+            auto arrayItEnd(ArrayEnd(lights));
+            for (; arrayIt != arrayItEnd; ++arrayIt) {
+              Light light;
+              if (!ParseLight(&light, err, *arrayIt,
+                              store_original_json_for_extras_and_extensions_)) {
+                return false;
+              }
+              model->lights.emplace_back(std::move(light));
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // 19. Parse Extras
+  ParseExtrasProperty(&model->extras, v);
+
+  if (store_original_json_for_extras_and_extensions_) {
+    // Look the members up instead of using operator[]: indexing a missing
+    // member would insert it (nlohmann) or trip an assertion (RapidJSON).
+    // As for the other glTF objects parsed above, an absent member leaves the
+    // corresponding string empty.
+    model->extras_json_string.clear();
+    model->extensions_json_string.clear();
+    {
+      json_const_iterator it;
+      if (FindMember(v, "extras", it)) {
+        model->extras_json_string = JsonToString(GetValue(it));
+      }
+    }
+    {
+      json_const_iterator it;
+      if (FindMember(v, "extensions", it)) {
+        model->extensions_json_string = JsonToString(GetValue(it));
+      }
+    }
+  }
+
+  return true;
+}
+
+// Loads a text (.gltf) document from an in-memory string.
+//
+// Resets the binary-chunk state (is_binary_, bin_data_, bin_size_) and then
+// delegates to LoadFromString with the same arguments; its result is returned
+// unchanged.
+bool TinyGLTF::LoadASCIIFromString(Model *model, std::string *err,
+                                   std::string *warn, const char *str,
+                                   unsigned int length,
+                                   const std::string &base_dir,
+                                   unsigned int check_sections) {
+  bin_size_ = 0;
+  bin_data_ = nullptr;
+  is_binary_ = false;
+
+  const bool loaded = LoadFromString(model, err, warn, str, length, base_dir,
+                                     check_sections);
+  return loaded;
+}
+
+// Loads a text (.gltf) document from `filename`.
+//
+// The file is read through the fs.ReadWholeFile callback; the directory part
+// of `filename` (GetBaseDir) becomes the base directory handed to
+// LoadASCIIFromString. Fails with a message in `*err` (when non-null) if the
+// callback is unset, the read fails, or the file is empty.
+bool TinyGLTF::LoadASCIIFromFile(Model *model, std::string *err,
+                                 std::string *warn, const std::string &filename,
+                                 unsigned int check_sections) {
+  const std::string failure_prefix = "Failed to read file: " + filename;
+
+  if (fs.ReadWholeFile == nullptr) {
+    // Programmer error, assert() ?
+    if (err != nullptr) {
+      (*err) = failure_prefix + ": one or more FS callback not set\n";
+    }
+    return false;
+  }
+
+  std::vector<unsigned char> contents;
+  std::string read_error;
+  if (!fs.ReadWholeFile(&contents, &read_error, filename, fs.user_data)) {
+    if (err != nullptr) {
+      (*err) = failure_prefix + ": " + read_error + "\n";
+    }
+    return false;
+  }
+
+  if (contents.empty()) {
+    if (err != nullptr) {
+      (*err) = "Empty file.";
+    }
+    return false;
+  }
+
+  const std::string basedir = GetBaseDir(filename);
+
+  return LoadASCIIFromString(
+      model, err, warn, reinterpret_cast<const char *>(contents.data()),
+      static_cast<unsigned int>(contents.size()), basedir, check_sections);
+}
+
+// Loads a binary glTF (.glb) container from memory.
+//
+// bytes, size    : the container data and its size.
+// base_dir       : forwarded to LoadFromString.
+// check_sections : forwarded to LoadFromString.
+//
+// Layout read here: a 4-byte "glTF" magic, then four 4-byte fields (version,
+// total length, JSON chunk length, JSON chunk format), then the JSON text at
+// offset 20. Anything after the JSON chunk is treated as one further chunk
+// whose 8-byte header (length + format) precedes the binary payload.
+//
+// Sets is_binary_, bin_data_ and bin_size_ for the buffer parser, then parses
+// the JSON chunk with LoadFromString. Returns false with a message in `*err`
+// (when non-null) if the header is malformed or parsing fails.
+bool TinyGLTF::LoadBinaryFromMemory(Model *model, std::string *err,
+                                    std::string *warn,
+                                    const unsigned char *bytes,
+                                    unsigned int size,
+                                    const std::string &base_dir,
+                                    unsigned int check_sections) {
+  if (size < 20) {
+    if (err) {
+      (*err) = "Too short data size for glTF Binary.";
+    }
+    return false;
+  }
+
+  if (bytes[0] == 'g' && bytes[1] == 'l' && bytes[2] == 'T' &&
+      bytes[3] == 'F') {
+    // ok
+  } else {
+    if (err) {
+      (*err) = "Invalid magic.";
+    }
+    return false;
+  }
+
+  unsigned int version;       // 4 bytes
+  unsigned int length;        // 4 bytes
+  unsigned int model_length;  // 4 bytes
+  unsigned int model_format;  // 4 bytes;
+
+  // @todo { Endian swap for big endian machine. }
+  memcpy(&version, bytes + 4, 4);
+  swap4(&version);
+  memcpy(&length, bytes + 8, 4);
+  swap4(&length);
+  memcpy(&model_length, bytes + 12, 4);
+  swap4(&model_length);
+  memcpy(&model_format, bytes + 16, 4);
+  swap4(&model_format);
+
+  // End offset of the JSON chunk, computed in 64 bits: in 32-bit arithmetic
+  // `20 + model_length` wraps for lengths close to UINT_MAX and would slip
+  // past the range checks below.
+  const unsigned long long json_end =
+      20ull + static_cast<unsigned long long>(model_length);
+
+  // In case the Bin buffer is not present, the size is exactly 20 + size of
+  // JSON contents,
+  // so use "greater than" operator.
+  if ((json_end > size) || (model_length < 1) || (length > size) ||
+      (json_end > length) ||
+      (model_format != 0x4E4F534A)) {  // 0x4E4F534A = JSON format.
+    if (err) {
+      (*err) = "Invalid glTF binary.";
+    }
+    return false;
+  }
+
+  is_binary_ = true;
+
+  // Bytes following the JSON chunk. The payload starts after an 8-byte chunk
+  // header: 4 bytes (buffer_length) + 4 bytes(buffer_format). When there is
+  // no room for a header plus at least one payload byte, report "no binary
+  // data" instead of a pointer/size pair that reaches past `length`.
+  const unsigned long long trailing =
+      static_cast<unsigned long long>(length) - json_end;
+  if (trailing > 8) {
+    bin_data_ = bytes + static_cast<size_t>(json_end) + 8;
+    bin_size_ = static_cast<size_t>(trailing - 8);
+  } else {
+    bin_data_ = nullptr;
+    bin_size_ = 0;
+  }
+
+  // Parse the JSON chunk in place.
+  return LoadFromString(model, err, warn,
+                        reinterpret_cast<const char *>(&bytes[20]),
+                        model_length, base_dir, check_sections);
+}
+
+// Loads a binary glTF (.glb) file from `filename`.
+//
+// The file is read through the fs.ReadWholeFile callback and handed to
+// LoadBinaryFromMemory, with the directory part of `filename` (GetBaseDir) as
+// the base directory. Fails with a message in `*err` (when non-null) if the
+// callback is unset or the read fails; an empty or truncated file is rejected
+// by LoadBinaryFromMemory's size check.
+bool TinyGLTF::LoadBinaryFromFile(Model *model, std::string *err,
+                                  std::string *warn,
+                                  const std::string &filename,
+                                  unsigned int check_sections) {
+  std::stringstream ss;
+
+  if (fs.ReadWholeFile == nullptr) {
+    // Programmer error, assert() ?
+    ss << "Failed to read file: " << filename
+       << ": one or more FS callback not set" << std::endl;
+    if (err) {
+      (*err) = ss.str();
+    }
+    return false;
+  }
+
+  std::vector<unsigned char> data;
+  std::string fileerr;
+  bool fileread = fs.ReadWholeFile(&data, &fileerr, filename, fs.user_data);
+  if (!fileread) {
+    ss << "Failed to read file: " << filename << ": " << fileerr << std::endl;
+    if (err) {
+      (*err) = ss.str();
+    }
+    return false;
+  }
+
+  std::string basedir = GetBaseDir(filename);
+
+  // data.data() is valid for an empty vector, whereas data.at(0) would throw
+  // std::out_of_range on a zero-length file. LoadBinaryFromMemory checks the
+  // size before dereferencing the pointer.
+  bool ret = LoadBinaryFromMemory(model, err, warn, data.data(),
+                                  static_cast<unsigned int>(data.size()),
+                                  basedir, check_sections);
+
+  return ret;
+}
+
+///////////////////////
+// GLTF Serialization
+///////////////////////
+// Small wrappers used by the serialization code below. Each one has a
+// RapidJSON implementation (TINYGLTF_USE_RAPIDJSON defined) and a fallback
+// implementation for the other JSON backend, so the callers can be written
+// once.
+namespace {
+// Builds a JSON string value from the C string `s`.
+json JsonFromString(const char *s) {
+#ifdef TINYGLTF_USE_RAPIDJSON
+  return json(s, GetAllocator());
+#else
+  return json(s);
+#endif
+}
+
+// Copies `src` into `dest`.
+void JsonAssign(json &dest, const json &src) {
+#ifdef TINYGLTF_USE_RAPIDJSON
+  dest.CopyFrom(src, GetAllocator());
+#else
+  dest = src;
+#endif
+}
+
+// Stores `value` in `o` under `key`, taking ownership of `value`.
+// The RapidJSON path first turns `o` into an object if it is not one.
+void JsonAddMember(json &o, const char *key, json &&value) {
+#ifdef TINYGLTF_USE_RAPIDJSON
+  if (!o.IsObject()) {
+    o.SetObject();
+  }
+  o.AddMember(json(key, GetAllocator()), std::move(value), GetAllocator());
+#else
+  o[key] = std::move(value);
+#endif
+}
+
+// Appends `value` to `o`, taking ownership of `value`.
+void JsonPushBack(json &o, json &&value) {
+#ifdef TINYGLTF_USE_RAPIDJSON
+  o.PushBack(std::move(value), GetAllocator());
+#else
+  o.push_back(std::move(value));
+#endif
+}
+
+// Returns true when `o` is a JSON null.
+bool JsonIsNull(const json &o) {
+#ifdef TINYGLTF_USE_RAPIDJSON
+  return o.IsNull();
+#else
+  return o.is_null();
+#endif
+}
+
+// Turns `o` into an empty JSON object.
+void JsonSetObject(json &o) {
+#ifdef TINYGLTF_USE_RAPIDJSON
+  o.SetObject();
+#else
+  o = o.object({});
+#endif
+}
+
+// RapidJSON: makes `o` an array and reserves room for `s` elements.
+// Other backend: does nothing (the casts only silence unused-parameter
+// warnings).
+void JsonReserveArray(json &o, size_t s) {
+#ifdef TINYGLTF_USE_RAPIDJSON
+  o.SetArray();
+  o.Reserve(static_cast<rapidjson::SizeType>(s), GetAllocator());
+#endif
+  (void)(o);
+  (void)(s);
+}
+}  // namespace
+
+// typedef std::pair<std::string, json> json_object_pair;
+
+// Writes `number` into `obj` as the member named `key`.
+template <typename T>
+static void SerializeNumberProperty(const std::string &key, T number,
+                                    json &obj) {
+  json encoded(number);
+  JsonAddMember(obj, key.c_str(), std::move(encoded));
+}
+
+#ifdef TINYGLTF_USE_RAPIDJSON
+// size_t specialization for the RapidJSON backend: the value is converted to
+// uint64_t before the JSON number is built.
+template <>
+void SerializeNumberProperty(const std::string &key, size_t number, json &obj) {
+  json encoded(static_cast<uint64_t>(number));
+  JsonAddMember(obj, key.c_str(), std::move(encoded));
+}
+#endif
+
+// Writes `value` into `obj` as a JSON array member named `key`.
+// An empty vector writes nothing at all.
+template <typename T>
+static void SerializeNumberArrayProperty(const std::string &key,
+                                         const std::vector<T> &value,
+                                         json &obj) {
+  if (!value.empty()) {
+    json elements;
+    JsonReserveArray(elements, value.size());
+    for (auto it = value.begin(); it != value.end(); ++it) {
+      JsonPushBack(elements, json(*it));
+    }
+    JsonAddMember(obj, key.c_str(), std::move(elements));
+  }
+}
+
+// Writes the string `value` into `obj` as the member named `key`.
+static void SerializeStringProperty(const std::string &key,
+                                    const std::string &value, json &obj) {
+  json text(JsonFromString(value.c_str()));
+  JsonAddMember(obj, key.c_str(), std::move(text));
+}
+
+// Writes `value` into `obj` as a member named `key` holding one JSON string
+// per element. Unlike the numeric variant, the member is added even when
+// `value` is empty.
+static void SerializeStringArrayProperty(const std::string &key,
+                                         const std::vector<std::string> &value,
+                                         json &obj) {
+  json elements;
+  JsonReserveArray(elements, value.size());
+  for (auto it = value.begin(); it != value.end(); ++it) {
+    JsonPushBack(elements, JsonFromString(it->c_str()));
+  }
+  JsonAddMember(obj, key.c_str(), std::move(elements));
+}
+
+// Converts a tinygltf `Value` tree into a JSON value.
+//
+// Real, int, bool and string values map to the matching JSON scalar; arrays
+// and objects are converted recursively, silently dropping any child that
+// cannot be converted. Binary and null values are not convertible.
+//
+// Returns true and stores the result in `*ret` (when `ret` is non-null) on
+// success; returns false, leaving `*ret` untouched, for binary, null or
+// unknown value types.
+static bool ValueToJson(const Value &value, json *ret) {
+  json obj;
+#ifdef TINYGLTF_USE_RAPIDJSON
+  switch (value.Type()) {
+    case REAL_TYPE:
+      obj.SetDouble(value.Get<double>());
+      break;
+    case INT_TYPE:
+      obj.SetInt(value.Get<int>());
+      break;
+    case BOOL_TYPE:
+      obj.SetBool(value.Get<bool>());
+      break;
+    case STRING_TYPE:
+      obj.SetString(value.Get<std::string>().c_str(), GetAllocator());
+      break;
+    case ARRAY_TYPE: {
+      obj.SetArray();
+      obj.Reserve(static_cast<rapidjson::SizeType>(value.ArrayLen()),
+                  GetAllocator());
+      for (unsigned int i = 0; i < value.ArrayLen(); ++i) {
+        json elementJson;
+        if (ValueToJson(value.Get(int(i)), &elementJson))
+          obj.PushBack(std::move(elementJson), GetAllocator());
+      }
+      break;
+    }
+    case BINARY_TYPE:
+      // TODO
+      // obj = json(value.Get<std::vector<unsigned char>>());
+      return false;
+    case OBJECT_TYPE: {
+      obj.SetObject();
+      // Bind by reference: the map is only read, so copying it is wasted work.
+      const Value::Object &objMap = value.Get<Value::Object>();
+      for (const auto &it : objMap) {
+        json elementJson;
+        if (ValueToJson(it.second, &elementJson)) {
+          obj.AddMember(json(it.first.c_str(), GetAllocator()),
+                        std::move(elementJson), GetAllocator());
+        }
+      }
+      break;
+    }
+    case NULL_TYPE:
+    default:
+      return false;
+  }
+#else
+  switch (value.Type()) {
+    case REAL_TYPE:
+      obj = json(value.Get<double>());
+      break;
+    case INT_TYPE:
+      obj = json(value.Get<int>());
+      break;
+    case BOOL_TYPE:
+      obj = json(value.Get<bool>());
+      break;
+    case STRING_TYPE:
+      obj = json(value.Get<std::string>());
+      break;
+    case ARRAY_TYPE: {
+      // Start from an explicit empty array so that a Value array with no
+      // convertible elements is emitted as [] rather than null, matching the
+      // RapidJSON branch above.
+      obj = json::array();
+      for (unsigned int i = 0; i < value.ArrayLen(); ++i) {
+        json elementJson;
+        if (ValueToJson(value.Get(int(i)), &elementJson))
+          obj.push_back(std::move(elementJson));
+      }
+      break;
+    }
+    case BINARY_TYPE:
+      // TODO
+      // obj = json(value.Get<std::vector<unsigned char>>());
+      return false;
+    case OBJECT_TYPE: {
+      // Same reasoning as for arrays: an empty object must stay {}.
+      obj = json::object();
+      // Bind by reference: the map is only read, so copying it is wasted work.
+      const Value::Object &objMap = value.Get<Value::Object>();
+      for (const auto &it : objMap) {
+        json elementJson;
+        if (ValueToJson(it.second, &elementJson))
+          obj[it.first] = std::move(elementJson);
+      }
+      break;
+    }
+    case NULL_TYPE:
+    default:
+      return false;
+  }
+#endif
+  if (ret) *ret = std::move(obj);
+  return true;
+}
+
+// Converts `value` with ValueToJson and, if the conversion succeeds, stores
+// the result in `obj` under `key`. Values that cannot be converted are
+// skipped.
+static void SerializeValue(const std::string &key, const Value &value,
+                           json &obj) {
+  json converted;
+  const bool convertible = ValueToJson(value, &converted);
+  if (!convertible) {
+    return;
+  }
+  JsonAddMember(obj, key.c_str(), std::move(converted));
+}
+
+static void SerializeGltfBufferData(const std::vector<unsigned char> &data,
+                                    json &o) {  // Writes `data` as a "uri" data URI.
+  std::string header = "data:application/octet-stream;base64,";
+  if (data.size() > 0) {
+    std::string encodedData =
+        base64_encode(&data[0], static_cast<unsigned int>(data.size()));
+    SerializeStringProperty("uri", header + encodedData, o);
+  } else {
+    // Issue #229
+    // Size 0 is allowed. Just emit the data-URI header with no payload.
+    SerializeStringProperty("uri", header, o);
+  }
+}
+
+// Writes `data` verbatim to the file `binFilename` (UTF-8 path, truncating).
+// Returns false when the file cannot be opened or the write/flush fails.
+static bool SerializeGltfBufferData(const std::vector<unsigned char> &data,
+                                    const std::string &binFilename) {
+#ifdef _WIN32
+#if defined(__GLIBCXX__)  // mingw
+  int file_descriptor = _wopen(UTF8ToWchar(binFilename).c_str(),
+                               _O_CREAT | _O_WRONLY | _O_TRUNC | _O_BINARY);  // NOTE(review): _O_CREAT without a pmode argument - confirm
+  __gnu_cxx::stdio_filebuf<char> wfile_buf(
+      file_descriptor, std::ios_base::out | std::ios_base::binary);
+  std::ostream output(&wfile_buf);
+  if (!wfile_buf.is_open()) return false;
+#elif defined(_MSC_VER)
+  std::ofstream output(UTF8ToWchar(binFilename).c_str(), std::ofstream::binary);
+  if (!output.is_open()) return false;
+#else
+  std::ofstream output(binFilename.c_str(), std::ofstream::binary);
+  if (!output.is_open()) return false;
+#endif
+#else
+  std::ofstream output(binFilename.c_str(), std::ofstream::binary);
+  if (!output.is_open()) return false;
+#endif
+  // Issue #229: size 0 is still valid buffer data; an empty file is written.
+  if (data.size() > 0) {
+    output.write(reinterpret_cast<const char *>(&data[0]),
+                 std::streamsize(data.size()));
+  }
+  // Flush explicitly so a failed write is reported instead of being ignored.
+  return !output.flush().fail();
+}
+
+#if 0  // FIXME(syoyo): not used. will be removed in the future release.
+static void SerializeParameterMap(ParameterMap &param, json &o) {  // compiled out by the #if 0 above
+  for (ParameterMap::iterator paramIt = param.begin(); paramIt != param.end();
+       ++paramIt) {
+    if (paramIt->second.number_array.size()) {  // first non-empty representation wins
+      SerializeNumberArrayProperty<double>(paramIt->first,
+                                           paramIt->second.number_array, o);
+    } else if (paramIt->second.json_double_value.size()) {
+      json json_double_value;
+      for (std::map<std::string, double>::iterator it =
+               paramIt->second.json_double_value.begin();
+           it != paramIt->second.json_double_value.end(); ++it) {
+        if (it->first == "index") {  // "index" is taken from TextureIndex(), not the stored double
+          json_double_value[it->first] = paramIt->second.TextureIndex();
+        } else {
+          json_double_value[it->first] = it->second;
+        }
+      }
+
+      o[paramIt->first] = json_double_value;
+    } else if (!paramIt->second.string_value.empty()) {
+      SerializeStringProperty(paramIt->first, paramIt->second.string_value, o);
+    } else if (paramIt->second.has_number_value) {
+      o[paramIt->first] = paramIt->second.number_value;
+    } else {  // nothing else set: fall back to the bool value
+      o[paramIt->first] = paramIt->second.bool_value;
+    }
+  }
+}
+#endif
+
+static void SerializeExtensionMap(const ExtensionMap &extensions, json &o) {  // writes o["extensions"]
+  if (!extensions.size()) return;  // empty map: no "extensions" member at all
+
+  json extMap;
+  for (ExtensionMap::const_iterator extIt = extensions.begin();
+       extIt != extensions.end(); ++extIt) {
+    // Allow an empty object for extension(#97)
+    json ret;
+    bool isNull = true;  // stays true when ValueToJson() returns false
+    if (ValueToJson(extIt->second, &ret)) {
+      isNull = JsonIsNull(ret);  // must be read before `ret` is moved from
+      JsonAddMember(extMap, extIt->first.c_str(), std::move(ret));
+    }
+    if (isNull) {
+      if (!(extIt->first.empty())) {  // name should not be empty, but for sure
+        // create empty object so that an extension name is still included in
+        // json.
+        json empty;
+        JsonSetObject(empty);
+        JsonAddMember(extMap, extIt->first.c_str(), std::move(empty));
+      }
+    }
+  }
+  JsonAddMember(o, "extensions", std::move(extMap));
+}
+
+static void SerializeGltfAccessor(Accessor &accessor, json &o) {  // accessor -> JSON members of `o`
+  if (accessor.bufferView >= 0)  // negative index: member omitted
+    SerializeNumberProperty<int>("bufferView", accessor.bufferView, o);
+
+  if (accessor.byteOffset != 0)  // 0 is omitted; the value is narrowed to int
+    SerializeNumberProperty<int>("byteOffset", int(accessor.byteOffset), o);
+
+  SerializeNumberProperty<int>("componentType", accessor.componentType, o);
+  SerializeNumberProperty<size_t>("count", accessor.count, o);
+
+  if ((accessor.componentType == TINYGLTF_COMPONENT_TYPE_FLOAT) ||
+      (accessor.componentType == TINYGLTF_COMPONENT_TYPE_DOUBLE)) {
+    SerializeNumberArrayProperty<double>("min", accessor.minValues, o);
+    SerializeNumberArrayProperty<double>("max", accessor.maxValues, o);
+  } else {
+    // Issue #301. Serialize min/max as integers for non-float components.
+    // Assumes each value fits in `int`; the cast is undefined otherwise.
+    {
+      std::vector<int> values;
+      std::transform(accessor.minValues.begin(), accessor.minValues.end(),
+                     std::back_inserter(values),
+                     [](double v) { return static_cast<int>(v); });
+
+      SerializeNumberArrayProperty<int>("min", values, o);
+    }
+
+    {
+      std::vector<int> values;
+      std::transform(accessor.maxValues.begin(), accessor.maxValues.end(),
+                     std::back_inserter(values),
+                     [](double v) { return static_cast<int>(v); });
+
+      SerializeNumberArrayProperty<int>("max", values, o);
+    }
+  }
+
+  if (accessor.normalized)  // false is omitted
+    SerializeValue("normalized", Value(accessor.normalized), o);
+  std::string type;
+  switch (accessor.type) {  // no default case: an unlisted type leaves `type` empty
+    case TINYGLTF_TYPE_SCALAR:
+      type = "SCALAR";
+      break;
+    case TINYGLTF_TYPE_VEC2:
+      type = "VEC2";
+      break;
+    case TINYGLTF_TYPE_VEC3:
+      type = "VEC3";
+      break;
+    case TINYGLTF_TYPE_VEC4:
+      type = "VEC4";
+      break;
+    case TINYGLTF_TYPE_MAT2:
+      type = "MAT2";
+      break;
+    case TINYGLTF_TYPE_MAT3:
+      type = "MAT3";
+      break;
+    case TINYGLTF_TYPE_MAT4:
+      type = "MAT4";
+      break;
+  }
+
+  SerializeStringProperty("type", type, o);
+  if (!accessor.name.empty()) SerializeStringProperty("name", accessor.name, o);
+
+  if (accessor.extras.Type() != NULL_TYPE) {
+    SerializeValue("extras", accessor.extras, o);
+  }
+}
+
+static void SerializeGltfAnimationChannel(AnimationChannel &channel, json &o) {  // channel -> JSON
+  SerializeNumberProperty("sampler", channel.sampler, o);
+  {  // nested "target" object: node, path and the target's own extensions
+    json target;
+    SerializeNumberProperty("node", channel.target_node, target);
+    SerializeStringProperty("path", channel.target_path, target);
+
+    SerializeExtensionMap(channel.target_extensions, target);
+
+    JsonAddMember(o, "target", std::move(target));
+  }
+
+  if (channel.extras.Type() != NULL_TYPE) {  // null extras are omitted
+    SerializeValue("extras", channel.extras, o);
+  }
+
+  SerializeExtensionMap(channel.extensions, o);
+}
+
+static void SerializeGltfAnimationSampler(AnimationSampler &sampler, json &o) {  // sampler -> JSON
+  SerializeNumberProperty("input", sampler.input, o);
+  SerializeNumberProperty("output", sampler.output, o);
+  SerializeStringProperty("interpolation", sampler.interpolation, o);
+
+  if (sampler.extras.Type() != NULL_TYPE) {  // null extras are omitted
+    SerializeValue("extras", sampler.extras, o);
+  }
+}
+
+static void SerializeGltfAnimation(Animation &animation, json &o) {  // animation -> JSON members of `o`
+  if (!animation.name.empty())  // empty name is omitted
+    SerializeStringProperty("name", animation.name, o);
+
+  {  // "channels" array, one entry per animation.channels element
+    json channels;
+    JsonReserveArray(channels, animation.channels.size());
+    for (unsigned int i = 0; i < animation.channels.size(); ++i) {
+      json channel;
+      AnimationChannel &gltfChannel = animation.channels[i];  // don't make a copy
+      SerializeGltfAnimationChannel(gltfChannel, channel);
+      JsonPushBack(channels, std::move(channel));
+    }
+
+    JsonAddMember(o, "channels", std::move(channels));
+  }
+
+  {  // "samplers" array, one entry per animation.samplers element
+    json samplers;
+    JsonReserveArray(samplers, animation.samplers.size());
+    for (unsigned int i = 0; i < animation.samplers.size(); ++i) {
+      json sampler;
+      AnimationSampler &gltfSampler = animation.samplers[i];  // don't make a copy
+      SerializeGltfAnimationSampler(gltfSampler, sampler);
+      JsonPushBack(samplers, std::move(sampler));
+    }
+    JsonAddMember(o, "samplers", std::move(samplers));
+  }
+
+  if (animation.extras.Type() != NULL_TYPE) {  // null extras are omitted
+    SerializeValue("extras", animation.extras, o);
+  }
+
+  SerializeExtensionMap(animation.extensions, o);
+}
+
+static void SerializeGltfAsset(Asset &asset, json &o) {  // asset -> JSON; may modify asset.version
+  if (!asset.generator.empty()) {
+    SerializeStringProperty("generator", asset.generator, o);
+  }
+
+  if (!asset.copyright.empty()) {
+    SerializeStringProperty("copyright", asset.copyright, o);
+  }
+
+  if (asset.version.empty()) {
+    // Just in case
+    // `version` must be defined; this writes the default back into `asset`.
+    asset.version = "2.0";
+  }
+
+  // TODO(syoyo): Do we need to check if `version` is greater or equal to 2.0?
+  SerializeStringProperty("version", asset.version, o);
+
+  if (asset.extras.Keys().size()) {  // NOTE(review): tests Keys(), siblings test Type() != NULL_TYPE - confirm intent
+    SerializeValue("extras", asset.extras, o);
+  }
+
+  SerializeExtensionMap(asset.extensions, o);
+}
+
+static void SerializeGltfBufferBin(Buffer &buffer, json &o,
+                                   std::vector<unsigned char> &binBuffer) {  // no "uri" is written
+  SerializeNumberProperty("byteLength", buffer.data.size(), o);
+  binBuffer = buffer.data;  // the bytes are copied out to the caller's vector
+
+  if (buffer.name.size()) SerializeStringProperty("name", buffer.name, o);
+
+  if (buffer.extras.Type() != NULL_TYPE) {  // null extras are omitted
+    SerializeValue("extras", buffer.extras, o);
+  }
+}
+
+static void SerializeGltfBuffer(Buffer &buffer, json &o) {  // buffer with its data embedded in "uri"
+  SerializeNumberProperty("byteLength", buffer.data.size(), o);
+  SerializeGltfBufferData(buffer.data, o);  // json overload: base64 data URI
+
+  if (buffer.name.size()) SerializeStringProperty("name", buffer.name, o);
+
+  if (buffer.extras.Type() != NULL_TYPE) {  // null extras are omitted
+    SerializeValue("extras", buffer.extras, o);
+  }
+}
+
+static bool SerializeGltfBuffer(Buffer &buffer, json &o,
+                                const std::string &binFilename,
+                                const std::string &binBaseFilename) {  // buffer stored in an external file
+  if (!SerializeGltfBufferData(buffer.data, binFilename)) return false;  // on failure `o` is left untouched
+  SerializeNumberProperty("byteLength", buffer.data.size(), o);
+  SerializeStringProperty("uri", binBaseFilename, o);  // "uri" gets binBaseFilename, not binFilename
+
+  if (buffer.name.size()) SerializeStringProperty("name", buffer.name, o);
+
+  if (buffer.extras.Type() != NULL_TYPE) {
+    SerializeValue("extras", buffer.extras, o);
+  }
+  return true;
+}
+
+static void SerializeGltfBufferView(BufferView &bufferView, json &o) {  // bufferView -> JSON
+  SerializeNumberProperty("buffer", bufferView.buffer, o);
+  SerializeNumberProperty<size_t>("byteLength", bufferView.byteLength, o);
+
+  // byteStride is optional, minimum allowed is 4 (smaller values are not written)
+  if (bufferView.byteStride >= 4) {
+    SerializeNumberProperty<size_t>("byteStride", bufferView.byteStride, o);
+  }
+  // byteOffset is optional, default is 0
+  if (bufferView.byteOffset > 0) {
+    SerializeNumberProperty<size_t>("byteOffset", bufferView.byteOffset, o);
+  }
+  // Target is optional, check if it contains a valid value
+  if (bufferView.target == TINYGLTF_TARGET_ARRAY_BUFFER ||
+      bufferView.target == TINYGLTF_TARGET_ELEMENT_ARRAY_BUFFER) {
+    SerializeNumberProperty("target", bufferView.target, o);
+  }
+  if (bufferView.name.size()) {
+    SerializeStringProperty("name", bufferView.name, o);
+  }
+
+  if (bufferView.extras.Type() != NULL_TYPE) {  // null extras are omitted
+    SerializeValue("extras", bufferView.extras, o);
+  }
+}
+
+static void SerializeGltfImage(Image &image, json &o) {  // image -> JSON; pixel data is not written here
+  // With an empty uri, mimeType and bufferView are written instead.
+  if (image.uri.empty()) {
+    SerializeStringProperty("mimeType", image.mimeType, o);
+    SerializeNumberProperty<int>("bufferView", image.bufferView, o);
+  } else {
+    // TODO(syoyo): dlib::urilencode?
+    SerializeStringProperty("uri", image.uri, o);
+  }
+
+  if (image.name.size()) {
+    SerializeStringProperty("name", image.name, o);
+  }
+
+  if (image.extras.Type() != NULL_TYPE) {  // null extras are omitted
+    SerializeValue("extras", image.extras, o);
+  }
+
+  SerializeExtensionMap(image.extensions, o);
+}
+
+static void SerializeGltfTextureInfo(TextureInfo &texinfo, json &o) {  // textureInfo -> JSON
+  SerializeNumberProperty("index", texinfo.index, o);
+
+  if (texinfo.texCoord != 0) {  // 0 is omitted
+    SerializeNumberProperty("texCoord", texinfo.texCoord, o);
+  }
+
+  if (texinfo.extras.Type() != NULL_TYPE) {  // null extras are omitted
+    SerializeValue("extras", texinfo.extras, o);
+  }
+
+  SerializeExtensionMap(texinfo.extensions, o);
+}
+
+static void SerializeGltfNormalTextureInfo(NormalTextureInfo &texinfo,
+                                           json &o) {  // normalTextureInfo -> JSON
+  SerializeNumberProperty("index", texinfo.index, o);
+
+  if (texinfo.texCoord != 0) {  // 0 is omitted
+    SerializeNumberProperty("texCoord", texinfo.texCoord, o);
+  }
+
+  if (!TINYGLTF_DOUBLE_EQUAL(texinfo.scale, 1.0)) {  // 1.0 is omitted
+    SerializeNumberProperty("scale", texinfo.scale, o);
+  }
+
+  if (texinfo.extras.Type() != NULL_TYPE) {  // null extras are omitted
+    SerializeValue("extras", texinfo.extras, o);
+  }
+
+  SerializeExtensionMap(texinfo.extensions, o);
+}
+
+static void SerializeGltfOcclusionTextureInfo(OcclusionTextureInfo &texinfo,
+                                              json &o) {  // occlusionTextureInfo -> JSON
+  SerializeNumberProperty("index", texinfo.index, o);
+
+  if (texinfo.texCoord != 0) {  // 0 is omitted
+    SerializeNumberProperty("texCoord", texinfo.texCoord, o);
+  }
+
+  if (!TINYGLTF_DOUBLE_EQUAL(texinfo.strength, 1.0)) {  // 1.0 is omitted
+    SerializeNumberProperty("strength", texinfo.strength, o);
+  }
+
+  if (texinfo.extras.Type() != NULL_TYPE) {  // null extras are omitted
+    SerializeValue("extras", texinfo.extras, o);
+  }
+
+  SerializeExtensionMap(texinfo.extensions, o);
+}
+
+static void SerializeGltfPbrMetallicRoughness(PbrMetallicRoughness &pbr,
+                                              json &o) {  // writes only members that differ from the values tested below
+  std::vector<double> default_baseColorFactor = {1.0, 1.0, 1.0, 1.0};
+  if (!Equals(pbr.baseColorFactor, default_baseColorFactor)) {
+    SerializeNumberArrayProperty<double>("baseColorFactor", pbr.baseColorFactor,
+                                         o);
+  }
+
+  if (!TINYGLTF_DOUBLE_EQUAL(pbr.metallicFactor, 1.0)) {
+    SerializeNumberProperty("metallicFactor", pbr.metallicFactor, o);
+  }
+
+  if (!TINYGLTF_DOUBLE_EQUAL(pbr.roughnessFactor, 1.0)) {
+    SerializeNumberProperty("roughnessFactor", pbr.roughnessFactor, o);
+  }
+
+  if (pbr.baseColorTexture.index > -1) {  // negative index: texture omitted
+    json texinfo;
+    SerializeGltfTextureInfo(pbr.baseColorTexture, texinfo);
+    JsonAddMember(o, "baseColorTexture", std::move(texinfo));
+  }
+
+  if (pbr.metallicRoughnessTexture.index > -1) {  // negative index: texture omitted
+    json texinfo;
+    SerializeGltfTextureInfo(pbr.metallicRoughnessTexture, texinfo);
+    JsonAddMember(o, "metallicRoughnessTexture", std::move(texinfo));
+  }
+
+  SerializeExtensionMap(pbr.extensions, o);
+
+  if (pbr.extras.Type() != NULL_TYPE) {  // null extras are omitted
+    SerializeValue("extras", pbr.extras, o);
+  }
+}
+
+static void SerializeGltfMaterial(Material &material, json &o) {  // material -> JSON; `o` may be left untouched
+  if (material.name.size()) {
+    SerializeStringProperty("name", material.name, o);
+  }
+
+  // QUESTION(syoyo): Write material parameters regardless of its default value?
+
+  if (!TINYGLTF_DOUBLE_EQUAL(material.alphaCutoff, 0.5)) {  // 0.5 is omitted
+    SerializeNumberProperty("alphaCutoff", material.alphaCutoff, o);
+  }
+
+  if (material.alphaMode.compare("OPAQUE") != 0) {  // "OPAQUE" is omitted
+    SerializeStringProperty("alphaMode", material.alphaMode, o);
+  }
+
+  if (material.doubleSided != false)  // false is omitted
+    JsonAddMember(o, "doubleSided", json(material.doubleSided));
+
+  if (material.normalTexture.index > -1) {  // negative index: texture omitted
+    json texinfo;
+    SerializeGltfNormalTextureInfo(material.normalTexture, texinfo);
+    JsonAddMember(o, "normalTexture", std::move(texinfo));
+  }
+
+  if (material.occlusionTexture.index > -1) {  // negative index: texture omitted
+    json texinfo;
+    SerializeGltfOcclusionTextureInfo(material.occlusionTexture, texinfo);
+    JsonAddMember(o, "occlusionTexture", std::move(texinfo));
+  }
+
+  if (material.emissiveTexture.index > -1) {  // negative index: texture omitted
+    json texinfo;
+    SerializeGltfTextureInfo(material.emissiveTexture, texinfo);
+    JsonAddMember(o, "emissiveTexture", std::move(texinfo));
+  }
+
+  std::vector<double> default_emissiveFactor = {0.0, 0.0, 0.0};
+  if (!Equals(material.emissiveFactor, default_emissiveFactor)) {
+    SerializeNumberArrayProperty<double>("emissiveFactor",
+                                         material.emissiveFactor, o);
+  }
+
+  {
+    json pbrMetallicRoughness;
+    SerializeGltfPbrMetallicRoughness(material.pbrMetallicRoughness,
+                                      pbrMetallicRoughness);
+    // Issue 204
+    // Do not serialize `pbrMetallicRoughness` if pbrMetallicRoughness has all
+    // default values(json is null). Otherwise it will serialize to
+    // `pbrMetallicRoughness : null`, which cannot be read by other glTF
+    // importers(and validators).
+    //
+    if (!JsonIsNull(pbrMetallicRoughness)) {
+      JsonAddMember(o, "pbrMetallicRoughness", std::move(pbrMetallicRoughness));
+    }
+  }
+
+#if 0  // legacy way. just for the record.
+  if (material.values.size()) {
+    json pbrMetallicRoughness;
+    SerializeParameterMap(material.values, pbrMetallicRoughness);
+    JsonAddMember(o, "pbrMetallicRoughness", std::move(pbrMetallicRoughness));
+  }
+
+  SerializeParameterMap(material.additionalValues, o);
+#else
+
+#endif
+
+  SerializeExtensionMap(material.extensions, o);
+
+  if (material.extras.Type() != NULL_TYPE) {  // null extras are omitted
+    SerializeValue("extras", material.extras, o);
+  }
+}
+
+static void SerializeGltfMesh(Mesh &mesh, json &o) {  // mesh -> JSON: primitives, weights, name, extensions, extras
+  json primitives;
+  JsonReserveArray(primitives, mesh.primitives.size());
+  for (unsigned int i = 0; i < mesh.primitives.size(); ++i) {
+    json primitive;
+    const Primitive &gltfPrimitive = mesh.primitives[i];  // don't make a copy
+    {
+      json attributes;
+      for (auto attrIt = gltfPrimitive.attributes.begin();
+           attrIt != gltfPrimitive.attributes.end(); ++attrIt) {
+        SerializeNumberProperty<int>(attrIt->first, attrIt->second, attributes);
+      }
+
+      JsonAddMember(primitive, "attributes", std::move(attributes));
+    }
+
+    // Indices is optional
+    if (gltfPrimitive.indices > -1) {
+      SerializeNumberProperty<int>("indices", gltfPrimitive.indices, primitive);
+    }
+    // Material is optional
+    if (gltfPrimitive.material > -1) {
+      SerializeNumberProperty<int>("material", gltfPrimitive.material,
+                                   primitive);
+    }
+    SerializeNumberProperty<int>("mode", gltfPrimitive.mode, primitive);
+
+    // Morph targets
+    if (gltfPrimitive.targets.size()) {
+      json targets;
+      JsonReserveArray(targets, gltfPrimitive.targets.size());
+      for (unsigned int k = 0; k < gltfPrimitive.targets.size(); ++k) {
+        json targetAttributes;
+        const std::map<std::string, int> &targetData = gltfPrimitive.targets[k];  // read-only: bind, don't copy
+        for (auto attrIt = targetData.begin(); attrIt != targetData.end();
+             ++attrIt) {
+          SerializeNumberProperty<int>(attrIt->first, attrIt->second,
+                                       targetAttributes);
+        }
+        JsonPushBack(targets, std::move(targetAttributes));
+      }
+      JsonAddMember(primitive, "targets", std::move(targets));
+    }
+
+    SerializeExtensionMap(gltfPrimitive.extensions, primitive);
+
+    if (gltfPrimitive.extras.Type() != NULL_TYPE) {
+      SerializeValue("extras", gltfPrimitive.extras, primitive);
+    }
+
+    JsonPushBack(primitives, std::move(primitive));
+  }
+
+  JsonAddMember(o, "primitives", std::move(primitives));  // added even when there are no primitives
+
+  if (mesh.weights.size()) {
+    SerializeNumberArrayProperty<double>("weights", mesh.weights, o);
+  }
+
+  if (mesh.name.size()) {
+    SerializeStringProperty("name", mesh.name, o);
+  }
+
+  SerializeExtensionMap(mesh.extensions, o);
+  if (mesh.extras.Type() != NULL_TYPE) {
+    SerializeValue("extras", mesh.extras, o);
+  }
+}
+
+static void SerializeSpotLight(SpotLight &spot, json &o) {  // spot cone angles -> JSON
+  SerializeNumberProperty("innerConeAngle", spot.innerConeAngle, o);
+  SerializeNumberProperty("outerConeAngle", spot.outerConeAngle, o);
+  SerializeExtensionMap(spot.extensions, o);
+  if (spot.extras.Type() != NULL_TYPE) {  // null extras are omitted
+    SerializeValue("extras", spot.extras, o);
+  }
+}
+
+static void SerializeGltfLight(Light &light, json &o) {  // light -> JSON
+  if (!light.name.empty()) SerializeStringProperty("name", light.name, o);
+  SerializeNumberProperty("intensity", light.intensity, o);
+  if (light.range > 0.0) {  // non-positive range is omitted
+    SerializeNumberProperty("range", light.range, o);
+  }
+  SerializeNumberArrayProperty("color", light.color, o);
+  SerializeStringProperty("type", light.type, o);
+  if (light.type == "spot") {  // the "spot" object is written only for spot lights
+    json spot;
+    SerializeSpotLight(light.spot, spot);
+    JsonAddMember(o, "spot", std::move(spot));
+  }
+  SerializeExtensionMap(light.extensions, o);
+  if (light.extras.Type() != NULL_TYPE) {  // null extras are omitted
+    SerializeValue("extras", light.extras, o);
+  }
+}
+
+static void SerializeGltfNode(Node &node, json &o) {  // node -> JSON; empty arrays and -1 indices are skipped
+  if (node.translation.size() > 0) {
+    SerializeNumberArrayProperty<double>("translation", node.translation, o);
+  }
+  if (node.rotation.size() > 0) {
+    SerializeNumberArrayProperty<double>("rotation", node.rotation, o);
+  }
+  if (node.scale.size() > 0) {
+    SerializeNumberArrayProperty<double>("scale", node.scale, o);
+  }
+  if (node.matrix.size() > 0) {
+    SerializeNumberArrayProperty<double>("matrix", node.matrix, o);
+  }
+  if (node.mesh != -1) {
+    SerializeNumberProperty<int>("mesh", node.mesh, o);
+  }
+
+  if (node.skin != -1) {
+    SerializeNumberProperty<int>("skin", node.skin, o);
+  }
+
+  if (node.camera != -1) {
+    SerializeNumberProperty<int>("camera", node.camera, o);
+  }
+
+  if (node.weights.size() > 0) {
+    SerializeNumberArrayProperty<double>("weights", node.weights, o);
+  }
+
+  if (node.extras.Type() != NULL_TYPE) {  // null extras are omitted
+    SerializeValue("extras", node.extras, o);
+  }
+
+  SerializeExtensionMap(node.extensions, o);
+  if (!node.name.empty()) SerializeStringProperty("name", node.name, o);
+  SerializeNumberArrayProperty<int>("children", node.children, o);  // called without a size check here
+}
+
+static void SerializeGltfSampler(Sampler &sampler, json &o) {  // texture sampler -> JSON
+  if (sampler.magFilter != -1) {  // -1 is omitted
+    SerializeNumberProperty("magFilter", sampler.magFilter, o);
+  }
+  if (sampler.minFilter != -1) {  // -1 is omitted
+    SerializeNumberProperty("minFilter", sampler.minFilter, o);
+  }
+  //SerializeNumberProperty("wrapR", sampler.wrapR, o);
+  SerializeNumberProperty("wrapS", sampler.wrapS, o);
+  SerializeNumberProperty("wrapT", sampler.wrapT, o);
+
+  if (sampler.extras.Type() != NULL_TYPE) {  // null extras are omitted
+    SerializeValue("extras", sampler.extras, o);
+  }
+}
+
+static void SerializeGltfOrthographicCamera(const OrthographicCamera &camera,
+                                            json &o) {  // all four numbers are always written
+  SerializeNumberProperty("zfar", camera.zfar, o);
+  SerializeNumberProperty("znear", camera.znear, o);
+  SerializeNumberProperty("xmag", camera.xmag, o);
+  SerializeNumberProperty("ymag", camera.ymag, o);
+
+  if (camera.extras.Type() != NULL_TYPE) {  // null extras are omitted
+    SerializeValue("extras", camera.extras, o);
+  }
+}
+
+static void SerializeGltfPerspectiveCamera(const PerspectiveCamera &camera,
+                                           json &o) {  // zfar/znear are always written
+  SerializeNumberProperty("zfar", camera.zfar, o);
+  SerializeNumberProperty("znear", camera.znear, o);
+  if (camera.aspectRatio > 0) {  // non-positive value is omitted
+    SerializeNumberProperty("aspectRatio", camera.aspectRatio, o);
+  }
+
+  if (camera.yfov > 0) {  // non-positive value is omitted
+    SerializeNumberProperty("yfov", camera.yfov, o);
+  }
+
+  if (camera.extras.Type() != NULL_TYPE) {  // null extras are omitted
+    SerializeValue("extras", camera.extras, o);
+  }
+}
+
+static void SerializeGltfCamera(const Camera &camera, json &o) {  // camera -> JSON
+  SerializeStringProperty("type", camera.type, o);
+  if (!camera.name.empty()) {
+    SerializeStringProperty("name", camera.name, o);
+  }
+
+  if (camera.type.compare("orthographic") == 0) {
+    json orthographic;
+    SerializeGltfOrthographicCamera(camera.orthographic, orthographic);
+    JsonAddMember(o, "orthographic", std::move(orthographic));
+  } else if (camera.type.compare("perspective") == 0) {
+    json perspective;
+    SerializeGltfPerspectiveCamera(camera.perspective, perspective);
+    JsonAddMember(o, "perspective", std::move(perspective));
+  } else {
+    // Any other `type` string: no projection object is written.
+  }
+
+  if (camera.extras.Type() != NULL_TYPE) {  // null extras are omitted
+    SerializeValue("extras", camera.extras, o);
+  }
+  SerializeExtensionMap(camera.extensions, o);
+}
+
+static void SerializeGltfScene(Scene &scene, json &o) {  // scene -> JSON
+  SerializeNumberArrayProperty<int>("nodes", scene.nodes, o);  // called without a size check here
+
+  if (scene.name.size()) {
+    SerializeStringProperty("name", scene.name, o);
+  }
+  if (scene.extras.Type() != NULL_TYPE) {  // null extras are omitted
+    SerializeValue("extras", scene.extras, o);
+  }
+  SerializeExtensionMap(scene.extensions, o);
+}
+
+static void SerializeGltfSkin(Skin &skin, json &o) {  // skin -> JSON; no extras/extensions are written here
+  // required
+  SerializeNumberArrayProperty<int>("joints", skin.joints, o);
+
+  if (skin.inverseBindMatrices >= 0) {  // negative index: member omitted
+    SerializeNumberProperty("inverseBindMatrices", skin.inverseBindMatrices, o);
+  }
+
+  if (skin.skeleton >= 0) {  // negative index: member omitted
+    SerializeNumberProperty("skeleton", skin.skeleton, o);
+  }
+
+  if (skin.name.size()) {
+    SerializeStringProperty("name", skin.name, o);
+  }
+}
+
+static void SerializeGltfTexture(Texture &texture, json &o) {  // texture -> JSON; every member is optional
+  if (texture.sampler > -1) {  // negative index: member omitted
+    SerializeNumberProperty("sampler", texture.sampler, o);
+  }
+  if (texture.source > -1) {  // negative index: member omitted
+    SerializeNumberProperty("source", texture.source, o);
+  }
+  if (texture.name.size()) {
+    SerializeStringProperty("name", texture.name, o);
+  }
+  if (texture.extras.Type() != NULL_TYPE) {  // null extras are omitted
+    SerializeValue("extras", texture.extras, o);
+  }
+  SerializeExtensionMap(texture.extensions, o);
+}
+
+/// Serializes the top-level glTF properties of `model` into `o`, except
+/// buffers and images, which this function does not write. Empty collections
+/// are omitted; `asset` is always written (and may get a default version).
+static void SerializeGltfModel(Model *model, json &o) {
+  // ACCESSORS
+  if (model->accessors.size()) {
+    json accessors;
+    JsonReserveArray(accessors, model->accessors.size());
+    for (unsigned int i = 0; i < model->accessors.size(); ++i) {
+      json accessor;
+      SerializeGltfAccessor(model->accessors[i], accessor);
+      JsonPushBack(accessors, std::move(accessor));
+    }
+    JsonAddMember(o, "accessors", std::move(accessors));
+  }
+
+  // ANIMATIONS
+  if (model->animations.size()) {
+    json animations;
+    JsonReserveArray(animations, model->animations.size());
+    for (unsigned int i = 0; i < model->animations.size(); ++i) {
+      if (model->animations[i].channels.size()) {  // animations without channels are dropped
+        json animation;
+        SerializeGltfAnimation(model->animations[i], animation);
+        JsonPushBack(animations, std::move(animation));
+      }
+    }
+
+    JsonAddMember(o, "animations", std::move(animations));  // NOTE(review): still added when every animation was dropped - confirm
+  }
+
+  // ASSET
+  json asset;
+  SerializeGltfAsset(model->asset, asset);
+  JsonAddMember(o, "asset", std::move(asset));
+
+  // BUFFERVIEWS
+  if (model->bufferViews.size()) {
+    json bufferViews;
+    JsonReserveArray(bufferViews, model->bufferViews.size());
+    for (unsigned int i = 0; i < model->bufferViews.size(); ++i) {
+      json bufferView;
+      SerializeGltfBufferView(model->bufferViews[i], bufferView);
+      JsonPushBack(bufferViews, std::move(bufferView));
+    }
+    JsonAddMember(o, "bufferViews", std::move(bufferViews));
+  }
+
+  // Extensions required
+  if (model->extensionsRequired.size()) {
+    SerializeStringArrayProperty("extensionsRequired",
+                                 model->extensionsRequired, o);
+  }
+
+  // MATERIALS
+  if (model->materials.size()) {
+    json materials;
+    JsonReserveArray(materials, model->materials.size());
+    for (unsigned int i = 0; i < model->materials.size(); ++i) {
+      json material;
+      SerializeGltfMaterial(model->materials[i], material);
+
+      if (JsonIsNull(material)) {
+        // Issue 294.
+        // `material` does not have any required parameters
+        // so the result may be null(unmodified) when all material parameters
+        // have default value.
+        //
+        // null is not allowed thus we create an empty JSON object.
+        JsonSetObject(material);
+      }
+      JsonPushBack(materials, std::move(material));
+    }
+    JsonAddMember(o, "materials", std::move(materials));
+  }
+
+  // MESHES
+  if (model->meshes.size()) {
+    json meshes;
+    JsonReserveArray(meshes, model->meshes.size());
+    for (unsigned int i = 0; i < model->meshes.size(); ++i) {
+      json mesh;
+      SerializeGltfMesh(model->meshes[i], mesh);
+      JsonPushBack(meshes, std::move(mesh));
+    }
+    JsonAddMember(o, "meshes", std::move(meshes));
+  }
+
+  // NODES
+  if (model->nodes.size()) {
+    json nodes;
+    JsonReserveArray(nodes, model->nodes.size());
+    for (unsigned int i = 0; i < model->nodes.size(); ++i) {
+      json node;
+      SerializeGltfNode(model->nodes[i], node);
+      JsonPushBack(nodes, std::move(node));
+    }
+    JsonAddMember(o, "nodes", std::move(nodes));
+  }
+
+  // SCENE
+  if (model->defaultScene > -1) {  // negative index: no default "scene" member
+    SerializeNumberProperty<int>("scene", model->defaultScene, o);
+  }
+
+  // SCENES
+  if (model->scenes.size()) {
+    json scenes;
+    JsonReserveArray(scenes, model->scenes.size());
+    for (unsigned int i = 0; i < model->scenes.size(); ++i) {
+      json currentScene;
+      SerializeGltfScene(model->scenes[i], currentScene);
+      JsonPushBack(scenes, std::move(currentScene));
+    }
+    JsonAddMember(o, "scenes", std::move(scenes));
+  }
+
+  // SKINS
+  if (model->skins.size()) {
+    json skins;
+    JsonReserveArray(skins, model->skins.size());
+    for (unsigned int i = 0; i < model->skins.size(); ++i) {
+      json skin;
+      SerializeGltfSkin(model->skins[i], skin);
+      JsonPushBack(skins, std::move(skin));
+    }
+    JsonAddMember(o, "skins", std::move(skins));
+  }
+
+  // TEXTURES
+  if (model->textures.size()) {
+    json textures;
+    JsonReserveArray(textures, model->textures.size());
+    for (unsigned int i = 0; i < model->textures.size(); ++i) {
+      json texture;
+      SerializeGltfTexture(model->textures[i], texture);
+      JsonPushBack(textures, std::move(texture));
+    }
+    JsonAddMember(o, "textures", std::move(textures));
+  }
+
+  // SAMPLERS
+  if (model->samplers.size()) {
+    json samplers;
+    JsonReserveArray(samplers, model->samplers.size());
+    for (unsigned int i = 0; i < model->samplers.size(); ++i) {
+      json sampler;
+      SerializeGltfSampler(model->samplers[i], sampler);
+      JsonPushBack(samplers, std::move(sampler));
+    }
+    JsonAddMember(o, "samplers", std::move(samplers));
+  }
+
+  // CAMERAS
+  if (model->cameras.size()) {
+    json cameras;
+    JsonReserveArray(cameras, model->cameras.size());
+    for (unsigned int i = 0; i < model->cameras.size(); ++i) {
+      json camera;
+      SerializeGltfCamera(model->cameras[i], camera);
+      JsonPushBack(cameras, std::move(camera));
+    }
+    JsonAddMember(o, "cameras", std::move(cameras));
+  }
+
+  // EXTENSIONS
+  SerializeExtensionMap(model->extensions, o);
+
+  auto extensionsUsed = model->extensionsUsed;  // local copy: the model's own list is not modified
+
+  // LIGHTS as KHR_lights_punctual
+  if (model->lights.size()) {
+    json lights;
+    JsonReserveArray(lights, model->lights.size());
+    for (unsigned int i = 0; i < model->lights.size(); ++i) {
+      json light;
+      SerializeGltfLight(model->lights[i], light);
+      JsonPushBack(lights, std::move(light));
+    }
+    json khr_lights_cmn;
+    JsonAddMember(khr_lights_cmn, "lights", std::move(lights));
+    json ext_j;
+
+    {  // start from the "extensions" member already present in `o`, if any
+      json_const_iterator it;
+      if (FindMember(o, "extensions", it)) {
+        JsonAssign(ext_j, GetValue(it));
+      }
+    }
+
+    JsonAddMember(ext_j, "KHR_lights_punctual", std::move(khr_lights_cmn));
+
+    JsonAddMember(o, "extensions", std::move(ext_j));  // NOTE(review): `o` may already hold "extensions" - confirm this replaces it rather than duplicating the key
+
+    // Also add "KHR_lights_punctual" to `extensionsUsed`
+    {
+      auto has_khr_lights_punctual =
+          std::find_if(extensionsUsed.begin(), extensionsUsed.end(),
+                       [](const std::string &s) {
+                         return (s.compare("KHR_lights_punctual") == 0);
+                       });
+
+      if (has_khr_lights_punctual == extensionsUsed.end()) {
+        extensionsUsed.push_back("KHR_lights_punctual");
+      }
+    }
+  }
+
+  // Extensions used
+  if (extensionsUsed.size()) {
+    SerializeStringArrayProperty("extensionsUsed", extensionsUsed, o);
+  }
+
+  // EXTRAS
+  if (model->extras.Type() != NULL_TYPE) {
+    SerializeValue("extras", model->extras, o);
+  }
+}
+
+static bool WriteGltfStream(std::ostream &stream, const std::string &content) {
+  stream << content << std::endl;  // std::endl appends '\n' and flushes
+  return stream.good();  // surface write/flush failures to the caller
+}
+
+static bool WriteGltfFile(const std::string &output,
+                          const std::string &content) {
+#ifdef _WIN32  // Windows: MSVC/libstdc++ open the UTF8ToWchar-converted path
+#if defined(_MSC_VER)
+  std::ofstream gltfFile(UTF8ToWchar(output).c_str());
+#elif defined(__GLIBCXX__)
+  int file_descriptor = _wopen(UTF8ToWchar(output).c_str(),
+                               _O_CREAT | _O_WRONLY | _O_TRUNC | _O_BINARY);
+  __gnu_cxx::stdio_filebuf<char> wfile_buf(
+      file_descriptor, std::ios_base::out | std::ios_base::binary);
+  std::ostream gltfFile(&wfile_buf);
+  if (!wfile_buf.is_open()) return false;
+#else
+  std::ofstream gltfFile(output.c_str());
+#endif
+#else
+  std::ofstream gltfFile(output.c_str());
+#endif
+  // A failed std::ofstream open sets failbit; this also covers the MSVC branch.
+  if (!gltfFile) return false;
+  return WriteGltfStream(gltfFile, content);
+}
+
+static void WriteBinaryGltfStream(std::ostream &stream,
+                                  const std::string &content,
+                                  const std::vector<unsigned char> &binBuffer) {
+  // Emits a GLB container: 12-byte header, JSON chunk, then optional BIN chunk.
+  const std::string header = "glTF";
+  const uint32_t version = 2;  // fixed-width so exactly 4 bytes are written
+
+  const uint32_t content_size = uint32_t(content.size());
+  const uint32_t binBuffer_size = uint32_t(binBuffer.size());
+  // determine number of padding bytes required to ensure 4 byte alignment
+  const uint32_t content_padding_size = content_size % 4 == 0 ? 0 : 4 - content_size % 4;
+  const uint32_t bin_padding_size = binBuffer_size % 4 == 0 ? 0 : 4 - binBuffer_size % 4;
+
+  // 12 bytes for header, JSON content length, 8 bytes for JSON chunk info.
+  // Chunk data must be located at 4-byte boundary, which may require padding
+  const uint32_t length =
+      12 + 8 + content_size + content_padding_size +
+      (binBuffer_size ? (8 + binBuffer_size + bin_padding_size) : 0);
+
+  stream.write(header.c_str(), std::streamsize(header.size()));
+  stream.write(reinterpret_cast<const char *>(&version), sizeof(version));
+  stream.write(reinterpret_cast<const char *>(&length), sizeof(length));
+
+  // JSON chunk info, then JSON data
+  const uint32_t model_length = uint32_t(content.size()) + content_padding_size;
+  const uint32_t model_format = 0x4E4F534A;  // bytes 'J','S','O','N' (LE)
+  stream.write(reinterpret_cast<const char *>(&model_length),
+               sizeof(model_length));
+  stream.write(reinterpret_cast<const char *>(&model_format),
+               sizeof(model_format));
+  stream.write(content.c_str(), std::streamsize(content.size()));
+
+  // Chunk length must be a multiple of 4, so pad with spaces
+  if (content_padding_size > 0) {
+    const std::string padding = std::string(size_t(content_padding_size), ' ');
+    stream.write(padding.c_str(), std::streamsize(padding.size()));
+  }
+  if (binBuffer.size() > 0) {
+    // BIN chunk info, then BIN data
+    const uint32_t bin_length = uint32_t(binBuffer.size()) + bin_padding_size;
+    const uint32_t bin_format = 0x004e4942;  // bytes 'B','I','N','\0' (LE)
+    stream.write(reinterpret_cast<const char *>(&bin_length),
+                 sizeof(bin_length));
+    stream.write(reinterpret_cast<const char *>(&bin_format),
+                 sizeof(bin_format));
+    stream.write(reinterpret_cast<const char *>(binBuffer.data()),
+                 std::streamsize(binBuffer.size()));
+    // Chunk length must be a multiple of 4, so pad with zeroes
+    if (bin_padding_size > 0) {
+      const std::vector<unsigned char> padding(size_t(bin_padding_size), 0);
+      stream.write(reinterpret_cast<const char *>(padding.data()),
+                   std::streamsize(padding.size()));
+    }
+  }
+}
+
+static void WriteBinaryGltfFile(const std::string &output,
+                                const std::string &content,
+                                const std::vector<unsigned char> &binBuffer) {
+#ifdef _WIN32  // Windows: MSVC/libstdc++ open the UTF8ToWchar-converted path
+#if defined(_MSC_VER)
+  std::ofstream gltfFile(UTF8ToWchar(output).c_str(), std::ios::binary);
+#elif defined(__GLIBCXX__)
+  int file_descriptor = _wopen(UTF8ToWchar(output).c_str(),
+                               _O_CREAT | _O_WRONLY | _O_TRUNC | _O_BINARY);
+  __gnu_cxx::stdio_filebuf<char> wfile_buf(
+      file_descriptor, std::ios_base::out | std::ios_base::binary);
+  std::ostream gltfFile(&wfile_buf);
+#else
+  std::ofstream gltfFile(output.c_str(), std::ios::binary);
+#endif
+#else
+  std::ofstream gltfFile(output.c_str(), std::ios::binary);
+#endif
+  WriteBinaryGltfStream(gltfFile, content, binBuffer);  // open not checked
+}
+
+bool TinyGLTF::WriteGltfSceneToStream(Model *model, std::ostream &stream,
+                                      bool prettyPrint = true,
+                                      bool writeBinary = false) {
+  // Serializes `model` to `stream` as GLB (writeBinary) or as JSON text.
+  JsonDocument output;
+  /// Serialize all properties except buffers and images.
+  SerializeGltfModel(model, output);
+
+  // BUFFERS
+  std::vector<unsigned char> binBuffer;
+  if (model->buffers.size()) {
+    json buffers;
+    JsonReserveArray(buffers, model->buffers.size());
+    for (unsigned int i = 0; i < model->buffers.size(); ++i) {
+      json buffer;
+      if (writeBinary && i == 0 && model->buffers[i].uri.empty()) {
+        SerializeGltfBufferBin(model->buffers[i], buffer, binBuffer);
+      } else {
+        SerializeGltfBuffer(model->buffers[i], buffer);
+      }
+      JsonPushBack(buffers, std::move(buffer));
+    }
+    JsonAddMember(output, "buffers", std::move(buffers));
+  }
+
+  // IMAGES
+  if (model->images.size()) {
+    json images;
+    JsonReserveArray(images, model->images.size());
+    for (unsigned int i = 0; i < model->images.size(); ++i) {
+      json image;
+
+      std::string dummystring = "";
+      // UpdateImageObject requires a base directory argument; a stream has no
+      // directory on disk, so an empty string is passed. NOTE(review): `false`
+      // sits in the slot WriteGltfSceneToFile fills with embedImages - confirm.
+      UpdateImageObject(model->images[i], dummystring, int(i), false,
+                        &this->WriteImageData, this->write_image_user_data_);
+      SerializeGltfImage(model->images[i], image);
+      JsonPushBack(images, std::move(image));
+    }
+    JsonAddMember(output, "images", std::move(images));
+  }
+
+  if (writeBinary) {
+    WriteBinaryGltfStream(stream, JsonToString(output), binBuffer);
+  } else {  // prettyPrint only applies to the text form
+    WriteGltfStream(stream, JsonToString(output, prettyPrint ? 2 : -1));
+  }
+  // Report a failed write instead of unconditionally returning true.
+  return stream.good();
+}
+
+bool TinyGLTF::WriteGltfSceneToFile(Model *model, const std::string &filename,
+                                    bool embedImages = false,
+                                    bool embedBuffers = false,
+                                    bool prettyPrint = true,
+                                    bool writeBinary = false) {
+  // Writes `model` to `filename` as GLB (writeBinary) or JSON text. Buffers that
+  // are neither packed into the GLB nor embedded get a path under its directory.
+  JsonDocument output;
+  std::string defaultBinFilename = GetBaseFilename(filename);
+  std::string defaultBinFileExt = ".bin";
+  const auto pos = defaultBinFilename.rfind('.');
+  if (pos != std::string::npos) {
+    defaultBinFilename = defaultBinFilename.substr(0, pos);
+  }
+  std::string baseDir = GetBaseDir(filename);
+  if (baseDir.empty()) {
+    baseDir = "./";
+  }
+  /// Serialize all properties except buffers and images.
+  SerializeGltfModel(model, output);
+
+  // BUFFERS
+  std::vector<std::string> usedUris;
+  std::vector<unsigned char> binBuffer;
+  if (model->buffers.size()) {
+    json buffers;
+    JsonReserveArray(buffers, model->buffers.size());
+    for (unsigned int i = 0; i < model->buffers.size(); ++i) {
+      json buffer;
+      if (writeBinary && i == 0 && model->buffers[i].uri.empty()) {
+        SerializeGltfBufferBin(model->buffers[i], buffer, binBuffer);
+      } else if (embedBuffers) {
+        SerializeGltfBuffer(model->buffers[i], buffer);
+      } else {
+        std::string binSavePath;
+        std::string binUri;
+        if (!model->buffers[i].uri.empty() &&
+            !IsDataURI(model->buffers[i].uri)) {
+          binUri = model->buffers[i].uri;
+        } else {
+          binUri = defaultBinFilename + defaultBinFileExt;
+          bool inUse = true;
+          int numUsed = 0;
+          while (inUse) {  // retry with a numeric suffix until the name is unused
+            inUse = false;
+            for (const std::string &usedName : usedUris) {
+              if (binUri.compare(usedName) != 0) continue;
+              inUse = true;
+              binUri = defaultBinFilename + std::to_string(numUsed++) +
+                       defaultBinFileExt;
+              break;
+            }
+          }
+        }
+        usedUris.push_back(binUri);
+        binSavePath = JoinPath(baseDir, binUri);
+        if (!SerializeGltfBuffer(model->buffers[i], buffer, binSavePath,
+                                 binUri)) {
+          return false;
+        }
+      }
+      JsonPushBack(buffers, std::move(buffer));
+    }
+    JsonAddMember(output, "buffers", std::move(buffers));
+  }
+
+  // IMAGES
+  if (model->images.size()) {
+    json images;
+    JsonReserveArray(images, model->images.size());
+    for (unsigned int i = 0; i < model->images.size(); ++i) {
+      json image;
+
+      UpdateImageObject(model->images[i], baseDir, int(i), embedImages,
+                        &this->WriteImageData, this->write_image_user_data_);
+      SerializeGltfImage(model->images[i], image);
+      JsonPushBack(images, std::move(image));
+    }
+    JsonAddMember(output, "images", std::move(images));
+  }
+
+  if (writeBinary) {
+    // WriteBinaryGltfFile returns void, so its failures cannot be reported.
+    WriteBinaryGltfFile(filename, JsonToString(output), binBuffer);
+    return true;
+  }
+  // Propagate a failed open/write instead of always reporting success.
+  return WriteGltfFile(filename, JsonToString(output, (prettyPrint ? 2 : -1)));
+}
+
+}  // namespace tinygltf
+
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+
+#endif  // TINYGLTF_IMPLEMENTATION
\ No newline at end of file
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 900f43bd2c..678f90c5fe 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_subdirectory(level_tools)
+add_subdirectory(build_level)
 
 add_executable(dgo_unpacker
         dgo_unpacker.cpp)
diff --git a/tools/build_level/CMakeLists.txt b/tools/build_level/CMakeLists.txt
new file mode 100644
index 0000000000..03bca07db3
--- /dev/null
+++ b/tools/build_level/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_executable(build_level
+        main.cpp)
+
+target_link_libraries(build_level common compiler tiny_gltf)
\ No newline at end of file
diff --git a/tools/build_level/main.cpp b/tools/build_level/main.cpp
new file mode 100644
index 0000000000..d269c89436
--- /dev/null
+++ b/tools/build_level/main.cpp
@@ -0,0 +1,3 @@
+int main() {  // placeholder entry point: does nothing and exits successfully
+  return 0;
+}
\ No newline at end of file

From c1a020a21a1a85646c22b9d9116c45e3cd4ff98c Mon Sep 17 00:00:00 2001
From: towai <shado0909@gmail.com>
Date: Sat, 18 Jun 2022 14:22:16 -0500
Subject: [PATCH 02/17] Binding display readability improvements + add example
 showing font-color constants (#1475)

* Fix outdated bucket id and change the bind display to not scroll off the screen

* Readability improvements to bind display (alignment)

* Provide script showing all font-color constants

* Rename display-all-colors.gc to keep with convention
---
 goal_src/examples/display-all-colors.gc | 61 +++++++++++++++++++++++++
 goal_src/pc_debug/pc-pad-utils.gc       | 23 ++++++++--
 2 files changed, 81 insertions(+), 3 deletions(-)
 create mode 100644 goal_src/examples/display-all-colors.gc

diff --git a/goal_src/examples/display-all-colors.gc b/goal_src/examples/display-all-colors.gc
new file mode 100644
index 0000000000..3a068c8a43
--- /dev/null
+++ b/goal_src/examples/display-all-colors.gc
@@ -0,0 +1,61 @@
+;;-*-Lisp-*-
+(in-package goal)
+
+;; This script creates a simple process that draws text demonstrating
+;; all of GOAL's color constants to the on-screen debug output.
+
+;; Create somewhere for the handle to the process to live. See https://open-goal.github.io/docs/reference/process_and_state
+;; as well as kernel/gstate.gc
+(define *color-display-handle* (new 'static 'handle))
+(set! *color-display-handle* (the handle #f))
+
+
+(defun-debug start-display-text-colors ()
+  "Spawn an onscreen string displaying all possible colors"
+  (if (not (handle->process *color-display-handle*)) ;; spawn only if the handle has no process
+    (let ((disp-proc
+      (process-spawn-function process :name 'display-proc
+        (lambda :behavior process ()
+          (stack-size-set! (-> self main-thread) 256)
+          (loop
+            ;; These constants live in engine/gfx/font-h.gc
+            (format *stdcon* "~0k~%~%
+~0L 0 default               ~1L 1 white       
+~2L 2 gray                  ~3L 3 orange-red
+~4L 4 bright-orange-red     ~5L 5 bright-orange-red
+~6L 6 bright-green          ~7L 7 dark-blue
+~8L 8 light-blue            ~9L 9 dark-pink
+~10L10 lighter-blue          ~11L11 dark-light-blue
+~12L12 dim-white             ~13L13 dim-gray
+~14L14 orange-red-2          ~15L15 yellow-green 
+~16L16 dark-green            ~17L17 another-gray
+~18L18 dark-dark-green       ~19L19 flat-dark-purple 
+~20L20 flat-yellow           ~21L21 blue-white
+~22L22 pad-back              ~23L23 pad-shine
+~24L24 pad-square            ~25L25 pad-circle
+~26L26 pad-triangle          ~27L27 pad-x
+~28L28 lighter-lighter-blue  ~29L29 yellow-orange
+~30L30 yellow-green-2        ~31L31 another-light-blue
+~32L32 light-yellow          ~33L33 red-orange
+~34L34 another-orange-red~0L~%
+ alternate names
+  ~3L3 red   ~4L4 red2   ~5L5 yellow   ~6L6 green   ~7L7 blue
+ ~10L10 cyan   ~33L33 red-reverse   ~34L34 red-obverse~0L"
+              )
+            (suspend) ;; yield; the enclosing loop issues the format call again on resume
+            )
+          )
+        )
+      ))
+      (set! *color-display-handle* (ppointer->handle disp-proc)) ;; remembered for the check above
+      )
+    ;; else: terminate the message with a newline so later REPL output starts on its own line
+    (format #t "Colors are already being displayed~%")
+    )
+  )
+
+
+(defun-debug stop-display-text-colors ()
+  "Kill the example text color display"
+  (kill-by-name 'display-proc *active-pool*) ;; 'display-proc is the :name given when the process is spawned
+  )
diff --git a/goal_src/pc_debug/pc-pad-utils.gc b/goal_src/pc_debug/pc-pad-utils.gc
index 5ae03dee3b..fd00295898 100644
--- a/goal_src/pc_debug/pc-pad-utils.gc
+++ b/goal_src/pc_debug/pc-pad-utils.gc
@@ -104,13 +104,30 @@
               (format *stdcon* "~%")
               )
             (dotimes (ii 2)
-              (format *stdcon* " ~3Lcpad ~D~0L~%" ii)
+              (format *stdcon* " ~0k~3Lcpad ~D~0L~%" ii)
               (dotimes (j 8)
                 (dotimes (i 2)
-                  (let (
+                  (format *stdcon* "  ") 
+                  (let* (
                     (btn-idx (+ i(* j 2)))
+                    (btn-name (-> *pc-pad-button-names* btn-idx))
+                    (keycode (pc-pad-get-mapped-button ii btn-idx))
                     )
-                    (format *stdcon* "  ~8L~S:  ~0L~D  " (-> *pc-pad-button-names* btn-idx) (pc-pad-get-mapped-button ii btn-idx))
+                    (format *stdcon* "~8L~S: " btn-name)
+                    ;; longest button string is TRIANGLE, which is 8 characters in length
+                    ;; but only shows up in the left column. CIRCLE and SQUARE are longest in the right with 6
+                    (dotimes (_ (- 
+                                (cond ((= i 0) 8) ((= i 1) 6))
+                                (length btn-name)))
+                      (format *stdcon* " ")
+                      )
+                    (format *stdcon* "~0L~D" keycode)
+                    (when (< keycode 100)
+                      (format *stdcon* " ")
+                      (if (< keycode 10)
+                        (format *stdcon* " ")
+                        )
+                      )
                     )
                   )
                   (format *stdcon* "~%")

From 0ec742319e3e34bfa5afb70dfc65762b855350a7 Mon Sep 17 00:00:00 2001
From: Hat Kid <6624576+Hat-Kid@users.noreply.github.com>
Date: Sat, 18 Jun 2022 21:22:50 +0200
Subject: [PATCH 03/17] issues: add issue templates (#1480)

---
 .github/ISSUE_TEMPLATE/bug-report.yml         | 52 +++++++++++++++++++
 .../ISSUE_TEMPLATE/enhancement-request.yml    | 24 +++++++++
 .github/ISSUE_TEMPLATE/feature-request.yml    | 24 +++++++++
 3 files changed, 100 insertions(+)
 create mode 100644 .github/ISSUE_TEMPLATE/bug-report.yml
 create mode 100644 .github/ISSUE_TEMPLATE/enhancement-request.yml
 create mode 100644 .github/ISSUE_TEMPLATE/feature-request.yml

diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml
new file mode 100644
index 0000000000..a4d5c1fb91
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@@ -0,0 +1,52 @@
+name: Bug Report
+description: Create a bug report.
+labels: ["bug"]
+body:
+- type: textarea
+  attributes:
+    label: Describe the Bug
+    description: A clear and concise description of what the bug is. You may post screenshots or videos of the bug here.
+  validations:
+    required: true
+
+- type: textarea
+  attributes:
+    label: How To Reproduce
+    description: Steps to reproduce the behavior. You can also post a video of it here.
+  validations:
+    required: true
+
+- type: textarea
+  attributes:
+    label: Expected Behavior
+    description: A clear and concise description of the expected behavior.
+    placeholder: When I do X, Y should happen.
+  validations:
+    required: true
+
+- type: input
+  attributes:
+    label: Operating System
+    description: Windows version, Linux distribution, etc.
+  validations:
+    required: true
+
+- type: dropdown
+  attributes:
+    label: OpenGOAL Version
+    options:
+      - Release
+      - Compiled from source
+  validations:
+    required: true
+
+- type: dropdown
+  attributes:
+    label: Game Version
+    options:
+      - NTSC 1.0 (black label)
+      - NTSC Greatest Hits version (red label)
+      - PAL
+      - JP
+  validations:
+    required: true
\ No newline at end of file
diff --git a/.github/ISSUE_TEMPLATE/enhancement-request.yml b/.github/ISSUE_TEMPLATE/enhancement-request.yml
new file mode 100644
index 0000000000..83afc363f8
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/enhancement-request.yml
@@ -0,0 +1,24 @@
+name: Enhancement Request
+description: Suggest an improvement for an existing feature.
+labels: ["enhancement"]
+body:
+- type: textarea
+  attributes:
+    label: What feature is your idea related to?
+    description: A clear and concise description of what the problem is.
+  validations:
+    required: true
+
+- type: textarea
+  attributes:
+    label: Describe the solution you'd like.
+    description: A clear and concise description of what you want to happen.
+  validations:
+    required: true
+
+- type: textarea
+  attributes:
+    label: Additional context
+    description: Add any other context or screenshots about the enhancement request here.
+  validations:
+    required: false
\ No newline at end of file
diff --git a/.github/ISSUE_TEMPLATE/feature-request.yml b/.github/ISSUE_TEMPLATE/feature-request.yml
new file mode 100644
index 0000000000..ddb59ed0cf
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature-request.yml
@@ -0,0 +1,24 @@
+name: Feature Request
+description: Suggest an idea for the game
+labels: ["game"]
+body:
+- type: textarea
+  attributes:
+    label: Is your feature request related to a problem?
+    description: A clear and concise description of what the problem is.
+  validations:
+    required: true
+    
+- type: textarea
+  attributes:
+    label: Describe the solution you'd like.
+    description: A clear and concise description of what you want to happen.
+  validations:
+    required: true
+
+- type: textarea
+  attributes:
+    label: Additional context
+    description: Add any other context or screenshots about the feature request here.
+  validations:
+    required: false
\ No newline at end of file

From 4a2d48bfc77519b8a165e9cbe3a0190dca7481db Mon Sep 17 00:00:00 2001
From: Matthew Wells <91291346+richarm4@users.noreply.github.com>
Date: Sun, 19 Jun 2022 03:08:11 -0700
Subject: [PATCH 04/17] Fixed typos in FAQ.md (#1486)

---
 FAQ.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/FAQ.md b/FAQ.md
index 40f4772a0c..29fc2271dc 100644
--- a/FAQ.md
+++ b/FAQ.md
@@ -37,7 +37,7 @@ The original game had a few issues of varying severity - from progress softlocks
 In the future yes! We intend to make it as easy as we can to make your own code or import graphics for the game.
 
 ### What is this menu? Why am I not dying sometimes?
-This might be the result of the game beeing booted in debugging mode. The project is still in development and we rely on the game's built-in debug features (along with some we made ourselves) for testing and development. The final product will have this debug mode disabled. In the meantime, have fun with all the options! Keep in mind some don't work very well.
+This might be the result of the game being booted in debugging mode. The project is still in development and we rely on the game's built-in debug features (along with some we made ourselves) for testing and development. The final product will have this debug mode disabled. In the meantime, have fun with all the options! Keep in mind some don't work very well.
 
 ### What are the minimum requirements?
 These are not totally set in stone as the project is not complete. These are the minimum architecture requirements for the project:
@@ -49,7 +49,7 @@ These are not totally set in stone as the project is not complete. These are the
 Probably yes, in the future!
 
 ### I'd like to contribute, but don't know where to start!
-Great to hear! Any help is good help. If you haven't already, please read the project readme. It has information on what is done, what isn't done, what is being worked on, the project layout and how it all works. Then just join our development discord server and we can help you further.
+Great to hear! Any help is good help. If you haven't already, please read the project readme. It has information on what is done, what isn't done, what is being worked on, the project layout and how it all works. Then just join our development Discord server and we can help you further.
 
 ### Will you also decompile other games that aren't Jak?
 No.

From f48fda692e2268a201ad0f017e3fb1c239d192f7 Mon Sep 17 00:00:00 2001
From: Brent Hickey <brent.hickey@icloud.com>
Date: Sun, 19 Jun 2022 14:01:51 -0700
Subject: [PATCH 05/17] [game] 150fps support (and 100fps support) (#1264)

* docs for ee merc code

* 150fps support but it replaces 60fps

* oops switched wrong mode

* oops 50.0 not 50

* formatting

* fix cutscene speed

* oops

* Replace latest merc.md, not sure what happened

* Automatically switch between video modes (ntsc or 150fps) based on
refresh rate. Cleanup particle timing

* cleanup

* fix idle animation

* linter

* fix village2 crash

* can load all levels again

* update loader output and replace sparticle time with formula

* Add 100fps support, add some comments, fix build

* formatting

Co-authored-by: water <awaterford111445@gmail.com>
---
 game/graphics/pipelines/opengl.cpp            |   7 +
 game/kernel/kboot.cpp                         |   2 +
 game/kernel/kboot.h                           |  10 ++
 game/kernel/kscheme.cpp                       |   2 +-
 goal_src/engine/ambient/weather-part.gc       |  56 +-----
 goal_src/engine/draw/drawable.gc              |   9 +-
 goal_src/engine/draw/process-drawable-h.gc    | 135 ++++++++++++++-
 goal_src/engine/draw/process-drawable.gc      | 134 ---------------
 goal_src/engine/game/effect-control.gc        |  12 +-
 goal_src/engine/game/generic-obs.gc           |   9 +-
 goal_src/engine/game/powerups.gc              | 160 ++++--------------
 goal_src/engine/game/settings.gc              |  18 +-
 goal_src/engine/game/video.gc                 |  24 +++
 goal_src/engine/gfx/hw/display.gc             |  19 ++-
 goal_src/engine/gfx/ocean/ocean.gc            |   2 +-
 goal_src/engine/gfx/sky/sky-tng.gc            |  28 ++-
 goal_src/engine/gfx/water/water.gc            |  72 +-------
 goal_src/engine/gfx/wind.gc                   |   4 +-
 goal_src/engine/load/loader.gc                | 109 +++++-------
 goal_src/engine/sparticle/sparticle.gc        |  49 +-----
 goal_src/engine/target/target-part.gc         |   9 +-
 goal_src/engine/target/target.gc              |  12 +-
 goal_src/examples/debug-draw-example.gc       |  20 +--
 goal_src/levels/beach/beach-part.gc           |  18 +-
 goal_src/levels/beach/lurkerworm.gc           |  36 +---
 goal_src/levels/citadel/citadel-part.gc       |  18 +-
 goal_src/levels/citadel/citadel-sages.gc      |   9 +-
 goal_src/levels/citadel/citb-plat.gc          |   9 +-
 goal_src/levels/finalboss/light-eco.gc        |  36 +---
 .../levels/finalboss/sage-finalboss-part.gc   |  36 +---
 goal_src/levels/jungle/darkvine.gc            |   9 +-
 goal_src/levels/jungle/fisher.gc              |  45 +----
 goal_src/levels/jungle/jungle-mirrors.gc      |  18 +-
 goal_src/levels/maincave/maincave-obs.gc      |  18 +-
 goal_src/levels/maincave/maincave-part.gc     |  18 +-
 goal_src/levels/misty/misty-obs.gc            |   9 +-
 goal_src/levels/misty/muse.gc                 |   9 +-
 goal_src/levels/racer_common/racer-states.gc  |   9 +-
 goal_src/levels/racer_common/target-racer.gc  | 101 +++--------
 .../levels/rolling/rolling-lightning-mole.gc  |  18 +-
 goal_src/levels/sunken/orbit-plat.gc          |   9 +-
 goal_src/levels/sunken/sun-exit-chamber.gc    |   9 +-
 goal_src/levels/sunken/sunken-water.gc        |   9 +-
 goal_src/levels/training/training-part.gc     |  18 +-
 goal_src/levels/village1/assistant.gc         |   9 +-
 goal_src/levels/village1/fishermans-boat.gc   |   9 +-
 goal_src/levels/village1/village1-part.gc     |  18 +-
 goal_src/levels/village1/village1-part2.gc    |  18 +-
 .../levels/village2/assistant-village2.gc     |   9 +-
 goal_src/levels/village2/swamp-blimp.gc       |   9 +-
 goal_src/levels/village2/village2-part.gc     |   9 +-
 51 files changed, 428 insertions(+), 1016 deletions(-)

diff --git a/game/graphics/pipelines/opengl.cpp b/game/graphics/pipelines/opengl.cpp
index 2249232b4b..f85190e1ae 100644
--- a/game/graphics/pipelines/opengl.cpp
+++ b/game/graphics/pipelines/opengl.cpp
@@ -191,6 +191,13 @@ static std::shared_ptr<GfxDisplay> gl_make_display(int width,
       g_gfx_data->debug_gui.m_vsync = false;
       g_gfx_data->vsync_enabled = false;
       glfwSwapInterval(false);
+      if (primary_monitor_video_mode->refreshRate > 100) {
+        BootVideoMode = VideoMode::FPS150;
+        g_gfx_data->debug_gui.target_fps = 150;
+      } else if (primary_monitor_video_mode->refreshRate > 60) {
+        BootVideoMode = VideoMode::FPS100;
+        g_gfx_data->debug_gui.target_fps = 100;
+      }
     } else {
       // enable vsync
       g_gfx_data->debug_gui.framelimiter = false;
diff --git a/game/kernel/kboot.cpp b/game/kernel/kboot.cpp
index 6e8aade4a4..7cb2c83bc7 100644
--- a/game/kernel/kboot.cpp
+++ b/game/kernel/kboot.cpp
@@ -32,6 +32,8 @@
 
 using namespace ee;
 
+VideoMode BootVideoMode;
+
 // Level to load on boot
 char DebugBootLevel[64];
 
diff --git a/game/kernel/kboot.h b/game/kernel/kboot.h
index 7989048c1c..db29523a71 100644
--- a/game/kernel/kboot.h
+++ b/game/kernel/kboot.h
@@ -30,6 +30,16 @@ enum class RuntimeExitStatus {
   RESTART_IN_DEBUG = 3,
 };
 
+enum class VideoMode {  // cast to u32 and stored in *boot-video-mode* by InitHeapAndSymbol
+  NTSC = 0,    // also the value of a zero-initialized VideoMode
+  PAL = 1,
+  FPS100 = 2,  // set by gl_make_display for refresh rates > 60 and <= 100
+  FPS150 = 3,  // set by gl_make_display for refresh rates > 100
+};
+
+// Video Mode that's set based on display refresh rate on boot
+extern VideoMode BootVideoMode;
+
 // Level to load on boot
 extern char DebugBootLevel[64];
 
diff --git a/game/kernel/kscheme.cpp b/game/kernel/kscheme.cpp
index ab7a65630a..557c545754 100644
--- a/game/kernel/kscheme.cpp
+++ b/game/kernel/kscheme.cpp
@@ -2003,7 +2003,7 @@ s32 InitHeapAndSymbol() {
   method_set_symbol->value = 0;
 
   // set *boot-video-mode*
-  intern_from_c("*boot-video-mode*")->value = 0;
+  intern_from_c("*boot-video-mode*")->value = (u32)BootVideoMode;
 
   lg::info("Initialized GOAL heap in {:.2} ms", heap_init_timer.getMs());
   // load the kernel!
diff --git a/goal_src/engine/ambient/weather-part.gc b/goal_src/engine/ambient/weather-part.gc
index 54bcc23c1c..c934004a65 100644
--- a/goal_src/engine/ambient/weather-part.gc
+++ b/goal_src/engine/ambient/weather-part.gc
@@ -353,7 +353,7 @@
   )
 
 (defun update-snow ((arg0 target))
-  (let ((gp-0 (-> arg0 control trans)))
+  (let ((target-position (-> arg0 control trans)))
     (let ((f0-0 (lerp-scale 0.0 1.0 (vector-length (-> arg0 control transv)) 2048.0 40960.0)))
       (set! (-> *part-id-table* 34 init-specs 1 initial-valuef) (- 1.0 f0-0))
       (set! (-> *part-id-table* 33 init-specs 1 initial-valuef) (* 4.0 f0-0))
@@ -361,22 +361,8 @@
     (set! (-> *part-id-table* 33 init-specs 19 initial-valuef)
           (+ 32768.0 (vector-y-angle (-> arg0 control transv)))
           )
-    (sp-launch-particles-var
-      *sp-particle-system-2d*
-      (-> *part-id-table* 34)
-      gp-0
-      (the-as sparticle-launch-state #f)
-      (the-as sparticle-launch-control #f)
-      1.0
-      )
-    (sp-launch-particles-var
-      *sp-particle-system-2d*
-      (-> *part-id-table* 33)
-      gp-0
-      (the-as sparticle-launch-state #f)
-      (the-as sparticle-launch-control #f)
-      1.0
-      )
+    (launch-particles (-> *part-id-table* 34) target-position)
+    (launch-particles (-> *part-id-table* 33) target-position)
     )
   0
   (none)
@@ -464,22 +450,8 @@
     (let ((gp-0 (new 'stack-no-clear 'vector)))
       (sp-kill-particle arg0 arg1)
       (set-vector! gp-0 (-> arg2 x) (-> arg1 user-float) (-> arg2 z) 1.0)
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 39)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
-      (sp-launch-particles-var
-        *sp-particle-system-3d*
-        (-> *part-id-table* 40)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
+      (launch-particles :rate 1.0 (-> *part-id-table* 39) gp-0)
+      (launch-particles :system *sp-particle-system-3d* :rate 1.0 (-> *part-id-table* 40) gp-0)
       )
     )
   (none)
@@ -512,22 +484,8 @@
         (set! (-> *part-id-table* 38 init-specs 5 initial-valuef) f30-0)
         (set! (-> *part-id-table* 38 init-specs 5 random-rangef) f30-0)
         )
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 37)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 38)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
+      (launch-particles (-> *part-id-table* 37) gp-0)
+      (launch-particles (-> *part-id-table* 38) gp-0)
       )
     )
   0
diff --git a/goal_src/engine/draw/drawable.gc b/goal_src/engine/draw/drawable.gc
index 9249778496..3068e0b9e1 100644
--- a/goal_src/engine/draw/drawable.gc
+++ b/goal_src/engine/draw/drawable.gc
@@ -1335,7 +1335,12 @@
         (+! (-> disp base-frame-counter) scaled-seconds)
         (+! (-> disp part-frame-counter) scaled-seconds)
         ;; this counts actual frames, not seconds. Will count 2 frames if we lag
-        (+! (-> disp integral-frame-counter) (the int time-ratio))
+        ;; When counting frames while running at > 60fps we need to scale it, but always count at least 1 frame
+        (+! (-> disp integral-frame-counter) (if (= (-> *setting-control* current video-mode) '150fps)
+                                                 (min 1 (the int (+ 1 (* 0.4 time-ratio))))
+                                                 (if (= (-> *setting-control* current video-mode) '100fps)
+                                                     (min 1 (the int (+ 1 (* 0.6 time-ratio))))
+                                                     (the int time-ratio))))
         ;; this counts actual frames, not doubling for lag. Will count 1 per frame drawn
         (+! (-> disp actual-frame-counter) 1)
         ;; game counter will count seconds that we're not in a movie
@@ -1640,7 +1645,7 @@
         ;; need to call reset-graph with some magic number
         ;; also stash this parameter so that if things go really wrong and our DMA transfer
         ;; times out, we can reset-graph to the appropriate video mode
-        (if (= (-> *setting-control* current video-mode) 'ntsc)
+        (if (or (= (-> *setting-control* current video-mode) 'ntsc) (= (-> *setting-control* current video-mode) '150fps) (= (-> *setting-control* current video-mode) '100fps))
             (set! *video-reset-parm* 2)
             (set! *video-reset-parm* 3)
             )
diff --git a/goal_src/engine/draw/process-drawable-h.gc b/goal_src/engine/draw/process-drawable-h.gc
index 68b1cda420..1eabf45db9 100644
--- a/goal_src/engine/draw/process-drawable-h.gc
+++ b/goal_src/engine/draw/process-drawable-h.gc
@@ -148,4 +148,137 @@
   (bit-10 10)
   )
 
-(define-extern process-entity-status! (function process entity-perm-status symbol int))
\ No newline at end of file
+(define-extern process-entity-status! (function process entity-perm-status symbol int))
+
+(defmacro ja-group (&key (chan 0))
+  "get the frame group for self. default channel is 0, the base channel. returns #f if no frame group."
+  `(if (> (-> self skel active-channels) ,chan) ;; only read a channel whose index is below the active-channels count
+       (-> self skel root-channel ,chan frame-group))
+  ) ;; the if has no else branch; per the docstring the result is then #f
+
+(defmacro ja-group? (group &key (chan 0))
+  "is self in this frame group on this channel? chan defaults to 0, the base channel, and is forwarded to ja-group."
+  `(= (ja-group :chan ,chan) ,group) ;; pass chan through so a non-default channel is the one actually compared
+  )
+
+(defmacro ja (&key (chan 0)
+              &key (group! #f)
+              &key (num! #f)
+              &key (param0 #f)
+              &key (param1 #f)
+              &key (num-func #f)
+              &key (frame-num #f)
+              &key (frame-interp #f)
+              &key (dist #f)
+              &key (eval? #t) ;; #t expands to joint-control-channel-group-eval!, #f to joint-control-channel-group!
+              )
+  "set various joint anim parameters for self and eval them.
+   you can use this for playing animations!
+
+   chan         = the channel to modify. defaults to 0 (base channel). this is usually what you want.
+   group!       = when not #f, set this as the new frame-group. defaults to #f
+   num!         = set the frame playback function. this is what determines what frame an animation is at. funcs below.
+                  #f = no func will be set, and there won't be a frame eval.
+   num-func     = sets the num-func field for the channel. this lets you change the function without eval'ing.
+   param0       = 1st parameter for the playback function. ONLY USE THESE WITH num-func !!
+   param1       = 2nd parameter for the playback function. ONLY USE THESE WITH num-func !!
+   frame-num    = set the frame-num field.
+   frame-interp = set the frame-interp field.
+   dist         = set the dist field.
+
+   available num! functions:
+   - (+!)       = advance anim.
+   - (-!)       = reverse anim.
+   - (identity num) = play 'num' frame.
+   - (seek! target speed) = animate towards frame target at a speed.
+                            speed is optional and defaults to 1.0 when not provided.
+                            target is optional and defaults to the last frame of the animation.
+                            if you want to set the speed, you therefore must also set the target.
+                            target can be max (no quote), which is just the same as the default value.
+   - (loop! speed)  = loop animation at a speed. default speed is 1.0 when not provided.
+   - (chan channel) = copy frame from another channel.
+   - min = the start of the animation.
+   - max = the end of the animation.
+   "
+
+  (let* ((num-args (if (pair? num!) (cdr num!) '())) ;; arguments of a list-form num!, e.g. target and speed of (seek! target speed)
+        (num! (if (pair? num!) (car num!) num!)) ;; the bare num! symbol, whether it was given as a list or as a symbol
+        (nf (cond ;; name of the num-func-* function selected by num!
+              ((or (eq? num! 'identity)
+                   (eq? num! 'min)
+                   (eq? num! 'max)
+                   )
+                   'num-func-identity)
+              ((eq? num! 'none) 'num-func-none)
+              ((eq? num! '+!) 'num-func-+!)
+              ((eq? num! '-!) 'num-func--!)
+              ((eq? num! 'seek!) 'num-func-seek!)
+              ((eq? num! 'loop!) 'num-func-loop!)
+              ((eq? num! 'blend-in!) 'num-func-blend-in!)
+              ((eq? num! 'chan) 'num-func-chan)
+              ))
+        (p0 (if param0 param0 ;; an explicit :param0 takes priority over the value derived from num!
+                (cond
+                  ((eq? num! 'chan) `(the float ,(car num-args)))
+                  ((eq? num! '+!)    (if (null? num-args) 1.0 (car num-args)))
+                  ((eq? num! '-!)    (if (null? num-args) 1.0 (car num-args)))
+                  ((eq? num! 'loop!) (if (null? num-args) 1.0 (if (eq? 'max (car num-args))
+                                                                  (if group!
+                                                                      `(the float (1- (-> (the art-joint-anim ,group!) data 0 length)))
+                                                                      `(the float (1- (-> ja-ch frame-group data 0 length)))
+                                                                      )
+                                                                  (car num-args))))
+                  ((eq? num! 'seek!) (if (or (null? num-args) (eq? (car num-args) 'max))
+                                         (if group!
+                                             `(the float (1- (-> (the art-joint-anim ,group!) data 0 length)))
+                                             `(the float (1- (-> ja-ch frame-group data 0 length)))
+                                             )
+                                         (car num-args)))
+                  )))
+        (p1 (if param1 param1 ;; an explicit :param1 takes priority; otherwise only seek! derives one (its speed)
+                (cond
+                  ((eq? num! 'seek!) (if (or (null? num-args) (null? (cdr num-args))) 1.0 (cadr num-args)))
+                  )))
+        (frame-num (if (eq? 'max frame-num) (if group! ;; 'max is replaced by a form computing (length - 1) as a float
+                                                `(the float (1- (-> (the art-joint-anim ,group!) data 0 length)))
+                                                `(the float (1- (-> ja-ch frame-group data 0 length)))
+                                                )
+                                            frame-num))
+        (frame-group (if (or p0 p1 frame-num (not nf)) group! #f)) ;; group! is stored via set! only when a param/frame-num is set or no num-func call is emitted
+      )
+  `(let ((ja-ch (-> self skel root-channel ,chan))) ;; the p0/frame-num forms built above refer to ja-ch, so they are spliced inside this let
+      ,(if frame-interp `(set! (-> ja-ch frame-interp) ,frame-interp) `(none))
+      ,(if dist `(set! (-> ja-ch dist) ,dist) `(none))
+      ,(if frame-group `(set! (-> ja-ch frame-group) (the art-joint-anim ,frame-group)) `(none))
+      ,(if p0 `(set! (-> ja-ch param 0) ,p0) `(none))
+      ,(if p1 `(set! (-> ja-ch param 1) ,p1) `(none))
+      ,(if num-func `(set! (-> ja-ch num-func) ,num-func) `(none))
+      ,(if frame-num `(set! (-> ja-ch frame-num) ,frame-num) `(none))
+      ,(if nf ;; the group/eval call is emitted only when num! selected a num-func
+          `(,(if eval? 'joint-control-channel-group-eval! 'joint-control-channel-group!)
+              ja-ch (the art-joint-anim ,group!) ,nf)
+          `(none))
+      ,(cond ;; min/max/identity map to num-func-identity above; their frame-num is written here, after that call
+        ((eq? num! 'min) `(set! (-> ja-ch frame-num) 0.0))
+        ((eq? num! 'max) (if group!
+                             `(set! (-> ja-ch frame-num) (the float (1- (-> (the art-joint-anim ,group!) data 0 length))))
+                             `(set! (-> ja-ch frame-num) (the float (1- (-> ja-ch frame-group data 0 length))))
+                             ))
+        ((eq? num! 'identity) `(set! (-> ja-ch frame-num) ,(car num-args)))
+        (#t `(none))
+        )
+      ))
+  )
+
+(defmacro ja-no-eval (&key (chan 0) ;; variant of ja that always passes :eval? #f
+                      &key (group! #f)
+                      &key (num! #f)
+                      &key (param0 #f)
+                      &key (param1 #f)
+                      &key (num-func #f)
+                      &key (frame-num #f)
+                      &key (frame-interp #f)
+                      &key (dist #f)
+                      ) ;; same keys and defaults as ja, minus eval?
+  `(ja :eval? #f :chan ,chan :group! ,group! :num! ,num! :param0 ,param0 :param1 ,param1 :num-func ,num-func :frame-num ,frame-num :frame-interp ,frame-interp :dist ,dist) ;; every key is forwarded unchanged
+  )
diff --git a/goal_src/engine/draw/process-drawable.gc b/goal_src/engine/draw/process-drawable.gc
index da55f294c9..e7edd79d9c 100644
--- a/goal_src/engine/draw/process-drawable.gc
+++ b/goal_src/engine/draw/process-drawable.gc
@@ -694,18 +694,6 @@
     )
   )
 
-
-(defmacro ja-group (&key (chan 0))
-  "get the frame group for self. default channel is 0, the base channel. returns #f if no frame group."
-  `(if (> (-> self skel active-channels) ,chan)
-       (-> self skel root-channel ,chan frame-group))
-  )
-
-(defmacro ja-group? (group &key (chan 0))
-  "is self in this frame group on this channel? default is channel 0, which is the base channel."
-  `(= (ja-group) ,group)
-  )
-
 (defbehavior ja-num-frames process-drawable ((arg0 int))
   (+ (-> self skel root-channel arg0 frame-group data 0 length) -1)
   )
@@ -881,128 +869,6 @@
       )
   )
 
-(defmacro ja (&key (chan 0)
-              &key (group! #f)
-              &key (num! #f)
-              &key (param0 #f)
-              &key (param1 #f)
-              &key (num-func #f)
-              &key (frame-num #f)
-              &key (frame-interp #f)
-              &key (dist #f)
-              &key (eval? #t)
-              )
-  "set various joint anim parameters for self and eval them.
-   you can use this for playing animations!
-
-   chan         = the channel to modify. defaults to 0 (base channel). this is usually what you want.
-   group!       = when not #f, set this as the new frame-group. defaults to #f
-   num!         = set the frame playback function. this is what determines what frame an animation is at. funcs below.
-                  #f = no func will be set, and there wont be a frame eval.
-   num-func     = sets the num-func field for the channel. this lets you change the function with eval'ing.
-   param0       = 1st parameter for the playback function. ONLY USE THESE WITH num-func !!
-   param1       = 2nd parameter for the playback function. ONLY USE THESE WITH num-func !!
-   frame-num    = set the frame-num field.
-   frame-interp = set the frame-interp field.
-   dist         = set the dist field.
-
-   available num! functions:
-   - (+!)       = advance anim.
-   - (-!)       = reverse anim.
-   - (identity num) = play 'num' frame.
-   - (seek! target speed) = animate towards frame target at a speed.
-                            speed is optional and defaults to 1.0 when not provided.
-                            target is optional and defaults to the last frame of the animation.
-                            if you want to set the speed, you therefore must also set the target.
-                            target can be max (no quote), which is just the same as the default value.
-   - (loop! speed)  = loop animation at a speed. default speed is 1.0 when not provided.
-   - (chan channel) = copy frame from another channel.
-   - min = the start of the animation.
-   - max = the end of the animation.
-   "
-
-  (let* ((num-args (if (pair? num!) (cdr num!) '()))
-        (num! (if (pair? num!) (car num!) num!))
-        (nf (cond
-              ((or (eq? num! 'identity)
-                   (eq? num! 'min)
-                   (eq? num! 'max)
-                   )
-                   'num-func-identity)
-              ((eq? num! 'none) 'num-func-none)
-              ((eq? num! '+!) 'num-func-+!)
-              ((eq? num! '-!) 'num-func--!)
-              ((eq? num! 'seek!) 'num-func-seek!)
-              ((eq? num! 'loop!) 'num-func-loop!)
-              ((eq? num! 'blend-in!) 'num-func-blend-in!)
-              ((eq? num! 'chan) 'num-func-chan)
-              ))
-        (p0 (if param0 param0
-                (cond
-                  ((eq? num! 'chan) `(the float ,(car num-args)))
-                  ((eq? num! '+!)    (if (null? num-args) 1.0 (car num-args)))
-                  ((eq? num! '-!)    (if (null? num-args) 1.0 (car num-args)))
-                  ((eq? num! 'loop!) (if (null? num-args) 1.0 (if (eq? 'max (car num-args))
-                                                                  (if group!
-                                                                      `(the float (1- (-> (the art-joint-anim ,group!) data 0 length)))
-                                                                      `(the float (1- (-> ja-ch frame-group data 0 length)))
-                                                                      )
-                                                                  (car num-args))))
-                  ((eq? num! 'seek!) (if (or (null? num-args) (eq? (car num-args) 'max))
-                                         (if group!
-                                             `(the float (1- (-> (the art-joint-anim ,group!) data 0 length)))
-                                             `(the float (1- (-> ja-ch frame-group data 0 length)))
-                                             )
-                                         (car num-args)))
-                  )))
-        (p1 (if param1 param1
-                (cond
-                  ((eq? num! 'seek!) (if (or (null? num-args) (null? (cdr num-args))) 1.0 (cadr num-args)))
-                  )))
-        (frame-num (if (eq? 'max frame-num) (if group!
-                                                `(the float (1- (-> (the art-joint-anim ,group!) data 0 length)))
-                                                `(the float (1- (-> ja-ch frame-group data 0 length)))
-                                                )
-                                            frame-num))
-        (frame-group (if (or p0 p1 frame-num (not nf)) group! #f))
-      )
-  `(let ((ja-ch (-> self skel root-channel ,chan)))
-      ,(if frame-interp `(set! (-> ja-ch frame-interp) ,frame-interp) `(none))
-      ,(if dist `(set! (-> ja-ch dist) ,dist) `(none))
-      ,(if frame-group `(set! (-> ja-ch frame-group) (the art-joint-anim ,frame-group)) `(none))
-      ,(if p0 `(set! (-> ja-ch param 0) ,p0) `(none))
-      ,(if p1 `(set! (-> ja-ch param 1) ,p1) `(none))
-      ,(if num-func `(set! (-> ja-ch num-func) ,num-func) `(none))
-      ,(if frame-num `(set! (-> ja-ch frame-num) ,frame-num) `(none))
-      ,(if nf
-          `(,(if eval? 'joint-control-channel-group-eval! 'joint-control-channel-group!)
-              ja-ch (the art-joint-anim ,group!) ,nf)
-          `(none))
-      ,(cond
-        ((eq? num! 'min) `(set! (-> ja-ch frame-num) 0.0))
-        ((eq? num! 'max) (if group!
-                             `(set! (-> ja-ch frame-num) (the float (1- (-> (the art-joint-anim ,group!) data 0 length))))
-                             `(set! (-> ja-ch frame-num) (the float (1- (-> ja-ch frame-group data 0 length))))
-                             ))
-        ((eq? num! 'identity) `(set! (-> ja-ch frame-num) ,(car num-args)))
-        (#t `(none))
-        )
-      ))
-  )
-
-(defmacro ja-no-eval (&key (chan 0)
-                      &key (group! #f)
-                      &key (num! #f)
-                      &key (param0 #f)
-                      &key (param1 #f)
-                      &key (num-func #f)
-                      &key (frame-num #f)
-                      &key (frame-interp #f)
-                      &key (dist #f)
-                      )
-  `(ja :eval? #f :chan ,chan :group! ,group! :num! ,num! :param0 ,param0 :param1 ,param1 :num-func ,num-func :frame-num ,frame-num :frame-interp ,frame-interp :dist ,dist)
-  )
-
 (defbehavior ja-eval process-drawable ()
   (let ((gp-0 (-> self skel root-channel 0))
         (s5-0 (-> self skel channel (-> self skel active-channels)))
diff --git a/goal_src/engine/game/effect-control.gc b/goal_src/engine/game/effect-control.gc
index 54bfc59a06..687d7c6add 100644
--- a/goal_src/engine/game/effect-control.gc
+++ b/goal_src/engine/game/effect-control.gc
@@ -430,14 +430,10 @@
            arg1
            s5-0
            )
-         (sp-launch-particles-var
-           *sp-particle-system-2d*
-           (the-as sparticle-launcher s3-0)
-           (vector<-cspace! (new 'stack-no-clear 'vector) (-> obj process node-list data s5-0))
-           (the-as sparticle-launch-state #f)
-           (the-as sparticle-launch-control #f)
-           1.0
-           )
+         (launch-particles :rate 1.0
+          (the-as sparticle-launcher s3-0)
+          (vector<-cspace! (new 'stack-no-clear 'vector) (-> obj process node-list data s5-0))
+          )
          )
         ((= (-> (the-as basic s3-0) type) sparticle-launch-group)
          (if *debug-effect-control*
diff --git a/goal_src/engine/game/generic-obs.gc b/goal_src/engine/game/generic-obs.gc
index ea570ab84b..357b21d8c0 100644
--- a/goal_src/engine/game/generic-obs.gc
+++ b/goal_src/engine/game/generic-obs.gc
@@ -2364,14 +2364,7 @@
       (dotimes (s4-1 3)
         (quaternion-rotate-local-z! s5-0 s5-0 10922.667)
         (quaternion-copy! *particle-quat* s5-0)
-        (sp-launch-particles-var
-          *sp-particle-system-3d*
-          (-> *part-id-table* 2528)
-          gp-1
-          (the-as sparticle-launch-state #f)
-          (the-as sparticle-launch-control #f)
-          1.0
-          )
+        (launch-particles :system *sp-particle-system-3d* (-> *part-id-table* 2528) gp-1)
         )
       )
     )
diff --git a/goal_src/engine/game/powerups.gc b/goal_src/engine/game/powerups.gc
index 661a4d10fe..1aa721767f 100644
--- a/goal_src/engine/game/powerups.gc
+++ b/goal_src/engine/game/powerups.gc
@@ -503,24 +503,10 @@
   :init-specs ((sp-flt spt-fade-g 0.0))
   )
 
-(defun eco-blue-glow ((arg0 vector))
-  (sp-launch-particles-var
-    *sp-particle-system-2d*
-    (-> *part-id-table* 255)
-    arg0
-    (the-as sparticle-launch-state #f)
-    (the-as sparticle-launch-control #f)
-    1.0
-    )
+(defun eco-blue-glow ((origin vector))
+  (launch-particles (-> *part-id-table* 255) origin)
   (if (rand-vu-percent? 0.5)
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 257)
-        arg0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
+      (launch-particles (-> *part-id-table* 257) origin)
       )
   0
   (none)
@@ -555,14 +541,7 @@
                    )
                 (rand-vu-percent? 0.5)
                 )
-           (sp-launch-particles-var
-             *sp-particle-system-3d*
-             (-> *part-id-table* 2391)
-             gp-0
-             (the-as sparticle-launch-state #f)
-             (the-as sparticle-launch-control #f)
-             1.0
-             )
+           (launch-particles :system *sp-particle-system-3d* (-> *part-id-table* 2391) gp-0)
            )
        )
      (let ((gp-1 (vector<-cspace! (new 'stack-no-clear 'vector) (-> self node-list data 70))))
@@ -576,14 +555,7 @@
                    )
                 (rand-vu-percent? 0.5)
                 )
-           (sp-launch-particles-var
-             *sp-particle-system-3d*
-             (-> *part-id-table* 2391)
-             gp-1
-             (the-as sparticle-launch-state #f)
-             (the-as sparticle-launch-control #f)
-             1.0
-             )
+           (launch-particles :system *sp-particle-system-3d* (-> *part-id-table* 2391) gp-1)
            )
        )
      (let ((f0-8 (lerp-scale 60.0 90.0 (-> self control unknown-float01) 0.0 81920.0)))
@@ -656,29 +628,15 @@
       (((pickup-type eco-yellow))
        (change-sound! (-> self sound) (static-sound-name "yel-eco-jak"))
        (let ((s4-0 (rand-vu-int-range 3 (+ (-> self node-list length) -1))))
-         (sp-launch-particles-var
-           *sp-particle-system-2d*
-           (-> *part-id-table* (if (rand-vu-percent? 0.5)
-                                   269
-                                   270
-                                   )
-               )
-           (vector<-cspace! (new 'stack-no-clear 'vector) (-> self node-list data s4-0))
-           (the-as sparticle-launch-state #f)
-           (the-as sparticle-launch-control #f)
-           1.0
-           )
+         (launch-particles (-> *part-id-table* (if (rand-vu-percent? 0.5) 269 270))
+                           (vector<-cspace! (new 'stack-no-clear 'vector) (-> self node-list data s4-0))
+                           )
          )
        (dotimes (gp-4 2)
          (let ((v1-111 (rand-vu-int-range 3 (+ (-> self node-list length) -1))))
-           (sp-launch-particles-var
-             *sp-particle-system-2d*
-             (-> *part-id-table* 271)
-             (vector<-cspace! (new 'stack-no-clear 'vector) (-> self node-list data v1-111))
-             (the-as sparticle-launch-state #f)
-             (the-as sparticle-launch-control #f)
-             1.0
-             )
+           (launch-particles (-> *part-id-table* 271)
+                             (vector<-cspace! (new 'stack-no-clear 'vector) (-> self node-list data v1-111))
+                             )
            )
          )
        )
@@ -694,29 +652,15 @@
        (update-transforms! (-> self control))
        (change-sound! (-> self sound) (static-sound-name "red-eco-jak"))
        (let ((s4-2 (rand-vu-int-range 3 (+ (-> self node-list length) -1))))
-         (sp-launch-particles-var
-           *sp-particle-system-2d*
-           (-> *part-id-table* (if (rand-vu-percent? 0.5)
-                                   273
-                                   274
-                                   )
-               )
-           (vector<-cspace! (new 'stack-no-clear 'vector) (-> self node-list data s4-2))
-           (the-as sparticle-launch-state #f)
-           (the-as sparticle-launch-control #f)
-           1.0
-           )
+         (launch-particles (-> *part-id-table* (if (rand-vu-percent? 0.5) 273 274))
+                           (vector<-cspace! (new 'stack-no-clear 'vector) (-> self node-list data s4-2))
+                           )
          )
        (dotimes (gp-6 2)
          (let ((v1-139 (rand-vu-int-range 3 (+ (-> self node-list length) -1))))
-           (sp-launch-particles-var
-             *sp-particle-system-2d*
-             (-> *part-id-table* 275)
-             (vector<-cspace! (new 'stack-no-clear 'vector) (-> self node-list data v1-139))
-             (the-as sparticle-launch-state #f)
-             (the-as sparticle-launch-control #f)
-             1.0
-             )
+           (launch-particles (-> *part-id-table* 275)
+                             (vector<-cspace! (new 'stack-no-clear 'vector) (-> self node-list data v1-139))
+                             )
            )
          )
        )
@@ -735,77 +679,35 @@
              (set! (-> *part-id-table* 259 init-specs 4 random-rangef) 16384.0)
              )
            )
-         (sp-launch-particles-var
-           *sp-particle-system-2d*
-           (-> *part-id-table* 259)
-           (vector<-cspace! (new 'stack-no-clear 'vector) (-> self node-list data v1-150))
-           (the-as sparticle-launch-state #f)
-           (the-as sparticle-launch-control #f)
-           1.0
-           )
+         (launch-particles (-> *part-id-table* 259) (vector<-cspace! (new 'stack-no-clear 'vector) (-> self node-list data v1-150))
+)
          )
        (let ((gp-8 (rand-vu-int-range 3 (+ (-> self node-list length) -1))))
-         (sp-launch-particles-var
-           *sp-particle-system-2d*
-           (-> *part-id-table* (if (rand-vu-percent? 0.5)
-                                   255
-                                   256
-                                   )
-               )
-           (vector<-cspace! (new 'stack-no-clear 'vector) (-> self node-list data gp-8))
-           (the-as sparticle-launch-state #f)
-           (the-as sparticle-launch-control #f)
-           1.0
-           )
+         (launch-particles (-> *part-id-table* (if (rand-vu-percent? 0.5) 255 256))
+                           (vector<-cspace! (new 'stack-no-clear 'vector) (-> self node-list data gp-8))
+                           )
          (if (rand-vu-percent? 0.5)
-             (sp-launch-particles-var
-               *sp-particle-system-2d*
-               (-> *part-id-table* 257)
-               (vector<-cspace! (new 'stack-no-clear 'vector) (-> self node-list data gp-8))
-               (the-as sparticle-launch-state #f)
-               (the-as sparticle-launch-control #f)
-               1.0
-               )
+             (launch-particles  (-> *part-id-table* 257)
+                                (vector<-cspace! (new 'stack-no-clear 'vector) (-> self node-list data gp-8))
+                                )
              )
          )
        (let ((v1-168 (rand-vu-int-range 3 (+ (-> self node-list length) -1))))
-         (sp-launch-particles-var
-           *sp-particle-system-2d*
-           (-> *part-id-table* 260)
-           (vector<-cspace! (new 'stack-no-clear 'vector) (-> self node-list data v1-168))
-           (the-as sparticle-launch-state #f)
-           (the-as sparticle-launch-control #f)
-           1.0
-           )
+         (launch-particles (-> *part-id-table* 260)
+                           (vector<-cspace! (new 'stack-no-clear 'vector) (-> self node-list data v1-168))
+                           )
          )
        (cpad-set-buzz! (-> *cpad-list* cpads 0) 0 1 (seconds 0.1))
        )
       (((pickup-type eco-green))
        (change-sound! (-> self sound) (static-sound-name "green-eco-jak"))
        (let ((s4-8 (rand-vu-int-range 3 (+ (-> self node-list length) -1))))
-         (sp-launch-particles-var
-           *sp-particle-system-2d*
-           (-> *part-id-table* (if (rand-vu-percent? 0.5)
-                                   277
-                                   278
-                                   )
-               )
-           (vector<-cspace! (new 'stack-no-clear 'vector) (-> self node-list data s4-8))
-           (the-as sparticle-launch-state #f)
-           (the-as sparticle-launch-control #f)
-           1.0
-           )
+         (launch-particles (-> *part-id-table* (if (rand-vu-percent? 0.5) 277 278))
+                           (vector<-cspace! (new 'stack-no-clear 'vector) (-> self node-list data s4-8)))
          )
        (dotimes (gp-11 2)
          (let ((v1-188 (rand-vu-int-range 3 (+ (-> self node-list length) -1))))
-           (sp-launch-particles-var
-             *sp-particle-system-2d*
-             (-> *part-id-table* 279)
-             (vector<-cspace! (new 'stack-no-clear 'vector) (-> self node-list data v1-188))
-             (the-as sparticle-launch-state #f)
-             (the-as sparticle-launch-control #f)
-             1.0
-             )
+           (launch-particles (-> *part-id-table* 279) (vector<-cspace! (new 'stack-no-clear 'vector) (-> self node-list data v1-188)))
            )
          )
        )
diff --git a/goal_src/engine/game/settings.gc b/goal_src/engine/game/settings.gc
index 8cbc040e31..b1b4964cc0 100644
--- a/goal_src/engine/game/settings.gc
+++ b/goal_src/engine/game/settings.gc
@@ -445,10 +445,20 @@
      (set! (-> gp-0 aspect-ratio) 'aspect4x3)
      )
     )
-   (if (zero? *boot-video-mode*)
-    (set! (-> gp-0 video-mode) 'ntsc)
-    (set! (-> gp-0 video-mode) 'pal)
-    )
+   (case *boot-video-mode*
+     ((0)
+      (set! (-> gp-0 video-mode) 'ntsc)
+      )
+     ((1)
+      (set! (-> gp-0 video-mode) 'pal)
+      )
+     ((2)
+      (set! (-> gp-0 video-mode) '100fps)
+      )
+     ((3)
+      (set! (-> gp-0 video-mode) '150fps)
+      )
+     )
    (set! (-> s5-0 sfx-volume) (+ -1.0 (-> gp-0 sfx-volume)))
    (set! (-> s5-0 music-volume) (+ -1.0 (-> gp-0 music-volume)))
    (set! (-> s5-0 dialog-volume) (+ -1.0 (-> gp-0 dialog-volume)))
diff --git a/goal_src/engine/game/video.gc b/goal_src/engine/game/video.gc
index 88ea7b7434..971c3419ee 100644
--- a/goal_src/engine/game/video.gc
+++ b/goal_src/engine/game/video.gc
@@ -33,6 +33,30 @@
      (set! (-> *math-camera* y-clip) 512.0)
      (set! (-> *shadow-data* texoffset y) 128.5)
      )
+    (('100fps)
+     (set! (-> *video-parms* screen-sy) 224)
+     (set! (-> *setting-control* default screenx) 0)
+     (set! (-> *setting-control* default screeny) 8)
+     (set! (-> *video-parms* screen-pages-high) 7)
+     (set! (-> *video-parms* relative-y-scale) 1.0)
+     (set! *ticks-per-frame* 5859)
+     (set! (-> *math-camera* isometric vector 1 y) 0.5)
+     (set! (-> *math-camera* y-pix) 112.0)
+     (set! (-> *math-camera* y-clip) 448.0)
+     (set! (-> *shadow-data* texoffset y) 112.5)
+     )
+    (('150fps)
+     (set! (-> *video-parms* screen-sy) 224)
+     (set! (-> *setting-control* default screenx) 0)
+     (set! (-> *setting-control* default screeny) 8)
+     (set! (-> *video-parms* screen-pages-high) 7)
+     (set! (-> *video-parms* relative-y-scale) 1.0)
+     (set! *ticks-per-frame* 3906)
+     (set! (-> *math-camera* isometric vector 1 y) 0.5)
+     (set! (-> *math-camera* y-pix) 112.0)
+     (set! (-> *math-camera* y-clip) 448.0)
+     (set! (-> *shadow-data* texoffset y) 112.5)
+     )
     )
   (set-time-ratios *display* (-> *display* time-ratio))
   (set! (-> *video-parms* reset-video-mode) #t)
diff --git a/goal_src/engine/gfx/hw/display.gc b/goal_src/engine/gfx/hw/display.gc
index 42a44e8bb2..1e44cec4e2 100644
--- a/goal_src/engine/gfx/hw/display.gc
+++ b/goal_src/engine/gfx/hw/display.gc
@@ -45,13 +45,30 @@
        ;; 6 "ticks" per frame * 50 fps = 300 ticks per second.
        (set! (-> obj time-factor) 6.0)
        )
-      (else
+      (('ntsc)
        (set! (-> obj time-adjust-ratio) ratio)
        (set! (-> obj seconds-per-frame) (* 0.016666668 ratio))
        (set! (-> obj frames-per-second) (* 60.0 (/ 1.0 ratio)))
        ;; 5 "ticks" per frame * 60 fps = 300 ticks per second.
        (set! (-> obj time-factor) 5.0)
        )
+      (('100fps)
+       (set! (-> obj time-adjust-ratio) (* 0.6 ratio))
+       (set! (-> obj seconds-per-frame) (* 0.01 ratio))
+       (set! (-> obj frames-per-second) (* 100.0 (/ 1.0 ratio)))
+       ;; 3 "ticks" per frame * 100 fps = 300 ticks per second.
+       (set! (-> obj time-factor) 3.0)
+       )
+      (('150fps)
+       (set! (-> obj time-adjust-ratio) (* 0.4 ratio))
+       (set! (-> obj seconds-per-frame) (* 0.006666667 ratio))
+       (set! (-> obj frames-per-second) (* 150.0 (/ 1.0 ratio)))
+       ;; 2 "ticks" per frame * 150 fps = 300 ticks per second.
+       (set! (-> obj time-factor) 2.0)
+       )
+      (else ;; video-mode matched none of the cases above: only warn, none of the timing fields are written here
+       (format #t "Warning: Tried to set unsupported video-mode~%") ;; ~% ends the line so later console output starts on a new one
+       )
       )
     )
   (-> obj time-ratio)
diff --git a/goal_src/engine/gfx/ocean/ocean.gc b/goal_src/engine/gfx/ocean/ocean.gc
index 8c06b24b51..1f4342e766 100644
--- a/goal_src/engine/gfx/ocean/ocean.gc
+++ b/goal_src/engine/gfx/ocean/ocean.gc
@@ -747,4 +747,4 @@
     )
   0
   (none)
-  )
\ No newline at end of file
+  )
diff --git a/goal_src/engine/gfx/sky/sky-tng.gc b/goal_src/engine/gfx/sky/sky-tng.gc
index b0bac528af..69910fb11c 100644
--- a/goal_src/engine/gfx/sky/sky-tng.gc
+++ b/goal_src/engine/gfx/sky/sky-tng.gc
@@ -168,15 +168,31 @@
   (sky-make-sun-data *sky-parms* 1 arg0)
   (sky-make-moon-data *sky-parms* arg0)
   (let ((v1-0 *sky-tng-data*))
-   (+! (-> v1-0 off-s-0) 16)
-   (+! (-> v1-0 off-t-0) 32)
-   (+! (-> v1-0 off-s-1) -21)
-   (+! (-> v1-0 off-t-1) 42)
-   (set! (-> v1-0 time) arg0)
+    (case (-> *setting-control* current video-mode)
+      (('150fps)
+       (+! (-> v1-0 off-s-0) 6.4)
+       (+! (-> v1-0 off-t-0) 12.8)
+       (+! (-> v1-0 off-s-1) -8.4)
+       (+! (-> v1-0 off-t-1) 16.8)
+       )
+      (('100fps)
+       (+! (-> v1-0 off-s-0) 9.6)
+       (+! (-> v1-0 off-t-0) 19.2)
+       (+! (-> v1-0 off-s-1) -12.6)
+       (+! (-> v1-0 off-t-1) 25.2)
+       )
+      (else
+       (+! (-> v1-0 off-s-0) 16)
+       (+! (-> v1-0 off-t-0) 32)
+       (+! (-> v1-0 off-s-1) -21)
+       (+! (-> v1-0 off-t-1) 42)
+       )      
+      )
+    (set! (-> v1-0 time) arg0)
    )
   0
   (none)
-  )
+)
 
 #|
 (defun init-sky-regs ()
diff --git a/goal_src/engine/gfx/water/water.gc b/goal_src/engine/gfx/water/water.gc
index 614aa8b8b5..d77d7998d0 100644
--- a/goal_src/engine/gfx/water/water.gc
+++ b/goal_src/engine/gfx/water/water.gc
@@ -48,14 +48,7 @@
       (sp-kill-particle arg0 arg1)
       (set-vector! s5-0 (-> arg2 x) (-> arg1 user-float) (-> arg2 z) 1.0)
       (sound-play "water-drop" :position (the-as symbol s5-0))
-      (sp-launch-particles-var
-        *sp-particle-system-3d*
-        (-> *part-id-table* 108)
-        s5-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
+      (launch-particles :system *sp-particle-system-3d* :rate 1.0 (-> *part-id-table* 108) s5-0)
       )
     )
   0
@@ -728,42 +721,14 @@
                   (set! (-> *part-id-table* 118 init-specs 1 initial-valuef) (* 0.0000036621095 f28-0))
                   (set! (-> *part-id-table* 118 init-specs 2 initial-valuef) (* 0.1 f28-0))
                   (set! (-> *part-id-table* 118 init-specs 13 initial-valuef) 0.7111111)
-                  (sp-launch-particles-var
-                    *sp-particle-system-3d*
-                    (-> *part-id-table* 118)
-                    s4-0
-                    (the-as sparticle-launch-state #f)
-                    (the-as sparticle-launch-control #f)
-                    1.0
-                    )
+                  (launch-particles :system *sp-particle-system-3d* (-> *part-id-table* 118) s4-0)
                   (set! (-> *part-id-table* 121 init-specs 1 initial-valuef) (* 0.000004150391 f28-0))
                   (set! (-> *part-id-table* 121 init-specs 18 initial-valuef) f30-1)
-                  (sp-launch-particles-var
-                    *sp-particle-system-3d*
-                    (-> *part-id-table* 121)
-                    s4-0
-                    (the-as sparticle-launch-state #f)
-                    (the-as sparticle-launch-control #f)
-                    1.0
-                    )
+                  (launch-particles :system *sp-particle-system-3d* (-> *part-id-table* 121) s4-0)
                   (when (< f28-0 4096.0)
                     (set! (-> *part-id-table* 112 init-specs 4 random-rangef) (-> obj ripple-size))
-                    (sp-launch-particles-var
-                      *sp-particle-system-3d*
-                      (-> *part-id-table* 112)
-                      s4-0
-                      (the-as sparticle-launch-state #f)
-                      (the-as sparticle-launch-control #f)
-                      1.0
-                      )
-                    (sp-launch-particles-var
-                      *sp-particle-system-3d*
-                      (-> *part-id-table* 115)
-                      s4-0
-                      (the-as sparticle-launch-state #f)
-                      (the-as sparticle-launch-control #f)
-                      1.0
-                      )
+                    (launch-particles :system *sp-particle-system-3d* (-> *part-id-table* 112) s4-0)
+                    (launch-particles :system *sp-particle-system-3d* (-> *part-id-table* 115) s4-0)
                     )
                   )
                 )
@@ -786,23 +751,9 @@
                       (* 0.00012207031 (vector-xz-length (-> obj process root transv)))
                       )
                    )
-             (sp-launch-particles-var
-               *sp-particle-system-2d*
-               (-> *part-id-table* 110)
-               s4-1
-               (the-as sparticle-launch-state #f)
-               (the-as sparticle-launch-control #f)
-               1.0
-               )
+             (launch-particles (-> *part-id-table* 110) s4-1)
              (set! (-> *part-id-table* 111 init-specs 16 initial-valuef) (-> obj surface-height))
-             (sp-launch-particles-var
-               *sp-particle-system-2d*
-               (-> *part-id-table* 111)
-               s4-1
-               (the-as sparticle-launch-state #f)
-               (the-as sparticle-launch-control #f)
-               1.0
-               )
+             (launch-particles (-> *part-id-table* 111) s4-1)
              )
            )
          (let ((f30-3 (- (+ (-> obj base-height) (-> obj ocean-offset) (-> obj bob-offset) (-> obj align-offset))
@@ -990,14 +941,7 @@
            (set! (-> *part-id-table* 145 init-specs 8 initial-valuef) (* 0.05 (- (-> a2-15 x) (-> obj drip-old-pos x))))
            (set! (-> *part-id-table* 145 init-specs 9 initial-valuef) (* 0.05 (- (-> a2-15 y) (-> obj drip-old-pos y))))
            (set! (-> *part-id-table* 145 init-specs 10 initial-valuef) (* 0.05 (- (-> a2-15 z) (-> obj drip-old-pos z))))
-           (sp-launch-particles-var
-             *sp-particle-system-2d*
-             (-> *part-id-table* 145)
-             a2-15
-             (the-as sparticle-launch-state #f)
-             (the-as sparticle-launch-control #f)
-             1.0
-             )
+           (launch-particles (-> *part-id-table* 145) a2-15)
            )
          (set! (-> obj drip-time) (-> *display* base-frame-counter))
          (logclear! (-> obj flags) (water-flags wt15))
diff --git a/goal_src/engine/gfx/wind.gc b/goal_src/engine/gfx/wind.gc
index 0ee8bfc63c..acef24b799 100644
--- a/goal_src/engine/gfx/wind.gc
+++ b/goal_src/engine/gfx/wind.gc
@@ -21,9 +21,9 @@
   (set! (-> arg0 wind-time) (+ (-> arg0 wind-time) 1))
   (let* ((s4-0 (logand (-> arg0 wind-time) 63))
          (f0-4 (rand-vu-float-range 0.0 100.0))
-         (v1-5 (/ (-> arg0 wind-time) (the-as uint 120)))
+         (v1-5 (/ (if (or (eq? (-> *setting-control* current video-mode) '150fps) (eq? (-> *setting-control* current video-mode) '100fps)) (the uint (max 1.0 (* (if (eq? (-> *setting-control* current video-mode) '150fps) 0.4 0.6) (-> arg0 wind-time)))) (-> arg0 wind-time)) (the-as uint 120))) ;; high-fps: scale wind-time by 0.4/0.6; max (not min) so the scaled time is at least 1 rather than capped at 1
          (f1-6 (* 0.008333334
-                  (the float (mod (-> arg0 wind-time) (the-as uint 120)))
+                  (the float (mod (if (or (eq? (-> *setting-control* current video-mode) '150fps) (eq? (-> *setting-control* current video-mode) '100fps)) (the uint (max 1.0 (* (if (eq? (-> *setting-control* current video-mode) '150fps) 0.4 0.6) (-> arg0 wind-time)))) (-> arg0 wind-time)) (the-as uint 120))) ;; same scaled time as v1-5
                   )
            )
          (f2-4 (* 0.0625
diff --git a/goal_src/engine/load/loader.gc b/goal_src/engine/load/loader.gc
index 84e99c6e15..f503b02e0f 100644
--- a/goal_src/engine/load/loader.gc
+++ b/goal_src/engine/load/loader.gc
@@ -892,7 +892,7 @@
   (set! sv-56 0)
   (set! spool-sound (new-sound-id))
   (backup-load-state-and-set-cmds *load-state* (-> arg0 command-list))
-  (set-setting! 'spooling (the-as symbol (process->ppointer self)) 0.0 0)
+  (set-setting! 'spooling (process->ppointer self) 0.0 0)
   (logior! (-> self skel status) (janim-status inited drawn done))
   (kill-current-level-hint '() '() 'die)
   (level-hint-surpress!)
@@ -900,16 +900,9 @@
   (when (or (handle->process (-> *art-control* spool-lock)) (!= *master-mode* 'game))
     (cond
       (arg1
-        (when (!= (if (> (-> self skel active-channels) 0)
-                      (-> self skel root-channel 0 frame-group)
-                      )
-                  arg1
-                  )
+        (when (!= (ja-group) arg1)
           (ja-channel-push! 1 (seconds 0.05))
-          (let ((s2-0 (-> self skel root-channel 0)))
-            (joint-control-channel-group-eval! s2-0 arg1 num-func-identity)
-            (set! (-> s2-0 frame-num) 0.0)
-            )
+          (ja :group! arg1 :num! min)
           )
         )
       (else
@@ -923,12 +916,9 @@
           )
       (spool-push *art-control* (-> arg0 name) spool-part self -9.0)
       (suspend)
-      (when arg1
-        (let ((a0-17 (-> self skel root-channel 0)))
-          (set! (-> a0-17 param 0) 1.0)
-          (joint-control-channel-group-eval! a0-17 (the-as art-joint-anim #f) num-func-loop!)
+      (if arg1
+          (ja :num! (loop!))
           )
-        )
       )
     )
   (let ((v1-46 (process->ppointer self)))
@@ -936,7 +926,7 @@
           (new 'static 'handle :process v1-46 :pid (-> (the-as process (-> v1-46 0)) pid))
           )
     )
-  (set! sv-48 (-> *display* base-frame-counter))
+  (set! sv-48 (the-as int (-> *display* base-frame-counter)))
   (while (< spool-part (-> arg0 parts))
     (spool-push *art-control* (-> arg0 name) spool-part self -20.0)
     (update *art-control* #f)
@@ -944,16 +934,9 @@
     (when (!= (file-status *art-control* (-> arg0 name) spool-part) 'active)
       (cond
         (arg1
-          (when (!= (if (> (-> self skel active-channels) 0)
-                        (-> self skel root-channel 0 frame-group)
-                        )
-                    arg1
-                    )
+          (when (!= (ja-group) arg1)
             (ja-channel-set! 1)
-            (let ((s2-2 (-> self skel root-channel 0)))
-              (joint-control-channel-group-eval! s2-2 arg1 num-func-identity)
-              (set! (-> s2-2 frame-num) 0.0)
-              )
+            (ja :group! arg1 :num! min)
             )
           )
         (else
@@ -967,12 +950,9 @@
         (spool-push *art-control* (-> arg0 name) spool-part self -20.0)
         (format #t "WARNING: ---------------------> loader stall on art ~S ~D~%" (-> arg0 name) spool-part)
         (suspend)
-        (when arg1
-          (let ((a0-37 (-> self skel root-channel 0)))
-            (set! (-> a0-37 param 0) 1.0)
-            (joint-control-channel-group-eval! a0-37 (the-as art-joint-anim #f) num-func-loop!)
+        (if arg1
+            (ja :num! (loop!))
             )
-          )
         )
       )
     (spool-push *art-control* (-> arg0 name) spool-part self -20.0)
@@ -980,28 +960,34 @@
       (cond
         (s2-4
           (ja-channel-set! 1)
-          (let ((a0-42 (-> self skel root-channel 0)))
-            (set! (-> a0-42 frame-group) s2-4)
-            (set! (-> a0-42 param 0) (the float (+ (-> s2-4 data 0 length) -1)))
-            (set! (-> a0-42 param 1) 1.0)
-            (set! (-> a0-42 frame-num) 0.0)
-            (joint-control-channel-group! a0-42 s2-4 num-func-seek!)
-            )
+          (ja-no-eval :group! s2-4 :num! (seek!) :frame-num 0.0)
           (when (zero? spool-part)
             (str-play-async (-> arg0 name) spool-sound)
             (set! (-> *art-control* active-stream) (-> arg0 name))
             )
-          (let* ((f30-0 (* 0.05859375 (-> s2-4 speed)))
+          ;; When running at higher than 60fps, Jak's idle animations are sped up, but no other animations appear to be.
+          ;; We couldn't figure out a simple way to detect these animations, so this checks against the idle animation names.
+          (let* ((f30-0 (* (if (and (or (= (-> *setting-control* current video-mode) '150fps) (= (-> *setting-control* current video-mode) '100fps))
+                                    (or (string= (-> arg0 name) "eichar-ambient-1")
+                                        (string= (-> arg0 name) "eichar-ambient-2")
+                                        (string= (-> arg0 name) "eichar-ambient-3")
+                                        (string= (-> arg0 name) "eichar-ambient-4")
+                                        )
+                                    )
+                               (if (= (-> *setting-control* current video-mode) '150fps) 0.4 0.6)
+                               1.0
+                               )
+                           0.05859375 (-> s2-4 speed)))
                  (f28-0 (+ sv-24 (/ (the float (+ (-> s2-4 data 0 length) -1)) f30-0)))
                  )
             (set! sv-72 (current-str-pos spool-sound))
-            (set! sv-40 (-> *display* base-frame-counter))
+            (set! sv-40 (the-as int (-> *display* base-frame-counter)))
             (until (>= (the float v0-39) f28-0)
               (if (= (-> self skel root-channel 0) (-> self skel channel))
                   (logior! (-> self skel status) (janim-status spool))
                   )
               (if (or (arg3 self)
-                      (and (<= sv-72 0) (>= (- (-> *display* base-frame-counter) sv-40) 1200))
+                      (and (<= sv-72 0) (>= (- (-> *display* base-frame-counter) sv-40) (seconds 4)))
                       (and (< 300 sv-56) (<= sv-72 0))
                       )
                   (goto cfg-88)
@@ -1015,22 +1001,17 @@
               (cond
                 ((and (< sv-32 sv-72) (= (current-str-id) spool-sound))
                  (set! sv-56 (+ sv-56 (- (-> *display* base-frame-counter) (-> *display* old-base-frame-counter))))
-                 (set! sv-40 (-> *display* base-frame-counter))
+                 (set! sv-40 (the-as int (-> *display* base-frame-counter)))
                  )
                 (else
                   0
                   )
                 )
               (set! sv-32 sv-72)
-              (set! sv-48 (-> *display* base-frame-counter))
+              (set! sv-48 (the-as int (-> *display* base-frame-counter)))
               (suspend)
-              (let ((f0-14 (* (- (the float (current-str-pos spool-sound)) sv-24) f30-0))
-                    (a0-69 (-> self skel root-channel 0))
-                    )
-                (set! (-> a0-69 param 0) (the float (+ (-> a0-69 frame-group data 0 length) -1)))
-                (set! (-> a0-69 param 1) 1.0)
-                (set! (-> a0-69 frame-num) f0-14)
-                (joint-control-channel-group! a0-69 (the-as art-joint-anim #f) num-func-seek!)
+              (let ((f0-14 (* (- (the float (current-str-pos spool-sound)) sv-24) f30-0)))
+                (ja-no-eval :num! (seek!) :frame-num f0-14)
                 )
               (set! v0-39 (current-str-pos spool-sound))
               (set! sv-72 v0-39)
@@ -1055,34 +1036,28 @@
 
 (defbehavior ja-abort-spooled-anim process-drawable ((arg0 spool-anim) (arg1 art-joint-anim) (arg2 int))
   "Abort a spooled animation."
-
   (restore-load-state-and-cleanup *load-state*)
   (str-play-stop (-> arg0 name))
   (set! (-> *art-control* active-stream) #f)
   (logclear! (-> self skel status) (janim-status drawn done))
   (if (zero? (logand (-> self skel status) (janim-status inited)))
-    (logclear! (-> self skel status) (janim-status inited))
-    )
+      (logclear! (-> self skel status) (janim-status inited)) ;; NOTE(review): only reached when `inited` is already clear, so this looks like a no-op -- confirm intent
+      )
   (remove-setting! 'spooling)
   (cond
-   ((and arg1 (>= arg2 0))
-    (ja-channel-push! 1 (seconds 0.1))
-    (set! (-> self skel root-channel 0 frame-group) arg1)
-    (while (!= (-> self skel root-channel 0) (-> self skel channel))
-      (spool-push *art-control* (-> arg0 name) arg2 self -20.0)
-      (suspend)
-      ;; TODO macro
-      (let ((a0-12 (-> self skel root-channel 0)))
-       (set! (-> a0-12 param 0) (the float (+ (-> a0-12 frame-group data 0 length) -1)))
-       (set! (-> a0-12 param 1) 1.0)
-       (joint-control-channel-group-eval! a0-12 (the-as art-joint-anim #f) num-func-seek!)
+    ((and arg1 (>= arg2 0)) ;; taken when arg1 is non-#f and arg2 is non-negative
+     (ja-channel-push! 1 (seconds 0.1))
+     (set! (-> self skel root-channel 0 frame-group) arg1) ;; assign arg1 as the frame-group of root channel 0
+     (while (!= (-> self skel root-channel 0) (-> self skel channel)) ;; repeat until root-channel 0 equals (-> self skel channel)
+       (spool-push *art-control* (-> arg0 name) arg2 self -20.0)
+       (suspend)
+       (ja :num! (seek!))
         )
+     )
+    (else
+      (ja-channel-set! 0) ;; taken when arg1 is #f or arg2 is negative
       )
     )
-   (else
-    (ja-channel-set! 0)
-    )
-   )
   (set! (-> *art-control* spool-lock) (the-as handle #f))
   0
   )
diff --git a/goal_src/engine/sparticle/sparticle.gc b/goal_src/engine/sparticle/sparticle.gc
index c380ff8a36..3f936a2dc9 100644
--- a/goal_src/engine/sparticle/sparticle.gc
+++ b/goal_src/engine/sparticle/sparticle.gc
@@ -127,6 +127,12 @@
         (new 'global 'sparticle-system 256 0 #t (-> *sprite-array-3d* vec-data) (-> *sprite-array-3d* adgif-data))
         )
 
+;; Wrapper around sp-launch-particles-var. Defaults: 2d system, #f launch-state/launch-control, and a :rate of 0.4 in '150fps mode, 0.6 in '100fps mode, 1.0 otherwise.
+(defmacro launch-particles (&key (system *sp-particle-system-2d*) particle origin &key (launch-state (the-as sparticle-launch-state #f)) &key (launch-control (the-as sparticle-launch-control #f))
+                            &key (rate (if (= (-> *setting-control* current video-mode) '150fps) 0.4
+                                           (if (= (-> *setting-control* current video-mode) '100fps) 0.6 1.0))))
+  `(sp-launch-particles-var ,system ,particle ,origin ,launch-state ,launch-control ,rate)
+  )
 
 
 ;;;;;;;;;;;;;;;;;;;;
@@ -676,46 +682,9 @@
   (none)
   )
 
-(defun set-particle-frame-time ((arg0 int))
-  (cond
-   ((= arg0 5)
-    (set-vector!
-     *sp-frame-time*
-     0.00000000000000000000000000000000000001175495
-     5.0
-     1.0
-     1.0
-     )
-    )
-   ((= arg0 6)
-    (set-vector!
-     *sp-frame-time*
-     0.000000000000000000000000000000000000011754952
-     6.0
-     1.2
-     1.2
-     )
-    )
-   ((= arg0 10)
-    (set-vector!
-     *sp-frame-time*
-     0.000000000000000000000000000000000000011754958
-     10.0
-     2.0
-     2.0
-     )
-    )
-   ((= arg0 12)
-    (set-vector!
-     *sp-frame-time*
-     0.00000000000000000000000000000000000001175496
-     12.0
-     2.4
-     2.4
-     )
-    )
-   )
-  0
+(defun set-particle-frame-time ((scaled-seconds int))
+  "Fill *sp-frame-time* from scaled-seconds. The four components, in order, are: the bit pattern (logior #x800000 scaled-seconds) reinterpreted as a float, scaled-seconds converted to float, and 0.2 * scaled-seconds twice. This generic formula replaces an earlier per-value case statement."
+  (let ((fifth (* 0.2 scaled-seconds))) (set-vector! *sp-frame-time* (the-as float (logior #x800000 scaled-seconds)) (the float scaled-seconds) fifth fifth))
   (none)
   )
 
diff --git a/goal_src/engine/target/target-part.gc b/goal_src/engine/target/target-part.gc
index b016d29e38..e658c3d855 100644
--- a/goal_src/engine/target/target-part.gc
+++ b/goal_src/engine/target/target-part.gc
@@ -2172,14 +2172,7 @@
                   )
                 )
               )
-          (sp-launch-particles-var
-            *sp-particle-system-2d*
-            (-> *part-id-table* 2002)
-            a2-3
-            (the-as sparticle-launch-state #f)
-            (the-as sparticle-launch-control #f)
-            (the-as float 1.0)
-            )
+          (launch-particles (-> *part-id-table* 2002) a2-3)
           )
         (suspend)
         0
diff --git a/goal_src/engine/target/target.gc b/goal_src/engine/target/target.gc
index c29628d8a6..406a1f0e03 100644
--- a/goal_src/engine/target/target.gc
+++ b/goal_src/engine/target/target.gc
@@ -2697,10 +2697,14 @@
                               (or (= v1-6 eichar-flop-down-loop-ja) (= v1-6 eichar-moving-flop-down-ja))
                               )
                  )
-        (when (and (or (< (target-move-dist (seconds 0.1)) 1638.4)
-                       (and (logtest? (-> self control status) (cshape-moving-flags twall)) (< 0.7 (-> self control poly-angle)))
-                       )
-                   (zero? (logand (-> self control status) (cshape-moving-flags t-act)))
+        ;; In high-fps modes both the sampling window and the distance threshold are scaled by 0.4 ('150fps) or 0.6 ('100fps); otherwise the 0.1s / 1638.4 values are used.
+        (when (and (or (< (target-move-dist (if (or (= (-> *setting-control* current video-mode) '150fps) (= (-> *setting-control* current video-mode) '100fps))
+                                                (if (= (-> *setting-control* current video-mode) '150fps) (seconds 0.04) (seconds 0.06))
+                                                (seconds 0.1)))
+                          (if (or (= (-> *setting-control* current video-mode) '150fps) (= (-> *setting-control* current video-mode) '100fps)) (if (= (-> *setting-control* current video-mode) '150fps) 655.36 983.04) 1638.4))
+                       (and (logtest? (-> self control status) (cshape-moving-flags twall)) (< 0.7 (-> self control poly-angle)))
+                       )
+                   (zero? (logand (-> self control status) (cshape-moving-flags t-act)))
                    (>= (-> self control unknown-uint20) (the-as uint 2))
                    )
           (set! (-> self control unknown-dword36) (-> *display* base-frame-counter))
diff --git a/goal_src/examples/debug-draw-example.gc b/goal_src/examples/debug-draw-example.gc
index e20e9311f4..5ef1ef25e0 100644
--- a/goal_src/examples/debug-draw-example.gc
+++ b/goal_src/examples/debug-draw-example.gc
@@ -102,23 +102,9 @@
         )
       
       (dotimes (i 10)
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 37)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
+        (launch-particles (-> *part-id-table* 37) gp-0)
+        (launch-particles (-> *part-id-table* 38) gp-0)
         )
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 38)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
-      )
       )
     )
   0
@@ -368,4 +354,4 @@
       (load-state-want-display-level *debug-load-level* #t)
       )
      )
-  )
\ No newline at end of file
+  )
diff --git a/goal_src/levels/beach/beach-part.gc b/goal_src/levels/beach/beach-part.gc
index 365d775043..d199592f41 100644
--- a/goal_src/levels/beach/beach-part.gc
+++ b/goal_src/levels/beach/beach-part.gc
@@ -93,24 +93,10 @@
                (f0-0 (vector-vector-distance (-> self root trans) gp-0))
                )
           (if (or (< (-> gp-0 y) (-> self root trans y)) (< 122880.0 f0-0))
-              (sp-launch-particles-var
-                *sp-particle-system-2d*
-                (-> *part-id-table* 666)
-                (-> self root trans)
-                (the-as sparticle-launch-state #f)
-                (the-as sparticle-launch-control #f)
-                1.0
-                )
+              (launch-particles (-> *part-id-table* 666) (-> self root trans))
               )
           )
-        (sp-launch-particles-var
-          *sp-particle-system-2d*
-          (-> *part-id-table* 668)
-          (-> self root trans)
-          (the-as sparticle-launch-state #f)
-          (the-as sparticle-launch-control #f)
-          1.0
-          )
+        (launch-particles (-> *part-id-table* 668) (-> self root trans))
         )
       (suspend)
       )
diff --git a/goal_src/levels/beach/lurkerworm.gc b/goal_src/levels/beach/lurkerworm.gc
index 58300d0bc1..b7d16a2ff9 100644
--- a/goal_src/levels/beach/lurkerworm.gc
+++ b/goal_src/levels/beach/lurkerworm.gc
@@ -241,44 +241,16 @@
 
 (defmethod particle-effect lurkerworm ((obj lurkerworm))
   (let ((a2-0 (vector<-cspace! (new 'stack-no-clear 'vector) (-> obj node-list data 5))))
-    (sp-launch-particles-var
-      *sp-particle-system-2d*
-      (-> *part-id-table* 661)
-      a2-0
-      (the-as sparticle-launch-state #f)
-      (the-as sparticle-launch-control #f)
-      1.0
-      )
+    (launch-particles (-> *part-id-table* 661) a2-0)
     )
   (let ((a2-1 (vector<-cspace! (new 'stack-no-clear 'vector) (-> obj node-list data 6))))
-    (sp-launch-particles-var
-      *sp-particle-system-2d*
-      (-> *part-id-table* 661)
-      a2-1
-      (the-as sparticle-launch-state #f)
-      (the-as sparticle-launch-control #f)
-      1.0
-      )
+    (launch-particles (-> *part-id-table* 661) a2-1)
     )
   (let ((a2-2 (vector<-cspace! (new 'stack-no-clear 'vector) (-> obj node-list data 7))))
-    (sp-launch-particles-var
-      *sp-particle-system-2d*
-      (-> *part-id-table* 661)
-      a2-2
-      (the-as sparticle-launch-state #f)
-      (the-as sparticle-launch-control #f)
-      1.0
-      )
+    (launch-particles (-> *part-id-table* 661) a2-2)
     )
   (let ((a2-3 (vector<-cspace! (new 'stack-no-clear 'vector) (-> obj node-list data 8))))
-    (sp-launch-particles-var
-      *sp-particle-system-2d*
-      (-> *part-id-table* 661)
-      a2-3
-      (the-as sparticle-launch-state #f)
-      (the-as sparticle-launch-control #f)
-      1.0
-      )
+    (launch-particles (-> *part-id-table* 661) a2-3)
     )
   0
   (none)
diff --git a/goal_src/levels/citadel/citadel-part.gc b/goal_src/levels/citadel/citadel-part.gc
index 913d405e81..671cadf8f5 100644
--- a/goal_src/levels/citadel/citadel-part.gc
+++ b/goal_src/levels/citadel/citadel-part.gc
@@ -63,22 +63,8 @@
     (let ((gp-0 (new 'stack-no-clear 'vector)))
       (sp-kill-particle arg0 arg1)
       (set-vector! gp-0 (-> arg2 x) (-> arg2 y) (-> arg2 z) 1.0)
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 2882)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        (the-as float 1.0)
-        )
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 2883)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        (the-as float 1.0)
-        )
+      (launch-particles :rate 1.0 (-> *part-id-table* 2882) gp-0)
+      (launch-particles :rate 1.0 (-> *part-id-table* 2883) gp-0)
       )
     )
   (none)
diff --git a/goal_src/levels/citadel/citadel-sages.gc b/goal_src/levels/citadel/citadel-sages.gc
index 0c04a53f6e..e8e9215a92 100644
--- a/goal_src/levels/citadel/citadel-sages.gc
+++ b/goal_src/levels/citadel/citadel-sages.gc
@@ -71,14 +71,7 @@
             )
         (dotimes (s2-2 12)
           (vector-matrix*! s5-0 (-> self bar-array s2-2) gp-0)
-          (sp-launch-particles-var
-            s3-1
-            s4-1
-            s5-0
-            (the-as sparticle-launch-state #f)
-            (the-as sparticle-launch-control #f)
-            1.0
-            )
+          (launch-particles :system s3-1 s4-1 s5-0)
           )
         )
       )
diff --git a/goal_src/levels/citadel/citb-plat.gc b/goal_src/levels/citadel/citb-plat.gc
index 897526e5e6..840e4a9cd1 100644
--- a/goal_src/levels/citadel/citb-plat.gc
+++ b/goal_src/levels/citadel/citb-plat.gc
@@ -887,14 +887,7 @@
     (dotimes (s5-0 16)
       (quaternion-rotate-local-z! gp-0 gp-0 2048.0)
       (quaternion-copy! *particle-quat* gp-0)
-      (sp-launch-particles-var
-        *sp-particle-system-3d*
-        (-> *part-id-table* 2541)
-        (-> self blast-pos)
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
+      (launch-particles :system *sp-particle-system-3d* (-> *part-id-table* 2541) (-> self blast-pos))
       )
     )
   (spawn (-> self part) (-> self blast-pos))
diff --git a/goal_src/levels/finalboss/light-eco.gc b/goal_src/levels/finalboss/light-eco.gc
index ee5e3013a1..2786515d1c 100644
--- a/goal_src/levels/finalboss/light-eco.gc
+++ b/goal_src/levels/finalboss/light-eco.gc
@@ -188,22 +188,8 @@
     (let ((gp-0 (new 'stack-no-clear 'vector)))
       (sp-kill-particle arg0 arg1)
       (set-vector! gp-0 (-> arg2 x) (-> arg2 y) (-> arg2 z) 1.0)
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 2904)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 2905)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
+      (launch-particles :rate 1.0 (-> *part-id-table* 2904) gp-0)
+      (launch-particles :rate 1.0 (-> *part-id-table* 2905) gp-0)
       )
     )
   (none)
@@ -321,22 +307,8 @@
     (let ((gp-0 (new 'stack-no-clear 'vector)))
       (sp-kill-particle arg0 arg1)
       (set-vector! gp-0 (-> arg2 x) (-> arg2 y) (-> arg2 z) 1.0)
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 2910)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 2911)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
+      (launch-particles :rate 1.0 (-> *part-id-table* 2910) gp-0)
+      (launch-particles :rate 1.0 (-> *part-id-table* 2911) gp-0)
       )
     )
   (none)
diff --git a/goal_src/levels/finalboss/sage-finalboss-part.gc b/goal_src/levels/finalboss/sage-finalboss-part.gc
index 14f996ec1c..27308eb822 100644
--- a/goal_src/levels/finalboss/sage-finalboss-part.gc
+++ b/goal_src/levels/finalboss/sage-finalboss-part.gc
@@ -352,22 +352,8 @@
     (let ((gp-0 (new 'stack-no-clear 'vector)))
       (sp-kill-particle arg0 arg1)
       (set-vector! gp-0 (-> arg2 x) (-> arg2 y) (-> arg2 z) 1.0)
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 2933)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 2934)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
+      (launch-particles :rate 1.0 (-> *part-id-table* 2933) gp-0)
+      (launch-particles :rate 1.0 (-> *part-id-table* 2934) gp-0)
       )
     )
   (none)
@@ -1041,22 +1027,8 @@
     (let ((gp-0 (new 'stack-no-clear 'vector)))
       (sp-kill-particle arg0 arg1)
       (set-vector! gp-0 (-> arg2 x) (-> arg2 y) (-> arg2 z) 1.0)
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 2962)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 2963)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
+      (launch-particles :rate 1.0 (-> *part-id-table* 2962) gp-0)
+      (launch-particles :rate 1.0 (-> *part-id-table* 2963) gp-0)
       )
     )
   (none)
diff --git a/goal_src/levels/jungle/darkvine.gc b/goal_src/levels/jungle/darkvine.gc
index 291f34c129..c00e933300 100644
--- a/goal_src/levels/jungle/darkvine.gc
+++ b/goal_src/levels/jungle/darkvine.gc
@@ -218,14 +218,7 @@
     (ja-channel-push! 1 (seconds 0.15))
     (ja-no-eval :group! darkvine-retreat-ja :num! (seek!) :frame-num 0.0)
     (until (ja-done? 0)
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 800)
-        (-> self root-override trans)
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
+      (launch-particles (-> *part-id-table* 800) (-> self root-override trans))
       (suspend)
       (ja :num! (seek!))
       )
diff --git a/goal_src/levels/jungle/fisher.gc b/goal_src/levels/jungle/fisher.gc
index 543f8b1de3..826e31ba9d 100644
--- a/goal_src/levels/jungle/fisher.gc
+++ b/goal_src/levels/jungle/fisher.gc
@@ -853,14 +853,7 @@
     (set! (-> *part-id-table* 118 init-specs 1 initial-valuef) 0.05)
     (set! (-> *part-id-table* 118 init-specs 2 initial-valuef) 0.0)
     (set! (-> *part-id-table* 118 init-specs 13 initial-valuef) 0.35555556)
-    (sp-launch-particles-var
-      *sp-particle-system-3d*
-      (-> *part-id-table* 118)
-      gp-0
-      (the-as sparticle-launch-state #f)
-      (the-as sparticle-launch-control #f)
-      1.0
-      )
+    (launch-particles :system *sp-particle-system-3d* (-> *part-id-table* 118) gp-0)
     )
   0
   (none)
@@ -1742,42 +1735,14 @@
     (when (-> self training)
       (let ((gp-0 (new-stack-vector0)))
         (vector<-cspace! gp-0 (-> self node-list data 74))
-        (sp-launch-particles-var
-          *sp-particle-system-2d*
-          (-> *part-id-table* 2001)
-          gp-0
-          (the-as sparticle-launch-state #f)
-          (the-as sparticle-launch-control #f)
-          1.0
-          )
+        (launch-particles (-> *part-id-table* 2001) gp-0)
         (fisher-fish-water gp-0 (+ 32768.0 (vector-y-angle (-> self node-list data 75 bone transform vector 1))))
         (vector<-cspace! gp-0 (-> self node-list data 77))
-        (sp-launch-particles-var
-          *sp-particle-system-2d*
-          (-> *part-id-table* 2001)
-          gp-0
-          (the-as sparticle-launch-state #f)
-          (the-as sparticle-launch-control #f)
-          1.0
-          )
+        (launch-particles (-> *part-id-table* 2001) gp-0)
         (fisher-fish-water gp-0 (+ 32768.0 (vector-y-angle (-> self node-list data 78 bone transform vector 1))))
         (vector<-cspace! gp-0 (-> self node-list data 80))
-        (sp-launch-particles-var
-          *sp-particle-system-2d*
-          (-> *part-id-table* 828)
-          gp-0
-          (the-as sparticle-launch-state #f)
-          (the-as sparticle-launch-control #f)
-          1.0
-          )
-        (sp-launch-particles-var
-          *sp-particle-system-2d*
-          (-> *part-id-table* 2013)
-          gp-0
-          (the-as sparticle-launch-state #f)
-          (the-as sparticle-launch-control #f)
-          1.0
-          )
+        (launch-particles (-> *part-id-table* 828) gp-0)
+        (launch-particles (-> *part-id-table* 2013) gp-0)
         (fisher-fish-water gp-0 (+ 32768.0 (vector-y-angle (-> self node-list data 80 bone transform vector 1))))
         )
       )
diff --git a/goal_src/levels/jungle/jungle-mirrors.gc b/goal_src/levels/jungle/jungle-mirrors.gc
index 61c740d3d5..714d768efd 100644
--- a/goal_src/levels/jungle/jungle-mirrors.gc
+++ b/goal_src/levels/jungle/jungle-mirrors.gc
@@ -998,14 +998,7 @@
   (when (periscope-has-power-input?)
     (update! (-> self sound))
     (if (logtest? (-> self draw status) (draw-status was-drawn))
-        (sp-launch-particles-var
-          *sp-particle-system-2d*
-          (-> *part-id-table* 825)
-          (-> self reflector-trans)
-          (the-as sparticle-launch-state #f)
-          (the-as sparticle-launch-control #f)
-          1.0
-          )
+        (launch-particles (-> *part-id-table* 825) (-> self reflector-trans))
         )
     )
   0
@@ -1828,14 +1821,7 @@
         (draw-power-beam gp-0 (-> self beam-end))
         (update! (-> self sound))
         (when (logtest? (-> self draw status) (draw-status was-drawn))
-          (sp-launch-particles-var
-            *sp-particle-system-2d*
-            (-> *part-id-table* 825)
-            (-> self beam-end)
-            (the-as sparticle-launch-state #f)
-            (the-as sparticle-launch-control #f)
-            1.0
-            )
+          (launch-particles (-> *part-id-table* 825) (-> self beam-end))
           (when (and *target* (>= 24576.0 (vector-vector-distance (-> self root-override trans) (-> *target* control trans))))
             (start-hint-timer (game-text-id jungle-mirrors-break-the-mirror-jak))
             (level-hint-spawn
diff --git a/goal_src/levels/maincave/maincave-obs.gc b/goal_src/levels/maincave/maincave-obs.gc
index 56aa06a64c..9a96531ac2 100644
--- a/goal_src/levels/maincave/maincave-obs.gc
+++ b/goal_src/levels/maincave/maincave-obs.gc
@@ -483,22 +483,8 @@
       (cond
         ((< gp-0 a0-1)
          (when (sphere-in-view-frustum? (the-as sphere (-> self root-override root-prim prim-core)))
-           (sp-launch-particles-var
-             *sp-particle-system-2d*
-             (-> *part-id-table* 704)
-             (the-as vector (-> self launch-pos))
-             (the-as sparticle-launch-state #f)
-             (the-as sparticle-launch-control #f)
-             1.0
-             )
-           (sp-launch-particles-var
-             *sp-particle-system-2d*
-             (-> *part-id-table* 705)
-             (the-as vector (&-> self stack 112))
-             (the-as sparticle-launch-state #f)
-             (the-as sparticle-launch-control #f)
-             1.0
-             )
+           (launch-particles (-> *part-id-table* 704) (the-as vector (-> self launch-pos)))
+           (launch-particles (-> *part-id-table* 705) (the-as vector (&-> self stack 112)))
            )
          (when (-> self should-play-sound?)
            (set! (-> self should-play-sound?) #f)
diff --git a/goal_src/levels/maincave/maincave-part.gc b/goal_src/levels/maincave/maincave-part.gc
index 696762375c..9a85d60567 100644
--- a/goal_src/levels/maincave/maincave-part.gc
+++ b/goal_src/levels/maincave/maincave-part.gc
@@ -293,22 +293,8 @@
           (sound-play "water-drop")
           )
       (set-vector! gp-0 (-> arg2 x) (-> arg1 user-float) (-> arg2 z) 1.0)
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 2231)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
-      (sp-launch-particles-var
-        *sp-particle-system-3d*
-        (-> *part-id-table* 2232)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
+      (launch-particles :rate 1.0 (-> *part-id-table* 2231) gp-0)
+      (launch-particles :system *sp-particle-system-3d* :rate 1.0 (-> *part-id-table* 2232) gp-0)
       )
     )
   (none)
diff --git a/goal_src/levels/misty/misty-obs.gc b/goal_src/levels/misty/misty-obs.gc
index 056470a7c0..2657d82112 100644
--- a/goal_src/levels/misty/misty-obs.gc
+++ b/goal_src/levels/misty/misty-obs.gc
@@ -1394,14 +1394,7 @@
 (defstate breakaway-about-to-fall (breakaway)
   :code (behavior ()
     (sound-play "falling-bones")
-    (sp-launch-particles-var
-      *sp-particle-system-2d*
-      (-> *part-id-table* 281)
-      (-> self root-override trans)
-      (the-as sparticle-launch-state #f)
-      (the-as sparticle-launch-control #f)
-      1.0
-      )
+    (launch-particles (-> *part-id-table* 281) (-> self root-override trans))
     (let ((gp-1 #f)
           (s5-1 (-> *display* base-frame-counter))
           )
diff --git a/goal_src/levels/misty/muse.gc b/goal_src/levels/misty/muse.gc
index 1a994dae59..be7ddcf094 100644
--- a/goal_src/levels/misty/muse.gc
+++ b/goal_src/levels/misty/muse.gc
@@ -150,14 +150,7 @@
 (defmethod dummy-51 muse ((obj muse))
   (dotimes (s5-0 2)
     (let ((v1-2 (rand-vu-int-range 3 (+ (-> obj node-list length) -1))))
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 271)
-        (vector<-cspace! (new 'stack-no-clear 'vector) (-> obj node-list data v1-2))
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
+      (launch-particles (-> *part-id-table* 271) (vector<-cspace! (new 'stack-no-clear 'vector) (-> obj node-list data v1-2)))
       )
     )
   0
diff --git a/goal_src/levels/racer_common/racer-states.gc b/goal_src/levels/racer_common/racer-states.gc
index 89e8fdd180..6582b1b6be 100644
--- a/goal_src/levels/racer_common/racer-states.gc
+++ b/goal_src/levels/racer_common/racer-states.gc
@@ -407,14 +407,7 @@
                     )
            (when (>= (-> self control unknown-float01) 40960.0)
              (set! (-> *part-id-table* 2225 init-specs 1 initial-valuef) 100.0)
-             (sp-launch-particles-var
-               *sp-particle-system-2d*
-               (-> *part-id-table* 2225)
-               (-> self control trans)
-               (the-as sparticle-launch-state #f)
-               (the-as sparticle-launch-control #f)
-               1.0
-               )
+             (launch-particles (-> *part-id-table* 2225) (-> self control trans))
              )
            (target-land-effect)
            (when (and (>= (-> self control ground-impact-vel) 61440.0) (zero? (-> self racer bounce)))
diff --git a/goal_src/levels/racer_common/target-racer.gc b/goal_src/levels/racer_common/target-racer.gc
index 63ecaf5c54..b882c77432 100644
--- a/goal_src/levels/racer_common/target-racer.gc
+++ b/goal_src/levels/racer_common/target-racer.gc
@@ -650,14 +650,10 @@
     (when (< 0.0 (-> *part-id-table* 2212 init-specs 11 initial-valuef))
       (set! (-> *part-id-table* 2212 init-specs 14 initial-valuef) (the-as float gp-0))
       (set! (-> *part-id-table* 2212 init-specs 3 initial-valuef) 15155.2)
-      (sp-launch-particles-var
-        *sp-particle-system-3d*
-        (-> *part-id-table* 2212)
-        (vector<-cspace! (new 'stack-no-clear 'vector) (-> self manipy 0 node-list data 4))
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
+      (launch-particles :system *sp-particle-system-3d*
+                        (-> *part-id-table* 2212)
+                        (vector<-cspace! (new 'stack-no-clear 'vector) (-> self manipy 0 node-list data 4))
+                        )
       )
     )
   (let ((gp-2 (vector-y-quaternion! (new 'stack-no-clear 'vector) (-> self control unknown-quaternion00))))
@@ -670,14 +666,10 @@
     (when (< 0.0 (-> *part-id-table* 2212 init-specs 11 initial-valuef))
       (set! (-> *part-id-table* 2212 init-specs 14 initial-valuef) (the-as float gp-2))
       (set! (-> *part-id-table* 2212 init-specs 3 initial-valuef) 10240.0)
-      (sp-launch-particles-var
-        *sp-particle-system-3d*
-        (-> *part-id-table* 2212)
-        (vector<-cspace! (new 'stack-no-clear 'vector) (-> self manipy 0 node-list data 10))
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
+      (launch-particles :system *sp-particle-system-3d*
+                        (-> *part-id-table* 2212)
+                        (vector<-cspace! (new 'stack-no-clear 'vector) (-> self manipy 0 node-list data 10))
+                        )
       )
     )
   (when (and (racer-on-ground?) (< (-> self control unknown-float01) 90112.0))
@@ -696,22 +688,8 @@
                          (set! (-> *part-id-table* 2275 init-specs 19 initial-valuef) (+ 49152.0 f1-3))
                          (set! (-> *part-id-table* 2275 init-specs 1 initial-valuef) (* 0.0000036621095 f0-17))
                          (set! (-> *part-id-table* 2275 init-specs 2 initial-valuef) (* 0.1 f0-17))
-                         (sp-launch-particles-var
-                           *sp-particle-system-3d*
-                           (-> *part-id-table* 2275)
-                           s4-2
-                           (the-as sparticle-launch-state #f)
-                           (the-as sparticle-launch-control #f)
-                           1.0
-                           )
-                         (sp-launch-particles-var
-                           *sp-particle-system-3d*
-                           (-> *part-id-table* 2276)
-                           s4-2
-                           (the-as sparticle-launch-state #f)
-                           (the-as sparticle-launch-control #f)
-                           1.0
-                           )
+                         (launch-particles :system *sp-particle-system-3d* (-> *part-id-table* 2275) s4-2)
+                         (launch-particles :system *sp-particle-system-3d* (-> *part-id-table* 2276) s4-2)
                          )
                        )
                      (-> *part-id-table* 2208)
@@ -753,17 +731,10 @@
            )
       (set! (-> s5-2 y) (-> self control shadow-pos y))
       (if (nonzero? a1-13)
-          (sp-launch-particles-var
-            (if gp-4
-                *sp-particle-system-3d*
-                *sp-particle-system-2d*
-                )
-            a1-13
-            s5-2
-            (the-as sparticle-launch-state #f)
-            (the-as sparticle-launch-control #f)
-            1.0
-            )
+          (launch-particles :system (if gp-4 *sp-particle-system-3d* *sp-particle-system-2d*)
+                            a1-13
+                            s5-2
+                            )
           )
       )
     (let* ((gp-5 #f)
@@ -790,17 +761,10 @@
            )
       (set! (-> a2-7 y) (-> self control shadow-pos y))
       (if (nonzero? s5-3)
-          (sp-launch-particles-var
-            (if gp-5
-                *sp-particle-system-3d*
-                *sp-particle-system-2d*
-                )
-            (the-as sparticle-launcher s5-3)
-            a2-7
-            (the-as sparticle-launch-state #f)
-            (the-as sparticle-launch-control #f)
-            1.0
-            )
+          (launch-particles :system (if gp-5 *sp-particle-system-3d* *sp-particle-system-2d*)
+                            (the-as sparticle-launcher s5-3)
+                            a2-7
+                            )
           )
       )
     )
@@ -916,14 +880,9 @@
   (when (!= (-> self racer boost-output) 0.0)
     (dotimes (gp-7 8)
       (let ((v1-258 (rand-vu-int-range 3 (+ (-> self node-list length) -1))))
-        (sp-launch-particles-var
-          *sp-particle-system-2d*
-          (-> *part-id-table* 2229)
-          (vector<-cspace! (new 'stack-no-clear 'vector) (-> self node-list data v1-258))
-          (the-as sparticle-launch-state #f)
-          (the-as sparticle-launch-control #f)
-          1.0
-          )
+        (launch-particles (-> *part-id-table* 2229)
+                          (vector<-cspace! (new 'stack-no-clear 'vector) (-> self node-list data v1-258))
+                          )
         )
       )
     (cpad-set-buzz! (-> *cpad-list* cpads 0) 0 1 (seconds 0.1))
@@ -948,14 +907,7 @@
                )
              )
            )
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        a1-54
-        gp-8
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
+      (launch-particles a1-54 gp-8)
       )
     (cpad-set-buzz! (-> *cpad-list* cpads 0) 0 51 (seconds 0.05))
     (sound-play-by-name
@@ -986,14 +938,7 @@
               )
             )
           )
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 2227)
-        a2-28
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
+      (launch-particles (-> *part-id-table* 2227) a2-28)
       )
     )
   0
diff --git a/goal_src/levels/rolling/rolling-lightning-mole.gc b/goal_src/levels/rolling/rolling-lightning-mole.gc
index c692629c1e..37ecb572cb 100644
--- a/goal_src/levels/rolling/rolling-lightning-mole.gc
+++ b/goal_src/levels/rolling/rolling-lightning-mole.gc
@@ -1157,14 +1157,7 @@
     (let ((gp-0 (new 'stack-no-clear 'vector)))
       (sp-kill-particle arg0 arg1)
       (set-vector! gp-0 (-> arg2 x) (-> arg1 user-float) (-> arg2 z) 1.0)
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 1771)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
+      (launch-particles :rate 1.0 (-> *part-id-table* 1771) gp-0)
       )
     )
   (none)
@@ -1178,14 +1171,7 @@
           (sound-play "land-grass")
           )
       (set-vector! gp-0 (-> arg2 x) (-> arg1 user-float) (-> arg2 z) 1.0)
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 1772)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
+      (launch-particles :rate 1.0 (-> *part-id-table* 1772) gp-0)
       )
     )
   (none)
diff --git a/goal_src/levels/sunken/orbit-plat.gc b/goal_src/levels/sunken/orbit-plat.gc
index bdb3c610de..e3cd038d55 100644
--- a/goal_src/levels/sunken/orbit-plat.gc
+++ b/goal_src/levels/sunken/orbit-plat.gc
@@ -152,14 +152,7 @@
       )
     (set! (-> *part-id-table* 1717 init-specs 1 random-rangef) (* 0.000009494358 f30-0))
     )
-  (sp-launch-particles-var
-    *sp-particle-system-2d*
-    (-> *part-id-table* 1717)
-    arg0
-    (the-as sparticle-launch-state #f)
-    (the-as sparticle-launch-control #f)
-    1.0
-    )
+  (launch-particles (-> *part-id-table* 1717) arg0)
   0
   (none)
   )
diff --git a/goal_src/levels/sunken/sun-exit-chamber.gc b/goal_src/levels/sunken/sun-exit-chamber.gc
index e3ec051794..b6041d69d3 100644
--- a/goal_src/levels/sunken/sun-exit-chamber.gc
+++ b/goal_src/levels/sunken/sun-exit-chamber.gc
@@ -571,14 +571,7 @@
     (vector-matrix*! gp-0 gp-0 s4-0)
     (vector-float*! gp-0 gp-0 (/ 1.0 (-> gp-0 w)))
     (set! (-> *part-id-table* 2515 init-specs 13 initial-valuef) (+ 24576.0 (-> gp-0 y)))
-    (sp-launch-particles-var
-      *sp-particle-system-2d*
-      (-> *part-id-table* 2515)
-      gp-0
-      (the-as sparticle-launch-state #f)
-      (the-as sparticle-launch-control #f)
-      1.0
-      )
+    (launch-particles (-> *part-id-table* 2515) gp-0)
     )
   (none)
   )
diff --git a/goal_src/levels/sunken/sunken-water.gc b/goal_src/levels/sunken/sunken-water.gc
index 01cdde025c..8bb8022a5c 100644
--- a/goal_src/levels/sunken/sunken-water.gc
+++ b/goal_src/levels/sunken/sunken-water.gc
@@ -80,14 +80,7 @@
       )
     (set! (-> gp-0 vertex-skip) 128)
     (dotimes (s5-0 (-> gp-0 vertex-count))
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 1736)
-        (-> gp-0 data s5-0)
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
+      (launch-particles (-> *part-id-table* 1736) (-> gp-0 data s5-0))
       )
     )
   #f
diff --git a/goal_src/levels/training/training-part.gc b/goal_src/levels/training/training-part.gc
index 264cc74f21..b999b2d384 100644
--- a/goal_src/levels/training/training-part.gc
+++ b/goal_src/levels/training/training-part.gc
@@ -293,22 +293,8 @@
           (sound-play "water-drop")
           )
       (set-vector! gp-0 (-> arg2 x) (-> arg1 user-float) (-> arg2 z) 1.0)
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 763)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
-      (sp-launch-particles-var
-        *sp-particle-system-3d*
-        (-> *part-id-table* 764)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
+      (launch-particles :rate 1.0 (-> *part-id-table* 763) gp-0)
+      (launch-particles :system *sp-particle-system-3d* :rate 1.0 (-> *part-id-table* 764) gp-0)
       )
     )
   (none)
diff --git a/goal_src/levels/village1/assistant.gc b/goal_src/levels/village1/assistant.gc
index b05217c073..0ffdeff23b 100644
--- a/goal_src/levels/village1/assistant.gc
+++ b/goal_src/levels/village1/assistant.gc
@@ -359,14 +359,7 @@
           (sound-play "water-drop")
           )
       (set-vector! gp-0 (-> arg2 x) (-> arg1 user-float) (-> arg2 z) 1.0)
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 367)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
+      (launch-particles :rate 1.0 (-> *part-id-table* 367) gp-0)
       )
     )
   (none)
diff --git a/goal_src/levels/village1/fishermans-boat.gc b/goal_src/levels/village1/fishermans-boat.gc
index a221ff40d3..ed5a3c28a5 100644
--- a/goal_src/levels/village1/fishermans-boat.gc
+++ b/goal_src/levels/village1/fishermans-boat.gc
@@ -688,14 +688,7 @@
     (set! (-> *part-id-table* 2896 init-specs 19 initial-valuef) (+ 49152.0 arg1))
     (set! (-> *part-id-table* 2896 init-specs 1 initial-valuef) (* 0.0000036621095 arg2))
     (set! (-> *part-id-table* 2896 init-specs 2 initial-valuef) (* 0.1 arg2))
-    (sp-launch-particles-var
-      *sp-particle-system-3d*
-      (-> *part-id-table* 2896)
-      gp-0
-      (the-as sparticle-launch-state #f)
-      (the-as sparticle-launch-control #f)
-      1.0
-      )
+    (launch-particles :system *sp-particle-system-3d* (-> *part-id-table* 2896) gp-0)
     )
   0
   (none)
diff --git a/goal_src/levels/village1/village1-part.gc b/goal_src/levels/village1/village1-part.gc
index c72df280b5..bda4286fd3 100644
--- a/goal_src/levels/village1/village1-part.gc
+++ b/goal_src/levels/village1/village1-part.gc
@@ -1209,22 +1209,8 @@
     (let ((gp-0 (new 'stack-no-clear 'vector)))
       (sp-kill-particle arg0 arg1)
       (set-vector! gp-0 (-> arg2 x) (-> arg1 user-float) (-> arg2 z) 1.0)
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 434)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        (the-as float 1.0)
-        )
-      (sp-launch-particles-var
-        *sp-particle-system-3d*
-        (-> *part-id-table* 435)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        (the-as float 1.0)
-        )
+      (launch-particles :rate 1.0 (-> *part-id-table* 434) gp-0)
+      (launch-particles :system *sp-particle-system-3d* :rate 1.0 (-> *part-id-table* 435) gp-0)
       )
     )
   (none)
diff --git a/goal_src/levels/village1/village1-part2.gc b/goal_src/levels/village1/village1-part2.gc
index 9604e20dee..c83094d2f0 100644
--- a/goal_src/levels/village1/village1-part2.gc
+++ b/goal_src/levels/village1/village1-part2.gc
@@ -1613,22 +1613,8 @@
           (sound-play "water-drop")
           )
       (set-vector! gp-0 (-> arg2 x) (-> arg1 user-float) (-> arg2 z) 1.0)
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 502)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        (the-as float 1.0)
-        )
-      (sp-launch-particles-var
-        *sp-particle-system-3d*
-        (-> *part-id-table* 503)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        (the-as float 1.0)
-        )
+      (launch-particles :rate 1.0 (-> *part-id-table* 502) gp-0)
+      (launch-particles :system *sp-particle-system-3d* :rate 1.0 (-> *part-id-table* 503) gp-0)
       )
     )
   (none)
diff --git a/goal_src/levels/village2/assistant-village2.gc b/goal_src/levels/village2/assistant-village2.gc
index f11bb6a5e0..7161b86d80 100644
--- a/goal_src/levels/village2/assistant-village2.gc
+++ b/goal_src/levels/village2/assistant-village2.gc
@@ -687,14 +687,7 @@
           (sound-play "water-drop")
           )
       (set-vector! gp-0 (-> arg2 x) (-> arg1 user-float) (-> arg2 z) 1.0)
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 1324)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        1.0
-        )
+      (launch-particles :rate 1.0 (-> *part-id-table* 1324) gp-0)
       )
     )
   (none)
diff --git a/goal_src/levels/village2/swamp-blimp.gc b/goal_src/levels/village2/swamp-blimp.gc
index bfc94caec3..96aceefecb 100644
--- a/goal_src/levels/village2/swamp-blimp.gc
+++ b/goal_src/levels/village2/swamp-blimp.gc
@@ -1030,14 +1030,7 @@
       (let ((a2-1 (new 'static 'vector)))
         (set! (-> a2-1 quad) (-> self root-override trans quad))
         (set! (-> a2-1 y) 0.0)
-        (sp-launch-particles-var
-          *sp-particle-system-3d*
-          (-> *part-id-table* 2017)
-          a2-1
-          (the-as sparticle-launch-state #f)
-          (the-as sparticle-launch-control #f)
-          1.0
-          )
+        (launch-particles :system *sp-particle-system-3d* :rate 1.0 (-> *part-id-table* 2017) a2-1)
         )
       )
     (none)
diff --git a/goal_src/levels/village2/village2-part.gc b/goal_src/levels/village2/village2-part.gc
index 2eb4111ae2..d05f9afcd2 100644
--- a/goal_src/levels/village2/village2-part.gc
+++ b/goal_src/levels/village2/village2-part.gc
@@ -1856,14 +1856,7 @@
           (sound-play "water-drop")
           )
       (set-vector! gp-0 (-> arg2 x) (-> arg1 user-float) (-> arg2 z) 1.0)
-      (sp-launch-particles-var
-        *sp-particle-system-2d*
-        (-> *part-id-table* 1207)
-        gp-0
-        (the-as sparticle-launch-state #f)
-        (the-as sparticle-launch-control #f)
-        (the-as float 1.0)
-        )
+      (launch-particles :rate 1.0 (-> *part-id-table* 1207) gp-0)
       )
     )
   (none)

From 196c09a232f74a19a093927b5760a0222ec14e99 Mon Sep 17 00:00:00 2001
From: water111 <48171810+water111@users.noreply.github.com>
Date: Sun, 19 Jun 2022 19:48:34 -0400
Subject: [PATCH 06/17] Clean up libstb_image (#1494)

---
 CMakeLists.txt                                |    2 +
 common/CMakeLists.txt                         |    3 +-
 common/util/image_loading.cpp                 |   13 -
 common/util/image_loading.h                   |    1 -
 decompiler/CMakeLists.txt                     |    7 +-
 decompiler/data/TextureDB.cpp                 |    2 +-
 game/CMakeLists.txt                           |    2 +-
 game/graphics/pipelines/opengl.cpp            |    2 +-
 third-party/stb_image.h                       | 8709 -----------------
 third-party/stb_image/CMakeLists.txt          |    9 +
 third-party/stb_image/stb_image.cpp           |    4 +
 .../{tiny_gltf => stb_image}/stb_image.h      |    0
 .../stb_image_write.h                         |    0
 third-party/tiny_gltf/CMakeLists.txt          |    3 +-
 third-party/tiny_gltf/tiny_gltf.cpp           |    6 +-
 15 files changed, 29 insertions(+), 8734 deletions(-)
 delete mode 100644 common/util/image_loading.cpp
 delete mode 100644 common/util/image_loading.h
 delete mode 100644 third-party/stb_image.h
 create mode 100644 third-party/stb_image/CMakeLists.txt
 create mode 100644 third-party/stb_image/stb_image.cpp
 rename third-party/{tiny_gltf => stb_image}/stb_image.h (100%)
 rename third-party/{tiny_gltf => stb_image}/stb_image_write.h (100%)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5ac40a0be8..a8a81a781a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -162,8 +162,10 @@ add_subdirectory(third-party/lzokay EXCLUDE_FROM_ALL)
 
 # build format library
 add_subdirectory(third-party/fmt EXCLUDE_FROM_ALL)
+add_subdirectory(third-party/stb_image EXCLUDE_FROM_ALL)
 add_subdirectory(third-party/tiny_gltf EXCLUDE_FROM_ALL)
 
+
 # discord rich presence
 include_directories(third-party/discord-rpc/include)
 add_subdirectory(third-party/discord-rpc EXCLUDE_FROM_ALL)
diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
index 45e753162f..f813120fc1 100644
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -47,8 +47,7 @@ add_library(common
         util/os.cpp
         util/print_float.cpp
         util/FontUtils.cpp
-        util/FrameLimiter.cpp
-        util/image_loading.cpp)
+        util/FrameLimiter.cpp)
 
 target_link_libraries(common fmt lzokay replxx libzstd_static)
 
diff --git a/common/util/image_loading.cpp b/common/util/image_loading.cpp
deleted file mode 100644
index 62971a6551..0000000000
--- a/common/util/image_loading.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-
-// hides warnings
-#ifdef __linux__
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wcast-qual"
-#endif
-
-#define STB_IMAGE_IMPLEMENTATION
-#include "third-party/stb_image.h"
-
-#ifdef __linux__
-#pragma GCC diagnostic pop
-#endif
\ No newline at end of file
diff --git a/common/util/image_loading.h b/common/util/image_loading.h
deleted file mode 100644
index 36fe77bfaf..0000000000
--- a/common/util/image_loading.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "third-party/stb_image.h"
diff --git a/decompiler/CMakeLists.txt b/decompiler/CMakeLists.txt
index cb0ab3ebbe..82754934f4 100644
--- a/decompiler/CMakeLists.txt
+++ b/decompiler/CMakeLists.txt
@@ -83,6 +83,7 @@ target_link_libraries(decomp
         lzokay
         common
         fmt
+        stb_image
         )
 
 add_executable(decompiler
@@ -92,7 +93,8 @@ target_link_libraries(decompiler
         decomp
         common
         lzokay
-        fmt)
+        fmt
+        stb_image)
 
 
 add_executable(extractor
@@ -103,4 +105,5 @@ target_link_libraries(extractor
         common
         lzokay
         fmt
-        compiler)
+        compiler
+        stb_image)
diff --git a/decompiler/data/TextureDB.cpp b/decompiler/data/TextureDB.cpp
index f145fd1e58..45d46d114c 100644
--- a/decompiler/data/TextureDB.cpp
+++ b/decompiler/data/TextureDB.cpp
@@ -2,7 +2,7 @@
 
 #include "third-party/fmt/core.h"
 #include "common/util/Assert.h"
-#include "third-party/stb_image.h"
+#include "third-party/stb_image/stb_image.h"
 #include <filesystem>
 
 namespace decompiler {
diff --git a/game/CMakeLists.txt b/game/CMakeLists.txt
index c2b4955b67..1a3175406c 100644
--- a/game/CMakeLists.txt
+++ b/game/CMakeLists.txt
@@ -157,7 +157,7 @@ add_subdirectory(sound)
 # we build the runtime as a static library.
 add_library(runtime STATIC ${RUNTIME_SOURCE} "../third-party/glad/src/glad.c")
 
-target_link_libraries(runtime common fmt glfw imgui discord-rpc sound)
+target_link_libraries(runtime common fmt glfw imgui discord-rpc sound stb_image)
 if(WIN32)
     target_link_libraries(runtime mman)
 else()
diff --git a/game/graphics/pipelines/opengl.cpp b/game/graphics/pipelines/opengl.cpp
index f85190e1ae..909b628f5f 100644
--- a/game/graphics/pipelines/opengl.cpp
+++ b/game/graphics/pipelines/opengl.cpp
@@ -21,7 +21,7 @@
 #include "game/system/newpad.h"
 #include "common/log/log.h"
 #include "common/goal_constants.h"
-#include "common/util/image_loading.h"
+#include "third-party/stb_image/stb_image.h"
 #include "game/runtime.h"
 #include "common/util/Timer.h"
 #include "game/graphics/opengl_renderer/debug_gui.h"
diff --git a/third-party/stb_image.h b/third-party/stb_image.h
deleted file mode 100644
index 9c88b32170..0000000000
--- a/third-party/stb_image.h
+++ /dev/null
@@ -1,8709 +0,0 @@
-/* stb_image - v2.27 - public domain image loader - http://nothings.org/stb
-                                  no warranty implied; use at your own risk
-
-   Do this:
-      #define STB_IMAGE_IMPLEMENTATION
-   before you include this file in *one* C or C++ file to create the implementation.
-
-   // i.e. it should look like this:
-   #include ...
-   #include ...
-   #include ...
-   #define STB_IMAGE_IMPLEMENTATION
-   #include "stb_image.h"
-
-   You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
-   And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
-
-
-   QUICK NOTES:
-      Primarily of interest to game developers and other people who can
-          avoid problematic images and only need the trivial interface
-
-      JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
-      PNG 1/2/4/8/16-bit-per-channel
-
-      TGA (not sure what subset, if a subset)
-      BMP non-1bpp, non-RLE
-      PSD (composited view only, no extra channels, 8/16 bit-per-channel)
-
-      GIF (*comp always reports as 4-channel)
-      HDR (radiance rgbE format)
-      PIC (Softimage PIC)
-      PNM (PPM and PGM binary only)
-
-      Animated GIF still needs a proper API, but here's one way to do it:
-          http://gist.github.com/urraka/685d9a6340b26b830d49
-
-      - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
-      - decode from arbitrary I/O callbacks
-      - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
-
-   Full documentation under "DOCUMENTATION" below.
-
-
-LICENSE
-
-  See end of file for license information.
-
-RECENT REVISION HISTORY:
-
-      2.27  (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes
-      2.26  (2020-07-13) many minor fixes
-      2.25  (2020-02-02) fix warnings
-      2.24  (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically
-      2.23  (2019-08-11) fix clang static analysis warning
-      2.22  (2019-03-04) gif fixes, fix warnings
-      2.21  (2019-02-25) fix typo in comment
-      2.20  (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
-      2.19  (2018-02-11) fix warning
-      2.18  (2018-01-30) fix warnings
-      2.17  (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings
-      2.16  (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
-      2.15  (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
-      2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
-      2.13  (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
-      2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
-      2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
-                         RGB-format JPEG; remove white matting in PSD;
-                         allocate large structures on the stack;
-                         correct channel count for PNG & BMP
-      2.10  (2016-01-22) avoid warning introduced in 2.09
-      2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
-
-   See end of file for full revision history.
-
-
- ============================    Contributors    =========================
-
- Image formats                          Extensions, features
-    Sean Barrett (jpeg, png, bmp)          Jetro Lauha (stbi_info)
-    Nicolas Schulz (hdr, psd)              Martin "SpartanJ" Golini (stbi_info)
-    Jonathan Dummer (tga)                  James "moose2000" Brown (iPhone PNG)
-    Jean-Marc Lienher (gif)                Ben "Disch" Wenger (io callbacks)
-    Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG)
-    Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip)
-    Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD)
-    github:urraka (animated gif)           Junggon Kim (PNM comments)
-    Christopher Forseth (animated gif)     Daniel Gibson (16-bit TGA)
-                                           socks-the-fox (16-bit PNG)
-                                           Jeremy Sawicki (handle all ImageNet JPGs)
- Optimizations & bugfixes                  Mikhail Morozov (1-bit BMP)
-    Fabian "ryg" Giesen                    Anael Seghezzi (is-16-bit query)
-    Arseny Kapoulkine                      Simon Breuss (16-bit PNM)
-    John-Mark Allen
-    Carmelo J Fdez-Aguera
-
- Bug & warning fixes
-    Marc LeBlanc            David Woo          Guillaume George     Martins Mozeiko
-    Christpher Lloyd        Jerry Jansson      Joseph Thomson       Blazej Dariusz Roszkowski
-    Phil Jordan                                Dave Moore           Roy Eltham
-    Hayaki Saito            Nathan Reed        Won Chun
-    Luke Graham             Johan Duparc       Nick Verigakis       the Horde3D community
-    Thomas Ruf              Ronny Chevalier                         github:rlyeh
-    Janez Zemva             John Bartholomew   Michal Cichon        github:romigrou
-    Jonathan Blow           Ken Hamada         Tero Hanninen        github:svdijk
-    Eugene Golushkov        Laurent Gomila     Cort Stratton        github:snagar
-    Aruelien Pocheville     Sergio Gonzalez    Thibault Reuille     github:Zelex
-    Cass Everitt            Ryamond Barbiero                        github:grim210
-    Paul Du Bois            Engin Manap        Aldo Culquicondor    github:sammyhw
-    Philipp Wiesemann       Dale Weiler        Oriol Ferrer Mesia   github:phprus
-    Josh Tobin                                 Matthew Gregan       github:poppolopoppo
-    Julian Raschke          Gregory Mullen     Christian Floisand   github:darealshinji
-    Baldur Karlsson         Kevin Schmidt      JR Smith             github:Michaelangel007
-                            Brad Weinberger    Matvey Cherevko      github:mosra
-    Luca Sas                Alexander Veselov  Zack Middleton       [reserved]
-    Ryan C. Gordon          [reserved]                              [reserved]
-                     DO NOT ADD YOUR NAME HERE
-
-                     Jacko Dirks
-
-  To add your name to the credits, pick a random blank space in the middle and fill it.
-  80% of merge conflicts on stb PRs are due to people adding their name at the end
-  of the credits.
-*/
-
-#ifndef STBI_INCLUDE_STB_IMAGE_H
-#define STBI_INCLUDE_STB_IMAGE_H
-
-// DOCUMENTATION
-//
-// Limitations:
-//    - no 12-bit-per-channel JPEG
-//    - no JPEGs with arithmetic coding
-//    - GIF always returns *comp=4
-//
-// Basic usage (see HDR discussion below for HDR usage):
-//    int x,y,n;
-//    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
-//    // ... process data if not NULL ...
-//    // ... x = width, y = height, n = # 8-bit components per pixel ...
-//    // ... replace '0' with '1'..'4' to force that many components per pixel
-//    // ... but 'n' will always be the number that it would have been if you said 0
-//    stbi_image_free(data)
-//
-// Standard parameters:
-//    int *x                 -- outputs image width in pixels
-//    int *y                 -- outputs image height in pixels
-//    int *channels_in_file  -- outputs # of image components in image file
-//    int desired_channels   -- if non-zero, # of image components requested in result
-//
-// The return value from an image loader is an 'unsigned char *' which points
-// to the pixel data, or NULL on an allocation failure or if the image is
-// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
-// with each pixel consisting of N interleaved 8-bit components; the first
-// pixel pointed to is top-left-most in the image. There is no padding between
-// image scanlines or between pixels, regardless of format. The number of
-// components N is 'desired_channels' if desired_channels is non-zero, or
-// *channels_in_file otherwise. If desired_channels is non-zero,
-// *channels_in_file has the number of components that _would_ have been
-// output otherwise. E.g. if you set desired_channels to 4, you will always
-// get RGBA output, but you can check *channels_in_file to see if it's trivially
-// opaque because e.g. there were only 3 channels in the source image.
-//
-// An output image with N components has the following components interleaved
-// in this order in each pixel:
-//
-//     N=#comp     components
-//       1           grey
-//       2           grey, alpha
-//       3           red, green, blue
-//       4           red, green, blue, alpha
-//
-// If image loading fails for any reason, the return value will be NULL,
-// and *x, *y, *channels_in_file will be unchanged. The function
-// stbi_failure_reason() can be queried for an extremely brief, end-user
-// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS
-// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
-// more user-friendly ones.
-//
-// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
-//
-// To query the width, height and component count of an image without having to
-// decode the full file, you can use the stbi_info family of functions:
-//
-//   int x,y,n,ok;
-//   ok = stbi_info(filename, &x, &y, &n);
-//   // returns ok=1 and sets x, y, n if image is a supported format,
-//   // 0 otherwise.
-//
-// Note that stb_image pervasively uses ints in its public API for sizes,
-// including sizes of memory buffers. This is now part of the API and thus
-// hard to change without causing breakage. As a result, the various image
-// loaders all have certain limits on image size; these differ somewhat
-// by format but generally boil down to either just under 2GB or just under
-// 1GB. When the decoded image would be larger than this, stb_image decoding
-// will fail.
-//
-// Additionally, stb_image will reject image files that have any of their
-// dimensions set to a larger value than the configurable STBI_MAX_DIMENSIONS,
-// which defaults to 2**24 = 16777216 pixels. Due to the above memory limit,
-// the only way to have an image with such dimensions load correctly
-// is for it to have a rather extreme aspect ratio. Either way, the
-// assumption here is that such larger images are likely to be malformed
-// or malicious. If you do need to load an image with individual dimensions
-// larger than that, and it still fits in the overall size limit, you can
-// #define STBI_MAX_DIMENSIONS on your own to be something larger.
-//
-// ===========================================================================
-//
-// UNICODE:
-//
-//   If compiling for Windows and you wish to use Unicode filenames, compile
-//   with
-//       #define STBI_WINDOWS_UTF8
-//   and pass utf8-encoded filenames. Call stbi_convert_wchar_to_utf8 to convert
-//   Windows wchar_t filenames to utf8.
-//
-// ===========================================================================
-//
-// Philosophy
-//
-// stb libraries are designed with the following priorities:
-//
-//    1. easy to use
-//    2. easy to maintain
-//    3. good performance
-//
-// Sometimes I let "good performance" creep up in priority over "easy to maintain",
-// and for best performance I may provide less-easy-to-use APIs that give higher
-// performance, in addition to the easy-to-use ones. Nevertheless, it's important
-// to keep in mind that from the standpoint of you, a client of this library,
-// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
-//
-// Some secondary priorities arise directly from the first two, some of which
-// provide more explicit reasons why performance can't be emphasized.
-//
-//    - Portable ("ease of use")
-//    - Small source code footprint ("easy to maintain")
-//    - No dependencies ("ease of use")
-//
-// ===========================================================================
-//
-// I/O callbacks
-//
-// I/O callbacks allow you to read from arbitrary sources, like packaged
-// files or some other source. Data read from callbacks are processed
-// through a small internal buffer (currently 128 bytes) to try to reduce
-// overhead.
-//
-// The three functions you must define are "read" (reads some bytes of data),
-// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
-//
-// ===========================================================================
-//
-// SIMD support
-//
-// The JPEG decoder will try to automatically use SIMD kernels on x86 when
-// supported by the compiler. For ARM Neon support, you must explicitly
-// request it.
-//
-// (The old do-it-yourself SIMD API is no longer supported in the current
-// code.)
-//
-// On x86, SSE2 will automatically be used when available based on a run-time
-// test; if not, the generic C versions are used as a fall-back. On ARM targets,
-// the typical path is to have separate builds for NEON and non-NEON devices
-// (at least this is true for iOS and Android). Therefore, the NEON support is
-// toggled by a build flag: define STBI_NEON to get NEON loops.
-//
-// If for some reason you do not want to use any of SIMD code, or if
-// you have issues compiling it, you can disable it entirely by
-// defining STBI_NO_SIMD.
-//
-// ===========================================================================
-//
-// HDR image support   (disable by defining STBI_NO_HDR)
-//
-// stb_image supports loading HDR images in general, and currently the Radiance
-// .HDR file format specifically. You can still load any file through the existing
-// interface; if you attempt to load an HDR file, it will be automatically remapped
-// to LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
-// both of these constants can be reconfigured through this interface:
-//
-//     stbi_hdr_to_ldr_gamma(2.2f);
-//     stbi_hdr_to_ldr_scale(1.0f);
-//
-// (note, do not use _inverse_ constants; stbi_image will invert them
-// appropriately).
-//
-// Additionally, there is a new, parallel interface for loading files as
-// (linear) floats to preserve the full dynamic range:
-//
-//    float *data = stbi_loadf(filename, &x, &y, &n, 0);
-//
-// If you load LDR images through this interface, those images will
-// be promoted to floating point values, run through the inverse of
-// constants corresponding to the above:
-//
-//     stbi_ldr_to_hdr_scale(1.0f);
-//     stbi_ldr_to_hdr_gamma(2.2f);
-//
-// Finally, given a filename (or an open file or memory block--see header
-// file for details) containing image data, you can query for the "most
-// appropriate" interface to use (that is, whether the image is HDR or
-// not), using:
-//
-//     stbi_is_hdr(char *filename);
-//
-// ===========================================================================
-//
-// iPhone PNG support:
-//
-// We optionally support converting iPhone-formatted PNGs (which store
-// premultiplied BGRA) back to RGB, even though they're internally encoded
-// differently. To enable this conversion, call
-// stbi_convert_iphone_png_to_rgb(1).
-//
-// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
-// pixel to remove any premultiplied alpha *only* if the image file explicitly
-// says there's premultiplied data (currently only happens in iPhone images,
-// and only if iPhone convert-to-rgb processing is on).
-//
-// ===========================================================================
-//
-// ADDITIONAL CONFIGURATION
-//
-//  - You can suppress implementation of any of the decoders to reduce
-//    your code footprint by #defining one or more of the following
-//    symbols before creating the implementation.
-//
-//        STBI_NO_JPEG
-//        STBI_NO_PNG
-//        STBI_NO_BMP
-//        STBI_NO_PSD
-//        STBI_NO_TGA
-//        STBI_NO_GIF
-//        STBI_NO_HDR
-//        STBI_NO_PIC
-//        STBI_NO_PNM   (.ppm and .pgm)
-//
-//  - You can request *only* certain decoders and suppress all other ones
-//    (this will be more forward-compatible, as addition of new decoders
-//    doesn't require you to disable them explicitly):
-//
-//        STBI_ONLY_JPEG
-//        STBI_ONLY_PNG
-//        STBI_ONLY_BMP
-//        STBI_ONLY_PSD
-//        STBI_ONLY_TGA
-//        STBI_ONLY_GIF
-//        STBI_ONLY_HDR
-//        STBI_ONLY_PIC
-//        STBI_ONLY_PNM   (.ppm and .pgm)
-//
-//   - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
-//     want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
-//
-//  - If you define STBI_MAX_DIMENSIONS, stb_image will reject images greater
-//    than that size (in either width or height) without further processing.
-//    This is to let programs in the wild set an upper bound to prevent
-//    denial-of-service attacks on untrusted data, as one could generate a
-//    valid image of gigantic dimensions and force stb_image to allocate a
-//    huge block of memory and spend disproportionate time decoding it. By
-//    default this is set to (1 << 24), which is 16777216, but that's still
-//    very big.
-
-#ifndef STBI_NO_STDIO
-#include <stdio.h>
-#endif  // STBI_NO_STDIO
-
-#define STBI_VERSION 1
-
-enum {
-  STBI_default = 0,  // only used for desired_channels
-
-  STBI_grey = 1,
-  STBI_grey_alpha = 2,
-  STBI_rgb = 3,
-  STBI_rgb_alpha = 4
-};
-
-#include <stdlib.h>
-typedef unsigned char stbi_uc;
-typedef unsigned short stbi_us;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef STBIDEF
-#ifdef STB_IMAGE_STATIC
-#define STBIDEF static
-#else
-#define STBIDEF extern
-#endif
-#endif
-
-//////////////////////////////////////////////////////////////////////////////
-//
-// PRIMARY API - works on images of any type
-//
-
-//
-// load image by filename, open file, or memory buffer
-//
-
-typedef struct {
-  int (*read)(void* user,
-              char* data,
-              int size);  // fill 'data' with 'size' bytes.  return number of bytes actually read
-  void (*skip)(void* user,
-               int n);     // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
-  int (*eof)(void* user);  // returns nonzero if we are at end of file/data
-} stbi_io_callbacks;
-
-////////////////////////////////////
-//
-// 8-bits-per-channel interface
-//
-
-STBIDEF stbi_uc* stbi_load_from_memory(stbi_uc const* buffer,
-                                       int len,
-                                       int* x,
-                                       int* y,
-                                       int* channels_in_file,
-                                       int desired_channels);
-STBIDEF stbi_uc* stbi_load_from_callbacks(stbi_io_callbacks const* clbk,
-                                          void* user,
-                                          int* x,
-                                          int* y,
-                                          int* channels_in_file,
-                                          int desired_channels);
-
-#ifndef STBI_NO_STDIO
-STBIDEF stbi_uc* stbi_load(char const* filename,
-                           int* x,
-                           int* y,
-                           int* channels_in_file,
-                           int desired_channels);
-STBIDEF stbi_uc* stbi_load_from_file(FILE* f,
-                                     int* x,
-                                     int* y,
-                                     int* channels_in_file,
-                                     int desired_channels);
-// for stbi_load_from_file, file pointer is left pointing immediately after image
-#endif
-
-#ifndef STBI_NO_GIF
-STBIDEF stbi_uc* stbi_load_gif_from_memory(stbi_uc const* buffer,
-                                           int len,
-                                           int** delays,
-                                           int* x,
-                                           int* y,
-                                           int* z,
-                                           int* comp,
-                                           int req_comp);
-#endif
-
-#ifdef STBI_WINDOWS_UTF8
-STBIDEF int stbi_convert_wchar_to_utf8(char* buffer, size_t bufferlen, const wchar_t* input);
-#endif
-
-////////////////////////////////////
-//
-// 16-bits-per-channel interface
-//
-
-STBIDEF stbi_us* stbi_load_16_from_memory(stbi_uc const* buffer,
-                                          int len,
-                                          int* x,
-                                          int* y,
-                                          int* channels_in_file,
-                                          int desired_channels);
-STBIDEF stbi_us* stbi_load_16_from_callbacks(stbi_io_callbacks const* clbk,
-                                             void* user,
-                                             int* x,
-                                             int* y,
-                                             int* channels_in_file,
-                                             int desired_channels);
-
-#ifndef STBI_NO_STDIO
-STBIDEF stbi_us* stbi_load_16(char const* filename,
-                              int* x,
-                              int* y,
-                              int* channels_in_file,
-                              int desired_channels);
-STBIDEF stbi_us* stbi_load_from_file_16(FILE* f,
-                                        int* x,
-                                        int* y,
-                                        int* channels_in_file,
-                                        int desired_channels);
-#endif
-
-////////////////////////////////////
-//
-// float-per-channel interface
-//
-#ifndef STBI_NO_LINEAR
-STBIDEF float* stbi_loadf_from_memory(stbi_uc const* buffer,
-                                      int len,
-                                      int* x,
-                                      int* y,
-                                      int* channels_in_file,
-                                      int desired_channels);
-STBIDEF float* stbi_loadf_from_callbacks(stbi_io_callbacks const* clbk,
-                                         void* user,
-                                         int* x,
-                                         int* y,
-                                         int* channels_in_file,
-                                         int desired_channels);
-
-#ifndef STBI_NO_STDIO
-STBIDEF float* stbi_loadf(char const* filename,
-                          int* x,
-                          int* y,
-                          int* channels_in_file,
-                          int desired_channels);
-STBIDEF float* stbi_loadf_from_file(FILE* f,
-                                    int* x,
-                                    int* y,
-                                    int* channels_in_file,
-                                    int desired_channels);
-#endif
-#endif
-
-#ifndef STBI_NO_HDR
-STBIDEF void stbi_hdr_to_ldr_gamma(float gamma);
-STBIDEF void stbi_hdr_to_ldr_scale(float scale);
-#endif  // STBI_NO_HDR
-
-#ifndef STBI_NO_LINEAR
-STBIDEF void stbi_ldr_to_hdr_gamma(float gamma);
-STBIDEF void stbi_ldr_to_hdr_scale(float scale);
-#endif  // STBI_NO_LINEAR
-
-// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
-STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const* clbk, void* user);
-STBIDEF int stbi_is_hdr_from_memory(stbi_uc const* buffer, int len);
-#ifndef STBI_NO_STDIO
-STBIDEF int stbi_is_hdr(char const* filename);
-STBIDEF int stbi_is_hdr_from_file(FILE* f);
-#endif  // STBI_NO_STDIO
-
-// get a VERY brief reason for failure
-// on most compilers (and ALL modern mainstream compilers) this is threadsafe
-STBIDEF const char* stbi_failure_reason(void);
-
-// free the loaded image -- this is just free()
-STBIDEF void stbi_image_free(void* retval_from_stbi_load);
-
-// get image dimensions & components without fully decoding
-STBIDEF int stbi_info_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* comp);
-STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const* clbk,
-                                     void* user,
-                                     int* x,
-                                     int* y,
-                                     int* comp);
-STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const* buffer, int len);
-STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const* clbk, void* user);
-
-#ifndef STBI_NO_STDIO
-STBIDEF int stbi_info(char const* filename, int* x, int* y, int* comp);
-STBIDEF int stbi_info_from_file(FILE* f, int* x, int* y, int* comp);
-STBIDEF int stbi_is_16_bit(char const* filename);
-STBIDEF int stbi_is_16_bit_from_file(FILE* f);
-#endif
-
-// for image formats that explicitly notate that they have premultiplied alpha,
-// we just return the colors as stored in the file. set this flag to force
-// unpremultiplication. results are undefined if the unpremultiply overflow.
-STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
-
-// indicate whether we should process iphone images back to canonical format,
-// or just pass them through "as-is"
-STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
-
-// flip the image vertically, so the first pixel in the output array is the bottom left
-STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
-
-// as above, but only applies to images loaded on the thread that calls the function
-// this function is only available if your compiler supports thread-local variables;
-// calling it will fail to link if your compiler doesn't
-STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply);
-STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert);
-STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip);
-
-// ZLIB client - used by PNG, available for other purposes
-
-STBIDEF char* stbi_zlib_decode_malloc_guesssize(const char* buffer,
-                                                int len,
-                                                int initial_size,
-                                                int* outlen);
-STBIDEF char* stbi_zlib_decode_malloc_guesssize_headerflag(const char* buffer,
-                                                           int len,
-                                                           int initial_size,
-                                                           int* outlen,
-                                                           int parse_header);
-STBIDEF char* stbi_zlib_decode_malloc(const char* buffer, int len, int* outlen);
-STBIDEF int stbi_zlib_decode_buffer(char* obuffer, int olen, const char* ibuffer, int ilen);
-
-STBIDEF char* stbi_zlib_decode_noheader_malloc(const char* buffer, int len, int* outlen);
-STBIDEF int stbi_zlib_decode_noheader_buffer(char* obuffer,
-                                             int olen,
-                                             const char* ibuffer,
-                                             int ilen);
-
-#ifdef __cplusplus
-}
-#endif
-
-//
-//
-////   end header file   /////////////////////////////////////////////////////
-#endif  // STBI_INCLUDE_STB_IMAGE_H
-
-#ifdef STB_IMAGE_IMPLEMENTATION
-
-#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) || \
-    defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) ||  \
-    defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) ||  \
-    defined(STBI_ONLY_ZLIB)
-#ifndef STBI_ONLY_JPEG
-#define STBI_NO_JPEG
-#endif
-#ifndef STBI_ONLY_PNG
-#define STBI_NO_PNG
-#endif
-#ifndef STBI_ONLY_BMP
-#define STBI_NO_BMP
-#endif
-#ifndef STBI_ONLY_PSD
-#define STBI_NO_PSD
-#endif
-#ifndef STBI_ONLY_TGA
-#define STBI_NO_TGA
-#endif
-#ifndef STBI_ONLY_GIF
-#define STBI_NO_GIF
-#endif
-#ifndef STBI_ONLY_HDR
-#define STBI_NO_HDR
-#endif
-#ifndef STBI_ONLY_PIC
-#define STBI_NO_PIC
-#endif
-#ifndef STBI_ONLY_PNM
-#define STBI_NO_PNM
-#endif
-#endif
-
-#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
-#define STBI_NO_ZLIB
-#endif
-
-#include <stdarg.h>
-#include <stddef.h>  // ptrdiff_t on osx
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-
-#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
-#include <math.h>  // ldexp, pow
-#endif
-
-#ifndef STBI_NO_STDIO
-#include <stdio.h>
-#endif
-
-#ifndef STBI_ASSERT
-#include <assert.h>
-#define STBI_ASSERT(x) assert(x)
-#endif
-
-#ifdef __cplusplus
-#define STBI_EXTERN extern "C"
-#else
-#define STBI_EXTERN extern
-#endif
-
-#ifndef _MSC_VER
-#ifdef __cplusplus
-#define stbi_inline inline
-#else
-#define stbi_inline
-#endif
-#else
-#define stbi_inline __forceinline
-#endif
-
-#ifndef STBI_NO_THREAD_LOCALS
-#if defined(__cplusplus) && __cplusplus >= 201103L
-#define STBI_THREAD_LOCAL thread_local
-#elif defined(__GNUC__) && __GNUC__ < 5
-#define STBI_THREAD_LOCAL __thread
-#elif defined(_MSC_VER)
-#define STBI_THREAD_LOCAL __declspec(thread)
-#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__)
-#define STBI_THREAD_LOCAL _Thread_local
-#endif
-
-#ifndef STBI_THREAD_LOCAL
-#if defined(__GNUC__)
-#define STBI_THREAD_LOCAL __thread
-#endif
-#endif
-#endif
-
-#ifdef _MSC_VER
-typedef unsigned short stbi__uint16;
-typedef signed short stbi__int16;
-typedef unsigned int stbi__uint32;
-typedef signed int stbi__int32;
-#else
-#include <stdint.h>
-typedef uint16_t stbi__uint16;
-typedef int16_t stbi__int16;
-typedef uint32_t stbi__uint32;
-typedef int32_t stbi__int32;
-#endif
-
-// should produce compiler error if size is wrong
-typedef unsigned char validate_uint32[sizeof(stbi__uint32) == 4 ? 1 : -1];
-
-#ifdef _MSC_VER
-#define STBI_NOTUSED(v) (void)(v)
-#else
-#define STBI_NOTUSED(v) (void)sizeof(v)
-#endif
-
-#ifdef _MSC_VER
-#define STBI_HAS_LROTL
-#endif
-
-#ifdef STBI_HAS_LROTL
-#define stbi_lrot(x, y) _lrotl(x, y)
-#else
-#define stbi_lrot(x, y) (((x) << (y)) | ((x) >> (-(y)&31)))
-#endif
-
-#if defined(STBI_MALLOC) && defined(STBI_FREE) && \
-    (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
-// ok
-#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && \
-    !defined(STBI_REALLOC_SIZED)
-// ok
-#else
-#error \
-    "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
-#endif
-
-#ifndef STBI_MALLOC
-#define STBI_MALLOC(sz) malloc(sz)
-#define STBI_REALLOC(p, newsz) realloc(p, newsz)
-#define STBI_FREE(p) free(p)
-#endif
-
-#ifndef STBI_REALLOC_SIZED
-#define STBI_REALLOC_SIZED(p, oldsz, newsz) STBI_REALLOC(p, newsz)
-#endif
-
-// x86/x64 detection
-#if defined(__x86_64__) || defined(_M_X64)
-#define STBI__X64_TARGET
-#elif defined(__i386) || defined(_M_IX86)
-#define STBI__X86_TARGET
-#endif
-
-#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
-// gcc doesn't support sse2 intrinsics unless you compile with -msse2,
-// which in turn means it gets to use SSE2 everywhere. This is unfortunate,
-// but previous attempts to provide the SSE2 functions with runtime
-// detection caused numerous issues. The way architecture extensions are
-// exposed in GCC/Clang is, sadly, not really suited for one-file libs.
-// New behavior: if compiled with -msse2, we use SSE2 without any
-// detection; if not, we don't use it at all.
-#define STBI_NO_SIMD
-#endif
-
-#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && \
-    !defined(STBI_NO_SIMD)
-// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
-//
-// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
-// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
-// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
-// simultaneously enabling "-mstackrealign".
-//
-// See https://github.com/nothings/stb/issues/81 for more information.
-//
-// So default to no SSE2 on 32-bit MinGW. If you've read this far and added
-// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
-#define STBI_NO_SIMD
-#endif
-
-#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
-#define STBI_SSE2
-#include <emmintrin.h>
-
-#ifdef _MSC_VER
-
-#if _MSC_VER >= 1400  // not VC6
-#include <intrin.h>   // __cpuid
-static int stbi__cpuid3(void) {
-  int info[4];
-  __cpuid(info, 1);
-  return info[3];
-}
-#else
-static int stbi__cpuid3(void) {
-  int res;
-  __asm {
-      mov  eax,1
-      cpuid
-      mov  res,edx
-  }
-  return res;
-}
-#endif
-
-#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
-
-#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
-static int stbi__sse2_available(void) {
-  int info3 = stbi__cpuid3();
-  return ((info3 >> 26) & 1) != 0;
-}
-#endif
-
-#else  // assume GCC-style if not VC++
-#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
-
-#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
-static int stbi__sse2_available(void) {
-  // If we're even attempting to compile this on GCC/Clang, that means
-  // -msse2 is on, which means the compiler is allowed to use SSE2
-  // instructions at will, and so are we.
-  return 1;
-}
-#endif
-
-#endif
-#endif
-
-// ARM NEON
-#if defined(STBI_NO_SIMD) && defined(STBI_NEON)
-#undef STBI_NEON
-#endif
-
-#ifdef STBI_NEON
-#include <arm_neon.h>
-#ifdef _MSC_VER
-#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
-#else
-#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
-#endif
-#endif
-
-#ifndef STBI_SIMD_ALIGN
-#define STBI_SIMD_ALIGN(type, name) type name
-#endif
-
-#ifndef STBI_MAX_DIMENSIONS
-#define STBI_MAX_DIMENSIONS (1 << 24)
-#endif
-
-///////////////////////////////////////////////
-//
-//  stbi__context struct and start_xxx functions
-
-// stbi__context structure is our basic context used by all images, so it
-// contains all the IO context, plus some basic image information
-typedef struct {
-  stbi__uint32 img_x, img_y;
-  int img_n, img_out_n;
-
-  stbi_io_callbacks io;
-  void* io_user_data;
-
-  int read_from_callbacks;
-  int buflen;
-  stbi_uc buffer_start[128];
-  int callback_already_read;
-
-  stbi_uc *img_buffer, *img_buffer_end;
-  stbi_uc *img_buffer_original, *img_buffer_original_end;
-} stbi__context;
-
-static void stbi__refill_buffer(stbi__context* s);
-
-// initialize a memory-decode context
-static void stbi__start_mem(stbi__context* s, stbi_uc const* buffer, int len) {
-  s->io.read = NULL;
-  s->read_from_callbacks = 0;
-  s->callback_already_read = 0;
-  s->img_buffer = s->img_buffer_original = (stbi_uc*)buffer;
-  s->img_buffer_end = s->img_buffer_original_end = (stbi_uc*)buffer + len;
-}
-
-// initialize a callback-based context
-static void stbi__start_callbacks(stbi__context* s, stbi_io_callbacks* c, void* user) {
-  s->io = *c;
-  s->io_user_data = user;
-  s->buflen = sizeof(s->buffer_start);
-  s->read_from_callbacks = 1;
-  s->callback_already_read = 0;
-  s->img_buffer = s->img_buffer_original = s->buffer_start;
-  stbi__refill_buffer(s);
-  s->img_buffer_original_end = s->img_buffer_end;
-}
-
-#ifndef STBI_NO_STDIO
-
-static int stbi__stdio_read(void* user, char* data, int size) {
-  return (int)fread(data, 1, size, (FILE*)user);
-}
-
-static void stbi__stdio_skip(void* user, int n) {
-  int ch;
-  fseek((FILE*)user, n, SEEK_CUR);
-  ch = fgetc((FILE*)user); /* have to read a byte to reset feof()'s flag */
-  if (ch != EOF) {
-    ungetc(ch, (FILE*)user); /* push byte back onto stream if valid. */
-  }
-}
-
-static int stbi__stdio_eof(void* user) {
-  return feof((FILE*)user) || ferror((FILE*)user);
-}
-
-static stbi_io_callbacks stbi__stdio_callbacks = {
-    stbi__stdio_read,
-    stbi__stdio_skip,
-    stbi__stdio_eof,
-};
-
-static void stbi__start_file(stbi__context* s, FILE* f) {
-  stbi__start_callbacks(s, &stbi__stdio_callbacks, (void*)f);
-}
-
-// static void stop_file(stbi__context *s) { }
-
-#endif  // !STBI_NO_STDIO
-
-static void stbi__rewind(stbi__context* s) {
-  // conceptually rewind SHOULD rewind to the beginning of the stream,
-  // but we just rewind to the beginning of the initial buffer, because
-  // we only use it after doing 'test', which only ever looks at at most 92 bytes
-  s->img_buffer = s->img_buffer_original;
-  s->img_buffer_end = s->img_buffer_original_end;
-}
-
-enum { STBI_ORDER_RGB, STBI_ORDER_BGR };
-
-typedef struct {
-  int bits_per_channel;
-  int num_channels;
-  int channel_order;
-} stbi__result_info;
-
-#ifndef STBI_NO_JPEG
-static int stbi__jpeg_test(stbi__context* s);
-static void* stbi__jpeg_load(stbi__context* s,
-                             int* x,
-                             int* y,
-                             int* comp,
-                             int req_comp,
-                             stbi__result_info* ri);
-static int stbi__jpeg_info(stbi__context* s, int* x, int* y, int* comp);
-#endif
-
-#ifndef STBI_NO_PNG
-static int stbi__png_test(stbi__context* s);
-static void* stbi__png_load(stbi__context* s,
-                            int* x,
-                            int* y,
-                            int* comp,
-                            int req_comp,
-                            stbi__result_info* ri);
-static int stbi__png_info(stbi__context* s, int* x, int* y, int* comp);
-static int stbi__png_is16(stbi__context* s);
-#endif
-
-#ifndef STBI_NO_BMP
-static int stbi__bmp_test(stbi__context* s);
-static void* stbi__bmp_load(stbi__context* s,
-                            int* x,
-                            int* y,
-                            int* comp,
-                            int req_comp,
-                            stbi__result_info* ri);
-static int stbi__bmp_info(stbi__context* s, int* x, int* y, int* comp);
-#endif
-
-#ifndef STBI_NO_TGA
-static int stbi__tga_test(stbi__context* s);
-static void* stbi__tga_load(stbi__context* s,
-                            int* x,
-                            int* y,
-                            int* comp,
-                            int req_comp,
-                            stbi__result_info* ri);
-static int stbi__tga_info(stbi__context* s, int* x, int* y, int* comp);
-#endif
-
-#ifndef STBI_NO_PSD
-static int stbi__psd_test(stbi__context* s);
-static void* stbi__psd_load(stbi__context* s,
-                            int* x,
-                            int* y,
-                            int* comp,
-                            int req_comp,
-                            stbi__result_info* ri,
-                            int bpc);
-static int stbi__psd_info(stbi__context* s, int* x, int* y, int* comp);
-static int stbi__psd_is16(stbi__context* s);
-#endif
-
-#ifndef STBI_NO_HDR
-static int stbi__hdr_test(stbi__context* s);
-static float* stbi__hdr_load(stbi__context* s,
-                             int* x,
-                             int* y,
-                             int* comp,
-                             int req_comp,
-                             stbi__result_info* ri);
-static int stbi__hdr_info(stbi__context* s, int* x, int* y, int* comp);
-#endif
-
-#ifndef STBI_NO_PIC
-static int stbi__pic_test(stbi__context* s);
-static void* stbi__pic_load(stbi__context* s,
-                            int* x,
-                            int* y,
-                            int* comp,
-                            int req_comp,
-                            stbi__result_info* ri);
-static int stbi__pic_info(stbi__context* s, int* x, int* y, int* comp);
-#endif
-
-#ifndef STBI_NO_GIF
-static int stbi__gif_test(stbi__context* s);
-static void* stbi__gif_load(stbi__context* s,
-                            int* x,
-                            int* y,
-                            int* comp,
-                            int req_comp,
-                            stbi__result_info* ri);
-static void* stbi__load_gif_main(stbi__context* s,
-                                 int** delays,
-                                 int* x,
-                                 int* y,
-                                 int* z,
-                                 int* comp,
-                                 int req_comp);
-static int stbi__gif_info(stbi__context* s, int* x, int* y, int* comp);
-#endif
-
-#ifndef STBI_NO_PNM
-static int stbi__pnm_test(stbi__context* s);
-static void* stbi__pnm_load(stbi__context* s,
-                            int* x,
-                            int* y,
-                            int* comp,
-                            int req_comp,
-                            stbi__result_info* ri);
-static int stbi__pnm_info(stbi__context* s, int* x, int* y, int* comp);
-static int stbi__pnm_is16(stbi__context* s);
-#endif
-
-static
-#ifdef STBI_THREAD_LOCAL
-    STBI_THREAD_LOCAL
-#endif
-    const char* stbi__g_failure_reason;
-
-STBIDEF const char* stbi_failure_reason(void) {
-  return stbi__g_failure_reason;
-}
-
-#ifndef STBI_NO_FAILURE_STRINGS
-static int stbi__err(const char* str) {
-  stbi__g_failure_reason = str;
-  return 0;
-}
-#endif
-
-static void* stbi__malloc(size_t size) {
-  return STBI_MALLOC(size);
-}
-
-// stb_image uses ints pervasively, including for offset calculations.
-// therefore the largest decoded image size we can support with the
-// current code, even on 64-bit targets, is INT_MAX. this is not a
-// significant limitation for the intended use case.
-//
-// we do, however, need to make sure our size calculations don't
-// overflow. hence a few helper functions for size calculations that
-// multiply integers together, making sure that they're non-negative
-// and no overflow occurs.
-
-// return 1 if the sum is valid, 0 on overflow.
-// negative terms are considered invalid.
-static int stbi__addsizes_valid(int a, int b) {
-  if (b < 0)
-    return 0;
-  // now 0 <= b <= INT_MAX, hence also
-  // 0 <= INT_MAX - b <= INTMAX.
-  // And "a + b <= INT_MAX" (which might overflow) is the
-  // same as a <= INT_MAX - b (no overflow)
-  return a <= INT_MAX - b;
-}
-
-// returns 1 if the product is valid, 0 on overflow.
-// negative factors are considered invalid.
-static int stbi__mul2sizes_valid(int a, int b) {
-  if (a < 0 || b < 0)
-    return 0;
-  if (b == 0)
-    return 1;  // mul-by-0 is always safe
-  // portable way to check for no overflows in a*b
-  return a <= INT_MAX / b;
-}
-
-#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || \
-    !defined(STBI_NO_HDR)
-// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow
-static int stbi__mad2sizes_valid(int a, int b, int add) {
-  return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a * b, add);
-}
-#endif
-
-// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow
-static int stbi__mad3sizes_valid(int a, int b, int c, int add) {
-  return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a * b, c) &&
-         stbi__addsizes_valid(a * b * c, add);
-}
-
-// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
-#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
-static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add) {
-  return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a * b, c) &&
-         stbi__mul2sizes_valid(a * b * c, d) && stbi__addsizes_valid(a * b * c * d, add);
-}
-#endif
-
-#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || \
-    !defined(STBI_NO_HDR)
-// mallocs with size overflow checking
-static void* stbi__malloc_mad2(int a, int b, int add) {
-  if (!stbi__mad2sizes_valid(a, b, add))
-    return NULL;
-  return stbi__malloc(a * b + add);
-}
-#endif
-
-static void* stbi__malloc_mad3(int a, int b, int c, int add) {
-  if (!stbi__mad3sizes_valid(a, b, c, add))
-    return NULL;
-  return stbi__malloc(a * b * c + add);
-}
-
-#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
-static void* stbi__malloc_mad4(int a, int b, int c, int d, int add) {
-  if (!stbi__mad4sizes_valid(a, b, c, d, add))
-    return NULL;
-  return stbi__malloc(a * b * c * d + add);
-}
-#endif
-
-// stbi__err - error
-// stbi__errpf - error returning pointer to float
-// stbi__errpuc - error returning pointer to unsigned char
-
-#ifdef STBI_NO_FAILURE_STRINGS
-#define stbi__err(x, y) 0
-#elif defined(STBI_FAILURE_USERMSG)
-#define stbi__err(x, y) stbi__err(y)
-#else
-#define stbi__err(x, y) stbi__err(x)
-#endif
-
-#define stbi__errpf(x, y) ((float*)(size_t)(stbi__err(x, y) ? NULL : NULL))
-#define stbi__errpuc(x, y) ((unsigned char*)(size_t)(stbi__err(x, y) ? NULL : NULL))
-
-STBIDEF void stbi_image_free(void* retval_from_stbi_load) {
-  STBI_FREE(retval_from_stbi_load);
-}
-
-#ifndef STBI_NO_LINEAR
-static float* stbi__ldr_to_hdr(stbi_uc* data, int x, int y, int comp);
-#endif
-
-#ifndef STBI_NO_HDR
-static stbi_uc* stbi__hdr_to_ldr(float* data, int x, int y, int comp);
-#endif
-
-static int stbi__vertically_flip_on_load_global = 0;
-
-STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip) {
-  stbi__vertically_flip_on_load_global = flag_true_if_should_flip;
-}
-
-#ifndef STBI_THREAD_LOCAL
-#define stbi__vertically_flip_on_load stbi__vertically_flip_on_load_global
-#else
-static STBI_THREAD_LOCAL int stbi__vertically_flip_on_load_local, stbi__vertically_flip_on_load_set;
-
-STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip) {
-  stbi__vertically_flip_on_load_local = flag_true_if_should_flip;
-  stbi__vertically_flip_on_load_set = 1;
-}
-
-#define stbi__vertically_flip_on_load                                      \
-  (stbi__vertically_flip_on_load_set ? stbi__vertically_flip_on_load_local \
-                                     : stbi__vertically_flip_on_load_global)
-#endif  // STBI_THREAD_LOCAL
-
-static void* stbi__load_main(stbi__context* s,
-                             int* x,
-                             int* y,
-                             int* comp,
-                             int req_comp,
-                             stbi__result_info* ri,
-                             int bpc) {
-  memset(ri, 0, sizeof(*ri));          // make sure it's initialized if we add new fields
-  ri->bits_per_channel = 8;            // default is 8 so most paths don't have to be changed
-  ri->channel_order = STBI_ORDER_RGB;  // all current input & output are this, but this is here so
-                                       // we can add BGR order
-  ri->num_channels = 0;
-
-// test the formats with a very explicit header first (at least a FOURCC
-// or distinctive magic number first)
-#ifndef STBI_NO_PNG
-  if (stbi__png_test(s))
-    return stbi__png_load(s, x, y, comp, req_comp, ri);
-#endif
-#ifndef STBI_NO_BMP
-  if (stbi__bmp_test(s))
-    return stbi__bmp_load(s, x, y, comp, req_comp, ri);
-#endif
-#ifndef STBI_NO_GIF
-  if (stbi__gif_test(s))
-    return stbi__gif_load(s, x, y, comp, req_comp, ri);
-#endif
-#ifndef STBI_NO_PSD
-  if (stbi__psd_test(s))
-    return stbi__psd_load(s, x, y, comp, req_comp, ri, bpc);
-#else
-  STBI_NOTUSED(bpc);
-#endif
-#ifndef STBI_NO_PIC
-  if (stbi__pic_test(s))
-    return stbi__pic_load(s, x, y, comp, req_comp, ri);
-#endif
-
-// then the formats that can end up attempting to load with just 1 or 2
-// bytes matching expectations; these are prone to false positives, so
-// try them later
-#ifndef STBI_NO_JPEG
-  if (stbi__jpeg_test(s))
-    return stbi__jpeg_load(s, x, y, comp, req_comp, ri);
-#endif
-#ifndef STBI_NO_PNM
-  if (stbi__pnm_test(s))
-    return stbi__pnm_load(s, x, y, comp, req_comp, ri);
-#endif
-
-#ifndef STBI_NO_HDR
-  if (stbi__hdr_test(s)) {
-    float* hdr = stbi__hdr_load(s, x, y, comp, req_comp, ri);
-    return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
-  }
-#endif
-
-#ifndef STBI_NO_TGA
-  // test tga last because it's a crappy test!
-  if (stbi__tga_test(s))
-    return stbi__tga_load(s, x, y, comp, req_comp, ri);
-#endif
-
-  return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
-}
-
-static stbi_uc* stbi__convert_16_to_8(stbi__uint16* orig, int w, int h, int channels) {
-  int i;
-  int img_len = w * h * channels;
-  stbi_uc* reduced;
-
-  reduced = (stbi_uc*)stbi__malloc(img_len);
-  if (reduced == NULL)
-    return stbi__errpuc("outofmem", "Out of memory");
-
-  for (i = 0; i < img_len; ++i)
-    reduced[i] =
-        (stbi_uc)((orig[i] >> 8) &
-                  0xFF);  // top half of each byte is sufficient approx of 16->8 bit scaling
-
-  STBI_FREE(orig);
-  return reduced;
-}
-
-static stbi__uint16* stbi__convert_8_to_16(stbi_uc* orig, int w, int h, int channels) {
-  int i;
-  int img_len = w * h * channels;
-  stbi__uint16* enlarged;
-
-  enlarged = (stbi__uint16*)stbi__malloc(img_len * 2);
-  if (enlarged == NULL)
-    return (stbi__uint16*)stbi__errpuc("outofmem", "Out of memory");
-
-  for (i = 0; i < img_len; ++i)
-    enlarged[i] =
-        (stbi__uint16)((orig[i] << 8) +
-                       orig[i]);  // replicate to high and low byte, maps 0->0, 255->0xffff
-
-  STBI_FREE(orig);
-  return enlarged;
-}
-
-static void stbi__vertical_flip(void* image, int w, int h, int bytes_per_pixel) {
-  int row;
-  size_t bytes_per_row = (size_t)w * bytes_per_pixel;
-  stbi_uc temp[2048];
-  stbi_uc* bytes = (stbi_uc*)image;
-
-  for (row = 0; row < (h >> 1); row++) {
-    stbi_uc* row0 = bytes + row * bytes_per_row;
-    stbi_uc* row1 = bytes + (h - row - 1) * bytes_per_row;
-    // swap row0 with row1
-    size_t bytes_left = bytes_per_row;
-    while (bytes_left) {
-      size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
-      memcpy(temp, row0, bytes_copy);
-      memcpy(row0, row1, bytes_copy);
-      memcpy(row1, temp, bytes_copy);
-      row0 += bytes_copy;
-      row1 += bytes_copy;
-      bytes_left -= bytes_copy;
-    }
-  }
-}
-
-#ifndef STBI_NO_GIF
-static void stbi__vertical_flip_slices(void* image, int w, int h, int z, int bytes_per_pixel) {
-  int slice;
-  int slice_size = w * h * bytes_per_pixel;
-
-  stbi_uc* bytes = (stbi_uc*)image;
-  for (slice = 0; slice < z; ++slice) {
-    stbi__vertical_flip(bytes, w, h, bytes_per_pixel);
-    bytes += slice_size;
-  }
-}
-#endif
-
-static unsigned char* stbi__load_and_postprocess_8bit(stbi__context* s,
-                                                      int* x,
-                                                      int* y,
-                                                      int* comp,
-                                                      int req_comp) {
-  stbi__result_info ri;
-  void* result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);
-
-  if (result == NULL)
-    return NULL;
-
-  // it is the responsibility of the loaders to make sure we get either 8 or 16 bit.
-  STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
-
-  if (ri.bits_per_channel != 8) {
-    result = stbi__convert_16_to_8((stbi__uint16*)result, *x, *y, req_comp == 0 ? *comp : req_comp);
-    ri.bits_per_channel = 8;
-  }
-
-  // @TODO: move stbi__convert_format to here
-
-  if (stbi__vertically_flip_on_load) {
-    int channels = req_comp ? req_comp : *comp;
-    stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
-  }
-
-  return (unsigned char*)result;
-}
-
-static stbi__uint16* stbi__load_and_postprocess_16bit(stbi__context* s,
-                                                      int* x,
-                                                      int* y,
-                                                      int* comp,
-                                                      int req_comp) {
-  stbi__result_info ri;
-  void* result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
-
-  if (result == NULL)
-    return NULL;
-
-  // it is the responsibility of the loaders to make sure we get either 8 or 16 bit.
-  STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
-
-  if (ri.bits_per_channel != 16) {
-    result = stbi__convert_8_to_16((stbi_uc*)result, *x, *y, req_comp == 0 ? *comp : req_comp);
-    ri.bits_per_channel = 16;
-  }
-
-  // @TODO: move stbi__convert_format16 to here
-  // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
-
-  if (stbi__vertically_flip_on_load) {
-    int channels = req_comp ? req_comp : *comp;
-    stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
-  }
-
-  return (stbi__uint16*)result;
-}
-
-#if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR)
-static void stbi__float_postprocess(float* result, int* x, int* y, int* comp, int req_comp) {
-  if (stbi__vertically_flip_on_load && result != NULL) {
-    int channels = req_comp ? req_comp : *comp;
-    stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
-  }
-}
-#endif
-
-#ifndef STBI_NO_STDIO
-
-#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
-STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp,
-                                                                    unsigned long flags,
-                                                                    const char* str,
-                                                                    int cbmb,
-                                                                    wchar_t* widestr,
-                                                                    int cchwide);
-STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp,
-                                                                    unsigned long flags,
-                                                                    const wchar_t* widestr,
-                                                                    int cchwide,
-                                                                    char* str,
-                                                                    int cbmb,
-                                                                    const char* defchar,
-                                                                    int* used_default);
-#endif
-
-#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
-STBIDEF int stbi_convert_wchar_to_utf8(char* buffer, size_t bufferlen, const wchar_t* input) {
-  return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int)bufferlen, NULL, NULL);
-}
-#endif
-
-static FILE* stbi__fopen(char const* filename, char const* mode) {
-  FILE* f;
-#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
-  wchar_t wMode[64];
-  wchar_t wFilename[1024];
-  if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename,
-                               sizeof(wFilename) / sizeof(*wFilename)))
-    return 0;
-
-  if (0 ==
-      MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode) / sizeof(*wMode)))
-    return 0;
-
-#if defined(_MSC_VER) && _MSC_VER >= 1400
-  if (0 != _wfopen_s(&f, wFilename, wMode))
-    f = 0;
-#else
-  f = _wfopen(wFilename, wMode);
-#endif
-
-#elif defined(_MSC_VER) && _MSC_VER >= 1400
-  if (0 != fopen_s(&f, filename, mode))
-    f = 0;
-#else
-  f = fopen(filename, mode);
-#endif
-  return f;
-}
-
-STBIDEF stbi_uc* stbi_load(char const* filename, int* x, int* y, int* comp, int req_comp) {
-  FILE* f = stbi__fopen(filename, "rb");
-  unsigned char* result;
-  if (!f)
-    return stbi__errpuc("can't fopen", "Unable to open file");
-  result = stbi_load_from_file(f, x, y, comp, req_comp);
-  fclose(f);
-  return result;
-}
-
-STBIDEF stbi_uc* stbi_load_from_file(FILE* f, int* x, int* y, int* comp, int req_comp) {
-  unsigned char* result;
-  stbi__context s;
-  stbi__start_file(&s, f);
-  result = stbi__load_and_postprocess_8bit(&s, x, y, comp, req_comp);
-  if (result) {
-    // need to 'unget' all the characters in the IO buffer
-    fseek(f, -(int)(s.img_buffer_end - s.img_buffer), SEEK_CUR);
-  }
-  return result;
-}
-
-STBIDEF stbi__uint16* stbi_load_from_file_16(FILE* f, int* x, int* y, int* comp, int req_comp) {
-  stbi__uint16* result;
-  stbi__context s;
-  stbi__start_file(&s, f);
-  result = stbi__load_and_postprocess_16bit(&s, x, y, comp, req_comp);
-  if (result) {
-    // need to 'unget' all the characters in the IO buffer
-    fseek(f, -(int)(s.img_buffer_end - s.img_buffer), SEEK_CUR);
-  }
-  return result;
-}
-
-STBIDEF stbi_us* stbi_load_16(char const* filename, int* x, int* y, int* comp, int req_comp) {
-  FILE* f = stbi__fopen(filename, "rb");
-  stbi__uint16* result;
-  if (!f)
-    return (stbi_us*)stbi__errpuc("can't fopen", "Unable to open file");
-  result = stbi_load_from_file_16(f, x, y, comp, req_comp);
-  fclose(f);
-  return result;
-}
-
-#endif  //! STBI_NO_STDIO
-
-STBIDEF stbi_us* stbi_load_16_from_memory(stbi_uc const* buffer,
-                                          int len,
-                                          int* x,
-                                          int* y,
-                                          int* channels_in_file,
-                                          int desired_channels) {
-  stbi__context s;
-  stbi__start_mem(&s, buffer, len);
-  return stbi__load_and_postprocess_16bit(&s, x, y, channels_in_file, desired_channels);
-}
-
-STBIDEF stbi_us* stbi_load_16_from_callbacks(stbi_io_callbacks const* clbk,
-                                             void* user,
-                                             int* x,
-                                             int* y,
-                                             int* channels_in_file,
-                                             int desired_channels) {
-  stbi__context s;
-  stbi__start_callbacks(&s, (stbi_io_callbacks*)clbk, user);
-  return stbi__load_and_postprocess_16bit(&s, x, y, channels_in_file, desired_channels);
-}
-
-STBIDEF stbi_uc* stbi_load_from_memory(stbi_uc const* buffer,
-                                       int len,
-                                       int* x,
-                                       int* y,
-                                       int* comp,
-                                       int req_comp) {
-  stbi__context s;
-  stbi__start_mem(&s, buffer, len);
-  return stbi__load_and_postprocess_8bit(&s, x, y, comp, req_comp);
-}
-
-STBIDEF stbi_uc* stbi_load_from_callbacks(stbi_io_callbacks const* clbk,
-                                          void* user,
-                                          int* x,
-                                          int* y,
-                                          int* comp,
-                                          int req_comp) {
-  stbi__context s;
-  stbi__start_callbacks(&s, (stbi_io_callbacks*)clbk, user);
-  return stbi__load_and_postprocess_8bit(&s, x, y, comp, req_comp);
-}
-
-#ifndef STBI_NO_GIF
-STBIDEF stbi_uc* stbi_load_gif_from_memory(stbi_uc const* buffer,
-                                           int len,
-                                           int** delays,
-                                           int* x,
-                                           int* y,
-                                           int* z,
-                                           int* comp,
-                                           int req_comp) {
-  unsigned char* result;
-  stbi__context s;
-  stbi__start_mem(&s, buffer, len);
-
-  result = (unsigned char*)stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);
-  if (stbi__vertically_flip_on_load) {
-    stbi__vertical_flip_slices(result, *x, *y, *z, *comp);
-  }
-
-  return result;
-}
-#endif
-
-#ifndef STBI_NO_LINEAR
-static float* stbi__loadf_main(stbi__context* s, int* x, int* y, int* comp, int req_comp) {
-  unsigned char* data;
-#ifndef STBI_NO_HDR
-  if (stbi__hdr_test(s)) {
-    stbi__result_info ri;
-    float* hdr_data = stbi__hdr_load(s, x, y, comp, req_comp, &ri);
-    if (hdr_data)
-      stbi__float_postprocess(hdr_data, x, y, comp, req_comp);
-    return hdr_data;
-  }
-#endif
-  data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
-  if (data)
-    return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
-  return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
-}
-
-STBIDEF float* stbi_loadf_from_memory(stbi_uc const* buffer,
-                                      int len,
-                                      int* x,
-                                      int* y,
-                                      int* comp,
-                                      int req_comp) {
-  stbi__context s;
-  stbi__start_mem(&s, buffer, len);
-  return stbi__loadf_main(&s, x, y, comp, req_comp);
-}
-
-STBIDEF float* stbi_loadf_from_callbacks(stbi_io_callbacks const* clbk,
-                                         void* user,
-                                         int* x,
-                                         int* y,
-                                         int* comp,
-                                         int req_comp) {
-  stbi__context s;
-  stbi__start_callbacks(&s, (stbi_io_callbacks*)clbk, user);
-  return stbi__loadf_main(&s, x, y, comp, req_comp);
-}
-
-#ifndef STBI_NO_STDIO
-STBIDEF float* stbi_loadf(char const* filename, int* x, int* y, int* comp, int req_comp) {
-  float* result;
-  FILE* f = stbi__fopen(filename, "rb");
-  if (!f)
-    return stbi__errpf("can't fopen", "Unable to open file");
-  result = stbi_loadf_from_file(f, x, y, comp, req_comp);
-  fclose(f);
-  return result;
-}
-
-STBIDEF float* stbi_loadf_from_file(FILE* f, int* x, int* y, int* comp, int req_comp) {
-  stbi__context s;
-  stbi__start_file(&s, f);
-  return stbi__loadf_main(&s, x, y, comp, req_comp);
-}
-#endif  // !STBI_NO_STDIO
-
-#endif  // !STBI_NO_LINEAR
-
-// these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is
-// defined, for API simplicity; if STBI_NO_LINEAR is defined, it always
-// reports false!
-
-STBIDEF int stbi_is_hdr_from_memory(stbi_uc const* buffer, int len) {
-#ifndef STBI_NO_HDR
-  stbi__context s;
-  stbi__start_mem(&s, buffer, len);
-  return stbi__hdr_test(&s);
-#else
-  STBI_NOTUSED(buffer);
-  STBI_NOTUSED(len);
-  return 0;
-#endif
-}
-
-#ifndef STBI_NO_STDIO
-STBIDEF int stbi_is_hdr(char const* filename) {
-  FILE* f = stbi__fopen(filename, "rb");
-  int result = 0;
-  if (f) {
-    result = stbi_is_hdr_from_file(f);
-    fclose(f);
-  }
-  return result;
-}
-
-STBIDEF int stbi_is_hdr_from_file(FILE* f) {
-#ifndef STBI_NO_HDR
-  long pos = ftell(f);
-  int res;
-  stbi__context s;
-  stbi__start_file(&s, f);
-  res = stbi__hdr_test(&s);
-  fseek(f, pos, SEEK_SET);
-  return res;
-#else
-  STBI_NOTUSED(f);
-  return 0;
-#endif
-}
-#endif  // !STBI_NO_STDIO
-
-STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const* clbk, void* user) {
-#ifndef STBI_NO_HDR
-  stbi__context s;
-  stbi__start_callbacks(&s, (stbi_io_callbacks*)clbk, user);
-  return stbi__hdr_test(&s);
-#else
-  STBI_NOTUSED(clbk);
-  STBI_NOTUSED(user);
-  return 0;
-#endif
-}
-
-#ifndef STBI_NO_LINEAR
-static float stbi__l2h_gamma = 2.2f, stbi__l2h_scale = 1.0f;
-
-STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) {
-  stbi__l2h_gamma = gamma;
-}
-STBIDEF void stbi_ldr_to_hdr_scale(float scale) {
-  stbi__l2h_scale = scale;
-}
-#endif
-
-static float stbi__h2l_gamma_i = 1.0f / 2.2f, stbi__h2l_scale_i = 1.0f;
-
-STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) {
-  stbi__h2l_gamma_i = 1 / gamma;
-}
-STBIDEF void stbi_hdr_to_ldr_scale(float scale) {
-  stbi__h2l_scale_i = 1 / scale;
-}
-
-//////////////////////////////////////////////////////////////////////////////
-//
-// Common code used by all image loaders
-//
-
-enum { STBI__SCAN_load = 0, STBI__SCAN_type, STBI__SCAN_header };
-
-static void stbi__refill_buffer(stbi__context* s) {
-  int n = (s->io.read)(s->io_user_data, (char*)s->buffer_start, s->buflen);
-  s->callback_already_read += (int)(s->img_buffer - s->img_buffer_original);
-  if (n == 0) {
-    // at end of file, treat same as if from memory, but need to handle case
-    // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
-    s->read_from_callbacks = 0;
-    s->img_buffer = s->buffer_start;
-    s->img_buffer_end = s->buffer_start + 1;
-    *s->img_buffer = 0;
-  } else {
-    s->img_buffer = s->buffer_start;
-    s->img_buffer_end = s->buffer_start + n;
-  }
-}
-
-stbi_inline static stbi_uc stbi__get8(stbi__context* s) {
-  if (s->img_buffer < s->img_buffer_end)
-    return *s->img_buffer++;
-  if (s->read_from_callbacks) {
-    stbi__refill_buffer(s);
-    return *s->img_buffer++;
-  }
-  return 0;
-}
-
-#if defined(STBI_NO_JPEG) && defined(STBI_NO_HDR) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
-// nothing
-#else
-stbi_inline static int stbi__at_eof(stbi__context* s) {
-  if (s->io.read) {
-    if (!(s->io.eof)(s->io_user_data))
-      return 0;
-    // if feof() is true, check if buffer = end
-    // special case: we've only got the special 0 character at the end
-    if (s->read_from_callbacks == 0)
-      return 1;
-  }
-
-  return s->img_buffer >= s->img_buffer_end;
-}
-#endif
-
-#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && \
-    defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC)
-// nothing
-#else
-static void stbi__skip(stbi__context* s, int n) {
-  if (n == 0)
-    return;  // already there!
-  if (n < 0) {
-    s->img_buffer = s->img_buffer_end;
-    return;
-  }
-  if (s->io.read) {
-    int blen = (int)(s->img_buffer_end - s->img_buffer);
-    if (blen < n) {
-      s->img_buffer = s->img_buffer_end;
-      (s->io.skip)(s->io_user_data, n - blen);
-      return;
-    }
-  }
-  s->img_buffer += n;
-}
-#endif
-
-#if defined(STBI_NO_PNG) && defined(STBI_NO_TGA) && defined(STBI_NO_HDR) && defined(STBI_NO_PNM)
-// nothing
-#else
-static int stbi__getn(stbi__context* s, stbi_uc* buffer, int n) {
-  if (s->io.read) {
-    int blen = (int)(s->img_buffer_end - s->img_buffer);
-    if (blen < n) {
-      int res, count;
-
-      memcpy(buffer, s->img_buffer, blen);
-
-      count = (s->io.read)(s->io_user_data, (char*)buffer + blen, n - blen);
-      res = (count == (n - blen));
-      s->img_buffer = s->img_buffer_end;
-      return res;
-    }
-  }
-
-  if (s->img_buffer + n <= s->img_buffer_end) {
-    memcpy(buffer, s->img_buffer, n);
-    s->img_buffer += n;
-    return 1;
-  } else
-    return 0;
-}
-#endif
-
-#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)
-// nothing
-#else
-static int stbi__get16be(stbi__context* s) {
-  int z = stbi__get8(s);
-  return (z << 8) + stbi__get8(s);
-}
-#endif
-
-#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)
-// nothing
-#else
-static stbi__uint32 stbi__get32be(stbi__context* s) {
-  stbi__uint32 z = stbi__get16be(s);
-  return (z << 16) + stbi__get16be(s);
-}
-#endif
-
-#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
-// nothing
-#else
-static int stbi__get16le(stbi__context* s) {
-  int z = stbi__get8(s);
-  return z + (stbi__get8(s) << 8);
-}
-#endif
-
-#ifndef STBI_NO_BMP
-static stbi__uint32 stbi__get32le(stbi__context* s) {
-  stbi__uint32 z = stbi__get16le(s);
-  z += (stbi__uint32)stbi__get16le(s) << 16;
-  return z;
-}
-#endif
-
-#define STBI__BYTECAST(x) ((stbi_uc)((x)&255))  // truncate int to byte without warnings
-
-#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && \
-    defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) &&  \
-    defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
-// nothing
-#else
-//////////////////////////////////////////////////////////////////////////////
-//
-//  generic converter from built-in img_n to req_comp
-//    individual types do this automatically as much as possible (e.g. jpeg
-//    does all cases internally since it needs to colorspace convert anyway,
-//    and it never has alpha, so very few cases ). png can automatically
-//    interleave an alpha=255 channel, but falls back to this for other cases
-//
-//  assume data buffer is malloced, so malloc a new one and free that one
-//  only failure mode is malloc failing
-
-static stbi_uc stbi__compute_y(int r, int g, int b) {
-  return (stbi_uc)(((r * 77) + (g * 150) + (29 * b)) >> 8);
-}
-#endif
-
-#if defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && \
-    defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
-// nothing
-#else
-static unsigned char* stbi__convert_format(unsigned char* data,
-                                           int img_n,
-                                           int req_comp,
-                                           unsigned int x,
-                                           unsigned int y) {
-  int i, j;
-  unsigned char* good;
-
-  if (req_comp == img_n)
-    return data;
-  STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
-
-  good = (unsigned char*)stbi__malloc_mad3(req_comp, x, y, 0);
-  if (good == NULL) {
-    STBI_FREE(data);
-    return stbi__errpuc("outofmem", "Out of memory");
-  }
-
-  for (j = 0; j < (int)y; ++j) {
-    unsigned char* src = data + j * x * img_n;
-    unsigned char* dest = good + j * x * req_comp;
-
-#define STBI__COMBO(a, b) ((a)*8 + (b))
-#define STBI__CASE(a, b)  \
-  case STBI__COMBO(a, b): \
-    for (i = x - 1; i >= 0; --i, src += a, dest += b)
-    // convert source image with img_n components to one with req_comp components;
-    // avoid switch per pixel, so use switch per scanline and massive macros
-    switch (STBI__COMBO(img_n, req_comp)) {
-      STBI__CASE(1, 2) {
-        dest[0] = src[0];
-        dest[1] = 255;
-      }
-      break;
-      STBI__CASE(1, 3) { dest[0] = dest[1] = dest[2] = src[0]; }
-      break;
-      STBI__CASE(1, 4) {
-        dest[0] = dest[1] = dest[2] = src[0];
-        dest[3] = 255;
-      }
-      break;
-      STBI__CASE(2, 1) { dest[0] = src[0]; }
-      break;
-      STBI__CASE(2, 3) { dest[0] = dest[1] = dest[2] = src[0]; }
-      break;
-      STBI__CASE(2, 4) {
-        dest[0] = dest[1] = dest[2] = src[0];
-        dest[3] = src[1];
-      }
-      break;
-      STBI__CASE(3, 4) {
-        dest[0] = src[0];
-        dest[1] = src[1];
-        dest[2] = src[2];
-        dest[3] = 255;
-      }
-      break;
-      STBI__CASE(3, 1) { dest[0] = stbi__compute_y(src[0], src[1], src[2]); }
-      break;
-      STBI__CASE(3, 2) {
-        dest[0] = stbi__compute_y(src[0], src[1], src[2]);
-        dest[1] = 255;
-      }
-      break;
-      STBI__CASE(4, 1) { dest[0] = stbi__compute_y(src[0], src[1], src[2]); }
-      break;
-      STBI__CASE(4, 2) {
-        dest[0] = stbi__compute_y(src[0], src[1], src[2]);
-        dest[1] = src[3];
-      }
-      break;
-      STBI__CASE(4, 3) {
-        dest[0] = src[0];
-        dest[1] = src[1];
-        dest[2] = src[2];
-      }
-      break;
-      default:
-        STBI_ASSERT(0);
-        STBI_FREE(data);
-        STBI_FREE(good);
-        return stbi__errpuc("unsupported", "Unsupported format conversion");
-    }
-#undef STBI__CASE
-  }
-
-  STBI_FREE(data);
-  return good;
-}
-#endif
-
-#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)
-// nothing
-#else
-static stbi__uint16 stbi__compute_y_16(int r, int g, int b) {
-  return (stbi__uint16)(((r * 77) + (g * 150) + (29 * b)) >> 8);
-}
-#endif
-
-#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)
-// nothing
-#else
-static stbi__uint16* stbi__convert_format16(stbi__uint16* data,
-                                            int img_n,
-                                            int req_comp,
-                                            unsigned int x,
-                                            unsigned int y) {
-  int i, j;
-  stbi__uint16* good;
-
-  if (req_comp == img_n)
-    return data;
-  STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
-
-  good = (stbi__uint16*)stbi__malloc(req_comp * x * y * 2);
-  if (good == NULL) {
-    STBI_FREE(data);
-    return (stbi__uint16*)stbi__errpuc("outofmem", "Out of memory");
-  }
-
-  for (j = 0; j < (int)y; ++j) {
-    stbi__uint16* src = data + j * x * img_n;
-    stbi__uint16* dest = good + j * x * req_comp;
-
-#define STBI__COMBO(a, b) ((a)*8 + (b))
-#define STBI__CASE(a, b)  \
-  case STBI__COMBO(a, b): \
-    for (i = x - 1; i >= 0; --i, src += a, dest += b)
-    // convert source image with img_n components to one with req_comp components;
-    // avoid switch per pixel, so use switch per scanline and massive macros
-    switch (STBI__COMBO(img_n, req_comp)) {
-      STBI__CASE(1, 2) {
-        dest[0] = src[0];
-        dest[1] = 0xffff;
-      }
-      break;
-      STBI__CASE(1, 3) { dest[0] = dest[1] = dest[2] = src[0]; }
-      break;
-      STBI__CASE(1, 4) {
-        dest[0] = dest[1] = dest[2] = src[0];
-        dest[3] = 0xffff;
-      }
-      break;
-      STBI__CASE(2, 1) { dest[0] = src[0]; }
-      break;
-      STBI__CASE(2, 3) { dest[0] = dest[1] = dest[2] = src[0]; }
-      break;
-      STBI__CASE(2, 4) {
-        dest[0] = dest[1] = dest[2] = src[0];
-        dest[3] = src[1];
-      }
-      break;
-      STBI__CASE(3, 4) {
-        dest[0] = src[0];
-        dest[1] = src[1];
-        dest[2] = src[2];
-        dest[3] = 0xffff;
-      }
-      break;
-      STBI__CASE(3, 1) { dest[0] = stbi__compute_y_16(src[0], src[1], src[2]); }
-      break;
-      STBI__CASE(3, 2) {
-        dest[0] = stbi__compute_y_16(src[0], src[1], src[2]);
-        dest[1] = 0xffff;
-      }
-      break;
-      STBI__CASE(4, 1) { dest[0] = stbi__compute_y_16(src[0], src[1], src[2]); }
-      break;
-      STBI__CASE(4, 2) {
-        dest[0] = stbi__compute_y_16(src[0], src[1], src[2]);
-        dest[1] = src[3];
-      }
-      break;
-      STBI__CASE(4, 3) {
-        dest[0] = src[0];
-        dest[1] = src[1];
-        dest[2] = src[2];
-      }
-      break;
-      default:
-        STBI_ASSERT(0);
-        STBI_FREE(data);
-        STBI_FREE(good);
-        return (stbi__uint16*)stbi__errpuc("unsupported", "Unsupported format conversion");
-    }
-#undef STBI__CASE
-  }
-
-  STBI_FREE(data);
-  return good;
-}
-#endif
-
-#ifndef STBI_NO_LINEAR
-static float* stbi__ldr_to_hdr(stbi_uc* data, int x, int y, int comp) {
-  int i, k, n;
-  float* output;
-  if (!data)
-    return NULL;
-  output = (float*)stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
-  if (output == NULL) {
-    STBI_FREE(data);
-    return stbi__errpf("outofmem", "Out of memory");
-  }
-  // compute number of non-alpha components
-  if (comp & 1)
-    n = comp;
-  else
-    n = comp - 1;
-  for (i = 0; i < x * y; ++i) {
-    for (k = 0; k < n; ++k) {
-      output[i * comp + k] =
-          (float)(pow(data[i * comp + k] / 255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
-    }
-  }
-  if (n < comp) {
-    for (i = 0; i < x * y; ++i) {
-      output[i * comp + n] = data[i * comp + n] / 255.0f;
-    }
-  }
-  STBI_FREE(data);
-  return output;
-}
-#endif
-
-#ifndef STBI_NO_HDR
-#define stbi__float2int(x) ((int)(x))
-static stbi_uc* stbi__hdr_to_ldr(float* data, int x, int y, int comp) {
-  int i, k, n;
-  stbi_uc* output;
-  if (!data)
-    return NULL;
-  output = (stbi_uc*)stbi__malloc_mad3(x, y, comp, 0);
-  if (output == NULL) {
-    STBI_FREE(data);
-    return stbi__errpuc("outofmem", "Out of memory");
-  }
-  // compute number of non-alpha components
-  if (comp & 1)
-    n = comp;
-  else
-    n = comp - 1;
-  for (i = 0; i < x * y; ++i) {
-    for (k = 0; k < n; ++k) {
-      float z = (float)pow(data[i * comp + k] * stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
-      if (z < 0)
-        z = 0;
-      if (z > 255)
-        z = 255;
-      output[i * comp + k] = (stbi_uc)stbi__float2int(z);
-    }
-    if (k < comp) {
-      float z = data[i * comp + k] * 255 + 0.5f;
-      if (z < 0)
-        z = 0;
-      if (z > 255)
-        z = 255;
-      output[i * comp + k] = (stbi_uc)stbi__float2int(z);
-    }
-  }
-  STBI_FREE(data);
-  return output;
-}
-#endif
-
-//////////////////////////////////////////////////////////////////////////////
-//
-//  "baseline" JPEG/JFIF decoder
-//
-//    simple implementation
-//      - doesn't support delayed output of y-dimension
-//      - simple interface (only one output format: 8-bit interleaved RGB)
-//      - doesn't try to recover corrupt jpegs
-//      - doesn't allow partial loading, loading multiple at once
-//      - still fast on x86 (copying globals into locals doesn't help x86)
-//      - allocates lots of intermediate memory (full size of all components)
-//        - non-interleaved case requires this anyway
-//        - allows good upsampling (see next)
-//    high-quality
-//      - upsampled channels are bilinearly interpolated, even across blocks
-//      - quality integer IDCT derived from IJG's 'slow'
-//    performance
-//      - fast huffman; reasonable integer IDCT
-//      - some SIMD kernels for common paths on targets with SSE2/NEON
-//      - uses a lot of intermediate memory, could cache poorly
-
-#ifndef STBI_NO_JPEG
-
-// huffman decoding acceleration
-#define FAST_BITS 9  // larger handles more cases; smaller stomps less cache
-
-typedef struct {
-  stbi_uc fast[1 << FAST_BITS];
-  // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
-  stbi__uint16 code[256];
-  stbi_uc values[256];
-  stbi_uc size[257];
-  unsigned int maxcode[18];
-  int delta[17];  // old 'firstsymbol' - old 'firstcode'
-} stbi__huffman;
-
-typedef struct {
-  stbi__context* s;
-  stbi__huffman huff_dc[4];
-  stbi__huffman huff_ac[4];
-  stbi__uint16 dequant[4][64];
-  stbi__int16 fast_ac[4][1 << FAST_BITS];
-
-  // sizes for components, interleaved MCUs
-  int img_h_max, img_v_max;
-  int img_mcu_x, img_mcu_y;
-  int img_mcu_w, img_mcu_h;
-
-  // definition of jpeg image component
-  struct {
-    int id;
-    int h, v;
-    int tq;
-    int hd, ha;
-    int dc_pred;
-
-    int x, y, w2, h2;
-    stbi_uc* data;
-    void *raw_data, *raw_coeff;
-    stbi_uc* linebuf;
-    short* coeff;          // progressive only
-    int coeff_w, coeff_h;  // number of 8x8 coefficient blocks
-  } img_comp[4];
-
-  stbi__uint32 code_buffer;  // jpeg entropy-coded buffer
-  int code_bits;             // number of valid bits
-  unsigned char marker;      // marker seen while filling entropy buffer
-  int nomore;                // flag if we saw a marker so must stop
-
-  int progressive;
-  int spec_start;
-  int spec_end;
-  int succ_high;
-  int succ_low;
-  int eob_run;
-  int jfif;
-  int app14_color_transform;  // Adobe APP14 tag
-  int rgb;
-
-  int scan_n, order[4];
-  int restart_interval, todo;
-
-  // kernels
-  void (*idct_block_kernel)(stbi_uc* out, int out_stride, short data[64]);
-  void (*YCbCr_to_RGB_kernel)(stbi_uc* out,
-                              const stbi_uc* y,
-                              const stbi_uc* pcb,
-                              const stbi_uc* pcr,
-                              int count,
-                              int step);
-  stbi_uc* (
-      *resample_row_hv_2_kernel)(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs);
-} stbi__jpeg;
-
-static int stbi__build_huffman(stbi__huffman* h, int* count) {
-  int i, j, k = 0;
-  unsigned int code;
-  // build size list for each symbol (from JPEG spec)
-  for (i = 0; i < 16; ++i)
-    for (j = 0; j < count[i]; ++j)
-      h->size[k++] = (stbi_uc)(i + 1);
-  h->size[k] = 0;
-
-  // compute actual symbols (from jpeg spec)
-  code = 0;
-  k = 0;
-  for (j = 1; j <= 16; ++j) {
-    // compute delta to add to code to compute symbol id
-    h->delta[j] = k - code;
-    if (h->size[k] == j) {
-      while (h->size[k] == j)
-        h->code[k++] = (stbi__uint16)(code++);
-      if (code - 1 >= (1u << j))
-        return stbi__err("bad code lengths", "Corrupt JPEG");
-    }
-    // compute largest code + 1 for this size, preshifted as needed later
-    h->maxcode[j] = code << (16 - j);
-    code <<= 1;
-  }
-  h->maxcode[j] = 0xffffffff;
-
-  // build non-spec acceleration table; 255 is flag for not-accelerated
-  memset(h->fast, 255, 1 << FAST_BITS);
-  for (i = 0; i < k; ++i) {
-    int s = h->size[i];
-    if (s <= FAST_BITS) {
-      int c = h->code[i] << (FAST_BITS - s);
-      int m = 1 << (FAST_BITS - s);
-      for (j = 0; j < m; ++j) {
-        h->fast[c + j] = (stbi_uc)i;
-      }
-    }
-  }
-  return 1;
-}
-
-// build a table that decodes both magnitude and value of small ACs in
-// one go.
-static void stbi__build_fast_ac(stbi__int16* fast_ac, stbi__huffman* h) {
-  int i;
-  for (i = 0; i < (1 << FAST_BITS); ++i) {
-    stbi_uc fast = h->fast[i];
-    fast_ac[i] = 0;
-    if (fast < 255) {
-      int rs = h->values[fast];
-      int run = (rs >> 4) & 15;
-      int magbits = rs & 15;
-      int len = h->size[fast];
-
-      if (magbits && len + magbits <= FAST_BITS) {
-        // magnitude code followed by receive_extend code
-        int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
-        int m = 1 << (magbits - 1);
-        if (k < m)
-          k += (~0U << magbits) + 1;
-        // if the result is small enough, we can fit it in fast_ac table
-        if (k >= -128 && k <= 127)
-          fast_ac[i] = (stbi__int16)((k * 256) + (run * 16) + (len + magbits));
-      }
-    }
-  }
-}
-
-static void stbi__grow_buffer_unsafe(stbi__jpeg* j) {
-  do {
-    unsigned int b = j->nomore ? 0 : stbi__get8(j->s);
-    if (b == 0xff) {
-      int c = stbi__get8(j->s);
-      while (c == 0xff)
-        c = stbi__get8(j->s);  // consume fill bytes
-      if (c != 0) {
-        j->marker = (unsigned char)c;
-        j->nomore = 1;
-        return;
-      }
-    }
-    j->code_buffer |= b << (24 - j->code_bits);
-    j->code_bits += 8;
-  } while (j->code_bits <= 24);
-}
-
-// (1 << n) - 1
-static const stbi__uint32 stbi__bmask[17] = {0,   1,    3,    7,    15,   31,    63,    127,  255,
-                                             511, 1023, 2047, 4095, 8191, 16383, 32767, 65535};
-
-// decode a jpeg huffman value from the bitstream
-stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg* j, stbi__huffman* h) {
-  unsigned int temp;
-  int c, k;
-
-  if (j->code_bits < 16)
-    stbi__grow_buffer_unsafe(j);
-
-  // look at the top FAST_BITS and determine what symbol ID it is,
-  // if the code is <= FAST_BITS
-  c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
-  k = h->fast[c];
-  if (k < 255) {
-    int s = h->size[k];
-    if (s > j->code_bits)
-      return -1;
-    j->code_buffer <<= s;
-    j->code_bits -= s;
-    return h->values[k];
-  }
-
-  // naive test is to shift the code_buffer down so k bits are
-  // valid, then test against maxcode. To speed this up, we've
-  // preshifted maxcode left so that it has (16-k) 0s at the
-  // end; in other words, regardless of the number of bits, it
-  // wants to be compared against something shifted to have 16;
-  // that way we don't need to shift inside the loop.
-  temp = j->code_buffer >> 16;
-  for (k = FAST_BITS + 1;; ++k)
-    if (temp < h->maxcode[k])
-      break;
-  if (k == 17) {
-    // error! code not found
-    j->code_bits -= 16;
-    return -1;
-  }
-
-  if (k > j->code_bits)
-    return -1;
-
-  // convert the huffman code to the symbol id
-  c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
-  STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
-
-  // convert the id to a symbol
-  j->code_bits -= k;
-  j->code_buffer <<= k;
-  return h->values[c];
-}
-
-// bias[n] = (-1<<n) + 1
-static const int stbi__jbias[16] = {0,    -1,   -3,    -7,    -15,   -31,   -63,    -127,
-                                    -255, -511, -1023, -2047, -4095, -8191, -16383, -32767};
-
-// combined JPEG 'receive' and JPEG 'extend', since baseline
-// always extends everything it receives.
-stbi_inline static int stbi__extend_receive(stbi__jpeg* j, int n) {
-  unsigned int k;
-  int sgn;
-  if (j->code_bits < n)
-    stbi__grow_buffer_unsafe(j);
-
-  sgn = j->code_buffer >>
-        31;  // sign bit always in MSB; 0 if MSB clear (positive), 1 if MSB set (negative)
-  k = stbi_lrot(j->code_buffer, n);
-  j->code_buffer = k & ~stbi__bmask[n];
-  k &= stbi__bmask[n];
-  j->code_bits -= n;
-  return k + (stbi__jbias[n] & (sgn - 1));
-}
-
-// get some unsigned bits
-stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg* j, int n) {
-  unsigned int k;
-  if (j->code_bits < n)
-    stbi__grow_buffer_unsafe(j);
-  k = stbi_lrot(j->code_buffer, n);
-  j->code_buffer = k & ~stbi__bmask[n];
-  k &= stbi__bmask[n];
-  j->code_bits -= n;
-  return k;
-}
-
-stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg* j) {
-  unsigned int k;
-  if (j->code_bits < 1)
-    stbi__grow_buffer_unsafe(j);
-  k = j->code_buffer;
-  j->code_buffer <<= 1;
-  --j->code_bits;
-  return k & 0x80000000;
-}
-
-// given a value that's at position X in the zigzag stream,
-// where does it appear in the 8x8 matrix coded as row-major?
-static const stbi_uc stbi__jpeg_dezigzag[64 + 15] = {
-    0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, 27, 20,
-    13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59,
-    52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63,
-    // let corrupt input sample past end
-    63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63};
-
-// decode one 64-entry block--
-static int stbi__jpeg_decode_block(stbi__jpeg* j,
-                                   short data[64],
-                                   stbi__huffman* hdc,
-                                   stbi__huffman* hac,
-                                   stbi__int16* fac,
-                                   int b,
-                                   stbi__uint16* dequant) {
-  int diff, dc, k;
-  int t;
-
-  if (j->code_bits < 16)
-    stbi__grow_buffer_unsafe(j);
-  t = stbi__jpeg_huff_decode(j, hdc);
-  if (t < 0 || t > 15)
-    return stbi__err("bad huffman code", "Corrupt JPEG");
-
-  // 0 all the ac values now so we can do it 32-bits at a time
-  memset(data, 0, 64 * sizeof(data[0]));
-
-  diff = t ? stbi__extend_receive(j, t) : 0;
-  dc = j->img_comp[b].dc_pred + diff;
-  j->img_comp[b].dc_pred = dc;
-  data[0] = (short)(dc * dequant[0]);
-
-  // decode AC components, see JPEG spec
-  k = 1;
-  do {
-    unsigned int zig;
-    int c, r, s;
-    if (j->code_bits < 16)
-      stbi__grow_buffer_unsafe(j);
-    c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
-    r = fac[c];
-    if (r) {               // fast-AC path
-      k += (r >> 4) & 15;  // run
-      s = r & 15;          // combined length
-      j->code_buffer <<= s;
-      j->code_bits -= s;
-      // decode into unzigzag'd location
-      zig = stbi__jpeg_dezigzag[k++];
-      data[zig] = (short)((r >> 8) * dequant[zig]);
-    } else {
-      int rs = stbi__jpeg_huff_decode(j, hac);
-      if (rs < 0)
-        return stbi__err("bad huffman code", "Corrupt JPEG");
-      s = rs & 15;
-      r = rs >> 4;
-      if (s == 0) {
-        if (rs != 0xf0)
-          break;  // end block
-        k += 16;
-      } else {
-        k += r;
-        // decode into unzigzag'd location
-        zig = stbi__jpeg_dezigzag[k++];
-        data[zig] = (short)(stbi__extend_receive(j, s) * dequant[zig]);
-      }
-    }
-  } while (k < 64);
-  return 1;
-}
-
-static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg* j,
-                                           short data[64],
-                                           stbi__huffman* hdc,
-                                           int b) {
-  int diff, dc;
-  int t;
-  if (j->spec_end != 0)
-    return stbi__err("can't merge dc and ac", "Corrupt JPEG");
-
-  if (j->code_bits < 16)
-    stbi__grow_buffer_unsafe(j);
-
-  if (j->succ_high == 0) {
-    // first scan for DC coefficient, must be first
-    memset(data, 0, 64 * sizeof(data[0]));  // 0 all the ac values now
-    t = stbi__jpeg_huff_decode(j, hdc);
-    if (t < 0 || t > 15)
-      return stbi__err("can't merge dc and ac", "Corrupt JPEG");
-    diff = t ? stbi__extend_receive(j, t) : 0;
-
-    dc = j->img_comp[b].dc_pred + diff;
-    j->img_comp[b].dc_pred = dc;
-    data[0] = (short)(dc * (1 << j->succ_low));
-  } else {
-    // refinement scan for DC coefficient
-    if (stbi__jpeg_get_bit(j))
-      data[0] += (short)(1 << j->succ_low);
-  }
-  return 1;
-}
-
-// @OPTIMIZE: store non-zigzagged during the decode passes,
-// and only de-zigzag when dequantizing
-static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg* j,
-                                           short data[64],
-                                           stbi__huffman* hac,
-                                           stbi__int16* fac) {
-  int k;
-  if (j->spec_start == 0)
-    return stbi__err("can't merge dc and ac", "Corrupt JPEG");
-
-  if (j->succ_high == 0) {
-    int shift = j->succ_low;
-
-    if (j->eob_run) {
-      --j->eob_run;
-      return 1;
-    }
-
-    k = j->spec_start;
-    do {
-      unsigned int zig;
-      int c, r, s;
-      if (j->code_bits < 16)
-        stbi__grow_buffer_unsafe(j);
-      c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
-      r = fac[c];
-      if (r) {               // fast-AC path
-        k += (r >> 4) & 15;  // run
-        s = r & 15;          // combined length
-        j->code_buffer <<= s;
-        j->code_bits -= s;
-        zig = stbi__jpeg_dezigzag[k++];
-        data[zig] = (short)((r >> 8) * (1 << shift));
-      } else {
-        int rs = stbi__jpeg_huff_decode(j, hac);
-        if (rs < 0)
-          return stbi__err("bad huffman code", "Corrupt JPEG");
-        s = rs & 15;
-        r = rs >> 4;
-        if (s == 0) {
-          if (r < 15) {
-            j->eob_run = (1 << r);
-            if (r)
-              j->eob_run += stbi__jpeg_get_bits(j, r);
-            --j->eob_run;
-            break;
-          }
-          k += 16;
-        } else {
-          k += r;
-          zig = stbi__jpeg_dezigzag[k++];
-          data[zig] = (short)(stbi__extend_receive(j, s) * (1 << shift));
-        }
-      }
-    } while (k <= j->spec_end);
-  } else {
-    // refinement scan for these AC coefficients
-
-    short bit = (short)(1 << j->succ_low);
-
-    if (j->eob_run) {
-      --j->eob_run;
-      for (k = j->spec_start; k <= j->spec_end; ++k) {
-        short* p = &data[stbi__jpeg_dezigzag[k]];
-        if (*p != 0)
-          if (stbi__jpeg_get_bit(j))
-            if ((*p & bit) == 0) {
-              if (*p > 0)
-                *p += bit;
-              else
-                *p -= bit;
-            }
-      }
-    } else {
-      k = j->spec_start;
-      do {
-        int r, s;
-        int rs = stbi__jpeg_huff_decode(
-            j, hac);  // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
-        if (rs < 0)
-          return stbi__err("bad huffman code", "Corrupt JPEG");
-        s = rs & 15;
-        r = rs >> 4;
-        if (s == 0) {
-          if (r < 15) {
-            j->eob_run = (1 << r) - 1;
-            if (r)
-              j->eob_run += stbi__jpeg_get_bits(j, r);
-            r = 64;  // force end of block
-          } else {
-            // r=15 s=0 should write 16 0s, so we just do
-            // a run of 15 0s and then write s (which is 0),
-            // so we don't have to do anything special here
-          }
-        } else {
-          if (s != 1)
-            return stbi__err("bad huffman code", "Corrupt JPEG");
-          // sign bit
-          if (stbi__jpeg_get_bit(j))
-            s = bit;
-          else
-            s = -bit;
-        }
-
-        // advance by r
-        while (k <= j->spec_end) {
-          short* p = &data[stbi__jpeg_dezigzag[k++]];
-          if (*p != 0) {
-            if (stbi__jpeg_get_bit(j))
-              if ((*p & bit) == 0) {
-                if (*p > 0)
-                  *p += bit;
-                else
-                  *p -= bit;
-              }
-          } else {
-            if (r == 0) {
-              *p = (short)s;
-              break;
-            }
-            --r;
-          }
-        }
-      } while (k <= j->spec_end);
-    }
-  }
-  return 1;
-}
-
-// take a -128..127 value and stbi__clamp it and convert to 0..255
-stbi_inline static stbi_uc stbi__clamp(int x) {
-  // trick to use a single test to catch both cases
-  if ((unsigned int)x > 255) {
-    if (x < 0)
-      return 0;
-    if (x > 255)
-      return 255;
-  }
-  return (stbi_uc)x;
-}
-
-#define stbi__f2f(x) ((int)(((x)*4096 + 0.5)))
-#define stbi__fsh(x) ((x)*4096)
-
-// derived from jidctint -- DCT_ISLOW
-#define STBI__IDCT_1D(s0, s1, s2, s3, s4, s5, s6, s7)     \
-  int t0, t1, t2, t3, p1, p2, p3, p4, p5, x0, x1, x2, x3; \
-  p2 = s2;                                                \
-  p3 = s6;                                                \
-  p1 = (p2 + p3) * stbi__f2f(0.5411961f);                 \
-  t2 = p1 + p3 * stbi__f2f(-1.847759065f);                \
-  t3 = p1 + p2 * stbi__f2f(0.765366865f);                 \
-  p2 = s0;                                                \
-  p3 = s4;                                                \
-  t0 = stbi__fsh(p2 + p3);                                \
-  t1 = stbi__fsh(p2 - p3);                                \
-  x0 = t0 + t3;                                           \
-  x3 = t0 - t3;                                           \
-  x1 = t1 + t2;                                           \
-  x2 = t1 - t2;                                           \
-  t0 = s7;                                                \
-  t1 = s5;                                                \
-  t2 = s3;                                                \
-  t3 = s1;                                                \
-  p3 = t0 + t2;                                           \
-  p4 = t1 + t3;                                           \
-  p1 = t0 + t3;                                           \
-  p2 = t1 + t2;                                           \
-  p5 = (p3 + p4) * stbi__f2f(1.175875602f);               \
-  t0 = t0 * stbi__f2f(0.298631336f);                      \
-  t1 = t1 * stbi__f2f(2.053119869f);                      \
-  t2 = t2 * stbi__f2f(3.072711026f);                      \
-  t3 = t3 * stbi__f2f(1.501321110f);                      \
-  p1 = p5 + p1 * stbi__f2f(-0.899976223f);                \
-  p2 = p5 + p2 * stbi__f2f(-2.562915447f);                \
-  p3 = p3 * stbi__f2f(-1.961570560f);                     \
-  p4 = p4 * stbi__f2f(-0.390180644f);                     \
-  t3 += p1 + p4;                                          \
-  t2 += p2 + p3;                                          \
-  t1 += p2 + p4;                                          \
-  t0 += p1 + p3;
-
-static void stbi__idct_block(stbi_uc* out, int out_stride, short data[64]) {
-  int i, val[64], *v = val;
-  stbi_uc* o;
-  short* d = data;
-
-  // columns
-  for (i = 0; i < 8; ++i, ++d, ++v) {
-    // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
-    if (d[8] == 0 && d[16] == 0 && d[24] == 0 && d[32] == 0 && d[40] == 0 && d[48] == 0 &&
-        d[56] == 0) {
-      //    no shortcut                 0     seconds
-      //    (1|2|3|4|5|6|7)==0          0     seconds
-      //    all separate               -0.047 seconds
-      //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
-      int dcterm = d[0] * 4;
-      v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
-    } else {
-      STBI__IDCT_1D(d[0], d[8], d[16], d[24], d[32], d[40], d[48], d[56])
-      // constants scaled things up by 1<<12; let's bring them back
-      // down, but keep 2 extra bits of precision
-      x0 += 512;
-      x1 += 512;
-      x2 += 512;
-      x3 += 512;
-      v[0] = (x0 + t3) >> 10;
-      v[56] = (x0 - t3) >> 10;
-      v[8] = (x1 + t2) >> 10;
-      v[48] = (x1 - t2) >> 10;
-      v[16] = (x2 + t1) >> 10;
-      v[40] = (x2 - t1) >> 10;
-      v[24] = (x3 + t0) >> 10;
-      v[32] = (x3 - t0) >> 10;
-    }
-  }
-
-  for (i = 0, v = val, o = out; i < 8; ++i, v += 8, o += out_stride) {
-    // no fast case since the first 1D IDCT spread components out
-    STBI__IDCT_1D(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7])
-    // constants scaled things up by 1<<12, plus we had 1<<2 from first
-    // loop, plus horizontal and vertical each scale by sqrt(8) so together
-    // we've got an extra 1<<3, so 1<<17 total we need to remove.
-    // so we want to round that, which means adding 0.5 * 1<<17,
-    // aka 65536. Also, we'll end up with -128 to 127 that we want
-    // to encode as 0..255 by adding 128, so we'll add that before the shift
-    x0 += 65536 + (128 << 17);
-    x1 += 65536 + (128 << 17);
-    x2 += 65536 + (128 << 17);
-    x3 += 65536 + (128 << 17);
-    // tried computing the shifts into temps, or'ing the temps to see
-    // if any were out of range, but that was slower
-    o[0] = stbi__clamp((x0 + t3) >> 17);
-    o[7] = stbi__clamp((x0 - t3) >> 17);
-    o[1] = stbi__clamp((x1 + t2) >> 17);
-    o[6] = stbi__clamp((x1 - t2) >> 17);
-    o[2] = stbi__clamp((x2 + t1) >> 17);
-    o[5] = stbi__clamp((x2 - t1) >> 17);
-    o[3] = stbi__clamp((x3 + t0) >> 17);
-    o[4] = stbi__clamp((x3 - t0) >> 17);
-  }
-}
-
-#ifdef STBI_SSE2
-// sse2 integer IDCT. not the fastest possible implementation but it
-// produces bit-identical results to the generic C version so it's
-// fully "transparent".
-static void stbi__idct_simd(stbi_uc* out, int out_stride, short data[64]) {
-  // This is constructed to match our regular (generic) integer IDCT exactly.
-  __m128i row0, row1, row2, row3, row4, row5, row6, row7;
-  __m128i tmp;
-
-// dot product constant: even elems=x, odd elems=y
-#define dct_const(x, y) _mm_setr_epi16((x), (y), (x), (y), (x), (y), (x), (y))
-
-// out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)
-// out(1) = c1[even]*x + c1[odd]*y
-#define dct_rot(out0, out1, x, y, c0, c1)        \
-  __m128i c0##lo = _mm_unpacklo_epi16((x), (y)); \
-  __m128i c0##hi = _mm_unpackhi_epi16((x), (y)); \
-  __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
-  __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
-  __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
-  __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
-
-// out = in << 12  (in 16-bit, out 32-bit)
-#define dct_widen(out, in)                                                            \
-  __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
-  __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
-
-// wide add
-#define dct_wadd(out, a, b)                      \
-  __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
-  __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
-
-// wide sub
-#define dct_wsub(out, a, b)                      \
-  __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
-  __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
-
-// butterfly a/b, add bias, then shift by "s" and pack
-#define dct_bfly32o(out0, out1, a, b, bias, s)                                  \
-  {                                                                             \
-    __m128i abiased_l = _mm_add_epi32(a##_l, bias);                             \
-    __m128i abiased_h = _mm_add_epi32(a##_h, bias);                             \
-    dct_wadd(sum, abiased, b);                                                  \
-    dct_wsub(dif, abiased, b);                                                  \
-    out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
-    out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
-  }
-
-// 8-bit interleave step (for transposes)
-#define dct_interleave8(a, b)  \
-  tmp = a;                     \
-  a = _mm_unpacklo_epi8(a, b); \
-  b = _mm_unpackhi_epi8(tmp, b)
-
-// 16-bit interleave step (for transposes)
-#define dct_interleave16(a, b)  \
-  tmp = a;                      \
-  a = _mm_unpacklo_epi16(a, b); \
-  b = _mm_unpackhi_epi16(tmp, b)
-
-#define dct_pass(bias, shift)                        \
-  {                                                  \
-    /* even part */                                  \
-    dct_rot(t2e, t3e, row2, row6, rot0_0, rot0_1);   \
-    __m128i sum04 = _mm_add_epi16(row0, row4);       \
-    __m128i dif04 = _mm_sub_epi16(row0, row4);       \
-    dct_widen(t0e, sum04);                           \
-    dct_widen(t1e, dif04);                           \
-    dct_wadd(x0, t0e, t3e);                          \
-    dct_wsub(x3, t0e, t3e);                          \
-    dct_wadd(x1, t1e, t2e);                          \
-    dct_wsub(x2, t1e, t2e);                          \
-    /* odd part */                                   \
-    dct_rot(y0o, y2o, row7, row3, rot2_0, rot2_1);   \
-    dct_rot(y1o, y3o, row5, row1, rot3_0, rot3_1);   \
-    __m128i sum17 = _mm_add_epi16(row1, row7);       \
-    __m128i sum35 = _mm_add_epi16(row3, row5);       \
-    dct_rot(y4o, y5o, sum17, sum35, rot1_0, rot1_1); \
-    dct_wadd(x4, y0o, y4o);                          \
-    dct_wadd(x5, y1o, y5o);                          \
-    dct_wadd(x6, y2o, y5o);                          \
-    dct_wadd(x7, y3o, y4o);                          \
-    dct_bfly32o(row0, row7, x0, x7, bias, shift);    \
-    dct_bfly32o(row1, row6, x1, x6, bias, shift);    \
-    dct_bfly32o(row2, row5, x2, x5, bias, shift);    \
-    dct_bfly32o(row3, row4, x3, x4, bias, shift);    \
-  }
-
-  __m128i rot0_0 =
-      dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
-  __m128i rot0_1 =
-      dct_const(stbi__f2f(0.5411961f) + stbi__f2f(0.765366865f), stbi__f2f(0.5411961f));
-  __m128i rot1_0 =
-      dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
-  __m128i rot1_1 =
-      dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
-  __m128i rot2_0 =
-      dct_const(stbi__f2f(-1.961570560f) + stbi__f2f(0.298631336f), stbi__f2f(-1.961570560f));
-  __m128i rot2_1 =
-      dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f(3.072711026f));
-  __m128i rot3_0 =
-      dct_const(stbi__f2f(-0.390180644f) + stbi__f2f(2.053119869f), stbi__f2f(-0.390180644f));
-  __m128i rot3_1 =
-      dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f(1.501321110f));
-
-  // rounding biases in column/row passes, see stbi__idct_block for explanation.
-  __m128i bias_0 = _mm_set1_epi32(512);
-  __m128i bias_1 = _mm_set1_epi32(65536 + (128 << 17));
-
-  // load
-  row0 = _mm_load_si128((const __m128i*)(data + 0 * 8));
-  row1 = _mm_load_si128((const __m128i*)(data + 1 * 8));
-  row2 = _mm_load_si128((const __m128i*)(data + 2 * 8));
-  row3 = _mm_load_si128((const __m128i*)(data + 3 * 8));
-  row4 = _mm_load_si128((const __m128i*)(data + 4 * 8));
-  row5 = _mm_load_si128((const __m128i*)(data + 5 * 8));
-  row6 = _mm_load_si128((const __m128i*)(data + 6 * 8));
-  row7 = _mm_load_si128((const __m128i*)(data + 7 * 8));
-
-  // column pass
-  dct_pass(bias_0, 10);
-
-  {
-    // 16bit 8x8 transpose pass 1
-    dct_interleave16(row0, row4);
-    dct_interleave16(row1, row5);
-    dct_interleave16(row2, row6);
-    dct_interleave16(row3, row7);
-
-    // transpose pass 2
-    dct_interleave16(row0, row2);
-    dct_interleave16(row1, row3);
-    dct_interleave16(row4, row6);
-    dct_interleave16(row5, row7);
-
-    // transpose pass 3
-    dct_interleave16(row0, row1);
-    dct_interleave16(row2, row3);
-    dct_interleave16(row4, row5);
-    dct_interleave16(row6, row7);
-  }
-
-  // row pass
-  dct_pass(bias_1, 17);
-
-  {
-    // pack
-    __m128i p0 = _mm_packus_epi16(row0, row1);  // a0a1a2a3...a7b0b1b2b3...b7
-    __m128i p1 = _mm_packus_epi16(row2, row3);
-    __m128i p2 = _mm_packus_epi16(row4, row5);
-    __m128i p3 = _mm_packus_epi16(row6, row7);
-
-    // 8bit 8x8 transpose pass 1
-    dct_interleave8(p0, p2);  // a0e0a1e1...
-    dct_interleave8(p1, p3);  // c0g0c1g1...
-
-    // transpose pass 2
-    dct_interleave8(p0, p1);  // a0c0e0g0...
-    dct_interleave8(p2, p3);  // b0d0f0h0...
-
-    // transpose pass 3
-    dct_interleave8(p0, p2);  // a0b0c0d0...
-    dct_interleave8(p1, p3);  // a4b4c4d4...
-
-    // store
-    _mm_storel_epi64((__m128i*)out, p0);
-    out += out_stride;
-    _mm_storel_epi64((__m128i*)out, _mm_shuffle_epi32(p0, 0x4e));
-    out += out_stride;
-    _mm_storel_epi64((__m128i*)out, p2);
-    out += out_stride;
-    _mm_storel_epi64((__m128i*)out, _mm_shuffle_epi32(p2, 0x4e));
-    out += out_stride;
-    _mm_storel_epi64((__m128i*)out, p1);
-    out += out_stride;
-    _mm_storel_epi64((__m128i*)out, _mm_shuffle_epi32(p1, 0x4e));
-    out += out_stride;
-    _mm_storel_epi64((__m128i*)out, p3);
-    out += out_stride;
-    _mm_storel_epi64((__m128i*)out, _mm_shuffle_epi32(p3, 0x4e));
-  }
-
-#undef dct_const
-#undef dct_rot
-#undef dct_widen
-#undef dct_wadd
-#undef dct_wsub
-#undef dct_bfly32o
-#undef dct_interleave8
-#undef dct_interleave16
-#undef dct_pass
-}
-
-#endif  // STBI_SSE2
-
-#ifdef STBI_NEON
-
-// NEON integer IDCT. should produce bit-identical
-// results to the generic C version.
-static void stbi__idct_simd(stbi_uc* out, int out_stride, short data[64]) {
-  int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
-
-  int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
-  int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
-  int16x4_t rot0_2 = vdup_n_s16(stbi__f2f(0.765366865f));
-  int16x4_t rot1_0 = vdup_n_s16(stbi__f2f(1.175875602f));
-  int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
-  int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
-  int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
-  int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
-  int16x4_t rot3_0 = vdup_n_s16(stbi__f2f(0.298631336f));
-  int16x4_t rot3_1 = vdup_n_s16(stbi__f2f(2.053119869f));
-  int16x4_t rot3_2 = vdup_n_s16(stbi__f2f(3.072711026f));
-  int16x4_t rot3_3 = vdup_n_s16(stbi__f2f(1.501321110f));
-
-#define dct_long_mul(out, inq, coeff)                      \
-  int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
-  int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
-
-#define dct_long_mac(out, acc, inq, coeff)                          \
-  int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
-  int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
-
-#define dct_widen(out, inq)                               \
-  int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
-  int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
-
-// wide add
-#define dct_wadd(out, a, b)                    \
-  int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
-  int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
-
-// wide sub
-#define dct_wsub(out, a, b)                    \
-  int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
-  int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
-
-// butterfly a/b, then shift using "shiftop" by "s" and pack
-#define dct_bfly32o(out0, out1, a, b, shiftop, s)              \
-  {                                                            \
-    dct_wadd(sum, a, b);                                       \
-    dct_wsub(dif, a, b);                                       \
-    out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
-    out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
-  }
-
-#define dct_pass(shiftop, shift)                     \
-  {                                                  \
-    /* even part */                                  \
-    int16x8_t sum26 = vaddq_s16(row2, row6);         \
-    dct_long_mul(p1e, sum26, rot0_0);                \
-    dct_long_mac(t2e, p1e, row6, rot0_1);            \
-    dct_long_mac(t3e, p1e, row2, rot0_2);            \
-    int16x8_t sum04 = vaddq_s16(row0, row4);         \
-    int16x8_t dif04 = vsubq_s16(row0, row4);         \
-    dct_widen(t0e, sum04);                           \
-    dct_widen(t1e, dif04);                           \
-    dct_wadd(x0, t0e, t3e);                          \
-    dct_wsub(x3, t0e, t3e);                          \
-    dct_wadd(x1, t1e, t2e);                          \
-    dct_wsub(x2, t1e, t2e);                          \
-    /* odd part */                                   \
-    int16x8_t sum15 = vaddq_s16(row1, row5);         \
-    int16x8_t sum17 = vaddq_s16(row1, row7);         \
-    int16x8_t sum35 = vaddq_s16(row3, row5);         \
-    int16x8_t sum37 = vaddq_s16(row3, row7);         \
-    int16x8_t sumodd = vaddq_s16(sum17, sum35);      \
-    dct_long_mul(p5o, sumodd, rot1_0);               \
-    dct_long_mac(p1o, p5o, sum17, rot1_1);           \
-    dct_long_mac(p2o, p5o, sum35, rot1_2);           \
-    dct_long_mul(p3o, sum37, rot2_0);                \
-    dct_long_mul(p4o, sum15, rot2_1);                \
-    dct_wadd(sump13o, p1o, p3o);                     \
-    dct_wadd(sump24o, p2o, p4o);                     \
-    dct_wadd(sump23o, p2o, p3o);                     \
-    dct_wadd(sump14o, p1o, p4o);                     \
-    dct_long_mac(x4, sump13o, row7, rot3_0);         \
-    dct_long_mac(x5, sump24o, row5, rot3_1);         \
-    dct_long_mac(x6, sump23o, row3, rot3_2);         \
-    dct_long_mac(x7, sump14o, row1, rot3_3);         \
-    dct_bfly32o(row0, row7, x0, x7, shiftop, shift); \
-    dct_bfly32o(row1, row6, x1, x6, shiftop, shift); \
-    dct_bfly32o(row2, row5, x2, x5, shiftop, shift); \
-    dct_bfly32o(row3, row4, x3, x4, shiftop, shift); \
-  }
-
-  // load
-  row0 = vld1q_s16(data + 0 * 8);
-  row1 = vld1q_s16(data + 1 * 8);
-  row2 = vld1q_s16(data + 2 * 8);
-  row3 = vld1q_s16(data + 3 * 8);
-  row4 = vld1q_s16(data + 4 * 8);
-  row5 = vld1q_s16(data + 5 * 8);
-  row6 = vld1q_s16(data + 6 * 8);
-  row7 = vld1q_s16(data + 7 * 8);
-
-  // add DC bias
-  row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
-
-  // column pass
-  dct_pass(vrshrn_n_s32, 10);
-
-  // 16bit 8x8 transpose
-  {
-// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
-// whether compilers actually get this is another story, sadly.
-#define dct_trn16(x, y)              \
-  {                                  \
-    int16x8x2_t t = vtrnq_s16(x, y); \
-    x = t.val[0];                    \
-    y = t.val[1];                    \
-  }
-#define dct_trn32(x, y)                                                            \
-  {                                                                                \
-    int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); \
-    x = vreinterpretq_s16_s32(t.val[0]);                                           \
-    y = vreinterpretq_s16_s32(t.val[1]);                                           \
-  }
-#define dct_trn64(x, y)                                     \
-  {                                                         \
-    int16x8_t x0 = x;                                       \
-    int16x8_t y0 = y;                                       \
-    x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0));   \
-    y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); \
-  }
-
-    // pass 1
-    dct_trn16(row0, row1);  // a0b0a2b2a4b4a6b6
-    dct_trn16(row2, row3);
-    dct_trn16(row4, row5);
-    dct_trn16(row6, row7);
-
-    // pass 2
-    dct_trn32(row0, row2);  // a0b0c0d0a4b4c4d4
-    dct_trn32(row1, row3);
-    dct_trn32(row4, row6);
-    dct_trn32(row5, row7);
-
-    // pass 3
-    dct_trn64(row0, row4);  // a0b0c0d0e0f0g0h0
-    dct_trn64(row1, row5);
-    dct_trn64(row2, row6);
-    dct_trn64(row3, row7);
-
-#undef dct_trn16
-#undef dct_trn32
-#undef dct_trn64
-  }
-
-  // row pass
-  // vrshrn_n_s32 only supports shifts up to 16, we need
-  // 17. so do a non-rounding shift of 16 first then follow
-  // up with a rounding shift by 1.
-  dct_pass(vshrn_n_s32, 16);
-
-  {
-    // pack and round
-    uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
-    uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
-    uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
-    uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
-    uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
-    uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
-    uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
-    uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
-
-    // again, these can translate into one instruction, but often don't.
-#define dct_trn8_8(x, y)           \
-  {                                \
-    uint8x8x2_t t = vtrn_u8(x, y); \
-    x = t.val[0];                  \
-    y = t.val[1];                  \
-  }
-#define dct_trn8_16(x, y)                                                      \
-  {                                                                            \
-    uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); \
-    x = vreinterpret_u8_u16(t.val[0]);                                         \
-    y = vreinterpret_u8_u16(t.val[1]);                                         \
-  }
-#define dct_trn8_32(x, y)                                                      \
-  {                                                                            \
-    uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); \
-    x = vreinterpret_u8_u32(t.val[0]);                                         \
-    y = vreinterpret_u8_u32(t.val[1]);                                         \
-  }
-
-    // sadly can't use interleaved stores here since we only write
-    // 8 bytes to each scan line!
-
-    // 8x8 8-bit transpose pass 1
-    dct_trn8_8(p0, p1);
-    dct_trn8_8(p2, p3);
-    dct_trn8_8(p4, p5);
-    dct_trn8_8(p6, p7);
-
-    // pass 2
-    dct_trn8_16(p0, p2);
-    dct_trn8_16(p1, p3);
-    dct_trn8_16(p4, p6);
-    dct_trn8_16(p5, p7);
-
-    // pass 3
-    dct_trn8_32(p0, p4);
-    dct_trn8_32(p1, p5);
-    dct_trn8_32(p2, p6);
-    dct_trn8_32(p3, p7);
-
-    // store
-    vst1_u8(out, p0);
-    out += out_stride;
-    vst1_u8(out, p1);
-    out += out_stride;
-    vst1_u8(out, p2);
-    out += out_stride;
-    vst1_u8(out, p3);
-    out += out_stride;
-    vst1_u8(out, p4);
-    out += out_stride;
-    vst1_u8(out, p5);
-    out += out_stride;
-    vst1_u8(out, p6);
-    out += out_stride;
-    vst1_u8(out, p7);
-
-#undef dct_trn8_8
-#undef dct_trn8_16
-#undef dct_trn8_32
-  }
-
-#undef dct_long_mul
-#undef dct_long_mac
-#undef dct_widen
-#undef dct_wadd
-#undef dct_wsub
-#undef dct_bfly32o
-#undef dct_pass
-}
-
-#endif  // STBI_NEON
-
-#define STBI__MARKER_none 0xff
-// if there's a pending marker from the entropy stream, return that
-// otherwise, fetch from the stream and get a marker. if there's no
-// marker, return 0xff, which is never a valid marker value
-static stbi_uc stbi__get_marker(stbi__jpeg* j) {
-  stbi_uc x;
-  if (j->marker != STBI__MARKER_none) {
-    x = j->marker;
-    j->marker = STBI__MARKER_none;
-    return x;
-  }
-  x = stbi__get8(j->s);
-  if (x != 0xff)
-    return STBI__MARKER_none;
-  while (x == 0xff)
-    x = stbi__get8(j->s);  // consume repeated 0xff fill bytes
-  return x;
-}
-
-// in each scan, we'll have scan_n components, and the order
-// of the components is specified by order[]
-#define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7)
-
-// after a restart interval, stbi__jpeg_reset the entropy decoder and
-// the dc prediction
-static void stbi__jpeg_reset(stbi__jpeg* j) {
-  j->code_bits = 0;
-  j->code_buffer = 0;
-  j->nomore = 0;
-  j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred =
-      j->img_comp[3].dc_pred = 0;
-  j->marker = STBI__MARKER_none;
-  j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
-  j->eob_run = 0;
-  // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
-  // since we don't even allow 1<<30 pixels
-}
-
-static int stbi__parse_entropy_coded_data(stbi__jpeg* z) {
-  stbi__jpeg_reset(z);
-  if (!z->progressive) {
-    if (z->scan_n == 1) {
-      int i, j;
-      STBI_SIMD_ALIGN(short, data[64]);
-      int n = z->order[0];
-      // non-interleaved data, we just need to process one block at a time,
-      // in trivial scanline order
-      // number of blocks to do just depends on how many actual "pixels" this
-      // component has, independent of interleaved MCU blocking and such
-      int w = (z->img_comp[n].x + 7) >> 3;
-      int h = (z->img_comp[n].y + 7) >> 3;
-      for (j = 0; j < h; ++j) {
-        for (i = 0; i < w; ++i) {
-          int ha = z->img_comp[n].ha;
-          if (!stbi__jpeg_decode_block(z, data, z->huff_dc + z->img_comp[n].hd, z->huff_ac + ha,
-                                       z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq]))
-            return 0;
-          z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2 * j * 8 + i * 8,
-                               z->img_comp[n].w2, data);
-          // every data block is an MCU, so countdown the restart interval
-          if (--z->todo <= 0) {
-            if (z->code_bits < 24)
-              stbi__grow_buffer_unsafe(z);
-            // if it's NOT a restart, then just bail, so we get corrupt data
-            // rather than no data
-            if (!STBI__RESTART(z->marker))
-              return 1;
-            stbi__jpeg_reset(z);
-          }
-        }
-      }
-      return 1;
-    } else {  // interleaved
-      int i, j, k, x, y;
-      STBI_SIMD_ALIGN(short, data[64]);
-      for (j = 0; j < z->img_mcu_y; ++j) {
-        for (i = 0; i < z->img_mcu_x; ++i) {
-          // scan an interleaved mcu... process scan_n components in order
-          for (k = 0; k < z->scan_n; ++k) {
-            int n = z->order[k];
-            // scan out an mcu's worth of this component; that's just determined
-            // by the basic H and V specified for the component
-            for (y = 0; y < z->img_comp[n].v; ++y) {
-              for (x = 0; x < z->img_comp[n].h; ++x) {
-                int x2 = (i * z->img_comp[n].h + x) * 8;
-                int y2 = (j * z->img_comp[n].v + y) * 8;
-                int ha = z->img_comp[n].ha;
-                if (!stbi__jpeg_decode_block(z, data, z->huff_dc + z->img_comp[n].hd,
-                                             z->huff_ac + ha, z->fast_ac[ha], n,
-                                             z->dequant[z->img_comp[n].tq]))
-                  return 0;
-                z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2 * y2 + x2,
-                                     z->img_comp[n].w2, data);
-              }
-            }
-          }
-          // after all interleaved components, that's an interleaved MCU,
-          // so now count down the restart interval
-          if (--z->todo <= 0) {
-            if (z->code_bits < 24)
-              stbi__grow_buffer_unsafe(z);
-            if (!STBI__RESTART(z->marker))
-              return 1;
-            stbi__jpeg_reset(z);
-          }
-        }
-      }
-      return 1;
-    }
-  } else {
-    if (z->scan_n == 1) {
-      int i, j;
-      int n = z->order[0];
-      // non-interleaved data, we just need to process one block at a time,
-      // in trivial scanline order
-      // number of blocks to do just depends on how many actual "pixels" this
-      // component has, independent of interleaved MCU blocking and such
-      int w = (z->img_comp[n].x + 7) >> 3;
-      int h = (z->img_comp[n].y + 7) >> 3;
-      for (j = 0; j < h; ++j) {
-        for (i = 0; i < w; ++i) {
-          short* data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
-          if (z->spec_start == 0) {
-            if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
-              return 0;
-          } else {
-            int ha = z->img_comp[n].ha;
-            if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
-              return 0;
-          }
-          // every data block is an MCU, so countdown the restart interval
-          if (--z->todo <= 0) {
-            if (z->code_bits < 24)
-              stbi__grow_buffer_unsafe(z);
-            if (!STBI__RESTART(z->marker))
-              return 1;
-            stbi__jpeg_reset(z);
-          }
-        }
-      }
-      return 1;
-    } else {  // interleaved
-      int i, j, k, x, y;
-      for (j = 0; j < z->img_mcu_y; ++j) {
-        for (i = 0; i < z->img_mcu_x; ++i) {
-          // scan an interleaved mcu... process scan_n components in order
-          for (k = 0; k < z->scan_n; ++k) {
-            int n = z->order[k];
-            // scan out an mcu's worth of this component; that's just determined
-            // by the basic H and V specified for the component
-            for (y = 0; y < z->img_comp[n].v; ++y) {
-              for (x = 0; x < z->img_comp[n].h; ++x) {
-                int x2 = (i * z->img_comp[n].h + x);
-                int y2 = (j * z->img_comp[n].v + y);
-                short* data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
-                if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
-                  return 0;
-              }
-            }
-          }
-          // after all interleaved components, that's an interleaved MCU,
-          // so now count down the restart interval
-          if (--z->todo <= 0) {
-            if (z->code_bits < 24)
-              stbi__grow_buffer_unsafe(z);
-            if (!STBI__RESTART(z->marker))
-              return 1;
-            stbi__jpeg_reset(z);
-          }
-        }
-      }
-      return 1;
-    }
-  }
-}
-
-static void stbi__jpeg_dequantize(short* data, stbi__uint16* dequant) {
-  int i;
-  for (i = 0; i < 64; ++i)
-    data[i] *= dequant[i];
-}
-
-static void stbi__jpeg_finish(stbi__jpeg* z) {
-  if (z->progressive) {
-    // dequantize and idct the data
-    int i, j, n;
-    for (n = 0; n < z->s->img_n; ++n) {
-      int w = (z->img_comp[n].x + 7) >> 3;
-      int h = (z->img_comp[n].y + 7) >> 3;
-      for (j = 0; j < h; ++j) {
-        for (i = 0; i < w; ++i) {
-          short* data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
-          stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
-          z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2 * j * 8 + i * 8,
-                               z->img_comp[n].w2, data);
-        }
-      }
-    }
-  }
-}
-
-static int stbi__process_marker(stbi__jpeg* z, int m) {
-  int L;
-  switch (m) {
-    case STBI__MARKER_none:  // no marker found
-      return stbi__err("expected marker", "Corrupt JPEG");
-
-    case 0xDD:  // DRI - specify restart interval
-      if (stbi__get16be(z->s) != 4)
-        return stbi__err("bad DRI len", "Corrupt JPEG");
-      z->restart_interval = stbi__get16be(z->s);
-      return 1;
-
-    case 0xDB:  // DQT - define quantization table
-      L = stbi__get16be(z->s) - 2;
-      while (L > 0) {
-        int q = stbi__get8(z->s);
-        int p = q >> 4, sixteen = (p != 0);
-        int t = q & 15, i;
-        if (p != 0 && p != 1)
-          return stbi__err("bad DQT type", "Corrupt JPEG");
-        if (t > 3)
-          return stbi__err("bad DQT table", "Corrupt JPEG");
-
-        for (i = 0; i < 64; ++i)
-          z->dequant[t][stbi__jpeg_dezigzag[i]] =
-              (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
-        L -= (sixteen ? 129 : 65);
-      }
-      return L == 0;
-
-    case 0xC4:  // DHT - define huffman table
-      L = stbi__get16be(z->s) - 2;
-      while (L > 0) {
-        stbi_uc* v;
-        int sizes[16], i, n = 0;
-        int q = stbi__get8(z->s);
-        int tc = q >> 4;
-        int th = q & 15;
-        if (tc > 1 || th > 3)
-          return stbi__err("bad DHT header", "Corrupt JPEG");
-        for (i = 0; i < 16; ++i) {
-          sizes[i] = stbi__get8(z->s);
-          n += sizes[i];
-        }
-        L -= 17;
-        if (tc == 0) {
-          if (!stbi__build_huffman(z->huff_dc + th, sizes))
-            return 0;
-          v = z->huff_dc[th].values;
-        } else {
-          if (!stbi__build_huffman(z->huff_ac + th, sizes))
-            return 0;
-          v = z->huff_ac[th].values;
-        }
-        for (i = 0; i < n; ++i)
-          v[i] = stbi__get8(z->s);
-        if (tc != 0)
-          stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
-        L -= n;
-      }
-      return L == 0;
-  }
-
-  // check for comment block or APP blocks
-  if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
-    L = stbi__get16be(z->s);
-    if (L < 2) {
-      if (m == 0xFE)
-        return stbi__err("bad COM len", "Corrupt JPEG");
-      else
-        return stbi__err("bad APP len", "Corrupt JPEG");
-    }
-    L -= 2;
-
-    if (m == 0xE0 && L >= 5) {  // JFIF APP0 segment
-      static const unsigned char tag[5] = {'J', 'F', 'I', 'F', '\0'};
-      int ok = 1;
-      int i;
-      for (i = 0; i < 5; ++i)
-        if (stbi__get8(z->s) != tag[i])
-          ok = 0;
-      L -= 5;
-      if (ok)
-        z->jfif = 1;
-    } else if (m == 0xEE && L >= 12) {  // Adobe APP14 segment
-      static const unsigned char tag[6] = {'A', 'd', 'o', 'b', 'e', '\0'};
-      int ok = 1;
-      int i;
-      for (i = 0; i < 6; ++i)
-        if (stbi__get8(z->s) != tag[i])
-          ok = 0;
-      L -= 6;
-      if (ok) {
-        stbi__get8(z->s);                             // version
-        stbi__get16be(z->s);                          // flags0
-        stbi__get16be(z->s);                          // flags1
-        z->app14_color_transform = stbi__get8(z->s);  // color transform
-        L -= 6;
-      }
-    }
-
-    stbi__skip(z->s, L);
-    return 1;
-  }
-
-  return stbi__err("unknown marker", "Corrupt JPEG");
-}
-
-// after we see SOS
-static int stbi__process_scan_header(stbi__jpeg* z) {
-  int i;
-  int Ls = stbi__get16be(z->s);
-  z->scan_n = stbi__get8(z->s);
-  if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int)z->s->img_n)
-    return stbi__err("bad SOS component count", "Corrupt JPEG");
-  if (Ls != 6 + 2 * z->scan_n)
-    return stbi__err("bad SOS len", "Corrupt JPEG");
-  for (i = 0; i < z->scan_n; ++i) {
-    int id = stbi__get8(z->s), which;
-    int q = stbi__get8(z->s);
-    for (which = 0; which < z->s->img_n; ++which)
-      if (z->img_comp[which].id == id)
-        break;
-    if (which == z->s->img_n)
-      return 0;  // no match
-    z->img_comp[which].hd = q >> 4;
-    if (z->img_comp[which].hd > 3)
-      return stbi__err("bad DC huff", "Corrupt JPEG");
-    z->img_comp[which].ha = q & 15;
-    if (z->img_comp[which].ha > 3)
-      return stbi__err("bad AC huff", "Corrupt JPEG");
-    z->order[i] = which;
-  }
-
-  {
-    int aa;
-    z->spec_start = stbi__get8(z->s);
-    z->spec_end = stbi__get8(z->s);  // should be 63, but might be 0
-    aa = stbi__get8(z->s);
-    z->succ_high = (aa >> 4);
-    z->succ_low = (aa & 15);
-    if (z->progressive) {
-      if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end ||
-          z->succ_high > 13 || z->succ_low > 13)
-        return stbi__err("bad SOS", "Corrupt JPEG");
-    } else {
-      if (z->spec_start != 0)
-        return stbi__err("bad SOS", "Corrupt JPEG");
-      if (z->succ_high != 0 || z->succ_low != 0)
-        return stbi__err("bad SOS", "Corrupt JPEG");
-      z->spec_end = 63;
-    }
-  }
-
-  return 1;
-}
-
-static int stbi__free_jpeg_components(stbi__jpeg* z, int ncomp, int why) {
-  int i;
-  for (i = 0; i < ncomp; ++i) {
-    if (z->img_comp[i].raw_data) {
-      STBI_FREE(z->img_comp[i].raw_data);
-      z->img_comp[i].raw_data = NULL;
-      z->img_comp[i].data = NULL;
-    }
-    if (z->img_comp[i].raw_coeff) {
-      STBI_FREE(z->img_comp[i].raw_coeff);
-      z->img_comp[i].raw_coeff = 0;
-      z->img_comp[i].coeff = 0;
-    }
-    if (z->img_comp[i].linebuf) {
-      STBI_FREE(z->img_comp[i].linebuf);
-      z->img_comp[i].linebuf = NULL;
-    }
-  }
-  return why;
-}
-
-static int stbi__process_frame_header(stbi__jpeg* z, int scan) {
-  stbi__context* s = z->s;
-  int Lf, p, i, q, h_max = 1, v_max = 1, c;
-  Lf = stbi__get16be(s);
-  if (Lf < 11)
-    return stbi__err("bad SOF len", "Corrupt JPEG");  // JPEG
-  p = stbi__get8(s);
-  if (p != 8)
-    return stbi__err("only 8-bit", "JPEG format not supported: 8-bit only");  // JPEG baseline
-  s->img_y = stbi__get16be(s);
-  if (s->img_y == 0)
-    return stbi__err("no header height",
-                     "JPEG format not supported: delayed height");  // Legal, but we don't handle
-                                                                    // it--but neither does IJG
-  s->img_x = stbi__get16be(s);
-  if (s->img_x == 0)
-    return stbi__err("0 width", "Corrupt JPEG");  // JPEG requires
-  if (s->img_y > STBI_MAX_DIMENSIONS)
-    return stbi__err("too large", "Very large image (corrupt?)");
-  if (s->img_x > STBI_MAX_DIMENSIONS)
-    return stbi__err("too large", "Very large image (corrupt?)");
-  c = stbi__get8(s);
-  if (c != 3 && c != 1 && c != 4)
-    return stbi__err("bad component count", "Corrupt JPEG");
-  s->img_n = c;
-  for (i = 0; i < c; ++i) {
-    z->img_comp[i].data = NULL;
-    z->img_comp[i].linebuf = NULL;
-  }
-
-  if (Lf != 8 + 3 * s->img_n)
-    return stbi__err("bad SOF len", "Corrupt JPEG");
-
-  z->rgb = 0;
-  for (i = 0; i < s->img_n; ++i) {
-    static const unsigned char rgb[3] = {'R', 'G', 'B'};
-    z->img_comp[i].id = stbi__get8(s);
-    if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
-      ++z->rgb;
-    q = stbi__get8(s);
-    z->img_comp[i].h = (q >> 4);
-    if (!z->img_comp[i].h || z->img_comp[i].h > 4)
-      return stbi__err("bad H", "Corrupt JPEG");
-    z->img_comp[i].v = q & 15;
-    if (!z->img_comp[i].v || z->img_comp[i].v > 4)
-      return stbi__err("bad V", "Corrupt JPEG");
-    z->img_comp[i].tq = stbi__get8(s);
-    if (z->img_comp[i].tq > 3)
-      return stbi__err("bad TQ", "Corrupt JPEG");
-  }
-
-  if (scan != STBI__SCAN_load)
-    return 1;
-
-  if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0))
-    return stbi__err("too large", "Image too large to decode");
-
-  for (i = 0; i < s->img_n; ++i) {
-    if (z->img_comp[i].h > h_max)
-      h_max = z->img_comp[i].h;
-    if (z->img_comp[i].v > v_max)
-      v_max = z->img_comp[i].v;
-  }
-
-  // check that plane subsampling factors are integer ratios; our resamplers can't deal with
-  // fractional ratios and I've never seen a non-corrupted JPEG file actually use them
-  for (i = 0; i < s->img_n; ++i) {
-    if (h_max % z->img_comp[i].h != 0)
-      return stbi__err("bad H", "Corrupt JPEG");
-    if (v_max % z->img_comp[i].v != 0)
-      return stbi__err("bad V", "Corrupt JPEG");
-  }
-
-  // compute interleaved mcu info
-  z->img_h_max = h_max;
-  z->img_v_max = v_max;
-  z->img_mcu_w = h_max * 8;
-  z->img_mcu_h = v_max * 8;
-  // these sizes can't be more than 17 bits
-  z->img_mcu_x = (s->img_x + z->img_mcu_w - 1) / z->img_mcu_w;
-  z->img_mcu_y = (s->img_y + z->img_mcu_h - 1) / z->img_mcu_h;
-
-  for (i = 0; i < s->img_n; ++i) {
-    // number of effective pixels (e.g. for non-interleaved MCU)
-    z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max - 1) / h_max;
-    z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max - 1) / v_max;
-    // to simplify generation, we'll allocate enough memory to decode
-    // the bogus oversized data from using interleaved MCUs and their
-    // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
-    // discard the extra data until colorspace conversion
-    //
-    // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
-    // so these muls can't overflow with 32-bit ints (which we require)
-    z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
-    z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
-    z->img_comp[i].coeff = 0;
-    z->img_comp[i].raw_coeff = 0;
-    z->img_comp[i].linebuf = NULL;
-    z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
-    if (z->img_comp[i].raw_data == NULL)
-      return stbi__free_jpeg_components(z, i + 1, stbi__err("outofmem", "Out of memory"));
-    // align blocks for idct using mmx/sse
-    z->img_comp[i].data = (stbi_uc*)(((size_t)z->img_comp[i].raw_data + 15) & ~15);
-    if (z->progressive) {
-      // w2, h2 are multiples of 8 (see above)
-      z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
-      z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
-      z->img_comp[i].raw_coeff =
-          stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
-      if (z->img_comp[i].raw_coeff == NULL)
-        return stbi__free_jpeg_components(z, i + 1, stbi__err("outofmem", "Out of memory"));
-      z->img_comp[i].coeff = (short*)(((size_t)z->img_comp[i].raw_coeff + 15) & ~15);
-    }
-  }
-
-  return 1;
-}
-
-// use comparisons since in some cases we handle more than one case (e.g. SOF)
-#define stbi__DNL(x) ((x) == 0xdc)
-#define stbi__SOI(x) ((x) == 0xd8)
-#define stbi__EOI(x) ((x) == 0xd9)
-#define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
-#define stbi__SOS(x) ((x) == 0xda)
-
-#define stbi__SOF_progressive(x) ((x) == 0xc2)
-
-static int stbi__decode_jpeg_header(stbi__jpeg* z, int scan) {
-  int m;
-  z->jfif = 0;
-  z->app14_color_transform = -1;  // valid values are 0,1,2
-  z->marker = STBI__MARKER_none;  // initialize cached marker to empty
-  m = stbi__get_marker(z);
-  if (!stbi__SOI(m))
-    return stbi__err("no SOI", "Corrupt JPEG");
-  if (scan == STBI__SCAN_type)
-    return 1;
-  m = stbi__get_marker(z);
-  while (!stbi__SOF(m)) {
-    if (!stbi__process_marker(z, m))
-      return 0;
-    m = stbi__get_marker(z);
-    while (m == STBI__MARKER_none) {
-      // some files have extra padding after their blocks, so ok, we'll scan
-      if (stbi__at_eof(z->s))
-        return stbi__err("no SOF", "Corrupt JPEG");
-      m = stbi__get_marker(z);
-    }
-  }
-  z->progressive = stbi__SOF_progressive(m);
-  if (!stbi__process_frame_header(z, scan))
-    return 0;
-  return 1;
-}
-
-// decode image to YCbCr format
-static int stbi__decode_jpeg_image(stbi__jpeg* j) {
-  int m;
-  for (m = 0; m < 4; m++) {
-    j->img_comp[m].raw_data = NULL;
-    j->img_comp[m].raw_coeff = NULL;
-  }
-  j->restart_interval = 0;
-  if (!stbi__decode_jpeg_header(j, STBI__SCAN_load))
-    return 0;
-  m = stbi__get_marker(j);
-  while (!stbi__EOI(m)) {
-    if (stbi__SOS(m)) {
-      if (!stbi__process_scan_header(j))
-        return 0;
-      if (!stbi__parse_entropy_coded_data(j))
-        return 0;
-      if (j->marker == STBI__MARKER_none) {
-        // handle 0s at the end of image data from IP Kamera 9060
-        while (!stbi__at_eof(j->s)) {
-          int x = stbi__get8(j->s);
-          if (x == 255) {
-            j->marker = stbi__get8(j->s);
-            break;
-          }
-        }
-        // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll
-        // eventually return 0
-      }
-    } else if (stbi__DNL(m)) {
-      int Ld = stbi__get16be(j->s);
-      stbi__uint32 NL = stbi__get16be(j->s);
-      if (Ld != 4)
-        return stbi__err("bad DNL len", "Corrupt JPEG");
-      if (NL != j->s->img_y)
-        return stbi__err("bad DNL height", "Corrupt JPEG");
-    } else {
-      if (!stbi__process_marker(j, m))
-        return 0;
-    }
-    m = stbi__get_marker(j);
-  }
-  if (j->progressive)
-    stbi__jpeg_finish(j);
-  return 1;
-}
-
-// static jfif-centered resampling (across block boundaries)
-
-typedef stbi_uc* (*resample_row_func)(stbi_uc* out, stbi_uc* in0, stbi_uc* in1, int w, int hs);
-
-#define stbi__div4(x) ((stbi_uc)((x) >> 2))
-
-static stbi_uc* resample_row_1(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs) {
-  STBI_NOTUSED(out);
-  STBI_NOTUSED(in_far);
-  STBI_NOTUSED(w);
-  STBI_NOTUSED(hs);
-  return in_near;
-}
-
-static stbi_uc* stbi__resample_row_v_2(stbi_uc* out,
-                                       stbi_uc* in_near,
-                                       stbi_uc* in_far,
-                                       int w,
-                                       int hs) {
-  // need to generate two samples vertically for every one in input
-  int i;
-  STBI_NOTUSED(hs);
-  for (i = 0; i < w; ++i)
-    out[i] = stbi__div4(3 * in_near[i] + in_far[i] + 2);
-  return out;
-}
-
-static stbi_uc* stbi__resample_row_h_2(stbi_uc* out,
-                                       stbi_uc* in_near,
-                                       stbi_uc* in_far,
-                                       int w,
-                                       int hs) {
-  // need to generate two samples horizontally for every one in input
-  int i;
-  stbi_uc* input = in_near;
-
-  if (w == 1) {
-    // if only one sample, can't do any interpolation
-    out[0] = out[1] = input[0];
-    return out;
-  }
-
-  out[0] = input[0];
-  out[1] = stbi__div4(input[0] * 3 + input[1] + 2);
-  for (i = 1; i < w - 1; ++i) {
-    int n = 3 * input[i] + 2;
-    out[i * 2 + 0] = stbi__div4(n + input[i - 1]);
-    out[i * 2 + 1] = stbi__div4(n + input[i + 1]);
-  }
-  out[i * 2 + 0] = stbi__div4(input[w - 2] * 3 + input[w - 1] + 2);
-  out[i * 2 + 1] = input[w - 1];
-
-  STBI_NOTUSED(in_far);
-  STBI_NOTUSED(hs);
-
-  return out;
-}
-
-#define stbi__div16(x) ((stbi_uc)((x) >> 4))
-
-static stbi_uc* stbi__resample_row_hv_2(stbi_uc* out,
-                                        stbi_uc* in_near,
-                                        stbi_uc* in_far,
-                                        int w,
-                                        int hs) {
-  // need to generate 2x2 samples for every one in input
-  int i, t0, t1;
-  if (w == 1) {
-    out[0] = out[1] = stbi__div4(3 * in_near[0] + in_far[0] + 2);
-    return out;
-  }
-
-  t1 = 3 * in_near[0] + in_far[0];
-  out[0] = stbi__div4(t1 + 2);
-  for (i = 1; i < w; ++i) {
-    t0 = t1;
-    t1 = 3 * in_near[i] + in_far[i];
-    out[i * 2 - 1] = stbi__div16(3 * t0 + t1 + 8);
-    out[i * 2] = stbi__div16(3 * t1 + t0 + 8);
-  }
-  out[w * 2 - 1] = stbi__div4(t1 + 2);
-
-  STBI_NOTUSED(hs);
-
-  return out;
-}
-
-#if defined(STBI_SSE2) || defined(STBI_NEON)
-static stbi_uc* stbi__resample_row_hv_2_simd(stbi_uc* out,
-                                             stbi_uc* in_near,
-                                             stbi_uc* in_far,
-                                             int w,
-                                             int hs) {
-  // need to generate 2x2 samples for every one in input
-  int i = 0, t0, t1;
-
-  if (w == 1) {
-    out[0] = out[1] = stbi__div4(3 * in_near[0] + in_far[0] + 2);
-    return out;
-  }
-
-  t1 = 3 * in_near[0] + in_far[0];
-  // process groups of 8 pixels for as long as we can.
-  // note we can't handle the last pixel in a row in this loop
-  // because we need to handle the filter boundary conditions.
-  for (; i < ((w - 1) & ~7); i += 8) {
-#if defined(STBI_SSE2)
-    // load and perform the vertical filtering pass
-    // this uses 3*x + y = 4*x + (y - x)
-    __m128i zero = _mm_setzero_si128();
-    __m128i farb = _mm_loadl_epi64((__m128i*)(in_far + i));
-    __m128i nearb = _mm_loadl_epi64((__m128i*)(in_near + i));
-    __m128i farw = _mm_unpacklo_epi8(farb, zero);
-    __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
-    __m128i diff = _mm_sub_epi16(farw, nearw);
-    __m128i nears = _mm_slli_epi16(nearw, 2);
-    __m128i curr = _mm_add_epi16(nears, diff);  // current row
-
-    // horizontal filter works the same based on shifted vers of current
-    // row. "prev" is current row shifted right by 1 pixel; we need to
-    // insert the previous pixel value (from t1).
-    // "next" is current row shifted left by 1 pixel, with first pixel
-    // of next block of 8 pixels added in.
-    __m128i prv0 = _mm_slli_si128(curr, 2);
-    __m128i nxt0 = _mm_srli_si128(curr, 2);
-    __m128i prev = _mm_insert_epi16(prv0, t1, 0);
-    __m128i next = _mm_insert_epi16(nxt0, 3 * in_near[i + 8] + in_far[i + 8], 7);
-
-    // horizontal filter, polyphase implementation since it's convenient:
-    // even pixels = 3*cur + prev = cur*4 + (prev - cur)
-    // odd  pixels = 3*cur + next = cur*4 + (next - cur)
-    // note the shared term.
-    __m128i bias = _mm_set1_epi16(8);
-    __m128i curs = _mm_slli_epi16(curr, 2);
-    __m128i prvd = _mm_sub_epi16(prev, curr);
-    __m128i nxtd = _mm_sub_epi16(next, curr);
-    __m128i curb = _mm_add_epi16(curs, bias);
-    __m128i even = _mm_add_epi16(prvd, curb);
-    __m128i odd = _mm_add_epi16(nxtd, curb);
-
-    // interleave even and odd pixels, then undo scaling.
-    __m128i int0 = _mm_unpacklo_epi16(even, odd);
-    __m128i int1 = _mm_unpackhi_epi16(even, odd);
-    __m128i de0 = _mm_srli_epi16(int0, 4);
-    __m128i de1 = _mm_srli_epi16(int1, 4);
-
-    // pack and write output
-    __m128i outv = _mm_packus_epi16(de0, de1);
-    _mm_storeu_si128((__m128i*)(out + i * 2), outv);
-#elif defined(STBI_NEON)
-    // load and perform the vertical filtering pass
-    // this uses 3*x + y = 4*x + (y - x)
-    uint8x8_t farb = vld1_u8(in_far + i);
-    uint8x8_t nearb = vld1_u8(in_near + i);
-    int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
-    int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
-    int16x8_t curr = vaddq_s16(nears, diff);  // current row
-
-    // horizontal filter works the same based on shifted vers of current
-    // row. "prev" is current row shifted right by 1 pixel; we need to
-    // insert the previous pixel value (from t1).
-    // "next" is current row shifted left by 1 pixel, with first pixel
-    // of next block of 8 pixels added in.
-    int16x8_t prv0 = vextq_s16(curr, curr, 7);
-    int16x8_t nxt0 = vextq_s16(curr, curr, 1);
-    int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
-    int16x8_t next = vsetq_lane_s16(3 * in_near[i + 8] + in_far[i + 8], nxt0, 7);
-
-    // horizontal filter, polyphase implementation since it's convenient:
-    // even pixels = 3*cur + prev = cur*4 + (prev - cur)
-    // odd  pixels = 3*cur + next = cur*4 + (next - cur)
-    // note the shared term.
-    int16x8_t curs = vshlq_n_s16(curr, 2);
-    int16x8_t prvd = vsubq_s16(prev, curr);
-    int16x8_t nxtd = vsubq_s16(next, curr);
-    int16x8_t even = vaddq_s16(curs, prvd);
-    int16x8_t odd = vaddq_s16(curs, nxtd);
-
-    // undo scaling and round, then store with even/odd phases interleaved
-    uint8x8x2_t o;
-    o.val[0] = vqrshrun_n_s16(even, 4);
-    o.val[1] = vqrshrun_n_s16(odd, 4);
-    vst2_u8(out + i * 2, o);
-#endif
-
-    // "previous" value for next iter
-    t1 = 3 * in_near[i + 7] + in_far[i + 7];
-  }
-
-  t0 = t1;
-  t1 = 3 * in_near[i] + in_far[i];
-  out[i * 2] = stbi__div16(3 * t1 + t0 + 8);
-
-  for (++i; i < w; ++i) {
-    t0 = t1;
-    t1 = 3 * in_near[i] + in_far[i];
-    out[i * 2 - 1] = stbi__div16(3 * t0 + t1 + 8);
-    out[i * 2] = stbi__div16(3 * t1 + t0 + 8);
-  }
-  out[w * 2 - 1] = stbi__div4(t1 + 2);
-
-  STBI_NOTUSED(hs);
-
-  return out;
-}
-#endif
-
-static stbi_uc* stbi__resample_row_generic(stbi_uc* out,
-                                           stbi_uc* in_near,
-                                           stbi_uc* in_far,
-                                           int w,
-                                           int hs) {
-  // resample with nearest-neighbor
-  int i, j;
-  STBI_NOTUSED(in_far);
-  for (i = 0; i < w; ++i)
-    for (j = 0; j < hs; ++j)
-      out[i * hs + j] = in_near[i];
-  return out;
-}
-
-// this is a reduced-precision calculation of YCbCr-to-RGB introduced
-// to make sure the code produces the same results in both SIMD and scalar
-#define stbi__float2fixed(x) (((int)((x)*4096.0f + 0.5f)) << 8)
-static void stbi__YCbCr_to_RGB_row(stbi_uc* out,
-                                   const stbi_uc* y,
-                                   const stbi_uc* pcb,
-                                   const stbi_uc* pcr,
-                                   int count,
-                                   int step) {
-  int i;
-  for (i = 0; i < count; ++i) {
-    int y_fixed = (y[i] << 20) + (1 << 19);  // rounding
-    int r, g, b;
-    int cr = pcr[i] - 128;
-    int cb = pcb[i] - 128;
-    r = y_fixed + cr * stbi__float2fixed(1.40200f);
-    g = y_fixed + (cr * -stbi__float2fixed(0.71414f)) +
-        ((cb * -stbi__float2fixed(0.34414f)) & 0xffff0000);
-    b = y_fixed + cb * stbi__float2fixed(1.77200f);
-    r >>= 20;
-    g >>= 20;
-    b >>= 20;
-    if ((unsigned)r > 255) {
-      if (r < 0)
-        r = 0;
-      else
-        r = 255;
-    }
-    if ((unsigned)g > 255) {
-      if (g < 0)
-        g = 0;
-      else
-        g = 255;
-    }
-    if ((unsigned)b > 255) {
-      if (b < 0)
-        b = 0;
-      else
-        b = 255;
-    }
-    out[0] = (stbi_uc)r;
-    out[1] = (stbi_uc)g;
-    out[2] = (stbi_uc)b;
-    out[3] = 255;
-    out += step;
-  }
-}
-
-#if defined(STBI_SSE2) || defined(STBI_NEON)
-static void stbi__YCbCr_to_RGB_simd(stbi_uc* out,
-                                    stbi_uc const* y,
-                                    stbi_uc const* pcb,
-                                    stbi_uc const* pcr,
-                                    int count,
-                                    int step) {
-  int i = 0;
-
-#ifdef STBI_SSE2
-  // step == 3 is pretty ugly on the final interleave, and i'm not convinced
-  // it's useful in practice (you wouldn't use it for textures, for example).
-  // so just accelerate step == 4 case.
-  if (step == 4) {
-    // this is a fairly straightforward implementation and not super-optimized.
-    __m128i signflip = _mm_set1_epi8(-0x80);
-    __m128i cr_const0 = _mm_set1_epi16((short)(1.40200f * 4096.0f + 0.5f));
-    __m128i cr_const1 = _mm_set1_epi16(-(short)(0.71414f * 4096.0f + 0.5f));
-    __m128i cb_const0 = _mm_set1_epi16(-(short)(0.34414f * 4096.0f + 0.5f));
-    __m128i cb_const1 = _mm_set1_epi16((short)(1.77200f * 4096.0f + 0.5f));
-    __m128i y_bias = _mm_set1_epi8((char)(unsigned char)128);
-    __m128i xw = _mm_set1_epi16(255);  // alpha channel
-
-    for (; i + 7 < count; i += 8) {
-      // load
-      __m128i y_bytes = _mm_loadl_epi64((__m128i*)(y + i));
-      __m128i cr_bytes = _mm_loadl_epi64((__m128i*)(pcr + i));
-      __m128i cb_bytes = _mm_loadl_epi64((__m128i*)(pcb + i));
-      __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip);  // -128
-      __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip);  // -128
-
-      // unpack to short (and left-shift cr, cb by 8)
-      __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes);
-      __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
-      __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);
-
-      // color transform
-      __m128i yws = _mm_srli_epi16(yw, 4);
-      __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
-      __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
-      __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
-      __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
-      __m128i rws = _mm_add_epi16(cr0, yws);
-      __m128i gwt = _mm_add_epi16(cb0, yws);
-      __m128i bws = _mm_add_epi16(yws, cb1);
-      __m128i gws = _mm_add_epi16(gwt, cr1);
-
-      // descale
-      __m128i rw = _mm_srai_epi16(rws, 4);
-      __m128i bw = _mm_srai_epi16(bws, 4);
-      __m128i gw = _mm_srai_epi16(gws, 4);
-
-      // back to byte, set up for transpose
-      __m128i brb = _mm_packus_epi16(rw, bw);
-      __m128i gxb = _mm_packus_epi16(gw, xw);
-
-      // transpose to interleave channels
-      __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
-      __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
-      __m128i o0 = _mm_unpacklo_epi16(t0, t1);
-      __m128i o1 = _mm_unpackhi_epi16(t0, t1);
-
-      // store
-      _mm_storeu_si128((__m128i*)(out + 0), o0);
-      _mm_storeu_si128((__m128i*)(out + 16), o1);
-      out += 32;
-    }
-  }
-#endif
-
-#ifdef STBI_NEON
-  // in this version, step=3 support would be easy to add. but is there demand?
-  if (step == 4) {
-    // this is a fairly straightforward implementation and not super-optimized.
-    uint8x8_t signflip = vdup_n_u8(0x80);
-    int16x8_t cr_const0 = vdupq_n_s16((short)(1.40200f * 4096.0f + 0.5f));
-    int16x8_t cr_const1 = vdupq_n_s16(-(short)(0.71414f * 4096.0f + 0.5f));
-    int16x8_t cb_const0 = vdupq_n_s16(-(short)(0.34414f * 4096.0f + 0.5f));
-    int16x8_t cb_const1 = vdupq_n_s16((short)(1.77200f * 4096.0f + 0.5f));
-
-    for (; i + 7 < count; i += 8) {
-      // load
-      uint8x8_t y_bytes = vld1_u8(y + i);
-      uint8x8_t cr_bytes = vld1_u8(pcr + i);
-      uint8x8_t cb_bytes = vld1_u8(pcb + i);
-      int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
-      int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));
-
-      // expand to s16
-      int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
-      int16x8_t crw = vshll_n_s8(cr_biased, 7);
-      int16x8_t cbw = vshll_n_s8(cb_biased, 7);
-
-      // color transform
-      int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
-      int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
-      int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
-      int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
-      int16x8_t rws = vaddq_s16(yws, cr0);
-      int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
-      int16x8_t bws = vaddq_s16(yws, cb1);
-
-      // undo scaling, round, convert to byte
-      uint8x8x4_t o;
-      o.val[0] = vqrshrun_n_s16(rws, 4);
-      o.val[1] = vqrshrun_n_s16(gws, 4);
-      o.val[2] = vqrshrun_n_s16(bws, 4);
-      o.val[3] = vdup_n_u8(255);
-
-      // store, interleaving r/g/b/a
-      vst4_u8(out, o);
-      out += 8 * 4;
-    }
-  }
-#endif
-
-  for (; i < count; ++i) {
-    int y_fixed = (y[i] << 20) + (1 << 19);  // rounding
-    int r, g, b;
-    int cr = pcr[i] - 128;
-    int cb = pcb[i] - 128;
-    r = y_fixed + cr * stbi__float2fixed(1.40200f);
-    g = y_fixed + cr * -stbi__float2fixed(0.71414f) +
-        ((cb * -stbi__float2fixed(0.34414f)) & 0xffff0000);
-    b = y_fixed + cb * stbi__float2fixed(1.77200f);
-    r >>= 20;
-    g >>= 20;
-    b >>= 20;
-    if ((unsigned)r > 255) {
-      if (r < 0)
-        r = 0;
-      else
-        r = 255;
-    }
-    if ((unsigned)g > 255) {
-      if (g < 0)
-        g = 0;
-      else
-        g = 255;
-    }
-    if ((unsigned)b > 255) {
-      if (b < 0)
-        b = 0;
-      else
-        b = 255;
-    }
-    out[0] = (stbi_uc)r;
-    out[1] = (stbi_uc)g;
-    out[2] = (stbi_uc)b;
-    out[3] = 255;
-    out += step;
-  }
-}
-#endif
-
-// set up the kernels
-static void stbi__setup_jpeg(stbi__jpeg* j) {
-  j->idct_block_kernel = stbi__idct_block;
-  j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
-  j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
-
-#ifdef STBI_SSE2
-  if (stbi__sse2_available()) {
-    j->idct_block_kernel = stbi__idct_simd;
-    j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
-    j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
-  }
-#endif
-
-#ifdef STBI_NEON
-  j->idct_block_kernel = stbi__idct_simd;
-  j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
-  j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
-#endif
-}
-
-// clean up the temporary component buffers
-static void stbi__cleanup_jpeg(stbi__jpeg* j) {
-  stbi__free_jpeg_components(j, j->s->img_n, 0);
-}
-
-typedef struct {
-  resample_row_func resample;
-  stbi_uc *line0, *line1;
-  int hs, vs;   // expansion factor in each axis
-  int w_lores;  // horizontal pixels pre-expansion
-  int ystep;    // how far through vertical expansion we are
-  int ypos;     // which pre-expansion row we're on
-} stbi__resample;
-
-// fast 0..255 * 0..255 => 0..255 rounded multiplication
-static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y) {
-  unsigned int t = x * y + 128;
-  return (stbi_uc)((t + (t >> 8)) >> 8);
-}
-
-static stbi_uc* load_jpeg_image(stbi__jpeg* z, int* out_x, int* out_y, int* comp, int req_comp) {
-  int n, decode_n, is_rgb;
-  z->s->img_n = 0;  // make stbi__cleanup_jpeg safe
-
-  // validate req_comp
-  if (req_comp < 0 || req_comp > 4)
-    return stbi__errpuc("bad req_comp", "Internal error");
-
-  // load a jpeg image from whichever source, but leave in YCbCr format
-  if (!stbi__decode_jpeg_image(z)) {
-    stbi__cleanup_jpeg(z);
-    return NULL;
-  }
-
-  // determine actual number of components to generate
-  n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
-
-  is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));
-
-  if (z->s->img_n == 3 && n < 3 && !is_rgb)
-    decode_n = 1;
-  else
-    decode_n = z->s->img_n;
-
-  // nothing to do if no components requested; check this now to avoid
-  // accessing uninitialized coutput[0] later
-  if (decode_n <= 0) {
-    stbi__cleanup_jpeg(z);
-    return NULL;
-  }
-
-  // resample and color-convert
-  {
-    int k;
-    unsigned int i, j;
-    stbi_uc* output;
-    stbi_uc* coutput[4] = {NULL, NULL, NULL, NULL};
-
-    stbi__resample res_comp[4];
-
-    for (k = 0; k < decode_n; ++k) {
-      stbi__resample* r = &res_comp[k];
-
-      // allocate line buffer big enough for upsampling off the edges
-      // with upsample factor of 4
-      z->img_comp[k].linebuf = (stbi_uc*)stbi__malloc(z->s->img_x + 3);
-      if (!z->img_comp[k].linebuf) {
-        stbi__cleanup_jpeg(z);
-        return stbi__errpuc("outofmem", "Out of memory");
-      }
-
-      r->hs = z->img_h_max / z->img_comp[k].h;
-      r->vs = z->img_v_max / z->img_comp[k].v;
-      r->ystep = r->vs >> 1;
-      r->w_lores = (z->s->img_x + r->hs - 1) / r->hs;
-      r->ypos = 0;
-      r->line0 = r->line1 = z->img_comp[k].data;
-
-      if (r->hs == 1 && r->vs == 1)
-        r->resample = resample_row_1;
-      else if (r->hs == 1 && r->vs == 2)
-        r->resample = stbi__resample_row_v_2;
-      else if (r->hs == 2 && r->vs == 1)
-        r->resample = stbi__resample_row_h_2;
-      else if (r->hs == 2 && r->vs == 2)
-        r->resample = z->resample_row_hv_2_kernel;
-      else
-        r->resample = stbi__resample_row_generic;
-    }
-
-    // can't error after this so, this is safe
-    output = (stbi_uc*)stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
-    if (!output) {
-      stbi__cleanup_jpeg(z);
-      return stbi__errpuc("outofmem", "Out of memory");
-    }
-
-    // now go ahead and resample
-    for (j = 0; j < z->s->img_y; ++j) {
-      stbi_uc* out = output + n * z->s->img_x * j;
-      for (k = 0; k < decode_n; ++k) {
-        stbi__resample* r = &res_comp[k];
-        int y_bot = r->ystep >= (r->vs >> 1);
-        coutput[k] = r->resample(z->img_comp[k].linebuf, y_bot ? r->line1 : r->line0,
-                                 y_bot ? r->line0 : r->line1, r->w_lores, r->hs);
-        if (++r->ystep >= r->vs) {
-          r->ystep = 0;
-          r->line0 = r->line1;
-          if (++r->ypos < z->img_comp[k].y)
-            r->line1 += z->img_comp[k].w2;
-        }
-      }
-      if (n >= 3) {
-        stbi_uc* y = coutput[0];
-        if (z->s->img_n == 3) {
-          if (is_rgb) {
-            for (i = 0; i < z->s->img_x; ++i) {
-              out[0] = y[i];
-              out[1] = coutput[1][i];
-              out[2] = coutput[2][i];
-              out[3] = 255;
-              out += n;
-            }
-          } else {
-            z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
-          }
-        } else if (z->s->img_n == 4) {
-          if (z->app14_color_transform == 0) {  // CMYK
-            for (i = 0; i < z->s->img_x; ++i) {
-              stbi_uc m = coutput[3][i];
-              out[0] = stbi__blinn_8x8(coutput[0][i], m);
-              out[1] = stbi__blinn_8x8(coutput[1][i], m);
-              out[2] = stbi__blinn_8x8(coutput[2][i], m);
-              out[3] = 255;
-              out += n;
-            }
-          } else if (z->app14_color_transform == 2) {  // YCCK
-            z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
-            for (i = 0; i < z->s->img_x; ++i) {
-              stbi_uc m = coutput[3][i];
-              out[0] = stbi__blinn_8x8(255 - out[0], m);
-              out[1] = stbi__blinn_8x8(255 - out[1], m);
-              out[2] = stbi__blinn_8x8(255 - out[2], m);
-              out += n;
-            }
-          } else {  // YCbCr + alpha?  Ignore the fourth channel for now
-            z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
-          }
-        } else
-          for (i = 0; i < z->s->img_x; ++i) {
-            out[0] = out[1] = out[2] = y[i];
-            out[3] = 255;  // not used if n==3
-            out += n;
-          }
-      } else {
-        if (is_rgb) {
-          if (n == 1)
-            for (i = 0; i < z->s->img_x; ++i)
-              *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
-          else {
-            for (i = 0; i < z->s->img_x; ++i, out += 2) {
-              out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
-              out[1] = 255;
-            }
-          }
-        } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
-          for (i = 0; i < z->s->img_x; ++i) {
-            stbi_uc m = coutput[3][i];
-            stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
-            stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
-            stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
-            out[0] = stbi__compute_y(r, g, b);
-            out[1] = 255;
-            out += n;
-          }
-        } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
-          for (i = 0; i < z->s->img_x; ++i) {
-            out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
-            out[1] = 255;
-            out += n;
-          }
-        } else {
-          stbi_uc* y = coutput[0];
-          if (n == 1)
-            for (i = 0; i < z->s->img_x; ++i)
-              out[i] = y[i];
-          else
-            for (i = 0; i < z->s->img_x; ++i) {
-              *out++ = y[i];
-              *out++ = 255;
-            }
-        }
-      }
-    }
-    stbi__cleanup_jpeg(z);
-    *out_x = z->s->img_x;
-    *out_y = z->s->img_y;
-    if (comp)
-      *comp = z->s->img_n >= 3 ? 3 : 1;  // report original components, not output
-    return output;
-  }
-}
-
-static void* stbi__jpeg_load(stbi__context* s,
-                             int* x,
-                             int* y,
-                             int* comp,
-                             int req_comp,
-                             stbi__result_info* ri) {
-  unsigned char* result;
-  stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
-  if (!j)
-    return stbi__errpuc("outofmem", "Out of memory");
-  STBI_NOTUSED(ri);
-  j->s = s;
-  stbi__setup_jpeg(j);
-  result = load_jpeg_image(j, x, y, comp, req_comp);
-  STBI_FREE(j);
-  return result;
-}
-
-static int stbi__jpeg_test(stbi__context* s) {
-  int r;
-  stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
-  if (!j)
-    return stbi__err("outofmem", "Out of memory");
-  j->s = s;
-  stbi__setup_jpeg(j);
-  r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
-  stbi__rewind(s);
-  STBI_FREE(j);
-  return r;
-}
-
-static int stbi__jpeg_info_raw(stbi__jpeg* j, int* x, int* y, int* comp) {
-  if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
-    stbi__rewind(j->s);
-    return 0;
-  }
-  if (x)
-    *x = j->s->img_x;
-  if (y)
-    *y = j->s->img_y;
-  if (comp)
-    *comp = j->s->img_n >= 3 ? 3 : 1;
-  return 1;
-}
-
-static int stbi__jpeg_info(stbi__context* s, int* x, int* y, int* comp) {
-  int result;
-  stbi__jpeg* j = (stbi__jpeg*)(stbi__malloc(sizeof(stbi__jpeg)));
-  if (!j)
-    return stbi__err("outofmem", "Out of memory");
-  j->s = s;
-  result = stbi__jpeg_info_raw(j, x, y, comp);
-  STBI_FREE(j);
-  return result;
-}
-#endif
-
-// public domain zlib decode    v0.2  Sean Barrett 2006-11-18
-//    simple implementation
-//      - all input must be provided in an upfront buffer
-//      - all output is written to a single output buffer (can malloc/realloc)
-//    performance
-//      - fast huffman
-
-#ifndef STBI_NO_ZLIB
-
-// fast-way is faster to check than jpeg huffman, but slow way is slower
-#define STBI__ZFAST_BITS 9  // accelerate all cases in default tables
-#define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1)
-#define STBI__ZNSYMS 288  // number of symbols in literal/length alphabet
-
-// zlib-style huffman encoding
-// (jpegs packs from left, zlib from right, so can't share code)
-typedef struct {
-  stbi__uint16 fast[1 << STBI__ZFAST_BITS];
-  stbi__uint16 firstcode[16];
-  int maxcode[17];
-  stbi__uint16 firstsymbol[16];
-  stbi_uc size[STBI__ZNSYMS];
-  stbi__uint16 value[STBI__ZNSYMS];
-} stbi__zhuffman;
-
-stbi_inline static int stbi__bitreverse16(int n) {
-  n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);
-  n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);
-  n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);
-  n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
-  return n;
-}
-
-stbi_inline static int stbi__bit_reverse(int v, int bits) {
-  STBI_ASSERT(bits <= 16);
-  // to bit reverse n bits, reverse 16 and shift
-  // e.g. 11 bits, bit reverse and shift away 5
-  return stbi__bitreverse16(v) >> (16 - bits);
-}
-
-static int stbi__zbuild_huffman(stbi__zhuffman* z, const stbi_uc* sizelist, int num) {
-  int i, k = 0;
-  int code, next_code[16], sizes[17];
-
-  // DEFLATE spec for generating codes
-  memset(sizes, 0, sizeof(sizes));
-  memset(z->fast, 0, sizeof(z->fast));
-  for (i = 0; i < num; ++i)
-    ++sizes[sizelist[i]];
-  sizes[0] = 0;
-  for (i = 1; i < 16; ++i)
-    if (sizes[i] > (1 << i))
-      return stbi__err("bad sizes", "Corrupt PNG");
-  code = 0;
-  for (i = 1; i < 16; ++i) {
-    next_code[i] = code;
-    z->firstcode[i] = (stbi__uint16)code;
-    z->firstsymbol[i] = (stbi__uint16)k;
-    code = (code + sizes[i]);
-    if (sizes[i])
-      if (code - 1 >= (1 << i))
-        return stbi__err("bad codelengths", "Corrupt PNG");
-    z->maxcode[i] = code << (16 - i);  // preshift for inner loop
-    code <<= 1;
-    k += sizes[i];
-  }
-  z->maxcode[16] = 0x10000;  // sentinel
-  for (i = 0; i < num; ++i) {
-    int s = sizelist[i];
-    if (s) {
-      int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
-      stbi__uint16 fastv = (stbi__uint16)((s << 9) | i);
-      z->size[c] = (stbi_uc)s;
-      z->value[c] = (stbi__uint16)i;
-      if (s <= STBI__ZFAST_BITS) {
-        int j = stbi__bit_reverse(next_code[s], s);
-        while (j < (1 << STBI__ZFAST_BITS)) {
-          z->fast[j] = fastv;
-          j += (1 << s);
-        }
-      }
-      ++next_code[s];
-    }
-  }
-  return 1;
-}
-
-// zlib-from-memory implementation for PNG reading
-//    because PNG allows splitting the zlib stream arbitrarily,
-//    and it's annoying structurally to have PNG call ZLIB call PNG,
-//    we require PNG read all the IDATs and combine them into a single
-//    memory buffer
-
-typedef struct {
-  stbi_uc *zbuffer, *zbuffer_end;
-  int num_bits;
-  stbi__uint32 code_buffer;
-
-  char* zout;
-  char* zout_start;
-  char* zout_end;
-  int z_expandable;
-
-  stbi__zhuffman z_length, z_distance;
-} stbi__zbuf;
-
-stbi_inline static int stbi__zeof(stbi__zbuf* z) {
-  return (z->zbuffer >= z->zbuffer_end);
-}
-
-stbi_inline static stbi_uc stbi__zget8(stbi__zbuf* z) {
-  return stbi__zeof(z) ? 0 : *z->zbuffer++;
-}
-
-static void stbi__fill_bits(stbi__zbuf* z) {
-  do {
-    if (z->code_buffer >= (1U << z->num_bits)) {
-      z->zbuffer = z->zbuffer_end; /* treat this as EOF so we fail. */
-      return;
-    }
-    z->code_buffer |= (unsigned int)stbi__zget8(z) << z->num_bits;
-    z->num_bits += 8;
-  } while (z->num_bits <= 24);
-}
-
-stbi_inline static unsigned int stbi__zreceive(stbi__zbuf* z, int n) {
-  unsigned int k;
-  if (z->num_bits < n)
-    stbi__fill_bits(z);
-  k = z->code_buffer & ((1 << n) - 1);
-  z->code_buffer >>= n;
-  z->num_bits -= n;
-  return k;
-}
-
-static int stbi__zhuffman_decode_slowpath(stbi__zbuf* a, stbi__zhuffman* z) {
-  int b, s, k;
-  // not resolved by fast table, so compute it the slow way
-  // use jpeg approach, which requires MSbits at top
-  k = stbi__bit_reverse(a->code_buffer, 16);
-  for (s = STBI__ZFAST_BITS + 1;; ++s)
-    if (k < z->maxcode[s])
-      break;
-  if (s >= 16)
-    return -1;  // invalid code!
-  // code size is s, so:
-  b = (k >> (16 - s)) - z->firstcode[s] + z->firstsymbol[s];
-  if (b >= STBI__ZNSYMS)
-    return -1;  // some data was corrupt somewhere!
-  if (z->size[b] != s)
-    return -1;  // was originally an assert, but report failure instead.
-  a->code_buffer >>= s;
-  a->num_bits -= s;
-  return z->value[b];
-}
-
-stbi_inline static int stbi__zhuffman_decode(stbi__zbuf* a, stbi__zhuffman* z) {
-  int b, s;
-  if (a->num_bits < 16) {
-    if (stbi__zeof(a)) {
-      return -1; /* report error for unexpected end of data. */
-    }
-    stbi__fill_bits(a);
-  }
-  b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
-  if (b) {
-    s = b >> 9;
-    a->code_buffer >>= s;
-    a->num_bits -= s;
-    return b & 511;
-  }
-  return stbi__zhuffman_decode_slowpath(a, z);
-}
-
-static int stbi__zexpand(stbi__zbuf* z, char* zout, int n)  // need to make room for n bytes
-{
-  char* q;
-  unsigned int cur, limit, old_limit;
-  z->zout = zout;
-  if (!z->z_expandable)
-    return stbi__err("output buffer limit", "Corrupt PNG");
-  cur = (unsigned int)(z->zout - z->zout_start);
-  limit = old_limit = (unsigned)(z->zout_end - z->zout_start);
-  if (UINT_MAX - cur < (unsigned)n)
-    return stbi__err("outofmem", "Out of memory");
-  while (cur + n > limit) {
-    if (limit > UINT_MAX / 2)
-      return stbi__err("outofmem", "Out of memory");
-    limit *= 2;
-  }
-  q = (char*)STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
-  STBI_NOTUSED(old_limit);
-  if (q == NULL)
-    return stbi__err("outofmem", "Out of memory");
-  z->zout_start = q;
-  z->zout = q + cur;
-  z->zout_end = q + limit;
-  return 1;
-}
-
-static const int stbi__zlength_base[31] = {3,  4,   5,   6,   7,   8,   9,   10, 11, 13, 15,
-                                           17, 19,  23,  27,  31,  35,  43,  51, 59, 67, 83,
-                                           99, 115, 131, 163, 195, 227, 258, 0,  0};
-
-static const int stbi__zlength_extra[31] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
-                                            3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0};
-
-static const int stbi__zdist_base[32] = {
-    1,   2,   3,   4,   5,    7,    9,    13,   17,   25,   33,   49,    65,    97,    129, 193,
-    257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0,   0};
-
-static const int stbi__zdist_extra[32] = {0, 0, 0, 0, 1, 1, 2, 2,  3,  3,  4,  4,  5,  5,  6,
-                                          6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13};
-
-static int stbi__parse_huffman_block(stbi__zbuf* a) {
-  char* zout = a->zout;
-  for (;;) {
-    int z = stbi__zhuffman_decode(a, &a->z_length);
-    if (z < 256) {
-      if (z < 0)
-        return stbi__err("bad huffman code", "Corrupt PNG");  // error in huffman codes
-      if (zout >= a->zout_end) {
-        if (!stbi__zexpand(a, zout, 1))
-          return 0;
-        zout = a->zout;
-      }
-      *zout++ = (char)z;
-    } else {
-      stbi_uc* p;
-      int len, dist;
-      if (z == 256) {
-        a->zout = zout;
-        return 1;
-      }
-      z -= 257;
-      len = stbi__zlength_base[z];
-      if (stbi__zlength_extra[z])
-        len += stbi__zreceive(a, stbi__zlength_extra[z]);
-      z = stbi__zhuffman_decode(a, &a->z_distance);
-      if (z < 0)
-        return stbi__err("bad huffman code", "Corrupt PNG");
-      dist = stbi__zdist_base[z];
-      if (stbi__zdist_extra[z])
-        dist += stbi__zreceive(a, stbi__zdist_extra[z]);
-      if (zout - a->zout_start < dist)
-        return stbi__err("bad dist", "Corrupt PNG");
-      if (zout + len > a->zout_end) {
-        if (!stbi__zexpand(a, zout, len))
-          return 0;
-        zout = a->zout;
-      }
-      p = (stbi_uc*)(zout - dist);
-      if (dist == 1) {  // run of one byte; common in images.
-        stbi_uc v = *p;
-        if (len) {
-          do
-            *zout++ = v;
-          while (--len);
-        }
-      } else {
-        if (len) {
-          do
-            *zout++ = *p++;
-          while (--len);
-        }
-      }
-    }
-  }
-}
-
-static int stbi__compute_huffman_codes(stbi__zbuf* a) {
-  static const stbi_uc length_dezigzag[19] = {16, 17, 18, 0, 8,  7, 9,  6, 10, 5,
-                                              11, 4,  12, 3, 13, 2, 14, 1, 15};
-  stbi__zhuffman z_codelength;
-  stbi_uc lencodes[286 + 32 + 137];  // padding for maximum single op
-  stbi_uc codelength_sizes[19];
-  int i, n;
-
-  int hlit = stbi__zreceive(a, 5) + 257;
-  int hdist = stbi__zreceive(a, 5) + 1;
-  int hclen = stbi__zreceive(a, 4) + 4;
-  int ntot = hlit + hdist;
-
-  memset(codelength_sizes, 0, sizeof(codelength_sizes));
-  for (i = 0; i < hclen; ++i) {
-    int s = stbi__zreceive(a, 3);
-    codelength_sizes[length_dezigzag[i]] = (stbi_uc)s;
-  }
-  if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19))
-    return 0;
-
-  n = 0;
-  while (n < ntot) {
-    int c = stbi__zhuffman_decode(a, &z_codelength);
-    if (c < 0 || c >= 19)
-      return stbi__err("bad codelengths", "Corrupt PNG");
-    if (c < 16)
-      lencodes[n++] = (stbi_uc)c;
-    else {
-      stbi_uc fill = 0;
-      if (c == 16) {
-        c = stbi__zreceive(a, 2) + 3;
-        if (n == 0)
-          return stbi__err("bad codelengths", "Corrupt PNG");
-        fill = lencodes[n - 1];
-      } else if (c == 17) {
-        c = stbi__zreceive(a, 3) + 3;
-      } else if (c == 18) {
-        c = stbi__zreceive(a, 7) + 11;
-      } else {
-        return stbi__err("bad codelengths", "Corrupt PNG");
-      }
-      if (ntot - n < c)
-        return stbi__err("bad codelengths", "Corrupt PNG");
-      memset(lencodes + n, fill, c);
-      n += c;
-    }
-  }
-  if (n != ntot)
-    return stbi__err("bad codelengths", "Corrupt PNG");
-  if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit))
-    return 0;
-  if (!stbi__zbuild_huffman(&a->z_distance, lencodes + hlit, hdist))
-    return 0;
-  return 1;
-}
-
-static int stbi__parse_uncompressed_block(stbi__zbuf* a) {
-  stbi_uc header[4];
-  int len, nlen, k;
-  if (a->num_bits & 7)
-    stbi__zreceive(a, a->num_bits & 7);  // discard
-  // drain the bit-packed data into header
-  k = 0;
-  while (a->num_bits > 0) {
-    header[k++] = (stbi_uc)(a->code_buffer & 255);  // suppress MSVC run-time check
-    a->code_buffer >>= 8;
-    a->num_bits -= 8;
-  }
-  if (a->num_bits < 0)
-    return stbi__err("zlib corrupt", "Corrupt PNG");
-  // now fill header the normal way
-  while (k < 4)
-    header[k++] = stbi__zget8(a);
-  len = header[1] * 256 + header[0];
-  nlen = header[3] * 256 + header[2];
-  if (nlen != (len ^ 0xffff))
-    return stbi__err("zlib corrupt", "Corrupt PNG");
-  if (a->zbuffer + len > a->zbuffer_end)
-    return stbi__err("read past buffer", "Corrupt PNG");
-  if (a->zout + len > a->zout_end)
-    if (!stbi__zexpand(a, a->zout, len))
-      return 0;
-  memcpy(a->zout, a->zbuffer, len);
-  a->zbuffer += len;
-  a->zout += len;
-  return 1;
-}
-
-static int stbi__parse_zlib_header(stbi__zbuf* a) {
-  int cmf = stbi__zget8(a);
-  int cm = cmf & 15;
-  /* int cinfo = cmf >> 4; */
-  int flg = stbi__zget8(a);
-  if (stbi__zeof(a))
-    return stbi__err("bad zlib header", "Corrupt PNG");  // zlib spec
-  if ((cmf * 256 + flg) % 31 != 0)
-    return stbi__err("bad zlib header", "Corrupt PNG");  // zlib spec
-  if (flg & 32)
-    return stbi__err("no preset dict", "Corrupt PNG");  // preset dictionary not allowed in png
-  if (cm != 8)
-    return stbi__err("bad compression", "Corrupt PNG");  // DEFLATE required for png
-  // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
-  return 1;
-}
-
-static const stbi_uc stbi__zdefault_length[STBI__ZNSYMS] = {
-    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
-    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
-    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
-    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8};
-static const stbi_uc stbi__zdefault_distance[32] = {5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-                                                    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5};
-/*
-Init algorithm:
-{
-   int i;   // use <= to match clearly with spec
-   for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;
-   for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9;
-   for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7;
-   for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8;
-
-   for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5;
-}
-*/
-
-static int stbi__parse_zlib(stbi__zbuf* a, int parse_header) {
-  int final, type;
-  if (parse_header)
-    if (!stbi__parse_zlib_header(a))
-      return 0;
-  a->num_bits = 0;
-  a->code_buffer = 0;
-  do {
-    final = stbi__zreceive(a, 1);
-    type = stbi__zreceive(a, 2);
-    if (type == 0) {
-      if (!stbi__parse_uncompressed_block(a))
-        return 0;
-    } else if (type == 3) {
-      return 0;
-    } else {
-      if (type == 1) {
-        // use fixed code lengths
-        if (!stbi__zbuild_huffman(&a->z_length, stbi__zdefault_length, STBI__ZNSYMS))
-          return 0;
-        if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32))
-          return 0;
-      } else {
-        if (!stbi__compute_huffman_codes(a))
-          return 0;
-      }
-      if (!stbi__parse_huffman_block(a))
-        return 0;
-    }
-  } while (!final);
-  return 1;
-}
-
-static int stbi__do_zlib(stbi__zbuf* a, char* obuf, int olen, int exp, int parse_header) {
-  a->zout_start = obuf;
-  a->zout = obuf;
-  a->zout_end = obuf + olen;
-  a->z_expandable = exp;
-
-  return stbi__parse_zlib(a, parse_header);
-}
-
-STBIDEF char* stbi_zlib_decode_malloc_guesssize(const char* buffer,
-                                                int len,
-                                                int initial_size,
-                                                int* outlen) {
-  stbi__zbuf a;
-  char* p = (char*)stbi__malloc(initial_size);
-  if (p == NULL)
-    return NULL;
-  a.zbuffer = (stbi_uc*)buffer;
-  a.zbuffer_end = (stbi_uc*)buffer + len;
-  if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
-    if (outlen)
-      *outlen = (int)(a.zout - a.zout_start);
-    return a.zout_start;
-  } else {
-    STBI_FREE(a.zout_start);
-    return NULL;
-  }
-}
-
-STBIDEF char* stbi_zlib_decode_malloc(char const* buffer, int len, int* outlen) {
-  return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
-}
-
-STBIDEF char* stbi_zlib_decode_malloc_guesssize_headerflag(const char* buffer,
-                                                           int len,
-                                                           int initial_size,
-                                                           int* outlen,
-                                                           int parse_header) {
-  stbi__zbuf a;
-  char* p = (char*)stbi__malloc(initial_size);
-  if (p == NULL)
-    return NULL;
-  a.zbuffer = (stbi_uc*)buffer;
-  a.zbuffer_end = (stbi_uc*)buffer + len;
-  if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
-    if (outlen)
-      *outlen = (int)(a.zout - a.zout_start);
-    return a.zout_start;
-  } else {
-    STBI_FREE(a.zout_start);
-    return NULL;
-  }
-}
-
-STBIDEF int stbi_zlib_decode_buffer(char* obuffer, int olen, char const* ibuffer, int ilen) {
-  stbi__zbuf a;
-  a.zbuffer = (stbi_uc*)ibuffer;
-  a.zbuffer_end = (stbi_uc*)ibuffer + ilen;
-  if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
-    return (int)(a.zout - a.zout_start);
-  else
-    return -1;
-}
-
-STBIDEF char* stbi_zlib_decode_noheader_malloc(char const* buffer, int len, int* outlen) {
-  stbi__zbuf a;
-  char* p = (char*)stbi__malloc(16384);
-  if (p == NULL)
-    return NULL;
-  a.zbuffer = (stbi_uc*)buffer;
-  a.zbuffer_end = (stbi_uc*)buffer + len;
-  if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
-    if (outlen)
-      *outlen = (int)(a.zout - a.zout_start);
-    return a.zout_start;
-  } else {
-    STBI_FREE(a.zout_start);
-    return NULL;
-  }
-}
-
-STBIDEF int stbi_zlib_decode_noheader_buffer(char* obuffer,
-                                             int olen,
-                                             const char* ibuffer,
-                                             int ilen) {
-  stbi__zbuf a;
-  a.zbuffer = (stbi_uc*)ibuffer;
-  a.zbuffer_end = (stbi_uc*)ibuffer + ilen;
-  if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
-    return (int)(a.zout - a.zout_start);
-  else
-    return -1;
-}
-#endif
-
-// public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
-//    simple implementation
-//      - only 8-bit samples
-//      - no CRC checking
-//      - allocates lots of intermediate memory
-//        - avoids problem of streaming data between subsystems
-//        - avoids explicit window management
-//    performance
-//      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
-
-#ifndef STBI_NO_PNG
-typedef struct {
-  stbi__uint32 length;
-  stbi__uint32 type;
-} stbi__pngchunk;
-
-static stbi__pngchunk stbi__get_chunk_header(stbi__context* s) {
-  stbi__pngchunk c;
-  c.length = stbi__get32be(s);
-  c.type = stbi__get32be(s);
-  return c;
-}
-
-static int stbi__check_png_header(stbi__context* s) {
-  static const stbi_uc png_sig[8] = {137, 80, 78, 71, 13, 10, 26, 10};
-  int i;
-  for (i = 0; i < 8; ++i)
-    if (stbi__get8(s) != png_sig[i])
-      return stbi__err("bad png sig", "Not a PNG");
-  return 1;
-}
-
-typedef struct {
-  stbi__context* s;
-  stbi_uc *idata, *expanded, *out;
-  int depth;
-} stbi__png;
-
-enum {
-  STBI__F_none = 0,
-  STBI__F_sub = 1,
-  STBI__F_up = 2,
-  STBI__F_avg = 3,
-  STBI__F_paeth = 4,
-  // synthetic filters used for first scanline to avoid needing a dummy row of 0s
-  STBI__F_avg_first,
-  STBI__F_paeth_first
-};
-
-static stbi_uc first_row_filter[5] = {STBI__F_none, STBI__F_sub, STBI__F_none, STBI__F_avg_first,
-                                      STBI__F_paeth_first};
-
-static int stbi__paeth(int a, int b, int c) {
-  int p = a + b - c;
-  int pa = abs(p - a);
-  int pb = abs(p - b);
-  int pc = abs(p - c);
-  if (pa <= pb && pa <= pc)
-    return a;
-  if (pb <= pc)
-    return b;
-  return c;
-}
-
-static const stbi_uc stbi__depth_scale_table[9] = {0, 0xff, 0x55, 0, 0x11, 0, 0, 0, 0x01};
-
-// create the png data from post-deflated data
-static int stbi__create_png_image_raw(stbi__png* a,
-                                      stbi_uc* raw,
-                                      stbi__uint32 raw_len,
-                                      int out_n,
-                                      stbi__uint32 x,
-                                      stbi__uint32 y,
-                                      int depth,
-                                      int color) {
-  int bytes = (depth == 16 ? 2 : 1);
-  stbi__context* s = a->s;
-  stbi__uint32 i, j, stride = x * out_n * bytes;
-  stbi__uint32 img_len, img_width_bytes;
-  int k;
-  int img_n = s->img_n;  // copy it into a local for later
-
-  int output_bytes = out_n * bytes;
-  int filter_bytes = img_n * bytes;
-  int width = x;
-
-  STBI_ASSERT(out_n == s->img_n || out_n == s->img_n + 1);
-  a->out =
-      (stbi_uc*)stbi__malloc_mad3(x, y, output_bytes, 0);  // extra bytes to write off the end into
-  if (!a->out)
-    return stbi__err("outofmem", "Out of memory");
-
-  if (!stbi__mad3sizes_valid(img_n, x, depth, 7))
-    return stbi__err("too large", "Corrupt PNG");
-  img_width_bytes = (((img_n * x * depth) + 7) >> 3);
-  img_len = (img_width_bytes + 1) * y;
-
-  // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
-  // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
-  // so just check for raw_len < img_len always.
-  if (raw_len < img_len)
-    return stbi__err("not enough pixels", "Corrupt PNG");
-
-  for (j = 0; j < y; ++j) {
-    stbi_uc* cur = a->out + stride * j;
-    stbi_uc* prior;
-    int filter = *raw++;
-
-    if (filter > 4)
-      return stbi__err("invalid filter", "Corrupt PNG");
-
-    if (depth < 8) {
-      if (img_width_bytes > x)
-        return stbi__err("invalid width", "Corrupt PNG");
-      cur += x * out_n - img_width_bytes;  // store output to the rightmost img_len bytes, so we can
-                                           // decode in place
-      filter_bytes = 1;
-      width = img_width_bytes;
-    }
-    prior = cur - stride;  // bugfix: need to compute this after 'cur +=' computation above
-
-    // if first row, use special filter that doesn't sample previous row
-    if (j == 0)
-      filter = first_row_filter[filter];
-
-    // handle first byte explicitly
-    for (k = 0; k < filter_bytes; ++k) {
-      switch (filter) {
-        case STBI__F_none:
-          cur[k] = raw[k];
-          break;
-        case STBI__F_sub:
-          cur[k] = raw[k];
-          break;
-        case STBI__F_up:
-          cur[k] = STBI__BYTECAST(raw[k] + prior[k]);
-          break;
-        case STBI__F_avg:
-          cur[k] = STBI__BYTECAST(raw[k] + (prior[k] >> 1));
-          break;
-        case STBI__F_paeth:
-          cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0, prior[k], 0));
-          break;
-        case STBI__F_avg_first:
-          cur[k] = raw[k];
-          break;
-        case STBI__F_paeth_first:
-          cur[k] = raw[k];
-          break;
-      }
-    }
-
-    if (depth == 8) {
-      if (img_n != out_n)
-        cur[img_n] = 255;  // first pixel
-      raw += img_n;
-      cur += out_n;
-      prior += out_n;
-    } else if (depth == 16) {
-      if (img_n != out_n) {
-        cur[filter_bytes] = 255;      // first pixel top byte
-        cur[filter_bytes + 1] = 255;  // first pixel bottom byte
-      }
-      raw += filter_bytes;
-      cur += output_bytes;
-      prior += output_bytes;
-    } else {
-      raw += 1;
-      cur += 1;
-      prior += 1;
-    }
-
-    // this is a little gross, so that we don't switch per-pixel or per-component
-    if (depth < 8 || img_n == out_n) {
-      int nk = (width - 1) * filter_bytes;
-#define STBI__CASE(f) \
-  case f:             \
-    for (k = 0; k < nk; ++k)
-      switch (filter) {
-        // "none" filter turns into a memcpy here; make that explicit.
-        case STBI__F_none:
-          memcpy(cur, raw, nk);
-          break;
-          STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k - filter_bytes]); }
-          break;
-          STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); }
-          break;
-          STBI__CASE(STBI__F_avg) {
-            cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k - filter_bytes]) >> 1));
-          }
-          break;
-          STBI__CASE(STBI__F_paeth) {
-            cur[k] = STBI__BYTECAST(
-                raw[k] + stbi__paeth(cur[k - filter_bytes], prior[k], prior[k - filter_bytes]));
-          }
-          break;
-          STBI__CASE(STBI__F_avg_first) {
-            cur[k] = STBI__BYTECAST(raw[k] + (cur[k - filter_bytes] >> 1));
-          }
-          break;
-          STBI__CASE(STBI__F_paeth_first) {
-            cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - filter_bytes], 0, 0));
-          }
-          break;
-      }
-#undef STBI__CASE
-      raw += nk;
-    } else {
-      STBI_ASSERT(img_n + 1 == out_n);
-#define STBI__CASE(f)                                                          \
-  case f:                                                                      \
-    for (i = x - 1; i >= 1; --i, cur[filter_bytes] = 255, raw += filter_bytes, \
-        cur += output_bytes, prior += output_bytes)                            \
-      for (k = 0; k < filter_bytes; ++k)
-      switch (filter) {
-        STBI__CASE(STBI__F_none) { cur[k] = raw[k]; }
-        break;
-        STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k - output_bytes]); }
-        break;
-        STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); }
-        break;
-        STBI__CASE(STBI__F_avg) {
-          cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k - output_bytes]) >> 1));
-        }
-        break;
-        STBI__CASE(STBI__F_paeth) {
-          cur[k] = STBI__BYTECAST(
-              raw[k] + stbi__paeth(cur[k - output_bytes], prior[k], prior[k - output_bytes]));
-        }
-        break;
-        STBI__CASE(STBI__F_avg_first) {
-          cur[k] = STBI__BYTECAST(raw[k] + (cur[k - output_bytes] >> 1));
-        }
-        break;
-        STBI__CASE(STBI__F_paeth_first) {
-          cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - output_bytes], 0, 0));
-        }
-        break;
-      }
-#undef STBI__CASE
-
-      // the loop above sets the high byte of the pixels' alpha, but for
-      // 16 bit png files we also need the low byte set. we'll do that here.
-      if (depth == 16) {
-        cur = a->out + stride * j;  // start at the beginning of the row again
-        for (i = 0; i < x; ++i, cur += output_bytes) {
-          cur[filter_bytes + 1] = 255;
-        }
-      }
-    }
-  }
-
-  // we make a separate pass to expand bits to pixels; for performance,
-  // this could run two scanlines behind the above code, so it won't
-  // intefere with filtering but will still be in the cache.
-  if (depth < 8) {
-    for (j = 0; j < y; ++j) {
-      stbi_uc* cur = a->out + stride * j;
-      stbi_uc* in = a->out + stride * j + x * out_n - img_width_bytes;
-      // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at
-      // minimal cost for 1/2/4-bit png guarante byte alignment, if width is not multiple of 8/4/2
-      // we'll decode dummy trailing data that will be skipped in the later loop
-      stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth]
-                                   : 1;  // scale grayscale values to 0..255 range
-
-      // note that the final byte might overshoot and write more data than desired.
-      // we can allocate enough data that this never writes out of memory, but it
-      // could also overwrite the next scanline. can it overwrite non-empty data
-      // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
-      // so we need to explicitly clamp the final ones
-
-      if (depth == 4) {
-        for (k = x * img_n; k >= 2; k -= 2, ++in) {
-          *cur++ = scale * ((*in >> 4));
-          *cur++ = scale * ((*in) & 0x0f);
-        }
-        if (k > 0)
-          *cur++ = scale * ((*in >> 4));
-      } else if (depth == 2) {
-        for (k = x * img_n; k >= 4; k -= 4, ++in) {
-          *cur++ = scale * ((*in >> 6));
-          *cur++ = scale * ((*in >> 4) & 0x03);
-          *cur++ = scale * ((*in >> 2) & 0x03);
-          *cur++ = scale * ((*in) & 0x03);
-        }
-        if (k > 0)
-          *cur++ = scale * ((*in >> 6));
-        if (k > 1)
-          *cur++ = scale * ((*in >> 4) & 0x03);
-        if (k > 2)
-          *cur++ = scale * ((*in >> 2) & 0x03);
-      } else if (depth == 1) {
-        for (k = x * img_n; k >= 8; k -= 8, ++in) {
-          *cur++ = scale * ((*in >> 7));
-          *cur++ = scale * ((*in >> 6) & 0x01);
-          *cur++ = scale * ((*in >> 5) & 0x01);
-          *cur++ = scale * ((*in >> 4) & 0x01);
-          *cur++ = scale * ((*in >> 3) & 0x01);
-          *cur++ = scale * ((*in >> 2) & 0x01);
-          *cur++ = scale * ((*in >> 1) & 0x01);
-          *cur++ = scale * ((*in) & 0x01);
-        }
-        if (k > 0)
-          *cur++ = scale * ((*in >> 7));
-        if (k > 1)
-          *cur++ = scale * ((*in >> 6) & 0x01);
-        if (k > 2)
-          *cur++ = scale * ((*in >> 5) & 0x01);
-        if (k > 3)
-          *cur++ = scale * ((*in >> 4) & 0x01);
-        if (k > 4)
-          *cur++ = scale * ((*in >> 3) & 0x01);
-        if (k > 5)
-          *cur++ = scale * ((*in >> 2) & 0x01);
-        if (k > 6)
-          *cur++ = scale * ((*in >> 1) & 0x01);
-      }
-      if (img_n != out_n) {
-        int q;
-        // insert alpha = 255
-        cur = a->out + stride * j;
-        if (img_n == 1) {
-          for (q = x - 1; q >= 0; --q) {
-            cur[q * 2 + 1] = 255;
-            cur[q * 2 + 0] = cur[q];
-          }
-        } else {
-          STBI_ASSERT(img_n == 3);
-          for (q = x - 1; q >= 0; --q) {
-            cur[q * 4 + 3] = 255;
-            cur[q * 4 + 2] = cur[q * 3 + 2];
-            cur[q * 4 + 1] = cur[q * 3 + 1];
-            cur[q * 4 + 0] = cur[q * 3 + 0];
-          }
-        }
-      }
-    }
-  } else if (depth == 16) {
-    // force the image data from big-endian to platform-native.
-    // this is done in a separate pass due to the decoding relying
-    // on the data being untouched, but could probably be done
-    // per-line during decode if care is taken.
-    stbi_uc* cur = a->out;
-    stbi__uint16* cur16 = (stbi__uint16*)cur;
-
-    for (i = 0; i < x * y * out_n; ++i, cur16++, cur += 2) {
-      *cur16 = (cur[0] << 8) | cur[1];
-    }
-  }
-
-  return 1;
-}
-
-static int stbi__create_png_image(stbi__png* a,
-                                  stbi_uc* image_data,
-                                  stbi__uint32 image_data_len,
-                                  int out_n,
-                                  int depth,
-                                  int color,
-                                  int interlaced) {
-  int bytes = (depth == 16 ? 2 : 1);
-  int out_bytes = out_n * bytes;
-  stbi_uc* final;
-  int p;
-  if (!interlaced)
-    return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x,
-                                      a->s->img_y, depth, color);
-
-  // de-interlacing
-  final = (stbi_uc*)stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
-  if (!final)
-    return stbi__err("outofmem", "Out of memory");
-  for (p = 0; p < 7; ++p) {
-    int xorig[] = {0, 4, 0, 2, 0, 1, 0};
-    int yorig[] = {0, 0, 4, 0, 2, 0, 1};
-    int xspc[] = {8, 8, 4, 4, 2, 2, 1};
-    int yspc[] = {8, 8, 8, 4, 4, 2, 2};
-    int i, j, x, y;
-    // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
-    x = (a->s->img_x - xorig[p] + xspc[p] - 1) / xspc[p];
-    y = (a->s->img_y - yorig[p] + yspc[p] - 1) / yspc[p];
-    if (x && y) {
-      stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
-      if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
-        STBI_FREE(final);
-        return 0;
-      }
-      for (j = 0; j < y; ++j) {
-        for (i = 0; i < x; ++i) {
-          int out_y = j * yspc[p] + yorig[p];
-          int out_x = i * xspc[p] + xorig[p];
-          memcpy(final + out_y * a->s->img_x * out_bytes + out_x * out_bytes,
-                 a->out + (j * x + i) * out_bytes, out_bytes);
-        }
-      }
-      STBI_FREE(a->out);
-      image_data += img_len;
-      image_data_len -= img_len;
-    }
-  }
-  a->out = final;
-
-  return 1;
-}
-
-static int stbi__compute_transparency(stbi__png* z, stbi_uc tc[3], int out_n) {
-  stbi__context* s = z->s;
-  stbi__uint32 i, pixel_count = s->img_x * s->img_y;
-  stbi_uc* p = z->out;
-
-  // compute color-based transparency, assuming we've
-  // already got 255 as the alpha value in the output
-  STBI_ASSERT(out_n == 2 || out_n == 4);
-
-  if (out_n == 2) {
-    for (i = 0; i < pixel_count; ++i) {
-      p[1] = (p[0] == tc[0] ? 0 : 255);
-      p += 2;
-    }
-  } else {
-    for (i = 0; i < pixel_count; ++i) {
-      if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
-        p[3] = 0;
-      p += 4;
-    }
-  }
-  return 1;
-}
-
-static int stbi__compute_transparency16(stbi__png* z, stbi__uint16 tc[3], int out_n) {
-  stbi__context* s = z->s;
-  stbi__uint32 i, pixel_count = s->img_x * s->img_y;
-  stbi__uint16* p = (stbi__uint16*)z->out;
-
-  // compute color-based transparency, assuming we've
-  // already got 65535 as the alpha value in the output
-  STBI_ASSERT(out_n == 2 || out_n == 4);
-
-  if (out_n == 2) {
-    for (i = 0; i < pixel_count; ++i) {
-      p[1] = (p[0] == tc[0] ? 0 : 65535);
-      p += 2;
-    }
-  } else {
-    for (i = 0; i < pixel_count; ++i) {
-      if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
-        p[3] = 0;
-      p += 4;
-    }
-  }
-  return 1;
-}
-
-static int stbi__expand_png_palette(stbi__png* a, stbi_uc* palette, int len, int pal_img_n) {
-  stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
-  stbi_uc *p, *temp_out, *orig = a->out;
-
-  p = (stbi_uc*)stbi__malloc_mad2(pixel_count, pal_img_n, 0);
-  if (p == NULL)
-    return stbi__err("outofmem", "Out of memory");
-
-  // between here and free(out) below, exitting would leak
-  temp_out = p;
-
-  if (pal_img_n == 3) {
-    for (i = 0; i < pixel_count; ++i) {
-      int n = orig[i] * 4;
-      p[0] = palette[n];
-      p[1] = palette[n + 1];
-      p[2] = palette[n + 2];
-      p += 3;
-    }
-  } else {
-    for (i = 0; i < pixel_count; ++i) {
-      int n = orig[i] * 4;
-      p[0] = palette[n];
-      p[1] = palette[n + 1];
-      p[2] = palette[n + 2];
-      p[3] = palette[n + 3];
-      p += 4;
-    }
-  }
-  STBI_FREE(a->out);
-  a->out = temp_out;
-
-  STBI_NOTUSED(len);
-
-  return 1;
-}
-
-static int stbi__unpremultiply_on_load_global = 0;
-static int stbi__de_iphone_flag_global = 0;
-
-STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply) {
-  stbi__unpremultiply_on_load_global = flag_true_if_should_unpremultiply;
-}
-
-STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert) {
-  stbi__de_iphone_flag_global = flag_true_if_should_convert;
-}
-
-#ifndef STBI_THREAD_LOCAL
-#define stbi__unpremultiply_on_load stbi__unpremultiply_on_load_global
-#define stbi__de_iphone_flag stbi__de_iphone_flag_global
-#else
-static STBI_THREAD_LOCAL int stbi__unpremultiply_on_load_local, stbi__unpremultiply_on_load_set;
-static STBI_THREAD_LOCAL int stbi__de_iphone_flag_local, stbi__de_iphone_flag_set;
-
-STBIDEF void stbi__unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply) {
-  stbi__unpremultiply_on_load_local = flag_true_if_should_unpremultiply;
-  stbi__unpremultiply_on_load_set = 1;
-}
-
-STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert) {
-  stbi__de_iphone_flag_local = flag_true_if_should_convert;
-  stbi__de_iphone_flag_set = 1;
-}
-
-#define stbi__unpremultiply_on_load                                    \
-  (stbi__unpremultiply_on_load_set ? stbi__unpremultiply_on_load_local \
-                                   : stbi__unpremultiply_on_load_global)
-#define stbi__de_iphone_flag \
-  (stbi__de_iphone_flag_set ? stbi__de_iphone_flag_local : stbi__de_iphone_flag_global)
-#endif  // STBI_THREAD_LOCAL
-
-static void stbi__de_iphone(stbi__png* z) {
-  stbi__context* s = z->s;
-  stbi__uint32 i, pixel_count = s->img_x * s->img_y;
-  stbi_uc* p = z->out;
-
-  if (s->img_out_n == 3) {  // convert bgr to rgb
-    for (i = 0; i < pixel_count; ++i) {
-      stbi_uc t = p[0];
-      p[0] = p[2];
-      p[2] = t;
-      p += 3;
-    }
-  } else {
-    STBI_ASSERT(s->img_out_n == 4);
-    if (stbi__unpremultiply_on_load) {
-      // convert bgr to rgb and unpremultiply
-      for (i = 0; i < pixel_count; ++i) {
-        stbi_uc a = p[3];
-        stbi_uc t = p[0];
-        if (a) {
-          stbi_uc half = a / 2;
-          p[0] = (p[2] * 255 + half) / a;
-          p[1] = (p[1] * 255 + half) / a;
-          p[2] = (t * 255 + half) / a;
-        } else {
-          p[0] = p[2];
-          p[2] = t;
-        }
-        p += 4;
-      }
-    } else {
-      // convert bgr to rgb
-      for (i = 0; i < pixel_count; ++i) {
-        stbi_uc t = p[0];
-        p[0] = p[2];
-        p[2] = t;
-        p += 4;
-      }
-    }
-  }
-}
-
-#define STBI__PNG_TYPE(a, b, c, d) \
-  (((unsigned)(a) << 24) + ((unsigned)(b) << 16) + ((unsigned)(c) << 8) + (unsigned)(d))
-
-static int stbi__parse_png_file(stbi__png* z, int scan, int req_comp) {
-  stbi_uc palette[1024], pal_img_n = 0;
-  stbi_uc has_trans = 0, tc[3] = {0};
-  stbi__uint16 tc16[3];
-  stbi__uint32 ioff = 0, idata_limit = 0, i, pal_len = 0;
-  int first = 1, k, interlace = 0, color = 0, is_iphone = 0;
-  stbi__context* s = z->s;
-
-  z->expanded = NULL;
-  z->idata = NULL;
-  z->out = NULL;
-
-  if (!stbi__check_png_header(s))
-    return 0;
-
-  if (scan == STBI__SCAN_type)
-    return 1;
-
-  for (;;) {
-    stbi__pngchunk c = stbi__get_chunk_header(s);
-    switch (c.type) {
-      case STBI__PNG_TYPE('C', 'g', 'B', 'I'):
-        is_iphone = 1;
-        stbi__skip(s, c.length);
-        break;
-      case STBI__PNG_TYPE('I', 'H', 'D', 'R'): {
-        int comp, filter;
-        if (!first)
-          return stbi__err("multiple IHDR", "Corrupt PNG");
-        first = 0;
-        if (c.length != 13)
-          return stbi__err("bad IHDR len", "Corrupt PNG");
-        s->img_x = stbi__get32be(s);
-        s->img_y = stbi__get32be(s);
-        if (s->img_y > STBI_MAX_DIMENSIONS)
-          return stbi__err("too large", "Very large image (corrupt?)");
-        if (s->img_x > STBI_MAX_DIMENSIONS)
-          return stbi__err("too large", "Very large image (corrupt?)");
-        z->depth = stbi__get8(s);
-        if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)
-          return stbi__err("1/2/4/8/16-bit only", "PNG not supported: 1/2/4/8/16-bit only");
-        color = stbi__get8(s);
-        if (color > 6)
-          return stbi__err("bad ctype", "Corrupt PNG");
-        if (color == 3 && z->depth == 16)
-          return stbi__err("bad ctype", "Corrupt PNG");
-        if (color == 3)
-          pal_img_n = 3;
-        else if (color & 1)
-          return stbi__err("bad ctype", "Corrupt PNG");
-        comp = stbi__get8(s);
-        if (comp)
-          return stbi__err("bad comp method", "Corrupt PNG");
-        filter = stbi__get8(s);
-        if (filter)
-          return stbi__err("bad filter method", "Corrupt PNG");
-        interlace = stbi__get8(s);
-        if (interlace > 1)
-          return stbi__err("bad interlace method", "Corrupt PNG");
-        if (!s->img_x || !s->img_y)
-          return stbi__err("0-pixel image", "Corrupt PNG");
-        if (!pal_img_n) {
-          s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
-          if ((1 << 30) / s->img_x / s->img_n < s->img_y)
-            return stbi__err("too large", "Image too large to decode");
-          if (scan == STBI__SCAN_header)
-            return 1;
-        } else {
-          // if paletted, then pal_n is our final components, and
-          // img_n is # components to decompress/filter.
-          s->img_n = 1;
-          if ((1 << 30) / s->img_x / 4 < s->img_y)
-            return stbi__err("too large", "Corrupt PNG");
-          // if SCAN_header, have to scan to see if we have a tRNS
-        }
-        break;
-      }
-
-      case STBI__PNG_TYPE('P', 'L', 'T', 'E'): {
-        if (first)
-          return stbi__err("first not IHDR", "Corrupt PNG");
-        if (c.length > 256 * 3)
-          return stbi__err("invalid PLTE", "Corrupt PNG");
-        pal_len = c.length / 3;
-        if (pal_len * 3 != c.length)
-          return stbi__err("invalid PLTE", "Corrupt PNG");
-        for (i = 0; i < pal_len; ++i) {
-          palette[i * 4 + 0] = stbi__get8(s);
-          palette[i * 4 + 1] = stbi__get8(s);
-          palette[i * 4 + 2] = stbi__get8(s);
-          palette[i * 4 + 3] = 255;
-        }
-        break;
-      }
-
-      case STBI__PNG_TYPE('t', 'R', 'N', 'S'): {
-        if (first)
-          return stbi__err("first not IHDR", "Corrupt PNG");
-        if (z->idata)
-          return stbi__err("tRNS after IDAT", "Corrupt PNG");
-        if (pal_img_n) {
-          if (scan == STBI__SCAN_header) {
-            s->img_n = 4;
-            return 1;
-          }
-          if (pal_len == 0)
-            return stbi__err("tRNS before PLTE", "Corrupt PNG");
-          if (c.length > pal_len)
-            return stbi__err("bad tRNS len", "Corrupt PNG");
-          pal_img_n = 4;
-          for (i = 0; i < c.length; ++i)
-            palette[i * 4 + 3] = stbi__get8(s);
-        } else {
-          if (!(s->img_n & 1))
-            return stbi__err("tRNS with alpha", "Corrupt PNG");
-          if (c.length != (stbi__uint32)s->img_n * 2)
-            return stbi__err("bad tRNS len", "Corrupt PNG");
-          has_trans = 1;
-          if (z->depth == 16) {
-            for (k = 0; k < s->img_n; ++k)
-              tc16[k] = (stbi__uint16)stbi__get16be(s);  // copy the values as-is
-          } else {
-            for (k = 0; k < s->img_n; ++k)
-              tc[k] = (stbi_uc)(stbi__get16be(s) & 255) *
-                      stbi__depth_scale_table[z->depth];  // non 8-bit images will be larger
-          }
-        }
-        break;
-      }
-
-      case STBI__PNG_TYPE('I', 'D', 'A', 'T'): {
-        if (first)
-          return stbi__err("first not IHDR", "Corrupt PNG");
-        if (pal_img_n && !pal_len)
-          return stbi__err("no PLTE", "Corrupt PNG");
-        if (scan == STBI__SCAN_header) {
-          s->img_n = pal_img_n;
-          return 1;
-        }
-        if ((int)(ioff + c.length) < (int)ioff)
-          return 0;
-        if (ioff + c.length > idata_limit) {
-          stbi__uint32 idata_limit_old = idata_limit;
-          stbi_uc* p;
-          if (idata_limit == 0)
-            idata_limit = c.length > 4096 ? c.length : 4096;
-          while (ioff + c.length > idata_limit)
-            idata_limit *= 2;
-          STBI_NOTUSED(idata_limit_old);
-          p = (stbi_uc*)STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit);
-          if (p == NULL)
-            return stbi__err("outofmem", "Out of memory");
-          z->idata = p;
-        }
-        if (!stbi__getn(s, z->idata + ioff, c.length))
-          return stbi__err("outofdata", "Corrupt PNG");
-        ioff += c.length;
-        break;
-      }
-
-      case STBI__PNG_TYPE('I', 'E', 'N', 'D'): {
-        stbi__uint32 raw_len, bpl;
-        if (first)
-          return stbi__err("first not IHDR", "Corrupt PNG");
-        if (scan != STBI__SCAN_load)
-          return 1;
-        if (z->idata == NULL)
-          return stbi__err("no IDAT", "Corrupt PNG");
-        // initial guess for decoded data size to avoid unnecessary reallocs
-        bpl = (s->img_x * z->depth + 7) / 8;  // bytes per line, per component
-        raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
-        z->expanded = (stbi_uc*)stbi_zlib_decode_malloc_guesssize_headerflag(
-            (char*)z->idata, ioff, raw_len, (int*)&raw_len, !is_iphone);
-        if (z->expanded == NULL)
-          return 0;  // zlib should set error
-        STBI_FREE(z->idata);
-        z->idata = NULL;
-        if ((req_comp == s->img_n + 1 && req_comp != 3 && !pal_img_n) || has_trans)
-          s->img_out_n = s->img_n + 1;
-        else
-          s->img_out_n = s->img_n;
-        if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color,
-                                    interlace))
-          return 0;
-        if (has_trans) {
-          if (z->depth == 16) {
-            if (!stbi__compute_transparency16(z, tc16, s->img_out_n))
-              return 0;
-          } else {
-            if (!stbi__compute_transparency(z, tc, s->img_out_n))
-              return 0;
-          }
-        }
-        if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
-          stbi__de_iphone(z);
-        if (pal_img_n) {
-          // pal_img_n == 3 or 4
-          s->img_n = pal_img_n;  // record the actual colors we had
-          s->img_out_n = pal_img_n;
-          if (req_comp >= 3)
-            s->img_out_n = req_comp;
-          if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
-            return 0;
-        } else if (has_trans) {
-          // non-paletted image with tRNS -> source image has (constant) alpha
-          ++s->img_n;
-        }
-        STBI_FREE(z->expanded);
-        z->expanded = NULL;
-        // end of PNG chunk, read and skip CRC
-        stbi__get32be(s);
-        return 1;
-      }
-
-      default:
-        // if critical, fail
-        if (first)
-          return stbi__err("first not IHDR", "Corrupt PNG");
-        if ((c.type & (1 << 29)) == 0) {
-#ifndef STBI_NO_FAILURE_STRINGS
-          // not threadsafe
-          static char invalid_chunk[] = "XXXX PNG chunk not known";
-          invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
-          invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
-          invalid_chunk[2] = STBI__BYTECAST(c.type >> 8);
-          invalid_chunk[3] = STBI__BYTECAST(c.type >> 0);
-#endif
-          return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
-        }
-        stbi__skip(s, c.length);
-        break;
-    }
-    // end of PNG chunk, read and skip CRC
-    stbi__get32be(s);
-  }
-}
-
-static void* stbi__do_png(stbi__png* p,
-                          int* x,
-                          int* y,
-                          int* n,
-                          int req_comp,
-                          stbi__result_info* ri) {
-  void* result = NULL;
-  if (req_comp < 0 || req_comp > 4)
-    return stbi__errpuc("bad req_comp", "Internal error");
-  if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
-    if (p->depth <= 8)
-      ri->bits_per_channel = 8;
-    else if (p->depth == 16)
-      ri->bits_per_channel = 16;
-    else
-      return stbi__errpuc("bad bits_per_channel", "PNG not supported: unsupported color depth");
-    result = p->out;
-    p->out = NULL;
-    if (req_comp && req_comp != p->s->img_out_n) {
-      if (ri->bits_per_channel == 8)
-        result = stbi__convert_format((unsigned char*)result, p->s->img_out_n, req_comp,
-                                      p->s->img_x, p->s->img_y);
-      else
-        result = stbi__convert_format16((stbi__uint16*)result, p->s->img_out_n, req_comp,
-                                        p->s->img_x, p->s->img_y);
-      p->s->img_out_n = req_comp;
-      if (result == NULL)
-        return result;
-    }
-    *x = p->s->img_x;
-    *y = p->s->img_y;
-    if (n)
-      *n = p->s->img_n;
-  }
-  STBI_FREE(p->out);
-  p->out = NULL;
-  STBI_FREE(p->expanded);
-  p->expanded = NULL;
-  STBI_FREE(p->idata);
-  p->idata = NULL;
-
-  return result;
-}
-
-static void* stbi__png_load(stbi__context* s,
-                            int* x,
-                            int* y,
-                            int* comp,
-                            int req_comp,
-                            stbi__result_info* ri) {
-  stbi__png p;
-  p.s = s;
-  return stbi__do_png(&p, x, y, comp, req_comp, ri);
-}
-
-static int stbi__png_test(stbi__context* s) {
-  int r;
-  r = stbi__check_png_header(s);
-  stbi__rewind(s);
-  return r;
-}
-
-static int stbi__png_info_raw(stbi__png* p, int* x, int* y, int* comp) {
-  if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
-    stbi__rewind(p->s);
-    return 0;
-  }
-  if (x)
-    *x = p->s->img_x;
-  if (y)
-    *y = p->s->img_y;
-  if (comp)
-    *comp = p->s->img_n;
-  return 1;
-}
-
-static int stbi__png_info(stbi__context* s, int* x, int* y, int* comp) {
-  stbi__png p;
-  p.s = s;
-  return stbi__png_info_raw(&p, x, y, comp);
-}
-
-static int stbi__png_is16(stbi__context* s) {
-  stbi__png p;
-  p.s = s;
-  if (!stbi__png_info_raw(&p, NULL, NULL, NULL))
-    return 0;
-  if (p.depth != 16) {
-    stbi__rewind(p.s);
-    return 0;
-  }
-  return 1;
-}
-#endif
-
-// Microsoft/Windows BMP image
-
-#ifndef STBI_NO_BMP
-static int stbi__bmp_test_raw(stbi__context* s) {
-  int r;
-  int sz;
-  if (stbi__get8(s) != 'B')
-    return 0;
-  if (stbi__get8(s) != 'M')
-    return 0;
-  stbi__get32le(s);  // discard filesize
-  stbi__get16le(s);  // discard reserved
-  stbi__get16le(s);  // discard reserved
-  stbi__get32le(s);  // discard data offset
-  sz = stbi__get32le(s);
-  r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124);
-  return r;
-}
-
-static int stbi__bmp_test(stbi__context* s) {
-  int r = stbi__bmp_test_raw(s);
-  stbi__rewind(s);
-  return r;
-}
-
-// returns 0..31 for the highest set bit
-static int stbi__high_bit(unsigned int z) {
-  int n = 0;
-  if (z == 0)
-    return -1;
-  if (z >= 0x10000) {
-    n += 16;
-    z >>= 16;
-  }
-  if (z >= 0x00100) {
-    n += 8;
-    z >>= 8;
-  }
-  if (z >= 0x00010) {
-    n += 4;
-    z >>= 4;
-  }
-  if (z >= 0x00004) {
-    n += 2;
-    z >>= 2;
-  }
-  if (z >= 0x00002) {
-    n += 1; /* >>=  1;*/
-  }
-  return n;
-}
-
-static int stbi__bitcount(unsigned int a) {
-  a = (a & 0x55555555) + ((a >> 1) & 0x55555555);  // max 2
-  a = (a & 0x33333333) + ((a >> 2) & 0x33333333);  // max 4
-  a = (a + (a >> 4)) & 0x0f0f0f0f;                 // max 8 per 4, now 8 bits
-  a = (a + (a >> 8));                              // max 16 per 8 bits
-  a = (a + (a >> 16));                             // max 32 per 8 bits
-  return a & 0xff;
-}
-
-// extract an arbitrarily-aligned N-bit value (N=bits)
-// from v, and then make it 8-bits long and fractionally
-// extend it to full full range.
-static int stbi__shiftsigned(unsigned int v, int shift, int bits) {
-  static unsigned int mul_table[9] = {
-      0,
-      0xff /*0b11111111*/,
-      0x55 /*0b01010101*/,
-      0x49 /*0b01001001*/,
-      0x11 /*0b00010001*/,
-      0x21 /*0b00100001*/,
-      0x41 /*0b01000001*/,
-      0x81 /*0b10000001*/,
-      0x01 /*0b00000001*/,
-  };
-  static unsigned int shift_table[9] = {
-      0, 0, 0, 1, 0, 2, 4, 6, 0,
-  };
-  if (shift < 0)
-    v <<= -shift;
-  else
-    v >>= shift;
-  STBI_ASSERT(v < 256);
-  v >>= (8 - bits);
-  STBI_ASSERT(bits >= 0 && bits <= 8);
-  return (int)((unsigned)v * mul_table[bits]) >> shift_table[bits];
-}
-
-typedef struct {
-  int bpp, offset, hsz;
-  unsigned int mr, mg, mb, ma, all_a;
-  int extra_read;
-} stbi__bmp_data;
-
-static int stbi__bmp_set_mask_defaults(stbi__bmp_data* info, int compress) {
-  // BI_BITFIELDS specifies masks explicitly, don't override
-  if (compress == 3)
-    return 1;
-
-  if (compress == 0) {
-    if (info->bpp == 16) {
-      info->mr = 31u << 10;
-      info->mg = 31u << 5;
-      info->mb = 31u << 0;
-    } else if (info->bpp == 32) {
-      info->mr = 0xffu << 16;
-      info->mg = 0xffu << 8;
-      info->mb = 0xffu << 0;
-      info->ma = 0xffu << 24;
-      info->all_a = 0;  // if all_a is 0 at end, then we loaded alpha channel but it was all 0
-    } else {
-      // otherwise, use defaults, which is all-0
-      info->mr = info->mg = info->mb = info->ma = 0;
-    }
-    return 1;
-  }
-  return 0;  // error
-}
-
-static void* stbi__bmp_parse_header(stbi__context* s, stbi__bmp_data* info) {
-  int hsz;
-  if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M')
-    return stbi__errpuc("not BMP", "Corrupt BMP");
-  stbi__get32le(s);  // discard filesize
-  stbi__get16le(s);  // discard reserved
-  stbi__get16le(s);  // discard reserved
-  info->offset = stbi__get32le(s);
-  info->hsz = hsz = stbi__get32le(s);
-  info->mr = info->mg = info->mb = info->ma = 0;
-  info->extra_read = 14;
-
-  if (info->offset < 0)
-    return stbi__errpuc("bad BMP", "bad BMP");
-
-  if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124)
-    return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
-  if (hsz == 12) {
-    s->img_x = stbi__get16le(s);
-    s->img_y = stbi__get16le(s);
-  } else {
-    s->img_x = stbi__get32le(s);
-    s->img_y = stbi__get32le(s);
-  }
-  if (stbi__get16le(s) != 1)
-    return stbi__errpuc("bad BMP", "bad BMP");
-  info->bpp = stbi__get16le(s);
-  if (hsz != 12) {
-    int compress = stbi__get32le(s);
-    if (compress == 1 || compress == 2)
-      return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
-    if (compress >= 4)
-      return stbi__errpuc(
-          "BMP JPEG/PNG",
-          "BMP type not supported: unsupported compression");  // this includes PNG/JPEG modes
-    if (compress == 3 && info->bpp != 16 && info->bpp != 32)
-      return stbi__errpuc("bad BMP", "bad BMP");  // bitfields requires 16 or 32 bits/pixel
-    stbi__get32le(s);                             // discard sizeof
-    stbi__get32le(s);                             // discard hres
-    stbi__get32le(s);                             // discard vres
-    stbi__get32le(s);                             // discard colorsused
-    stbi__get32le(s);                             // discard max important
-    if (hsz == 40 || hsz == 56) {
-      if (hsz == 56) {
-        stbi__get32le(s);
-        stbi__get32le(s);
-        stbi__get32le(s);
-        stbi__get32le(s);
-      }
-      if (info->bpp == 16 || info->bpp == 32) {
-        if (compress == 0) {
-          stbi__bmp_set_mask_defaults(info, compress);
-        } else if (compress == 3) {
-          info->mr = stbi__get32le(s);
-          info->mg = stbi__get32le(s);
-          info->mb = stbi__get32le(s);
-          info->extra_read += 12;
-          // not documented, but generated by photoshop and handled by mspaint
-          if (info->mr == info->mg && info->mg == info->mb) {
-            // ?!?!?
-            return stbi__errpuc("bad BMP", "bad BMP");
-          }
-        } else
-          return stbi__errpuc("bad BMP", "bad BMP");
-      }
-    } else {
-      // V4/V5 header
-      int i;
-      if (hsz != 108 && hsz != 124)
-        return stbi__errpuc("bad BMP", "bad BMP");
-      info->mr = stbi__get32le(s);
-      info->mg = stbi__get32le(s);
-      info->mb = stbi__get32le(s);
-      info->ma = stbi__get32le(s);
-      if (compress != 3)  // override mr/mg/mb unless in BI_BITFIELDS mode, as per docs
-        stbi__bmp_set_mask_defaults(info, compress);
-      stbi__get32le(s);  // discard color space
-      for (i = 0; i < 12; ++i)
-        stbi__get32le(s);  // discard color space parameters
-      if (hsz == 124) {
-        stbi__get32le(s);  // discard rendering intent
-        stbi__get32le(s);  // discard offset of profile data
-        stbi__get32le(s);  // discard size of profile data
-        stbi__get32le(s);  // discard reserved
-      }
-    }
-  }
-  return (void*)1;
-}
-
-static void* stbi__bmp_load(stbi__context* s,
-                            int* x,
-                            int* y,
-                            int* comp,
-                            int req_comp,
-                            stbi__result_info* ri) {
-  stbi_uc* out;
-  unsigned int mr = 0, mg = 0, mb = 0, ma = 0, all_a;
-  stbi_uc pal[256][4];
-  int psize = 0, i, j, width;
-  int flip_vertically, pad, target;
-  stbi__bmp_data info;
-  STBI_NOTUSED(ri);
-
-  info.all_a = 255;
-  if (stbi__bmp_parse_header(s, &info) == NULL)
-    return NULL;  // error code already set
-
-  flip_vertically = ((int)s->img_y) > 0;
-  s->img_y = abs((int)s->img_y);
-
-  if (s->img_y > STBI_MAX_DIMENSIONS)
-    return stbi__errpuc("too large", "Very large image (corrupt?)");
-  if (s->img_x > STBI_MAX_DIMENSIONS)
-    return stbi__errpuc("too large", "Very large image (corrupt?)");
-
-  mr = info.mr;
-  mg = info.mg;
-  mb = info.mb;
-  ma = info.ma;
-  all_a = info.all_a;
-
-  if (info.hsz == 12) {
-    if (info.bpp < 24)
-      psize = (info.offset - info.extra_read - 24) / 3;
-  } else {
-    if (info.bpp < 16)
-      psize = (info.offset - info.extra_read - info.hsz) >> 2;
-  }
-  if (psize == 0) {
-    if (info.offset != s->callback_already_read + (s->img_buffer - s->img_buffer_original)) {
-      return stbi__errpuc("bad offset", "Corrupt BMP");
-    }
-  }
-
-  if (info.bpp == 24 && ma == 0xff000000)
-    s->img_n = 3;
-  else
-    s->img_n = ma ? 4 : 3;
-  if (req_comp && req_comp >= 3)  // we can directly decode 3 or 4
-    target = req_comp;
-  else
-    target = s->img_n;  // if they want monochrome, we'll post-convert
-
-  // sanity-check size
-  if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
-    return stbi__errpuc("too large", "Corrupt BMP");
-
-  out = (stbi_uc*)stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
-  if (!out)
-    return stbi__errpuc("outofmem", "Out of memory");
-  if (info.bpp < 16) {
-    int z = 0;
-    if (psize == 0 || psize > 256) {
-      STBI_FREE(out);
-      return stbi__errpuc("invalid", "Corrupt BMP");
-    }
-    for (i = 0; i < psize; ++i) {
-      pal[i][2] = stbi__get8(s);
-      pal[i][1] = stbi__get8(s);
-      pal[i][0] = stbi__get8(s);
-      if (info.hsz != 12)
-        stbi__get8(s);
-      pal[i][3] = 255;
-    }
-    stbi__skip(s, info.offset - info.extra_read - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
-    if (info.bpp == 1)
-      width = (s->img_x + 7) >> 3;
-    else if (info.bpp == 4)
-      width = (s->img_x + 1) >> 1;
-    else if (info.bpp == 8)
-      width = s->img_x;
-    else {
-      STBI_FREE(out);
-      return stbi__errpuc("bad bpp", "Corrupt BMP");
-    }
-    pad = (-width) & 3;
-    if (info.bpp == 1) {
-      for (j = 0; j < (int)s->img_y; ++j) {
-        int bit_offset = 7, v = stbi__get8(s);
-        for (i = 0; i < (int)s->img_x; ++i) {
-          int color = (v >> bit_offset) & 0x1;
-          out[z++] = pal[color][0];
-          out[z++] = pal[color][1];
-          out[z++] = pal[color][2];
-          if (target == 4)
-            out[z++] = 255;
-          if (i + 1 == (int)s->img_x)
-            break;
-          if ((--bit_offset) < 0) {
-            bit_offset = 7;
-            v = stbi__get8(s);
-          }
-        }
-        stbi__skip(s, pad);
-      }
-    } else {
-      for (j = 0; j < (int)s->img_y; ++j) {
-        for (i = 0; i < (int)s->img_x; i += 2) {
-          int v = stbi__get8(s), v2 = 0;
-          if (info.bpp == 4) {
-            v2 = v & 15;
-            v >>= 4;
-          }
-          out[z++] = pal[v][0];
-          out[z++] = pal[v][1];
-          out[z++] = pal[v][2];
-          if (target == 4)
-            out[z++] = 255;
-          if (i + 1 == (int)s->img_x)
-            break;
-          v = (info.bpp == 8) ? stbi__get8(s) : v2;
-          out[z++] = pal[v][0];
-          out[z++] = pal[v][1];
-          out[z++] = pal[v][2];
-          if (target == 4)
-            out[z++] = 255;
-        }
-        stbi__skip(s, pad);
-      }
-    }
-  } else {
-    int rshift = 0, gshift = 0, bshift = 0, ashift = 0, rcount = 0, gcount = 0, bcount = 0,
-        acount = 0;
-    int z = 0;
-    int easy = 0;
-    stbi__skip(s, info.offset - info.extra_read - info.hsz);
-    if (info.bpp == 24)
-      width = 3 * s->img_x;
-    else if (info.bpp == 16)
-      width = 2 * s->img_x;
-    else /* bpp = 32 and pad = 0 */
-      width = 0;
-    pad = (-width) & 3;
-    if (info.bpp == 24) {
-      easy = 1;
-    } else if (info.bpp == 32) {
-      if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
-        easy = 2;
-    }
-    if (!easy) {
-      if (!mr || !mg || !mb) {
-        STBI_FREE(out);
-        return stbi__errpuc("bad masks", "Corrupt BMP");
-      }
-      // right shift amt to put high bit in position #7
-      rshift = stbi__high_bit(mr) - 7;
-      rcount = stbi__bitcount(mr);
-      gshift = stbi__high_bit(mg) - 7;
-      gcount = stbi__bitcount(mg);
-      bshift = stbi__high_bit(mb) - 7;
-      bcount = stbi__bitcount(mb);
-      ashift = stbi__high_bit(ma) - 7;
-      acount = stbi__bitcount(ma);
-      if (rcount > 8 || gcount > 8 || bcount > 8 || acount > 8) {
-        STBI_FREE(out);
-        return stbi__errpuc("bad masks", "Corrupt BMP");
-      }
-    }
-    for (j = 0; j < (int)s->img_y; ++j) {
-      if (easy) {
-        for (i = 0; i < (int)s->img_x; ++i) {
-          unsigned char a;
-          out[z + 2] = stbi__get8(s);
-          out[z + 1] = stbi__get8(s);
-          out[z + 0] = stbi__get8(s);
-          z += 3;
-          a = (easy == 2 ? stbi__get8(s) : 255);
-          all_a |= a;
-          if (target == 4)
-            out[z++] = a;
-        }
-      } else {
-        int bpp = info.bpp;
-        for (i = 0; i < (int)s->img_x; ++i) {
-          stbi__uint32 v = (bpp == 16 ? (stbi__uint32)stbi__get16le(s) : stbi__get32le(s));
-          unsigned int a;
-          out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
-          out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
-          out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
-          a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
-          all_a |= a;
-          if (target == 4)
-            out[z++] = STBI__BYTECAST(a);
-        }
-      }
-      stbi__skip(s, pad);
-    }
-  }
-
-  // if alpha channel is all 0s, replace with all 255s
-  if (target == 4 && all_a == 0)
-    for (i = 4 * s->img_x * s->img_y - 1; i >= 0; i -= 4)
-      out[i] = 255;
-
-  if (flip_vertically) {
-    stbi_uc t;
-    for (j = 0; j < (int)s->img_y >> 1; ++j) {
-      stbi_uc* p1 = out + j * s->img_x * target;
-      stbi_uc* p2 = out + (s->img_y - 1 - j) * s->img_x * target;
-      for (i = 0; i < (int)s->img_x * target; ++i) {
-        t = p1[i];
-        p1[i] = p2[i];
-        p2[i] = t;
-      }
-    }
-  }
-
-  if (req_comp && req_comp != target) {
-    out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
-    if (out == NULL)
-      return out;  // stbi__convert_format frees input on failure
-  }
-
-  *x = s->img_x;
-  *y = s->img_y;
-  if (comp)
-    *comp = s->img_n;
-  return out;
-}
-#endif
-
-// Targa Truevision - TGA
-// by Jonathan Dummer
-#ifndef STBI_NO_TGA
-// returns STBI_rgb or whatever, 0 on error
-static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16) {
-  // only RGB or RGBA (incl. 16bit) or grey allowed
-  if (is_rgb16)
-    *is_rgb16 = 0;
-  switch (bits_per_pixel) {
-    case 8:
-      return STBI_grey;
-    case 16:
-      if (is_grey)
-        return STBI_grey_alpha;
-      // fallthrough
-    case 15:
-      if (is_rgb16)
-        *is_rgb16 = 1;
-      return STBI_rgb;
-    case 24:  // fallthrough
-    case 32:
-      return bits_per_pixel / 8;
-    default:
-      return 0;
-  }
-}
-
-static int stbi__tga_info(stbi__context* s, int* x, int* y, int* comp) {
-  int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp;
-  int sz, tga_colormap_type;
-  stbi__get8(s);                      // discard Offset
-  tga_colormap_type = stbi__get8(s);  // colormap type
-  if (tga_colormap_type > 1) {
-    stbi__rewind(s);
-    return 0;  // only RGB or indexed allowed
-  }
-  tga_image_type = stbi__get8(s);  // image type
-  if (tga_colormap_type == 1) {    // colormapped (paletted) image
-    if (tga_image_type != 1 && tga_image_type != 9) {
-      stbi__rewind(s);
-      return 0;
-    }
-    stbi__skip(s, 4);    // skip index of first colormap entry and number of entries
-    sz = stbi__get8(s);  //   check bits per palette color entry
-    if ((sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32)) {
-      stbi__rewind(s);
-      return 0;
-    }
-    stbi__skip(s, 4);  // skip image x and y origin
-    tga_colormap_bpp = sz;
-  } else {  // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
-    if ((tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) &&
-        (tga_image_type != 11)) {
-      stbi__rewind(s);
-      return 0;  // only RGB or grey allowed, +/- RLE
-    }
-    stbi__skip(s, 9);  // skip colormap specification and image x/y origin
-    tga_colormap_bpp = 0;
-  }
-  tga_w = stbi__get16le(s);
-  if (tga_w < 1) {
-    stbi__rewind(s);
-    return 0;  // test width
-  }
-  tga_h = stbi__get16le(s);
-  if (tga_h < 1) {
-    stbi__rewind(s);
-    return 0;  // test height
-  }
-  tga_bits_per_pixel = stbi__get8(s);  // bits per pixel
-  stbi__get8(s);                       // ignore alpha bits
-  if (tga_colormap_bpp != 0) {
-    if ((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) {
-      // when using a colormap, tga_bits_per_pixel is the size of the indexes
-      // I don't think anything but 8 or 16bit indexes makes sense
-      stbi__rewind(s);
-      return 0;
-    }
-    tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL);
-  } else {
-    tga_comp = stbi__tga_get_comp(tga_bits_per_pixel,
-                                  (tga_image_type == 3) || (tga_image_type == 11), NULL);
-  }
-  if (!tga_comp) {
-    stbi__rewind(s);
-    return 0;
-  }
-  if (x)
-    *x = tga_w;
-  if (y)
-    *y = tga_h;
-  if (comp)
-    *comp = tga_comp;
-  return 1;  // seems to have passed everything
-}
-
-static int stbi__tga_test(stbi__context* s) {
-  int res = 0;
-  int sz, tga_color_type;
-  stbi__get8(s);                   //   discard Offset
-  tga_color_type = stbi__get8(s);  //   color type
-  if (tga_color_type > 1)
-    goto errorEnd;            //   only RGB or indexed allowed
-  sz = stbi__get8(s);         //   image type
-  if (tga_color_type == 1) {  // colormapped (paletted) image
-    if (sz != 1 && sz != 9)
-      goto errorEnd;     // colortype 1 demands image type 1 or 9
-    stbi__skip(s, 4);    // skip index of first colormap entry and number of entries
-    sz = stbi__get8(s);  //   check bits per palette color entry
-    if ((sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32))
-      goto errorEnd;
-    stbi__skip(s, 4);  // skip image x and y origin
-  } else {             // "normal" image w/o colormap
-    if ((sz != 2) && (sz != 3) && (sz != 10) && (sz != 11))
-      goto errorEnd;   // only RGB or grey allowed, +/- RLE
-    stbi__skip(s, 9);  // skip colormap specification and image x/y origin
-  }
-  if (stbi__get16le(s) < 1)
-    goto errorEnd;  //   test width
-  if (stbi__get16le(s) < 1)
-    goto errorEnd;     //   test height
-  sz = stbi__get8(s);  //   bits per pixel
-  if ((tga_color_type == 1) && (sz != 8) && (sz != 16))
-    goto errorEnd;  // for colormapped images, bpp is size of an index
-  if ((sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32))
-    goto errorEnd;
-
-  res = 1;  // if we got this far, everything's good and we can return 1 instead of 0
-
-errorEnd:
-  stbi__rewind(s);
-  return res;
-}
-
-// read 16bit value and convert to 24bit RGB
-static void stbi__tga_read_rgb16(stbi__context* s, stbi_uc* out) {
-  stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
-  stbi__uint16 fiveBitMask = 31;
-  // we have 3 channels with 5bits each
-  int r = (px >> 10) & fiveBitMask;
-  int g = (px >> 5) & fiveBitMask;
-  int b = px & fiveBitMask;
-  // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
-  out[0] = (stbi_uc)((r * 255) / 31);
-  out[1] = (stbi_uc)((g * 255) / 31);
-  out[2] = (stbi_uc)((b * 255) / 31);
-
-  // some people claim that the most significant bit might be used for alpha
-  // (possibly if an alpha-bit is set in the "image descriptor byte")
-  // but that only made 16bit test images completely translucent..
-  // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
-}
-
-static void* stbi__tga_load(stbi__context* s,
-                            int* x,
-                            int* y,
-                            int* comp,
-                            int req_comp,
-                            stbi__result_info* ri) {
-  //   read in the TGA header stuff
-  int tga_offset = stbi__get8(s);
-  int tga_indexed = stbi__get8(s);
-  int tga_image_type = stbi__get8(s);
-  int tga_is_RLE = 0;
-  int tga_palette_start = stbi__get16le(s);
-  int tga_palette_len = stbi__get16le(s);
-  int tga_palette_bits = stbi__get8(s);
-  int tga_x_origin = stbi__get16le(s);
-  int tga_y_origin = stbi__get16le(s);
-  int tga_width = stbi__get16le(s);
-  int tga_height = stbi__get16le(s);
-  int tga_bits_per_pixel = stbi__get8(s);
-  int tga_comp, tga_rgb16 = 0;
-  int tga_inverted = stbi__get8(s);
-  // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
-  //   image data
-  unsigned char* tga_data;
-  unsigned char* tga_palette = NULL;
-  int i, j;
-  unsigned char raw_data[4] = {0};
-  int RLE_count = 0;
-  int RLE_repeating = 0;
-  int read_next_pixel = 1;
-  STBI_NOTUSED(ri);
-  STBI_NOTUSED(tga_x_origin);  // @TODO
-  STBI_NOTUSED(tga_y_origin);  // @TODO
-
-  if (tga_height > STBI_MAX_DIMENSIONS)
-    return stbi__errpuc("too large", "Very large image (corrupt?)");
-  if (tga_width > STBI_MAX_DIMENSIONS)
-    return stbi__errpuc("too large", "Very large image (corrupt?)");
-
-  //   do a tiny bit of precessing
-  if (tga_image_type >= 8) {
-    tga_image_type -= 8;
-    tga_is_RLE = 1;
-  }
-  tga_inverted = 1 - ((tga_inverted >> 5) & 1);
-
-  //   If I'm paletted, then I'll use the number of bits from the palette
-  if (tga_indexed)
-    tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
-  else
-    tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);
-
-  if (!tga_comp)  // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
-    return stbi__errpuc("bad format", "Can't find out TGA pixelformat");
-
-  //   tga info
-  *x = tga_width;
-  *y = tga_height;
-  if (comp)
-    *comp = tga_comp;
-
-  if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
-    return stbi__errpuc("too large", "Corrupt TGA");
-
-  tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
-  if (!tga_data)
-    return stbi__errpuc("outofmem", "Out of memory");
-
-  // skip to the data's starting position (offset usually = 0)
-  stbi__skip(s, tga_offset);
-
-  if (!tga_indexed && !tga_is_RLE && !tga_rgb16) {
-    for (i = 0; i < tga_height; ++i) {
-      int row = tga_inverted ? tga_height - i - 1 : i;
-      stbi_uc* tga_row = tga_data + row * tga_width * tga_comp;
-      stbi__getn(s, tga_row, tga_width * tga_comp);
-    }
-  } else {
-    //   do I need to load a palette?
-    if (tga_indexed) {
-      if (tga_palette_len == 0) { /* you have to have at least one entry! */
-        STBI_FREE(tga_data);
-        return stbi__errpuc("bad palette", "Corrupt TGA");
-      }
-
-      //   any data to skip? (offset usually = 0)
-      stbi__skip(s, tga_palette_start);
-      //   load the palette
-      tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
-      if (!tga_palette) {
-        STBI_FREE(tga_data);
-        return stbi__errpuc("outofmem", "Out of memory");
-      }
-      if (tga_rgb16) {
-        stbi_uc* pal_entry = tga_palette;
-        STBI_ASSERT(tga_comp == STBI_rgb);
-        for (i = 0; i < tga_palette_len; ++i) {
-          stbi__tga_read_rgb16(s, pal_entry);
-          pal_entry += tga_comp;
-        }
-      } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
-        STBI_FREE(tga_data);
-        STBI_FREE(tga_palette);
-        return stbi__errpuc("bad palette", "Corrupt TGA");
-      }
-    }
-    //   load the data
-    for (i = 0; i < tga_width * tga_height; ++i) {
-      //   if I'm in RLE mode, do I need to get a RLE stbi__pngchunk?
-      if (tga_is_RLE) {
-        if (RLE_count == 0) {
-          //   yep, get the next byte as a RLE command
-          int RLE_cmd = stbi__get8(s);
-          RLE_count = 1 + (RLE_cmd & 127);
-          RLE_repeating = RLE_cmd >> 7;
-          read_next_pixel = 1;
-        } else if (!RLE_repeating) {
-          read_next_pixel = 1;
-        }
-      } else {
-        read_next_pixel = 1;
-      }
-      //   OK, if I need to read a pixel, do it now
-      if (read_next_pixel) {
-        //   load however much data we did have
-        if (tga_indexed) {
-          // read in index, then perform the lookup
-          int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
-          if (pal_idx >= tga_palette_len) {
-            // invalid index
-            pal_idx = 0;
-          }
-          pal_idx *= tga_comp;
-          for (j = 0; j < tga_comp; ++j) {
-            raw_data[j] = tga_palette[pal_idx + j];
-          }
-        } else if (tga_rgb16) {
-          STBI_ASSERT(tga_comp == STBI_rgb);
-          stbi__tga_read_rgb16(s, raw_data);
-        } else {
-          //   read in the data raw
-          for (j = 0; j < tga_comp; ++j) {
-            raw_data[j] = stbi__get8(s);
-          }
-        }
-        //   clear the reading flag for the next pixel
-        read_next_pixel = 0;
-      }  // end of reading a pixel
-
-      // copy data
-      for (j = 0; j < tga_comp; ++j)
-        tga_data[i * tga_comp + j] = raw_data[j];
-
-      //   in case we're in RLE mode, keep counting down
-      --RLE_count;
-    }
-    //   do I need to invert the image?
-    if (tga_inverted) {
-      for (j = 0; j * 2 < tga_height; ++j) {
-        int index1 = j * tga_width * tga_comp;
-        int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
-        for (i = tga_width * tga_comp; i > 0; --i) {
-          unsigned char temp = tga_data[index1];
-          tga_data[index1] = tga_data[index2];
-          tga_data[index2] = temp;
-          ++index1;
-          ++index2;
-        }
-      }
-    }
-    //   clear my palette, if I had one
-    if (tga_palette != NULL) {
-      STBI_FREE(tga_palette);
-    }
-  }
-
-  // swap RGB - if the source data was RGB16, it already is in the right order
-  if (tga_comp >= 3 && !tga_rgb16) {
-    unsigned char* tga_pixel = tga_data;
-    for (i = 0; i < tga_width * tga_height; ++i) {
-      unsigned char temp = tga_pixel[0];
-      tga_pixel[0] = tga_pixel[2];
-      tga_pixel[2] = temp;
-      tga_pixel += tga_comp;
-    }
-  }
-
-  // convert to target component count
-  if (req_comp && req_comp != tga_comp)
-    tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);
-
-  //   the things I do to get rid of an error message, and yet keep
-  //   Microsoft's C compilers happy... [8^(
-  tga_palette_start = tga_palette_len = tga_palette_bits = tga_x_origin = tga_y_origin = 0;
-  STBI_NOTUSED(tga_palette_start);
-  //   OK, done
-  return tga_data;
-}
-#endif
-
-// *************************************************************************************************
-// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
-
-#ifndef STBI_NO_PSD
-static int stbi__psd_test(stbi__context* s) {
-  int r = (stbi__get32be(s) == 0x38425053);
-  stbi__rewind(s);
-  return r;
-}
-
-static int stbi__psd_decode_rle(stbi__context* s, stbi_uc* p, int pixelCount) {
-  int count, nleft, len;
-
-  count = 0;
-  while ((nleft = pixelCount - count) > 0) {
-    len = stbi__get8(s);
-    if (len == 128) {
-      // No-op.
-    } else if (len < 128) {
-      // Copy next len+1 bytes literally.
-      len++;
-      if (len > nleft)
-        return 0;  // corrupt data
-      count += len;
-      while (len) {
-        *p = stbi__get8(s);
-        p += 4;
-        len--;
-      }
-    } else if (len > 128) {
-      stbi_uc val;
-      // Next -len+1 bytes in the dest are replicated from next source byte.
-      // (Interpret len as a negative 8-bit int.)
-      len = 257 - len;
-      if (len > nleft)
-        return 0;  // corrupt data
-      val = stbi__get8(s);
-      count += len;
-      while (len) {
-        *p = val;
-        p += 4;
-        len--;
-      }
-    }
-  }
-
-  return 1;
-}
-
-static void* stbi__psd_load(stbi__context* s,
-                            int* x,
-                            int* y,
-                            int* comp,
-                            int req_comp,
-                            stbi__result_info* ri,
-                            int bpc) {
-  int pixelCount;
-  int channelCount, compression;
-  int channel, i;
-  int bitdepth;
-  int w, h;
-  stbi_uc* out;
-  STBI_NOTUSED(ri);
-
-  // Check identifier
-  if (stbi__get32be(s) != 0x38425053)  // "8BPS"
-    return stbi__errpuc("not PSD", "Corrupt PSD image");
-
-  // Check file type version.
-  if (stbi__get16be(s) != 1)
-    return stbi__errpuc("wrong version", "Unsupported version of PSD image");
-
-  // Skip 6 reserved bytes.
-  stbi__skip(s, 6);
-
-  // Read the number of channels (R, G, B, A, etc).
-  channelCount = stbi__get16be(s);
-  if (channelCount < 0 || channelCount > 16)
-    return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");
-
-  // Read the rows and columns of the image.
-  h = stbi__get32be(s);
-  w = stbi__get32be(s);
-
-  if (h > STBI_MAX_DIMENSIONS)
-    return stbi__errpuc("too large", "Very large image (corrupt?)");
-  if (w > STBI_MAX_DIMENSIONS)
-    return stbi__errpuc("too large", "Very large image (corrupt?)");
-
-  // Make sure the depth is 8 bits.
-  bitdepth = stbi__get16be(s);
-  if (bitdepth != 8 && bitdepth != 16)
-    return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");
-
-  // Make sure the color mode is RGB.
-  // Valid options are:
-  //   0: Bitmap
-  //   1: Grayscale
-  //   2: Indexed color
-  //   3: RGB color
-  //   4: CMYK color
-  //   7: Multichannel
-  //   8: Duotone
-  //   9: Lab color
-  if (stbi__get16be(s) != 3)
-    return stbi__errpuc("wrong color format", "PSD is not in RGB color format");
-
-  // Skip the Mode Data.  (It's the palette for indexed color; other info for other modes.)
-  stbi__skip(s, stbi__get32be(s));
-
-  // Skip the image resources.  (resolution, pen tool paths, etc)
-  stbi__skip(s, stbi__get32be(s));
-
-  // Skip the reserved data.
-  stbi__skip(s, stbi__get32be(s));
-
-  // Find out if the data is compressed.
-  // Known values:
-  //   0: no compression
-  //   1: RLE compressed
-  compression = stbi__get16be(s);
-  if (compression > 1)
-    return stbi__errpuc("bad compression", "PSD has an unknown compression format");
-
-  // Check size
-  if (!stbi__mad3sizes_valid(4, w, h, 0))
-    return stbi__errpuc("too large", "Corrupt PSD");
-
-  // Create the destination image.
-
-  if (!compression && bitdepth == 16 && bpc == 16) {
-    out = (stbi_uc*)stbi__malloc_mad3(8, w, h, 0);
-    ri->bits_per_channel = 16;
-  } else
-    out = (stbi_uc*)stbi__malloc(4 * w * h);
-
-  if (!out)
-    return stbi__errpuc("outofmem", "Out of memory");
-  pixelCount = w * h;
-
-  // Initialize the data to zero.
-  // memset( out, 0, pixelCount * 4 );
-
-  // Finally, the image data.
-  if (compression) {
-    // RLE as used by .PSD and .TIFF
-    // Loop until you get the number of unpacked bytes you are expecting:
-    //     Read the next source byte into n.
-    //     If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
-    //     Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
-    //     Else if n is 128, noop.
-    // Endloop
-
-    // The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
-    // which we're going to just skip.
-    stbi__skip(s, h * channelCount * 2);
-
-    // Read the RLE data by channel.
-    for (channel = 0; channel < 4; channel++) {
-      stbi_uc* p;
-
-      p = out + channel;
-      if (channel >= channelCount) {
-        // Fill this channel with default data.
-        for (i = 0; i < pixelCount; i++, p += 4)
-          *p = (channel == 3 ? 255 : 0);
-      } else {
-        // Read the RLE data.
-        if (!stbi__psd_decode_rle(s, p, pixelCount)) {
-          STBI_FREE(out);
-          return stbi__errpuc("corrupt", "bad RLE data");
-        }
-      }
-    }
-
-  } else {
-    // We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...)
-    // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image.
-
-    // Read the data by channel.
-    for (channel = 0; channel < 4; channel++) {
-      if (channel >= channelCount) {
-        // Fill this channel with default data.
-        if (bitdepth == 16 && bpc == 16) {
-          stbi__uint16* q = ((stbi__uint16*)out) + channel;
-          stbi__uint16 val = channel == 3 ? 65535 : 0;
-          for (i = 0; i < pixelCount; i++, q += 4)
-            *q = val;
-        } else {
-          stbi_uc* p = out + channel;
-          stbi_uc val = channel == 3 ? 255 : 0;
-          for (i = 0; i < pixelCount; i++, p += 4)
-            *p = val;
-        }
-      } else {
-        if (ri->bits_per_channel == 16) {  // output bpc
-          stbi__uint16* q = ((stbi__uint16*)out) + channel;
-          for (i = 0; i < pixelCount; i++, q += 4)
-            *q = (stbi__uint16)stbi__get16be(s);
-        } else {
-          stbi_uc* p = out + channel;
-          if (bitdepth == 16) {  // input bpc
-            for (i = 0; i < pixelCount; i++, p += 4)
-              *p = (stbi_uc)(stbi__get16be(s) >> 8);
-          } else {
-            for (i = 0; i < pixelCount; i++, p += 4)
-              *p = stbi__get8(s);
-          }
-        }
-      }
-    }
-  }
-
-  // remove weird white matte from PSD
-  if (channelCount >= 4) {
-    if (ri->bits_per_channel == 16) {
-      for (i = 0; i < w * h; ++i) {
-        stbi__uint16* pixel = (stbi__uint16*)out + 4 * i;
-        if (pixel[3] != 0 && pixel[3] != 65535) {
-          float a = pixel[3] / 65535.0f;
-          float ra = 1.0f / a;
-          float inv_a = 65535.0f * (1 - ra);
-          pixel[0] = (stbi__uint16)(pixel[0] * ra + inv_a);
-          pixel[1] = (stbi__uint16)(pixel[1] * ra + inv_a);
-          pixel[2] = (stbi__uint16)(pixel[2] * ra + inv_a);
-        }
-      }
-    } else {
-      for (i = 0; i < w * h; ++i) {
-        unsigned char* pixel = out + 4 * i;
-        if (pixel[3] != 0 && pixel[3] != 255) {
-          float a = pixel[3] / 255.0f;
-          float ra = 1.0f / a;
-          float inv_a = 255.0f * (1 - ra);
-          pixel[0] = (unsigned char)(pixel[0] * ra + inv_a);
-          pixel[1] = (unsigned char)(pixel[1] * ra + inv_a);
-          pixel[2] = (unsigned char)(pixel[2] * ra + inv_a);
-        }
-      }
-    }
-  }
-
-  // convert to desired output format
-  if (req_comp && req_comp != 4) {
-    if (ri->bits_per_channel == 16)
-      out = (stbi_uc*)stbi__convert_format16((stbi__uint16*)out, 4, req_comp, w, h);
-    else
-      out = stbi__convert_format(out, 4, req_comp, w, h);
-    if (out == NULL)
-      return out;  // stbi__convert_format frees input on failure
-  }
-
-  if (comp)
-    *comp = 4;
-  *y = h;
-  *x = w;
-
-  return out;
-}
-#endif
-
-// *************************************************************************************************
-// Softimage PIC loader
-// by Tom Seddon
-//
-// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
-// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
-
-#ifndef STBI_NO_PIC
-static int stbi__pic_is4(stbi__context* s, const char* str) {
-  int i;
-  for (i = 0; i < 4; ++i)
-    if (stbi__get8(s) != (stbi_uc)str[i])
-      return 0;
-
-  return 1;
-}
-
-static int stbi__pic_test_core(stbi__context* s) {
-  int i;
-
-  if (!stbi__pic_is4(s, "\x53\x80\xF6\x34"))
-    return 0;
-
-  for (i = 0; i < 84; ++i)
-    stbi__get8(s);
-
-  if (!stbi__pic_is4(s, "PICT"))
-    return 0;
-
-  return 1;
-}
-
-typedef struct {
-  stbi_uc size, type, channel;
-} stbi__pic_packet;
-
-static stbi_uc* stbi__readval(stbi__context* s, int channel, stbi_uc* dest) {
-  int mask = 0x80, i;
-
-  for (i = 0; i < 4; ++i, mask >>= 1) {
-    if (channel & mask) {
-      if (stbi__at_eof(s))
-        return stbi__errpuc("bad file", "PIC file too short");
-      dest[i] = stbi__get8(s);
-    }
-  }
-
-  return dest;
-}
-
-static void stbi__copyval(int channel, stbi_uc* dest, const stbi_uc* src) {
-  int mask = 0x80, i;
-
-  for (i = 0; i < 4; ++i, mask >>= 1)
-    if (channel & mask)
-      dest[i] = src[i];
-}
-
-static stbi_uc* stbi__pic_load_core(stbi__context* s,
-                                    int width,
-                                    int height,
-                                    int* comp,
-                                    stbi_uc* result) {
-  int act_comp = 0, num_packets = 0, y, chained;
-  stbi__pic_packet packets[10];
-
-  // this will (should...) cater for even some bizarre stuff like having data
-  // for the same channel in multiple packets.
-  do {
-    stbi__pic_packet* packet;
-
-    if (num_packets == sizeof(packets) / sizeof(packets[0]))
-      return stbi__errpuc("bad format", "too many packets");
-
-    packet = &packets[num_packets++];
-
-    chained = stbi__get8(s);
-    packet->size = stbi__get8(s);
-    packet->type = stbi__get8(s);
-    packet->channel = stbi__get8(s);
-
-    act_comp |= packet->channel;
-
-    if (stbi__at_eof(s))
-      return stbi__errpuc("bad file", "file too short (reading packets)");
-    if (packet->size != 8)
-      return stbi__errpuc("bad format", "packet isn't 8bpp");
-  } while (chained);
-
-  *comp = (act_comp & 0x10 ? 4 : 3);  // has alpha channel?
-
-  for (y = 0; y < height; ++y) {
-    int packet_idx;
-
-    for (packet_idx = 0; packet_idx < num_packets; ++packet_idx) {
-      stbi__pic_packet* packet = &packets[packet_idx];
-      stbi_uc* dest = result + y * width * 4;
-
-      switch (packet->type) {
-        default:
-          return stbi__errpuc("bad format", "packet has bad compression type");
-
-        case 0: {  // uncompressed
-          int x;
-
-          for (x = 0; x < width; ++x, dest += 4)
-            if (!stbi__readval(s, packet->channel, dest))
-              return 0;
-          break;
-        }
-
-        case 1:  // Pure RLE
-        {
-          int left = width, i;
-
-          while (left > 0) {
-            stbi_uc count, value[4];
-
-            count = stbi__get8(s);
-            if (stbi__at_eof(s))
-              return stbi__errpuc("bad file", "file too short (pure read count)");
-
-            if (count > left)
-              count = (stbi_uc)left;
-
-            if (!stbi__readval(s, packet->channel, value))
-              return 0;
-
-            for (i = 0; i < count; ++i, dest += 4)
-              stbi__copyval(packet->channel, dest, value);
-            left -= count;
-          }
-        } break;
-
-        case 2: {  // Mixed RLE
-          int left = width;
-          while (left > 0) {
-            int count = stbi__get8(s), i;
-            if (stbi__at_eof(s))
-              return stbi__errpuc("bad file", "file too short (mixed read count)");
-
-            if (count >= 128) {  // Repeated
-              stbi_uc value[4];
-
-              if (count == 128)
-                count = stbi__get16be(s);
-              else
-                count -= 127;
-              if (count > left)
-                return stbi__errpuc("bad file", "scanline overrun");
-
-              if (!stbi__readval(s, packet->channel, value))
-                return 0;
-
-              for (i = 0; i < count; ++i, dest += 4)
-                stbi__copyval(packet->channel, dest, value);
-            } else {  // Raw
-              ++count;
-              if (count > left)
-                return stbi__errpuc("bad file", "scanline overrun");
-
-              for (i = 0; i < count; ++i, dest += 4)
-                if (!stbi__readval(s, packet->channel, dest))
-                  return 0;
-            }
-            left -= count;
-          }
-          break;
-        }
-      }
-    }
-  }
-
-  return result;
-}
-
-static void* stbi__pic_load(stbi__context* s,
-                            int* px,
-                            int* py,
-                            int* comp,
-                            int req_comp,
-                            stbi__result_info* ri) {
-  stbi_uc* result;
-  int i, x, y, internal_comp;
-  STBI_NOTUSED(ri);
-
-  if (!comp)
-    comp = &internal_comp;
-
-  for (i = 0; i < 92; ++i)
-    stbi__get8(s);
-
-  x = stbi__get16be(s);
-  y = stbi__get16be(s);
-
-  if (y > STBI_MAX_DIMENSIONS)
-    return stbi__errpuc("too large", "Very large image (corrupt?)");
-  if (x > STBI_MAX_DIMENSIONS)
-    return stbi__errpuc("too large", "Very large image (corrupt?)");
-
-  if (stbi__at_eof(s))
-    return stbi__errpuc("bad file", "file too short (pic header)");
-  if (!stbi__mad3sizes_valid(x, y, 4, 0))
-    return stbi__errpuc("too large", "PIC image too large to decode");
-
-  stbi__get32be(s);  // skip `ratio'
-  stbi__get16be(s);  // skip `fields'
-  stbi__get16be(s);  // skip `pad'
-
-  // intermediate buffer is RGBA
-  result = (stbi_uc*)stbi__malloc_mad3(x, y, 4, 0);
-  if (!result)
-    return stbi__errpuc("outofmem", "Out of memory");
-  memset(result, 0xff, x * y * 4);
-
-  if (!stbi__pic_load_core(s, x, y, comp, result)) {
-    STBI_FREE(result);
-    result = 0;
-  }
-  *px = x;
-  *py = y;
-  if (req_comp == 0)
-    req_comp = *comp;
-  result = stbi__convert_format(result, 4, req_comp, x, y);
-
-  return result;
-}
-
-static int stbi__pic_test(stbi__context* s) {
-  int r = stbi__pic_test_core(s);
-  stbi__rewind(s);
-  return r;
-}
-#endif
-
-// *************************************************************************************************
-// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
-
-#ifndef STBI_NO_GIF
-typedef struct {
-  stbi__int16 prefix;
-  stbi_uc first;
-  stbi_uc suffix;
-} stbi__gif_lzw;
-
-typedef struct {
-  int w, h;
-  stbi_uc* out;         // output buffer (always 4 components)
-  stbi_uc* background;  // The current "background" as far as a gif is concerned
-  stbi_uc* history;
-  int flags, bgindex, ratio, transparent, eflags;
-  stbi_uc pal[256][4];
-  stbi_uc lpal[256][4];
-  stbi__gif_lzw codes[8192];
-  stbi_uc* color_table;
-  int parse, step;
-  int lflags;
-  int start_x, start_y;
-  int max_x, max_y;
-  int cur_x, cur_y;
-  int line_size;
-  int delay;
-} stbi__gif;
-
-static int stbi__gif_test_raw(stbi__context* s) {
-  int sz;
-  if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')
-    return 0;
-  sz = stbi__get8(s);
-  if (sz != '9' && sz != '7')
-    return 0;
-  if (stbi__get8(s) != 'a')
-    return 0;
-  return 1;
-}
-
-static int stbi__gif_test(stbi__context* s) {
-  int r = stbi__gif_test_raw(s);
-  stbi__rewind(s);
-  return r;
-}
-
-static void stbi__gif_parse_colortable(stbi__context* s,
-                                       stbi_uc pal[256][4],
-                                       int num_entries,
-                                       int transp) {
-  int i;
-  for (i = 0; i < num_entries; ++i) {
-    pal[i][2] = stbi__get8(s);
-    pal[i][1] = stbi__get8(s);
-    pal[i][0] = stbi__get8(s);
-    pal[i][3] = transp == i ? 0 : 255;
-  }
-}
-
-static int stbi__gif_header(stbi__context* s, stbi__gif* g, int* comp, int is_info) {
-  stbi_uc version;
-  if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')
-    return stbi__err("not GIF", "Corrupt GIF");
-
-  version = stbi__get8(s);
-  if (version != '7' && version != '9')
-    return stbi__err("not GIF", "Corrupt GIF");
-  if (stbi__get8(s) != 'a')
-    return stbi__err("not GIF", "Corrupt GIF");
-
-  stbi__g_failure_reason = "";
-  g->w = stbi__get16le(s);
-  g->h = stbi__get16le(s);
-  g->flags = stbi__get8(s);
-  g->bgindex = stbi__get8(s);
-  g->ratio = stbi__get8(s);
-  g->transparent = -1;
-
-  if (g->w > STBI_MAX_DIMENSIONS)
-    return stbi__err("too large", "Very large image (corrupt?)");
-  if (g->h > STBI_MAX_DIMENSIONS)
-    return stbi__err("too large", "Very large image (corrupt?)");
-
-  if (comp != 0)
-    *comp = 4;  // can't actually tell whether it's 3 or 4 until we parse the comments
-
-  if (is_info)
-    return 1;
-
-  if (g->flags & 0x80)
-    stbi__gif_parse_colortable(s, g->pal, 2 << (g->flags & 7), -1);
-
-  return 1;
-}
-
-static int stbi__gif_info_raw(stbi__context* s, int* x, int* y, int* comp) {
-  stbi__gif* g = (stbi__gif*)stbi__malloc(sizeof(stbi__gif));
-  if (!g)
-    return stbi__err("outofmem", "Out of memory");
-  if (!stbi__gif_header(s, g, comp, 1)) {
-    STBI_FREE(g);
-    stbi__rewind(s);
-    return 0;
-  }
-  if (x)
-    *x = g->w;
-  if (y)
-    *y = g->h;
-  STBI_FREE(g);
-  return 1;
-}
-
-static void stbi__out_gif_code(stbi__gif* g, stbi__uint16 code) {
-  stbi_uc *p, *c;
-  int idx;
-
-  // recurse to decode the prefixes, since the linked-list is backwards,
-  // and working backwards through an interleaved image would be nasty
-  if (g->codes[code].prefix >= 0)
-    stbi__out_gif_code(g, g->codes[code].prefix);
-
-  if (g->cur_y >= g->max_y)
-    return;
-
-  idx = g->cur_x + g->cur_y;
-  p = &g->out[idx];
-  g->history[idx / 4] = 1;
-
-  c = &g->color_table[g->codes[code].suffix * 4];
-  if (c[3] > 128) {  // don't render transparent pixels;
-    p[0] = c[2];
-    p[1] = c[1];
-    p[2] = c[0];
-    p[3] = c[3];
-  }
-  g->cur_x += 4;
-
-  if (g->cur_x >= g->max_x) {
-    g->cur_x = g->start_x;
-    g->cur_y += g->step;
-
-    while (g->cur_y >= g->max_y && g->parse > 0) {
-      g->step = (1 << g->parse) * g->line_size;
-      g->cur_y = g->start_y + (g->step >> 1);
-      --g->parse;
-    }
-  }
-}
-
-static stbi_uc* stbi__process_gif_raster(stbi__context* s, stbi__gif* g) {
-  stbi_uc lzw_cs;
-  stbi__int32 len, init_code;
-  stbi__uint32 first;
-  stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
-  stbi__gif_lzw* p;
-
-  lzw_cs = stbi__get8(s);
-  if (lzw_cs > 12)
-    return NULL;
-  clear = 1 << lzw_cs;
-  first = 1;
-  codesize = lzw_cs + 1;
-  codemask = (1 << codesize) - 1;
-  bits = 0;
-  valid_bits = 0;
-  for (init_code = 0; init_code < clear; init_code++) {
-    g->codes[init_code].prefix = -1;
-    g->codes[init_code].first = (stbi_uc)init_code;
-    g->codes[init_code].suffix = (stbi_uc)init_code;
-  }
-
-  // support no starting clear code
-  avail = clear + 2;
-  oldcode = -1;
-
-  len = 0;
-  for (;;) {
-    if (valid_bits < codesize) {
-      if (len == 0) {
-        len = stbi__get8(s);  // start new block
-        if (len == 0)
-          return g->out;
-      }
-      --len;
-      bits |= (stbi__int32)stbi__get8(s) << valid_bits;
-      valid_bits += 8;
-    } else {
-      stbi__int32 code = bits & codemask;
-      bits >>= codesize;
-      valid_bits -= codesize;
-      // @OPTIMIZE: is there some way we can accelerate the non-clear path?
-      if (code == clear) {  // clear code
-        codesize = lzw_cs + 1;
-        codemask = (1 << codesize) - 1;
-        avail = clear + 2;
-        oldcode = -1;
-        first = 0;
-      } else if (code == clear + 1) {  // end of stream code
-        stbi__skip(s, len);
-        while ((len = stbi__get8(s)) > 0)
-          stbi__skip(s, len);
-        return g->out;
-      } else if (code <= avail) {
-        if (first) {
-          return stbi__errpuc("no clear code", "Corrupt GIF");
-        }
-
-        if (oldcode >= 0) {
-          p = &g->codes[avail++];
-          if (avail > 8192) {
-            return stbi__errpuc("too many codes", "Corrupt GIF");
-          }
-
-          p->prefix = (stbi__int16)oldcode;
-          p->first = g->codes[oldcode].first;
-          p->suffix = (code == avail) ? p->first : g->codes[code].first;
-        } else if (code == avail)
-          return stbi__errpuc("illegal code in raster", "Corrupt GIF");
-
-        stbi__out_gif_code(g, (stbi__uint16)code);
-
-        if ((avail & codemask) == 0 && avail <= 0x0FFF) {
-          codesize++;
-          codemask = (1 << codesize) - 1;
-        }
-
-        oldcode = code;
-      } else {
-        return stbi__errpuc("illegal code in raster", "Corrupt GIF");
-      }
-    }
-  }
-}
-
-// this function is designed to support animated gifs, although stb_image doesn't support it
-// two back is the image from two frames ago, used for a very specific disposal format
-static stbi_uc* stbi__gif_load_next(stbi__context* s,
-                                    stbi__gif* g,
-                                    int* comp,
-                                    int req_comp,
-                                    stbi_uc* two_back) {
-  int dispose;
-  int first_frame;
-  int pi;
-  int pcount;
-  STBI_NOTUSED(req_comp);
-
-  // on first frame, any non-written pixels get the background colour (non-transparent)
-  first_frame = 0;
-  if (g->out == 0) {
-    if (!stbi__gif_header(s, g, comp, 0))
-      return 0;  // stbi__g_failure_reason set by stbi__gif_header
-    if (!stbi__mad3sizes_valid(4, g->w, g->h, 0))
-      return stbi__errpuc("too large", "GIF image is too large");
-    pcount = g->w * g->h;
-    g->out = (stbi_uc*)stbi__malloc(4 * pcount);
-    g->background = (stbi_uc*)stbi__malloc(4 * pcount);
-    g->history = (stbi_uc*)stbi__malloc(pcount);
-    if (!g->out || !g->background || !g->history)
-      return stbi__errpuc("outofmem", "Out of memory");
-
-    // image is treated as "transparent" at the start - ie, nothing overwrites the current
-    // background; background colour is only used for pixels that are not rendered first frame,
-    // after that "background" color refers to the color that was there the previous frame.
-    memset(g->out, 0x00, 4 * pcount);
-    memset(g->background, 0x00, 4 * pcount);  // state of the background (starts transparent)
-    memset(g->history, 0x00, pcount);         // pixels that were affected previous frame
-    first_frame = 1;
-  } else {
-    // second frame - how do we dispose of the previous one?
-    dispose = (g->eflags & 0x1C) >> 2;
-    pcount = g->w * g->h;
-
-    if ((dispose == 3) && (two_back == 0)) {
-      dispose = 2;  // if I don't have an image to revert back to, default to the old background
-    }
-
-    if (dispose == 3) {  // use previous graphic
-      for (pi = 0; pi < pcount; ++pi) {
-        if (g->history[pi]) {
-          memcpy(&g->out[pi * 4], &two_back[pi * 4], 4);
-        }
-      }
-    } else if (dispose == 2) {
-      // restore what was changed last frame to background before that frame;
-      for (pi = 0; pi < pcount; ++pi) {
-        if (g->history[pi]) {
-          memcpy(&g->out[pi * 4], &g->background[pi * 4], 4);
-        }
-      }
-    } else {
-      // This is a non-disposal case eithe way, so just
-      // leave the pixels as is, and they will become the new background
-      // 1: do not dispose
-      // 0:  not specified.
-    }
-
-    // background is what out is after the undoing of the previou frame;
-    memcpy(g->background, g->out, 4 * g->w * g->h);
-  }
-
-  // clear my history;
-  memset(g->history, 0x00, g->w * g->h);  // pixels that were affected previous frame
-
-  for (;;) {
-    int tag = stbi__get8(s);
-    switch (tag) {
-      case 0x2C: /* Image Descriptor */
-      {
-        stbi__int32 x, y, w, h;
-        stbi_uc* o;
-
-        x = stbi__get16le(s);
-        y = stbi__get16le(s);
-        w = stbi__get16le(s);
-        h = stbi__get16le(s);
-        if (((x + w) > (g->w)) || ((y + h) > (g->h)))
-          return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
-
-        g->line_size = g->w * 4;
-        g->start_x = x * 4;
-        g->start_y = y * g->line_size;
-        g->max_x = g->start_x + w * 4;
-        g->max_y = g->start_y + h * g->line_size;
-        g->cur_x = g->start_x;
-        g->cur_y = g->start_y;
-
-        // if the width of the specified rectangle is 0, that means
-        // we may not see *any* pixels or the image is malformed;
-        // to make sure this is caught, move the current y down to
-        // max_y (which is what out_gif_code checks).
-        if (w == 0)
-          g->cur_y = g->max_y;
-
-        g->lflags = stbi__get8(s);
-
-        if (g->lflags & 0x40) {
-          g->step = 8 * g->line_size;  // first interlaced spacing
-          g->parse = 3;
-        } else {
-          g->step = g->line_size;
-          g->parse = 0;
-        }
-
-        if (g->lflags & 0x80) {
-          stbi__gif_parse_colortable(s, g->lpal, 2 << (g->lflags & 7),
-                                     g->eflags & 0x01 ? g->transparent : -1);
-          g->color_table = (stbi_uc*)g->lpal;
-        } else if (g->flags & 0x80) {
-          g->color_table = (stbi_uc*)g->pal;
-        } else
-          return stbi__errpuc("missing color table", "Corrupt GIF");
-
-        o = stbi__process_gif_raster(s, g);
-        if (!o)
-          return NULL;
-
-        // if this was the first frame,
-        pcount = g->w * g->h;
-        if (first_frame && (g->bgindex > 0)) {
-          // if first frame, any pixel not drawn to gets the background color
-          for (pi = 0; pi < pcount; ++pi) {
-            if (g->history[pi] == 0) {
-              g->pal[g->bgindex][3] = 255;  // just in case it was made transparent, undo that; It
-                                            // will be reset next frame if need be;
-              memcpy(&g->out[pi * 4], &g->pal[g->bgindex], 4);
-            }
-          }
-        }
-
-        return o;
-      }
-
-      case 0x21:  // Comment Extension.
-      {
-        int len;
-        int ext = stbi__get8(s);
-        if (ext == 0xF9) {  // Graphic Control Extension.
-          len = stbi__get8(s);
-          if (len == 4) {
-            g->eflags = stbi__get8(s);
-            g->delay = 10 * stbi__get16le(s);  // delay - 1/100th of a second, saving as 1/1000ths.
-
-            // unset old transparent
-            if (g->transparent >= 0) {
-              g->pal[g->transparent][3] = 255;
-            }
-            if (g->eflags & 0x01) {
-              g->transparent = stbi__get8(s);
-              if (g->transparent >= 0) {
-                g->pal[g->transparent][3] = 0;
-              }
-            } else {
-              // don't need transparent
-              stbi__skip(s, 1);
-              g->transparent = -1;
-            }
-          } else {
-            stbi__skip(s, len);
-            break;
-          }
-        }
-        while ((len = stbi__get8(s)) != 0) {
-          stbi__skip(s, len);
-        }
-        break;
-      }
-
-      case 0x3B:             // gif stream termination code
-        return (stbi_uc*)s;  // using '1' causes warning on some compilers
-
-      default:
-        return stbi__errpuc("unknown code", "Corrupt GIF");
-    }
-  }
-}
-
-static void* stbi__load_gif_main_outofmem(stbi__gif* g, stbi_uc* out, int** delays) {
-  STBI_FREE(g->out);
-  STBI_FREE(g->history);
-  STBI_FREE(g->background);
-
-  if (out)
-    STBI_FREE(out);
-  if (delays && *delays)
-    STBI_FREE(*delays);
-  return stbi__errpuc("outofmem", "Out of memory");
-}
-
-static void* stbi__load_gif_main(stbi__context* s,
-                                 int** delays,
-                                 int* x,
-                                 int* y,
-                                 int* z,
-                                 int* comp,
-                                 int req_comp) {
-  if (stbi__gif_test(s)) {
-    int layers = 0;
-    stbi_uc* u = 0;
-    stbi_uc* out = 0;
-    stbi_uc* two_back = 0;
-    stbi__gif g;
-    int stride;
-    int out_size = 0;
-    int delays_size = 0;
-
-    STBI_NOTUSED(out_size);
-    STBI_NOTUSED(delays_size);
-
-    memset(&g, 0, sizeof(g));
-    if (delays) {
-      *delays = 0;
-    }
-
-    do {
-      u = stbi__gif_load_next(s, &g, comp, req_comp, two_back);
-      if (u == (stbi_uc*)s)
-        u = 0;  // end of animated gif marker
-
-      if (u) {
-        *x = g.w;
-        *y = g.h;
-        ++layers;
-        stride = g.w * g.h * 4;
-
-        if (out) {
-          void* tmp = (stbi_uc*)STBI_REALLOC_SIZED(out, out_size, layers * stride);
-          if (!tmp)
-            return stbi__load_gif_main_outofmem(&g, out, delays);
-          else {
-            out = (stbi_uc*)tmp;
-            out_size = layers * stride;
-          }
-
-          if (delays) {
-            int* new_delays = (int*)STBI_REALLOC_SIZED(*delays, delays_size, sizeof(int) * layers);
-            if (!new_delays)
-              return stbi__load_gif_main_outofmem(&g, out, delays);
-            *delays = new_delays;
-            delays_size = layers * sizeof(int);
-          }
-        } else {
-          out = (stbi_uc*)stbi__malloc(layers * stride);
-          if (!out)
-            return stbi__load_gif_main_outofmem(&g, out, delays);
-          out_size = layers * stride;
-          if (delays) {
-            *delays = (int*)stbi__malloc(layers * sizeof(int));
-            if (!*delays)
-              return stbi__load_gif_main_outofmem(&g, out, delays);
-            delays_size = layers * sizeof(int);
-          }
-        }
-        memcpy(out + ((layers - 1) * stride), u, stride);
-        if (layers >= 2) {
-          two_back = out - 2 * stride;
-        }
-
-        if (delays) {
-          (*delays)[layers - 1U] = g.delay;
-        }
-      }
-    } while (u != 0);
-
-    // free temp buffer;
-    STBI_FREE(g.out);
-    STBI_FREE(g.history);
-    STBI_FREE(g.background);
-
-    // do the final conversion after loading everything;
-    if (req_comp && req_comp != 4)
-      out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h);
-
-    *z = layers;
-    return out;
-  } else {
-    return stbi__errpuc("not GIF", "Image was not as a gif type.");
-  }
-}
-
-static void* stbi__gif_load(stbi__context* s,
-                            int* x,
-                            int* y,
-                            int* comp,
-                            int req_comp,
-                            stbi__result_info* ri) {
-  stbi_uc* u = 0;
-  stbi__gif g;
-  memset(&g, 0, sizeof(g));
-  STBI_NOTUSED(ri);
-
-  u = stbi__gif_load_next(s, &g, comp, req_comp, 0);
-  if (u == (stbi_uc*)s)
-    u = 0;  // end of animated gif marker
-  if (u) {
-    *x = g.w;
-    *y = g.h;
-
-    // moved conversion to after successful load so that the same
-    // can be done for multiple frames.
-    if (req_comp && req_comp != 4)
-      u = stbi__convert_format(u, 4, req_comp, g.w, g.h);
-  } else if (g.out) {
-    // if there was an error and we allocated an image buffer, free it!
-    STBI_FREE(g.out);
-  }
-
-  // free buffers needed for multiple frame loading;
-  STBI_FREE(g.history);
-  STBI_FREE(g.background);
-
-  return u;
-}
-
-static int stbi__gif_info(stbi__context* s, int* x, int* y, int* comp) {
-  return stbi__gif_info_raw(s, x, y, comp);
-}
-#endif
-
-// *************************************************************************************************
-// Radiance RGBE HDR loader
-// originally by Nicolas Schulz
-#ifndef STBI_NO_HDR
-static int stbi__hdr_test_core(stbi__context* s, const char* signature) {
-  int i;
-  for (i = 0; signature[i]; ++i)
-    if (stbi__get8(s) != signature[i])
-      return 0;
-  stbi__rewind(s);
-  return 1;
-}
-
-static int stbi__hdr_test(stbi__context* s) {
-  int r = stbi__hdr_test_core(s, "#?RADIANCE\n");
-  stbi__rewind(s);
-  if (!r) {
-    r = stbi__hdr_test_core(s, "#?RGBE\n");
-    stbi__rewind(s);
-  }
-  return r;
-}
-
-#define STBI__HDR_BUFLEN 1024
-static char* stbi__hdr_gettoken(stbi__context* z, char* buffer) {
-  int len = 0;
-  char c = '\0';
-
-  c = (char)stbi__get8(z);
-
-  while (!stbi__at_eof(z) && c != '\n') {
-    buffer[len++] = c;
-    if (len == STBI__HDR_BUFLEN - 1) {
-      // flush to end of line
-      while (!stbi__at_eof(z) && stbi__get8(z) != '\n')
-        ;
-      break;
-    }
-    c = (char)stbi__get8(z);
-  }
-
-  buffer[len] = 0;
-  return buffer;
-}
-
-static void stbi__hdr_convert(float* output, stbi_uc* input, int req_comp) {
-  if (input[3] != 0) {
-    float f1;
-    // Exponent
-    f1 = (float)ldexp(1.0f, input[3] - (int)(128 + 8));
-    if (req_comp <= 2)
-      output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
-    else {
-      output[0] = input[0] * f1;
-      output[1] = input[1] * f1;
-      output[2] = input[2] * f1;
-    }
-    if (req_comp == 2)
-      output[1] = 1;
-    if (req_comp == 4)
-      output[3] = 1;
-  } else {
-    switch (req_comp) {
-      case 4:
-        output[3] = 1; /* fallthrough */
-      case 3:
-        output[0] = output[1] = output[2] = 0;
-        break;
-      case 2:
-        output[1] = 1; /* fallthrough */
-      case 1:
-        output[0] = 0;
-        break;
-    }
-  }
-}
-
-static float* stbi__hdr_load(stbi__context* s,
-                             int* x,
-                             int* y,
-                             int* comp,
-                             int req_comp,
-                             stbi__result_info* ri) {
-  char buffer[STBI__HDR_BUFLEN];
-  char* token;
-  int valid = 0;
-  int width, height;
-  stbi_uc* scanline;
-  float* hdr_data;
-  int len;
-  unsigned char count, value;
-  int i, j, k, c1, c2, z;
-  const char* headerToken;
-  STBI_NOTUSED(ri);
-
-  // Check identifier
-  headerToken = stbi__hdr_gettoken(s, buffer);
-  if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
-    return stbi__errpf("not HDR", "Corrupt HDR image");
-
-  // Parse header
-  for (;;) {
-    token = stbi__hdr_gettoken(s, buffer);
-    if (token[0] == 0)
-      break;
-    if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0)
-      valid = 1;
-  }
-
-  if (!valid)
-    return stbi__errpf("unsupported format", "Unsupported HDR format");
-
-  // Parse width and height
-  // can't use sscanf() if we're not using stdio!
-  token = stbi__hdr_gettoken(s, buffer);
-  if (strncmp(token, "-Y ", 3))
-    return stbi__errpf("unsupported data layout", "Unsupported HDR format");
-  token += 3;
-  height = (int)strtol(token, &token, 10);
-  while (*token == ' ')
-    ++token;
-  if (strncmp(token, "+X ", 3))
-    return stbi__errpf("unsupported data layout", "Unsupported HDR format");
-  token += 3;
-  width = (int)strtol(token, NULL, 10);
-
-  if (height > STBI_MAX_DIMENSIONS)
-    return stbi__errpf("too large", "Very large image (corrupt?)");
-  if (width > STBI_MAX_DIMENSIONS)
-    return stbi__errpf("too large", "Very large image (corrupt?)");
-
-  *x = width;
-  *y = height;
-
-  if (comp)
-    *comp = 3;
-  if (req_comp == 0)
-    req_comp = 3;
-
-  if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
-    return stbi__errpf("too large", "HDR image is too large");
-
-  // Read data
-  hdr_data = (float*)stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
-  if (!hdr_data)
-    return stbi__errpf("outofmem", "Out of memory");
-
-  // Load image data
-  // image data is stored as some number of sca
-  if (width < 8 || width >= 32768) {
-    // Read flat data
-    for (j = 0; j < height; ++j) {
-      for (i = 0; i < width; ++i) {
-        stbi_uc rgbe[4];
-      main_decode_loop:
-        stbi__getn(s, rgbe, 4);
-        stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
-      }
-    }
-  } else {
-    // Read RLE-encoded data
-    scanline = NULL;
-
-    for (j = 0; j < height; ++j) {
-      c1 = stbi__get8(s);
-      c2 = stbi__get8(s);
-      len = stbi__get8(s);
-      if (c1 != 2 || c2 != 2 || (len & 0x80)) {
-        // not run-length encoded, so we have to actually use THIS data as a decoded
-        // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
-        stbi_uc rgbe[4];
-        rgbe[0] = (stbi_uc)c1;
-        rgbe[1] = (stbi_uc)c2;
-        rgbe[2] = (stbi_uc)len;
-        rgbe[3] = (stbi_uc)stbi__get8(s);
-        stbi__hdr_convert(hdr_data, rgbe, req_comp);
-        i = 1;
-        j = 0;
-        STBI_FREE(scanline);
-        goto main_decode_loop;  // yes, this makes no sense
-      }
-      len <<= 8;
-      len |= stbi__get8(s);
-      if (len != width) {
-        STBI_FREE(hdr_data);
-        STBI_FREE(scanline);
-        return stbi__errpf("invalid decoded scanline length", "corrupt HDR");
-      }
-      if (scanline == NULL) {
-        scanline = (stbi_uc*)stbi__malloc_mad2(width, 4, 0);
-        if (!scanline) {
-          STBI_FREE(hdr_data);
-          return stbi__errpf("outofmem", "Out of memory");
-        }
-      }
-
-      for (k = 0; k < 4; ++k) {
-        int nleft;
-        i = 0;
-        while ((nleft = width - i) > 0) {
-          count = stbi__get8(s);
-          if (count > 128) {
-            // Run
-            value = stbi__get8(s);
-            count -= 128;
-            if (count > nleft) {
-              STBI_FREE(hdr_data);
-              STBI_FREE(scanline);
-              return stbi__errpf("corrupt", "bad RLE data in HDR");
-            }
-            for (z = 0; z < count; ++z)
-              scanline[i++ * 4 + k] = value;
-          } else {
-            // Dump
-            if (count > nleft) {
-              STBI_FREE(hdr_data);
-              STBI_FREE(scanline);
-              return stbi__errpf("corrupt", "bad RLE data in HDR");
-            }
-            for (z = 0; z < count; ++z)
-              scanline[i++ * 4 + k] = stbi__get8(s);
-          }
-        }
-      }
-      for (i = 0; i < width; ++i)
-        stbi__hdr_convert(hdr_data + (j * width + i) * req_comp, scanline + i * 4, req_comp);
-    }
-    if (scanline)
-      STBI_FREE(scanline);
-  }
-
-  return hdr_data;
-}
-
-static int stbi__hdr_info(stbi__context* s, int* x, int* y, int* comp) {
-  char buffer[STBI__HDR_BUFLEN];
-  char* token;
-  int valid = 0;
-  int dummy;
-
-  if (!x)
-    x = &dummy;
-  if (!y)
-    y = &dummy;
-  if (!comp)
-    comp = &dummy;
-
-  if (stbi__hdr_test(s) == 0) {
-    stbi__rewind(s);
-    return 0;
-  }
-
-  for (;;) {
-    token = stbi__hdr_gettoken(s, buffer);
-    if (token[0] == 0)
-      break;
-    if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0)
-      valid = 1;
-  }
-
-  if (!valid) {
-    stbi__rewind(s);
-    return 0;
-  }
-  token = stbi__hdr_gettoken(s, buffer);
-  if (strncmp(token, "-Y ", 3)) {
-    stbi__rewind(s);
-    return 0;
-  }
-  token += 3;
-  *y = (int)strtol(token, &token, 10);
-  while (*token == ' ')
-    ++token;
-  if (strncmp(token, "+X ", 3)) {
-    stbi__rewind(s);
-    return 0;
-  }
-  token += 3;
-  *x = (int)strtol(token, NULL, 10);
-  *comp = 3;
-  return 1;
-}
-#endif  // STBI_NO_HDR
-
-#ifndef STBI_NO_BMP
-static int stbi__bmp_info(stbi__context* s, int* x, int* y, int* comp) {
-  void* p;
-  stbi__bmp_data info;
-
-  info.all_a = 255;
-  p = stbi__bmp_parse_header(s, &info);
-  if (p == NULL) {
-    stbi__rewind(s);
-    return 0;
-  }
-  if (x)
-    *x = s->img_x;
-  if (y)
-    *y = s->img_y;
-  if (comp) {
-    if (info.bpp == 24 && info.ma == 0xff000000)
-      *comp = 3;
-    else
-      *comp = info.ma ? 4 : 3;
-  }
-  return 1;
-}
-#endif
-
-#ifndef STBI_NO_PSD
-static int stbi__psd_info(stbi__context* s, int* x, int* y, int* comp) {
-  int channelCount, dummy, depth;
-  if (!x)
-    x = &dummy;
-  if (!y)
-    y = &dummy;
-  if (!comp)
-    comp = &dummy;
-  if (stbi__get32be(s) != 0x38425053) {
-    stbi__rewind(s);
-    return 0;
-  }
-  if (stbi__get16be(s) != 1) {
-    stbi__rewind(s);
-    return 0;
-  }
-  stbi__skip(s, 6);
-  channelCount = stbi__get16be(s);
-  if (channelCount < 0 || channelCount > 16) {
-    stbi__rewind(s);
-    return 0;
-  }
-  *y = stbi__get32be(s);
-  *x = stbi__get32be(s);
-  depth = stbi__get16be(s);
-  if (depth != 8 && depth != 16) {
-    stbi__rewind(s);
-    return 0;
-  }
-  if (stbi__get16be(s) != 3) {
-    stbi__rewind(s);
-    return 0;
-  }
-  *comp = 4;
-  return 1;
-}
-
-static int stbi__psd_is16(stbi__context* s) {
-  int channelCount, depth;
-  if (stbi__get32be(s) != 0x38425053) {
-    stbi__rewind(s);
-    return 0;
-  }
-  if (stbi__get16be(s) != 1) {
-    stbi__rewind(s);
-    return 0;
-  }
-  stbi__skip(s, 6);
-  channelCount = stbi__get16be(s);
-  if (channelCount < 0 || channelCount > 16) {
-    stbi__rewind(s);
-    return 0;
-  }
-  STBI_NOTUSED(stbi__get32be(s));
-  STBI_NOTUSED(stbi__get32be(s));
-  depth = stbi__get16be(s);
-  if (depth != 16) {
-    stbi__rewind(s);
-    return 0;
-  }
-  return 1;
-}
-#endif
-
-#ifndef STBI_NO_PIC
-static int stbi__pic_info(stbi__context* s, int* x, int* y, int* comp) {
-  int act_comp = 0, num_packets = 0, chained, dummy;
-  stbi__pic_packet packets[10];
-
-  if (!x)
-    x = &dummy;
-  if (!y)
-    y = &dummy;
-  if (!comp)
-    comp = &dummy;
-
-  if (!stbi__pic_is4(s, "\x53\x80\xF6\x34")) {
-    stbi__rewind(s);
-    return 0;
-  }
-
-  stbi__skip(s, 88);
-
-  *x = stbi__get16be(s);
-  *y = stbi__get16be(s);
-  if (stbi__at_eof(s)) {
-    stbi__rewind(s);
-    return 0;
-  }
-  if ((*x) != 0 && (1 << 28) / (*x) < (*y)) {
-    stbi__rewind(s);
-    return 0;
-  }
-
-  stbi__skip(s, 8);
-
-  do {
-    stbi__pic_packet* packet;
-
-    if (num_packets == sizeof(packets) / sizeof(packets[0]))
-      return 0;
-
-    packet = &packets[num_packets++];
-    chained = stbi__get8(s);
-    packet->size = stbi__get8(s);
-    packet->type = stbi__get8(s);
-    packet->channel = stbi__get8(s);
-    act_comp |= packet->channel;
-
-    if (stbi__at_eof(s)) {
-      stbi__rewind(s);
-      return 0;
-    }
-    if (packet->size != 8) {
-      stbi__rewind(s);
-      return 0;
-    }
-  } while (chained);
-
-  *comp = (act_comp & 0x10 ? 4 : 3);
-
-  return 1;
-}
-#endif
-
-// *************************************************************************************************
-// Portable Gray Map and Portable Pixel Map loader
-// by Ken Miller
-//
-// PGM: http://netpbm.sourceforge.net/doc/pgm.html
-// PPM: http://netpbm.sourceforge.net/doc/ppm.html
-//
-// Known limitations:
-//    Does not support comments in the header section
-//    Does not support ASCII image data (formats P2 and P3)
-
-#ifndef STBI_NO_PNM
-
-static int stbi__pnm_test(stbi__context* s) {
-  char p, t;
-  p = (char)stbi__get8(s);
-  t = (char)stbi__get8(s);
-  if (p != 'P' || (t != '5' && t != '6')) {
-    stbi__rewind(s);
-    return 0;
-  }
-  return 1;
-}
-
-static void* stbi__pnm_load(stbi__context* s,
-                            int* x,
-                            int* y,
-                            int* comp,
-                            int req_comp,
-                            stbi__result_info* ri) {
-  stbi_uc* out;
-  STBI_NOTUSED(ri);
-
-  ri->bits_per_channel = stbi__pnm_info(s, (int*)&s->img_x, (int*)&s->img_y, (int*)&s->img_n);
-  if (ri->bits_per_channel == 0)
-    return 0;
-
-  if (s->img_y > STBI_MAX_DIMENSIONS)
-    return stbi__errpuc("too large", "Very large image (corrupt?)");
-  if (s->img_x > STBI_MAX_DIMENSIONS)
-    return stbi__errpuc("too large", "Very large image (corrupt?)");
-
-  *x = s->img_x;
-  *y = s->img_y;
-  if (comp)
-    *comp = s->img_n;
-
-  if (!stbi__mad4sizes_valid(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0))
-    return stbi__errpuc("too large", "PNM too large");
-
-  out = (stbi_uc*)stbi__malloc_mad4(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0);
-  if (!out)
-    return stbi__errpuc("outofmem", "Out of memory");
-  stbi__getn(s, out, s->img_n * s->img_x * s->img_y * (ri->bits_per_channel / 8));
-
-  if (req_comp && req_comp != s->img_n) {
-    out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
-    if (out == NULL)
-      return out;  // stbi__convert_format frees input on failure
-  }
-  return out;
-}
-
-static int stbi__pnm_isspace(char c) {
-  return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r';
-}
-
-static void stbi__pnm_skip_whitespace(stbi__context* s, char* c) {
-  for (;;) {
-    while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
-      *c = (char)stbi__get8(s);
-
-    if (stbi__at_eof(s) || *c != '#')
-      break;
-
-    while (!stbi__at_eof(s) && *c != '\n' && *c != '\r')
-      *c = (char)stbi__get8(s);
-  }
-}
-
-static int stbi__pnm_isdigit(char c) {
-  return c >= '0' && c <= '9';
-}
-
-static int stbi__pnm_getinteger(stbi__context* s, char* c) {
-  int value = 0;
-
-  while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
-    value = value * 10 + (*c - '0');
-    *c = (char)stbi__get8(s);
-  }
-
-  return value;
-}
-
-static int stbi__pnm_info(stbi__context* s, int* x, int* y, int* comp) {
-  int maxv, dummy;
-  char c, p, t;
-
-  if (!x)
-    x = &dummy;
-  if (!y)
-    y = &dummy;
-  if (!comp)
-    comp = &dummy;
-
-  stbi__rewind(s);
-
-  // Get identifier
-  p = (char)stbi__get8(s);
-  t = (char)stbi__get8(s);
-  if (p != 'P' || (t != '5' && t != '6')) {
-    stbi__rewind(s);
-    return 0;
-  }
-
-  *comp = (t == '6') ? 3 : 1;  // '5' is 1-component .pgm; '6' is 3-component .ppm
-
-  c = (char)stbi__get8(s);
-  stbi__pnm_skip_whitespace(s, &c);
-
-  *x = stbi__pnm_getinteger(s, &c);  // read width
-  stbi__pnm_skip_whitespace(s, &c);
-
-  *y = stbi__pnm_getinteger(s, &c);  // read height
-  stbi__pnm_skip_whitespace(s, &c);
-
-  maxv = stbi__pnm_getinteger(s, &c);  // read max value
-  if (maxv > 65535)
-    return stbi__err("max value > 65535", "PPM image supports only 8-bit and 16-bit images");
-  else if (maxv > 255)
-    return 16;
-  else
-    return 8;
-}
-
-static int stbi__pnm_is16(stbi__context* s) {
-  if (stbi__pnm_info(s, NULL, NULL, NULL) == 16)
-    return 1;
-  return 0;
-}
-#endif
-
-static int stbi__info_main(stbi__context* s, int* x, int* y, int* comp) {
-#ifndef STBI_NO_JPEG
-  if (stbi__jpeg_info(s, x, y, comp))
-    return 1;
-#endif
-
-#ifndef STBI_NO_PNG
-  if (stbi__png_info(s, x, y, comp))
-    return 1;
-#endif
-
-#ifndef STBI_NO_GIF
-  if (stbi__gif_info(s, x, y, comp))
-    return 1;
-#endif
-
-#ifndef STBI_NO_BMP
-  if (stbi__bmp_info(s, x, y, comp))
-    return 1;
-#endif
-
-#ifndef STBI_NO_PSD
-  if (stbi__psd_info(s, x, y, comp))
-    return 1;
-#endif
-
-#ifndef STBI_NO_PIC
-  if (stbi__pic_info(s, x, y, comp))
-    return 1;
-#endif
-
-#ifndef STBI_NO_PNM
-  if (stbi__pnm_info(s, x, y, comp))
-    return 1;
-#endif
-
-#ifndef STBI_NO_HDR
-  if (stbi__hdr_info(s, x, y, comp))
-    return 1;
-#endif
-
-// test tga last because it's a crappy test!
-#ifndef STBI_NO_TGA
-  if (stbi__tga_info(s, x, y, comp))
-    return 1;
-#endif
-  return stbi__err("unknown image type", "Image not of any known type, or corrupt");
-}
-
-static int stbi__is_16_main(stbi__context* s) {
-#ifndef STBI_NO_PNG
-  if (stbi__png_is16(s))
-    return 1;
-#endif
-
-#ifndef STBI_NO_PSD
-  if (stbi__psd_is16(s))
-    return 1;
-#endif
-
-#ifndef STBI_NO_PNM
-  if (stbi__pnm_is16(s))
-    return 1;
-#endif
-  return 0;
-}
-
-#ifndef STBI_NO_STDIO
-STBIDEF int stbi_info(char const* filename, int* x, int* y, int* comp) {
-  FILE* f = stbi__fopen(filename, "rb");
-  int result;
-  if (!f)
-    return stbi__err("can't fopen", "Unable to open file");
-  result = stbi_info_from_file(f, x, y, comp);
-  fclose(f);
-  return result;
-}
-
-STBIDEF int stbi_info_from_file(FILE* f, int* x, int* y, int* comp) {
-  int r;
-  stbi__context s;
-  long pos = ftell(f);
-  stbi__start_file(&s, f);
-  r = stbi__info_main(&s, x, y, comp);
-  fseek(f, pos, SEEK_SET);
-  return r;
-}
-
-STBIDEF int stbi_is_16_bit(char const* filename) {
-  FILE* f = stbi__fopen(filename, "rb");
-  int result;
-  if (!f)
-    return stbi__err("can't fopen", "Unable to open file");
-  result = stbi_is_16_bit_from_file(f);
-  fclose(f);
-  return result;
-}
-
-STBIDEF int stbi_is_16_bit_from_file(FILE* f) {
-  int r;
-  stbi__context s;
-  long pos = ftell(f);
-  stbi__start_file(&s, f);
-  r = stbi__is_16_main(&s);
-  fseek(f, pos, SEEK_SET);
-  return r;
-}
-#endif  // !STBI_NO_STDIO
-
-STBIDEF int stbi_info_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* comp) {
-  stbi__context s;
-  stbi__start_mem(&s, buffer, len);
-  return stbi__info_main(&s, x, y, comp);
-}
-
-STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const* c,
-                                     void* user,
-                                     int* x,
-                                     int* y,
-                                     int* comp) {
-  stbi__context s;
-  stbi__start_callbacks(&s, (stbi_io_callbacks*)c, user);
-  return stbi__info_main(&s, x, y, comp);
-}
-
-STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const* buffer, int len) {
-  stbi__context s;
-  stbi__start_mem(&s, buffer, len);
-  return stbi__is_16_main(&s);
-}
-
-STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const* c, void* user) {
-  stbi__context s;
-  stbi__start_callbacks(&s, (stbi_io_callbacks*)c, user);
-  return stbi__is_16_main(&s);
-}
-
-#endif  // STB_IMAGE_IMPLEMENTATION
-
-/*
-   revision history:
-      2.20  (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
-      2.19  (2018-02-11) fix warning
-      2.18  (2018-01-30) fix warnings
-      2.17  (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug
-                         1-bit BMP
-                         *_is_16_bit api
-                         avoid warnings
-      2.16  (2017-07-23) all functions have 16-bit variants;
-                         STBI_NO_STDIO works again;
-                         compilation fixes;
-                         fix rounding in unpremultiply;
-                         optimize vertical flip;
-                         disable raw_len validation;
-                         documentation fixes
-      2.15  (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;
-                         warning fixes; disable run-time SSE detection on gcc;
-                         uniform handling of optional "return" values;
-                         thread-safe initialization of zlib tables
-      2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
-      2.13  (2016-11-29) add 16-bit API, only supported for PNG right now
-      2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
-      2.11  (2016-04-02) allocate large structures on the stack
-                         remove white matting for transparent PSD
-                         fix reported channel count for PNG & BMP
-                         re-enable SSE2 in non-gcc 64-bit
-                         support RGB-formatted JPEG
-                         read 16-bit PNGs (only as 8-bit)
-      2.10  (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
-      2.09  (2016-01-16) allow comments in PNM files
-                         16-bit-per-pixel TGA (not bit-per-component)
-                         info() for TGA could break due to .hdr handling
-                         info() for BMP to shares code instead of sloppy parse
-                         can use STBI_REALLOC_SIZED if allocator doesn't support realloc
-                         code cleanup
-      2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
-      2.07  (2015-09-13) fix compiler warnings
-                         partial animated GIF support
-                         limited 16-bpc PSD support
-                         #ifdef unused functions
-                         bug with < 92 byte PIC,PNM,HDR,TGA
-      2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value
-      2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning
-      2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit
-      2.03  (2015-04-12) extra corruption checking (mmozeiko)
-                         stbi_set_flip_vertically_on_load (nguillemot)
-                         fix NEON support; fix mingw support
-      2.02  (2015-01-19) fix incorrect assert, fix warning
-      2.01  (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
-      2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
-      2.00  (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
-                         progressive JPEG (stb)
-                         PGM/PPM support (Ken Miller)
-                         STBI_MALLOC,STBI_REALLOC,STBI_FREE
-                         GIF bugfix -- seemingly never worked
-                         STBI_NO_*, STBI_ONLY_*
-      1.48  (2014-12-14) fix incorrectly-named assert()
-      1.47  (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
-                         optimize PNG (ryg)
-                         fix bug in interlaced PNG with user-specified channel count (stb)
-      1.46  (2014-08-26)
-              fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
-      1.45  (2014-08-16)
-              fix MSVC-ARM internal compiler error by wrapping malloc
-      1.44  (2014-08-07)
-              various warning fixes from Ronny Chevalier
-      1.43  (2014-07-15)
-              fix MSVC-only compiler problem in code changed in 1.42
-      1.42  (2014-07-09)
-              don't define _CRT_SECURE_NO_WARNINGS (affects user code)
-              fixes to stbi__cleanup_jpeg path
-              added STBI_ASSERT to avoid requiring assert.h
-      1.41  (2014-06-25)
-              fix search&replace from 1.36 that messed up comments/error messages
-      1.40  (2014-06-22)
-              fix gcc struct-initialization warning
-      1.39  (2014-06-15)
-              fix to TGA optimization when req_comp != number of components in TGA;
-              fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
-              add support for BMP version 5 (more ignored fields)
-      1.38  (2014-06-06)
-              suppress MSVC warnings on integer casts truncating values
-              fix accidental rename of 'skip' field of I/O
-      1.37  (2014-06-04)
-              remove duplicate typedef
-      1.36  (2014-06-03)
-              convert to header file single-file library
-              if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
-      1.35  (2014-05-27)
-              various warnings
-              fix broken STBI_SIMD path
-              fix bug where stbi_load_from_file no longer left file pointer in correct place
-              fix broken non-easy path for 32-bit BMP (possibly never used)
-              TGA optimization by Arseny Kapoulkine
-      1.34  (unknown)
-              use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure
-   case 1.33  (2011-07-14) make stbi_is_hdr work in STBI_NO_HDR (as specified), minor
-   compiler-friendly improvements 1.32  (2011-07-13) support for "info" function for all supported
-   filetypes (SpartanJ) 1.31  (2011-06-20) a few more leak fixes, bug in PNG handling (SpartanJ)
-      1.30  (2011-06-11)
-              added ability to load files via callbacks to accomidate custom input streams (Ben
-   Wenger) removed deprecated format-specific test/load functions removed support for installable
-   file formats (stbi_loader) -- would have been broken for IO callbacks anyway error cases in bmp
-   and tga give messages and don't leak (Raymond Barbiero, grisha) fix inefficiency in decoding
-   32-bit BMP (David Woo) 1.29  (2010-08-16) various warning fixes from Aurelien Pocheville 1.28
-   (2010-08-01) fix bug in GIF palette transparency (SpartanJ) 1.27  (2010-08-01) cast-to-stbi_uc to
-   fix warnings 1.26  (2010-07-24) fix bug in file buffering for PNG reported by SpartanJ 1.25
-   (2010-07-17) refix trans_data warning (Won Chun) 1.24  (2010-07-12) perf improvements reading
-   from files on platforms with lock-heavy fgetc() minor perf improvements for jpeg deprecated
-   type-specific functions so we'll get feedback if they're needed attempt to fix trans_data warning
-   (Won Chun) 1.23    fixed bug in iPhone support 1.22  (2010-07-10) removed image *writing* support
-              stbi_info support from Jetro Lauha
-              GIF support from Jean-Marc Lienher
-              iPhone PNG-extensions from James Brown
-              warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. Janez (U+017D)emva)
-      1.21    fix use of 'stbi_uc' in header (reported by jon blow)
-      1.20    added support for Softimage PIC, by Tom Seddon
-      1.19    bug in interlaced PNG corruption check (found by ryg)
-      1.18  (2008-08-02)
-              fix a threading bug (local mutable static)
-      1.17    support interlaced PNG
-      1.16    major bugfix - stbi__convert_format converted one too many pixels
-      1.15    initialize some fields for thread safety
-      1.14    fix threadsafe conversion bug
-              header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
-      1.13    threadsafe
-      1.12    const qualifiers in the API
-      1.11    Support installable IDCT, colorspace conversion routines
-      1.10    Fixes for 64-bit (don't use "unsigned long")
-              optimized upsampling by Fabian "ryg" Giesen
-      1.09    Fix format-conversion for PSD code (bad global variables!)
-      1.08    Thatcher Ulrich's PSD code integrated by Nicolas Schulz
-      1.07    attempt to fix C++ warning/errors again
-      1.06    attempt to fix C++ warning/errors again
-      1.05    fix TGA loading to return correct *comp and use good luminance calc
-      1.04    default float alpha is 1, not 255; use 'void *' for stbi_image_free
-      1.03    bugfixes to STBI_NO_STDIO, STBI_NO_HDR
-      1.02    support for (subset of) HDR files, float interface for preferred access to them
-      1.01    fix bug: possible bug in handling right-side up bmps... not sure
-              fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
-      1.00    interface to zlib that skips zlib header
-      0.99    correct handling of alpha in palette
-      0.98    TGA loader by lonesock; dynamically add loaders (untested)
-      0.97    jpeg errors on too large a file; also catch another malloc failure
-      0.96    fix detection of invalid v value - particleman@mollyrocket forum
-      0.95    during header scan, seek to markers in case of padding
-      0.94    STBI_NO_STDIO to disable stdio usage; rename all #defines the same
-      0.93    handle jpegtran output; verbose errors
-      0.92    read 4,8,16,24,32-bit BMP files of several formats
-      0.91    output 24-bit Windows 3.0 BMP files
-      0.90    fix a few more warnings; bump version number to approach 1.0
-      0.61    bugfixes due to Marc LeBlanc, Christopher Lloyd
-      0.60    fix compiling as c++
-      0.59    fix warnings: merge Dave Moore's -Wall fixes
-      0.58    fix bug: zlib uncompressed mode len/nlen was wrong endian
-      0.57    fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
-      0.56    fix bug: zlib uncompressed mode len vs. nlen
-      0.55    fix bug: restart_interval not initialized to 0
-      0.54    allow NULL for 'int *comp'
-      0.53    fix bug in png 3->4; speedup png decoding
-      0.52    png handles req_comp=3,4 directly; minor cleanup; jpeg comments
-      0.51    obey req_comp requests, 1-component jpegs return as 1-component,
-              on 'test' only check type, not whether we support this variant
-      0.50  (2006-11-19)
-              first released version
-*/
-
-/*
-------------------------------------------------------------------------------
-This software is available under 2 licenses -- choose whichever you prefer.
-------------------------------------------------------------------------------
-ALTERNATIVE A - MIT License
-Copyright (c) 2017 Sean Barrett
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is furnished to do
-so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-------------------------------------------------------------------------------
-ALTERNATIVE B - Public Domain (www.unlicense.org)
-This is free and unencumbered software released into the public domain.
-Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
-software, either in source code form or as a compiled binary, for any purpose,
-commercial or non-commercial, and by any means.
-In jurisdictions that recognize copyright laws, the author or authors of this
-software dedicate any and all copyright interest in the software to the public
-domain. We make this dedication for the benefit of the public at large and to
-the detriment of our heirs and successors. We intend this dedication to be an
-overt act of relinquishment in perpetuity of all present and future rights to
-this software under copyright law.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------------
-*/
diff --git a/third-party/stb_image/CMakeLists.txt b/third-party/stb_image/CMakeLists.txt
new file mode 100644
index 0000000000..f390f23512
--- /dev/null
+++ b/third-party/stb_image/CMakeLists.txt
@@ -0,0 +1,9 @@
+if (UNIX)
+    set(CMAKE_CXX_FLAGS "-O3")  # assigns (does not append to) the C++ flags for this directory
+else ()
+    set(CMAKE_CXX_FLAGS "/EHsc")  # /EHsc is the MSVC exception-handling model switch
+endif (UNIX)
+
+
+add_library(stb_image stb_image.cpp)  # stb_image.cpp defines the STB_IMAGE*_IMPLEMENTATION macros
+
diff --git a/third-party/stb_image/stb_image.cpp b/third-party/stb_image/stb_image.cpp
new file mode 100644
index 0000000000..80d8f9af64
--- /dev/null
+++ b/third-party/stb_image/stb_image.cpp
@@ -0,0 +1,4 @@
+#define STB_IMAGE_IMPLEMENTATION
+#define STB_IMAGE_WRITE_IMPLEMENTATION
+#include "stb_image_write.h"
+#include "stb_image.h"
\ No newline at end of file
diff --git a/third-party/tiny_gltf/stb_image.h b/third-party/stb_image/stb_image.h
similarity index 100%
rename from third-party/tiny_gltf/stb_image.h
rename to third-party/stb_image/stb_image.h
diff --git a/third-party/tiny_gltf/stb_image_write.h b/third-party/stb_image/stb_image_write.h
similarity index 100%
rename from third-party/tiny_gltf/stb_image_write.h
rename to third-party/stb_image/stb_image_write.h
diff --git a/third-party/tiny_gltf/CMakeLists.txt b/third-party/tiny_gltf/CMakeLists.txt
index eb0f926d45..639bd564c1 100644
--- a/third-party/tiny_gltf/CMakeLists.txt
+++ b/third-party/tiny_gltf/CMakeLists.txt
@@ -4,6 +4,7 @@ else ()
     set(CMAKE_CXX_FLAGS "/EHsc")
 endif (UNIX)
 
-include_directories(../)
+include_directories(../ ../stb_image)
 add_library(tiny_gltf tiny_gltf.cpp)
+target_link_libraries(tiny_gltf stb_image)
 
diff --git a/third-party/tiny_gltf/tiny_gltf.cpp b/third-party/tiny_gltf/tiny_gltf.cpp
index 3f27915208..76b6542c00 100644
--- a/third-party/tiny_gltf/tiny_gltf.cpp
+++ b/third-party/tiny_gltf/tiny_gltf.cpp
@@ -1,4 +1,4 @@
 #define TINYGLTF_IMPLEMENTATION
-#define STB_IMAGE_IMPLEMENTATION
-#define STB_IMAGE_WRITE_IMPLEMENTATION
-#include "tiny_gltf.h"
+
+#include "third-party/tiny_gltf/tiny_gltf.h"
+

From c13934708a0fb7c138c4541ce7868373997a8206 Mon Sep 17 00:00:00 2001
From: water111 <48171810+water111@users.noreply.github.com>
Date: Sun, 19 Jun 2022 20:44:07 -0400
Subject: [PATCH 07/17] Initial implementation of custom level tool (#1482)

* wip

* learning about colors

* gltf node stuff working

* cleanup

* support textures

* bvh generation seems reasonable

* tree layout

* frag packer, untested and doesn't do real stripping yet

* temp

* working collide frags

* handle bad inputs better

* clean up

* format

* include

* another include

* reorganize for release build use
---
 .../scripts/releases/extract_build_linux.sh   |   1 +
 .../scripts/releases/extract_build_windows.sh |   1 +
 common/CMakeLists.txt                         |   1 +
 common/custom_data/TFrag3Data.cpp             |  68 +-
 common/custom_data/Tfrag3Data.h               |  20 +-
 common/custom_data/pack_helpers.cpp           |  71 ++
 common/custom_data/pack_helpers.h             |   6 +
 common/math/Vector.h                          |  74 +-
 common/math/geometry.cpp                      |  40 +
 common/math/geometry.h                        |   6 +
 custom_levels/README.md                       |  36 +
 custom_levels/test-zone/test-zone.jsonc       |  15 +
 custom_levels/test-zone/test-zone2.glb        | Bin 0 -> 8264 bytes
 custom_levels/test-zone/testzone.gd           |   8 +
 decompiler/level_extractor/extract_level.cpp  |  45 --
 decompiler/level_extractor/extract_tfrag.cpp  |  86 +--
 .../opengl_renderer/background/Tfrag3.cpp     |  11 +-
 .../opengl_renderer/background/Tfrag3.h       |   1 +
 .../opengl_renderer/loader/LoaderStages.cpp   |  53 +-
 goal_src/engine/collide/collide-cache.gc      |   2 +
 goal_src/engine/collide/collide-frag.gc       |  13 +
 goal_src/engine/gfx/texture.gc                |   7 +-
 goal_src/engine/level/level-info.gc           |  41 ++
 goal_src/engine/level/level.gc                | 217 ++++--
 goal_src/engine/target/target-death.gc        |   6 +-
 goal_src/game.gp                              |  28 +
 goalc/CMakeLists.txt                          |  12 +-
 goalc/build_level/FileInfo.cpp                |  30 +
 goalc/build_level/FileInfo.h                  |  28 +
 goalc/build_level/LevelFile.cpp               | 105 +++
 goalc/build_level/LevelFile.h                 | 140 ++++
 goalc/build_level/ResLump.cpp                 |   4 +
 goalc/build_level/ResLump.h                   |   7 +
 goalc/build_level/TexturePool.h               |  12 +
 goalc/build_level/Tfrag.cpp                   |  99 +++
 goalc/build_level/Tfrag.h                     |  17 +
 goalc/build_level/build_level.cpp             | 102 +++
 goalc/build_level/build_level.h               |   7 +
 goalc/build_level/collide_bvh.cpp             | 284 ++++++++
 goalc/build_level/collide_bvh.h               |  37 +
 goalc/build_level/collide_common.h            | 110 +++
 goalc/build_level/collide_drawable.cpp        | 268 +++++++
 goalc/build_level/collide_drawable.h          |  12 +
 goalc/build_level/collide_pack.cpp            | 263 +++++++
 goalc/build_level/collide_pack.h              |  21 +
 goalc/build_level/color_quantization.cpp      | 220 ++++++
 goalc/build_level/color_quantization.h        |  20 +
 goalc/build_level/gltf_mesh_extract.cpp       | 688 ++++++++++++++++++
 goalc/build_level/gltf_mesh_extract.h         |  34 +
 goalc/data_compiler/DataObjectGenerator.cpp   |  23 +
 goalc/data_compiler/DataObjectGenerator.h     |   7 +
 goalc/make/MakeSystem.cpp                     |   1 +
 goalc/make/Tools.cpp                          |  20 +-
 goalc/make/Tools.h                            |   7 +
 tools/CMakeLists.txt                          |   1 -
 tools/build_level/CMakeLists.txt              |   4 -
 tools/build_level/main.cpp                    |   3 -
 57 files changed, 3184 insertions(+), 259 deletions(-)
 create mode 100644 common/custom_data/pack_helpers.cpp
 create mode 100644 common/custom_data/pack_helpers.h
 create mode 100644 custom_levels/README.md
 create mode 100644 custom_levels/test-zone/test-zone.jsonc
 create mode 100644 custom_levels/test-zone/test-zone2.glb
 create mode 100644 custom_levels/test-zone/testzone.gd
 create mode 100644 goalc/build_level/FileInfo.cpp
 create mode 100644 goalc/build_level/FileInfo.h
 create mode 100644 goalc/build_level/LevelFile.cpp
 create mode 100644 goalc/build_level/LevelFile.h
 create mode 100644 goalc/build_level/ResLump.cpp
 create mode 100644 goalc/build_level/ResLump.h
 create mode 100644 goalc/build_level/TexturePool.h
 create mode 100644 goalc/build_level/Tfrag.cpp
 create mode 100644 goalc/build_level/Tfrag.h
 create mode 100644 goalc/build_level/build_level.cpp
 create mode 100644 goalc/build_level/build_level.h
 create mode 100644 goalc/build_level/collide_bvh.cpp
 create mode 100644 goalc/build_level/collide_bvh.h
 create mode 100644 goalc/build_level/collide_common.h
 create mode 100644 goalc/build_level/collide_drawable.cpp
 create mode 100644 goalc/build_level/collide_drawable.h
 create mode 100644 goalc/build_level/collide_pack.cpp
 create mode 100644 goalc/build_level/collide_pack.h
 create mode 100644 goalc/build_level/color_quantization.cpp
 create mode 100644 goalc/build_level/color_quantization.h
 create mode 100644 goalc/build_level/gltf_mesh_extract.cpp
 create mode 100644 goalc/build_level/gltf_mesh_extract.h
 delete mode 100644 tools/build_level/CMakeLists.txt
 delete mode 100644 tools/build_level/main.cpp

diff --git a/.github/scripts/releases/extract_build_linux.sh b/.github/scripts/releases/extract_build_linux.sh
index 8fca5cde22..8bd4be176d 100755
--- a/.github/scripts/releases/extract_build_linux.sh
+++ b/.github/scripts/releases/extract_build_linux.sh
@@ -28,3 +28,4 @@ cp -r $SOURCE/decompiler/config $DEST/data/decompiler/
 cp -r $SOURCE/goal_src $DEST/data
 cp -r $SOURCE/game/assets $DEST/data/game/
 cp -r $SOURCE/game/graphics/opengl_renderer/shaders $DEST/data/game/graphics/opengl_renderer
+cp -r $SOURCE/custom_levels $DEST/data
diff --git a/.github/scripts/releases/extract_build_windows.sh b/.github/scripts/releases/extract_build_windows.sh
index 8982346d80..7829505725 100755
--- a/.github/scripts/releases/extract_build_windows.sh
+++ b/.github/scripts/releases/extract_build_windows.sh
@@ -24,3 +24,4 @@ cp -r $SOURCE/decompiler/config $DEST/data/decompiler/
 cp -r $SOURCE/goal_src $DEST/data
 cp -r $SOURCE/game/assets $DEST/data/game/
 cp -r $SOURCE/game/graphics/opengl_renderer/shaders $DEST/data/game/graphics/opengl_renderer
+cp -r $SOURCE/custom_levels $DEST/data
\ No newline at end of file
diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
index f813120fc1..6c9656c9fd 100644
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -4,6 +4,7 @@ add_library(common
         cross_sockets/XSocket.cpp
         cross_sockets/XSocketServer.cpp
         cross_sockets/XSocketClient.cpp
+        custom_data/pack_helpers.cpp
         custom_data/TFrag3Data.cpp
         deserialization/subtitles/subtitles.cpp
         dma/dma.cpp
diff --git a/common/custom_data/TFrag3Data.cpp b/common/custom_data/TFrag3Data.cpp
index 70c527eb9a..bd36741f69 100644
--- a/common/custom_data/TFrag3Data.cpp
+++ b/common/custom_data/TFrag3Data.cpp
@@ -1,3 +1,5 @@
+#include <functional>
+#include <algorithm>
 #include "Tfrag3Data.h"
 #include "common/util/Assert.h"
 
@@ -21,6 +23,7 @@ void StripDraw::serialize(Serializer& ser) {
   ser.from_ptr(&mode);
   ser.from_ptr(&tree_tex_id);
   ser.from_pod_vector(&runs);
+  ser.from_pod_vector(&plain_indices);
   ser.from_pod_vector(&vis_groups);
   ser.from_ptr(&num_triangles);
 }
@@ -64,6 +67,7 @@ void TfragTree::serialize(Serializer& ser) {
   ser.from_pod_vector(&packed_vertices.cluster_origins);
   ser.from_pod_vector(&colors);
   bvh.serialize(ser);
+  ser.from_ptr(&use_strips);
 }
 
 void TieTree::unpack() {
@@ -78,7 +82,7 @@ void TieTree::unpack() {
         vtx.x = proto_vtx.x;
         vtx.y = proto_vtx.y;
         vtx.z = proto_vtx.z;
-        vtx.q = 1.f;
+        vtx.q_unused = 1.f;
         vtx.s = proto_vtx.s;
         vtx.t = proto_vtx.t;
         i++;
@@ -93,7 +97,7 @@ void TieTree::unpack() {
         vtx.x = temp.x();
         vtx.y = temp.y();
         vtx.z = temp.z();
-        vtx.q = 1.f;
+        vtx.q_unused = 1.f;
         vtx.s = proto_vtx.s;
         vtx.t = proto_vtx.t;
         i++;
@@ -103,6 +107,7 @@ void TieTree::unpack() {
 
   for (auto& draw : static_draws) {
     draw.unpacked.idx_of_first_idx_in_full_buffer = unpacked.indices.size();
+    ASSERT(draw.plain_indices.empty());
     for (auto& run : draw.runs) {
       for (u32 ri = 0; ri < run.length; ri++) {
         unpacked.indices.push_back(run.vertex0 + ri);
@@ -152,7 +157,7 @@ void TfragTree::unpack() {
     o.z = cz + in.zoff * rescale;
     o.s = in.s / (1024.f);
     o.t = in.t / (1024.f);
-    o.q = 1.f;
+    o.q_unused = 1.f;
     o.color_index = in.color_index;
   }
 
@@ -162,8 +167,12 @@ void TfragTree::unpack() {
       for (u32 ri = 0; ri < run.length; ri++) {
         unpacked.indices.push_back(run.vertex0 + ri);
       }
-      unpacked.indices.push_back(UINT32_MAX);
+      if (use_strips) {
+        unpacked.indices.push_back(UINT32_MAX);
+      }
     }
+    unpacked.indices.insert(unpacked.indices.end(), draw.plain_indices.begin(),
+                            draw.plain_indices.end());
   }
 }
 
@@ -359,6 +368,7 @@ std::array<int, MemoryUsageCategory::NUM_CATEGORIES> Level::get_memory_usage() c
     for (const auto& tfrag_tree : tfrag_tree_geoms) {
       for (const auto& draw : tfrag_tree.draws) {
         result[TFRAG_INDEX] += draw.runs.size() * sizeof(StripDraw::VertexRun);
+        result[TFRAG_INDEX] += draw.plain_indices.size() * sizeof(u32);
         result[TFRAG_VIS] += draw.vis_groups.size() * sizeof(StripDraw::VisGroup);
       }
       result[TFRAG_VERTS] +=
@@ -417,4 +427,54 @@ std::array<int, MemoryUsageCategory::NUM_CATEGORIES> Level::get_memory_usage() c
   return result;
 }
 
+void print_memory_usage(const tfrag3::Level& lev, int uncompressed_data_size) {
+  int total_accounted = 0;
+  auto memory_use_by_category = lev.get_memory_usage();
+  // Build (label, size) rows; they are printed below, largest first, in kB and percent.
+  std::vector<std::pair<std::string, int>> known_categories = {
+      {"texture", memory_use_by_category[tfrag3::MemoryUsageCategory::TEXTURE]},
+      {"tie-deinst-vis", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_DEINST_VIS]},
+      {"tie-deinst-idx", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_DEINST_INDEX]},
+      {"tie-inst-vis", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_INST_VIS]},
+      {"tie-inst-idx", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_INST_INDEX]},
+      {"tie-bvh", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_BVH]},
+      {"tie-verts", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_VERTS]},
+      {"tie-colors", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_TIME_OF_DAY]},
+      {"tie-wind-inst-info",
+       memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_WIND_INSTANCE_INFO]},
+      {"tie-cidx", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_CIDX]},
+      {"tie-mats", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_MATRICES]},
+      {"tie-grps", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_GRPS]},
+      {"tfrag-vis", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_VIS]},
+      {"tfrag-idx", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_INDEX]},
+      {"tfrag-vert", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_VERTS]},
+      {"tfrag-colors", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_TIME_OF_DAY]},
+      {"tfrag-cluster", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_CLUSTER]},
+      {"tfrag-bvh", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_BVH]},
+      {"shrub-colors", memory_use_by_category[tfrag3::MemoryUsageCategory::SHRUB_TIME_OF_DAY]},
+      {"shrub-vert", memory_use_by_category[tfrag3::MemoryUsageCategory::SHRUB_VERT]},
+      {"shrub-ind", memory_use_by_category[tfrag3::MemoryUsageCategory::SHRUB_IND]},
+      {"collision", memory_use_by_category[tfrag3::MemoryUsageCategory::COLLISION]},
+      {"merc-vert", memory_use_by_category[tfrag3::MemoryUsageCategory::MERC_VERT]},
+      {"merc-idx", memory_use_by_category[tfrag3::MemoryUsageCategory::MERC_INDEX]}};
+  for (auto& known : known_categories) {
+    total_accounted += known.second;
+  }
+  // Whatever part of uncompressed_data_size the rows above do not cover is shown as "unknown".
+  known_categories.push_back({"unknown", uncompressed_data_size - total_accounted});
+  // Sort descending by size.
+  std::sort(known_categories.begin(), known_categories.end(),
+            [](const auto& a, const auto& b) { return a.second > b.second; });
+  // Percentages are relative to uncompressed_data_size.
+  for (const auto& x : known_categories) {
+    fmt::print("{:30s} : {:6d} kB {:3.1f}%\n", x.first, x.second / 1024,
+               100.f * (float)x.second / uncompressed_data_size);
+  }
+}
+
+std::size_t PreloadedVertex::hash::operator()(const PreloadedVertex& v) const {
+  std::hash<float> hf;  // odd weights keep the mix order-sensitive; plain XOR cancels if x == y
+  return hf(v.x) + 3 * hf(v.y) + 5 * hf(v.z) + 7 * hf(v.s) + 11 * hf(v.t) + 13 * v.color_index;
+}
+
 }  // namespace tfrag3
diff --git a/common/custom_data/Tfrag3Data.h b/common/custom_data/Tfrag3Data.h
index 2354eeac2c..c730190bb4 100644
--- a/common/custom_data/Tfrag3Data.h
+++ b/common/custom_data/Tfrag3Data.h
@@ -53,17 +53,26 @@ enum MemoryUsageCategory {
   NUM_CATEGORIES
 };
 
-constexpr int TFRAG3_VERSION = 19;
+constexpr int TFRAG3_VERSION = 20;
 
 // These vertices should be uploaded to the GPU at load time and don't change
 struct PreloadedVertex {
   // the vertex position
   float x, y, z;
   // texture coordinates
-  float s, t, q;
+  float s, t, q_unused;
   // color table index
   u16 color_index;
   u16 pad[3];
+
+  struct hash {  // functor for hashing a vertex; operator() is defined in TFrag3Data.cpp
+    std::size_t operator()(const PreloadedVertex& x) const;
+  };
+
+  bool operator==(const PreloadedVertex& other) const {  // q_unused and pad are not compared
+    return x == other.x && y == other.y && z == other.z && s == other.s && t == other.t &&
+           color_index == other.color_index;
+  }
 };
 static_assert(sizeof(PreloadedVertex) == 32, "PreloadedVertex size");
 
@@ -144,12 +153,14 @@ struct StripDraw {
     u32 idx_of_first_idx_in_full_buffer = 0;
   } unpacked;
 
+  // indices can be specified as lists of runs and plain indices.
+  // the runs are still drawn with indexed opengl calls, it just uses less space in the file.
   struct VertexRun {
     u32 vertex0;
     u16 length;
   };
-
   std::vector<VertexRun> runs;
+  std::vector<u32> plain_indices;
 
   // to do culling, the above vertex stream is grouped.
   // by following the visgroups and checking the visibility, you can leave out invisible vertices.
@@ -260,6 +271,7 @@ struct TfragTree {
   PackedTfragVertices packed_vertices;
   std::vector<TimeOfDayColor> colors;  // vertex colors (pre-interpolation)
   BVH bvh;                             // the bvh for frustum culling
+  bool use_strips = true;
 
   struct {
     std::vector<PreloadedVertex> vertices;  // mesh vertices
@@ -397,4 +409,6 @@ struct Level {
   std::array<int, MemoryUsageCategory::NUM_CATEGORIES> get_memory_usage() const;
 };
 
+void print_memory_usage(const tfrag3::Level& lev, int uncompressed_data_size);
+
 }  // namespace tfrag3
diff --git a/common/custom_data/pack_helpers.cpp b/common/custom_data/pack_helpers.cpp
new file mode 100644
index 0000000000..0dd17d700b
--- /dev/null
+++ b/common/custom_data/pack_helpers.cpp
@@ -0,0 +1,71 @@
+#include "pack_helpers.h"
+#include <map>
+
+constexpr float kClusterSize = 4096 * 40;  // 40 m if 1 m = 4096 units (old note: 100 m) -- confirm
+constexpr float kMasterOffset = 12000 * 4096;
+
+std::pair<u64, u16> position_to_cluster_and_offset(float in) {  // -> (cell index, offset in cell)
+  in += kMasterOffset;
+  if (in < 0) {
+    fmt::print("negative: {}\n", in);  // report the shifted value before the ASSERT below
+  }
+  ASSERT(in >= 0);
+  int cluster_cell = (in / kClusterSize);  // truncating conversion to the containing cell
+  float leftover = in - (cluster_cell * kClusterSize);
+  u16 offset = (leftover / kClusterSize) * float(UINT16_MAX);  // fraction of a cell, 16-bit
+  // sanity check: rebuilding the position from (cell, offset) must land within 7 units of in
+  float recovered = ((float)cluster_cell + ((float)offset / UINT16_MAX)) * kClusterSize;
+  float diff = std::fabs(recovered - in);
+  ASSERT(diff < 7);
+  ASSERT(cluster_cell >= 0);
+  ASSERT(cluster_cell < UINT16_MAX);
+  return {cluster_cell, offset};
+}
+
+// Converts unpacked tfrag vertices to the packed representation and appends them to
+// result->vertices. Each position is split per axis into a cluster cell and an offset; every
+// distinct (x, y, z) cell triple becomes one entry of result->cluster_origins, which is resized
+// to the number of clusters found here.
+void pack_tfrag_vertices(tfrag3::PackedTfragVertices* result,
+                         const std::vector<tfrag3::PreloadedVertex>& vertices) {
+  // packed cell triple (16 bits per axis) -> index into cluster_origins
+  std::map<u64, u32> cluster_index_by_key;
+  u32 cluster_count = 0;
+
+  // pack every vertex; cluster indices are handed out in order of first appearance
+  for (const auto& vertex : vertices) {
+    const auto cx = position_to_cluster_and_offset(vertex.x);
+    const auto cy = position_to_cluster_and_offset(vertex.y);
+    const auto cz = position_to_cluster_and_offset(vertex.z);
+    const u64 key = cx.first | (cy.first << 16) | (cz.first << 32);
+
+    // emplace keeps an existing entry, so a new index is only consumed for unseen keys
+    const auto insertion = cluster_index_by_key.emplace(key, cluster_count);
+    if (insertion.second) {
+      cluster_count++;
+    }
+
+    tfrag3::PackedTfragVertices::Vertex packed;
+    packed.xoff = cx.second;
+    packed.yoff = cy.second;
+    packed.zoff = cz.second;
+    packed.cluster_idx = insertion.first->second;
+    // TODO check these
+    packed.s = vertex.s * 1024;
+    packed.t = vertex.t * 1024;
+    packed.color_index = vertex.color_index;
+    result->vertices.push_back(packed);
+  }
+
+  // split each key back into its three per-axis cell indices
+  result->cluster_origins.resize(cluster_count);
+  for (const auto& entry : cluster_index_by_key) {
+    auto& origin = result->cluster_origins[entry.second];
+    origin.x() = (u16)entry.first;
+    origin.y() = (u16)(entry.first >> 16);
+    origin.z() = (u16)(entry.first >> 32);
+  }
+
+  // note: the cluster-count limit is only verified after all vertices were packed
+  ASSERT(cluster_count < UINT16_MAX);
+}
\ No newline at end of file
diff --git a/common/custom_data/pack_helpers.h b/common/custom_data/pack_helpers.h
new file mode 100644
index 0000000000..c477a22c40
--- /dev/null
+++ b/common/custom_data/pack_helpers.h
@@ -0,0 +1,6 @@
+#pragma once
+
+#include "common/custom_data/Tfrag3Data.h"
+
+void pack_tfrag_vertices(tfrag3::PackedTfragVertices* result,
+                         const std::vector<tfrag3::PreloadedVertex>& vertices);
\ No newline at end of file
diff --git a/common/math/Vector.h b/common/math/Vector.h
index bb0ba31a1c..9351b123ea 100644
--- a/common/math/Vector.h
+++ b/common/math/Vector.h
@@ -89,6 +89,14 @@ class Vector {
     return result;
   }
 
+  Vector<T, Size> operator+(const T& other) const {
+    Vector<T, Size> shifted = *this;  // copy, then add the scalar to every component
+    for (auto& component : shifted.m_data) {
+      component += other;
+    }
+    return shifted;
+  }
+
   Vector<T, Size>& operator+=(const Vector<T, Size>& other) {
     for (int i = 0; i < Size; i++) {
       m_data[i] += other[i];
@@ -103,6 +111,13 @@ class Vector {
     return *this;
   }
 
+  Vector<T, Size>& operator-=(const T& other) {
+    for (auto& component : m_data) {
+      component -= other;  // subtract the same scalar from each component, in index order
+    }
+    return *this;
+  }
+
   Vector<T, Size> elementwise_multiply(const Vector<T, Size>& other) const {
     Vector<T, Size> result;
     for (int i = 0; i < Size; i++) {
@@ -180,6 +195,18 @@ class Vector {
 
   void normalize(const T& norm = T(1)) { *this = normalized(norm); }
 
+  void max_in_place(const Vector<T, Size>& other) {  // per-component max, stored in this
+    for (int i = 0; i < Size; i++) {
+      m_data[i] = std::max(m_data[i], other[i]);
+    }
+  }
+
+  void min_in_place(const Vector<T, Size>& other) {  // per-component min, stored in this
+    for (int i = 0; i < Size; i++) {
+      m_data[i] = std::min(m_data[i], other[i]);
+    }
+  }
+
   std::string to_string_aligned() const {
     std::string result = "[";
     for (auto x : m_data) {
@@ -229,6 +256,8 @@ class Vector {
     }
   }
 
+  void set_zero() { fill(0); }  // calls fill(0); presumably sets every component to 0
+
  private:
   T m_data[Size];
 };
@@ -246,8 +275,24 @@ struct Matrix {
     return result;
   }
 
-  //  const T& operator()(int r, int c) const { return m_data[c + r * Cols]; }
-  //  T& operator()(int r, int c) { return m_data[r + c * Rows]; }
+  static Matrix identity() {  // returns a matrix with 1 where r == c and 0 elsewhere
+    Matrix result;
+    for (int c = 0; c < Cols; c++) {
+      for (int r = 0; r < Rows; r++) {
+        result(r, c) = r == c ? T(1) : T(0);
+      }
+    }
+    return result;
+  }
+
+  void set_zero() {  // sets every stored element to 0
+    for (auto& x : m_data) {
+      x = 0;
+    }
+  }
+
+  T& operator()(int r, int c) { return m_data[r + c * Rows]; }  // column-major; no bounds check
+  const T& operator()(int r, int c) const { return m_data[r + c * Rows]; }  // same indexing
 
   Vector<T, Rows> col(int c) const {
     Vector<T, Rows> result;
@@ -274,6 +319,31 @@ struct Matrix {
     return result;
   }
 
+  template <int OtherCols>  // (Rows x Cols) * (Cols x OtherCols) -> (Rows x OtherCols)
+  Matrix<T, Rows, OtherCols> operator*(const Matrix<T, Cols, OtherCols>& y) const {
+    Matrix<T, Rows, OtherCols> result;
+    result.set_zero();  // the loops below accumulate with +=
+    for (int rx = 0; rx < Rows; rx++) {
+      for (int cx = 0; cx < Cols; cx++) {
+        for (int yi = 0; yi < OtherCols; yi++) {
+          result(rx, yi) += operator()(rx, cx) * y(cx, yi);  // this(rx, cx) * y(cx, yi)
+        }
+      }
+    }
+    return result;
+  }
+
+  Vector<T, Rows> operator*(const Vector<T, Cols>& y) const {  // matrix * column vector
+    Vector<T, Rows> result;
+    result.set_zero();  // the loops below accumulate with +=
+    for (int rx = 0; rx < Rows; rx++) {
+      for (int cx = 0; cx < Cols; cx++) {
+        result[rx] += operator()(rx, cx) * y[cx];  // dot product of row rx with y
+      }
+    }
+    return result;
+  }
+
  private:
   T m_data[Rows * Cols];
 };
diff --git a/common/math/geometry.cpp b/common/math/geometry.cpp
index e69de29bb2..59aad2c4c0 100644
--- a/common/math/geometry.cpp
+++ b/common/math/geometry.cpp
@@ -0,0 +1,40 @@
+#include "geometry.h"
+
+namespace math {
+
+Vector4f bsphere_of_triangle(const Vector3f* v) {  // reads v[0..2]; returns (center xyz, radius w)
+  Vector4f bsphere;
+  auto& p1 = v[0];
+  auto& p2 = v[1];
+  auto& p3 = v[2];
+  float A = (p1 - p2).length();  // side lengths; A is the side opposite p3
+  float B = (p2 - p3).length();  // B is the side opposite p1
+  float C = (p3 - p1).length();  // C is the side opposite p2
+
+  const Vector3f *a = &p3, *b = &p1, *c = &p2;  // a/b/c: vertex opposite side A/B/C
+  if (B < C)
+    std::swap(B, C), std::swap(b, c);  // sides and their opposite vertices swap together
+  if (A < B)
+    std::swap(A, B), std::swap(a, b);  // after both swaps, A is the longest side
+
+  float r;
+  math::Vector3f origin;
+  if ((B * B) + (C * C) <= (A * A)) {  // angle at a is >= 90 deg: longest side is the diameter
+    r = A / 2.f;
+    origin = (*b + *c) / 2.f;  // midpoint of side A, whose endpoints are b and c
+  } else {
+    float cos_a = (B * B + C * C - A * A) / (B * C * 2);  // law of cosines, angle at a
+    r = A / (sqrt(1 - cos_a * cos_a) * 2.f);  // circumradius: A / (2 * sin(angle at a))
+    Vector3f alpha = *a - *c, beta = *b - *c;  // edge vectors measured from vertex c
+    origin = (beta * alpha.dot(alpha) - alpha * beta.dot(beta)).cross(alpha.cross(beta)) /
+                 (alpha.cross(beta).dot(alpha.cross(beta)) * 2.f) +
+             *c;  // circumcenter, offset back from vertex c
+  }
+  bsphere.x() = origin.x();
+  bsphere.y() = origin.y();
+  bsphere.z() = origin.z();
+  bsphere.w() = r;  // radius goes in w
+  return bsphere;
+}
+
+}  // namespace math
\ No newline at end of file
diff --git a/common/math/geometry.h b/common/math/geometry.h
index 4c71592bf7..a61988f444 100644
--- a/common/math/geometry.h
+++ b/common/math/geometry.h
@@ -44,4 +44,10 @@ RaySphereResult<T> ray_sphere_intersect(const Vector3<T>& ray_origin,
   result.u[1] = minus_b - sqrt_val;
   return result;
 }
+
+math::Vector4f bsphere_of_triangle(const Vector3f* vertices);
+
+inline bool point_in_bsphere(const Vector4f& sphere, const Vector3f& pt) {  // w = radius
+  return (sphere.xyz() - pt).squared_length() <= (sphere.w() * sphere.w());  // surface counts
+}
 }  // namespace math
\ No newline at end of file
diff --git a/custom_levels/README.md b/custom_levels/README.md
new file mode 100644
index 0000000000..0a6a9aff04
--- /dev/null
+++ b/custom_levels/README.md
@@ -0,0 +1,36 @@
+# Custom Levels
+Disclaimer: custom levels are still in development and are missing most features.
+
+
+The first three steps are already done for "test zone", so this can be used as a starting point.
+
+# 1: File Setup
+To create a custom level, copy the layout of `custom_levels/test-zone`. See `test-zone.jsonc` for information on how to name things. The `.gd` file also contains the level name.
+
+# 2: Modify the engine
+Modify `goal_src/engine/level/level-info.gc` to add level info for each custom level. There is level info for `test-zone` at the bottom that can be used as an example.
+
+# 3: Modify the build system
+Modify `goal_src/game.gp` and add a custom level target:
+```lisp
+(build-custom-level "test-zone")
+;; the DGO file
+(custom-level-cgo "TESTZONE.DGO" "test-zone/testzone.gd")
+```
+
+# 4: Export the GLTF file from Blender
+For now, all meshes are displayed and treated as ground collision. This causes buggy collision because walls shouldn't use "floor" mode.
+
+Blender will create a `.glb` file, which must have the name specified in the `.jsonc` file and should be located in `custom_levels/your_level`.
+
+# 5: Rebuild the game
+Any time the `.glb` file is changed, you must rebuild the game. Launch the compiler (`goalc`) and run `(mi)` to rebuild everything. It's recommended to leave the compiler open - it will remember files that haven't changed and skip rebuilding them.
+
+# 6: Go to the custom level
+Start the game in debug mode `gk`.
+
+In the compiler window, run `(lt)` to connect to the game. You must run this again every time you restart the game.  If this doesn't work, there could be a firewall issue and you must allow goalc/gk to use the network. They don't make any outside connections.
+
+In the compiler window, run a command like `(bg-custom 'test-zone-vis)` to load and start at a custom level.
+
+
diff --git a/custom_levels/test-zone/test-zone.jsonc b/custom_levels/test-zone/test-zone.jsonc
new file mode 100644
index 0000000000..73c66edf00
--- /dev/null
+++ b/custom_levels/test-zone/test-zone.jsonc
@@ -0,0 +1,15 @@
+{
+  // The "in-game" name of the level. Should be lower case, with dashes (GOAL symbol name).
+  // The name of this file and the folder this file is in must both match this name.
+  "long_name": "test-zone",
+  // The file name, should be upper case and 8 characters or less.
+  "iso_name": "TESTZONE",
+  // The nickname, should be exactly 3 characters
+  "nickname": "TSZ", // 3 char name, all uppercase
+
+  // Background mesh file.
+  // Must have vertex colors. Use the blender cycles renderer, bake, diffuse, uncheck color,
+  // and bake to vertex colors. For now, only the first vertex color group is used, so make sure you
+  // only have 1.
+  "gltf_file": "custom_levels/test-zone/test-zone2.glb"
+}
\ No newline at end of file
diff --git a/custom_levels/test-zone/test-zone2.glb b/custom_levels/test-zone/test-zone2.glb
new file mode 100644
index 0000000000000000000000000000000000000000..2df7f4d59537ba09ff5baa8b42fe6e3aefb196d1
GIT binary patch
literal 8264
zcmcIoTaO#J6`s^do1|&eq)qxN%=1_!Im5Z^L%WWfqHbMpU>iXI!-(XyycV!48ItTa
zSS+A_ru>Nfpnf04v!dpvb=-16p8JJ|XE>uhJAeGQUqn&#_dyhW_q!<i$D@-6m+^SH
zoUG!b%lK?En=Hnw`651w|M_w;pUszpGnfp%I-ks*PZooFe>xew(ZhVG^Eip$Ocu-O
zd<JDRRPj|3FHhkhK2jcA#z#*s<JtHEy!ermCGl+jd_sXrbYhY;$r797Pp_WtB-bYQ
zr)Mu$h_jfl#w!=-i9+N`6&8Ocx2jTAmZ)K2ZCR=mYnoNLs*>AkXbPhXoff4|ZC+|^
zp=S%|7uFPIX|uFI%++E%Tb_&eY^aLdWHwbrWlWl<i5*rdEpuH}mDTVE*LtXwg-(@a
zM&$+(o?a!F@x^5M(rw-E_nY%EYQaj3&$0g}x)>tevMj5jP*xRXasy5yLp#(qD{ZCn
zT&c<$Yzz5hMOviF<{47VQj_R>h@?@HvdHo@Evg_xjY4lv<nHTdOmM#h<xrKW&aBRK
zuC&cfVur?MSy7?zWrh;#q#BwkMIfD~kf2h5d$Z>rrP7TMw~Jw#SEfjHiBqhqQrWi*
zlGcNmu+TVYV--%><R&kR9Q#Rhqmpe|U}HFGRpoh6!4kFQp;?=0rBs^dMh831>Pe)v
zB+z^n{GnF1%F<kAZCPkLG%Cw8w1u{|$aRr`rB%TuHEC%LI;}SkThAl=Ie8d8%xrEe
zV{KLH4E>#GmP2VgX2?UO1*+IA2kOescaQY-gDbuNYB9Z-uBLCibi7(Erq5olNM6R@
zoIJYs_}<9__s~2zdHAn8_v0gU=i{%xyL)o-@UP#i_{dx(@pSfldP<Y@%2hw;Uf*6f
z41)ucFsNA=CJ)0DZDH78Hw;tm!c_ZVv^p4vJ#@o38nu%BXgqe|hID!`61(K4k%G$M
z!r6~>xOVm}A{D`&P{_SKRrNhRP2%w>T4On1Y+p6cUcY!TS$sR4{M$9v>HOl=d^VY_
z9{=zPSFqJ4PvX=0>lt2l*wV%LKfIR{youoHVu}~pliMV3yW7Iq#NU2>H^r;%G7f#C
z@&|mcdaQ9)H?}s7*tq+e){g1+g&q3XkM7>Nf9GMt9vg4k@9JWL!fdb2>%yQm2P=c_
zIOM&rG%k<sLt}C~^JZ<(J<aI5+N1}IgO+PL_@5OA*TEt0yWO>P9lon>Zr`j9FN{O>
z@=W}UqvsuT$bIkjT048c_&fWS?fri{n-||ZZSNg>8y^M-<G;)4-Jtb%)o*Few`y*j
z9<JQol^R~(?&^N_!)kJWGCN!0n>s^kq)%SFaKBUV7M1Jxpl(+7DC0|c&vAu!S*K^V
zcbJ7XeXdPeOIl}rX*blfUhnv*+)t*^eW@2UZTt-B*=Qpde4xLLI^ME<?i;z|v$rq#
zMmqS3)91gTZ}8>blYb*0lbWnYzv<uh7Qd-qsy_dXd{R~6Gq|%a8~#&M_W5t><40JJ
z|7JdAu69qae{#de6+gZ32|f7f(<u7t-UIx%VZFgZ_?PXx#)S_^nd1wO^xCnuvK!!`
ze_D#q_o7>%Zbkgh2Xk=~{(EuCMW+kj1U|ZO@j-bTE<D0MT;zRl<MR<er0clQgl+hS
zmBU!Ob$fOmCr50Yw8j@XZRN~6{UA<V$JzV%jMHfgnUgyMF54kIu@$=04|8A~j<s9e
zB6+p>`SH)*h7gl9$H3=A$T1@A<;Ubv50Lt#2bduoQOC>0E`-!4U5E51b|Iu5={md>
zeFVWr(J!M9qmQF6uznH!KKeuS$LKSxpGCioeiMBfy^r;MJTJeBK94@Z`bqR&^kwwx
z=mV@DMEJ*H|Fg4!ocRhogj4U(6d%-U!-YrKhl{)qZi3vqaN1Kw*oH&q#@YkY9w$d^
zh}Zd!1)t<gFN%(%!IvR*bM#ucJ3Gedw1v#cX}0FF9l{e^p+%=P2gc!8yVWg{dz&Bj
zt>Y77x;}>0z~@BBF(U2d$K(Nz0I5%UfEmIOb-Y~cLP&kmbx40=7eeZhu0yH&n*KI^
zw&COnzYE_4KDu!6L3tZ4Ji<O)<b81C^ASI!>$uQ_ZTJRt<5;_O+s7U!M{Jz5#uqtl
zrH4BGAWmJ!+57m6(`gHtlRE=0+aWx$6}r<8b6^~fwOidHdA0drzd1f3rt3dg4SY_7
z93#?ReoP+l2$1@u2bduoQOC>0E`-!4U5E51b|Iu5={oG~`OS83PKWV0h1Z5tc4&$n
z_1bXZ5%%FC?}M8l-!7c?lo7V!kh!t;fV9WS5gX!lzGJ~B`HbNPa1KYW9qJaT+m_hp
zlVTk4->`+u$(<sX?GT>W3f<`kIo5UPNpf%V!+vvoLQL0xuo?KA2suWiz5JLw;1MA8
zNe?hXIHHc1i(Lq*Pr452PwYZSJ<@e3bzjrp#?LmKJmGiYo4`jGE<Px4!-YrKhl{)q
zZhSuChjbkmny?Mupl%#%w>JCM+c>G$Ax>Y{a1Hv9AIR%E&feQOfADk7^SGlCOX5kM
z!oP;luj{yY;LC954_|JqJp_=vB!|!s`_0jWn6Cd|Gw?YPa*RlO`7wFGBS7kt9$<!W
YL>(^|yAV>JbRE*4*oBaKr1#-}0kv~_egFUf

literal 0
HcmV?d00001

diff --git a/custom_levels/test-zone/testzone.gd b/custom_levels/test-zone/testzone.gd
new file mode 100644
index 0000000000..0ecd61c2ae
--- /dev/null
+++ b/custom_levels/test-zone/testzone.gd
@@ -0,0 +1,8 @@
+;; DGO definition file for the test-zone custom level
+;; We use the convention of having a longer DGO name for levels without precomputed visibility.
+
+;; the actual file name still needs to be 8.3
+("TSZ.DGO"
+  ("static-screen.o" "static-screen")
+  ("test-zone.go" "test-zone")
+  )
\ No newline at end of file
diff --git a/decompiler/level_extractor/extract_level.cpp b/decompiler/level_extractor/extract_level.cpp
index f63931a31c..ec1a12d34a 100644
--- a/decompiler/level_extractor/extract_level.cpp
+++ b/decompiler/level_extractor/extract_level.cpp
@@ -53,51 +53,6 @@ bool is_valid_bsp(const decompiler::LinkedObjectFile& file) {
   return true;
 }
 
-void print_memory_usage(const tfrag3::Level& lev, int uncompressed_data_size) {
-  int total_accounted = 0;
-  auto memory_use_by_category = lev.get_memory_usage();
-
-  std::vector<std::pair<std::string, int>> known_categories = {
-      {"texture", memory_use_by_category[tfrag3::MemoryUsageCategory::TEXTURE]},
-      {"tie-deinst-vis", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_DEINST_VIS]},
-      {"tie-deinst-idx", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_DEINST_INDEX]},
-      {"tie-inst-vis", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_INST_VIS]},
-      {"tie-inst-idx", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_INST_INDEX]},
-      {"tie-bvh", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_BVH]},
-      {"tie-verts", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_VERTS]},
-      {"tie-colors", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_TIME_OF_DAY]},
-      {"tie-wind-inst-info",
-       memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_WIND_INSTANCE_INFO]},
-      {"tie-cidx", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_CIDX]},
-      {"tie-mats", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_MATRICES]},
-      {"tie-grps", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_GRPS]},
-      {"tfrag-vis", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_VIS]},
-      {"tfrag-idx", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_INDEX]},
-      {"tfrag-vert", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_VERTS]},
-      {"tfrag-colors", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_TIME_OF_DAY]},
-      {"tfrag-cluster", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_CLUSTER]},
-      {"tfrag-bvh", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_BVH]},
-      {"shrub-colors", memory_use_by_category[tfrag3::MemoryUsageCategory::SHRUB_TIME_OF_DAY]},
-      {"shrub-vert", memory_use_by_category[tfrag3::MemoryUsageCategory::SHRUB_VERT]},
-      {"shrub-ind", memory_use_by_category[tfrag3::MemoryUsageCategory::SHRUB_IND]},
-      {"collision", memory_use_by_category[tfrag3::MemoryUsageCategory::COLLISION]},
-      {"merc-vert", memory_use_by_category[tfrag3::MemoryUsageCategory::MERC_VERT]},
-      {"merc-idx", memory_use_by_category[tfrag3::MemoryUsageCategory::MERC_INDEX]}};
-  for (auto& known : known_categories) {
-    total_accounted += known.second;
-  }
-
-  known_categories.push_back({"unknown", uncompressed_data_size - total_accounted});
-
-  std::sort(known_categories.begin(), known_categories.end(),
-            [](const auto& a, const auto& b) { return a.second > b.second; });
-
-  for (const auto& x : known_categories) {
-    fmt::print("{:30s} : {:6d} kB {:3.1f}%\n", x.first, x.second / 1024,
-               100.f * (float)x.second / uncompressed_data_size);
-  }
-}
-
 void add_all_textures_from_level(tfrag3::Level& lev,
                                  const std::string& level_name,
                                  const TextureDB& tex_db) {
diff --git a/decompiler/level_extractor/extract_tfrag.cpp b/decompiler/level_extractor/extract_tfrag.cpp
index 2af87aa9dc..82a3045d8a 100644
--- a/decompiler/level_extractor/extract_tfrag.cpp
+++ b/decompiler/level_extractor/extract_tfrag.cpp
@@ -5,6 +5,7 @@
 #include "common/util/FileUtil.h"
 #include "common/dma/gs.h"
 #include "common/util/Assert.h"
+#include "common/custom_data/pack_helpers.h"
 
 namespace decompiler {
 namespace {
@@ -2056,9 +2057,9 @@ void make_tfrag3_data(std::map<u32, std::vector<GroupedDraw>>& draws,
           vtx.z = vert.pre_cam_trans_pos.z();
           vtx.s = vert.stq.x();
           vtx.t = vert.stq.y();
-          vtx.q = vert.stq.z();
+          vtx.q_unused = vert.stq.z();
           // if this is true, we can remove a divide in the shader
-          ASSERT(vtx.q == 1.f);
+          ASSERT(vtx.q_unused == 1.f);
           vtx.color_index = vert.rgba / 4;
           // ASSERT((vert.rgba >> 2) < 1024); spider cave has 2048?
           ASSERT((vert.rgba & 3) == 0);
@@ -2137,85 +2138,6 @@ void merge_groups(std::vector<tfrag3::StripDraw::VisGroup>& grps) {
 
 }  // namespace
 
-constexpr float kClusterSize = 4096 * 40;  // 100 in-game meters
-constexpr float kMasterOffset = 12000 * 4096;
-
-std::pair<u64, u16> position_to_cluster_and_offset(float in) {
-  in += kMasterOffset;
-  if (in < 0) {
-    fmt::print("negative: {}\n", in);
-  }
-  ASSERT(in >= 0);
-  int cluster_cell = (in / kClusterSize);
-  float leftover = in - (cluster_cell * kClusterSize);
-  u16 offset = (leftover / kClusterSize) * float(UINT16_MAX);
-
-  float recovered = ((float)cluster_cell + ((float)offset / UINT16_MAX)) * kClusterSize;
-  float diff = std::fabs(recovered - in);
-  ASSERT(diff < 7);
-  ASSERT(cluster_cell >= 0);
-  ASSERT(cluster_cell < UINT16_MAX);
-  return {cluster_cell, offset};
-}
-
-void pack_vertices(tfrag3::PackedTfragVertices* result,
-                   const std::vector<tfrag3::PreloadedVertex>& vertices) {
-  u32 next_cluster_idx = 0;
-  std::map<u64, u32> clusters;
-
-  for (auto& vtx : vertices) {
-    auto x = position_to_cluster_and_offset(vtx.x);
-    auto y = position_to_cluster_and_offset(vtx.y);
-    auto z = position_to_cluster_and_offset(vtx.z);
-    u64 cluster_id = 0;
-    cluster_id |= x.first;
-    cluster_id |= (y.first << 16);
-    cluster_id |= (z.first << 32);
-
-    auto cluster_it = clusters.find(cluster_id);
-    u32 my_cluster_idx = 0;
-    if (cluster_it == clusters.end()) {
-      // first in cluster
-      clusters[cluster_id] = next_cluster_idx;
-      my_cluster_idx = next_cluster_idx;
-      next_cluster_idx++;
-    } else {
-      my_cluster_idx = cluster_it->second;
-    }
-
-    tfrag3::PackedTfragVertices::Vertex out_vtx;
-    out_vtx.xoff = x.second;
-    out_vtx.yoff = y.second;
-    out_vtx.zoff = z.second;
-    out_vtx.cluster_idx = my_cluster_idx;
-    // TODO check these
-    out_vtx.s = vtx.s * 1024;
-    out_vtx.t = vtx.t * 1024;
-    out_vtx.color_index = vtx.color_index;
-    result->vertices.push_back(out_vtx);
-  }
-
-  result->cluster_origins.resize(next_cluster_idx);
-  for (auto& cluster : clusters) {
-    auto& res = result->cluster_origins[cluster.second];
-    res.x() = (u16)cluster.first;
-    res.y() = (u16)(cluster.first >> 16);
-    res.z() = (u16)(cluster.first >> 32);
-  }
-
-  /*
-  std::unordered_set<tfrag3::PackedTfragVertices::Vertex, tfrag3::PackedTfragVertices::Vertex::hash>
-      a;
-  for (auto& v : result->vertices) {
-    a.insert(v);
-  }
-  fmt::print("SIZE: {} vs {} {}\n", a.size(), result->vertices.size(),
-             (float)a.size() / result->vertices.size());
-             */
-
-  ASSERT(next_cluster_idx < UINT16_MAX);
-}
-
 void extract_tfrag(const level_tools::DrawableTreeTfrag* tree,
                    const std::string& debug_name,
                    const std::vector<level_tools::TextureRemap>& map,
@@ -2282,7 +2204,7 @@ void extract_tfrag(const level_tools::DrawableTreeTfrag* tree,
     std::vector<tfrag3::PreloadedVertex> vertices;
     emulate_tfrags(geom, as_tfrag_array->tfragments, debug_name, map, out, this_tree, vertices,
                    tex_db, expected_missing_textures, dump_level);
-    pack_vertices(&this_tree.packed_vertices, vertices);
+    pack_tfrag_vertices(&this_tree.packed_vertices, vertices);
     extract_time_of_day(tree, this_tree);
 
     for (auto& draw : this_tree.draws) {
diff --git a/game/graphics/opengl_renderer/background/Tfrag3.cpp b/game/graphics/opengl_renderer/background/Tfrag3.cpp
index 1b6846e0a7..a1ee89a64a 100644
--- a/game/graphics/opengl_renderer/background/Tfrag3.cpp
+++ b/game/graphics/opengl_renderer/background/Tfrag3.cpp
@@ -78,6 +78,7 @@ void Tfrag3::update_load(const std::vector<tfrag3::TFragmentTreeKind>& tree_kind
         tree_cache.vis = &tree.bvh;
         tree_cache.index_data = tree.unpacked.indices.data();
         tree_cache.tod_cache = swizzle_time_of_day(tree.colors);
+        tree_cache.draw_mode = tree.use_strips ? GL_TRIANGLE_STRIP : GL_TRIANGLES;
         vis_temp_len = std::max(vis_temp_len, tree.bvh.vis_nodes.size());
         glBindBuffer(GL_ARRAY_BUFFER, tree_cache.vertex_buffer);
         //            glBufferData(GL_ARRAY_BUFFER, verts * sizeof(tfrag3::PreloadedVertex),
@@ -240,11 +241,11 @@ void Tfrag3::render_tree(int geom,
 
     prof.add_draw_call();
     if (render_state->no_multidraw) {
-      glDrawElements(GL_TRIANGLE_STRIP, singledraw_indices.second, GL_UNSIGNED_INT,
+      glDrawElements(tree.draw_mode, singledraw_indices.second, GL_UNSIGNED_INT,
                      (void*)(singledraw_indices.first * sizeof(u32)));
     } else {
-      glMultiDrawElements(GL_TRIANGLE_STRIP,
-                          &m_cache.multidraw_count_buffer[multidraw_indices.first], GL_UNSIGNED_INT,
+      glMultiDrawElements(tree.draw_mode, &m_cache.multidraw_count_buffer[multidraw_indices.first],
+                          GL_UNSIGNED_INT,
                           &m_cache.multidraw_index_offset_buffer[multidraw_indices.first],
                           multidraw_indices.second);
     }
@@ -260,11 +261,11 @@ void Tfrag3::render_tree(int geom,
                     double_draw.aref_second);
         glDepthMask(GL_FALSE);
         if (render_state->no_multidraw) {
-          glDrawElements(GL_TRIANGLE_STRIP, singledraw_indices.second, GL_UNSIGNED_INT,
+          glDrawElements(tree.draw_mode, singledraw_indices.second, GL_UNSIGNED_INT,
                          (void*)(singledraw_indices.first * sizeof(u32)));
         } else {
           glMultiDrawElements(
-              GL_TRIANGLE_STRIP, &m_cache.multidraw_count_buffer[multidraw_indices.first],
+              tree.draw_mode, &m_cache.multidraw_count_buffer[multidraw_indices.first],
               GL_UNSIGNED_INT, &m_cache.multidraw_index_offset_buffer[multidraw_indices.first],
               multidraw_indices.second);
         }
diff --git a/game/graphics/opengl_renderer/background/Tfrag3.h b/game/graphics/opengl_renderer/background/Tfrag3.h
index 24762a087f..295263ee1f 100644
--- a/game/graphics/opengl_renderer/background/Tfrag3.h
+++ b/game/graphics/opengl_renderer/background/Tfrag3.h
@@ -64,6 +64,7 @@ class Tfrag3 {
     const tfrag3::BVH* vis = nullptr;
     const u32* index_data = nullptr;
     SwizzledTimeOfDay tod_cache;
+    u64 draw_mode = 0;
 
     void reset_stats() {
       rendered_this_frame = false;
diff --git a/game/graphics/opengl_renderer/loader/LoaderStages.cpp b/game/graphics/opengl_renderer/loader/LoaderStages.cpp
index e75e76c2a2..618ae46611 100644
--- a/game/graphics/opengl_renderer/loader/LoaderStages.cpp
+++ b/game/graphics/opengl_renderer/loader/LoaderStages.cpp
@@ -83,6 +83,7 @@ class TfragLoadStage : public LoaderStage {
           GLuint& tree_out = data.lev_data->tfrag_vertex_data[geo].emplace_back();
           glGenBuffers(1, &tree_out);
           glBindBuffer(GL_ARRAY_BUFFER, tree_out);
+
           glBufferData(GL_ARRAY_BUFFER,
                        in_tree.unpacked.vertices.size() * sizeof(tfrag3::PreloadedVertex), nullptr,
                        GL_STATIC_DRAW);
@@ -97,34 +98,38 @@ class TfragLoadStage : public LoaderStage {
     u32 unique_buffers = 0;
 
     while (true) {
-      const auto& tree = data.lev_data->level->tfrag_trees[m_next_geo][m_next_tree];
-      u32 end_vert_in_tree = tree.unpacked.vertices.size();
-      // the number of vertices we'd need to finish the tree right now
-      size_t num_verts_left_in_tree = end_vert_in_tree - m_next_vert;
-      size_t start_vert_for_chunk;
-      size_t end_vert_for_chunk;
-
       bool complete_tree;
 
-      if (num_verts_left_in_tree > CHUNK_SIZE) {
-        complete_tree = false;
-        // should only do partial
-        start_vert_for_chunk = m_next_vert;
-        end_vert_for_chunk = start_vert_for_chunk + CHUNK_SIZE;
-        m_next_vert += CHUNK_SIZE;
-      } else {
-        // should do all!
-        start_vert_for_chunk = m_next_vert;
-        end_vert_for_chunk = end_vert_in_tree;
+      if (data.lev_data->level->tfrag_trees[m_next_geo].empty()) {
         complete_tree = true;
-      }
+      } else {
+        const auto& tree = data.lev_data->level->tfrag_trees[m_next_geo][m_next_tree];
+        u32 end_vert_in_tree = tree.unpacked.vertices.size();
+        // the number of vertices we'd need to finish the tree right now
+        size_t num_verts_left_in_tree = end_vert_in_tree - m_next_vert;
+        size_t start_vert_for_chunk;
+        size_t end_vert_for_chunk;
 
-      glBindBuffer(GL_ARRAY_BUFFER, data.lev_data->tfrag_vertex_data[m_next_geo][m_next_tree]);
-      u32 upload_size =
-          (end_vert_for_chunk - start_vert_for_chunk) * sizeof(tfrag3::PreloadedVertex);
-      glBufferSubData(GL_ARRAY_BUFFER, start_vert_for_chunk * sizeof(tfrag3::PreloadedVertex),
-                      upload_size, tree.unpacked.vertices.data() + start_vert_for_chunk);
-      uploaded_bytes += upload_size;
+        if (num_verts_left_in_tree > CHUNK_SIZE) {
+          complete_tree = false;
+          // should only do partial
+          start_vert_for_chunk = m_next_vert;
+          end_vert_for_chunk = start_vert_for_chunk + CHUNK_SIZE;
+          m_next_vert += CHUNK_SIZE;
+        } else {
+          // should do all!
+          start_vert_for_chunk = m_next_vert;
+          end_vert_for_chunk = end_vert_in_tree;
+          complete_tree = true;
+        }
+
+        glBindBuffer(GL_ARRAY_BUFFER, data.lev_data->tfrag_vertex_data[m_next_geo][m_next_tree]);
+        u32 upload_size =
+            (end_vert_for_chunk - start_vert_for_chunk) * sizeof(tfrag3::PreloadedVertex);
+        glBufferSubData(GL_ARRAY_BUFFER, start_vert_for_chunk * sizeof(tfrag3::PreloadedVertex),
+                        upload_size, tree.unpacked.vertices.data() + start_vert_for_chunk);
+        uploaded_bytes += upload_size;
+      }
 
       if (complete_tree) {
         unique_buffers++;
diff --git a/goal_src/engine/collide/collide-cache.gc b/goal_src/engine/collide/collide-cache.gc
index 031c25d23a..800af98612 100644
--- a/goal_src/engine/collide/collide-cache.gc
+++ b/goal_src/engine/collide/collide-cache.gc
@@ -384,6 +384,8 @@
           (set! (-> v1-5 mesh) (-> obj mesh))
           (set! (-> v1-5 inst) #f)
           )
+        ; (add-debug-sphere #t (bucket-id debug) (-> obj bsphere) (-> obj bsphere w) (new 'static 'rgba :g #x80 :a #x80))
+        ; (format 0 "~f~%" (-> obj bsphere w))
         (+! (-> arg1 num-items) 1)
         )
       (&+! obj 32)
diff --git a/goal_src/engine/collide/collide-frag.gc b/goal_src/engine/collide/collide-frag.gc
index a9703339dc..7246ac2ab7 100644
--- a/goal_src/engine/collide/collide-frag.gc
+++ b/goal_src/engine/collide/collide-frag.gc
@@ -83,6 +83,19 @@
 
 (defmethod draw collide-fragment ((obj collide-fragment) (arg0 collide-fragment) (arg1 display-frame))
   ;; if we wanted to draw collide-fragment's we'd do it here.
+  ; (when (< (-> obj bsphere w) (meters 22.))
+  ;   (format 0 "sp: ~m : ~D~%" (-> obj bsphere w) (-> obj mesh poly-count))
+  ;   (let ((mins (vector-copy! (new-stack-vector0) (-> obj bsphere)))
+  ;         (maxs (vector-copy! (new-stack-vector0) (-> obj bsphere))))
+  ;     (dotimes (i 3)
+  ;       (-! (-> mins data i) (-> obj bsphere w))
+  ;       (+! (-> maxs data i) (-> obj bsphere w))
+  ;       )
+  ;     (add-debug-box #t (bucket-id debug) mins maxs (new 'static 'rgba :r #x80 :a #x80)
+  ;     )
+  ;   ;(add-debug-sphere #t (bucket-id debug) (-> obj bsphere) (-> obj bsphere w) (new 'static 'rgba :r #x80 :a #x80))
+  ;   )
+  ;; (add-debug-point #t (bucket-id debug) (-> obj bsphere))
   (none)
   )
 
diff --git a/goal_src/engine/gfx/texture.gc b/goal_src/engine/gfx/texture.gc
index acab5a8189..83b85241d6 100644
--- a/goal_src/engine/gfx/texture.gc
+++ b/goal_src/engine/gfx/texture.gc
@@ -1371,7 +1371,12 @@
   (dotimes (page-idx 9)
     (set! (-> level texture-page page-idx) #f)
     )
-
+  (#when PC_PORT
+    (when (zero? id-array)
+      (format #t "ERROR: texture id array is 0, skipping texture login!~%")
+      (return #f)
+      )
+    )
   (if (>= max-page-kind 0) ;; tfrag.
       ;; login the texture. If the texture isn't there, it will try to load it
       ;; and allocate with the given allocation function.
diff --git a/goal_src/engine/level/level-info.gc b/goal_src/engine/level/level-info.gc
index c8761e7778..5864764e72 100644
--- a/goal_src/engine/level/level-info.gc
+++ b/goal_src/engine/level/level-info.gc
@@ -1972,3 +1972,44 @@
 
 
 
+;;;;;;;;; CUSTOM LEVELS
+(define test-zone (new 'static 'level-load-info
+                           :index 26
+                           :name 'test-zone
+                           :visname 'test-zone-vis ;; name + -vis
+                           :nickname 'tsz          ;; nickname
+                           :packages '()
+                           :sound-banks '()
+                           :music-bank #f
+                           :ambient-sounds '()
+                           :mood '*default-mood*
+                           :mood-func 'update-mood-default
+                           :ocean #f
+                           :sky #t
+                           :continues '((new 'static 'continue-point
+                                             :name "test-zone-start"
+                                             :level 'test-zone
+                                             :trans (new 'static 'vector :x 0.0 :y (meters 10.) :z (meters 10.) :w 1.0)
+                                             :quat (new 'static 'quaternion  :w 1.0)
+                                             :camera-trans (new 'static 'vector :x 0.0 :y 4096.0 :z 0.0 :w 1.0)
+                                             :camera-rot (new 'static 'array float 9 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0)
+                                             :load-commands '()
+                                             :vis-nick 'none
+                                             :lev0 'test-zone
+                                             :disp0 'display
+                                             :lev1 'village1
+                                             :disp1 'display
+                                             ))
+                           :tasks '()
+                           :priority 100
+                           :load-commands '()
+                           :alt-load-commands '()
+                           :bsp-mask #xffffffffffffffff
+                           :bsphere (new 'static 'sphere :w 167772160000.0)
+                           :bottom-height (meters -20)
+                           :run-packages '()
+                           :wait-for-load #t
+                           )
+        )
+
+(cons! *level-load-list* 'test-zone)
\ No newline at end of file
diff --git a/goal_src/engine/level/level.gc b/goal_src/engine/level/level.gc
index 7b3c549f74..371e0fcf8a 100644
--- a/goal_src/engine/level/level.gc
+++ b/goal_src/engine/level/level.gc
@@ -93,13 +93,13 @@
   "Draw a level!"
   ;; do the draw
   (draw arg0 arg0 arg3)
-  
+
   (if (nonzero? *display-strip-lines*)
       (debug-draw arg0 arg0 arg3)
       )
   (none)
   )
-  
+
 
 (defmethod print level ((obj level))
   "print a level."
@@ -115,7 +115,7 @@
 
 (defmethod relocate bsp-header ((obj bsp-header) (dest-heap kheap) (name (pointer uint8)))
   "Handle a bsp file load."
-  
+
   ;; we expect that we'll have a loading-level set when we link/login a bsp-header
   (let ((s5-0 (-> *level* loading-level)))
     (if s5-0
@@ -175,7 +175,7 @@
 
 (defmethod vis-clear level ((obj level))
   "Clear the visibility info for when the level is loading."
-  
+
   ;; clear vis-infos, so we can't try to look up a vis string.
   (countdown (v1-0 8)
     (nop!) ;; the usual.
@@ -194,11 +194,11 @@
 (defmethod vis-load level ((obj level))
   "Start the initial load of a VIS file to the IOP VIS buffer. After this is done, we can use
    ramdisk-load to load chunks."
-   
-  ;; check to see if we have a buffer for loaded vis data. 
+
+  ;; check to see if we have a buffer for loaded vis data.
   (when (zero? (-> obj vis-info (-> obj vis-self-index) ramdisk))
     ;; nope, we have no vis data buffer, we need to set it up.
-    
+
     ;; first, we should see if the other level has loaded vis. if so, kill it.
     (let ((vis (-> obj other vis-info (-> obj other vis-self-index))))
         (when (and vis (nonzero? (-> vis ramdisk)))
@@ -207,7 +207,7 @@
           0
           )
         )
-    
+
     ;; set up a ramdisk rpc (fill command, actually load the file from DVD to IOP buffer)
     (let ((visname (make-file-name (file-kind vis) (the-as string (-> obj nickname)) 0 #f))
           (cmd (the-as ramdisk-rpc-fill (add-element *ramdisk-rpc*)))
@@ -222,7 +222,7 @@
         (set! (-> obj vis-info (-> obj vis-self-index) ramdisk) s5-0)
         )
     )
-  
+
   ;; return the ramdisk ID.
   (-> obj vis-info (-> obj vis-self-index) ramdisk)
   )
@@ -278,7 +278,7 @@
           )
         )
       )
-    
+
     ;; check for up to 6 neighbor level vis info.  The last one is always left as null.
     (dotimes (s5-1 6)
       (let* ((s3-0 (+ s5-1 1))
@@ -425,7 +425,7 @@
          (('alive)
           (when (and *dproc* (= want-status 'active))
             ;; only if we want to do alive -> active
-            
+
             ;; will set the level to be drawn.
             (remove-by-param1 *background-draw-engine* (-> obj bsp))
             (add-connection *background-draw-engine* *dproc* (the (function object object object object object) add-bsp-drawable) (-> obj bsp) obj #f)
@@ -539,10 +539,10 @@
 
   ;; set the level heap. level code logins called from linker may allocate here
   (set! loading-level (-> obj heap))
-  
+
   ;; relocate method of the bsp will look for this
   (set! (-> *level* loading-level) obj)
-  
+
   ;; clear out old stuff
   (set! (-> *level* log-in-level-bsp) #f)
   (set! (-> obj nickname) #f)
@@ -551,12 +551,12 @@
   (set! (-> obj ambient) #f)
   (set! (-> obj linking) #f)
   (vis-clear obj)
-  
+
   (set! (-> obj status) 'loading)
-  
+
   ;; incoming textures should use the level allocator
   (set! (-> *texture-pool* allocate-func) texture-page-level-allocate)
-  
+
   ;; build name
   (if (= (-> obj load-name) (-> obj info visname))
     (format (clear *temp-string*) "~S" (-> obj info nickname))
@@ -564,19 +564,19 @@
     )
   (set! (-> *temp-string* data 8) (the-as uint 0))
   (format *temp-string* ".DGO")
-  
+
   ;; reset temporary allocations on level heap
   (set! (-> obj heap top) (-> obj heap top-base))
-  
+
   ;; allocate DGO loading buffers
   (let ((s4-0 (kmalloc (-> obj heap) (* 2 1024 1024) (kmalloc-flags align-64 top) "dgo-level-buf-2"))
         (s5-2 (kmalloc (-> obj heap) (* 2 1024 1024) (kmalloc-flags align-64 top) "dgo-level-buf-2"))
         )
       (load-dbg " DGO buffers at #x~X #x~X~%" s4-0 s5-2)
-      
+
       ;; we expect to load code first, remember where the heap is now.
       (set! (-> obj code-memory-start) (-> obj heap current))
-      
+
       (format 0 "-----------> begin load ~A [~S]~%" (-> obj load-name) *temp-string*)
       ;; kick off the load!
       (dgo-load-begin *temp-string* s5-2 s4-0 (the pointer (align64 (-> obj heap current))))
@@ -586,10 +586,10 @@
 
 (defmethod login-begin level ((obj level))
   "Start the login.  This is spread over multiple frames."
-  
+
   ;; done with load, reset the texture page allocator
   (set! (-> *texture-pool* allocate-func) texture-page-default-allocate)
-  
+
   (cond
     ((-> obj bsp)
      (set! (-> *level* log-in-level-bsp) (-> obj bsp))
@@ -605,7 +605,7 @@
            )
          )
        )
-     
+
      ;; set the login state machine at the beginning.
      (set! (-> *login-state* state) -1)
      (set! (-> *login-state* pos) (the-as uint 0))
@@ -633,16 +633,16 @@
     (sv-16 prototype-bucket-tie)
     (sv-32 int)
     )
-  
+
   ;; there is some logic for not doing the whole login all at once...
   ;; for now, we will somewhat ignore that.
-  
-  
+
+
   (let ((level-drawable-trees (-> loaded-level bsp drawable-trees)))
     ;;(.mfc0 initial-timer Count)
     (label cfg-1)
     ;;(.mfc0 current-timer Count)
-    
+
     ;; this would quit the login function after some amount of time elapsed.
     #|
     (let ((elapsed-timer (- current-timer initial-timer)))
@@ -652,16 +652,16 @@
         )
       )
     |#
-    
+
     (let ((current-login-pos (the-as int (-> level-login-state pos))))
-      
+
       ;; Login state -1.
       ;; in this state, we log in drawables/art-groups that are in referenced in the bsp directly
       ;; the current-login-pos in the index of the drawable/art to login.
-      
+
       (when (= (-> level-login-state state) -1)
         ;;(load-dbg "login state -1~%")
-        
+
         ;; login some drawables.
         (when (< current-login-pos (-> level-drawable-trees length))
           (let ((current-drawable (-> level-drawable-trees trees (the-as uint current-login-pos))))
@@ -699,7 +699,7 @@
           (+! (-> level-login-state pos) 1)
           (goto cfg-1)
           )
-        
+
         ;; this makes the art groups go at the end.
         (let ((v1-39 (- (the-as uint current-login-pos) (the-as uint (-> level-drawable-trees length)))))
           (when (< (the-as int v1-39) (-> loaded-level art-group art-group-array length))
@@ -713,14 +713,14 @@
             (goto cfg-1)
             )
           )
-        
+
         ;; if we got here, we're done with state -1!
         (set! (-> level-login-state pos) (the-as uint 0))
         (set! (-> level-login-state state) 0)
         (goto cfg-1)
         )
-      
-      
+
+
       ;; login state 0.
       ;; we log in children of the drawables from state -1.
       (when (< (-> level-login-state state) (the-as int (-> level-login-state elts)))
@@ -755,7 +755,7 @@
                    (when (< current-login-pos (-> s1-2 length))
                      (set! sv-16 (-> s1-2 array-data (the-as uint current-login-pos)))
                      (set! sv-32 0)
-                     
+
                      (#when PC_PORT
                        ;; if a TIE uses environment mapping, disable the fade out so it always renderers with
                        ;; the generic renderer. In the port, we just make envmapped things always envmap.
@@ -763,7 +763,7 @@
                          (*! (-> sv-16 envmap-fade-far) 10000.)
                          )
                        )
-                     
+
                      (while (< sv-32 4)
                        (let ((a0-28 (-> sv-16 geometry sv-32)))
                          ;;(load-dbg " login geom: ~A~%" a0-28)
@@ -810,8 +810,8 @@
           )
         (goto cfg-1)
         )
-      
-      
+
+
       (when (= (-> level-login-state state) (-> level-login-state elts))
         (let ((v1-115 (-> loaded-level bsp)))
           (cond
@@ -837,8 +837,7 @@
         )
       )
     )
-  
-  
+
   ;; done!
   (set! (-> loaded-level nickname) (-> loaded-level bsp nickname))
   (if (nonzero? (-> loaded-level bsp nodes))
@@ -857,7 +856,7 @@
     (set! (-> *subdivide-settings* close 3) f0-0)
     (set! (-> *subdivide-settings* far 3) f1-0)
     )
-  
+
   (load-dbg "init-vis~%")
   (init-vis loaded-level)
   (load-dbg "package load~%")
@@ -905,31 +904,31 @@
   (case (-> obj status)
     (('active 'alive)
      (format 0 "----------- kill ~A (status ~A)~%" obj (-> obj status))
-     
+
      ;; copy data from the level to the game-info storage.  This will remember permanent level stuff, like
      ;; what you collected/completed.
      (copy-perms-from-level! *game-info* obj)
      (send-event *camera* 'level-deactivate (-> obj name))
      (send-event *target* 'level-deactivate (-> obj name))
-     
+
      ;; remove this BSP from the engine. This will stop us from being drawn.
      (remove-by-param1 *background-draw-engine* (-> obj bsp))
-     
+
      ;; track down all the entities and kill them
      (deactivate-entities (-> obj bsp))
-     
+
      ;; kill any remaining particles not associated with a part-tracker
      (kill-all-particles-in-level obj)
-     
+
      ;; clean up our level
      (set! (-> obj inside-sphere?) #f)
      (set! (-> obj inside-boxes?) #f)
      (set! (-> obj meta-inside?) #f)
      (set! (-> obj force-inside?) #f)
-     
+
      ;; we're still loaded.
      (set! (-> obj status) 'loaded)
-     
+
      (set! (-> obj all-visible?) 'loading)
      ;; clear vis buffers
      (dotimes (v1-19 128)
@@ -955,10 +954,10 @@
 
 (defmethod unload! level ((obj level))
   "Unloads the level. This does not free the heap. The level will be made inactive and ready to be loaded some other time."
-  
+
   (deactivate obj)
   (when (!= (-> obj status) 'inactive)
-    
+
     ;; if we linked art group, unlink it.
     (when (or (= (-> obj status) 'loaded)
               (= (-> obj status) 'alive)
@@ -973,7 +972,7 @@
           )
         )
       )
-    
+
     ;; turn some things off
     (set! (-> obj bsp) #f)
     (set! (-> obj entity) #f)
@@ -981,7 +980,7 @@
     (set! (-> obj status) 'inactive)
     (set! (-> obj art-group string-array length) 0)
     (set! (-> obj art-group art-group-array length) 0)
-    
+
     ;; unload texture pages
     (countdown (s5-1 (-> obj loaded-texture-page-count))
       (dotimes (v1-27 32)
@@ -993,10 +992,10 @@
       )
     (set! (-> obj loaded-texture-page-count) 0)
     (unlink-textures-in-heap! *texture-page-dir* (-> obj heap))
-    
+
     ;; unload particle groups that were defined in the level data
     (unlink-part-group-by-heap (-> obj heap))
-    
+
     ;; if there are any in-progress art loads for this level, kill them.
     (dotimes (s5-2 2)
       (let ((v1-41 (-> *art-control* buffer s5-2 pending-load-file)))
@@ -1019,7 +1018,7 @@
            (a0-29 (car s5-3))
            )
       (while (not (null? s5-3))
-        (case (rtype-of a0-29) 
+        (case (rtype-of a0-29)
               ((symbol)
                (unload (symbol->string (the-as symbol a0-29)))
                )
@@ -1033,7 +1032,7 @@
       )
 
     (vis-clear obj)
-    
+
     ;; reset the level heap!
     (let ((v1-64 (-> obj heap)))
       (set! (-> v1-64 current) (-> v1-64 base))
@@ -1057,14 +1056,14 @@
 
   ;; note : pc port added option to show every actor regardless
   (with-pc (if (-> *pc-settings* force-actors?) (return #t)))
-        
+
   ;; check the vis bits!
-  (let* (;; lwu v1, 388(a0) 
+  (let* (;; lwu v1, 388(a0)
             (vis-data (-> obj vis-bits))
             ;; sra a0, a1, 3
             (byte-idx (sar arg0 3))
             ;; daddu v1, a0, v1
-            ;; lb v1, 0(v1) 
+            ;; lb v1, 0(v1)
             (vis-byte (-> (the (pointer int8) vis-data) byte-idx))
             ;; andi a0, a1, 7
             (bit-idx (logand arg0 #b111))
@@ -1117,7 +1116,7 @@
 (defmethod debug-print-splitbox level ((obj level) (arg0 vector) (arg1 string))
   "Print the current splitbox, if we're in one."
   (cond
-   ((or (not (-> obj bsp)) (zero? (-> obj bsp boxes)) (zero? (-> obj bsp split-box-indices))) 
+   ((or (not (-> obj bsp)) (zero? (-> obj bsp boxes)) (zero? (-> obj bsp split-box-indices)))
     ;; do nothing!
     )
    (else
@@ -1431,6 +1430,11 @@
   )
 
 (defun bg ((level-name symbol))
+  "Begin game in a given level.
+   The level name can be the full name (village3), the nickname (vi3), or visname (village3-vis)
+   If the visname is used (and it's a recognized level in level-info), it will use vis mode.
+   Otherwise, it will use the non-vis DGO name (like VILLAGE3.DGO) which will usually fail.
+   "
   (set! *cheat-mode* (if *debug-segment*
                       'debug
                       #f
@@ -1496,6 +1500,62 @@
   0
   )
 
+(defun bg-custom ((level-name symbol))
+  "Modified version of bg for the PC Port custom levels.
+   Freezes the game while the level loads, then starts at its first continue. Returns #f on error."
+  ;; lookup info
+  (format 0 "(bg-custom ~A)~%" level-name)
+  (let ((lev-info (lookup-level-info level-name)))
+    (when (= lev-info default-level)
+      (format 0 "Unable to (bg-custom ~A), the level was not found in *level-load-list*~%" level-name)
+      (return #f)
+      )
+
+    ;; kill jak (rip)
+    (format 0 "doing stop~%")
+    (stop 'play)
+
+    ;; enable visibility. the custom level won't use it, but we want it on so other levels can be loaded.
+    (set! (-> *level* vis?) #t)
+
+    ;; disable border and play mode to prevent loading levels
+    (set! (-> *level* border?) #f)
+    (set! (-> *setting-control* default border-mode) #f)
+    (set! (-> *level* play?) #f)
+
+    (format 0 "doing level load~%")
+    ;; allocate level. This may start the loading process, but won't finish it.
+    (let ((lev (level-get-for-use *level* level-name 'active)))
+      (when (not lev)
+        (format 0 "Unable to load level, could not level-get-for-use~%")
+        (return #f)
+        )
+      (format 0 "about to start load loop, game will freeze and hopefully come back soon~%")
+
+      ;; spin in a loop and load it. This will cause the game to freeze during the load,
+      ;; but this is good enough for now.
+      (while (or (= (-> lev status) 'loading)
+                 (= (-> lev status) 'loading-bt)
+                 (= (-> lev status) 'login)
+                 )
+        (load-continue lev)
+        )
+
+      (when (not (-> lev info continues))
+        (format 0 "level info has no continues, can't load it.~%")
+        (return #f) ;; nowhere to spawn: don't fall through and take the car of #f
+        )
+      (let ((cont (car (-> lev info continues))))
+        (start 'play (the continue-point cont))
+        )
+
+      (vis-load lev)
+      (set! (-> lev all-visible?) #f)
+      (set! (-> lev force-all-visible?) #t)
+      )
+    )
+  )
+
 (defun play ((use-vis symbol) (init-game symbol))
   "The entry point to the actual game! This allocates the level heaps, loads some data, sets some default parameters and sets the startup level."
 
@@ -1817,11 +1877,11 @@
         )
       )
     )
-  
+
   ;; load vis info.
   ;; The load-state's vis-nick is the level we want vis data for.
   ;; Note that we won't load vis until we are inside the level's boxes.
-  
+
   ;; this will be the level that is currently being used.
   (let ((s5-3 #f))
     (dotimes (v1-121 (-> *level* length))
@@ -1833,7 +1893,7 @@
             )
           )
       )
-    
+
     ;; if we have the wrong vis
     (when (and (!= s5-3 (-> obj vis-nick)) (-> *level* vis?))
       ;; and we want a vis
@@ -1859,23 +1919,23 @@
 ;; method 16 level-group (debug text stuff)
 
 (defmethod level-update level-group ((obj level-group))
-  
+
   ;; this does nothing...
   (camera-pos)
   (new 'static 'boxed-array :type symbol :length 0 :allocated-length 2)
-  
+
   ;; compute the settings for this frame
   (update *setting-control*)
-  
+
   ;; run the art loading system
   (update *art-control* #t)
   (clear-rec *art-control*)
-  
+
   ;; run level loading!
   (dotimes (s5-0 2)
     (load-continue (-> obj level s5-0))
     )
-  
+
   ;; compute inside for each level
   (dotimes (s5-1 (-> obj length))
     (let ((s4-0 (-> obj level s5-1)))
@@ -1889,10 +1949,9 @@
         )
       )
     )
-  
+
   ;; update load state machine (the level-border one)
   (update! *load-state*)
-  
   ;; checkpoint assignment
   (dotimes (s5-2 (-> obj length))
     (let ((s4-1 (-> obj level s5-2)))
@@ -1927,7 +1986,7 @@
         )
       )
     )
-  
+
   ;; determine vis info idx for each level
   (dotimes (v1-67 (-> obj length))
     (let ((a0-26 (-> obj level v1-67)))
@@ -1950,7 +2009,7 @@
         )
       )
     )
-  
+
   ;; display level vis info
   (when *display-level-border*
     (dotimes (s5-3 (-> obj length))
@@ -1978,7 +2037,7 @@
         )
       )
     )
-  
+
   ;; if we have vis for level A, but we aren't "in" it, display an error and
   ;; force us out of the other level.  Ideally the boxes and the load boundary system
   ;; will be consistent and there is no way to set a vis to a level that we aren't in.
@@ -2002,7 +2061,6 @@
         )
       )
     )
-  
   ;; if we are outside of the boxes, we consider ourselves "outside of bsp"
   ;; if we are outside of both levels boxes, then we don't really know what to do
   ;; for vis, and we can display the classic "outside of bsp" error.
@@ -2041,7 +2099,7 @@
                   )
                 )
               )
-            
+
             ;; now, handle setting bit 31 (maybe single vis mode?)
             (cond
               ;; special display self mode.
@@ -2074,7 +2132,6 @@
         )
       )
     )
-  
   (when (or *display-level-border* *display-texture-download* *display-split-box-info*)
     (when *display-level-border*
       (format *stdcon* "  want: ~A ~A/~A   ~A ~A/~A~%"
@@ -2144,13 +2201,13 @@
         )
       )
     )
-  
+
   ;; tell PC port about our levels
-  (__pc-set-levels 
+  (__pc-set-levels
     (if (= (-> obj level0 status) 'inactive) "none" (symbol->string (-> obj level0 nickname)))
     (if (= (-> obj level1 status) 'inactive) "none" (symbol->string (-> obj level1 nickname)))
     )
-  
+
   0
   )
 
diff --git a/goal_src/engine/target/target-death.gc b/goal_src/engine/target/target-death.gc
index 110ce8b206..8e1621fae4 100644
--- a/goal_src/engine/target/target-death.gc
+++ b/goal_src/engine/target/target-death.gc
@@ -180,9 +180,11 @@
         )
       )
     (let ((s5-2 (level-get *level* (-> arg0 level))))
-      (when s5-2
+
+      ;; vis info check added for PC, don't bother waiting for vis if the level doesn't have it.
+      (when (and s5-2 (-> s5-2 vis-info 0))
         (while (and (= (-> s5-2 all-visible?) 'loading) (-> *level* vis?))
-          (suspend)
+            (suspend)
           )
         )
       )
diff --git a/goal_src/game.gp b/goal_src/game.gp
index 40b7e3b588..26e536dff2 100644
--- a/goal_src/game.gp
+++ b/goal_src/game.gp
@@ -103,6 +103,17 @@
     )
   )
 
+(defun custom-level-cgo (output-name desc-file-name)
+  "Add a CGO with the given output name (in out/iso) and input name (in custom_levels/)"
+  (let ((out-name (string-append "out/iso/" output-name)))
+    (defstep :in (string-append "custom_levels/" desc-file-name)  ;; description file, relative to custom_levels/
+      :tool 'dgo
+      :out `(,out-name)
+      )
+    (set! *all-cgos* (cons out-name *all-cgos*))  ;; also record the output path in *all-cgos*
+    )
+  )
+
 (defun cgo (output-name desc-file-name)
   "Add a CGO with the given output name (in out/iso) and input name (in goal_src/dgos)"
   (let ((out-name (string-append "out/iso/" output-name)))
@@ -147,6 +158,12 @@
     )
   )
 
+(defmacro build-custom-level (name)  ;; name: the level's folder name inside custom_levels/
+  (let* ((path (string-append "custom_levels/" name "/" name ".jsonc")))  ;; custom_levels/<name>/<name>.jsonc
+    `(defstep :in ,path
+              :tool 'build-level
+              :out '(,(string-append "out/obj/" name ".go")))))  ;; step output: out/obj/<name>.go
+
 (defun get-iso-data-path ()
   (if *use-iso-data-path*
     (string-append *iso-data* "/")
@@ -1553,6 +1570,17 @@
   "ndi-volumes-ag"
   "title-vis")
 
+;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Example Custom Level
+;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Set up the build system to build the level geometry.
+;; The argument is the level's folder name inside the custom_levels/ folder;
+;; the level itself is specified by custom_levels/<name>/<name>.jsonc.
+(build-custom-level "test-zone")
+;; the DGO file
+(custom-level-cgo "TESTZONE.DGO" "test-zone/testzone.gd")
+
 ;;;;;;;;;;;;;;;;;;;;;
 ;; Game Engine Code
 ;;;;;;;;;;;;;;;;;;;;;
diff --git a/goalc/CMakeLists.txt b/goalc/CMakeLists.txt
index 24b84263bb..40aa875e4b 100644
--- a/goalc/CMakeLists.txt
+++ b/goalc/CMakeLists.txt
@@ -5,6 +5,16 @@ add_library(compiler
         emitter/ObjectGenerator.cpp
         emitter/Register.cpp
         debugger/disassemble.cpp
+        build_level/build_level.cpp
+        build_level/collide_bvh.cpp
+        build_level/collide_drawable.cpp
+        build_level/collide_pack.cpp
+        build_level/color_quantization.cpp
+        build_level/FileInfo.cpp
+        build_level/gltf_mesh_extract.cpp
+        build_level/LevelFile.cpp
+        build_level/ResLump.cpp
+        build_level/Tfrag.cpp
         compiler/Compiler.cpp
         compiler/Env.cpp
         compiler/Val.cpp
@@ -44,7 +54,7 @@ add_library(compiler
         regalloc/Allocator_v2.cpp
         )
 
-target_link_libraries(compiler common Zydis)
+target_link_libraries(compiler common Zydis tiny_gltf)
 
 if (WIN32)
     target_link_libraries(compiler mman)
diff --git a/goalc/build_level/FileInfo.cpp b/goalc/build_level/FileInfo.cpp
new file mode 100644
index 0000000000..1c5367143e
--- /dev/null
+++ b/goalc/build_level/FileInfo.cpp
@@ -0,0 +1,30 @@
+#include "FileInfo.h"
+#include "goalc/data_compiler/DataObjectGenerator.h"
+#include "common/versions.h"
+
+size_t FileInfo::add_to_object_file(DataObjectGenerator& gen) const {
+  // Writes a "file-info" basic to gen and returns the byte offset right after its type tag.
+  gen.align_to_basic();
+  gen.add_type_tag("file-info");
+  const size_t offset = gen.current_offset_bytes();
+  gen.add_type_tag(file_type);                     // file-type      (offset 4)
+  gen.add_ref_to_string_in_pool(file_name);        // file-name      (offset 8)
+  gen.add_word(major_version);                     // major-version  (offset 12)
+  gen.add_word(minor_version);                     // minor-version  (offset 16)
+  gen.add_ref_to_string_in_pool(maya_file_name);   // maya-file-name (offset 20)
+  gen.add_ref_to_string_in_pool(tool_debug);       // tool-debug     (offset 24)
+  gen.add_ref_to_string_in_pool(mdb_file_name);    // mdb-file-name  (offset 28), was tool_debug
+  return offset;
+}
+
+FileInfo make_file_info_for_level(const std::string& file_name) {
+  FileInfo info;  // file-info contents used for a level built by this tool
+  info.file_type = "bsp-header";  // the level object itself is written with a bsp-header type tag
+  info.file_name = file_name;
+  info.major_version = versions::jak1::LEVEL_FILE_VERSION;
+  info.minor_version = 0;
+  info.maya_file_name = "Unknown";
+  info.tool_debug = "Created by OpenGOAL buildlevel";
+  info.mdb_file_name = "Unknown";
+  return info;
+}
\ No newline at end of file
diff --git a/goalc/build_level/FileInfo.h b/goalc/build_level/FileInfo.h
new file mode 100644
index 0000000000..7932018ca1
--- /dev/null
+++ b/goalc/build_level/FileInfo.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include <string>
+
+#include "common/common_types.h"
+
+class DataObjectGenerator;
+
+struct FileInfo {  // C++-side data for a GOAL file-info; each comment below is the GOAL field
+  //  (file-type      symbol  :offset-assert 4)
+  std::string file_type;
+  //  (file-name      basic   :offset-assert 8)
+  std::string file_name;
+  //  (major-version  uint32  :offset-assert 12)
+  u32 major_version = 0;
+  //  (minor-version  uint32  :offset-assert 16)
+  u32 minor_version = 0;
+  //  (maya-file-name basic   :offset-assert 20)
+  std::string maya_file_name;
+  //  (tool-debug     basic   :offset-assert 24)
+  std::string tool_debug;
+  //  (mdb-file-name  basic   :offset-assert 28)
+  std::string mdb_file_name;
+
+  size_t add_to_object_file(DataObjectGenerator& gen) const;  // returns offset after the type tag
+};
+
+FileInfo make_file_info_for_level(const std::string& file_name);
\ No newline at end of file
diff --git a/goalc/build_level/LevelFile.cpp b/goalc/build_level/LevelFile.cpp
new file mode 100644
index 0000000000..b5902eb988
--- /dev/null
+++ b/goalc/build_level/LevelFile.cpp
@@ -0,0 +1,105 @@
+#include "LevelFile.h"
+#include "goalc/data_compiler/DataObjectGenerator.h"
+
+size_t DrawableTreeArray::add_to_object_file(DataObjectGenerator& gen) const {
+  /*
+   (deftype drawable-tree-array (drawable-group)
+    ((trees drawable-tree 1 :offset 32 :score 100))
+    :flag-assert #x1200000024
+    )
+    (deftype drawable-group (drawable)
+      ((length  int16       :offset 6)
+       (data    drawable 1  :offset-assert 32)
+       )
+      (:methods
+        (new (symbol type int) _type_)
+        )
+      :flag-assert #x1200000024
+      )
+   */
+  gen.align_to_basic();
+  gen.add_type_tag("drawable-tree-array");
+  size_t result = gen.current_offset_bytes();  // return value: offset right after the type tag
+  int num_trees = 0;
+  num_trees += tfrags.size();
+  num_trees += collides.size();  // only tfrag and collide trees are written below
+  gen.add_word(num_trees << 16);  // length (int16 at :offset 6) goes in the upper 16 bits
+  gen.add_word(0);
+  gen.add_word(0);
+
+  gen.add_word(0);
+  gen.add_word(0);
+  gen.add_word(0);
+  gen.add_word(0);  // zero words up to :offset 32, where the tree slots start
+
+  // todo add trees...
+
+  if (num_trees == 0) {
+    gen.add_word(0);  // the one at the end.
+  } else {
+    int tree_word = (int)gen.current_offset_bytes() / 4;  // word index of the first tree slot
+    for (int i = 0; i < num_trees; i++) {
+      gen.add_word(0);  // one placeholder per tree, linked to the tree data below
+    }
+
+    for (auto& tfrag : tfrags) {
+      // gen.set_word(tree_word++, tfrag.add_to_object_file(gen));
+      gen.link_word_to_byte(tree_word++, tfrag.add_to_object_file(gen));
+    }
+
+    for (auto& collide : collides) {
+      gen.link_word_to_byte(tree_word++, collide.add_to_object_file(gen));
+    }
+  }
+
+  return result;
+}
+
+std::vector<u8> LevelFile::save_object_file() const {
+  DataObjectGenerator gen;
+  gen.add_type_tag("bsp-header");
+
+  // add blank space for the bsp-header (100 words = 400 bytes, matching the field list below)
+  while (gen.words() < 100) {
+    gen.add_word(0);
+  }
+
+  //(info                   file-info                        :offset          4)
+  auto file_info_slot = info.add_to_object_file(gen);
+  gen.link_word_to_byte(1, file_info_slot);  // word 1 is byte offset 4, the info field
+
+  //(bsphere                vector :inline                   :offset-assert  16)
+  //(all-visible-list       (pointer uint16)                 :offset-assert  32)
+  //(visible-list-length    int32                            :offset-assert  36)
+  //(drawable-trees         drawable-tree-array              :offset-assert  40)
+  gen.link_word_to_byte(40 / 4, drawable_trees.add_to_object_file(gen));
+  //(pat                    pointer                          :offset-assert  44)
+  //(pat-length             int32                            :offset-assert  48)
+  //(texture-remap-table    (pointer uint64)                 :offset-assert  52)
+  //(texture-remap-table-len int32                           :offset-assert  56)
+  //(texture-ids            (pointer texture-id)             :offset-assert  60)
+  //(texture-page-count     int32                            :offset-assert  64)
+  //(unk-zero-0             basic                            :offset-assert  68)
+  //(name                   symbol                           :offset-assert  72)
+  gen.link_word_to_symbol(name, 72 / 4);
+  //(nickname               symbol                           :offset-assert  76)
+  gen.link_word_to_symbol(nickname, 76 / 4);
+  //(vis-info               level-vis-info                8  :offset-assert  80)
+  //(actors                 drawable-inline-array-actor      :offset-assert 112)
+  //(cameras                (array entity-camera)            :offset-assert 116)
+  //(nodes                  (inline-array bsp-node)          :offset-assert 120)
+  //(level                  level                            :offset-assert 124)
+  //(current-leaf-idx       uint16                           :offset-assert 128)
+  //(unk-data-2             uint16                        9  :offset-assert 130)
+  //(boxes                  box8s-array                      :offset-assert 148)
+  //(current-bsp-back-flags uint32                           :offset-assert 152)
+  //(ambients               drawable-inline-array-ambient    :offset-assert 156)
+  //(unk-data-4             float                            :offset-assert 160)
+  //(unk-data-5             float                            :offset-assert 164)
+  //(adgifs                 adgif-shader-array               :offset-assert 168)
+  //(actor-birth-order      (pointer uint32)                 :offset-assert 172)
+  //(split-box-indices      (pointer uint16)                 :offset-assert 176)
+  //(unk-data-8             uint32                        55 :offset-assert 180)
+  // every field without a link call above keeps the zero written by the fill loop
+  return gen.generate_v2();
+}
\ No newline at end of file
diff --git a/goalc/build_level/LevelFile.h b/goalc/build_level/LevelFile.h
new file mode 100644
index 0000000000..29f53dd4c0
--- /dev/null
+++ b/goalc/build_level/LevelFile.h
@@ -0,0 +1,140 @@
+#pragma once
+
+#include <vector>
+#include <string>
+#include <array>
+
+#include "common/common_types.h"
+#include "goalc/build_level/FileInfo.h"
+#include "goalc/build_level/Tfrag.h"
+#include "goalc/build_level/collide_pack.h"
+#include "goalc/build_level/collide_common.h"
+#include "goalc/build_level/collide_bvh.h"
+#include "goalc/build_level/collide_drawable.h"
+
+struct VisibilityString {
+  std::vector<u8> bytes;
+};
+
+struct DrawableTreeInstanceTie {};
+
+struct DrawableTreeActor {};
+
+struct DrawableTreeAmbient {};
+
+struct DrawableTreeInstanceShrub {};
+
+struct DrawableTreeArray {
+  std::vector<DrawableTreeTfrag> tfrags;
+  std::vector<DrawableTreeInstanceTie> ties;
+  std::vector<DrawableTreeActor> actors;  // unused?
+  std::vector<DrawableTreeCollideFragment> collides;
+  std::vector<DrawableTreeAmbient> ambients;
+  std::vector<DrawableTreeInstanceShrub> shrubs;
+  size_t add_to_object_file(DataObjectGenerator& gen) const;  // writes only tfrags and collides
+};
+
+struct TextureRemap {};
+
+struct TextureId {};
+
+struct VisInfo {};
+
+struct DrawableActor {};
+
+struct DrawableInlineArrayActor {};
+
+struct EntityCamera {};
+
+struct BspNode {};
+
+struct Box8s {};
+
+struct DrawableAmbient {};
+
+struct DrawableInlineArrayAmbient {};
+
+struct AdgifShaderArray {};
+
+// This is a place to collect all the data that should go into the bsp-header file.
+struct LevelFile {
+  //  (info                   file-info                        :offset          4)
+  FileInfo info;
+
+  //  (all-visible-list       (pointer uint16)                 :offset-assert  32)
+  //  (visible-list-length    int32                            :offset-assert  36)
+  VisibilityString all_visibile_list;  // NOTE(review): "visibile" is a typo; rename with all users
+
+  //  (drawable-trees         drawable-tree-array              :offset-assert  40)
+  DrawableTreeArray drawable_trees;
+
+  //  (pat                    pointer                          :offset-assert  44)
+  //  (pat-length             int32                            :offset-assert  48)
+  std::vector<PatSurface> pat;
+
+  //  (texture-remap-table    (pointer uint64)                 :offset-assert  52)
+  //  (texture-remap-table-len int32                           :offset-assert  56)
+  std::vector<TextureRemap> texture_remap_table;
+
+  //  (texture-ids            (pointer texture-id)             :offset-assert  60)
+  //  (texture-page-count     int32                            :offset-assert  64)
+  std::vector<TextureId> texture_ids;
+
+  //  (unk-zero-0             basic                            :offset-assert  68)
+  //  "misc", seems like it can be zero and is unused.
+
+  //  (name                   symbol                           :offset-assert  72)
+  std::string name;  // full name
+
+  //  (nickname               symbol                           :offset-assert  76)
+  std::string nickname;  // 3 char name
+
+  //  (vis-info               level-vis-info                8  :offset-assert  80) ;; note: 0 when
+  std::array<VisInfo, 8> vis_infos;  // NOTE(review): the note above is cut off mid-sentence
+
+  //  (actors                 drawable-inline-array-actor      :offset-assert 112)
+  DrawableInlineArrayActor actors;
+
+  //  (cameras                (array entity-camera)            :offset-assert 116)
+  std::vector<EntityCamera> cameras;
+
+  //  (nodes                  (inline-array bsp-node)          :offset-assert 120)
+  std::vector<BspNode> nodes;
+
+  //  (level                  level                            :offset-assert 124)
+  // zero
+
+  //  (current-leaf-idx       uint16                           :offset-assert 128)
+  // zero
+
+  //  (unk-data-2             uint16                        9  :offset-assert 130)
+  // looks like padding plus 4 floats? unused
+
+  //  (boxes                  box8s-array                      :offset-assert 148)
+  std::vector<Box8s> boxes;
+
+  //  (current-bsp-back-flags uint32                           :offset-assert 152)
+  // zero
+
+  //  (ambients               drawable-inline-array-ambient    :offset-assert 156)
+  DrawableInlineArrayAmbient ambients;
+
+  //  (unk-data-4             float                            :offset-assert 160)
+  float close_subdiv = 0;
+
+  //  (unk-data-5             float                            :offset-assert 164)
+  float far_subdiv = 0;
+
+  //  (adgifs                 adgif-shader-array               :offset-assert 168)
+  AdgifShaderArray adgifs;
+
+  //  (actor-birth-order      (pointer uint32)                 :offset-assert 172)
+  std::vector<u32> actor_birth_order;
+
+  //  (split-box-indices      (pointer uint16)                 :offset-assert 176)
+  std::vector<u16> split_box_indices;
+
+  //  (unk-data-8             uint32                        55 :offset-assert 180)
+
+  std::vector<u8> save_object_file() const;  // writes only info, drawable_trees, name, nickname
+};
\ No newline at end of file
diff --git a/goalc/build_level/ResLump.cpp b/goalc/build_level/ResLump.cpp
new file mode 100644
index 0000000000..f70a09ee2b
--- /dev/null
+++ b/goalc/build_level/ResLump.cpp
@@ -0,0 +1,4 @@
+#include "ResLump.h"
+
+#include "third-party/fmt/core.h"
+#include "goalc/data_compiler/DataObjectGenerator.h"
diff --git a/goalc/build_level/ResLump.h b/goalc/build_level/ResLump.h
new file mode 100644
index 0000000000..19b6ee6453
--- /dev/null
+++ b/goalc/build_level/ResLump.h
@@ -0,0 +1,7 @@
+#pragma once
+
+#include <string>
+#include <vector>
+#include <memory>
+
+#include "common/common_types.h"
diff --git a/goalc/build_level/TexturePool.h b/goalc/build_level/TexturePool.h
new file mode 100644
index 0000000000..2cef160246
--- /dev/null
+++ b/goalc/build_level/TexturePool.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#include <vector>
+#include <unordered_map>
+#include <string>
+
+#include "common/custom_data/Tfrag3Data.h"
+
+struct TexturePool {
+  std::unordered_map<std::string, int> textures_by_name;
+  std::vector<tfrag3::Texture> textures_by_idx;
+};
\ No newline at end of file
diff --git a/goalc/build_level/Tfrag.cpp b/goalc/build_level/Tfrag.cpp
new file mode 100644
index 0000000000..449cdbb22f
--- /dev/null
+++ b/goalc/build_level/Tfrag.cpp
@@ -0,0 +1,99 @@
+#include <iostream>
+
+#include "Tfrag.h"
+#include "common/custom_data/pack_helpers.h"
+#include "goalc/data_compiler/DataObjectGenerator.h"
+#include "goalc/build_level/gltf_mesh_extract.h"
+
+void tfrag_from_gltf(const gltf_mesh_extract::TfragOutput& mesh_extract_out,
+                     DrawableTreeTfrag& out,  // not touched yet: only the PC tree is filled in
+                     tfrag3::TfragTree& out_pc) {
+  out_pc.kind = tfrag3::TFragmentTreeKind::NORMAL;  // todo more types?
+  out_pc.draws = mesh_extract_out.strip_draws;  // copy: the input is const and can't be moved from
+  fmt::print("have {} draws\n", out_pc.draws.size());
+  pack_tfrag_vertices(&out_pc.packed_vertices, mesh_extract_out.vertices);
+  fmt::print("have {} vertices\n", out_pc.packed_vertices.vertices.size());
+  // every palette color is replicated into all of the time-of-day slots of its entry.
+  for (auto& col : mesh_extract_out.color_palette) {
+    tfrag3::TimeOfDayColor todc;
+    for (auto& rgba : todc.rgba) {
+      rgba = col;
+    }
+    out_pc.colors.push_back(todc);
+  }
+  out_pc.use_strips = false;
+}
+
+/*
+
+(deftype drawable-group (drawable)
+  ((length  int16       :offset 6)
+   (data    drawable 1  :offset-assert 32)
+   )
+  (:methods
+    (new (symbol type int) _type_)
+    )
+  :flag-assert #x1200000024
+  )
+
+ (deftype drawable-tree (drawable-group)
+  ()
+  :flag-assert #x1200000024
+  )
+
+(deftype drawable-inline-array (drawable)
+  ((length  int16          :offset 6) ;; this is kinda weird.
+   )
+  :method-count-assert 18
+  :size-assert         #x20
+  :flag-assert         #x1200000020
+  )
+
+(deftype drawable-inline-array-tfrag (drawable-inline-array)
+  ((data tfragment 1 :inline :offset-assert 32)
+   (pad uint32))
+  :method-count-assert 18
+  :size-assert         #x64
+  :flag-assert         #x1200000064
+  )
+
+(deftype drawable-tree-tfrag (drawable-tree)
+  ((time-of-day-pal time-of-day-palette :offset 12)
+   (arrays    drawable-inline-array 1  :offset 32 :score 100) ;; either drawable-inline-array-node
+or drawable-inline-array-tfrag
+   )
+  :method-count-assert #x12
+  :size-assert #x24
+  :flag-assert #x1200000024
+  )
+
+
+ */
+
+size_t add_empty_dia(const std::string& name, DataObjectGenerator& gen, int total_size) {
+  // Emit a zero-filled object of total_size bytes; the type tag counts as its first 4 bytes.
+  gen.align_to_basic();
+  gen.add_type_tag(name);
+  const size_t start_offset = gen.current_offset_bytes();
+  // one zero word per remaining 4 bytes (a partial trailing word still gets a full word)
+  for (int remaining = total_size - 4; remaining > 0; remaining -= 4) {
+    gen.add_word(0);
+  }
+
+  return start_offset;
+}
+
+size_t DrawableTreeTfrag::add_to_object_file(DataObjectGenerator& gen) const {
+  gen.align_to_basic();
+  gen.add_type_tag("drawable-tree-tfrag");
+  size_t result = gen.current_offset_bytes();  // returned offset: the first word after the tag
+  gen.add_word(1 << 16);  // presumably the int16 length (offset 6 in the deftype) = 1 array
+  for (int i = 0; i < 6; i++) {
+    gen.add_word(0);  // words between the length and the arrays slot are left zero
+  }
+  size_t slot = gen.add_word(0);  // placeholder, linked below to the single (empty) inline array
+  ASSERT(slot * 4 - result == 28);  // 7 words were written before the slot
+  gen.link_word_to_byte(slot, add_empty_dia("drawable-inline-array-tfrag", gen, 0x64));
+
+  return result;
+}
\ No newline at end of file
diff --git a/goalc/build_level/Tfrag.h b/goalc/build_level/Tfrag.h
new file mode 100644
index 0000000000..f9c137a473
--- /dev/null
+++ b/goalc/build_level/Tfrag.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <string>
+
+#include "common/custom_data/Tfrag3Data.h"
+#include "goalc/build_level/TexturePool.h"
+#include "goalc/build_level/gltf_mesh_extract.h"
+
+class DataObjectGenerator;
+
+struct DrawableTreeTfrag {
+  size_t add_to_object_file(DataObjectGenerator& gen) const;
+};
+
+void tfrag_from_gltf(const gltf_mesh_extract::TfragOutput& mesh_extract_out,
+                     DrawableTreeTfrag& out,
+                     tfrag3::TfragTree& out_pc);
\ No newline at end of file
diff --git a/goalc/build_level/build_level.cpp b/goalc/build_level/build_level.cpp
new file mode 100644
index 0000000000..9edb3586ef
--- /dev/null
+++ b/goalc/build_level/build_level.cpp
@@ -0,0 +1,102 @@
+#include "third-party/fmt/core.h"
+#include "common/util/json_util.h"
+#include "common/util/FileUtil.h"
+#include "common/log/log.h"
+#include "goalc/build_level/LevelFile.h"
+#include "goalc/build_level/FileInfo.h"
+#include "goalc/build_level/Tfrag.h"
+#include "goalc/build_level/gltf_mesh_extract.h"
+#include "goalc/build_level/collide_bvh.h"
+#include "goalc/build_level/collide_pack.h"
+
+#include "common/custom_data/Tfrag3Data.h"
+#include "common/util/compress.h"
+
+void save_pc_data(const std::string& nickname, tfrag3::Level& data) {
+  Serializer ser;
+  data.serialize(ser);
+  const auto raw = ser.get_save_result();  // .first = serialized bytes, .second = their size
+  auto compressed = compression::compress_zstd(raw.first, raw.second);
+  fmt::print("stats for {}\n", data.level_name);
+  print_memory_usage(data, raw.second);
+  fmt::print("compressed: {} -> {} ({:.2f}%)\n", raw.second, compressed.size(),
+             100.f * compressed.size() / raw.second);
+  file_util::write_binary_file(file_util::get_file_path({fmt::format("assets/{}.fr3", nickname)}),
+                               compressed.data(), compressed.size());
+}
+
+std::vector<std::string> get_build_level_deps(const std::string& input_file) {
+  auto level_json = parse_commented_json(
+      file_util::read_text_file(file_util::get_file_path({input_file})), input_file);
+  return {level_json.at("gltf_file").get<std::string>()};
+}
+
+bool run_build_level(const std::string& input_file, const std::string& output_file) {
+  auto level_json = parse_commented_json(
+      file_util::read_text_file(file_util::get_file_path({input_file})), input_file);
+  LevelFile file;          // GOAL level file
+  tfrag3::Level pc_level;  // PC level file
+  TexturePool tex_pool;    // pc level texture pool
+
+  // process input mesh from blender
+  gltf_mesh_extract::Input mesh_extract_in;
+  mesh_extract_in.filename =
+      file_util::get_file_path({level_json.at("gltf_file").get<std::string>()});
+  mesh_extract_in.tex_pool = &tex_pool;
+  gltf_mesh_extract::Output mesh_extract_out;
+  gltf_mesh_extract::extract(mesh_extract_in, mesh_extract_out);
+
+  // add stuff to the GOAL level structure
+  file.info = make_file_info_for_level(std::filesystem::path(input_file).filename().string());
+  // all vis
+  // drawable trees
+  // pat
+  // texture remap
+  // texture ids
+  // unk zero
+  // name
+  file.name = level_json.at("long_name").get<std::string>();
+  // nick
+  file.nickname = level_json.at("nickname").get<std::string>();
+  // vis infos
+  // actors
+  // cameras
+  // nodes
+  // boxes
+  // ambients
+  // subdivs
+  // adgifs
+  // actor birth
+  // split box
+
+  // add stuff to the PC level structure
+  pc_level.level_name = file.name;
+
+  // TFRAG
+  auto& tfrag_drawable_tree = file.drawable_trees.tfrags.emplace_back();
+  tfrag_from_gltf(mesh_extract_out.tfrag, tfrag_drawable_tree,
+                  pc_level.tfrag_trees[0].emplace_back());
+  pc_level.textures = std::move(tex_pool.textures_by_idx);
+
+  // COLLIDE
+  if (mesh_extract_out.collide.faces.empty()) {
+    lg::error("No collision geometry was found");
+  } else {
+    auto& collide_drawable_tree = file.drawable_trees.collides.emplace_back();
+    collide_drawable_tree.bvh = collide::construct_collide_bvh(mesh_extract_out.collide.faces);
+    collide_drawable_tree.packed_frags = pack_collide_frags(collide_drawable_tree.bvh.frags.frags);
+  }
+
+  // Save the GOAL level
+  auto result = file.save_object_file();
+  fmt::print("Level bsp file size {} bytes\n", result.size());
+  auto save_path = file_util::get_file_path({output_file});
+  file_util::create_dir_if_needed_for_file(save_path);
+  fmt::print("Saving to {}\n", save_path);
+  file_util::write_binary_file(save_path, result.data(), result.size());
+
+  // Save the PC level
+  save_pc_data(file.nickname, pc_level);
+
+  return true;
+}
diff --git a/goalc/build_level/build_level.h b/goalc/build_level/build_level.h
new file mode 100644
index 0000000000..afe685daac
--- /dev/null
+++ b/goalc/build_level/build_level.h
@@ -0,0 +1,7 @@
+#pragma once
+
+#include <vector>
+#include <string>
+
+bool run_build_level(const std::string& input_file, const std::string& output_file);
+std::vector<std::string> get_build_level_deps(const std::string& input_file);
\ No newline at end of file
diff --git a/goalc/build_level/collide_bvh.cpp b/goalc/build_level/collide_bvh.cpp
new file mode 100644
index 0000000000..8d410b2a3c
--- /dev/null
+++ b/goalc/build_level/collide_bvh.cpp
@@ -0,0 +1,284 @@
+#include <algorithm>
+
+#include "collide_bvh.h"
+#include "common/util/Assert.h"
+#include "common/log/log.h"
+#include "common/util/Timer.h"
+
+// Collision BVH algorithm
+// We start with all the points in a single node, then recursively split nodes in 8 until no nodes
+// have too many faces.
+// The splitting is done by doing median cuts along the x, y, or z axis.
+
+// The bspheres are built at the end.
+
+namespace collide {
+
+namespace {
+
+constexpr int MAX_FACES_IN_FRAG = 100;
+
+/*!
+ * The Collide node.
+ * Has either children collide node or children faces, but not both
+ * The size of child_nodes is either 0 or 8 at all times.
+ */
+struct CNode {
+  std::vector<CNode> child_nodes;
+  std::vector<CollideFace> faces;
+  math::Vector4f bsphere;
+};
+
+struct BBox {
+  math::Vector3f mins, maxs;
+  std::string sz_to_string() const {
+    return fmt::format("({})", ((maxs - mins) / 4096.f).to_string_aligned());
+  }
+};
+
+/*!
+ * Make the bounding box hold this node and all its children.
+ */
+void add_to_bbox_recursive(const CNode& node, BBox& bbox) {
+  if (node.faces.empty()) {
+    ASSERT(node.child_nodes.size() == 8);
+    for (auto& child : node.child_nodes) {
+      add_to_bbox_recursive(child, bbox);
+    }
+  } else {
+    for (auto& face : node.faces) {
+      for (auto& vert : face.v) {
+        bbox.mins.min_in_place(vert);
+        bbox.maxs.max_in_place(vert);
+      }
+    }
+  }
+}
+
+BBox bbox_of_node(const CNode& node) {
+  BBox bbox;
+  bbox.mins.fill(std::numeric_limits<float>::max());
+  bbox.maxs.fill(-std::numeric_limits<float>::max());
+  add_to_bbox_recursive(node, bbox);
+  return bbox;
+}
+
+/*!
+ * Make the bsphere hold this node and all its children.
+ */
+void update_bsphere_recursive(const CNode& node, const math::Vector3f& origin, float& r_squared) {
+  if (node.faces.empty()) {
+    ASSERT(node.child_nodes.size() == 8);
+    for (auto& child : node.child_nodes) {
+      update_bsphere_recursive(child, origin, r_squared);
+    }
+  } else {
+    for (auto& face : node.faces) {
+      for (auto& vert : face.v) {
+        r_squared = std::max(r_squared, (vert - origin).squared_length());
+      }
+    }
+  }
+}
+
+/*!
+ * Compute the bsphere of a single node: bbox midpoint as center, radius to the farthest vertex.
+ */
+void compute_my_bsphere(CNode& node) {
+  const BBox bounds = bbox_of_node(node);
+  const math::Vector3f center = (bounds.maxs + bounds.mins) * 0.5;
+  // largest squared distance from the center to any vertex in this node or its children
+  float max_dist_squared = 0;
+  update_bsphere_recursive(node, center, max_dist_squared);
+  node.bsphere.x() = center.x();
+  node.bsphere.y() = center.y();
+  node.bsphere.z() = center.z();
+  node.bsphere.w() = std::sqrt(max_dist_squared);
+}
+
+/*!
+ * Split faces in two at the median of the dim-th component of each face's bsphere.
+ * Sorts the input faces in place (they are NOT cleared) and appends each half to out0 / out1.
+ */
+void split_along_dim(std::vector<CollideFace>& faces,
+                     int dim,
+                     std::vector<CollideFace>* out0,
+                     std::vector<CollideFace>* out1) {
+  std::sort(faces.begin(), faces.end(), [=](const CollideFace& a, const CollideFace& b) {
+    return a.bsphere[dim] < b.bsphere[dim];
+  });
+  size_t split_idx = faces.size() / 2;  // out1 receives the extra face when the count is odd
+  out0->insert(out0->end(), faces.begin(), faces.begin() + split_idx);
+  out1->insert(out1->end(), faces.begin() + split_idx, faces.end());
+}
+
+/*!
+ * Split a node's faces into two nodes (out0 / out1 are overwritten) and clear node.faces.
+ */
+void split_node_once(CNode& node, CNode* out0, CNode* out1) {
+  CNode temps[6];
+  // try a median split along each axis: axis d fills temps[2 * d] and temps[2 * d + 1]
+  split_along_dim(node.faces, 0, &temps[0].faces, &temps[1].faces);
+  split_along_dim(node.faces, 1, &temps[2].faces, &temps[3].faces);
+  split_along_dim(node.faces, 2, &temps[4].faces, &temps[5].faces);
+  node.faces.clear();
+  for (auto& t : temps) {
+    compute_my_bsphere(t);  // NOTE(review): looks like this asserts if a half is empty - confirm
+  }
+
+  float max_bspheres[3] = {0, 0, 0};  // per axis: the larger bsphere radius of the two halves
+  for (int i = 0; i < 3; i++) {
+    max_bspheres[i] = std::max(temps[i * 2].bsphere.w(), temps[i * 2 + 1].bsphere.w());
+  }
+
+  int best_dim = 0;  // axis whose larger half has the smallest radius (lowest axis wins ties)
+  float best_w = max_bspheres[0];
+  for (int i = 0; i < 3; i++) {
+    if (max_bspheres[i] < best_w) {
+      best_dim = i;
+      best_w = max_bspheres[i];
+    }
+  }
+
+  *out0 = temps[best_dim * 2];
+  *out1 = temps[best_dim * 2 + 1];
+}
+
+/*!
+ * Split a node into 8 children and store these in the given node.
+ */
+void split_node_to_8_children(CNode& node) {
+  ASSERT(node.child_nodes.empty());
+  node.child_nodes.resize(8);
+  // level 0
+  CNode level0[2];
+  split_node_once(node, &level0[0], &level0[1]);
+  // level 1
+  CNode level1[4];
+  split_node_once(level0[0], &level1[0], &level1[1]);
+  split_node_once(level0[1], &level1[2], &level1[3]);
+  // level 2
+  split_node_once(level1[0], &node.child_nodes[0], &node.child_nodes[1]);
+  split_node_once(level1[1], &node.child_nodes[2], &node.child_nodes[3]);
+  split_node_once(level1[2], &node.child_nodes[4], &node.child_nodes[5]);
+  split_node_once(level1[3], &node.child_nodes[6], &node.child_nodes[7]);
+}
+
+/*!
+ * Split all leaf nodes. Returns the number of faces in the leaf with the most faces after
+ * splitting.
+ * This slightly unusual recursion pattern is to make sure we split everything to same depth,
+ * which we believe might be a requirement of the collision system.
+ */
+size_t split_all_leaves(CNode& node) {
+  size_t worst_leaf_face_count = 0;
+  if (node.child_nodes.empty()) {
+    // we're a leaf!
+    // split us:
+    split_node_to_8_children(node);
+    for (auto& child : node.child_nodes) {
+      worst_leaf_face_count = std::max(worst_leaf_face_count, child.faces.size());
+    }
+    return worst_leaf_face_count;
+  } else {
+    // not a leaf, recurse
+    for (auto& child : node.child_nodes) {
+      worst_leaf_face_count = std::max(worst_leaf_face_count, split_all_leaves(child));
+    }
+    return worst_leaf_face_count;
+  }
+}
+
+/*!
+ * Main BVH construction function. Splits leaves until it is no longer needed.
+ */
+void split_as_needed(CNode& root) {
+  int initial_tri_count = root.faces.size();
+  int num_leaves = 1;
+  bool need_to_split = true;
+  while (need_to_split) {
+    int faces_in_worst = split_all_leaves(root);
+    num_leaves *= 8;
+    lg::info("after splitting, the worst leaf has {} tris", faces_in_worst);
+    if (faces_in_worst < MAX_FACES_IN_FRAG) {
+      need_to_split = false;
+    }
+  }
+  lg::info("average triangles per leaf: {}", initial_tri_count / num_leaves);
+  lg::info("leaf count: {}", num_leaves);
+}
+
+/*!
+ * Recursively compute bspheres of all children
+ * (note that we don't do bspheres of bspheres... I think this is better?)
+ */
+void bsphere_recursive(CNode& node) {
+  compute_my_bsphere(node);
+  for (auto& child : node.child_nodes) {
+    bsphere_recursive(child);
+  }
+}
+
+void drawable_layout_helper(CNode& node, int depth, CollideTree& tree_out, size_t my_idx_check) {
+  if (node.child_nodes.empty()) {
+    // we're a leaf! add us to the frags
+    auto& frag = tree_out.frags.frags.emplace_back();
+    frag.bsphere = node.bsphere;
+    frag.faces = node.faces;
+  } else {
+    // not a leaf
+    if ((int)tree_out.node_arrays.size() <= depth) {
+      tree_out.node_arrays.resize(depth + 1);
+    }
+    ASSERT(my_idx_check == tree_out.node_arrays.at(depth).nodes.size());
+    auto& draw_node = tree_out.node_arrays[depth].nodes.emplace_back();
+    draw_node.bsphere = node.bsphere;
+    for (int i = 0; i < 8; i++) {
+      draw_node.children[i] = my_idx_check * 8 + i;
+      drawable_layout_helper(node.child_nodes.at(i), depth + 1, tree_out, draw_node.children[i]);
+    }
+  }
+}
+
+CollideTree build_collide_tree(CNode& root) {
+  CollideTree tree;
+  drawable_layout_helper(root, 0, tree, 0);
+  return tree;
+}
+
+void debug_stats(const CollideTree& tree) {
+  lg::info("Tree build: {} draw node layers", tree.node_arrays.size());
+  float sum_w = 0, max_w = 0;
+  for (auto& frag : tree.frags.frags) {
+    sum_w += frag.bsphere.w();
+    max_w = std::max(frag.bsphere.w(), max_w);
+  }
+  lg::info("Max bsphere radius: {:.2f}m, average {:.2f} (aiming for around 20-30m avg)",
+           max_w / 4096, sum_w / (4096 * tree.frags.frags.size()));
+}
+
+}  // namespace
+
+CollideTree construct_collide_bvh(const std::vector<CollideFace>& tris) {
+  // part 1: build the tree
+  Timer bvh_timer;
+  lg::info("Building collide bvh from {} triangles", tris.size());
+  CNode root;
+  root.faces = tris;
+  split_as_needed(root);
+  lg::info("BVH tree constructed in {:.2f} ms", bvh_timer.getMs());
+
+  // part 2: compute bspheres
+  bvh_timer.start();
+  bsphere_recursive(root);
+  lg::info("Found bspheres in {:.2f} ms", bvh_timer.getMs());
+
+  // part 3: layout tree
+  bvh_timer.start();
+  auto tree = build_collide_tree(root);
+  debug_stats(tree);
+  lg::info("Tree layout done in {:.2f} ms", bvh_timer.getMs());
+  return tree;
+}
+
+}  // namespace collide
\ No newline at end of file
diff --git a/goalc/build_level/collide_bvh.h b/goalc/build_level/collide_bvh.h
new file mode 100644
index 0000000000..11b4f4bdaa
--- /dev/null
+++ b/goalc/build_level/collide_bvh.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include <vector>
+#include "goalc/build_level/collide_common.h"
+
+// requirements:
+// max depth of 3 (maybe?)
+// max face per frag = 90
+// max vert per frag = 110
+// branching factor of 8 everywhere.
+namespace collide {
+
+struct DrawNode {
+  s32 children[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+  math::Vector4f bsphere;
+};
+
+struct CollideFrag {
+  math::Vector4f bsphere;
+  std::vector<CollideFace> faces;
+};
+
+struct DrawableInlineArrayNode {
+  std::vector<DrawNode> nodes;
+};
+
+struct DrawableInlineArrayCollideFrag {
+  std::vector<CollideFrag> frags;
+};
+
+struct CollideTree {
+  std::vector<DrawableInlineArrayNode> node_arrays;
+  DrawableInlineArrayCollideFrag frags;
+};
+
+CollideTree construct_collide_bvh(const std::vector<CollideFace>& tris);
+}  // namespace collide
diff --git a/goalc/build_level/collide_common.h b/goalc/build_level/collide_common.h
new file mode 100644
index 0000000000..3ef5146167
--- /dev/null
+++ b/goalc/build_level/collide_common.h
@@ -0,0 +1,110 @@
+#pragma once
+#include "common/common_types.h"
+#include "common/math/Vector.h"
+
+struct PatSurface {
+  enum class Mode { GROUND = 0, WALL = 1, OBSTACLE = 2 };
+  enum class Material {
+    STONE = 0,
+    ICE = 1,
+    QUICKSAND = 2,
+    WATERBOTTOM = 3,
+    TAR = 4,
+    SAND = 5,
+    WOOD = 6,
+    GRASS = 7,
+    PCMETAL = 8,
+    SNOW = 9,
+    DEEPSNOW = 10,
+    HOTCOALS = 11,
+    LAVA = 12,
+    CRWOOD = 13,
+    GRAVEL = 14,
+    DIRT = 15,
+    METAL = 16,
+    STRAW = 17,
+    TUBE = 18,
+    SWAMP = 19,
+    STOPPROJ = 20,
+    ROTATE = 21,
+    NEUTRAL = 22,
+  };
+
+  enum class Event {
+    NONE = 0,
+    DEADLY = 1,
+    ENDLESSFALL = 2,
+    BURN = 3,
+    DEADLYUP = 4,
+    BURNUP = 5,
+    MELT = 6,
+  };
+
+  void set_noentity(bool x) {
+    if (x) {
+      val |= (1 << 0);
+    } else {
+      val &= ~(1 << 0);
+    }
+  }
+  bool get_noentity() const { return val & (1 << 0); }
+
+  void set_nocamera(bool x) {
+    if (x) {
+      val |= (1 << 1);
+    } else {
+      val &= ~(1 << 1);
+    }
+  }
+  bool get_nocamera() const { return val & (1 << 1); }
+
+  void set_noedge(bool x) {
+    if (x) {
+      val |= (1 << 2);
+    } else {
+      val &= ~(1 << 2);
+    }
+  }
+  bool get_noedge() const { return val & (1 << 2); }
+
+  void set_mode(Mode mode) {
+    val &= ~(0b111 << 3);
+    val |= ((u32)mode << 3);
+  }
+  Mode get_mode() const { return (Mode)(0b111 & (val >> 3)); }
+
+  void set_material(Material mat) {
+    val &= ~(0b111111 << 6);
+    val |= ((u32)mat << 6);
+  }
+  Material get_material() const { return (Material)(0b111111 & (val >> 6)); }
+
+  void set_nolineofsight(bool x) {
+    if (x) {
+      val |= (1 << 12);
+    } else {
+      val &= ~(1 << 12);
+    }
+  }
+  bool get_nolineofsight() const { return val & (1 << 12); }
+
+  void set_event(Event ev) {
+    val &= ~(0b111111 << 14);
+    val |= ((u32)ev << 14);  // must match the mask above and get_event(); 6 is the material field
+  }
+  Event get_event() const { return (Event)(0b111111 & (val >> 14)); }
+
+  bool operator==(const PatSurface& other) const { return val == other.val; }
+  // bit 13 and bits [20-31] are unused, or have unknown purpose.
+  u32 val = 0;
+};
+
+struct CollideVertex {
+  float x, y, z;
+};
+
+struct CollideFace {
+  math::Vector4f bsphere;
+  math::Vector3f v[3];
+  PatSurface pat;
+};
diff --git a/goalc/build_level/collide_drawable.cpp b/goalc/build_level/collide_drawable.cpp
new file mode 100644
index 0000000000..4198eadfbd
--- /dev/null
+++ b/goalc/build_level/collide_drawable.cpp
@@ -0,0 +1,268 @@
+#include "collide_drawable.h"
+#include "goalc/data_compiler/DataObjectGenerator.h"
+#include "common/util/Assert.h"
+
+/*
+(deftype drawable (basic)
+  ((id      int16          :offset-assert 4)
+   (bsphere vector :inline :offset-assert 16)
+   )
+
+(deftype drawable-group (drawable)
+  ((length  int16       :offset 6)
+   (data    drawable 1  :offset-assert 32)
+   )
+
+(deftype drawable-tree (drawable-group)
+  ()
+
+(deftype drawable-inline-array (drawable)
+  ((length  int16          :offset 6) ;; this is kinda weird.
+   )
+
+(deftype drawable-tree-collide-fragment (drawable-tree)
+  ((data-override drawable-inline-array 1 :offset 32)) ;; should be 1 there
+
+(deftype drawable-inline-array-collide-fragment (drawable-inline-array)
+  ((data    collide-fragment 1 :inline      :offset-assert 32)
+
+ (deftype collide-fragment (drawable)
+  ((mesh    collide-frag-mesh          :offset 8)
+   )
+  :method-count-assert 18
+  :size-assert         #x20
+
+(deftype collide-frag-mesh (basic)
+  ((packed-data     uint32         :offset-assert 4)
+   (pat-array       uint32         :offset-assert 8)
+   (strip-data-len  uint16         :offset-assert 12)
+   (poly-count      uint16         :offset-assert 14)
+   (base-trans      vector :inline :offset-assert 16)
+   ;; these go in the w of the vector above.
+   (vertex-count    uint8          :offset 28)
+   (vertex-data-qwc uint8          :offset 29)
+   (total-qwc       uint8          :offset 30)
+   (unused          uint8          :offset 31)
+   )
+  :method-count-assert 9
+  :size-assert         #x20
+
+(deftype draw-node (drawable)
+  ((child-count uint8          :offset 6)
+   (flags       uint8          :offset 7)
+   (child       drawable        :offset 8)
+   (distance    float          :offset 12)
+   )
+   :size-assert         #x20
+
+ (deftype drawable-inline-array-node (drawable-inline-array)
+  ((data draw-node 1 :inline)
+   (pad uint32)
+   )
+  :method-count-assert 18
+  :size-assert         #x44
+ */
+
+size_t generate_pat_array(DataObjectGenerator& gen, const std::vector<PatSurface>& pats) {
+  gen.align_to_basic();
+  size_t result = gen.current_offset_bytes();
+  for (auto& pat : pats) {
+    gen.add_word(pat.val);
+  }
+  return result;
+}
+
+size_t generate_packed_collide_data(DataObjectGenerator& gen, const std::vector<u8>& data) {
+  gen.align_to_basic();
+  size_t result = gen.current_offset_bytes();
+  ASSERT((data.size() % 4) == 0);
+  for (size_t i = 0; i < data.size(); i += 4) {
+    u32 word;
+    memcpy(&word, data.data() + i, 4);
+    gen.add_word(word);
+  }
+  return result;
+}
+
+size_t generate_collide_frag_mesh(DataObjectGenerator& gen,
+                                  const CollideFragMeshData& mesh,
+                                  size_t packed_data_loc,
+                                  size_t pat_array_loc) {
+  gen.align_to_basic();
+  gen.add_type_tag("collide-frag-mesh");  // 0
+  size_t result = gen.current_offset_bytes();
+  gen.link_word_to_byte(gen.add_word(0), packed_data_loc);      // 4
+  gen.link_word_to_byte(gen.add_word(0), pat_array_loc);        // 8
+  gen.add_word(mesh.strip_data_len | (mesh.poly_count << 16));  // 12
+  gen.add_word(mesh.base_trans_xyz_s32.x());                    // 16
+  gen.add_word(mesh.base_trans_xyz_s32.y());                    // 20
+  gen.add_word(mesh.base_trans_xyz_s32.z());                    // 24
+  u32 packed = 0;
+  packed |= mesh.vertex_count;
+  packed |= ((u32)mesh.vertex_data_qwc) << 8;
+  packed |= ((u32)mesh.total_qwc) << 16;
+  gen.add_word(packed);  // 28
+  return result;
+}
+
+size_t generate_collide_fragment(DataObjectGenerator& gen,
+                                 const CollideFragMeshData& mesh,
+                                 size_t frag_mesh_loc) {
+  /*
+    .type collide-fragment
+    .word 0x10000
+    .word L705
+    .word 0x0
+    .word 0x46bf480a
+    .word 0x43dc730b
+    .word 0xb71ed4fe
+    .word 0x46c42e44
+   */
+  gen.align_to_basic();
+  gen.add_type_tag("collide-fragment");
+  size_t result = gen.current_offset_bytes();
+  gen.add_word(0x10000);  // ???
+  gen.link_word_to_byte(gen.add_word(0), frag_mesh_loc);
+  gen.add_word(0);
+  for (int i = 0; i < 4; i++) {
+    gen.add_word_float(mesh.bsphere[i]);
+  }
+
+  return result;
+}
+
+size_t generate_collide_fragment_array(DataObjectGenerator& gen,
+                                       const std::vector<CollideFragMeshData>& meshes,
+                                       const std::vector<size_t>& frag_mesh_locs,
+                                       std::vector<size_t>& parent_ref_out) {
+  gen.align_to_basic();
+  gen.add_type_tag("drawable-inline-array-collide-fragment");  // 0
+  size_t result = gen.current_offset_bytes();
+  ASSERT(meshes.size() < UINT16_MAX);
+  gen.add_word(meshes.size() << 16);  // 4, 6
+  gen.add_word(0);                    // 8
+  gen.add_word(0);                    // 12
+  gen.add_word(0);                    // 16
+  gen.add_word(0);                    // 20
+  gen.add_word(0);                    // 24
+  gen.add_word(0);                    // 28
+
+  fmt::print("have: {}\n", meshes.size());
+
+  ASSERT(meshes.size() == frag_mesh_locs.size());
+  for (size_t i = 0; i < meshes.size(); i++) {
+    auto& mesh = meshes[i];
+    // should be 8 words here:
+    gen.add_type_tag("collide-fragment");  // 1
+    size_t me = gen.current_offset_bytes();
+    gen.add_word(0x10000);  // ???
+    gen.link_word_to_byte(gen.add_word(0), frag_mesh_locs[i]);
+    gen.add_word(0);
+    for (int j = 0; j < 4; j++) {
+      gen.add_word_float(mesh.bsphere[j]);
+    }
+    if ((i % 8) == 0) {
+      parent_ref_out.push_back(me);
+    }
+  }
+
+  return result;
+}
+
+size_t generate_collide_draw_node_array(DataObjectGenerator& gen,
+                                        const std::vector<collide::DrawNode>& nodes,
+                                        u32 flag,
+                                        const std::vector<size_t>& children,
+                                        std::vector<size_t>& parent_ref_out) {
+  gen.align_to_basic();
+  gen.add_type_tag("drawable-inline-array-node");  // 0
+  size_t result = gen.current_offset_bytes();
+  gen.add_word(nodes.size() << 16);  // 4, 6
+  gen.add_word(0);                   // 8
+  gen.add_word(0);                   // 12
+  gen.add_word(0);                   // 16
+  gen.add_word(0);                   // 20
+  gen.add_word(0);                   // 24
+  gen.add_word(0);                   // 28
+
+  ASSERT(nodes.size() == children.size());
+  for (size_t i = 0; i < nodes.size(); i++) {
+    auto& node = nodes[i];
+    // should be 8 words here:
+    gen.add_type_tag("draw-node");  // 1
+    size_t me = gen.current_offset_bytes();
+    u32 packed_flags = 0;
+    packed_flags |= (8 << 16);  // TODO hard-coded size here
+    packed_flags |= (flag << 24);
+    gen.add_word(packed_flags);                           // 2
+    gen.link_word_to_byte(gen.add_word(0), children[i]);  // 3
+    gen.add_word(0);                                      // 4
+    if ((i % 8) == 0) {
+      parent_ref_out.push_back(me);
+    }
+    gen.add_word_float(node.bsphere.x());  // 5
+    gen.add_word_float(node.bsphere.y());  // 6
+    gen.add_word_float(node.bsphere.z());  // 7
+    gen.add_word_float(node.bsphere.w());  // 8
+  }
+
+  return result;
+}
+
+size_t DrawableTreeCollideFragment::add_to_object_file(DataObjectGenerator& gen) const {
+  for (auto& lev : bvh.node_arrays) {
+    fmt::print("lev: {}\n", lev.nodes.size());
+  }
+  // generate pat array
+  size_t pat_array_loc = generate_pat_array(gen, packed_frags.pats);
+
+  // generated packed data
+  std::vector<size_t> packed_data_locs;
+  for (auto& mesh : packed_frags.packed_frag_data) {
+    packed_data_locs.push_back(generate_packed_collide_data(gen, mesh.packed_data));
+  }
+
+  // generate collide frag meshes
+  std::vector<size_t> collide_frag_meshes;
+  for (size_t i = 0; i < packed_data_locs.size(); i++) {
+    collide_frag_meshes.push_back(generate_collide_frag_mesh(gen, packed_frags.packed_frag_data[i],
+                                                             packed_data_locs[i], pat_array_loc));
+  }
+
+  std::vector<size_t> array_locs;
+  array_locs.resize(bvh.node_arrays.size() + 1);  // plus one for the frags.
+  int array_slot = bvh.node_arrays.size();
+
+  std::vector<size_t> children_refs;
+  array_locs[array_slot--] = generate_collide_fragment_array(gen, packed_frags.packed_frag_data,
+                                                             collide_frag_meshes, children_refs);
+  u32 flag = 0;
+  while (array_slot >= 0) {
+    fmt::print("sizes: {} {}\n", children_refs.size(), bvh.node_arrays.at(array_slot).nodes.size());
+    ASSERT(children_refs.size() == bvh.node_arrays.at(array_slot).nodes.size());
+    std::vector<size_t> next_children;
+
+    array_locs[array_slot] = generate_collide_draw_node_array(
+        gen, bvh.node_arrays.at(array_slot).nodes, flag, children_refs, next_children);
+
+    children_refs = std::move(next_children);
+    array_slot--;
+    flag = 1;
+  }
+
+  {
+    gen.align_to_basic();
+    gen.add_type_tag("drawable-tree-collide-fragment");
+    size_t result = gen.current_offset_bytes();
+    gen.add_word((array_locs.size() - 1) << 16);  // todo the minus one here??
+    for (int i = 0; i < 6; i++) {
+      gen.add_word(0);
+    }
+
+    for (size_t i = 1; i < array_locs.size(); i++) {  // todo the offset here?
+      gen.link_word_to_byte(gen.add_word(0), array_locs[i]);
+    }
+
+    return result;
+  }
+}
diff --git a/goalc/build_level/collide_drawable.h b/goalc/build_level/collide_drawable.h
new file mode 100644
index 0000000000..5804dbb5c1
--- /dev/null
+++ b/goalc/build_level/collide_drawable.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#include "goalc/build_level/collide_bvh.h"
+#include "goalc/build_level/collide_pack.h"
+
+class DataObjectGenerator;
+
+struct DrawableTreeCollideFragment {
+  CollideFragMeshDataArray packed_frags;
+  collide::CollideTree bvh;
+  size_t add_to_object_file(DataObjectGenerator& gen) const;
+};
diff --git a/goalc/build_level/collide_pack.cpp b/goalc/build_level/collide_pack.cpp
new file mode 100644
index 0000000000..595ad6c89c
--- /dev/null
+++ b/goalc/build_level/collide_pack.cpp
@@ -0,0 +1,263 @@
+#include <functional>
+
+#include "collide_pack.h"
+#include "common/util/Assert.h"
+#include "common/log/log.h"
+#include "common/util/Timer.h"
+
+struct PackedU16Verts {
+  math::Vector<s32, 3> base;                 // integer origin that all vertices are relative to
+  std::vector<math::Vector<u16, 3>> vertex;  // per-vertex packed offsets from base
+};
+
+/*!
+ * Reinterpret the float's bytes as two u16s: assert that the second is 0x4d00, return the first.
+ */
+u16 magic_float_to_u16(float in) {
+  u16 halves[2];
+  memcpy(halves, &in, sizeof(halves));
+  ASSERT_MSG(halves[1] == 0x4d00,
+             "Unable to pack collision vertex data to u16's. Try to make smaller triangles and "
+             "avoid skinny triangles. (or there is a bug in the packer)");
+  return halves[0];
+}
+
+namespace {
+float u32_to_float(u32 in) {  // copy the 4 bytes of in into a float (no numeric conversion)
+  float bits_as_float;
+  memcpy(&bits_as_float, &in, sizeof(float));
+  return bits_as_float;
+}
+}  // namespace
+
+/*!
+ * Pack vertices to a shared s32 base plus one u16 per component. The input must not be empty.
+ * The format is quite strange to allow for fast unpacking.
+ */
+PackedU16Verts pack_verts_to_u16(const std::vector<math::Vector3f>& input) {
+  PackedU16Verts result;
+  ASSERT(!input.empty());  // input[0] is read below
+
+  // this "magic" offset is a large float where a ulp is 16.f, or 1/256th of a meter.
+  // this means that float -> int can be done as a single addition.
+  // (or, in some cases, we can avoid float->int entirely)
+  math::Vector3f magic_offset;
+  magic_offset.fill(u32_to_float(0x4d000000));
+
+  // we'll be treating everything as an offset from this minimum vertex:
+  math::Vector3f min_vtx = input[0];
+  for (auto& vtx : input) {
+    min_vtx.min_in_place(vtx);
+  }
+  // give us a tiny bit of extra room to avoid rounding problems
+  min_vtx -= 16.f;
+
+  // convert to integers. NOTE(review): cast presumably truncates; the margin above allows for it
+  result.base = min_vtx.cast<s32>();
+  auto base = result.base.cast<float>();
+
+  // compute offset relative to base.
+  for (auto& vtx : input) {
+    // add the "magic offset" to make this a 0x4dXXXXXX float.
+    // subtract the base to make this a 0x4d00XXXX float.
+    auto vertex_magic = vtx + magic_offset - base;
+    // and if we did it right, we should be able to pack to u16's here
+    result.vertex.emplace_back(magic_float_to_u16(vertex_magic[0]),
+                               magic_float_to_u16(vertex_magic[1]),
+                               magic_float_to_u16(vertex_magic[2]));
+  }
+
+  // verify (disabled debug code: unpacks each vertex again and prints the error)
+  /*
+  for (size_t i = 0; i < input.size(); i++) {
+    math::Vector3f v;
+    v[0] = u32_to_float(0x4d000000 + result.vertex[i][0]);
+    v[1] = u32_to_float(0x4d000000 + result.vertex[i][1]);
+    v[2] = u32_to_float(0x4d000000 + result.vertex[i][2]);
+    float base_offset = u32_to_float(0x4d000000);
+    math::Vector3f vf13_combo_offset(base_offset, base_offset, base_offset);
+    math::Vector3f vf14_base_trans_float(result.base[0], result.base[1], result.base[2]);
+    vf13_combo_offset -= vf14_base_trans_float;
+    v -= vf13_combo_offset;
+    fmt::print("error {}\n", (v - input[i]).to_string_aligned());;
+  }
+   */
+
+  return result;
+}
+
+struct PatSurfaceHash {  // hashes a PatSurface through its val member
+  size_t operator()(const PatSurface& in) const { return std::hash<u32>{}(in.val); }
+};
+
+/*!
+ * Palette of unique pats: maps each pat to its index in the pats array.
+ * The highest index that can be handed out is UINT8_MAX.
+ */
+struct PatMap {
+  std::unordered_map<PatSurface, u32, PatSurfaceHash> map;
+  std::vector<PatSurface> pats;
+
+  u32 add_pat(PatSurface pat) {
+    // already in the palette? reuse the existing slot.
+    const auto existing = map.find(pat);
+    if (existing != map.end()) {
+      return existing->second;
+    }
+    const u32 slot = pats.size();
+    if (slot > UINT8_MAX) {
+      lg::die("Too many pats. Use fewer. Or improve the pat code to use multiple pat arrays.");
+    }
+    map[pat] = slot;
+    pats.push_back(pat);
+    return slot;
+  }
+};
+
+/*!
+ * A single triangular face: three vertex indices plus the index of its pat.
+ */
+struct IndexFace {
+  math::Vector<u16, 3> vertex_indices;  // per vertex, winding order matters here
+  u8 pat_idx;                           // pat for the whole face
+};
+
+/*!
+ * All the faces in a frag, plus the vertices they index, stored in two formats.
+ */
+struct IndexedFaces {
+  std::vector<IndexFace> faces;  // indices into the two vertex arrays below:
+  std::vector<math::Vector3f> vertices_float;  // deduplicated positions as floats
+  PackedU16Verts vertices_u16;                 // the same vertices, packed by pack_verts_to_u16
+};
+
+struct Vector3fHash {  // xor of the per-component hashes, so swapped components collide
+  size_t operator()(const math::Vector3f& in) const {
+    return std::hash<float>()(in.x()) ^ std::hash<float>()(in.y()) ^ std::hash<float>()(in.z());
+  }
+};
+
+/*!
+ * Turn a frag into indexed faces: deduplicate vertices, add each face's pat to the palette,
+ * and pack the unique vertices to u16s.
+ */
+IndexedFaces dedup_frag_mesh(const collide::CollideFrag& frag, PatMap* pat_map) {
+  IndexedFaces indexed;
+  std::unordered_map<math::Vector3f, u32, Vector3fHash> slot_of_vertex;
+  // avoid confusion with 0 in strip table. (todo, can probably remove)
+  indexed.vertices_float.push_back(math::Vector3f::zero());
+
+  for (auto& face_in : frag.faces) {
+    auto& face_out = indexed.faces.emplace_back();
+    // pat:
+    face_out.pat_idx = pat_map->add_pat(face_in.pat);
+    // vertices
+    for (int corner = 0; corner < 3; corner++) {
+      const auto& vtx = face_in.v[corner];
+      // try to claim the next free slot; if the vertex was seen before, nothing is inserted.
+      const u32 next_slot = indexed.vertices_float.size();
+      const auto [entry, is_new] = slot_of_vertex.emplace(vtx, next_slot);
+      if (is_new) {
+        indexed.vertices_float.push_back(vtx);
+      }
+      face_out.vertex_indices[corner] = entry->second;
+    }
+  }
+  // fmt::print("{} -> {}\n", frag.faces.size() * 3, result.vertices_float.size());
+  indexed.vertices_u16 = pack_verts_to_u16(indexed.vertices_float);
+  return indexed;
+}
+
+/*!
+ * Make a strip table without any stripping: each face becomes its 3 vertex indices and a 0,
+ * and a final -1 (0xff) closes the table. It will be quite long, which might cause problems.
+ */
+std::vector<u8> make_dumb_strip_table(const IndexedFaces& faces) {
+  ASSERT_MSG(
+      faces.vertices_float.size() < UINT8_MAX,
+      "somehow have UINT8_MAX deduped vertices in a single fragment, likely a bug somewhere.");
+  std::vector<u8> table;
+  for (const auto& tri : faces.faces) {
+    for (int corner = 0; corner < 3; corner++) {
+      table.push_back(tri.vertex_indices[corner]);
+    }
+    table.push_back(0);
+  }
+  table.push_back(0xff);
+  return table;
+}
+
+/*!
+ * Pack each collide fragment into one byte buffer: u16 vertices, strip table, then pat
+ * indices. Returns the packed fragments together with the pat palette shared by all of them.
+ */
+CollideFragMeshDataArray pack_collide_frags(const std::vector<collide::CollideFrag>& frag_data) {
+  Timer pack_timer;
+  CollideFragMeshDataArray result;
+  PatMap pat_map;
+  size_t total_pack_bytes = 0;
+  lg::info("Packing {} fragments", frag_data.size());
+
+  for (auto& frag_in : frag_data) {
+    auto& frag_out = result.packed_frag_data.emplace_back();
+    auto indexed = dedup_frag_mesh(frag_in, &pat_map);
+    // first part of packed_data is the u16 vertex data:
+    frag_out.vertex_count = indexed.vertices_u16.vertex.size();
+    frag_out.packed_data.resize(sizeof(u16) * frag_out.vertex_count * 3);
+    memcpy(frag_out.packed_data.data(), indexed.vertices_u16.vertex.data(),
+           frag_out.packed_data.size());
+    // align to 16-bytes
+    while (frag_out.packed_data.size() & 0xf) {
+      frag_out.packed_data.push_back(0);
+    }
+    // remember where
+    frag_out.vertex_data_qwc = frag_out.packed_data.size() / 16;
+    // up next, the strip table
+    auto strip = make_dumb_strip_table(indexed);
+    frag_out.packed_data.insert(frag_out.packed_data.end(), strip.begin(), strip.end());
+    frag_out.strip_data_len = strip.size();
+    ASSERT(frag_out.strip_data_len < UINT16_MAX);  // probably in big trouble in here.
+    // pat table
+    for (auto& face : indexed.faces) {
+      frag_out.packed_data.push_back(face.pat_idx);
+    }
+    // align to 16-bytes so total_qwc works.
+    while (frag_out.packed_data.size() & 0xf) {
+      frag_out.packed_data.push_back(0);
+    }
+    ASSERT(frag_out.packed_data.size() / 16 <= UINT8_MAX);  // total_qwc is a u8: don't wrap
+    // gonna guess here:
+    frag_out.poly_count = indexed.faces.size();
+    frag_out.total_qwc = frag_out.packed_data.size() / 16;
+    frag_out.base_trans_xyz_s32 = indexed.vertices_u16.base;
+    frag_out.bsphere = frag_in.bsphere;
+    total_pack_bytes += frag_out.packed_data.size();
+  }
+
+  result.pats = pat_map.pats;
+  lg::info("Total packed data size: {} kB, took {:.2f} ms", total_pack_bytes / 1024,
+           pack_timer.getMs());
+  return result;
+}
+
+/*
+(deftype collide-frag-mesh (basic)
+  ((packed-data     uint32         :offset-assert 4)  <- ptr
+   (pat-array       uint32         :offset-assert 8)  <- ptr
+   (strip-data-len  uint16         :offset-assert 12)
+   (poly-count      uint16         :offset-assert 14)
+   (base-trans      vector :inline :offset-assert 16)
+   ;; these go in the w of the vector above.
+   (vertex-count    uint8          :offset 28)        // done!
+   (vertex-data-qwc uint8          :offset 29)        // done!
+   (total-qwc       uint8          :offset 30)
+   (unused          uint8          :offset 31)
+   )
+  :method-count-assert 9
+  :size-assert         #x20
+  :flag-assert         #x900000020
+  )
+ */
+
+// packed_data:
+//  (u16x3) per vertex, packed float vtx format.
+//
diff --git a/goalc/build_level/collide_pack.h b/goalc/build_level/collide_pack.h
new file mode 100644
index 0000000000..316b56e9f4
--- /dev/null
+++ b/goalc/build_level/collide_pack.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include "goalc/build_level/collide_bvh.h"
+
+struct CollideFragMeshData {
+  math::Vector4f bsphere;  // not part of the collide frag, but is part of the drawable wrapping it
+  std::vector<u8> packed_data;  // u16 vertices, strip table, then pat indices (16-byte padded)
+  u32 strip_data_len;           // size of the strip table in bytes
+  u32 poly_count;               // number of faces
+  math::Vector<s32, 3> base_trans_xyz_s32;  // integer base that the u16 vertices are relative to
+  u8 vertex_count;
+  u8 vertex_data_qwc;  // size of the padded vertex data, in 16-byte units
+  u8 total_qwc;        // size of all of packed_data, in 16-byte units
+};
+
+struct CollideFragMeshDataArray {
+  std::vector<CollideFragMeshData> packed_frag_data;  // one entry per input fragment
+  std::vector<PatSurface> pats;  // pat palette; faces refer to entries by index
+};
+
+CollideFragMeshDataArray pack_collide_frags(const std::vector<collide::CollideFrag>& frag_data);
\ No newline at end of file
diff --git a/goalc/build_level/color_quantization.cpp b/goalc/build_level/color_quantization.cpp
new file mode 100644
index 0000000000..91784dc117
--- /dev/null
+++ b/goalc/build_level/color_quantization.cpp
@@ -0,0 +1,220 @@
+#include <algorithm>
+#include "color_quantization.h"
+#include "common/util/Assert.h"
+#include "common/log/log.h"
+
+/*!
+ * Just removes duplicate colors, which can work if there are only a few unique colors.
+ * final_colors holds each unique color once (in order of first appearance) and vtx_to_color
+ * holds, for every input color, the index of its entry in final_colors.
+ */
+QuantizedColors quantize_colors_dumb(const std::vector<math::Vector<u8, 4>>& in) {
+  QuantizedColors result;
+  std::unordered_map<u64, u32> color_to_slot;
+  for (auto& vtx : in) {
+    // a color is only 4 bytes: zero the key and copy exactly that much, so we never read past
+    // the end of the color (which would mix neighboring memory into the key).
+    u64 key = 0;
+    memcpy(&key, vtx.data(), 4 * sizeof(u8));
+    const auto [slot, is_new] = color_to_slot.emplace(key, result.final_colors.size());
+    if (is_new) {
+      result.final_colors.push_back(vtx);
+    }
+    result.vtx_to_color.push_back(slot->second);
+  }
+  fmt::print("quantize_colors_dumb: {} -> {}\n", in.size(), result.final_colors.size());
+  ASSERT(result.final_colors.size() < 8192);
+  return result;
+}
+
+namespace {
+
+using Color = math::Vector<u8, 4>;
+
+// An octree node.
+// Represents a color in the output if rgb_sum_count > 0.
+// Otherwise, just organizational.
+struct Node {
+  u32 r_sum = 0;  // per-channel sums of all colors accumulated in this node
+  u32 g_sum = 0;
+  u32 b_sum = 0;
+  u32 rgb_sum_count = 0;  // number of colors in the sums above
+  u8 depth = 0xff;  // 0 for root, 7 deepest, 0xff for a child slot that was never visited
+
+  // children stuff
+  u32 leaves_under_me = 0;  // count of color-holding nodes below this one (not including itself)
+  std::vector<Node> children;  // empty, or 8 slots indexed by child_index
+  Node* parent = nullptr;
+
+  u32 final_idx = UINT32_MAX;  // palette index, set by assign_colors for color-holding nodes
+};
+
+u8 child_index(Color color, u8 depth) {
+  // take bit (7 - depth) of each channel and pack them as r + 2 * g + 4 * b: a slot in 0..7.
+  const int shift = 7 - depth;
+  const u8 rg_bits = ((color.x() >> shift) & 1) | (((color.y() >> shift) & 1) << 1);
+  return rg_bits | (((color.z() >> shift) & 1) << 2);
+}
+
+void insert(Node& root, Color color, u8 current_depth) {
+  if (current_depth != 7) {  // interior node: descend into the child slot for this color
+    if (root.children.empty()) {
+      root.children.resize(8);
+    }
+    Node& child = root.children[child_index(color, current_depth)];
+    if (child.depth == 0xff) {
+      child.depth = current_depth + 1;
+      child.parent = &root;
+    }
+    insert(child, color, current_depth + 1);
+    return;
+  }
+  if (root.rgb_sum_count == 0) {  // first color stored here: every ancestor gains a leaf
+    for (Node* up = root.parent; up; up = up->parent) {
+      up->leaves_under_me++;
+    }
+  }
+  root.r_sum += color.x();
+  root.g_sum += color.y();
+  root.b_sum += color.z();
+  root.rgb_sum_count++;
+}
+
+template <typename T>
+void for_each_node(Node& root, T&& func) {
+  func(root);  // pre-order: the node itself first, then each child subtree in order
+  for (Node& kid : root.children) {
+    for_each_node(kid, func);
+  }
+}
+
+u32 count_leaves(Node& root) {
+  u32 leaf_total = 0;  // nodes with a nonzero rgb_sum_count are the leaves
+  for_each_node(root, [&leaf_total](Node& node) {
+    if (node.rgb_sum_count != 0) {
+      ASSERT(node.children.empty());
+      leaf_total++;
+    }
+  });
+  return leaf_total;
+}
+
+void collapse1(Node& root) {  // merge root's children (all must be leaves) into root itself
+  ASSERT(!root.children.empty());
+  u32 total_children_removed = 0;
+  u32 total_rgb_sum_moved_up = 0;
+  bool started_as_leaf = root.rgb_sum_count;
+  for (auto& child : root.children) {
+    if (child.depth != 0xff) {  // skip child slots that were never visited
+      ASSERT(child.children.empty());
+      ASSERT(child.rgb_sum_count);
+      total_children_removed++;
+      root.r_sum += child.r_sum;
+      root.g_sum += child.g_sum;
+      root.b_sum += child.b_sum;
+      root.rgb_sum_count += child.rgb_sum_count;
+      total_rgb_sum_moved_up += child.rgb_sum_count;
+    }
+  }
+  ASSERT(total_children_removed == root.leaves_under_me);
+
+  if (!started_as_leaf && root.rgb_sum_count) {
+    total_children_removed--;  // root now holds colors itself, so the net loss is one less
+  }
+  root.children.clear();
+  root.leaves_under_me = 0;
+  if (total_children_removed) {
+    for (auto* up = root.parent; up; up = up->parent) {  // fix the counts of all ancestors
+      up->leaves_under_me -= total_children_removed;
+    }
+  }
+
+  ASSERT(count_leaves(root) == 1);
+}
+
+void find_nodes_at_level(Node& n, std::vector<Node*>& out, u8 level) {
+  if (n.depth >= level) {  // at (or already past) the requested level: stop descending
+    if (n.depth == level) out.push_back(&n);
+    return;
+  }
+  for (Node& kid : n.children) {
+    find_nodes_at_level(kid, out, level);
+  }
+}
+
+void collapse_at_level(Node& root, u8 level, u32 target_leaf_count) {
+  std::vector<Node*> candidates;
+  find_nodes_at_level(root, candidates, level);
+  // nodes with the fewest leaves below them are collapsed first
+  std::stable_sort(candidates.begin(), candidates.end(),
+                   [](Node* a, Node* b) { return a->leaves_under_me < b->leaves_under_me; });
+  // collapse one node at a time, stopping as soon as the tree is small enough
+  for (size_t i = 0; i < candidates.size() && root.leaves_under_me > target_leaf_count; i++) {
+    collapse1(*candidates[i]);
+  }
+}
+
+void collapse_as_needed(Node& root, u32 target_leaf_count) {
+  // start at level 6 and move up one level at a time until few enough leaves remain
+  for (u32 level = 6; root.leaves_under_me > target_leaf_count; level--) {
+    collapse_at_level(root, level, target_leaf_count);
+  }
+}
+
+void assign_colors(Node& root, std::vector<Color>& palette_out) {
+  u32 next_idx = 0;
+  for_each_node(root, [&](Node& leaf) {
+    const u32 count = leaf.rgb_sum_count;
+    if (count != 0) {
+      leaf.final_idx = next_idx++;  // slots follow the for_each_node visiting order
+      palette_out.emplace_back(leaf.r_sum / count, leaf.g_sum / count, leaf.b_sum / count);
+    }
+  });
+}
+
+u32 lookup_node_for_color(Node& root, Color c, u8 depth) {
+  Node* node = &root;  // walk down from root until reaching a node without children
+  while (!node->children.empty()) {
+    node = &node->children[child_index(c, depth)];
+    depth++;
+  }
+  return node->final_idx;
+}
+
+}  // namespace
+
+/*!
+ * Quantize colors using an octree for clustering. target_count is the leaf-count limit.
+ */
+QuantizedColors quantize_colors_octree(const std::vector<math::Vector<u8, 4>>& in,
+                                       u32 target_count) {
+  Node root;
+  root.depth = 0;
+  for (auto& color : in) {
+    insert(root, color, 0);  // build the full-depth tree from every input color
+  }
+
+  collapse_as_needed(root, target_count);  // merge nodes until the leaf count is low enough
+
+  QuantizedColors out;
+  assign_colors(root, out.final_colors);  // one palette entry per remaining color node
+  for (auto& color : in) {
+    out.vtx_to_color.push_back(lookup_node_for_color(root, color, 0));
+  }
+
+  float total_error[3] = {0, 0, 0};  // summed absolute r, g, b error, only used for logging
+  for (size_t i = 0; i < in.size(); i++) {
+    // fmt::print(" {} -> {}\n", in[i].to_string_hex_byte(),
+    // out.final_colors[out.vtx_to_color[i]].to_string_hex_byte());
+    auto diff = in[i].cast<int>() - out.final_colors[out.vtx_to_color[i]].cast<int>();
+
+    for (int j = 0; j < 3; j++) {
+      total_error[j] += std::abs(diff[j]);
+    }
+  }
+
+  lg::info("Octree quantize average error (as 8-bit ints): r: {}, g: {} b: {}",
+           total_error[0] / in.size(), total_error[1] / in.size(), total_error[2] / in.size());
+  lg::info("Final palette size: {}", out.final_colors.size());
+
+  return out;
+}
\ No newline at end of file
diff --git a/goalc/build_level/color_quantization.h b/goalc/build_level/color_quantization.h
new file mode 100644
index 0000000000..44b5ba68cf
--- /dev/null
+++ b/goalc/build_level/color_quantization.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include <vector>
+#include "common/common_types.h"
+#include "common/math/Vector.h"
+
+// TODO: come up with something for time of day colors.
+// time of day colors make the colors effectively 24-channel instead of just 3.
+// the octree approach doesn't work too well here (we'd need a 16777216-tree)
+// but a k-d tree seems like the right approach.
+
+struct QuantizedColors {
+  std::vector<math::Vector<u8, 4>> final_colors;  // the palette
+  std::vector<u32> vtx_to_color;  // for each input color, its index in final_colors
+};
+
+QuantizedColors quantize_colors_dumb(const std::vector<math::Vector<u8, 4>>& in);
+
+QuantizedColors quantize_colors_octree(const std::vector<math::Vector<u8, 4>>& in,
+                                       u32 target_count);
\ No newline at end of file
diff --git a/goalc/build_level/gltf_mesh_extract.cpp b/goalc/build_level/gltf_mesh_extract.cpp
new file mode 100644
index 0000000000..a81463c052
--- /dev/null
+++ b/goalc/build_level/gltf_mesh_extract.cpp
@@ -0,0 +1,688 @@
+/*!
+ * Mesh extraction for GLTF meshes.
+ */
+
+#include "gltf_mesh_extract.h"
+#include "goalc/build_level/color_quantization.h"
+#include "third-party/tiny_gltf/tiny_gltf.h"
+#include "common/log/log.h"
+#include "common/util/Timer.h"
+#include "common/math/geometry.h"
+
+namespace gltf_mesh_extract {
+
+namespace {
+
+/*!
+ * Convert a GLTF index buffer to std::vector<u32>, adding offset to every index.
+ */
+template <typename T>
+std::vector<u32> index_list_to_u32(const u8* data, u32 num_verts, u32 offset, u32 stride) {
+  std::vector<u32> indices;
+  indices.reserve(num_verts);
+  // each index is a T, and consecutive indices are stride bytes apart.
+  for (const u8* src = data; indices.size() < num_verts; src += stride) {
+    T raw;
+    memcpy(&raw, src, sizeof(T));
+    indices.push_back(offset + raw);
+  }
+  return indices;
+}
+
+/*!
+ * Read count Vector3f values, spaced stride bytes apart, from a GLTF position buffer or similar.
+ */
+std::vector<math::Vector3f> extract_vec3f(const u8* data, u32 count, u32 stride) {
+  std::vector<math::Vector3f> vectors;
+  vectors.reserve(count);
+  for (u32 i = 0; i < count; i++, data += stride) {
+    auto& dest = vectors.emplace_back();
+    memcpy(&dest, data, sizeof(math::Vector3f));
+  }
+  return vectors;
+}
+
+std::vector<math::Vector2f> extract_vec2f(const u8* data, u32 count, u32 stride) {
+  std::vector<math::Vector2f> vectors;  // count values, read stride bytes apart
+  vectors.reserve(count);
+  for (u32 i = 0; i < count; i++, data += stride) {
+    auto& dest = vectors.emplace_back();
+    memcpy(&dest, data, sizeof(math::Vector2f));
+  }
+  return vectors;
+}
+
+/*!
+ * Convert a GLTF color buffer (4 u16 channels per color) to u8 colors, keeping the top 8 bits.
+ */
+std::vector<math::Vector<u8, 4>> extract_color_from_vec4_u16(const u8* data,
+                                                             u32 count,
+                                                             u32 stride) {
+  std::vector<math::Vector<u8, 4>> colors;
+  colors.reserve(count);
+  for (u32 i = 0; i < count; i++, data += stride) {
+    math::Vector<u16, 4> wide;
+    memcpy(&wide, data, sizeof(wide));
+    // shift the low 8 bits of every channel away
+    colors.emplace_back(wide.x() >> 8, wide.y() >> 8, wide.z() >> 8, wide.w() >> 8);
+  }
+  return colors;
+}
+
+/*!
+ * Convert the GLTF index buffer of accessor indices_idx to u32, adding index_offset to each.
+ */
+std::vector<u32> gltf_index_buffer(const tinygltf::Model& model,
+                                   int indices_idx,
+                                   u32 index_offset) {
+  const auto& indices_accessor = model.accessors[indices_idx];
+  const auto& buffer_view = model.bufferViews[indices_accessor.bufferView];
+  const auto& buffer = model.buffers[buffer_view.buffer];
+  const auto data_ptr = buffer.data.data() + buffer_view.byteOffset + indices_accessor.byteOffset;
+  const auto stride = indices_accessor.ByteStride(buffer_view);
+  const auto count = indices_accessor.count;
+  switch (indices_accessor.componentType) {
+    case TINYGLTF_COMPONENT_TYPE_BYTE:
+      return index_list_to_u32<s8>(data_ptr, count, index_offset, stride);
+    case TINYGLTF_COMPONENT_TYPE_UNSIGNED_BYTE:
+      return index_list_to_u32<u8>(data_ptr, count, index_offset, stride);
+    case TINYGLTF_COMPONENT_TYPE_SHORT:
+      return index_list_to_u32<s16>(data_ptr, count, index_offset, stride);
+    case TINYGLTF_COMPONENT_TYPE_UNSIGNED_SHORT:
+      return index_list_to_u32<u16>(data_ptr, count, index_offset, stride);
+    case TINYGLTF_COMPONENT_TYPE_INT:
+      return index_list_to_u32<s32>(data_ptr, count, index_offset, stride);
+    case TINYGLTF_COMPONENT_TYPE_UNSIGNED_INT:
+      return index_list_to_u32<u32>(data_ptr, count, index_offset, stride);
+    default:
+      ASSERT_MSG(false, "unsupported component type");
+      return {};  // not reached if the assert aborts; keeps every path returning a value
+  }
+}
+
+struct ExtractedVertices {
+  std::vector<tfrag3::PreloadedVertex> vtx;     // positions (and texture coordinates, if present)
+  std::vector<math::Vector<u8, 4>> vtx_colors;  // empty unless colors were requested
+  std::vector<math::Vector3f> normals;          // empty unless normals were requested and present
+};
+
+/*!
+ * Extract vertices from one set of GLTF attributes. Positions are transformed by w_T_local and
+ * scaled by 4096, and texture coordinates are copied if present. Colors and normals are only
+ * read if get_colors / get_normals are set. debug_name is only used in log messages.
+ */
+ExtractedVertices gltf_vertices(const tinygltf::Model& model,
+                                const std::map<std::string, int>& attributes,
+                                const math::Matrix4f& w_T_local,
+                                bool get_colors,
+                                bool get_normals,
+                                const std::string& debug_name) {
+  std::vector<tfrag3::PreloadedVertex> result;
+  std::vector<math::Vector<u8, 4>> vtx_colors;
+
+  {
+    const auto& position_attrib = attributes.find("POSITION");
+    ASSERT_MSG(position_attrib != attributes.end(), "Did not find position attribute.");
+    const auto& attrib_accessor = model.accessors[position_attrib->second];
+    const auto& buffer_view = model.bufferViews[attrib_accessor.bufferView];
+    const auto& buffer = model.buffers[buffer_view.buffer];
+    const auto data_ptr = buffer.data.data() + buffer_view.byteOffset + attrib_accessor.byteOffset;
+    const auto byte_stride = attrib_accessor.ByteStride(buffer_view);
+    const auto count = attrib_accessor.count;
+    ASSERT_MSG(attrib_accessor.type == TINYGLTF_TYPE_VEC3, "POSITION wasn't vec3");
+    ASSERT_MSG(attrib_accessor.componentType == TINYGLTF_COMPONENT_TYPE_FLOAT,
+               "POSITION wasn't float");
+    // for (auto& attrib : attributes) {
+    // fmt::print("attrib: {}\n", attrib.first);
+    //}
+    auto mesh_verts = extract_vec3f(data_ptr, count, byte_stride);
+    result.reserve(mesh_verts.size());
+    for (auto& vert : mesh_verts) {
+      auto& new_vert = result.emplace_back();
+      math::Vector4f v_in(vert.x(), vert.y(), vert.z(), 1);
+      math::Vector4f v_w = w_T_local * v_in;
+      new_vert.x = v_w.x() * 4096;
+      new_vert.y = v_w.y() * 4096;
+      new_vert.z = v_w.z() * 4096;
+    }
+  }
+
+  if (get_colors) {
+    const auto& color_attrib = attributes.find("COLOR_0");
+    if (color_attrib == attributes.end()) {
+      lg::error("Mesh {} didn't have any colors, using white", debug_name);
+      for (size_t i = 0; i < result.size(); i++) {
+        vtx_colors.emplace_back(0x80, 0x80, 0x80, 0xff);
+      }
+    } else {
+      const auto& attrib_accessor = model.accessors[color_attrib->second];
+      const auto& buffer_view = model.bufferViews[attrib_accessor.bufferView];
+      const auto& buffer = model.buffers[buffer_view.buffer];
+      const auto data_ptr =
+          buffer.data.data() + buffer_view.byteOffset + attrib_accessor.byteOffset;
+      const auto byte_stride = attrib_accessor.ByteStride(buffer_view);
+      const auto count = attrib_accessor.count;
+      ASSERT_MSG(attrib_accessor.type == TINYGLTF_TYPE_VEC4, "COLOR_0 wasn't vec4");
+      ASSERT_MSG(attrib_accessor.componentType == TINYGLTF_COMPONENT_TYPE_UNSIGNED_SHORT,
+                 fmt::format("COLOR_0 wasn't unsigned short, got {} instead",
+                             attrib_accessor.componentType));
+      auto colors = extract_color_from_vec4_u16(data_ptr, count, byte_stride);
+      ASSERT(colors.size() == result.size());  // same check as for texcoords and normals
+      vtx_colors.insert(vtx_colors.end(), colors.begin(), colors.end());
+    }
+    // ASSERT_MSG(color_attrib != attributes.end(), "Did not find color attribute.");
+  }
+
+  bool got_texture = false;
+  {
+    const auto& texcoord_attrib = attributes.find("TEXCOORD_0");
+    if (texcoord_attrib != attributes.end()) {
+      const auto& attrib_accessor = model.accessors[texcoord_attrib->second];
+      const auto& buffer_view = model.bufferViews[attrib_accessor.bufferView];
+      const auto& buffer = model.buffers[buffer_view.buffer];
+      const auto data_ptr =
+          buffer.data.data() + buffer_view.byteOffset + attrib_accessor.byteOffset;
+      const auto byte_stride = attrib_accessor.ByteStride(buffer_view);
+      const auto count = attrib_accessor.count;
+      ASSERT_MSG(attrib_accessor.type == TINYGLTF_TYPE_VEC2, "TEXCOORD wasn't vec2");
+      ASSERT_MSG(attrib_accessor.componentType == TINYGLTF_COMPONENT_TYPE_FLOAT,
+                 "TEXCOORD wasn't float");
+      auto mesh_verts = extract_vec2f(data_ptr, count, byte_stride);
+      ASSERT(mesh_verts.size() == result.size());
+      got_texture = true;
+      for (size_t i = 0; i < mesh_verts.size(); i++) {
+        result[i].s = mesh_verts[i].x();
+        result[i].t = mesh_verts[i].y();
+      }
+    } else {
+      if (!get_normals) {
+        // don't warn if we're just getting collision
+        lg::warn("No texcoord attribute for mesh: {}", debug_name);
+      }
+    }
+  }
+
+  std::vector<math::Vector3f> normals;
+  if (get_normals) {
+    const auto& normal_attrib = attributes.find("NORMAL");
+    if (normal_attrib != attributes.end()) {
+      const auto& attrib_accessor = model.accessors[normal_attrib->second];
+      const auto& buffer_view = model.bufferViews[attrib_accessor.bufferView];
+      const auto& buffer = model.buffers[buffer_view.buffer];
+      const auto data_ptr =
+          buffer.data.data() + buffer_view.byteOffset + attrib_accessor.byteOffset;
+      const auto byte_stride = attrib_accessor.ByteStride(buffer_view);
+      const auto count = attrib_accessor.count;
+      ASSERT_MSG(attrib_accessor.type == TINYGLTF_TYPE_VEC3, "NORMAL wasn't vec3");
+      ASSERT_MSG(attrib_accessor.componentType == TINYGLTF_COMPONENT_TYPE_FLOAT,
+                 "NORMAL wasn't float");
+      normals = extract_vec3f(data_ptr, count, byte_stride);
+      ASSERT(normals.size() == result.size());
+    } else {
+      lg::error("No NORMAL attribute for mesh: {}", debug_name);
+    }
+  }
+
+  for (auto& v : result) {
+    v.color_index = 0;
+    if (!got_texture) {
+      v.s = 0;
+      v.t = 0;
+    }
+
+    v.q_unused = 0;
+    v.pad[0] = 0;
+    v.pad[1] = 0;
+    v.pad[2] = 0;
+  }
+  // TODO: other properties
+  return {std::move(result), std::move(vtx_colors), std::move(normals)};
+}
+
+DrawMode make_default_draw_mode() {  // depth write + GEQUAL test, no alpha blend, s/t clamp, fog
+  DrawMode mode;
+  mode.set_depth_write_enable(true);
+  mode.set_depth_test(GsTest::ZTest::GEQUAL);
+  mode.set_alpha_blend(DrawMode::AlphaBlend::DISABLED);
+  mode.set_aref(0);
+  mode.set_alpha_fail(GsTest::AlphaFail::KEEP);
+  mode.set_clamp_s_enable(true);
+  mode.set_clamp_t_enable(true);
+  mode.disable_filt();  // for checkerboard...
+  mode.enable_tcc();    // ?
+  mode.disable_at();    // NOTE(review): presumably the alpha test - confirm
+  mode.enable_zt();     // NOTE(review): presumably the depth (z) test - confirm
+  mode.disable_ab();    // NOTE(review): presumably alpha blending - confirm
+  mode.disable_decal();
+  mode.enable_fog();
+  return mode;
+}
+
+int texture_pool_debug_checker(TexturePool* pool) {
+  // the checkerboard is only added once: if it is already in the pool, hand back its index.
+  const auto existing = pool->textures_by_name.find("DEBUG_CHECKERBOARD");
+  if (existing != pool->textures_by_name.end()) {
+    return existing->second;
+  }
+  const size_t idx = pool->textures_by_idx.size();
+  pool->textures_by_name["DEBUG_CHECKERBOARD"] = idx;
+  auto& tex = pool->textures_by_idx.emplace_back();
+  tex.w = 16;
+  tex.h = 16;
+  tex.debug_name = "DEBUG_CHECKERBOARD";
+  tex.debug_tpage_name = "DEBUG";
+  tex.load_to_pool = false;
+  tex.combo_id = 0;  // doesn't matter, not a pool tex
+  tex.data.resize(16 * 16);
+  const u32 dark = 0xa0303030;
+  const u32 light = 0xa0e0e0e0;
+  for (int row = 0; row < 16; row++) {
+    for (int col = 0; col < 16; col++) {
+      tex.data[row * 16 + col] = (((row / 4) ^ (col / 4)) & 1) ? light : dark;
+    }
+  }
+  return idx;
+}
+
+// Add a GLTF image (must be 8-bit, 4-component) to the pool, or reuse the entry with its name.
+// Returns the image's index in pool->textures_by_idx.
+int texture_pool_add_texture(TexturePool* pool, const tinygltf::Image& tex) {
+  const auto& existing = pool->textures_by_name.find(tex.name);
+  if (existing != pool->textures_by_name.end()) {
+    lg::info("Reusing image: {}", tex.name);
+    return existing->second;
+  }
+  ASSERT(tex.bits == 8);
+  ASSERT(tex.component == 4);
+  ASSERT(tex.pixel_type == TINYGLTF_TEXTURE_TYPE_UNSIGNED_BYTE);
+  size_t idx = pool->textures_by_idx.size();
+  pool->textures_by_name[tex.name] = idx;
+  auto& tt = pool->textures_by_idx.emplace_back();
+  tt.w = tex.width;
+  tt.h = tex.height;
+  tt.debug_name = tex.name;
+  tt.debug_tpage_name = "custom-level";
+  tt.load_to_pool = false;
+  tt.combo_id = 0;  // doesn't matter, not a pool tex
+  tt.data.resize(tt.w * tt.h * 4 / sizeof(tt.data[0]));  // exactly w * h 4-byte pixels
+  ASSERT(tex.image.size() >= tt.data.size() * sizeof(tt.data[0]));
+  memcpy(tt.data.data(), tex.image.data(), tt.data.size() * sizeof(tt.data[0]));
+  return idx;
+}
+}  // namespace
+
+math::Matrix4f affine_translation(const math::Vector3f& translation) {
+  math::Matrix4f result = math::Matrix4f::identity();
+  for (int i = 0; i < 3; i++) {
+    result(i, 3) = translation[i];  // translation goes in entries (0,3), (1,3), (2,3)
+  }
+  result(3, 3) = 1;
+  return result;
+}
+
+math::Matrix4f affine_scale(const math::Vector3f& scale) {
+  math::Matrix4f result = math::Matrix4f::zero();
+  for (int i = 0; i < 3; i++) {
+    result(i, i) = scale[i];  // per-axis scale factors on the diagonal
+  }
+  result(3, 3) = 1;
+  return result;
+}
+
+math::Matrix4f affine_rot_qxyzw(const math::Vector4f& quat) {  // quat components: x, y, z, w
+  // NOTE(review): looks like the usual unit-quaternion -> rotation formula; assumes normalized
+  math::Matrix4f result = math::Matrix4f::zero();
+  result(3, 3) = 1;
+  result(0, 0) = 1.0 - 2.0 * (quat.y() * quat.y() + quat.z() * quat.z());
+  result(0, 1) = 2.0 * (quat.x() * quat.y() - quat.z() * quat.w());
+  result(0, 2) = 2.0 * (quat.x() * quat.z() + quat.y() * quat.w());
+  result(1, 0) = 2.0 * (quat.x() * quat.y() + quat.z() * quat.w());
+  result(1, 1) = 1.0 - 2.0 * (quat.x() * quat.x() + quat.z() * quat.z());
+  result(1, 2) = 2.0 * (quat.y() * quat.z() - quat.x() * quat.w());
+  result(2, 0) = 2.0 * (quat.x() * quat.z() - quat.y() * quat.w());
+  result(2, 1) = 2.0 * (quat.y() * quat.z() + quat.x() * quat.w());
+  result(2, 2) = 1.0 - 2.0 * (quat.x() * quat.x() + quat.y() * quat.y());
+  return result; }
+
+math::Vector3f vector3f_from_gltf(const std::vector<double>& in) {
+  ASSERT(in.size() == 3);  // exactly three doubles are expected
+  return math::Vector3f{in[0], in[1], in[2]};
+}
+
+math::Vector4f vector4f_from_gltf(const std::vector<double>& in) {
+  ASSERT(in.size() == 4);  // exactly four doubles are expected
+  return math::Vector4f{in[0], in[1], in[2], in[3]};
+}
+
+math::Matrix4f matrix_from_node(const tinygltf::Node& node) {
+  // a node either carries a full matrix...
+  if (!node.matrix.empty()) {
+    math::Matrix4f from_matrix;
+    for (int i = 0; i < 16; i++) {
+      // copy the 16 values in the order they are stored in the node
+      from_matrix.data()[i] = node.matrix[i];
+    }
+    return from_matrix;
+  }
+
+  // ...or separate translation / rotation / scale parts, each of which may be missing.
+  // a missing part is replaced with identity.
+  const math::Matrix4f identity = math::Matrix4f::identity();
+  math::Matrix4f t = identity;
+  math::Matrix4f r = identity;
+  math::Matrix4f s = identity;
+
+  if (!node.translation.empty()) {
+    t = affine_translation(vector3f_from_gltf(node.translation));
+  }
+  if (!node.rotation.empty()) {
+    r = affine_rot_qxyzw(vector4f_from_gltf(node.rotation));
+  }
+  if (!node.scale.empty()) {
+    s = affine_scale(vector3f_from_gltf(node.scale));
+  }
+
+  // combined as translation * rotation * scale
+  return t * r * s;
+}
+
+struct NodeWithTransform {
+  int node_idx;             // index into model.nodes
+  math::Matrix4f w_T_node;  // product of the node matrices from the scene root down to this node
+};
+
+/*!
+ * Depth-first walk of the node tree, appending each node and its w_T_node to out.
+ */
+void node_find_helper(const tinygltf::Model& model,
+                      const math::Matrix4f& w_T_parent,
+                      int node_idx,
+                      std::vector<NodeWithTransform>* out) {
+  const auto& node = model.nodes.at(node_idx);
+  const math::Matrix4f w_T_node = w_T_parent * matrix_from_node(node);
+  out->push_back(NodeWithTransform{node_idx, w_T_node});
+  for (const auto child_idx : node.children) {
+    node_find_helper(model, w_T_node, child_idx, out);
+  }
+}
+
+std::vector<NodeWithTransform> flatten_nodes_from_all_scenes(const tinygltf::Model& model) {
+  const math::Matrix4f root_transform = math::Matrix4f::identity();  // roots have no parent
+  std::vector<NodeWithTransform> flattened;  // nodes of every scene end up in this one list
+  for (const auto& scene : model.scenes) {
+    for (const auto& root_idx : scene.nodes) {
+      node_find_helper(model, root_transform, root_idx, &flattened);
+    }
+  }
+  return flattened;
+}
+
+void dedup_vertices(const std::vector<tfrag3::PreloadedVertex>& vertices_in,
+                    std::vector<tfrag3::PreloadedVertex>& vertices_out,
+                    std::vector<u32>& old_to_new_out) {
+  // Both outputs must start empty: vertices_out receives each distinct vertex once, in order of
+  // first appearance, and old_to_new_out[i] is the index in vertices_out of input vertex i.
+  ASSERT(vertices_out.empty());
+  ASSERT(old_to_new_out.empty());
+  old_to_new_out.resize(vertices_in.size(), -1);
+
+  // maps a vertex that was already emitted to its slot in vertices_out
+  std::unordered_map<tfrag3::PreloadedVertex, u32, tfrag3::PreloadedVertex::hash> seen;
+
+  for (size_t src = 0; src < vertices_in.size(); src++) {
+    const auto& candidate = vertices_in[src];
+    // emplace leaves the map untouched when the vertex is already present, so the proposed
+    // index (the next free slot) is only recorded for vertices seen for the first time.
+    const auto [entry, is_new] = seen.emplace(candidate, static_cast<u32>(vertices_out.size()));
+    if (is_new) {
+      vertices_out.push_back(candidate);
+    }
+    old_to_new_out[src] = entry->second;
+  }
+}
+
+void dedup_vertices(TfragOutput& data) {
+  Timer timer;
+  size_t original_size = data.vertices.size();  // kept only for the log message below
+  std::vector<tfrag3::PreloadedVertex> new_verts;
+  std::vector<u32> old_to_new;
+  // replace data.vertices with its unique vertices; old_to_new maps old index -> new index
+  dedup_vertices(data.vertices, new_verts, old_to_new);
+  data.vertices = std::move(new_verts);
+  // rewrite every draw's indices so they point into the deduplicated vertex list
+  for (auto& draw : data.strip_draws) {
+    ASSERT(draw.runs.empty());  // not supported yet
+    for (auto& idx : draw.plain_indices) {
+      idx = old_to_new.at(idx);  // .at(): an index outside the old vertex list throws
+    }
+  }
+
+  lg::info("Deduplication took {:.2f} ms, {} -> {} ({:.2f} %)", timer.getMs(), original_size,
+           data.vertices.size(), 100.f * data.vertices.size() / original_size);
+}
+
+DrawMode draw_mode_from_sampler(const tinygltf::Sampler& sampler) {
+  DrawMode mode = make_default_draw_mode();  // start from defaults, then override filter/clamp
+  if (sampler.magFilter == TINYGLTF_TEXTURE_FILTER_NEAREST) {
+    ASSERT(sampler.minFilter == TINYGLTF_TEXTURE_FILTER_NEAREST);  // mixed min/mag is rejected
+    mode.set_filt_enable(false);
+  } else {
+    ASSERT(sampler.minFilter != TINYGLTF_TEXTURE_FILTER_NEAREST);  // mixed min/mag is rejected
+    mode.set_filt_enable(true);
+  }
+  // wrapS/wrapT: only CLAMP_TO_EDGE and REPEAT are handled; any other mode hits ASSERT(false)
+  switch (sampler.wrapS) {
+    case TINYGLTF_TEXTURE_WRAP_CLAMP_TO_EDGE:
+      mode.set_clamp_s_enable(true);
+      break;
+    case TINYGLTF_TEXTURE_WRAP_REPEAT:
+      mode.set_clamp_s_enable(false);
+      break;
+    default:
+      ASSERT(false);
+  }
+
+  switch (sampler.wrapT) {
+    case TINYGLTF_TEXTURE_WRAP_CLAMP_TO_EDGE:
+      mode.set_clamp_t_enable(true);
+      break;
+    case TINYGLTF_TEXTURE_WRAP_REPEAT:
+      mode.set_clamp_t_enable(false);
+      break;
+    default:
+      ASSERT(false);
+  }
+
+  return mode;
+}
+
+/*!
+ * Build tfrag draw data: merge all mesh primitives into out.vertices, one StripDraw per material.
+ */
+void extract(const Input& in,
+             TfragOutput& out,
+             const tinygltf::Model& model,
+             const std::vector<NodeWithTransform>& all_nodes) {
+  std::vector<math::Vector<u8, 4>> all_vtx_colors;  // parallel to out.vertices until dedup
+  ASSERT(out.vertices.empty());
+  std::map<int, tfrag3::StripDraw> draw_by_material;  // key: glTF material index (-1 if none)
+  int mesh_count = 0;
+  int prim_count = 0;
+
+  for (const auto& n : all_nodes) {
+    const auto& node = model.nodes[n.node_idx];
+    if (node.mesh >= 0) {
+      const auto& mesh = model.meshes[node.mesh];
+      if (!mesh.extras.Has("tfrag")) {
+        // fmt::print("skip tfrag: {}\n", mesh.name);
+        // continue;
+      }
+      mesh_count++;
+      for (const auto& prim : mesh.primitives) {
+        prim_count++;
+        // extract index buffer (the count of already-merged vertices is passed as the last arg)
+        std::vector<u32> prim_indices = gltf_index_buffer(model, prim.indices, out.vertices.size());
+        ASSERT_MSG(prim.mode == TINYGLTF_MODE_TRIANGLES, "Unsupported triangle mode");
+        // extract vertices
+        auto verts =
+            gltf_vertices(model, prim.attributes, n.w_T_node, in.get_colors, false, mesh.name);
+        out.vertices.insert(out.vertices.end(), verts.vtx.begin(), verts.vtx.end());
+        if (in.get_colors) {
+          all_vtx_colors.insert(all_vtx_colors.end(), verts.vtx_colors.begin(),
+                                verts.vtx_colors.end());
+        }
+
+        // TODO: just putting it all in one material
+        auto& draw = draw_by_material[prim.material];
+        draw.mode = make_default_draw_mode();                        // todo rm
+        draw.tree_tex_id = texture_pool_debug_checker(in.tex_pool);  // todo rm
+        const auto prim_tris = prim_indices.size() / 3;
+        draw.num_triangles += prim_tris;
+        if (draw.vis_groups.empty()) {  // each draw uses a single vis group, created on first use
+          draw.vis_groups.emplace_back();
+        }
+        auto& grp = draw.vis_groups.back();
+        grp.num_inds += prim_indices.size();
+        grp.num_tris += prim_tris;  // not draw.num_triangles: that is already a running total
+        grp.vis_idx_in_pc_bvh = UINT32_MAX;
+
+        draw.plain_indices.insert(draw.plain_indices.end(), prim_indices.begin(),
+                                  prim_indices.end());
+      }
+    }
+  }
+
+  for (const auto& [mat_idx, d_] : draw_by_material) {
+    out.strip_draws.push_back(d_);
+    auto& draw = out.strip_draws.back();
+    draw.mode = make_default_draw_mode();
+
+    if (mat_idx == -1) {
+      lg::warn("Draw had a material index of -1, using default texture.");
+      draw.tree_tex_id = texture_pool_debug_checker(in.tex_pool);
+      continue;
+    }
+    const auto& mat = model.materials[mat_idx];
+    int tex_idx = mat.pbrMetallicRoughness.baseColorTexture.index;  // only base color is used
+    if (tex_idx == -1) {
+      lg::warn("Material {} has no texture, using default texture.", mat.name);
+      draw.tree_tex_id = texture_pool_debug_checker(in.tex_pool);
+      continue;
+    }
+
+    const auto& tex = model.textures[tex_idx];
+    ASSERT(tex.sampler >= 0);
+    ASSERT(tex.source >= 0);
+    draw.mode = draw_mode_from_sampler(model.samplers.at(tex.sampler));
+
+    const auto& img = model.images[tex.source];
+    draw.tree_tex_id = texture_pool_add_texture(in.tex_pool, img);
+  }
+  lg::info("total of {} unique materials", out.strip_draws.size());
+
+  lg::info("Merged {} meshes and {} prims into {} vertices", mesh_count, prim_count,
+           out.vertices.size());
+
+  if (in.get_colors) {
+    Timer quantize_timer;
+    auto quantized = quantize_colors_octree(all_vtx_colors, 1024);
+    for (size_t i = 0; i < out.vertices.size(); i++) {
+      out.vertices[i].color_index = quantized.vtx_to_color[i];
+    }
+    out.color_palette = std::move(quantized.final_colors);
+    lg::info("Color palette generation took {:.2f} ms", quantize_timer.getMs());
+  }
+
+  dedup_vertices(out);  // must run after color_index is assigned, since indices get remapped
+}
+
+void extract(const Input& in,
+             CollideOutput& out,
+             const tinygltf::Model& model,
+             const std::vector<NodeWithTransform>& all_nodes) {
+  int mesh_count = 0;  // only referenced by the commented-out log at the end
+  int prim_count = 0;  // only referenced by the commented-out log at the end
+  int suspicious_faces = 0;  // number of faces whose v1/v2 were swapped below
+  // Appends one CollideFace to out.faces per triangle of every mesh primitive; `in` is not read.
+  for (const auto& n : all_nodes) {
+    const auto& node = model.nodes[n.node_idx];
+    fmt::print("node: {} {}\n", node.name, node.mesh);
+    if (node.mesh >= 0) {
+      const auto& mesh = model.meshes[node.mesh];
+      if (!mesh.extras.Has("collide")) {
+        // fmt::print("skip collide: {}\n", mesh.name);
+        // continue;
+      }
+      mesh_count++;
+      for (const auto& prim : mesh.primitives) {
+        prim_count++;
+        // extract index buffer (passes 0: indices are used directly on this primitive's verts)
+        std::vector<u32> prim_indices = gltf_index_buffer(model, prim.indices, 0);
+        ASSERT_MSG(prim.mode == TINYGLTF_MODE_TRIANGLES, "Unsupported triangle mode");
+        // extract vertices
+        auto verts = gltf_vertices(model, prim.attributes, n.w_T_node, false, true, mesh.name);
+        // walk the index buffer three indices (one triangle) at a time
+        for (size_t iidx = 0; iidx < prim_indices.size(); iidx += 3) {
+          CollideFace face;
+
+          // get the positions (.at() throws on a truncated index list or out-of-range index)
+          for (int j = 0; j < 3; j++) {
+            auto& vtx = verts.vtx.at(prim_indices.at(iidx + j));
+            face.v[j].x() = vtx.x;
+            face.v[j].y() = vtx.y;
+            face.v[j].z() = vtx.z;
+          }
+
+          // now face normal, computed for the reversed winding order (v0, v2, v1)
+          math::Vector3f face_normal =
+              (face.v[2] - face.v[0]).cross(face.v[1] - face.v[0]).normalized();
+          // compare that reversed-winding normal against each corner's vertex normal
+          float dots[3];
+          for (int j = 0; j < 3; j++) {
+            dots[j] = face_normal.dot(verts.normals.at(prim_indices.at(iidx + j)).normalized());
+          }
+          // all three agree with the reversed winding: swap v1/v2 and count the face
+          if (dots[0] > 1e-3 && dots[1] > 1e-3 && dots[2] > 1e-3) {
+            suspicious_faces++;
+            std::swap(face.v[2], face.v[1]);
+          }
+
+          face.bsphere = math::bsphere_of_triangle(face.v);
+          face.bsphere.w() += 1e-1;  // pad the radius (w) by 0.1
+          for (int j = 0; j < 3; j++) {
+            float output_dist = face.bsphere.w() - (face.bsphere.xyz() - face.v[j]).length();
+            if (output_dist < 0) {  // vertex lies outside the padded sphere: print diagnostics
+              fmt::print("{}\n", output_dist);
+              fmt::print("BAD:\n{}\n{}\n{}\n", face.v[0].to_string_aligned(),
+                         face.v[1].to_string_aligned(), face.v[2].to_string_aligned());
+              fmt::print("bsphere: {}\n", face.bsphere.to_string_aligned());
+            }
+          }
+
+          out.faces.push_back(face);
+        }
+      }
+    }
+  }
+
+  lg::info("{} out of {} faces were suspicious (a small number is ok)", suspicious_faces,
+           out.faces.size());
+  // lg::info("Collision extract{} {}", mesh_count, prim_count);
+}
+
+void extract(const Input& in, Output& out) {
+  lg::info("Reading gltf mesh: {}", in.filename);
+  Timer read_timer;  // times the whole load + extract, reported in the final log line
+  tinygltf::TinyGLTF loader;
+  tinygltf::Model model;
+  std::string err, warn;
+  bool res = loader.LoadBinaryFromFile(&model, &err, &warn, in.filename);  // binary loader only
+  ASSERT_MSG(warn.empty(), warn.c_str());  // any loader warning trips this assert
+  ASSERT_MSG(err.empty(), err.c_str());
+  ASSERT_MSG(res, "Failed to load GLTF file!");
+  auto all_nodes = flatten_nodes_from_all_scenes(model);  // shared by both passes below
+  extract(in, out.tfrag, model, all_nodes);
+  extract(in, out.collide, model, all_nodes);
+  lg::info("GLTF total took {:.2f} ms", read_timer.getMs());
+}
+}  // namespace gltf_mesh_extract
diff --git a/goalc/build_level/gltf_mesh_extract.h b/goalc/build_level/gltf_mesh_extract.h
new file mode 100644
index 0000000000..5bc0e47606
--- /dev/null
+++ b/goalc/build_level/gltf_mesh_extract.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include <string>
+
+#include "common/custom_data/Tfrag3Data.h"
+#include "goalc/build_level/TexturePool.h"
+#include "goalc/build_level/collide_common.h"
+
+namespace gltf_mesh_extract {
+
+struct Input {
+  std::string filename;  // path handed to tinygltf's LoadBinaryFromFile
+  TexturePool* tex_pool = nullptr;  // pool that material textures are added to
+  bool get_colors = true;  // when set, vertex colors are extracted and quantized to a palette
+};
+
+struct TfragOutput {
+  std::vector<tfrag3::StripDraw> strip_draws;  // one draw per glTF material index
+  std::vector<tfrag3::PreloadedVertex> vertices;  // deduplicated; draws index into this
+  std::vector<math::Vector<u8, 4>> color_palette;  // only filled when Input::get_colors is set
+};
+
+struct CollideOutput {
+  std::vector<CollideFace> faces;  // one face per triangle of every mesh primitive
+};
+
+struct Output {
+  TfragOutput tfrag;
+  CollideOutput collide;
+};
+
+void extract(const Input& in, Output& out);  // loads in.filename and fills both outputs
+
+}  // namespace gltf_mesh_extract
\ No newline at end of file
diff --git a/goalc/data_compiler/DataObjectGenerator.cpp b/goalc/data_compiler/DataObjectGenerator.cpp
index 12fa9767d7..6864bfa2f6 100644
--- a/goalc/data_compiler/DataObjectGenerator.cpp
+++ b/goalc/data_compiler/DataObjectGenerator.cpp
@@ -51,6 +51,17 @@ int DataObjectGenerator::add_word(u32 word) {
   return result;
 }
 
+int DataObjectGenerator::add_word_float(float f) {
+  auto result = int(m_words.size());  // index the new word will occupy
+  m_words.push_back(0);
+  memcpy(&m_words.back(), &f, sizeof(float));  // store the float's raw bytes in the new word
+  return result;
+}
+
+void DataObjectGenerator::set_word(u32 word_idx, u32 val) {
+  m_words.at(word_idx) = val;  // overwrites an existing word; .at() throws if out of range
+}
+
 void DataObjectGenerator::link_word_to_word(int source, int target, int offset) {
   link_word_to_byte(source, target * 4 + offset);
 }
@@ -68,6 +79,10 @@ int DataObjectGenerator::add_ref_to_string_in_pool(const std::string& str) {
   return result;
 }
 
+void DataObjectGenerator::link_word_to_string_in_pool(const std::string& str, int word_idx) {
+  m_string_pool[str].push_back(word_idx);  // records word_idx under str, adding str if new
+}
+
 int DataObjectGenerator::add_type_tag(const std::string& str) {
   auto result = int(m_words.size());
   m_words.push_back(0);
@@ -82,12 +97,20 @@ int DataObjectGenerator::add_symbol_link(const std::string& str) {
   return result;
 }
 
+void DataObjectGenerator::link_word_to_symbol(const std::string& str, int word_idx) {
+  m_symbol_links[str].push_back(word_idx);  // records word_idx under symbol name str
+}
+
 void DataObjectGenerator::align(int alignment_words) {
   while (m_words.size() % alignment_words) {
     m_words.push_back(0);
   }
 }
 
+void DataObjectGenerator::align_to_basic() {
+  align(4);  // pad with zero words until the word count is a multiple of 4
+}
+
 int DataObjectGenerator::words() const {
   return int(m_words.size());
 }
diff --git a/goalc/data_compiler/DataObjectGenerator.h b/goalc/data_compiler/DataObjectGenerator.h
index 3522b9a3b7..97e5b32dd1 100644
--- a/goalc/data_compiler/DataObjectGenerator.h
+++ b/goalc/data_compiler/DataObjectGenerator.h
@@ -8,15 +8,22 @@
 class DataObjectGenerator {
  public:
   int add_word(u32 word);
+  int add_word_float(float f);
+  void set_word(u32 word_idx, u32 val);
   void link_word_to_word(int source, int target, int offset = 0);
   void link_word_to_byte(int source_word, int target_byte);
   int add_ref_to_string_in_pool(const std::string& str);
+  void link_word_to_string_in_pool(const std::string& str, int word_idx);
   int add_type_tag(const std::string& str);
   int add_symbol_link(const std::string& str);
+  void link_word_to_symbol(const std::string& str, int word_idx);
   std::vector<u8> generate_v2();
   std::vector<u8> generate_v4();
   void align(int alignment_words);
+  void align_to_basic();
   int words() const;
+  size_t current_offset_bytes() const { return m_words.size() * sizeof(u32); }
+  u8* data() { return (u8*)m_words.data(); }
 
  private:
   void add_strings();
diff --git a/goalc/make/MakeSystem.cpp b/goalc/make/MakeSystem.cpp
index a59154d44b..21f9c1b8d8 100644
--- a/goalc/make/MakeSystem.cpp
+++ b/goalc/make/MakeSystem.cpp
@@ -66,6 +66,7 @@ MakeSystem::MakeSystem() {
   add_tool<GroupTool>();
   add_tool<TextTool>();
   add_tool<SubtitleTool>();
+  add_tool<BuildLevelTool>();
 }
 
 /*!
diff --git a/goalc/make/Tools.cpp b/goalc/make/Tools.cpp
index d387bb75bf..01194a4ceb 100644
--- a/goalc/make/Tools.cpp
+++ b/goalc/make/Tools.cpp
@@ -1,5 +1,3 @@
-
-
 #include "Tools.h"
 
 #include <filesystem>
@@ -11,6 +9,7 @@
 #include "goalc/data_compiler/dir_tpages.h"
 #include "goalc/data_compiler/game_count.h"
 #include "goalc/data_compiler/game_text_common.h"
+#include "goalc/build_level/build_level.h"
 
 CompilerTool::CompilerTool(Compiler* compiler) : Tool("goalc"), m_compiler(compiler) {}
 
@@ -186,3 +185,20 @@ bool SubtitleTool::run(const ToolInput& task) {
   }
   return true;
 }
+
+BuildLevelTool::BuildLevelTool() : Tool("build-level") {}  // tool name is "build-level"
+
+bool BuildLevelTool::needs_run(const ToolInput& task) {
+  if (task.input.size() != 1) {  // exactly one input file is accepted
+    throw std::runtime_error(fmt::format("Invalid amount of inputs to {} tool", name()));
+  }
+  auto deps = get_build_level_deps(task.input.at(0));  // replaces task.deps in the check below
+  return Tool::needs_run({task.input, deps, task.output, task.arg});
+}
+
+bool BuildLevelTool::run(const ToolInput& task) {
+  if (task.input.size() != 1) {  // exactly one input file is accepted
+    throw std::runtime_error(fmt::format("Invalid amount of inputs to {} tool", name()));
+  }
+  return run_build_level(task.input.at(0), task.output.at(0));  // .at(0) throws if no output
+}
diff --git a/goalc/make/Tools.h b/goalc/make/Tools.h
index e6d61b2b04..966a1820f2 100644
--- a/goalc/make/Tools.h
+++ b/goalc/make/Tools.h
@@ -62,3 +62,10 @@ class SubtitleTool : public Tool {
   bool run(const ToolInput& task) override;
   bool needs_run(const ToolInput& task) override;
 };
+
+class BuildLevelTool : public Tool {
+ public:
+  BuildLevelTool();  // registers the tool under the name "build-level"
+  bool run(const ToolInput& task) override;  // calls run_build_level(input[0], output[0])
+  bool needs_run(const ToolInput& task) override;  // checks get_build_level_deps(input[0]) too
+};
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 678f90c5fe..900f43bd2c 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -1,5 +1,4 @@
 add_subdirectory(level_tools)
-add_subdirectory(build_level)
 
 add_executable(dgo_unpacker
         dgo_unpacker.cpp)
diff --git a/tools/build_level/CMakeLists.txt b/tools/build_level/CMakeLists.txt
deleted file mode 100644
index 03bca07db3..0000000000
--- a/tools/build_level/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-add_executable(build_level
-        main.cpp)
-
-target_link_libraries(build_level common compiler tiny_gltf)
\ No newline at end of file
diff --git a/tools/build_level/main.cpp b/tools/build_level/main.cpp
deleted file mode 100644
index d269c89436..0000000000
--- a/tools/build_level/main.cpp
+++ /dev/null
@@ -1,3 +0,0 @@
-int main() {
-  return 0;
-}
\ No newline at end of file

From c188efff1086ac2081ad24a2b104b2df6e1ca218 Mon Sep 17 00:00:00 2001
From: Brent Hickey <brent.hickey@icloud.com>
Date: Tue, 21 Jun 2022 15:20:32 -0700
Subject: [PATCH 08/17] [game] 150fps fixes (#1503)

* [game] 150fps fixes

* oops

* missed 1
---
 goal_src/engine/draw/drawable.gc |  4 ++--
 goal_src/engine/gfx/eye.gc       | 15 +++++++++++++--
 goal_src/engine/load/loader.gc   |  5 +++++
 goal_src/engine/target/target.gc | 31 ++++++++++++++++++++++++++-----
 4 files changed, 46 insertions(+), 9 deletions(-)

diff --git a/goal_src/engine/draw/drawable.gc b/goal_src/engine/draw/drawable.gc
index 3068e0b9e1..ccf9200421 100644
--- a/goal_src/engine/draw/drawable.gc
+++ b/goal_src/engine/draw/drawable.gc
@@ -1337,9 +1337,9 @@
         ;; this counts actual frames, not seconds. Will count 2 frames if we lag
         ;; When counting frames while running at > 60fps we need to scale it, but always count at least 1 frame
         (+! (-> disp integral-frame-counter) (if (= (-> *setting-control* current video-mode) '150fps)
-                                                 (min 1 (the int (+ 1 (* 0.4 time-ratio))))
+                                                 (max 1 (the int (* 0.4 time-ratio)))
                                                  (if (= (-> *setting-control* current video-mode) '100fps)
-                                                     (min 1 (the int (+ 1 (* 0.6 time-ratio))))
+                                                     (max 1 (the int (* 0.6 time-ratio)))
                                                      (the int time-ratio))))
         ;; this counts actual frames, not doubling for lag. Will count 1 per frame drawn
         (+! (-> disp actual-frame-counter) 1)
diff --git a/goal_src/engine/gfx/eye.gc b/goal_src/engine/gfx/eye.gc
index a331dad64a..bf8a135f23 100644
--- a/goal_src/engine/gfx/eye.gc
+++ b/goal_src/engine/gfx/eye.gc
@@ -832,7 +832,13 @@
           (when (not (paused?))
             (cond
               ((and (>= (-> s5-3 left lid) 0.0) (>= (-> s5-3 right lid) 0.0))
-               (set! (-> s5-3 random-time) (the-as uint 60))
+               (set! (-> s5-3 random-time) (the int (* (if (or (= (-> *setting-control* current video-mode) '150fps)
+                                                               (= (-> *setting-control* current video-mode) '100fps))
+                                                           (if (= (-> *setting-control* current video-mode) '150fps)
+                                                               2.5
+                                                               1.6667)
+                                                           1.0)
+                                                       (the-as uint 60))))
                (set! (-> s5-3 blink) 0.0)
                )
               (else
@@ -841,7 +847,12 @@
                   (when (< v1-54 (the-as uint 10))
                     (set! (-> s5-3 blink) (-> *eye-work* blink-table v1-54))
                     (if (zero? v1-54)
-                        (set! (-> s5-3 random-time) (the-as uint (the int (rand-vu-float-range 60.0 240.0))))
+                        (set! (-> s5-3 random-time) (the-as uint (the int (* (if (or (= (-> *setting-control* current video-mode) '150fps)
+                                                                                     (= (-> *setting-control* current video-mode) '100fps))
+                                                                                 (if (= (-> *setting-control* current video-mode) '150fps)
+                                                                                     2.5
+                                                                                     1.6667)
+                                                                                 1.0) (rand-vu-float-range 60.0 240.0)))))
                         )
                     )
                   )
diff --git a/goal_src/engine/load/loader.gc b/goal_src/engine/load/loader.gc
index f503b02e0f..68b1c635ee 100644
--- a/goal_src/engine/load/loader.gc
+++ b/goal_src/engine/load/loader.gc
@@ -972,6 +972,11 @@
                                         (string= (-> arg0 name) "eichar-ambient-2")
                                         (string= (-> arg0 name) "eichar-ambient-3")
                                         (string= (-> arg0 name) "eichar-ambient-4")
+                                        (string= (-> arg0 name) "fishermans-boat-ride-to-village1-alt")
+                                        (string= (-> arg0 name) "fishermans-boat-ride-to-village1")
+                                        (string= (-> arg0 name) "fishermans-boat-ride-to-misty")
+                                        (string= (-> arg0 name) "gondola-ride-up")
+                                        (string= (-> arg0 name) "gondola-ride-down")
                                         )
                                     )
                                (if (= (-> *setting-control* current video-mode) '150fps) 0.4 0.6)
diff --git a/goal_src/engine/target/target.gc b/goal_src/engine/target/target.gc
index 406a1f0e03..9f655ccbf5 100644
--- a/goal_src/engine/target/target.gc
+++ b/goal_src/engine/target/target.gc
@@ -809,7 +809,12 @@
                  )
                (until (ja-done? 0)
                  (suspend)
-                 (ja :num! (seek! max (/ (* (fmax 20480.0 (-> self control unknown-float01)) (-> *display* seconds-per-frame))
+                 ;; This controls the slow walk animation that occurs after landing from a jump. > 60fps was broken here so I think should not be tied to seconds-per-frame
+                 ;; unknown-float01 is the magnitude of xz velocity
+                 (ja :num! (seek! max (/ (* (fmax 20480.0 (-> self control unknown-float01)) (if (or (= (-> *setting-control* current video-mode) '150fps)
+                                                                                                     (= (-> *setting-control* current video-mode) '100fps))
+                                                                                                 0.016666668
+                                                                                                 (-> *display* seconds-per-frame)))
                                         (/ (-> *TARGET-bank* run-up-cycle-dist) (-> *TARGET-bank* run-cycle-length))
                                         )
                                  )
@@ -879,7 +884,11 @@
             (set! f30-0 (seek
                           f30-0
                           (fmax 0.0 (fmin 1.0 (* 0.000048828126 (+ -16384.0 (-> self control unknown-float01)))))
-                          (* 2.0 (-> *display* seconds-per-frame))
+                          ;; Jaks' walk animation felt sped up in > 60fps. I'm thinking it shouldn't be tied to seconds-per-frame but I'm not positive
+                          (* 2.0 (if (or (= (-> *setting-control* current video-mode) '150fps)
+                                         (= (-> *setting-control* current video-mode) '100fps))
+                                     0.016666668
+                                     (-> *display* seconds-per-frame)))
                           )
                   )
             (let ((v1-317 (-> self skel effect)))
@@ -2394,7 +2403,13 @@
         (seek!
           (-> self control dynam gravity-length)
           (-> self control unknown-dynamics00 gravity-length)
-          (* 245760.0 (-> *display* seconds-per-frame))
+          (* 245760.0 (if (or (= (-> *setting-control* current video-mode) '150fps)
+                              (= (-> *setting-control* current video-mode) '100fps)
+                              )
+                          0.016666668
+                          (-> *display* seconds-per-frame)
+                          )
+             )
           )
         )
     (when (and (>= (- (-> *display* base-frame-counter) (-> self state-time)) (seconds 0.05))
@@ -2726,9 +2741,15 @@
               (lambda ((arg0 target)) (let ((f0-3 (seek
                                                     (-> arg0 control root-prim local-sphere w)
                                                     (the-as float 28672.0)
-                                                    (* 286720.0 (-> *display* seconds-per-frame))
+                                                    (* 286720.0 (if (or (= (-> *setting-control* current video-mode) '150fps)
+                                                                        (= (-> *setting-control* current video-mode) '100fps)
+                                                                        )
+                                                                    0.016666668
+                                                                    (-> *display* seconds-per-frame)
+                                                                    )
+                                                       )
                                                     )
-                                                  )
+                                              )
                                             )
                                         (set! (-> arg0 control root-prim local-sphere w) f0-3)
                                         f0-3

From 65f47f2bfc9dcd1105d1775ff9a036b80c619174 Mon Sep 17 00:00:00 2001
From: Brent Hickey <brent.hickey@icloud.com>
Date: Tue, 21 Jun 2022 15:21:57 -0700
Subject: [PATCH 09/17] [game] Scale first-person and progress HUD for pc 16:9
 (#1504)

* [game] Scale first-person and progress HUD for pc 16:9

* oops
---
 goal_src/engine/target/target2.gc       | 2 +-
 goal_src/engine/ui/progress/progress.gc | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/goal_src/engine/target/target2.gc b/goal_src/engine/target/target2.gc
index 56c5efc115..4cf2be00f4 100644
--- a/goal_src/engine/target/target2.gc
+++ b/goal_src/engine/target/target2.gc
@@ -224,7 +224,7 @@
     )
   (case (get-aspect-ratio)
     (('aspect4x3)
-     (set! (-> self sides-x-scale) 3.5)
+     (set! (-> self sides-x-scale) (if (= (-> *pc-settings* aspect-custom-x) 16) 5.0 3.5))
      (set! (-> self sides-y-scale) 13.0)
      (set! (-> self x-offset) 0)
      0
diff --git a/goal_src/engine/ui/progress/progress.gc b/goal_src/engine/ui/progress/progress.gc
index 68a50db5c8..787da31d20 100644
--- a/goal_src/engine/ui/progress/progress.gc
+++ b/goal_src/engine/ui/progress/progress.gc
@@ -700,9 +700,9 @@
   (let ((f0-1 (* (1/ METER_LENGTH) (the float (-> obj in-out-position)))))
     (set! (-> obj particles 2 init-pos x) (the float (+ (-> obj right-x-offset) 409 (the int (* 301.5 f0-1)))))
     (set! (-> obj particles 1 init-pos x) (the float (+ (-> obj left-x-offset) 59)))
-    (set! (-> obj left-side-x-scale) (meters (+ (/ 3.5 (-> obj sides-x-scale)) (* 10.0 f0-1))))
+    (set! (-> obj left-side-x-scale) (meters (+ (/ (if (= (-> *pc-settings* aspect-custom-x) 16) 5.0 3.5) (-> obj sides-x-scale)) (* 10.0 f0-1))))
     (set! (-> obj left-side-y-scale) (meters (+ (-> obj sides-y-scale) (* 10.0 f0-1))))
-    (set! (-> obj right-side-x-scale) (meters (+ (/ 6.0 (-> obj sides-x-scale)) (* 4.0 f0-1))))
+    (set! (-> obj right-side-x-scale) (meters (+ (/ (if (= (-> *pc-settings* aspect-custom-x) 16) 8.5 6.0) (-> obj sides-x-scale)) (* 4.0 f0-1))))
     (set! (-> obj right-side-y-scale) (meters (+ (-> obj sides-y-scale) (* 4.0 f0-1))))
     )
   (dotimes (s5-0 (-> obj nb-of-particles))

From 818485b5016502e1b6e32856ca8fdc4074e741b3 Mon Sep 17 00:00:00 2001
From: Tyler Wilding <xTVaser@users.noreply.github.com>
Date: Tue, 21 Jun 2022 18:22:31 -0400
Subject: [PATCH 10/17] paths: ensure `imgui` and `game_config` respects the
 project path (#1505)

* paths: ensure `imgui` respects the project path

* paths: remove extra creation of `game_config/`; this is done when the settings are saved
---
 common/custom_data/TFrag3Data.cpp  | 3 ++-
 game/graphics/pipelines/opengl.cpp | 5 +++++
 game/runtime.cpp                   | 2 --
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/common/custom_data/TFrag3Data.cpp b/common/custom_data/TFrag3Data.cpp
index bd36741f69..879ee585da 100644
--- a/common/custom_data/TFrag3Data.cpp
+++ b/common/custom_data/TFrag3Data.cpp
@@ -297,7 +297,8 @@ void MercModelGroup::serialize(Serializer& ser) {
 void Level::serialize(Serializer& ser) {
   ser.from_ptr(&version);
   if (ser.is_loading() && version != TFRAG3_VERSION) {
-    ASSERT_MSG(false, fmt::format("version mismatch when loading tfrag3 data. Got {}, expected {}",
+    ASSERT_MSG(false, fmt::format("version mismatch when loading tfrag3 data. Got {}, expected {}, "
+                                  "did you forget to re-decompile?",
                                   version, TFRAG3_VERSION));
   }
 
diff --git a/game/graphics/pipelines/opengl.cpp b/game/graphics/pipelines/opengl.cpp
index 909b628f5f..584130db69 100644
--- a/game/graphics/pipelines/opengl.cpp
+++ b/game/graphics/pipelines/opengl.cpp
@@ -234,6 +234,11 @@ static std::shared_ptr<GfxDisplay> gl_make_display(int width,
   // this does initialization for stuff like the font data
   ImGui::CreateContext();
 
+  // Init ImGui settings. ImGui only stores these pointers, so the path strings must be static.
+  static const auto imgui_ini_path = file_util::get_file_path({"imgui.ini"});
+  static const auto imgui_log_path = file_util::get_file_path({"imgui_log.txt"});
+  ImGui::GetIO().IniFilename = imgui_ini_path.c_str();
+  ImGui::GetIO().LogFilename = imgui_log_path.c_str();
   // set up to get inputs for this window
   ImGui_ImplGlfw_InitForOpenGL(window, true);
 
diff --git a/game/runtime.cpp b/game/runtime.cpp
index 659652ed0b..8758933212 100644
--- a/game/runtime.cpp
+++ b/game/runtime.cpp
@@ -282,8 +282,6 @@ RuntimeExitStatus exec_runtime(int argc, char** argv) {
   g_argv = argv;
   g_main_thread_id = std::this_thread::get_id();
 
-  file_util::create_dir_if_needed("game_config/");
-
   // parse opengoal arguments
   bool enable_display = true;
   for (int i = 1; i < argc; i++) {

From 80f4b2d02eb58c6a5af4fe06db256230276ae08f Mon Sep 17 00:00:00 2001
From: Tyler Wilding <xTVaser@users.noreply.github.com>
Date: Tue, 21 Jun 2022 18:23:40 -0400
Subject: [PATCH 11/17] game: remove mystery windows chime sound when starting
 up (#1506)

---
 game/overlord/soundcommon.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/game/overlord/soundcommon.cpp b/game/overlord/soundcommon.cpp
index 40a72785a5..d564fc84ef 100644
--- a/game/overlord/soundcommon.cpp
+++ b/game/overlord/soundcommon.cpp
@@ -1,6 +1,7 @@
 #include "soundcommon.h"
 #include <cstdio>
 #include "common/util/Assert.h"
+#include <string>
 
 // TODO strcpy_toupper
 // TODO atoi
@@ -16,7 +17,9 @@ void ReadBankSoundInfo(SoundBank* bank, SoundBank* unk, s32 unk2) {
 void PrintBankInfo(SoundBank* bank) {
   printf("Bank %s\n\n", bank->name);
   for (u32 i = 0; i < bank->sound_count; i++) {
-    printf("%d : %16s : min %d max %d curve %d\n", i, bank->sound[i].name,
+    // Some names use all 16 characters (e.g. bonelurker-grunt) and have no null terminator
+    std::string name = std::string(bank->sound[i].name, 16);
+    printf("%d : %16s : min %d max %d curve %d\n", i, name.c_str(),
            bank->sound[i].fallof_params & 0x3fff, (bank->sound[i].fallof_params >> 14) & 0x3fff,
            bank->sound[i].fallof_params >> 28);
   }

From d373b08e2f7cf87fec1a134a7e40097ac15083fe Mon Sep 17 00:00:00 2001
From: Hat Kid <6624576+Hat-Kid@users.noreply.github.com>
Date: Wed, 22 Jun 2022 00:26:36 +0200
Subject: [PATCH 12/17] readme: update ubuntu and arch dependencies and small
 fixes (#1507)

- Ubuntu: Added `libpulse-dev` as a dependency for audio support
- Arch: Removed `gcc`, `make` and `g++` as those are all already contained in the `base-devel` group, replaced `taskfile-git` with `go-task` as the former is outdated and added `libpulse` for audio
- Removed the section about the files in `goal_src` being placeholders as the game is mostly complete now.
- Added missing description for `iso_data` contents
- Added `discord-rpc` to the third-party library list
---
 README.md | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 00fa072e30..1d9788fc59 100644
--- a/README.md
+++ b/README.md
@@ -46,7 +46,7 @@ This project is to port Jak 1 (NTSC, "black label" version) to PC. Over 98% of t
 - create tools to repack game assets into a format that our port uses.
 
 Our objectives are:
-- make the port a "native application" on x86-64, with high performance. It shouldn't emulated, interpreted, or transpiled.
+- make the port a "native application" on x86-64, with high performance. It shouldn't be emulated, interpreted, or transpiled.
 - Our GOAL compiler's performance should be around the same as unoptimized C.
 - try to match things from the original game and development as possible. For example, the original GOAL compiler supported live modification of code while the game is running, so we do the same, even though it's not required for just porting the game.
 - support modifications. It should be possible to make edits to the code without everything else breaking.
@@ -85,7 +85,7 @@ We don't save any assets from the game - you must bring your own copy of the gam
 Install packages and init repository:
 
 ```sh
-sudo apt install gcc make cmake build-essential g++ nasm clang-format libxrandr-dev libxinerama-dev libxcursor-dev libxi-dev python
+sudo apt install gcc make cmake build-essential g++ nasm clang-format libxrandr-dev libxinerama-dev libxcursor-dev libpulse-dev libxi-dev python
 sudo sh -c "$(curl --location https://taskfile.dev/install.sh)" -- -d -b /usr/local/bin
 ```
 
@@ -118,10 +118,12 @@ cmake -DCMAKE_SHARED_LINKER_FLAGS="-fuse-ld=lld" -DCMAKE_EXE_LINKER_FLAGS="-fuse
 Install packages and init repository:
 
 ```sh
-sudo pacman -S gcc make cmake base-devel g++ nasm python
-yay -S taskfile-git
+sudo pacman -S cmake libpulse base-devel nasm python
+yay -S go-task
 ```
 
+For Arch only, replace `task` with `go-task` in the rest of the instructions.
+
 Compile:
 
 ```sh
@@ -293,7 +295,7 @@ The second component to the project is the decompiler. You must have a copy of t
 
 Then run `decomp.sh` (Linux) or `decomp-jak1.bat` (Windows) to run the decompiler. The decompiler will extract assets to the `assets` folder. These assets will be used by the compiler when building the port, and you may want to turn asset extraction off after running it once. The decompiler will output code and other data intended to be inspected by humans in the `decompiler_out` folder. Stuff in this folder will not be used by the compiler.
 
-The third is the game source code, written in OpenGOAL. This is located in `goal_src`. All GOAL and GOOS code should be in this folder. Right now most of this is placeholders or incomplete, but you can take a look at `kernel/gcommon.gc` or `goal-lib.gc` to see some in-progress source code.
+The third is the game source code, written in OpenGOAL. This is located in `goal_src`. All GOAL and GOOS code should be in this folder.
 
 The final component is the "runtime", located in `game`. This is the part of the game that's written in C++. In the port, that includes:
 - The "C Kernel", which contains the GOAL linker and some low-level GOAL language features. GOAL has a completely custom dynamically linked object file format so in order to load the first GOAL code, you need a linker written in C++. Some low-level functions for memory allocation, communicating with the I/O Processor, symbol table, strings, and the type system are also implemented in C, as these are required for the linker. It also listens for incoming messages from the compiler and passes them to the running game. This also initializes the game, by initializing the PS2 hardware, allocating the GOAL heaps, loading the GOAL kernel off of the DVD, and executing the kernel dispatcher function. This is in the `game/kernel` folder. This should be as close as possible to the game, and all differences should be noted with a comment.
@@ -357,7 +359,7 @@ The final component is the "runtime", located in `game`. This is the part of the
     - `listener`: The OpenGOAL listener, which connects the compiler to a running GOAL program for the interactive REPL.
     - `make`: The OpenGOAL build system, builds both code and data files.
     - `regalloc`: Register allocator.
-- `iso_data`:
+- `iso_data`: Location of the user-provided DVD contents of the game that the decompiler extracts game assets and code from.
 - `out`: Outputs from the build process. Only the `iso` subfolder should contain assets used by the game.
     - `iso`: Final outputs that are used by the game.
     - `obj`: Object files generated by the compiler.
@@ -366,7 +368,8 @@ The final component is the "runtime", located in `game`. This is the part of the
 - `test`: Unit tests (run on GitHub Actions).
 - `third-party`: Third party libraries.
     - CMake Code Coverage. For code coverage statistics on GitHub builds.
-    - `fmt`. String formatting library.
+    - `discord-rpc`: Discord Rich Presence library.
+    - `fmt`: String formatting library.
     - `googletest`: Test framework.
     - `inja`: templating library used for generating test code for compiler tests.
     - `lzokay`: decompression code for Jak 2 and later DGOs.

From e44500dcf202bf0397f38ae823b7a2c932123263 Mon Sep 17 00:00:00 2001
From: Hat Kid <6624576+Hat-Kid@users.noreply.github.com>
Date: Wed, 22 Jun 2022 00:29:23 +0200
Subject: [PATCH 13/17] readme: add fedora build instructions (#1508)

---
 README.md | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/README.md b/README.md
index 1d9788fc59..259f103ae0 100644
--- a/README.md
+++ b/README.md
@@ -136,6 +136,28 @@ Run tests:
 ./test.sh
 ```
 
+### Fedora
+
+Install packages and init repository:
+
+```sh
+sudo dnf install cmake lld clang nasm libX11-devel libXrandr-devel libXinerama-devel libXcursor-devel libXi-devel pulseaudio-libs-devel
+sudo sh -c "$(curl --location https://taskfile.dev/install.sh)" -- -d -b /usr/local/bin
+```
+
+Compile with `clang`:
+
+```sh
+cmake -DCMAKE_SHARED_LINKER_FLAGS="-fuse-ld=lld" -DCMAKE_EXE_LINKER_FLAGS="-fuse-ld=lld" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -B build
+cmake --build build -j$(nproc)
+```
+
+Run tests:
+
+```sh
+./test.sh
+```
+
 ## Getting Started - Windows
 
 ### Required Software

From 3b2f9191abc8eab414dfbe5b0093e4e30b35d12d Mon Sep 17 00:00:00 2001
From: Matthew Wells <91291346+richarm4@users.noreply.github.com>
Date: Tue, 21 Jun 2022 15:38:25 -0700
Subject: [PATCH 14/17] Simplified time of day logic (#1487)

Slight optimizations by checking only the upper boundary as we check the lower boundary with the previous if statement(s).
---
 game/discord.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/game/discord.cpp b/game/discord.cpp
index 131df52887..b7f79606c3 100644
--- a/game/discord.cpp
+++ b/game/discord.cpp
@@ -70,9 +70,9 @@ const char* time_of_day_str(float time) {
 
   if (hour >= 0 && hour <= 9)
     return "green-sun";
-  else if (hour >= 10 && hour <= 21)
+  else if (hour < 22)
     return "day";
-  else if (hour >= 22 && hour <= 24)
+  else if (hour < 25)
     return "evening";
   else
     return "";

From 69a3007e9b974c8ccbeb2f5f3001c057842f4244 Mon Sep 17 00:00:00 2001
From: ManDude <7569514+ManDude@users.noreply.github.com>
Date: Wed, 22 Jun 2022 00:21:51 +0100
Subject: [PATCH 15/17] windows graphics fixes (#1512)

* fix blurry jp text

* fix weird interrupt lag from setting window size

* add window lock toggle and update settings ver

* better particle hacks
---
 game/graphics/display.h                         |  5 +----
 game/graphics/gfx.cpp                           |  6 ++++++
 game/graphics/gfx.h                             |  1 +
 game/graphics/pipelines/opengl.cpp              |  5 +++++
 game/graphics/pipelines/opengl.h                |  1 +
 game/kernel/kmachine.cpp                        |  5 +++++
 goal_src/engine/gfx/texture.gc                  |  2 +-
 goal_src/engine/sparticle/sparticle-launcher.gc |  7 ++++---
 goal_src/kernel-defs.gc                         |  1 +
 goal_src/pc/pckernel-h.gc                       |  9 ++++++---
 goal_src/pc/pckernel.gc                         | 10 +++++++++-
 11 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/game/graphics/display.h b/game/graphics/display.h
index f0943b8573..76a36aa538 100644
--- a/game/graphics/display.h
+++ b/game/graphics/display.h
@@ -10,10 +10,6 @@
 #include "common/util/Assert.h"
 #include "gfx.h"
 
-// lol hax
-#define __NYI_DEF \
-  { ASSERT_MSG(false, "nyi"); }
-
 // a GfxDisplay class is equivalent to a window that displays stuff. This holds an actual internal
 // window pointer used by whichever renderer. It also contains functions for setting and
 // retrieving certain window parameters.
@@ -49,6 +45,7 @@ class GfxDisplay {
   virtual void get_size(int* w, int* h) = 0;
   virtual GfxDisplayMode get_fullscreen() = 0;
   virtual void render() = 0;
+  virtual void set_lock(bool lock) = 0;
   bool is_active() const { return get_window() != nullptr; }
   void set_title(const char* title);
   const char* title() const { return m_title; }
diff --git a/game/graphics/gfx.cpp b/game/graphics/gfx.cpp
index 889932c8b8..ceb3018883 100644
--- a/game/graphics/gfx.cpp
+++ b/game/graphics/gfx.cpp
@@ -242,6 +242,12 @@ void set_fullscreen(GfxDisplayMode mode, int screen) {
   }
 }
 
+void set_window_lock(bool lock) {
+  if (Display::GetMainDisplay()) {
+    Display::GetMainDisplay()->set_lock(lock);
+  }
+}
+
 void input_mode_set(u32 enable) {
   if (enable == s7.offset + jak1_symbols::FIX_SYM_TRUE) {  // #t
     Pad::g_input_mode_mapping = g_settings.pad_mapping_info;
diff --git a/game/graphics/gfx.h b/game/graphics/gfx.h
index 5556cd2689..92380474b5 100644
--- a/game/graphics/gfx.h
+++ b/game/graphics/gfx.h
@@ -115,6 +115,7 @@ GfxDisplayMode get_fullscreen();
 void get_screen_size(s64 vmode_idx, s32* w, s32* h, s32* c);
 void set_letterbox(int w, int h);
 void set_fullscreen(GfxDisplayMode mode, int screen);
+void set_window_lock(bool lock);
 void input_mode_set(u32 enable);
 void input_mode_save();
 s64 get_mapped_button(s64 pad, s64 button);
diff --git a/game/graphics/pipelines/opengl.cpp b/game/graphics/pipelines/opengl.cpp
index 584130db69..d532e6536d 100644
--- a/game/graphics/pipelines/opengl.cpp
+++ b/game/graphics/pipelines/opengl.cpp
@@ -135,6 +135,7 @@ static int gl_init(GfxSettings& settings) {
   }
   glfwWindowHint(GLFW_DOUBLEBUFFER, GLFW_TRUE);
   glfwWindowHint(GLFW_SAMPLES, 1);
+  glfwWindowHint(GLFW_RESIZABLE, GLFW_FALSE);
 
   return 0;
 }
@@ -432,6 +433,10 @@ void GLDisplay::get_screen_size(int vmode_idx, s32* w_out, s32* h_out, s32* coun
   }
 }
 
+void GLDisplay::set_lock(bool lock) {
+  glfwSetWindowAttrib(m_window, GLFW_RESIZABLE, lock ? GLFW_TRUE : GLFW_FALSE);
+}
+
 void update_global_profiler() {
   if (g_gfx_data->debug_gui.dump_events) {
     prof().set_enable(false);
diff --git a/game/graphics/pipelines/opengl.h b/game/graphics/pipelines/opengl.h
index 318abc36f5..2dd13f6a81 100644
--- a/game/graphics/pipelines/opengl.h
+++ b/game/graphics/pipelines/opengl.h
@@ -34,6 +34,7 @@ class GLDisplay : public GfxDisplay {
   void set_size(int w, int h);
   void update_fullscreen(GfxDisplayMode mode, int screen);
   void render();
+  void set_lock(bool lock);
 };
 
 extern const GfxRendererModule gRendererOpenGL;
diff --git a/game/kernel/kmachine.cpp b/game/kernel/kmachine.cpp
index 574c55709c..b1d11a8956 100644
--- a/game/kernel/kmachine.cpp
+++ b/game/kernel/kmachine.cpp
@@ -964,6 +964,10 @@ void set_fullscreen(u32 symptr, s64 screen) {
   }
 }
 
+void set_window_lock(u32 symptr) {
+  Gfx::set_window_lock(symptr == s7.offset);
+}
+
 void set_collision(u32 symptr) {
   Gfx::g_global_settings.collision_enable = symptr != s7.offset;
 }
@@ -1012,6 +1016,7 @@ void InitMachine_PCPort() {
   make_function_symbol_from_c("pc-get-screen-size", (void*)get_screen_size);
   make_function_symbol_from_c("pc-set-window-size", (void*)Gfx::set_window_size);
   make_function_symbol_from_c("pc-set-fullscreen", (void*)set_fullscreen);
+  make_function_symbol_from_c("pc-set-window-lock", (void*)set_window_lock);
 
   // graphics things
   make_function_symbol_from_c("pc-set-letterbox", (void*)Gfx::set_letterbox);
diff --git a/goal_src/engine/gfx/texture.gc b/goal_src/engine/gfx/texture.gc
index 83b85241d6..b7be88434b 100644
--- a/goal_src/engine/gfx/texture.gc
+++ b/goal_src/engine/gfx/texture.gc
@@ -1960,7 +1960,7 @@
            )
           )
         (let ((font-tx-2 (lookup-texture-by-id (new 'static 'texture-id :index #x2 :page #x4fe)))
-             (font-tx-2-dest #xe0000)
+             (font-tx-2-dest #xe6000)
              (font-tx-2-fmt (gs-psm mt4hh))
              )
           (texture-relocate dma-buff font-tx-2 font-tx-2-dest (the-as gs-psm font-tx-2-fmt) font-clut)
diff --git a/goal_src/engine/sparticle/sparticle-launcher.gc b/goal_src/engine/sparticle/sparticle-launcher.gc
index 118d38ba56..9425466057 100644
--- a/goal_src/engine/sparticle/sparticle-launcher.gc
+++ b/goal_src/engine/sparticle/sparticle-launcher.gc
@@ -938,7 +938,8 @@
           (if (-> *pc-settings* ps2-parts?)
               ;; pc port : launchers have larger bsphere if you have ps2 parts off
               (sphere-in-view-frustum? (the-as sphere gp-1))
-              (sphere-in-view-frustum? (the-as sphere (let ((bsph (new-stack-vector0))) (vector-copy! bsph gp-1) (*! (-> bsph w) 4.0) bsph))))
+              (sphere-in-view-frustum? (the-as sphere (begin (*! (-> gp-1 w) 8.0) gp-1)))
+              )
           )
       )
      )
@@ -990,10 +991,10 @@
           (set! f30-0 0.0)
           )
       
-      ;; pc hack for more particles.
+      ;; if we have ps2 particles off, say we're at the camera
       (with-pc
         (if (not (-> *pc-settings* ps2-parts?))
-            (/! f30-0 256.0)))
+            (set! f30-0 0.0)))
       
       ;; loop over particles in the group.
       (let ((s2-1 (-> obj length)))
diff --git a/goal_src/kernel-defs.gc b/goal_src/kernel-defs.gc
index b5da9d5879..9294b3b37e 100644
--- a/goal_src/kernel-defs.gc
+++ b/goal_src/kernel-defs.gc
@@ -353,6 +353,7 @@
 (define-extern pc-mkdir-file-path (function string none))
 (define-extern pc-sound-set-flava-hack (function int none))
 (define-extern pc-sound-set-fade-hack (function int none))
+(define-extern pc-set-window-lock (function symbol none))
 
 (defenum pc-prof-event
   (begin 0)
diff --git a/goal_src/pc/pckernel-h.gc b/goal_src/pc/pckernel-h.gc
index 9e9f1b5aa1..0435de59e8 100644
--- a/goal_src/pc/pckernel-h.gc
+++ b/goal_src/pc/pckernel-h.gc
@@ -31,7 +31,7 @@
 (defglobalconstant PC_KERNEL_VERSION_BUILD #x0001)
 (defglobalconstant PC_KERNEL_VERSION_REVISION #x0005)
 
-(defglobalconstant PC_KERNEL_VERSION_MINOR #x0004)
+(defglobalconstant PC_KERNEL_VERSION_MINOR #x0005)
 (defglobalconstant PC_KERNEL_VERSION_MAJOR #x0001)
 (defglobalconstant PC_KERNEL_VERSION (logior
                                         (ash PC_KERNEL_VERSION_MAJOR 48)
@@ -213,6 +213,7 @@
    (letterbox? symbol) ;; letterbox. #f = stretched
    (vsync? symbol) ;; vsync.
    (font-scale float) ;; font scaling.
+   (window-lock? symbol) ;; whether the window can be resized by the user or not.
 
    ;; debug settings
    (os symbol) ;; windows, linux, macos
@@ -320,6 +321,7 @@
     (set-size! (_type_ int int) none)
     (set-aspect! (_type_ int int) none)
     (set-aspect-ratio! (_type_ float) none)
+    (set-window-lock! (_type_ symbol) symbol)
     (read-from-file (_type_ string) symbol)
     (write-to-file (_type_ string) symbol)
     (update-cheats (_type_) int)
@@ -389,9 +391,10 @@
   (set! (-> obj mood-override?) #f)
   (set! (-> obj movie?) #f)
   (set! (-> obj font-scale) 1.0)
-  (set! (-> obj aspect-custom-x) 1)
-  (set! (-> obj aspect-custom-y) 1)
+  (set! (-> obj aspect-custom-x) 4)
+  (set! (-> obj aspect-custom-y) 3)
   (set! (-> obj discord-rpc?) #t)
+  (set! (-> obj window-lock?) #t)
 
   (reset-gfx obj)
   (reset-audio obj)
diff --git a/goal_src/pc/pckernel.gc b/goal_src/pc/pckernel.gc
index 455469d9d5..431d0af1ff 100644
--- a/goal_src/pc/pckernel.gc
+++ b/goal_src/pc/pckernel.gc
@@ -91,6 +91,11 @@
   (set! (-> obj aspect-ratio-reciprocal) (/ ASPECT_4X3 aspect))
   (none))
 
+(defmethod set-window-lock! pc-settings ((obj pc-settings) (lock symbol))
+  "set whether the window is locked (cannot be resized by the user)."
+  (pc-set-window-lock lock)
+  (set! (-> obj window-lock?) lock))
+
 (defmethod commit-to-file pc-settings ((obj pc-settings))
   "commits the current settings to the file"
   ;; auto load settings if available
@@ -156,7 +161,10 @@
     )
 
   (pc-set-fullscreen (-> obj display-mode) 0)
-  (if (= 'windowed (-> obj display-mode))
+  (if (and (= 'windowed (-> obj display-mode))
+           (-> obj window-lock?)
+           (or (!= (-> obj win-width) (-> obj real-width))
+               (!= (-> obj win-height) (-> obj real-height))))
       (pc-set-window-size (max 320 (-> obj win-width)) (max 240 (-> obj win-height))))
 
   (pc-discord-rpc-set (if (-> obj discord-rpc?) 1 0))

From 8d8c6ccf392de8a323c596b96a722e9eeed41f64 Mon Sep 17 00:00:00 2001
From: ManDude <7569514+ManDude@users.noreply.github.com>
Date: Wed, 22 Jun 2022 01:11:57 +0100
Subject: [PATCH 16/17] fix bad resolutions being picked in windowed mode
 (#1513)

fix bad resolutions being picked in windowed
---
 game/graphics/pipelines/opengl.cpp | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/game/graphics/pipelines/opengl.cpp b/game/graphics/pipelines/opengl.cpp
index d532e6536d..04e551236a 100644
--- a/game/graphics/pipelines/opengl.cpp
+++ b/game/graphics/pipelines/opengl.cpp
@@ -412,13 +412,15 @@ GfxDisplayMode GLDisplay::get_fullscreen() {
 void GLDisplay::get_screen_size(int vmode_idx, s32* w_out, s32* h_out, s32* count_out) {
   int count = 0;
   auto vmode = glfwGetVideoMode(glfwGetPrimaryMonitor());
-  auto vmodes = glfwGetVideoModes(glfwGetPrimaryMonitor(), &count);
-  if (vmode_idx >= 0) {
-    vmode = &vmodes[vmode_idx];
-  } else {
-    for (int i = 0; i < count; ++i) {
-      if (!vmode || vmode->height < vmodes[i].height) {
-        vmode = &vmodes[i];
+  if (get_fullscreen() == GfxDisplayMode::Fullscreen) {
+    auto vmodes = glfwGetVideoModes(glfwGetPrimaryMonitor(), &count);
+    if (vmode_idx >= 0) {
+      vmode = &vmodes[vmode_idx];
+    } else {
+      for (int i = 0; i < count; ++i) {
+        if (!vmode || vmode->height < vmodes[i].height) {
+          vmode = &vmodes[i];
+        }
       }
     }
   }

From 78b6ba3d279901cc657524270234e11ac7f835ec Mon Sep 17 00:00:00 2001
From: ManDude <7569514+ManDude@users.noreply.github.com>
Date: Wed, 22 Jun 2022 01:45:45 +0100
Subject: [PATCH 17/17] add `xdelta3` library for binary file patching (#1514)

---
 common/util/xdelta3_util.h                    |   12 +
 third-party/xdelta3/.gitignore                |   32 +
 third-party/xdelta3/README.md                 |   24 +
 third-party/xdelta3/xdelta3/LICENSE           |  176 +
 third-party/xdelta3/xdelta3/Makefile.am       |  192 +
 third-party/xdelta3/xdelta3/README.md         |   37 +
 third-party/xdelta3/xdelta3/badcopy.c         |  158 +
 third-party/xdelta3/xdelta3/configure.ac      |   51 +
 .../xdelta3/xdelta3/cpp-btree/CMakeLists.txt  |   40 +
 third-party/xdelta3/xdelta3/cpp-btree/COPYING |  202 +
 third-party/xdelta3/xdelta3/cpp-btree/README  |   31 +
 third-party/xdelta3/xdelta3/cpp-btree/btree.h | 2394 ++++++++
 .../xdelta3/xdelta3/cpp-btree/btree_bench.cc  |  593 ++
 .../xdelta3/cpp-btree/btree_container.h       |  349 ++
 .../xdelta3/xdelta3/cpp-btree/btree_map.h     |  130 +
 .../xdelta3/xdelta3/cpp-btree/btree_set.h     |  121 +
 .../xdelta3/xdelta3/cpp-btree/btree_test.cc   |  270 +
 .../xdelta3/xdelta3/cpp-btree/btree_test.h    |  940 ++++
 .../xdelta3/cpp-btree/btree_test_flags.cc     |   20 +
 .../xdelta3/xdelta3/cpp-btree/safe_btree.h    |  395 ++
 .../xdelta3/cpp-btree/safe_btree_map.h        |   89 +
 .../xdelta3/cpp-btree/safe_btree_set.h        |   88 +
 .../xdelta3/cpp-btree/safe_btree_test.cc      |  116 +
 .../xdelta3/xdelta3/draft-korn-vcdiff.txt     | 1322 +++++
 .../xdelta3/xdelta3/examples/README.md        |    8 +
 .../xdelta3/xdelta3/examples/compare_test.c   |  138 +
 .../xdelta3/examples/encode_decode_test.c     |  203 +
 .../project.pbxproj                           |  389 ++
 .../xdelta3-ios-test/Xd3iOSAppDelegate.h      |   23 +
 .../xdelta3-ios-test/Xd3iOSAppDelegate.m      |   68 +
 .../xdelta3-ios-test/Xd3iOSViewController.h   |   28 +
 .../xdelta3-ios-test/Xd3iOSViewController.m   |  177 +
 .../en.lproj/InfoPlist.strings                |    2 +
 .../en.lproj/MainStoryboard_iPad.storyboard   |   77 +
 .../en.lproj/MainStoryboard_iPhone.storyboard |   27 +
 .../xdelta3-ios-test/xdelta3-ios-test/main.m  |   25 +
 .../xdelta3-ios-test-Info.plist               |   52 +
 .../xdelta3-ios-test-Prefix.pch               |   14 +
 .../xdelta3/examples/small_page_test.c        |  215 +
 .../xdelta3/xdelta3/examples/speed_test.c     |   87 +
 third-party/xdelta3/xdelta3/examples/test.h   |   56 +
 .../xdelta3/xdelta3/generate_build_files.sh   |    8 +
 third-party/xdelta3/xdelta3/go/src/regtest.go |  274 +
 .../xdelta3/xdelta3/go/src/xdelta/rstream.go  |   71 +
 .../xdelta3/xdelta3/go/src/xdelta/run.go      |   71 +
 .../xdelta3/xdelta3/go/src/xdelta/test.go     |  164 +
 .../xdelta3/xdelta3/go/src/xdelta/tgroup.go   |   97 +
 third-party/xdelta3/xdelta3/linkxd3lib.c      |   42 +
 .../m4/ax_check_aligned_access_required.m4    |   84 +
 third-party/xdelta3/xdelta3/m4/ax_pkg_swig.m4 |  135 +
 .../xdelta3/xdelta3/m4/ax_python_devel.m4     |  325 ++
 .../xdelta3/xdelta3/m4/ax_swig_python.m4      |   64 +
 third-party/xdelta3/xdelta3/plot.sh           |   25 +
 third-party/xdelta3/xdelta3/rcs_junk.cc       | 1861 +++++++
 third-party/xdelta3/xdelta3/run_release.sh    |  288 +
 .../xdelta3/xdelta3/testing/checksum_test.cc  |  770 +++
 .../xdelta3/xdelta3/testing/checksum_test_c.c |  189 +
 third-party/xdelta3/xdelta3/testing/cmp.h     |   67 +
 third-party/xdelta3/xdelta3/testing/delta.h   |   87 +
 third-party/xdelta3/xdelta3/testing/file.h    |  399 ++
 third-party/xdelta3/xdelta3/testing/modify.h  |  400 ++
 third-party/xdelta3/xdelta3/testing/random.h  |  157 +
 .../xdelta3/xdelta3/testing/regtest.cc        | 1321 +++++
 .../xdelta3/xdelta3/testing/regtest_c.c       |   17 +
 .../xdelta3/xdelta3/testing/run_release.sh    |    2 +
 third-party/xdelta3/xdelta3/testing/segment.h |  112 +
 third-party/xdelta3/xdelta3/testing/sizes.h   |  126 +
 third-party/xdelta3/xdelta3/testing/test.h    |   84 +
 .../xdelta3/testing/xdelta3-regtest.py        | 1264 +++++
 .../xdelta3/xdelta3/testing/xdelta3-test.py   |  153 +
 .../xdelta3/xdelta3/xdelta3-blkcache.h        |  557 ++
 third-party/xdelta3/xdelta3/xdelta3-cfgs.h    |  171 +
 third-party/xdelta3/xdelta3/xdelta3-decode.h  | 1219 +++++
 third-party/xdelta3/xdelta3/xdelta3-djw.h     | 1835 +++++++
 third-party/xdelta3/xdelta3/xdelta3-fgk.h     |  857 +++
 third-party/xdelta3/xdelta3/xdelta3-hash.h    |  159 +
 .../xdelta3/xdelta3/xdelta3-internal.h        |  385 ++
 third-party/xdelta3/xdelta3/xdelta3-list.h    |  127 +
 third-party/xdelta3/xdelta3/xdelta3-lzma.h    |  195 +
 third-party/xdelta3/xdelta3/xdelta3-main.h    | 4062 ++++++++++++++
 third-party/xdelta3/xdelta3/xdelta3-merge.h   |  583 ++
 third-party/xdelta3/xdelta3/xdelta3-second.h  |  321 ++
 third-party/xdelta3/xdelta3/xdelta3-test.h    | 3022 +++++++++++
 third-party/xdelta3/xdelta3/xdelta3.1         |  153 +
 third-party/xdelta3/xdelta3/xdelta3.c         | 4819 +++++++++++++++++
 third-party/xdelta3/xdelta3/xdelta3.h         | 1476 +++++
 third-party/xdelta3/xdelta3/xdelta3.i         |   85 +
 third-party/xdelta3/xdelta3/xdelta3.vcxproj   |  344 ++
 third-party/xdelta3/xdelta3/xdelta3.wxi       |    7 +
 third-party/xdelta3/xdelta3/xdelta3.wxs       |  131 +
 vendor.yaml                                   |    2 +
 91 files changed, 38538 insertions(+)
 create mode 100644 common/util/xdelta3_util.h
 create mode 100644 third-party/xdelta3/.gitignore
 create mode 100644 third-party/xdelta3/README.md
 create mode 100644 third-party/xdelta3/xdelta3/LICENSE
 create mode 100644 third-party/xdelta3/xdelta3/Makefile.am
 create mode 100644 third-party/xdelta3/xdelta3/README.md
 create mode 100644 third-party/xdelta3/xdelta3/badcopy.c
 create mode 100644 third-party/xdelta3/xdelta3/configure.ac
 create mode 100644 third-party/xdelta3/xdelta3/cpp-btree/CMakeLists.txt
 create mode 100644 third-party/xdelta3/xdelta3/cpp-btree/COPYING
 create mode 100644 third-party/xdelta3/xdelta3/cpp-btree/README
 create mode 100644 third-party/xdelta3/xdelta3/cpp-btree/btree.h
 create mode 100644 third-party/xdelta3/xdelta3/cpp-btree/btree_bench.cc
 create mode 100644 third-party/xdelta3/xdelta3/cpp-btree/btree_container.h
 create mode 100644 third-party/xdelta3/xdelta3/cpp-btree/btree_map.h
 create mode 100644 third-party/xdelta3/xdelta3/cpp-btree/btree_set.h
 create mode 100644 third-party/xdelta3/xdelta3/cpp-btree/btree_test.cc
 create mode 100644 third-party/xdelta3/xdelta3/cpp-btree/btree_test.h
 create mode 100644 third-party/xdelta3/xdelta3/cpp-btree/btree_test_flags.cc
 create mode 100644 third-party/xdelta3/xdelta3/cpp-btree/safe_btree.h
 create mode 100644 third-party/xdelta3/xdelta3/cpp-btree/safe_btree_map.h
 create mode 100644 third-party/xdelta3/xdelta3/cpp-btree/safe_btree_set.h
 create mode 100644 third-party/xdelta3/xdelta3/cpp-btree/safe_btree_test.cc
 create mode 100644 third-party/xdelta3/xdelta3/draft-korn-vcdiff.txt
 create mode 100644 third-party/xdelta3/xdelta3/examples/README.md
 create mode 100644 third-party/xdelta3/xdelta3/examples/compare_test.c
 create mode 100644 third-party/xdelta3/xdelta3/examples/encode_decode_test.c
 create mode 100644 third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test.xcodeproj/project.pbxproj
 create mode 100644 third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSAppDelegate.h
 create mode 100644 third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSAppDelegate.m
 create mode 100644 third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSViewController.h
 create mode 100644 third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSViewController.m
 create mode 100644 third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/InfoPlist.strings
 create mode 100644 third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/MainStoryboard_iPad.storyboard
 create mode 100644 third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/MainStoryboard_iPhone.storyboard
 create mode 100644 third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/main.m
 create mode 100644 third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/xdelta3-ios-test-Info.plist
 create mode 100644 third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/xdelta3-ios-test-Prefix.pch
 create mode 100644 third-party/xdelta3/xdelta3/examples/small_page_test.c
 create mode 100644 third-party/xdelta3/xdelta3/examples/speed_test.c
 create mode 100644 third-party/xdelta3/xdelta3/examples/test.h
 create mode 100644 third-party/xdelta3/xdelta3/generate_build_files.sh
 create mode 100644 third-party/xdelta3/xdelta3/go/src/regtest.go
 create mode 100644 third-party/xdelta3/xdelta3/go/src/xdelta/rstream.go
 create mode 100644 third-party/xdelta3/xdelta3/go/src/xdelta/run.go
 create mode 100644 third-party/xdelta3/xdelta3/go/src/xdelta/test.go
 create mode 100644 third-party/xdelta3/xdelta3/go/src/xdelta/tgroup.go
 create mode 100644 third-party/xdelta3/xdelta3/linkxd3lib.c
 create mode 100644 third-party/xdelta3/xdelta3/m4/ax_check_aligned_access_required.m4
 create mode 100644 third-party/xdelta3/xdelta3/m4/ax_pkg_swig.m4
 create mode 100644 third-party/xdelta3/xdelta3/m4/ax_python_devel.m4
 create mode 100644 third-party/xdelta3/xdelta3/m4/ax_swig_python.m4
 create mode 100644 third-party/xdelta3/xdelta3/plot.sh
 create mode 100644 third-party/xdelta3/xdelta3/rcs_junk.cc
 create mode 100644 third-party/xdelta3/xdelta3/run_release.sh
 create mode 100644 third-party/xdelta3/xdelta3/testing/checksum_test.cc
 create mode 100644 third-party/xdelta3/xdelta3/testing/checksum_test_c.c
 create mode 100644 third-party/xdelta3/xdelta3/testing/cmp.h
 create mode 100644 third-party/xdelta3/xdelta3/testing/delta.h
 create mode 100644 third-party/xdelta3/xdelta3/testing/file.h
 create mode 100644 third-party/xdelta3/xdelta3/testing/modify.h
 create mode 100644 third-party/xdelta3/xdelta3/testing/random.h
 create mode 100644 third-party/xdelta3/xdelta3/testing/regtest.cc
 create mode 100644 third-party/xdelta3/xdelta3/testing/regtest_c.c
 create mode 100644 third-party/xdelta3/xdelta3/testing/run_release.sh
 create mode 100644 third-party/xdelta3/xdelta3/testing/segment.h
 create mode 100644 third-party/xdelta3/xdelta3/testing/sizes.h
 create mode 100644 third-party/xdelta3/xdelta3/testing/test.h
 create mode 100644 third-party/xdelta3/xdelta3/testing/xdelta3-regtest.py
 create mode 100644 third-party/xdelta3/xdelta3/testing/xdelta3-test.py
 create mode 100644 third-party/xdelta3/xdelta3/xdelta3-blkcache.h
 create mode 100644 third-party/xdelta3/xdelta3/xdelta3-cfgs.h
 create mode 100644 third-party/xdelta3/xdelta3/xdelta3-decode.h
 create mode 100644 third-party/xdelta3/xdelta3/xdelta3-djw.h
 create mode 100644 third-party/xdelta3/xdelta3/xdelta3-fgk.h
 create mode 100644 third-party/xdelta3/xdelta3/xdelta3-hash.h
 create mode 100644 third-party/xdelta3/xdelta3/xdelta3-internal.h
 create mode 100644 third-party/xdelta3/xdelta3/xdelta3-list.h
 create mode 100644 third-party/xdelta3/xdelta3/xdelta3-lzma.h
 create mode 100644 third-party/xdelta3/xdelta3/xdelta3-main.h
 create mode 100644 third-party/xdelta3/xdelta3/xdelta3-merge.h
 create mode 100644 third-party/xdelta3/xdelta3/xdelta3-second.h
 create mode 100644 third-party/xdelta3/xdelta3/xdelta3-test.h
 create mode 100644 third-party/xdelta3/xdelta3/xdelta3.1
 create mode 100644 third-party/xdelta3/xdelta3/xdelta3.c
 create mode 100644 third-party/xdelta3/xdelta3/xdelta3.h
 create mode 100644 third-party/xdelta3/xdelta3/xdelta3.i
 create mode 100644 third-party/xdelta3/xdelta3/xdelta3.vcxproj
 create mode 100644 third-party/xdelta3/xdelta3/xdelta3.wxi
 create mode 100644 third-party/xdelta3/xdelta3/xdelta3.wxs

diff --git a/common/util/xdelta3_util.h b/common/util/xdelta3_util.h
new file mode 100644
index 0000000000..4f15fed520
--- /dev/null
+++ b/common/util/xdelta3_util.h
@@ -0,0 +1,12 @@
+#pragma once
+
+/*!
+ * @file xdelta3_util.h
+ * Wrapper for including xdelta3 (a C library): hard-codes the SIZEOF_* macros that its configure
+ * script would detect (presumably assuming a 64-bit target - confirm), then includes its .h and .c.
+ */
+
+#define SIZEOF_SIZE_T 8
+#define SIZEOF_UNSIGNED_LONG_LONG 8
+#include "third-party/xdelta3/xdelta3/xdelta3.h"
+#include "third-party/xdelta3/xdelta3/xdelta3.c"
diff --git a/third-party/xdelta3/.gitignore b/third-party/xdelta3/.gitignore
new file mode 100644
index 0000000000..a5b59268a1
--- /dev/null
+++ b/third-party/xdelta3/.gitignore
@@ -0,0 +1,32 @@
+*.o
+*~
+.deps
+.dirstamp
+INSTALL
+Makefile
+Makefile.in
+aclocal.m4
+autom4te.cache
+build
+compile
+config.guess
+config.h
+config.h.in
+config.log
+config.status
+config.sub
+config.sub
+configure
+depcomp
+libtool
+libtool.m4
+ltmain.sh
+ltoptions.m4
+ltsugar.m4
+ltversion.m4
+lt~obsolete.m4
+missing
+stamp-h1
+xdelta3/xdelta3
+xdelta3decode
+xdelta3regtest
diff --git a/third-party/xdelta3/README.md b/third-party/xdelta3/README.md
new file mode 100644
index 0000000000..02ee0d538d
--- /dev/null
+++ b/third-party/xdelta3/README.md
@@ -0,0 +1,24 @@
+# Xdelta
+
+Xdelta version 3 is a C library and command-line tool for delta
+compression using VCDIFF/RFC 3284 streams.
+
+# License
+
+This repository contains branches of Xdelta 3.x that were
+re-licensed by the original author under the [Apache
+License, version 2.0](http://www.apache.org/licenses/LICENSE-2.0),
+namely:
+
+- __release3_0_apl__ Change to APL based on 3.0.11 sources
+- __release3_1_apl__ Merges release3_0_apl with 3.1.0 sources
+
+The original GPL licensed Xdelta lives at http://github.com/jmacd/xdelta-gpl.
+
+# Documentation
+
+See the [command-line usage](https://github.com/jmacd/xdelta/blob/wiki/CommandLineSyntax.md).  See [wiki directory](https://github.com/jmacd/xdelta/tree/wiki).
+
+
+
+
diff --git a/third-party/xdelta3/xdelta3/LICENSE b/third-party/xdelta3/xdelta3/LICENSE
new file mode 100644
index 0000000000..7a774156a6
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/LICENSE
@@ -0,0 +1,176 @@
+Apache License
+Version 2.0, January 2004
+http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+"License" shall mean the terms and conditions for use, reproduction,
+and distribution as defined by Sections 1 through 9 of this document.
+
+"Licensor" shall mean the copyright owner or entity authorized by
+the copyright owner that is granting the License.
+
+"Legal Entity" shall mean the union of the acting entity and all
+other entities that control, are controlled by, or are under common
+control with that entity. For the purposes of this definition,
+"control" means (i) the power, direct or indirect, to cause the
+direction or management of such entity, whether by contract or
+otherwise, or (ii) ownership of fifty percent (50%) or more of the
+outstanding shares, or (iii) beneficial ownership of such entity.
+
+"You" (or "Your") shall mean an individual or Legal Entity
+exercising permissions granted by this License.
+
+"Source" form shall mean the preferred form for making modifications,
+including but not limited to software source code, documentation
+source, and configuration files.
+
+"Object" form shall mean any form resulting from mechanical
+transformation or translation of a Source form, including but
+not limited to compiled object code, generated documentation,
+and conversions to other media types.
+
+"Work" shall mean the work of authorship, whether in Source or
+Object form, made available under the License, as indicated by a
+copyright notice that is included in or attached to the work
+(an example is provided in the Appendix below).
+
+"Derivative Works" shall mean any work, whether in Source or Object
+form, that is based on (or derived from) the Work and for which the
+editorial revisions, annotations, elaborations, or other modifications
+represent, as a whole, an original work of authorship. For the purposes
+of this License, Derivative Works shall not include works that remain
+separable from, or merely link (or bind by name) to the interfaces of,
+the Work and Derivative Works thereof.
+
+"Contribution" shall mean any work of authorship, including
+the original version of the Work and any modifications or additions
+to that Work or Derivative Works thereof, that is intentionally
+submitted to Licensor for inclusion in the Work by the copyright owner
+or by an individual or Legal Entity authorized to submit on behalf of
+the copyright owner. For the purposes of this definition, "submitted"
+means any form of electronic, verbal, or written communication sent
+to the Licensor or its representatives, including but not limited to
+communication on electronic mailing lists, source code control systems,
+and issue tracking systems that are managed by, or on behalf of, the
+Licensor for the purpose of discussing and improving the Work, but
+excluding communication that is conspicuously marked or otherwise
+designated in writing by the copyright owner as "Not a Contribution."
+
+"Contributor" shall mean Licensor and any individual or Legal Entity
+on behalf of whom a Contribution has been received by Licensor and
+subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+this License, each Contributor hereby grants to You a perpetual,
+worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+copyright license to reproduce, prepare Derivative Works of,
+publicly display, publicly perform, sublicense, and distribute the
+Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+this License, each Contributor hereby grants to You a perpetual,
+worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+(except as stated in this section) patent license to make, have made,
+use, offer to sell, sell, import, and otherwise transfer the Work,
+where such license applies only to those patent claims licensable
+by such Contributor that are necessarily infringed by their
+Contribution(s) alone or by combination of their Contribution(s)
+with the Work to which such Contribution(s) was submitted. If You
+institute patent litigation against any entity (including a
+cross-claim or counterclaim in a lawsuit) alleging that the Work
+or a Contribution incorporated within the Work constitutes direct
+or contributory patent infringement, then any patent licenses
+granted to You under this License for that Work shall terminate
+as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+Work or Derivative Works thereof in any medium, with or without
+modifications, and in Source or Object form, provided that You
+meet the following conditions:
+
+(a) You must give any other recipients of the Work or
+Derivative Works a copy of this License; and
+
+(b) You must cause any modified files to carry prominent notices
+stating that You changed the files; and
+
+(c) You must retain, in the Source form of any Derivative Works
+that You distribute, all copyright, patent, trademark, and
+attribution notices from the Source form of the Work,
+excluding those notices that do not pertain to any part of
+the Derivative Works; and
+
+(d) If the Work includes a "NOTICE" text file as part of its
+distribution, then any Derivative Works that You distribute must
+include a readable copy of the attribution notices contained
+within such NOTICE file, excluding those notices that do not
+pertain to any part of the Derivative Works, in at least one
+of the following places: within a NOTICE text file distributed
+as part of the Derivative Works; within the Source form or
+documentation, if provided along with the Derivative Works; or,
+within a display generated by the Derivative Works, if and
+wherever such third-party notices normally appear. The contents
+of the NOTICE file are for informational purposes only and
+do not modify the License. You may add Your own attribution
+notices within Derivative Works that You distribute, alongside
+or as an addendum to the NOTICE text from the Work, provided
+that such additional attribution notices cannot be construed
+as modifying the License.
+
+You may add Your own copyright statement to Your modifications and
+may provide additional or different license terms and conditions
+for use, reproduction, or distribution of Your modifications, or
+for any such Derivative Works as a whole, provided Your use,
+reproduction, and distribution of the Work otherwise complies with
+the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+any Contribution intentionally submitted for inclusion in the Work
+by You to the Licensor shall be under the terms and conditions of
+this License, without any additional terms or conditions.
+Notwithstanding the above, nothing herein shall supersede or modify
+the terms of any separate license agreement you may have executed
+with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+names, trademarks, service marks, or product names of the Licensor,
+except as required for reasonable and customary use in describing the
+origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+agreed to in writing, Licensor provides the Work (and each
+Contributor provides its Contributions) on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+implied, including, without limitation, any warranties or conditions
+of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+PARTICULAR PURPOSE. You are solely responsible for determining the
+appropriateness of using or redistributing the Work and assume any
+risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+whether in tort (including negligence), contract, or otherwise,
+unless required by applicable law (such as deliberate and grossly
+negligent acts) or agreed to in writing, shall any Contributor be
+liable to You for damages, including any direct, indirect, special,
+incidental, or consequential damages of any character arising as a
+result of this License or out of the use or inability to use the
+Work (including but not limited to damages for loss of goodwill,
+work stoppage, computer failure or malfunction, or any and all
+other commercial damages or losses), even if such Contributor
+has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+the Work or Derivative Works thereof, You may choose to offer,
+and charge a fee for, acceptance of support, warranty, indemnity,
+or other liability obligations and/or rights consistent with this
+License. However, in accepting such obligations, You may act only
+on Your own behalf and on Your sole responsibility, not on behalf
+of any other Contributor, and only if You agree to indemnify,
+defend, and hold each Contributor harmless for any liability
+incurred by, or claims asserted against, such Contributor by reason
+of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
diff --git a/third-party/xdelta3/xdelta3/Makefile.am b/third-party/xdelta3/xdelta3/Makefile.am
new file mode 100644
index 0000000000..2d9f6db169
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/Makefile.am
@@ -0,0 +1,192 @@
+ACLOCAL_AMFLAGS = -I m4
+AUTOMAKE_OPTIONS = subdir-objects
+
+bin_PROGRAMS = xdelta3
+noinst_PROGRAMS = xdelta3regtest xdelta3decode xdelta3checksum
+
+export AFL_HARDEN
+
+common_SOURCES = \
+	  xdelta3-blkcache.h \
+	  xdelta3-decode.h \
+	  xdelta3-djw.h \
+	  xdelta3-fgk.h \
+	  xdelta3-hash.h \
+	  xdelta3-internal.h \
+	  xdelta3-list.h \
+	  xdelta3-lzma.h \
+	  xdelta3-main.h \
+	  xdelta3-merge.h \
+	  xdelta3-second.h \
+	  xdelta3-test.h \
+          xdelta3-cfgs.h \
+	  xdelta3.h
+
+xdelta3_SOURCES = $(common_SOURCES) xdelta3.c
+
+xdelta3decode_SOURCES = $(common_SOURCES) xdelta3.c
+
+xdelta3regtest_SOURCES = $(common_SOURCES) \
+	testing/cmp.h \
+	testing/delta.h \
+	testing/file.h \
+	testing/modify.h \
+	testing/random.h \
+	testing/regtest.cc \
+	testing/regtest_c.c \
+	testing/segment.h \
+	testing/sizes.h \
+	testing/test.h
+
+xdelta3checksum_SOURCES = $(common_SOURCES) \
+	testing/checksum_test.cc \
+	testing/checksum_test_c.c
+
+# These sources constitute a regression test written in Go, that is
+# not automatically built or run.  Install Go-1.5.x or later, add
+# `pwd`/go in $GOPATH, and (cd go/src && go run regtest.go).
+# TODO(jmacd): replace hard-coded path names in regtest.go w/ flags.
+GOLANG_SRCS = \
+	go/src/xdelta/test.go \
+	go/src/xdelta/rstream.go \
+	go/src/xdelta/tgroup.go \
+	go/src/xdelta/run.go \
+	go/src/regtest.go
+
+# Note: for extra sanity checks, enable -Wconversion. Note there
+# are a lot of false positives.
+WFLAGS = -Wall -Wshadow -fno-builtin -Wextra -Wsign-compare \
+	 -Wformat=2 -Wno-format-nonliteral \
+	 -Wno-unused-parameter -Wno-unused-function
+
+ # -Weverything \
+ # -Wc++11-compat-reserved-user-defined-literal \
+ # -Wno-padded \
+ # -Wno-format-nonliteral \
+ # -Wno-cast-align \
+ # -Wno-unused-parameter \
+ # -Wno-sign-conversion \
+ # -Wno-conversion \
+ # -Wno-switch-enum \
+ # -Wno-covered-switch-default \
+ # -Wno-disabled-macro-expansion \
+ # -Wno-variadic-macros \
+ # -Wno-c++98-compat-pedantic
+
+C_WFLAGS = $(WFLAGS) -std=c99
+CXX_WFLAGS = $(WFLAGS) -std=c++11
+
+common_CFLAGS = \
+	      -DREGRESSION_TEST=1 \
+	      -DSECONDARY_DJW=1 \
+	      -DSECONDARY_FGK=1 \
+	      -DXD3_MAIN=1
+
+if DEBUG_SYMBOLS
+  common_CFLAGS += -g
+endif
+
+#common_CFLAGS += -fsanitize=address -fno-omit-frame-pointer
+#common_CFLAGS += -O2
+
+# For additional debugging, add -DXD3_DEBUG=1, 2, 3, ...
+xdelta3_CFLAGS = $(C_WFLAGS) $(common_CFLAGS) -DXD3_DEBUG=0
+xdelta3_LDADD = -lm
+
+xdelta3decode_CFLAGS = \
+	$(C_WFLAGS) \
+	-DREGRESSION_TEST=0 \
+	-DSECONDARY_DJW=0 \
+	-DSECONDARY_FGK=0 \
+	-DSECONDARY_LZMA=0 \
+	-DXD3_MAIN=1 \
+	-DXD3_ENCODER=0 \
+	-DXD3_STDIO=1 \
+	-DEXTERNAL_COMPRESSION=0 \
+	-DVCDIFF_TOOLS=0
+
+xdelta3regtest_CXXFLAGS = \
+	$(CXX_WFLAGS) $(common_CFLAGS) -DNOT_MAIN=1 -DXD3_DEBUG=1
+xdelta3regtest_CFLAGS = \
+	$(C_WFLAGS) $(common_CFLAGS) -DNOT_MAIN=1 -DXD3_DEBUG=1
+xdelta3regtest_LDADD = -lm
+
+xdelta3checksum_CXXFLAGS = \
+	$(CXX_WFLAGS) $(common_CFLAGS) -DNOT_MAIN=1 -DXD3_MAIN=1 -std=c++11
+xdelta3checksum_CFLAGS = \
+	$(C_WFLAGS) $(common_CFLAGS) -DNOT_MAIN=1 -DXD3_MAIN=1
+
+
+man1_MANS = xdelta3.1
+
+EXTRA_DIST = \
+	README.md \
+	run_release.sh \
+	draft-korn-vcdiff.txt \
+	$(GOLANG_SRCS) \
+	examples/Makefile \
+	examples/README.md \
+	examples/compare_test.c \
+	examples/encode_decode_test.c \
+	examples/small_page_test.c \
+	examples/speed_test.c \
+	examples/test.h \
+	examples/iOS/xdelta3-ios-test/xdelta3-ios-test.xcodeproj/project.pbxproj \
+	examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSAppDelegate.h \
+	examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSAppDelegate.m \
+	examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSViewController.h \
+	examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSViewController.m \
+	examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/InfoPlist.strings \
+	examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/MainStoryboard_iPad.storyboard \
+	examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/MainStoryboard_iPhone.storyboard \
+	examples/iOS/xdelta3-ios-test/xdelta3-ios-test/file_v1.bin \
+	examples/iOS/xdelta3-ios-test/xdelta3-ios-test/file_v1_to_v2.bin \
+	examples/iOS/xdelta3-ios-test/xdelta3-ios-test/file_v2.bin \
+	examples/iOS/xdelta3-ios-test/xdelta3-ios-test/main.m \
+	examples/iOS/xdelta3-ios-test/xdelta3-ios-test/xdelta3-ios-test-Info.plist \
+	examples/iOS/xdelta3-ios-test/xdelta3-ios-test/xdelta3-ios-test-Prefix.pch \
+	cpp-btree/CMakeLists.txt \
+	cpp-btree/COPYING \
+	cpp-btree/README \
+	cpp-btree/btree.h \
+	cpp-btree/btree_bench.cc \
+	cpp-btree/btree_container.h \
+	cpp-btree/btree_map.h \
+	cpp-btree/btree_set.h \
+	cpp-btree/btree_test.cc \
+	cpp-btree/btree_test.h \
+	cpp-btree/btree_test_flags.cc \
+	cpp-btree/safe_btree.h \
+	cpp-btree/safe_btree_map.h \
+	cpp-btree/safe_btree_set.h \
+	cpp-btree/safe_btree_test.cc \
+	testing/xdelta3-regtest.py \
+	testing/xdelta3-test.py \
+	xdelta3.1 \
+	xdelta3.i \
+	xdelta3.vcxproj \
+	xdelta3.wxi \
+	xdelta3.wxs
+
+# Broken, removed from distribution:
+#	xdelta3_pywrap.c
+#	xdelta3.py
+
+#PYFILES = xdelta3_pywrap.c xdelta3.py
+#XDELTA3PY = xdelta3.py
+#XDELTA3PYLIB = xdelta3.la
+
+#BUILT_SOURCES = $(PYFILES)
+
+#xdelta3_pywrap.c xdelta3.py : xdelta3.i
+#	$(SWIG) -python -o xdelta3_pywrap.c xdelta3.i
+
+# OS X for some reason requires:
+# pythondir = $(PYTHON_SITE_PKG)
+# pyexecdir = $(PYTHON_SITE_PKG)
+
+#python_PYTHON = $(XDELTA3PY)
+#pyexec_LTLIBRARIES = $(XDELTA3PYLIB)
+#_xdelta3_la_SOURCES = $(srcdir)/xdelta3_pywrap.c $(xdelta3_SOURCES)
+#_xdelta3_la_CFLAGS = $(common_CFLAGS) -DNOT_MAIN=1 $(PYTHON_CPPFLAGS)
+#_xdelta3_la_LDFLAGS = -module
diff --git a/third-party/xdelta3/xdelta3/README.md b/third-party/xdelta3/xdelta3/README.md
new file mode 100644
index 0000000000..ba6f030b58
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/README.md
@@ -0,0 +1,37 @@
+Xdelta 3.x readme.txt
+Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
+2009, 2010, 2011, 2012, 2013, 2014, 2015
+<josh.macdonald@gmail.com>
+
+
+Thanks for downloading Xdelta!
+
+This directory contains the Xdelta3 command-line interface (CLI) and source
+distribution for VCDIFF differential compression, a.k.a. delta
+compression. The latest information and downloads are available here:
+
+  http://xdelta.org/
+  http://github.com/jmacd/xdelta/
+
+Xdelta can be configured to use XZ Utils for secondary compression:
+
+  http://tukaani.org/xz/
+
+The command-line syntax is detailed here:
+
+  https://github.com/jmacd/xdelta/blob/wiki/CommandLineSyntax.md
+
+Run 'xdelta3 -h' for brief help.  Run 'xdelta3 test' for built-in tests.
+
+Sample commands (like gzip, -e means encode, -d means decode)
+
+  xdelta3 -9 -S lzma -e -f -s OLD_FILE NEW_FILE DELTA_FILE
+  xdelta3 -d -s OLD_FILE DELTA_FILE DECODED_FILE
+
+File bug reports and browse open support issues here:
+
+  https://github.com/jmacd/xdelta/issues
+
+The source distribution contains the C/C++/Python APIs, Unix, Microsoft VC++
+and Cygwin builds.  Xdelta3 is licensed under the Apache License, version 2.0
+(APL); see LICENSE.
diff --git a/third-party/xdelta3/xdelta3/badcopy.c b/third-party/xdelta3/xdelta3/badcopy.c
new file mode 100644
index 0000000000..03abc634ff
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/badcopy.c
@@ -0,0 +1,158 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#define BUFSZ (1 << 22)
+
+#ifdef _WIN32
+/* Stand-ins for POSIX drand48()/lrand48() on Win32, both built on rand(). */
+static double drand48() {
+  /* rand() scaled by RAND_MAX, so the result can be exactly 0.0 or 1.0. */
+  return rand() / (double)RAND_MAX;
+}
+long lrand48() {
+	/* Unsigned accumulator: left-shifting a signed long past its range is undefined. */
+	unsigned long l = 0;
+	int i;
+	for (i = 0; i < 32; i++) {
+		l = l ^ (l << 2) ^ (l << 1) ^ (unsigned long)rand();
+	}
+	return (long)(l & 0x7fffffffUL);	/* non-negative, like POSIX lrand48() */
+}
+#endif
+
+#ifdef _WIN32
+#define XD3_WIN32 1
+#else
+#define XD3_POSIX 1
+#endif
+#define XD3_MAIN 1
+#define main notmain
+#define EXTERNAL_COMPRESSION 0
+#define XD3_USE_LARGEFILE64 1
+#include "xdelta3.c"
+#undef main
+
+
+double error_prob   = 0.0001;  /* error probability; main() overrides it from argv[3] */
+usize_t mean_change  = 100;    /* mean passed to edist() for each change size; argv[4] */
+xoff_t total_change = 0;       /* sum of the sizes of all changes made by modify() */
+xoff_t total_size   = 0;       /* sum of the sizes of all buffers passed to modify() */
+usize_t max_change   = 0;      /* largest single change made by modify() */
+usize_t num_change   = 0;      /* number of changes made by modify() */
+
+
+/* Exponentially distributed size with the given mean: at least 1, at most max. */
+static usize_t
+edist (usize_t mean, usize_t max)
+{
+  double erand = log (1.0 / drand48 ());
+  double x     = (double) mean * erand + 0.5;
+  /* Clamp as a double: x can be inf/NaN, and casting that to usize_t is undefined. */
+  return (x >= (double) max) ? max : (x >= 1.0 ? (usize_t) x : 1);
+}
+
+/* Overwrite randomly spaced, randomly sized runs of buf[0..size) with random bytes, logging
+   each skipped (COPY) and overwritten (ADD) range to stderr and updating the global counters. */
+void modify (char *buf, usize_t size)
+{
+  usize_t bufpos = 0, last_end = 0, j;
+
+  for (;; /* bufpos and j are incremented in the inner loop */)
+    {
+      /* The size of the next modification.  (1U: 1 << 31 overflows a signed int.) */
+      usize_t next_size = edist (mean_change, 1U << 31);
+      /* The expected interval of such a change. */
+      double expect_interval = ((double) next_size * (1.0 - error_prob)) / error_prob;
+      /* The number of bytes until the next modification. */
+      usize_t next_mod  = edist ((usize_t)expect_interval, 1U << 31);
+
+      /* Stop once the change would not fit; written so that the sum cannot wrap around. */
+      if (next_mod > size - bufpos || next_size > size - bufpos - next_mod) { break; }
+      if (max_change < next_size) { max_change = next_size; }
+      bufpos += next_mod;
+
+      fprintf (stderr, "COPY: %llu-%llu (%llu)\n",
+		  (unsigned long long)(total_size + (xoff_t)last_end),
+		  (unsigned long long)(total_size + (xoff_t)bufpos),
+		  (unsigned long long)(bufpos - last_end));
+      fprintf (stderr, "ADD:  %llu-%llu (%llu) is change %llu\n",
+		  (unsigned long long)(total_size + (xoff_t)bufpos),
+		  (unsigned long long)(total_size + (xoff_t)(bufpos + next_size)),
+		  (unsigned long long)next_size, (unsigned long long)num_change);
+
+      total_change += next_size;
+      num_change   += 1;
+
+      for (j = 0; j < next_size; j += 1, bufpos += 1)
+	{
+	  buf[bufpos] = (char)(lrand48 () >> 3);
+	}
+
+      last_end = bufpos;
+    }
+
+  fprintf (stderr, "COPY: %llu-%llu (%llu)\n",
+	  (unsigned long long)(total_size + last_end),
+	  (unsigned long long)(total_size + size), (unsigned long long)(size - last_end));
+
+  total_size += size;
+}
+
+/* Copy the file argv[1] to argv[2], passing each buffer through modify().  Optional
+   argv[3] overrides error_prob and argv[4] overrides mean_change.  Returns 0 on success. */
+int main(int argc, char **argv)
+{
+  main_file inp, out;
+  char *buf = malloc(BUFSZ);
+  int c, ret;
+  main_file_init(&inp);
+  main_file_init(&out);
+  option_force = 1;
+  if (argc < 3 || argc > 5)	/* argv[1] and argv[2] are required below */
+    {
+      fprintf (stderr, "usage: badcopy input output [byte_error_prob [mean_error_size]]\n");
+      return 1;
+    }
+  if (buf == NULL) { fprintf (stderr, "badcopy: out of memory\n"); return 1; }
+
+  if (argc > 4) { mean_change = atoi (argv[4]); }
+  if (argc > 3) { error_prob  = atof (argv[3]); }
+  fprintf (stderr, "mean change = %llu; error_prob = %0.10f\n", (unsigned long long) mean_change, error_prob);
+
+  /* Validate before the output file is opened for writing. */
+  if (error_prob < 0.0 || error_prob > 1.0)
+    {
+      fprintf (stderr, "warning: error probability out of range\n");
+      return 1;
+    }
+  if ((ret = main_file_open (&inp, argv[1], XO_READ)) != 0) {
+	  return 1;
+  }
+  if ((ret = main_file_open (&out, argv[2], XO_WRITE)) != 0) {
+	  return 1;
+  }
+
+  do
+    {
+      if ((ret = main_file_read (&inp, buf, BUFSZ, &c, "read failed")) != 0) {
+	  return 1;
+      }
+
+      if (c == 0) { break; }
+
+      modify (buf, c);
+
+      if ((ret = main_file_write (&out, buf, c, "write failed")) != 0) {
+	  return 1;
+      }
+    }
+  while (c == BUFSZ);	/* stop after the first short read */
+
+  if ((ret = main_file_close (&out))) { return 1; }
+
+  fprintf (stderr, "add_prob %f; %llu adds; total_change %llu of %llu bytes; add percentage %f; max add size %llu\n",
+	   error_prob, (unsigned long long) num_change, (unsigned long long) total_change, (unsigned long long) total_size, (double) total_change / (double) total_size, (unsigned long long) max_change);
+
+  return 0;
+}
diff --git a/third-party/xdelta3/xdelta3/configure.ac b/third-party/xdelta3/xdelta3/configure.ac
new file mode 100644
index 0000000000..5d81f38e88
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/configure.ac
@@ -0,0 +1,51 @@
+AC_INIT([Xdelta3], [3.1.1], [josh.macdonald@gmail.com], 
+	[xdelta3], [http://xdelta.org/])
+AC_PREREQ([2.68])
+AC_CONFIG_MACRO_DIR([m4])
+LT_INIT
+AM_INIT_AUTOMAKE([1.15 no-define foreign tar-ustar subdir-objects])
+AC_CONFIG_MACRO_DIRS([m4])
+
+AX_CHECK_ALIGNED_ACCESS_REQUIRED
+AC_PROG_CC
+AC_PROG_CXX
+
+AC_CHECK_SIZEOF(size_t)
+AC_CHECK_SIZEOF(unsigned int)
+AC_CHECK_SIZEOF(unsigned long)
+AC_CHECK_SIZEOF(unsigned long long)
+
+dnl --with-liblzma : yes requires liblzma, no skips the checks, otherwise it is optional.
+AC_ARG_WITH(
+    [liblzma],
+    [AS_HELP_STRING(
+        [--with-liblzma],
+        [build with liblzma support @<:@default=autodetect@:>@])],
+    [USE_LIBLZMA=$withval],
+    [USE_LIBLZMA=auto])
+if test "x$USE_LIBLZMA" != xno ; then
+    AC_CHECK_HEADERS([lzma.h],,[
+        if test "x$with_liblzma" = xyes ; then
+            AC_MSG_FAILURE([liblzma includes were not found])
+        fi])
+    AC_CHECK_LIB([lzma], [lzma_easy_buffer_encode],,[
+        if test "x$with_liblzma" = xyes ; then
+            AC_MSG_FAILURE([liblzma library was not found])
+        fi])
+fi
+
+#AM_PATH_PYTHON(,, [:])
+#AM_CONDITIONAL([HAVE_PYTHON], [test "$PYTHON" != :])
+#AX_PYTHON_DEVEL()
+#AX_PKG_SWIG(2.0.0,,)
+#AX_SWIG_PYTHON
+
+dnl --enable-debug-symbols : build with debug symbols?  Off unless explicitly enabled.
+AC_ARG_ENABLE([debug-symbols],
+   [AS_HELP_STRING([--enable-debug-symbols],[Build with debug symbols (default is NO)])],,[enable_debug_symbols=no])
+AM_CONDITIONAL([DEBUG_SYMBOLS], [test "x$enable_debug_symbols" = "xyes"])
+
+
+AC_CONFIG_HEADERS([config.h])
+AC_CONFIG_FILES([Makefile])
+AC_OUTPUT
diff --git a/third-party/xdelta3/xdelta3/cpp-btree/CMakeLists.txt b/third-party/xdelta3/xdelta3/cpp-btree/CMakeLists.txt
new file mode 100644
index 0000000000..d005e1582e
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/cpp-btree/CMakeLists.txt
@@ -0,0 +1,40 @@
+# Copyright 2013 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+cmake_minimum_required(VERSION 2.6)
+
+project(cppbtree CXX)
+
+option(build_tests "Build B-tree tests" OFF)
+add_definitions(-std=c++11)
+set(CMAKE_CXX_FLAGS "-g -O2")
+
+# CMake doesn't have a way to declare a pure template (header-only) library,
+# add_library(cppbtree btree.h btree_map.h btree_set.h 
+#             safe_btree.h safe_btree_map.h safe_btree_set.h)
+# set_target_properties(cppbtree PROPERTIES LINKER_LANGUAGE CXX)
+
+if(build_tests)
+  enable_testing()
+  include_directories($ENV{GTEST_ROOT}/include)
+  link_directories($ENV{GTEST_ROOT})
+  include_directories($ENV{GFLAGS_ROOT}/include)
+  link_directories($ENV{GFLAGS_ROOT}/lib)
+  add_executable(btree_test btree_test.cc btree_test_flags.cc)
+  add_executable(safe_btree_test safe_btree_test.cc btree_test_flags.cc)
+  add_executable(btree_bench btree_bench.cc btree_test_flags.cc)
+  target_link_libraries(btree_test gtest_main gtest gflags)
+  target_link_libraries(safe_btree_test gtest_main gtest gflags)
+  target_link_libraries(btree_bench gflags gtest)
+endif()
diff --git a/third-party/xdelta3/xdelta3/cpp-btree/COPYING b/third-party/xdelta3/xdelta3/cpp-btree/COPYING
new file mode 100644
index 0000000000..d645695673
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/cpp-btree/COPYING
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/third-party/xdelta3/xdelta3/cpp-btree/README b/third-party/xdelta3/xdelta3/cpp-btree/README
new file mode 100644
index 0000000000..319fe9bb3b
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/cpp-btree/README
@@ -0,0 +1,31 @@
+This library is a C++ template library and, as such, there is no
+library to build and install.  Copy the .h files and use them!
+
+See http://code.google.com/p/cpp-btree/wiki/UsageInstructions for
+details.
+
+----
+
+To build and run the provided tests, however, you will need to install
+CMake, the Google C++ Test framework, and the Google flags package.
+
+Download and install CMake from http://www.cmake.org
+
+Download and build the GoogleTest framework from
+http://code.google.com/p/googletest
+
+Download and install gflags from https://code.google.com/p/gflags
+
+Set GTEST_ROOT to the directory where GTEST was built.
+Set GFLAGS_ROOT to the directory prefix where GFLAGS is installed.
+
+export GTEST_ROOT=/path/for/gtest-x.y
+export GFLAGS_ROOT=/opt
+
+cmake . -Dbuild_tests=ON
+
+For example, to build on a Unix system with the clang++ compiler,
+
+export GTEST_ROOT=$HOME/src/googletest
+export GFLAGS_ROOT=/opt
+cmake . -G "Unix Makefiles" -Dbuild_tests=ON -DCMAKE_CXX_COMPILER=clang++
diff --git a/third-party/xdelta3/xdelta3/cpp-btree/btree.h b/third-party/xdelta3/xdelta3/cpp-btree/btree.h
new file mode 100644
index 0000000000..cdd2b5265e
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/cpp-btree/btree.h
@@ -0,0 +1,2394 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// A btree implementation of the STL set and map interfaces. A btree is both
+// smaller and faster than STL set/map. The red-black tree implementation of
+// STL set/map has an overhead of 3 pointers (left, right and parent) plus the
+// node color information for each stored value. So a set<int32> consumes 20
+// bytes for each value stored. This btree implementation stores multiple
+// values on fixed size nodes (usually 256 bytes) and doesn't store child
+// pointers for leaf nodes. The result is that a btree_set<int32> may use much
+// less memory per stored value. For the random insertion benchmark in
+// btree_test.cc, a btree_set<int32> with node-size of 256 uses 4.9 bytes per
+// stored value.
+//
+// The packing of multiple values on to each node of a btree has another effect
+// besides better space utilization: better cache locality due to fewer cache
+// lines being accessed. Better cache locality translates into faster
+// operations.
+//
+// CAVEATS
+//
+// Insertions and deletions on a btree can cause splitting, merging or
+// rebalancing of btree nodes. And even without these operations, insertions
+// and deletions on a btree will move values around within a node. In both
+// cases, the result is that insertions and deletions can invalidate iterators
+// pointing to values other than the one being inserted/deleted. This is
+// notably different from STL set/map which takes care to not invalidate
+// iterators on insert/erase except, of course, for iterators pointing to the
+// value being erased.  A partial workaround when erasing is available:
+// erase() returns an iterator pointing to the item just after the one that was
+// erased (or end() if none exists).  See also safe_btree.
+
+// PERFORMANCE
+//
+//   btree_bench --benchmarks=. 2>&1 | ./benchmarks.awk
+//
+// Run on pmattis-warp.nyc (4 X 2200 MHz CPUs); 2010/03/04-15:23:06
+// Benchmark                 STL(ns) B-Tree(ns) @    <size>
+// --------------------------------------------------------
+// BM_set_int32_insert        1516      608  +59.89%  <256>    [40.0,  5.2]
+// BM_set_int32_lookup        1160      414  +64.31%  <256>    [40.0,  5.2]
+// BM_set_int32_fulllookup     960      410  +57.29%  <256>    [40.0,  4.4]
+// BM_set_int32_delete        1741      528  +69.67%  <256>    [40.0,  5.2]
+// BM_set_int32_queueaddrem   3078     1046  +66.02%  <256>    [40.0,  5.5]
+// BM_set_int32_mixedaddrem   3600     1384  +61.56%  <256>    [40.0,  5.3]
+// BM_set_int32_fifo           227      113  +50.22%  <256>    [40.0,  4.4]
+// BM_set_int32_fwditer        158       26  +83.54%  <256>    [40.0,  5.2]
+// BM_map_int32_insert        1551      636  +58.99%  <256>    [48.0, 10.5]
+// BM_map_int32_lookup        1200      508  +57.67%  <256>    [48.0, 10.5]
+// BM_map_int32_fulllookup     989      487  +50.76%  <256>    [48.0,  8.8]
+// BM_map_int32_delete        1794      628  +64.99%  <256>    [48.0, 10.5]
+// BM_map_int32_queueaddrem   3189     1266  +60.30%  <256>    [48.0, 11.6]
+// BM_map_int32_mixedaddrem   3822     1623  +57.54%  <256>    [48.0, 10.9]
+// BM_map_int32_fifo           151      134  +11.26%  <256>    [48.0,  8.8]
+// BM_map_int32_fwditer        161       32  +80.12%  <256>    [48.0, 10.5]
+// BM_set_int64_insert        1546      636  +58.86%  <256>    [40.0, 10.5]
+// BM_set_int64_lookup        1200      512  +57.33%  <256>    [40.0, 10.5]
+// BM_set_int64_fulllookup     971      487  +49.85%  <256>    [40.0,  8.8]
+// BM_set_int64_delete        1745      616  +64.70%  <256>    [40.0, 10.5]
+// BM_set_int64_queueaddrem   3163     1195  +62.22%  <256>    [40.0, 11.6]
+// BM_set_int64_mixedaddrem   3760     1564  +58.40%  <256>    [40.0, 10.9]
+// BM_set_int64_fifo           146      103  +29.45%  <256>    [40.0,  8.8]
+// BM_set_int64_fwditer        162       31  +80.86%  <256>    [40.0, 10.5]
+// BM_map_int64_insert        1551      720  +53.58%  <256>    [48.0, 20.7]
+// BM_map_int64_lookup        1214      612  +49.59%  <256>    [48.0, 20.7]
+// BM_map_int64_fulllookup     994      592  +40.44%  <256>    [48.0, 17.2]
+// BM_map_int64_delete        1778      764  +57.03%  <256>    [48.0, 20.7]
+// BM_map_int64_queueaddrem   3189     1547  +51.49%  <256>    [48.0, 20.9]
+// BM_map_int64_mixedaddrem   3779     1887  +50.07%  <256>    [48.0, 21.6]
+// BM_map_int64_fifo           147      145   +1.36%  <256>    [48.0, 17.2]
+// BM_map_int64_fwditer        162       41  +74.69%  <256>    [48.0, 20.7]
+// BM_set_string_insert       1989     1966   +1.16%  <256>    [64.0, 44.5]
+// BM_set_string_lookup       1709     1600   +6.38%  <256>    [64.0, 44.5]
+// BM_set_string_fulllookup   1573     1529   +2.80%  <256>    [64.0, 35.4]
+// BM_set_string_delete       2520     1920  +23.81%  <256>    [64.0, 44.5]
+// BM_set_string_queueaddrem  4706     4309   +8.44%  <256>    [64.0, 48.3]
+// BM_set_string_mixedaddrem  5080     4654   +8.39%  <256>    [64.0, 46.7]
+// BM_set_string_fifo          318      512  -61.01%  <256>    [64.0, 35.4]
+// BM_set_string_fwditer       182       93  +48.90%  <256>    [64.0, 44.5]
+// BM_map_string_insert       2600     2227  +14.35%  <256>    [72.0, 55.8]
+// BM_map_string_lookup       2068     1730  +16.34%  <256>    [72.0, 55.8]
+// BM_map_string_fulllookup   1859     1618  +12.96%  <256>    [72.0, 44.0]
+// BM_map_string_delete       3168     2080  +34.34%  <256>    [72.0, 55.8]
+// BM_map_string_queueaddrem  5840     4701  +19.50%  <256>    [72.0, 59.4]
+// BM_map_string_mixedaddrem  6400     5200  +18.75%  <256>    [72.0, 57.8]
+// BM_map_string_fifo          398      596  -49.75%  <256>    [72.0, 44.0]
+// BM_map_string_fwditer       243      113  +53.50%  <256>    [72.0, 55.8]
+
+#ifndef UTIL_BTREE_BTREE_H__
+#define UTIL_BTREE_BTREE_H__
+
+#include <assert.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/types.h>
+#include <algorithm>
+#include <functional>
+#include <iostream>
+#include <iterator>
+#include <limits>
+#include <type_traits>
+#include <new>
+#include <ostream>
+#include <string>
+#include <utility>
+
+#ifndef NDEBUG
+#define NDEBUG 1
+#endif
+
+namespace btree {
+
+// Inside a btree method, if we just call swap(), it will choose the
+// btree::swap method, which we don't want. And we can't say ::swap
+// because then MSVC won't pickup any std::swap() implementations. We
+// can't just use std::swap() directly because then we don't get the
+// specialization for types outside the std namespace. So the solution
+// is to have a special swap helper function whose name doesn't
+// collide with other swap functions defined by the btree classes.
+template <typename T>
+inline void btree_swap_helper(T &a, T &b) {
+  using std::swap;
+  swap(a, b);
+}
+
+// A template helper used to select A or B based on a condition.
+template<bool cond, typename A, typename B>
+struct if_{
+  typedef A type;
+};
+
+template<typename A, typename B>
+struct if_<false, A, B> {
+  typedef B type;
+};
+
+// Types small_ and big_ are defined so that sizeof(small_) < sizeof(big_).
+typedef char small_;
+
+struct big_ {
+  char dummy[2];
+};
+
+// A compile-time assertion.
+template <bool>
+struct CompileAssert {
+};
+
+#define COMPILE_ASSERT(expr, msg) \
+  typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
+
+// A helper type used to indicate that a key-compare-to functor has been
+// provided. A user can specify a key-compare-to functor by doing:
+//
+//  struct MyStringComparer
+//      : public btree::btree_key_compare_to_tag {
+//    int operator()(const string &a, const string &b) const {
+//      return a.compare(b);
+//    }
+//  };
+//
+// Note that the return type is an int and not a bool. There is a
+// COMPILE_ASSERT which enforces this return type.
+struct btree_key_compare_to_tag {
+};
+
+// A helper class that indicates if the Compare parameter is derived from
+// btree_key_compare_to_tag.
+template <typename Compare>
+struct btree_is_key_compare_to
+    : public std::is_convertible<Compare, btree_key_compare_to_tag> {
+};
+
+// A helper class to convert a boolean comparison into a three-way
+// "compare-to" comparison that returns a negative value to indicate
+// less-than, zero to indicate equality and a positive value to
+// indicate greater-than. This helper class is specialized for
+// less<string> and greater<string>. The btree_key_compare_to_adapter
+// class is provided so that btree users automatically get the more
+// efficient compare-to code when using common google string types
+// with common comparison functors.
+template <typename Compare>
+struct btree_key_compare_to_adapter : Compare {
+  btree_key_compare_to_adapter() { }
+  btree_key_compare_to_adapter(const Compare &c) : Compare(c) { }
+  btree_key_compare_to_adapter(const btree_key_compare_to_adapter<Compare> &c)
+      : Compare(c) {
+  }
+};
+
+template <>
+struct btree_key_compare_to_adapter<std::less<std::string> >
+    : public btree_key_compare_to_tag {
+  btree_key_compare_to_adapter() {}
+  btree_key_compare_to_adapter(const std::less<std::string>&) {}
+  btree_key_compare_to_adapter(
+      const btree_key_compare_to_adapter<std::less<std::string> >&) {}
+  int operator()(const std::string &a, const std::string &b) const {
+    return a.compare(b);
+  }
+};
+
+template <>
+struct btree_key_compare_to_adapter<std::greater<std::string> >
+    : public btree_key_compare_to_tag {
+  btree_key_compare_to_adapter() {}
+  btree_key_compare_to_adapter(const std::greater<std::string>&) {}
+  btree_key_compare_to_adapter(
+      const btree_key_compare_to_adapter<std::greater<std::string> >&) {}
+  int operator()(const std::string &a, const std::string &b) const {
+    return b.compare(a);
+  }
+};
+
+// A helper class that allows a compare-to functor to behave like a plain
+// compare functor. This specialization is used when we do not have a
+// compare-to functor.
+template <typename Key, typename Compare, bool HaveCompareTo>
+struct btree_key_comparer {
+  btree_key_comparer() {}
+  btree_key_comparer(Compare c) : comp(c) {}
+  static bool bool_compare(const Compare &comp, const Key &x, const Key &y) {
+    return comp(x, y);
+  }
+  bool operator()(const Key &x, const Key &y) const {
+    return bool_compare(comp, x, y);
+  }
+  Compare comp;
+};
+
+// A specialization of btree_key_comparer when a compare-to functor is
+// present. We need a plain (boolean) comparison in some parts of the btree
+// code, such as insert-with-hint.
+template <typename Key, typename Compare>
+struct btree_key_comparer<Key, Compare, true> {
+  btree_key_comparer() {}
+  btree_key_comparer(Compare c) : comp(c) {}
+  static bool bool_compare(const Compare &comp, const Key &x, const Key &y) {
+    return comp(x, y) < 0;
+  }
+  bool operator()(const Key &x, const Key &y) const {
+    return bool_compare(comp, x, y);
+  }
+  Compare comp;
+};
+
+// A helper function to compare two keys using the specified compare
+// functor. This dispatches to the appropriate btree_key_comparer comparison,
+// depending on whether we have a compare-to functor or not (which depends on
+// whether Compare is derived from btree_key_compare_to_tag).
+template <typename Key, typename Compare>
+static bool btree_compare_keys(
+    const Compare &comp, const Key &x, const Key &y) {
+  typedef btree_key_comparer<Key, Compare,
+      btree_is_key_compare_to<Compare>::value> key_comparer;
+  return key_comparer::bool_compare(comp, x, y);
+}
+
+template <typename Key, typename Compare,
+          typename Alloc, int TargetNodeSize, int ValueSize>
+struct btree_common_params {
+  // If Compare is derived from btree_key_compare_to_tag then use it as the
+  // key_compare type. Otherwise, use btree_key_compare_to_adapter<> which will
+  // fall-back to Compare if we don't have an appropriate specialization.
+  typedef typename if_<
+    btree_is_key_compare_to<Compare>::value,
+    Compare, btree_key_compare_to_adapter<Compare> >::type key_compare;
+  // A type which indicates if we have a key-compare-to functor or a plain old
+  // key-compare functor.
+  typedef btree_is_key_compare_to<key_compare> is_key_compare_to;
+
+  typedef Alloc allocator_type;
+  typedef Key key_type;
+  typedef ssize_t size_type;
+  typedef ptrdiff_t difference_type;
+
+  enum {
+    kTargetNodeSize = TargetNodeSize,
+
+    // Available space for values.  This is largest for leaf nodes,
+    // which have an overhead of at least two pointers.
+    kNodeValueSpace = TargetNodeSize - 2 * sizeof(void*),
+  };
+
+  // This is an integral type large enough to hold as many
+  // ValueSize-values as will fit a node of TargetNodeSize bytes.
+  typedef typename if_<
+    (kNodeValueSpace / ValueSize) >= 256,
+    uint16_t,
+    uint8_t>::type node_count_type;
+};
+
+// A parameters structure for holding the type parameters for a btree_map.
+template <typename Key, typename Data, typename Compare,
+          typename Alloc, int TargetNodeSize>
+struct btree_map_params
+    : public btree_common_params<Key, Compare, Alloc, TargetNodeSize,
+                                 sizeof(Key) + sizeof(Data)> {
+  typedef Data data_type;
+  typedef Data mapped_type;
+  typedef std::pair<const Key, data_type> value_type;
+  typedef std::pair<Key, data_type> mutable_value_type;
+  typedef value_type* pointer;
+  typedef const value_type* const_pointer;
+  typedef value_type& reference;
+  typedef const value_type& const_reference;
+
+  enum {
+    kValueSize = sizeof(Key) + sizeof(data_type),
+  };
+
+  static const Key& key(const value_type &x) { return x.first; }
+  static const Key& key(const mutable_value_type &x) { return x.first; }
+  static void swap(mutable_value_type *a, mutable_value_type *b) {
+    btree_swap_helper(a->first, b->first);
+    btree_swap_helper(a->second, b->second);
+  }
+};
+
+// A parameters structure for holding the type parameters for a btree_set.
+template <typename Key, typename Compare, typename Alloc, int TargetNodeSize>
+struct btree_set_params
+    : public btree_common_params<Key, Compare, Alloc, TargetNodeSize,
+                                 sizeof(Key)> {
+  typedef std::false_type data_type;
+  typedef std::false_type mapped_type;
+  typedef Key value_type;
+  typedef value_type mutable_value_type;
+  typedef value_type* pointer;
+  typedef const value_type* const_pointer;
+  typedef value_type& reference;
+  typedef const value_type& const_reference;
+
+  enum {
+    kValueSize = sizeof(Key),
+  };
+
+  static const Key& key(const value_type &x) { return x; }
+  static void swap(mutable_value_type *a, mutable_value_type *b) {
+    btree_swap_helper<mutable_value_type>(*a, *b);
+  }
+};
+
+// An adapter class that converts a lower-bound compare into an upper-bound
+// compare.
+template <typename Key, typename Compare>
+struct btree_upper_bound_adapter : public Compare {
+  btree_upper_bound_adapter(Compare c) : Compare(c) {}
+  bool operator()(const Key &a, const Key &b) const {
+    return !static_cast<const Compare&>(*this)(b, a);
+  }
+};
+
+template <typename Key, typename CompareTo>
+struct btree_upper_bound_compare_to_adapter : public CompareTo {
+  btree_upper_bound_compare_to_adapter(CompareTo c) : CompareTo(c) {}
+  int operator()(const Key &a, const Key &b) const {
+    return static_cast<const CompareTo&>(*this)(b, a);
+  }
+};
+
+// Dispatch helper class for using linear search with plain compare.
+template <typename K, typename N, typename Compare>
+struct btree_linear_search_plain_compare {
+  static int lower_bound(const K &k, const N &n, Compare comp)  {
+    return n.linear_search_plain_compare(k, 0, n.count(), comp);
+  }
+  static int upper_bound(const K &k, const N &n, Compare comp)  {
+    typedef btree_upper_bound_adapter<K, Compare> upper_compare;
+    return n.linear_search_plain_compare(k, 0, n.count(), upper_compare(comp));
+  }
+};
+
+// Dispatch helper class for using linear search with compare-to
+template <typename K, typename N, typename CompareTo>
+struct btree_linear_search_compare_to {
+  static int lower_bound(const K &k, const N &n, CompareTo comp)  {
+    return n.linear_search_compare_to(k, 0, n.count(), comp);
+  }
+  static int upper_bound(const K &k, const N &n, CompareTo comp)  {
+    typedef btree_upper_bound_adapter<K,
+        btree_key_comparer<K, CompareTo, true> > upper_compare;
+    return n.linear_search_plain_compare(k, 0, n.count(), upper_compare(comp));
+  }
+};
+
+// Dispatch helper class for using binary search with plain compare.
+template <typename K, typename N, typename Compare>
+struct btree_binary_search_plain_compare {
+  static int lower_bound(const K &k, const N &n, Compare comp)  {
+    return n.binary_search_plain_compare(k, 0, n.count(), comp);
+  }
+  static int upper_bound(const K &k, const N &n, Compare comp)  {
+    typedef btree_upper_bound_adapter<K, Compare> upper_compare;
+    return n.binary_search_plain_compare(k, 0, n.count(), upper_compare(comp));
+  }
+};
+
+// Dispatch helper: binary search using the caller's compare-to functor.
+template <typename K, typename N, typename CompareTo>
+struct btree_binary_search_compare_to {
+  static int lower_bound(const K &k, const N &n, CompareTo comp)  {
+    return n.binary_search_compare_to(k, 0, n.count(), comp);  // not CompareTo()
+  }
+  static int upper_bound(const K &k, const N &n, CompareTo comp)  {
+    typedef btree_upper_bound_adapter<K,
+        btree_key_comparer<K, CompareTo, true> > upper_compare;
+    return n.binary_search_plain_compare(k, 0, n.count(), upper_compare(comp));
+  }
+};
+
+// A node in the btree. The same node type is used for both internal
+// and leaf nodes in the btree, though the nodes are allocated in such a way
+// that the children array is only valid in internal nodes.
+template <typename Params>
+class btree_node {
+ public:
+  typedef Params params_type;
+  typedef btree_node<Params> self_type;
+  typedef typename Params::key_type key_type;
+  typedef typename Params::data_type data_type;
+  typedef typename Params::value_type value_type;
+  typedef typename Params::mutable_value_type mutable_value_type;
+  typedef typename Params::pointer pointer;
+  typedef typename Params::const_pointer const_pointer;
+  typedef typename Params::reference reference;
+  typedef typename Params::const_reference const_reference;
+  typedef typename Params::key_compare key_compare;
+  typedef typename Params::size_type size_type;
+  typedef typename Params::difference_type difference_type;
+  // Typedefs for the various types of node searches.
+  typedef btree_linear_search_plain_compare<
+    key_type, self_type, key_compare> linear_search_plain_compare_type;
+  typedef btree_linear_search_compare_to<
+    key_type, self_type, key_compare> linear_search_compare_to_type;
+  typedef btree_binary_search_plain_compare<
+    key_type, self_type, key_compare> binary_search_plain_compare_type;
+  typedef btree_binary_search_compare_to<
+    key_type, self_type, key_compare> binary_search_compare_to_type;
+  // If we have a valid key-compare-to type, use linear_search_compare_to,
+  // otherwise use linear_search_plain_compare.
+  typedef typename if_<
+    Params::is_key_compare_to::value,
+    linear_search_compare_to_type,
+    linear_search_plain_compare_type>::type linear_search_type;
+  // If we have a valid key-compare-to type, use binary_search_compare_to,
+  // otherwise use binary_search_plain_compare.
+  typedef typename if_<
+    Params::is_key_compare_to::value,
+    binary_search_compare_to_type,
+    binary_search_plain_compare_type>::type binary_search_type;
+  // If the key is an integral or floating point type, use linear search which
+  // is faster than binary search for such types. Might be wise to also
+  // configure linear search based on node-size.
+  typedef typename if_<
+    std::is_integral<key_type>::value ||
+    std::is_floating_point<key_type>::value,
+    linear_search_type, binary_search_type>::type search_type;
+
+  struct base_fields {
+    typedef typename Params::node_count_type field_type;
+
+    // True for leaf nodes; init_leaf() sets it and init_internal() clears it.
+    bool leaf;
+    // Index of this node within its parent's children array (see set_child()).
+    field_type position;
+    // The maximum number of values the node can hold.
+    field_type max_count;
+    // The number of values currently stored in the node.
+    field_type count;
+    // A pointer to the node's parent.
+    btree_node *parent;
+  };
+
+  enum {
+    kValueSize = params_type::kValueSize,
+    kTargetNodeSize = params_type::kTargetNodeSize,
+
+    // Compute how many values we can fit onto a leaf node.
+    kNodeTargetValues = (kTargetNodeSize - sizeof(base_fields)) / kValueSize,
+    // We need a minimum of 3 values per internal node in order to perform
+    // splitting (1 value for each of the two nodes involved in the split and
+    // 1 value propagated to the parent as the delimiter for the split).
+    kNodeValues = kNodeTargetValues >= 3 ? kNodeTargetValues : 3,
+
+    kExactMatch = 1 << 30,  // Flag bit OR'ed into compare-to search results.
+    kMatchMask = kExactMatch - 1,  // All bits below the kExactMatch flag.
+  };
+
+  struct leaf_fields : public base_fields {
+    // The array of values. Only the first count entries are constructed; a node
+    // allocated with a smaller max_count has storage for only max_count entries.
+    mutable_value_type values[kNodeValues];
+  };
+
+  struct internal_fields : public leaf_fields {
+    // The array of child pointers. The keys in children[i] are all less than
+    // key(i). The keys in children[i + 1] are all greater than key(i). There
+    // are always count + 1 children.
+    btree_node *children[kNodeValues + 1];
+  };
+
+  struct root_fields : public internal_fields {
+    btree_node *rightmost;  // Only valid on the root node; read via rightmost().
+    size_type size;  // Only valid on the root node; read via size().
+  };
+
+ public:
+  // Getter for whether this is a leaf node or not (there is no setter). This
+  // value doesn't change after the node is created.
+  bool leaf() const { return fields_.leaf; }
+
+  // Getter/setter for the position of this node in its parent.
+  int position() const { return fields_.position; }
+  void set_position(int v) { fields_.position = v; }
+
+  // Getter/setter for the number of values stored in this node.
+  int count() const { return fields_.count; }
+  void set_count(int v) { fields_.count = v; }
+  int max_count() const { return fields_.max_count; }
+
+  // Getter for the parent of this node.
+  btree_node* parent() const { return fields_.parent; }
+  // Getter for whether the node is the root of the tree. The parent of the
+  // root of the tree is the leftmost node in the tree which is guaranteed to
+  // be a leaf.
+  bool is_root() const { return parent()->leaf(); }
+  void make_root() {
+    assert(fields_.parent->is_root());  // the current parent must be the root
+    fields_.parent = parent()->parent();  // take over the old root's parent link
+  }
+
+  // Getter for the rightmost root node field. Only valid on the root node.
+  btree_node* rightmost() const { return fields_.rightmost; }
+  btree_node** mutable_rightmost() { return &fields_.rightmost; }
+
+  // Getter for the size root node field. Only valid on the root node.
+  size_type size() const { return fields_.size; }
+  size_type* mutable_size() { return &fields_.size; }
+
+  // Getters for the key/value at position i in the node.
+  const key_type& key(int i) const {
+    return params_type::key(fields_.values[i]);
+  }
+  reference value(int i) {
+    return reinterpret_cast<reference>(fields_.values[i]);
+  }
+  const_reference value(int i) const {
+    return reinterpret_cast<const_reference>(fields_.values[i]);
+  }
+  mutable_value_type* mutable_value(int i) {
+    return &fields_.values[i];
+  }
+
+  // Exchanges value i of this node with value j of node x via params_type::swap.
+  void value_swap(int i, btree_node *x, int j) {
+    params_type::swap(&fields_.values[i], &x->fields_.values[j]);
+  }
+
+  // Getters/setter for the child at position i in the node.
+  btree_node* child(int i) const { return fields_.children[i]; }
+  btree_node** mutable_child(int i) { return &fields_.children[i]; }
+  void set_child(int i, btree_node *c) {
+    fields_.children[i] = c;  // install the pointer in slot i
+    c->fields_.position = i;  // back-link the child to that slot...
+    c->fields_.parent = this;  // ...and to this node
+  }
+
+  // Returns the position of the first value whose key is not less than k.
+  template <typename Compare>
+  int lower_bound(const key_type &k, const Compare &comp) const {
+    return search_type::lower_bound(k, *this, comp);
+  }
+  // Returns the position of the first value whose key is greater than k.
+  template <typename Compare>
+  int upper_bound(const key_type &k, const Compare &comp) const {
+    return search_type::upper_bound(k, *this, comp);
+  }
+
+  // Linear scan from s towards e using a plain comparator. Returns the first
+  // position whose key is not less than k, or the position where the scan ended.
+  template <typename Compare>
+  int linear_search_plain_compare(
+      const key_type &k, int s, int e, const Compare &comp) const {
+    // Step past every leading key that compares less than k.
+    for (; s < e; ++s) {
+      if (!btree_compare_keys(comp, key(s), k)) {
+        return s;
+      }
+    }
+    return s;
+  }
+
+  // Linear scan from s towards e using a compare-to functor. An exact hit is
+  // reported by OR-ing kExactMatch into the returned position.
+  template <typename Compare>
+  int linear_search_compare_to(
+      const key_type &k, int s, int e, const Compare &comp) const {
+    for (; s < e; ++s) {
+      const int order = comp(key(s), k);
+      if (order > 0) {
+        return s;  // key(s) compares greater than k: no exact match
+      }
+      if (order == 0) {
+        return s | kExactMatch;
+      }
+    }
+    return s;
+  }
+
+  // Binary search between s and e using a plain comparator. Returns the first
+  // position whose key is not less than k.
+  template <typename Compare>
+  int binary_search_plain_compare(
+      const key_type &k, int s, int e, const Compare &comp) const {
+    while (s != e) {
+      const int mid = (s + e) / 2;
+      if (!btree_compare_keys(comp, key(mid), k)) {
+        e = mid;  // key(mid) is not less than k: answer is at or before mid
+      } else {
+        s = mid + 1;  // key(mid) is less than k: answer is after mid
+      }
+    }
+    return s;
+  }
+
+  // Binary search between s and e using a compare-to functor. Returns the first
+  // position whose key is not less than k, OR-ed with kExactMatch on a hit.
+  template <typename CompareTo>
+  int binary_search_compare_to(
+      const key_type &k, int s, int e, const CompareTo &comp) const {
+    while (s != e) {
+      const int mid = (s + e) / 2;
+      const int order = comp(key(mid), k);
+      if (order == 0) {
+        // key(mid) equals k, but an earlier equal key may exist. Keep
+        // searching [s, mid): if nothing there equals k the nested call
+        // returns mid itself, so the result is always an exact match.
+        const int first = binary_search_compare_to(k, s, mid, comp);
+        return first | kExactMatch;
+      }
+      if (order < 0) {
+        s = mid + 1;
+      } else {
+        e = mid;
+      }
+    }
+    return s;
+  }
+
+  // Inserts the value x at position i, shifting all existing values and
+  // children at positions >= i to the right by 1.
+  void insert_value(int i, const value_type &x);
+
+  // Removes the value at position i, shifting all existing values and children
+  // at positions > i to the left by 1.
+  void remove_value(int i);
+
+  // Rebalances a node with its right sibling.
+  void rebalance_right_to_left(btree_node *sibling, int to_move);
+  void rebalance_left_to_right(btree_node *sibling, int to_move);
+
+  // Splits a node, moving a portion of the node's values to its right sibling.
+  void split(btree_node *sibling, int insert_position);
+
+  // Merges a node with its right sibling, moving all of the values and the
+  // delimiting key in the parent node onto itself.
+  void merge(btree_node *sibling);
+
+  // Swap the contents of "this" and "src".
+  void swap(btree_node *src);
+
+  // Node allocation/deletion routines.
+  static btree_node* init_leaf(
+      leaf_fields *f, btree_node *parent, int max_count) {
+    f->leaf = true;
+    f->parent = parent;
+    f->position = 0;
+    f->count = 0;
+    f->max_count = max_count;
+    if (!NDEBUG) {
+      // Zero the value slots when NDEBUG evaluates to 0.
+      memset(&f->values, 0, max_count * sizeof(value_type));
+    }
+    return reinterpret_cast<btree_node*>(f);
+  }
+  static btree_node* init_internal(internal_fields *f, btree_node *parent) {
+    btree_node *const node = init_leaf(f, parent, kNodeValues);
+    f->leaf = false;  // init_leaf() marked it as a leaf; undo that
+    if (!NDEBUG) {
+      memset(f->children, 0, sizeof(f->children));
+    }
+    return node;
+  }
+  static btree_node* init_root(root_fields *f, btree_node *parent) {
+    btree_node *const node = init_internal(f, parent);
+    f->size = parent->count();  // size starts as the value count of parent
+    f->rightmost = parent;
+    return node;
+  }
+  void destroy() {
+    // Destroy each constructed value in index order.
+    int i = 0;
+    while (i < count()) value_destroy(i++);
+  }
+
+ private:
+  void value_init(int i) {
+    new (&fields_.values[i]) mutable_value_type;
+  }
+  void value_init(int i, const value_type &x) {
+    new (&fields_.values[i]) mutable_value_type(x);
+  }
+  void value_destroy(int i) {
+    fields_.values[i].~mutable_value_type();
+  }
+
+ private:
+  root_fields fields_;
+
+ private:
+  btree_node(const btree_node&);
+  void operator=(const btree_node&);
+};
+
+template <typename Node, typename Reference, typename Pointer>
+struct btree_iterator {  // Bidirectional iterator over the values held in btree nodes.
+  using key_type = typename Node::key_type;
+  using size_type = typename Node::size_type;
+  using difference_type = typename Node::difference_type;
+  using params_type = typename Node::params_type;
+
+  using node_type = Node;
+  using normal_node = typename std::remove_const<Node>::type;
+  using const_node = const Node;
+  using value_type = typename params_type::value_type;
+  using normal_pointer = typename params_type::pointer;
+  using normal_reference = typename params_type::reference;
+  using const_pointer = typename params_type::const_pointer;
+  using const_reference = typename params_type::const_reference;
+
+  using pointer = Pointer;
+  using reference = Reference;
+  using iterator_category = std::bidirectional_iterator_tag;
+
+  using iterator = btree_iterator<
+    normal_node, normal_reference, normal_pointer>;
+  using const_iterator = btree_iterator<
+    const_node, const_reference, const_pointer>;
+  using self_type = btree_iterator<Node, Reference, Pointer>;
+
+  // Constructs an iterator that points at nothing (NULL node, position -1).
+  btree_iterator() : node(NULL), position(-1) {}
+
+  // Constructs an iterator at position p of node n.
+  btree_iterator(Node *n, int p) : node(n), position(p) {}
+
+  // Constructs from the mutable iterator type, copying its node and position.
+  // This lets an iterator convert to a const_iterator; when this type is
+  // `iterator` itself the signature is simply the copy constructor.
+  btree_iterator(const iterator &x)
+      : node(x.node), position(x.position) {
+  }
+
+  // Increment/decrement the iterator.
+  void increment() {
+    const bool stayed_in_leaf = node->leaf() && ++position < node->count();
+    if (!stayed_in_leaf) {
+      increment_slow();  // not on a leaf, or stepped past the leaf's values
+    }
+  }
+  void increment_by(int count);
+  void increment_slow();
+
+  void decrement() {
+    const bool stayed_in_leaf = node->leaf() && --position >= 0;
+    if (!stayed_in_leaf) {
+      decrement_slow();  // not on a leaf, or stepped before the leaf's values
+    }
+  }
+  void decrement_slow();
+
+  bool operator==(const const_iterator &x) const {
+    return position == x.position && node == x.node;
+  }
+  bool operator!=(const const_iterator &x) const {
+    return !operator==(x);
+  }
+
+  // Accessors for the key/value the iterator is pointing at.
+  const key_type& key() const {
+    return node->key(position);
+  }
+  reference operator*() const {
+    return node->value(position);
+  }
+  pointer operator->() const {
+    return &node->value(position);
+  }
+
+  self_type& operator++() {
+    increment();
+    return *this;
+  }
+  self_type& operator--() {
+    decrement();
+    return *this;
+  }
+  self_type operator++(int) {
+    self_type before(*this);
+    increment();
+    return before;
+  }
+  self_type operator--(int) {
+    self_type before(*this);
+    decrement();
+    return before;
+  }
+
+  // The node in the tree the iterator is pointing at.
+  Node *node;
+  // The position within the node of the tree the iterator is pointing at.
+  int position;
+};
+
+// Dispatcher that routes btree::internal_locate to its plain-compare variant.
+struct btree_internal_locate_plain_compare {
+  template <typename Key, typename Tree, typename IterType>
+  static std::pair<IterType, int> dispatch(const Key &k, const Tree &t, IterType iter) {
+    return t.internal_locate_plain_compare(k, iter);
+  }
+};
+
+// Dispatcher that routes btree::internal_locate to its compare-to variant.
+struct btree_internal_locate_compare_to {
+  template <typename Key, typename Tree, typename IterType>
+  static std::pair<IterType, int> dispatch(const Key &k, const Tree &t, IterType iter) {
+    return t.internal_locate_compare_to(k, iter);
+  }
+};
+
+template <typename Params>
+class btree : public Params::key_compare {
+  typedef btree<Params> self_type;
+  typedef btree_node<Params> node_type;
+  typedef typename node_type::base_fields base_fields;
+  typedef typename node_type::leaf_fields leaf_fields;
+  typedef typename node_type::internal_fields internal_fields;
+  typedef typename node_type::root_fields root_fields;
+  typedef typename Params::is_key_compare_to is_key_compare_to;
+
+  friend struct btree_internal_locate_plain_compare;
+  friend struct btree_internal_locate_compare_to;
+  typedef typename if_<
+    is_key_compare_to::value,
+    btree_internal_locate_compare_to,
+    btree_internal_locate_plain_compare>::type internal_locate_type;
+
+  enum {
+    kNodeValues = node_type::kNodeValues,
+    kMinNodeValues = kNodeValues / 2,
+    kValueSize = node_type::kValueSize,
+    kExactMatch = node_type::kExactMatch,
+    kMatchMask = node_type::kMatchMask,
+  };
+
+  // A helper class to get the empty base class optimization for 0-size
+  // allocators. Base is internal_allocator_type.
+  // (e.g. empty_base_handle<internal_allocator_type, node_type*>). If Base is
+  // 0-size, the compiler doesn't have to reserve any space for it and
+  // sizeof(empty_base_handle) will simply be sizeof(Data). Google [empty base
+  // class optimization] for more details.
+  template <typename Base, typename Data>
+  struct empty_base_handle : public Base {
+    empty_base_handle(const Base &b, const Data &d)
+        : Base(b),
+          data(d) {
+    }
+    Data data;
+  };
+
+  struct node_stats {  // Tally of leaf and internal nodes; see internal_stats().
+    node_stats(ssize_t l, ssize_t i) : leaf_nodes(l), internal_nodes(i) {}
+
+    // Adds the counts of x onto this tally.
+    node_stats& operator+=(const node_stats &x) {
+      internal_nodes += x.internal_nodes;
+      leaf_nodes += x.leaf_nodes;
+      return *this;
+    }
+
+    // Number of leaf nodes counted.
+    ssize_t leaf_nodes;
+    // Number of internal (non-leaf) nodes counted.
+    ssize_t internal_nodes;
+  };
+
+ public:
+  typedef Params params_type;
+  typedef typename Params::key_type key_type;
+  typedef typename Params::data_type data_type;
+  typedef typename Params::mapped_type mapped_type;
+  typedef typename Params::value_type value_type;
+  typedef typename Params::key_compare key_compare;
+  typedef typename Params::pointer pointer;
+  typedef typename Params::const_pointer const_pointer;
+  typedef typename Params::reference reference;
+  typedef typename Params::const_reference const_reference;
+  typedef typename Params::size_type size_type;
+  typedef typename Params::difference_type difference_type;
+  typedef btree_iterator<node_type, reference, pointer> iterator;
+  typedef typename iterator::const_iterator const_iterator;
+  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+  typedef std::reverse_iterator<iterator> reverse_iterator;
+
+  typedef typename Params::allocator_type allocator_type;
+  typedef typename allocator_type::template rebind<char>::other
+    internal_allocator_type;
+
+ public:
+  // Default constructor.
+  btree(const key_compare &comp, const allocator_type &alloc);
+
+  // Copy constructor.
+  btree(const self_type &x);
+
+  // Destructor.
+  ~btree() {
+    clear();
+  }
+
+  // Iterator routines. On an empty tree begin() and end() both hold a NULL node at position 0.
+  iterator begin() {
+    return iterator(leftmost(), 0);  // first value of the leftmost node
+  }
+  const_iterator begin() const {
+    return const_iterator(leftmost(), 0);
+  }
+  iterator end() {
+    return iterator(rightmost(), rightmost() ? rightmost()->count() : 0);  // one past the last value of the rightmost node
+  }
+  const_iterator end() const {
+    return const_iterator(rightmost(), rightmost() ? rightmost()->count() : 0);
+  }
+  reverse_iterator rbegin() {
+    return reverse_iterator(end());
+  }
+  const_reverse_iterator rbegin() const {
+    return const_reverse_iterator(end());
+  }
+  reverse_iterator rend() {
+    return reverse_iterator(begin());
+  }
+  const_reverse_iterator rend() const {
+    return const_reverse_iterator(begin());
+  }
+
+  // Finds the first element whose key is not less than key.
+  iterator lower_bound(const key_type &key) {
+    return internal_end(
+        internal_lower_bound(key, iterator(root(), 0)));
+  }
+  const_iterator lower_bound(const key_type &key) const {
+    return internal_end(
+        internal_lower_bound(key, const_iterator(root(), 0)));
+  }
+
+  // Finds the first element whose key is greater than key.
+  iterator upper_bound(const key_type &key) {
+    return internal_end(
+        internal_upper_bound(key, iterator(root(), 0)));
+  }
+  const_iterator upper_bound(const key_type &key) const {
+    return internal_end(
+        internal_upper_bound(key, const_iterator(root(), 0)));
+  }
+
+  // Finds the range of values which compare equal to key. The first member of
+  // the returned pair is equal to lower_bound(key). The second member of
+  // the pair is equal to upper_bound(key).
+  std::pair<iterator,iterator> equal_range(const key_type &key) {
+    return std::make_pair(lower_bound(key), upper_bound(key));
+  }
+  std::pair<const_iterator,const_iterator> equal_range(const key_type &key) const {
+    return std::make_pair(lower_bound(key), upper_bound(key));
+  }
+
+  // Inserts a value into the btree only if it does not already exist. The
+  // boolean return value indicates whether insertion succeeded or failed. The
+  // ValuePointer type is used to avoid instantiating the value unless the key
+  // is being inserted. Value is not dereferenced if the key already exists in
+  // the btree. See btree_map::operator[].
+  template <typename ValuePointer>
+  std::pair<iterator,bool> insert_unique(const key_type &key, ValuePointer value);
+
+  // Inserts a value into the btree only if it does not already exist. The
+  // boolean return value indicates whether insertion succeeded or failed.
+  std::pair<iterator,bool> insert_unique(const value_type &v) {
+    return insert_unique(params_type::key(v), &v);
+  }
+
+  // Insert with hint. Check to see if the value should be placed immediately
+  // before position in the tree. If it does, then the insertion will take
+  // amortized constant time. If not, the insertion will take amortized
+  // logarithmic time as if a call to insert_unique(v) were made.
+  iterator insert_unique(iterator position, const value_type &v);
+
+  // Insert a range of values into the btree.
+  template <typename InputIterator>
+  void insert_unique(InputIterator b, InputIterator e);
+
+  // Inserts a value into the btree. The ValuePointer type is used to avoid
+  // instantiating the value unless the key is being inserted. Value is not
+  // dereferenced if the key already exists in the btree. See
+  // btree_map::operator[].
+  template <typename ValuePointer>
+  iterator insert_multi(const key_type &key, ValuePointer value);
+
+  // Inserts a value into the btree.
+  iterator insert_multi(const value_type &v) {
+    return insert_multi(params_type::key(v), &v);
+  }
+
+  // Insert with hint. Check to see if the value should be placed immediately
+  // before position in the tree. If it does, then the insertion will take
+  // amortized constant time. If not, the insertion will take amortized
+  // logarithmic time as if a call to insert_multi(v) were made.
+  iterator insert_multi(iterator position, const value_type &v);
+
+  // Insert a range of values into the btree.
+  template <typename InputIterator>
+  void insert_multi(InputIterator b, InputIterator e);
+
+  void assign(const self_type &x);
+
+  // Erase the specified iterator from the btree. The iterator must be valid
+  // (i.e. not equal to end()).  Return an iterator pointing to the node after
+  // the one that was erased (or end() if none exists).
+  iterator erase(iterator iter);
+
+  // Erases range. Returns the number of keys erased.
+  int erase(iterator begin, iterator end);
+
+  // Erases the specified key from the btree. Returns 1 if an element was
+  // erased and 0 otherwise.
+  int erase_unique(const key_type &key);
+
+  // Erases all of the entries matching the specified key from the
+  // btree. Returns the number of elements erased.
+  int erase_multi(const key_type &key);
+
+  // Finds the iterator corresponding to a key or returns end() if the key is
+  // not present.
+  iterator find_unique(const key_type &key) {
+    return internal_end(
+        internal_find_unique(key, iterator(root(), 0)));
+  }
+  const_iterator find_unique(const key_type &key) const {
+    return internal_end(
+        internal_find_unique(key, const_iterator(root(), 0)));
+  }
+  iterator find_multi(const key_type &key) {
+    return internal_end(
+        internal_find_multi(key, iterator(root(), 0)));
+  }
+  const_iterator find_multi(const key_type &key) const {
+    return internal_end(
+        internal_find_multi(key, const_iterator(root(), 0)));
+  }
+
+  // Returns 1 if the key is present in the btree and 0 otherwise.
+  size_type count_unique(const key_type &key) const {
+    // A result with a null node means the key doesn't exist in the tree.
+    const const_iterator found = internal_find_unique(
+        key, const_iterator(root(), 0));
+    if (found.node) {
+      return 1;
+    }
+    return 0;
+  }
+  // Returns a count of the number of times the key appears in the btree.
+  size_type count_multi(const key_type &key) const {
+    return distance(lower_bound(key), upper_bound(key));
+  }
+
+  // Clear the btree, deleting all of the values it contains.
+  void clear();
+
+  // Swap the contents of *this and x.
+  void swap(self_type &x);
+
+  // Assign the contents of x to *this.
+  self_type& operator=(const self_type &x) {
+    // Self-assignment is a no-op; anything else is delegated to assign().
+    const bool is_self = (&x == this);
+    if (!is_self) {
+      assign(x);
+    }
+    return *this;
+  }
+
+  key_compare* mutable_key_comp() {
+    return this;
+  }
+  const key_compare& key_comp() const {
+    return *this;
+  }
+  bool compare_keys(const key_type &x, const key_type &y) const {
+    return btree_compare_keys(key_comp(), x, y);
+  }
+
+  // Dump the btree to the specified ostream. Requires that operator<< is
+  // defined for Key and Value.
+  void dump(std::ostream &os) const {
+    if (root() != NULL) {
+      internal_dump(os, root(), 0);
+    }
+  }
+
+  // Verifies the structure of the btree.
+  void verify() const;
+
+  // Size routines. Note that empty() is slightly faster than doing size()==0.
+  size_type size() const {
+    const node_type *const r = root();
+    if (r == NULL) return 0;  // same test that empty() performs
+    return r->leaf() ? r->count() : r->size();  // a leaf root has no size field
+  }
+  size_type max_size() const { return std::numeric_limits<size_type>::max(); }
+  bool empty() const { return root() == NULL; }
+
+  // The height of the btree. An empty tree will have height 0.
+  size_type height() const {
+    if (!root()) {
+      return 0;
+    }
+    // Follow parent links starting at the root until the walk arrives back at
+    // the root. The chain is circular (the root's parent is the leftmost
+    // leaf), so the number of hops equals the number of levels in the tree.
+    size_type levels = 0;
+    const node_type *n = root();
+    do {
+      ++levels;
+      n = n->parent();
+    } while (n != root());
+    return levels;
+  }
+
+  // The number of internal, leaf and total nodes used by the btree.
+  size_type leaf_nodes() const {
+    return internal_stats(root()).leaf_nodes;
+  }
+  size_type internal_nodes() const {
+    return internal_stats(root()).internal_nodes;
+  }
+  size_type nodes() const {
+    node_stats stats = internal_stats(root());
+    return stats.leaf_nodes + stats.internal_nodes;
+  }
+
+  // The total number of bytes used by the btree.
+  size_type bytes_used() const {
+    const node_stats stats = internal_stats(root());
+    if (stats.internal_nodes == 0 && stats.leaf_nodes == 1) {
+      // A lone leaf root is sized by its own max_count.
+      return sizeof(*this) +
+          sizeof(base_fields) + root()->max_count() * sizeof(value_type);
+    }
+    return sizeof(*this) +
+        sizeof(root_fields) - sizeof(internal_fields) +
+        stats.leaf_nodes * sizeof(leaf_fields) +
+        stats.internal_nodes * sizeof(internal_fields);
+  }
+
+  // The average number of bytes used per value stored in the btree.
+  static double average_bytes_per_value() {
+    // Returns the number of bytes per value on a leaf node that is 75%
+    // full. Experimentally, this matches up nicely with the computed number of
+    // bytes per value in trees that had their values inserted in random order.
+    return sizeof(leaf_fields) / (kNodeValues * 0.75);
+  }
+
+  // The fullness of the btree. Computed as the number of elements in the btree
+  // divided by the maximum number of elements a tree with the current number
+  // of nodes could hold. A value of 1 indicates perfect space utilization;
+  // smaller values indicate space wastage. An empty tree reports 0 (not 0/0).
+  double fullness() const {
+    return empty() ? 0.0 : double(size()) / (nodes() * kNodeValues);
+  }
+  // The overhead of the btree structure in bytes per stored value. Computed as
+  // the total number of bytes used by the btree minus the number of bytes used
+  // for storing elements, divided by the number of elements (0 when empty).
+  double overhead() const {
+    if (!empty()) {
+      return (bytes_used() - size() * kValueSize) / double(size());
+    }
+    return 0.0;
+  }
+
+ private:
+  // Internal accessor routines.
+  node_type* root() { return root_.data; }
+  const node_type* root() const { return root_.data; }
+  node_type** mutable_root() { return &root_.data; }
+
+  // The rightmost node is stored in the root node.
+  node_type* rightmost() {
+    return (!root() || root()->leaf()) ? root() : root()->rightmost();
+  }
+  const node_type* rightmost() const {
+    return (!root() || root()->leaf()) ? root() : root()->rightmost();
+  }
+  node_type** mutable_rightmost() { return root()->mutable_rightmost(); }
+
+  // The leftmost node is stored as the parent of the root node.
+  node_type* leftmost() { return root() ? root()->parent() : NULL; }
+  const node_type* leftmost() const { return root() ? root()->parent() : NULL; }
+
+  // The size of the tree is stored in the root node.
+  size_type* mutable_size() { return root()->mutable_size(); }
+
+  // Allocator routines.
+  internal_allocator_type* mutable_internal_allocator() {
+    return static_cast<internal_allocator_type*>(&root_);
+  }
+  const internal_allocator_type& internal_allocator() const {
+    return *static_cast<const internal_allocator_type*>(&root_);
+  }
+
+  // Node creation/deletion routines.
+  node_type* new_internal_node(node_type *parent) {
+    internal_fields *p = reinterpret_cast<internal_fields*>(
+        mutable_internal_allocator()->allocate(sizeof(internal_fields)));
+    return node_type::init_internal(p, parent);
+  }
+  node_type* new_internal_root_node() {
+    root_fields *p = reinterpret_cast<root_fields*>(
+        mutable_internal_allocator()->allocate(sizeof(root_fields)));
+    return node_type::init_root(p, root()->parent());
+  }
+  node_type* new_leaf_node(node_type *parent) {
+    leaf_fields *p = reinterpret_cast<leaf_fields*>(
+        mutable_internal_allocator()->allocate(sizeof(leaf_fields)));
+    return node_type::init_leaf(p, parent, kNodeValues);
+  }
+  node_type* new_leaf_root_node(int max_count) {
+    leaf_fields *p = reinterpret_cast<leaf_fields*>(
+        mutable_internal_allocator()->allocate(
+            sizeof(base_fields) + max_count * sizeof(value_type)));  // header plus room for only max_count values
+    return node_type::init_leaf(p, reinterpret_cast<node_type*>(p), max_count);  // the new node is passed as its own parent
+  }
+  void delete_internal_node(node_type *node) {
+    node->destroy();
+    assert(node != root());
+    mutable_internal_allocator()->deallocate(
+        reinterpret_cast<char*>(node), sizeof(internal_fields));
+  }
+  void delete_internal_root_node() {
+    root()->destroy();
+    mutable_internal_allocator()->deallocate(
+        reinterpret_cast<char*>(root()), sizeof(root_fields));
+  }
+  void delete_leaf_node(node_type *node) {
+    node->destroy();
+    mutable_internal_allocator()->deallocate(
+        reinterpret_cast<char*>(node),
+        sizeof(base_fields) + node->max_count() * sizeof(value_type));
+  }
+
+  // Rebalances or splits the node iter points to.
+  void rebalance_or_split(iterator *iter);
+
+  // Merges the values of left, right and the delimiting key on their parent
+  // onto left, removing the delimiting key and deleting right.
+  void merge_nodes(node_type *left, node_type *right);
+
+  // Tries to merge node with its left or right sibling, and failing that,
+  // rebalance with its left or right sibling. Returns true if a merge
+  // occurred, at which point it is no longer valid to access node. Returns
+  // false if no merging took place.
+  bool try_merge_or_rebalance(iterator *iter);
+
+  // Tries to shrink the height of the tree by 1.
+  void try_shrink();
+
+  iterator internal_end(iterator iter) {
+    return iter.node ? iter : end();
+  }
+  const_iterator internal_end(const_iterator iter) const {
+    return iter.node ? iter : end();
+  }
+
+  // Inserts a value into the btree immediately before iter. Requires that
+  // key(v) <= iter.key() and (--iter).key() <= key(v).
+  iterator internal_insert(iterator iter, const value_type &v);
+
+  // Returns an iterator pointing to the first value >= the value "iter" is
+  // pointing at. Note that "iter" might be pointing to an invalid location as
+  // iter.position == iter.node->count(). This routine simply moves iter up in
+  // the tree to a valid location.
+  template <typename IterType>
+  static IterType internal_last(IterType iter);
+
+  // Returns an iterator pointing to the leaf position at which key would
+  // reside in the tree. We provide 2 versions of internal_locate. The first
+  // version (internal_locate_plain_compare) always returns 0 for the second
+  // field of the pair. The second version (internal_locate_compare_to) is for
+  // the key-compare-to specialization and returns either kExactMatch (if the
+  // key was found in the tree) or -kExactMatch (if it wasn't) in the second
+  // field of the pair. The compare_to specialization allows the caller to
+  // avoid a subsequent comparison to determine if an exact match was made,
+  // speeding up string keys.
+  template <typename IterType>
+  std::pair<IterType, int> internal_locate(
+      const key_type &key, IterType iter) const;
+  template <typename IterType>
+  std::pair<IterType, int> internal_locate_plain_compare(
+      const key_type &key, IterType iter) const;
+  template <typename IterType>
+  std::pair<IterType, int> internal_locate_compare_to(
+      const key_type &key, IterType iter) const;
+
+  // Internal routine which implements lower_bound().
+  template <typename IterType>
+  IterType internal_lower_bound(
+      const key_type &key, IterType iter) const;
+
+  // Internal routine which implements upper_bound().
+  template <typename IterType>
+  IterType internal_upper_bound(
+      const key_type &key, IterType iter) const;
+
+  // Internal routine which implements find_unique().
+  template <typename IterType>
+  IterType internal_find_unique(
+      const key_type &key, IterType iter) const;
+
+  // Internal routine which implements find_multi().
+  template <typename IterType>
+  IterType internal_find_multi(
+      const key_type &key, IterType iter) const;
+
+  // Deletes a node and all of its children.
+  void internal_clear(node_type *node);
+
+  // Dumps a node and all of its children to the specified ostream.
+  void internal_dump(std::ostream &os, const node_type *node, int level) const;
+
+  // Verifies the tree structure of node.
+  int internal_verify(const node_type *node,
+                      const key_type *lo, const key_type *hi) const;
+
+  // Accumulates node_stats over the subtree rooted at node: (0, 0) for a NULL
+  // node, (1, 0) for a leaf, and (0, 1) plus the children's stats otherwise.
+  node_stats internal_stats(const node_type *node) const {
+    if (node == NULL) return node_stats(0, 0);
+    if (node->leaf()) return node_stats(1, 0);
+
+    node_stats totals(0, 1);
+    const int num_children = node->count() + 1;
+    for (int c = 0; c < num_children; ++c) {
+      totals += internal_stats(node->child(c));
+    }
+    return totals;
+  }
+
+ private:
+  empty_base_handle<internal_allocator_type, node_type*> root_;
+
+ private:
+  // A never instantiated helper function that returns big_ if R is int (for a
+  // key-compare-to functor) or bool (for a plain functor), and small_ otherwise.
+  template <typename R>
+  static typename if_<
+   if_<is_key_compare_to::value,
+             std::is_same<R, int>,
+             std::is_same<R, bool> >::type::value,
+   big_, small_>::type key_compare_checker(R);
+
+  // A never instantiated helper function that returns the key comparison
+  // functor.
+  static key_compare key_compare_helper();
+
+  // Verify that key_compare returns a bool. This is similar to the way
+  // is_convertible in base/type_traits.h works. Note that key_compare_checker
+  // is never actually invoked. The compiler will select which
+  // key_compare_checker() to instantiate and then figure out the size of the
+  // return type of key_compare_checker() at compile time which we then check
+  // against the sizeof of big_.
+  COMPILE_ASSERT(
+      sizeof(key_compare_checker(key_compare_helper()(key_type(), key_type()))) ==
+      sizeof(big_),
+      key_comparison_function_must_return_bool);
+
+  // Note: We insist that kNodeValues, which is computed from
+  // Params::kTargetNodeSize, fits in base_fields::field_type.
+  COMPILE_ASSERT(kNodeValues <
+                 (1 << (8 * sizeof(typename base_fields::field_type))),
+                 target_node_size_too_large);
+
+  // Test the assumption made in setting kNodeValueSpace.
+  COMPILE_ASSERT(sizeof(base_fields) >= 2 * sizeof(void*),
+                 node_space_assumption_incorrect);
+};
+
+////
+// btree_node methods
+template <typename P>  // Inserts x at index i, shifting later values right.
+inline void btree_node<P>::insert_value(int i, const value_type &x) {
+  assert(i <= count());
+  value_init(count(), x);  // initialize the first unused slot from x...
+  for (int j = count(); j > i; --j) {
+    value_swap(j, this, j - 1);  // ...then swap it down until it sits at i
+  }
+  set_count(count() + 1);
+
+  if (!leaf()) {
+    ++i;  // from here on i is the child slot to the right of the new value
+    for (int j = count(); j > i; --j) {
+      *mutable_child(j) = child(j - 1);
+      child(j)->set_position(j);  // keep the moved child's cached index in sync
+    }
+    *mutable_child(i) = NULL;  // left empty; split() fills it via set_child()
+  }
+}
+
+template <typename P>  // Removes the value at index i, shifting later values left.
+inline void btree_node<P>::remove_value(int i) {
+  if (!leaf()) {
+    assert(child(i + 1)->count() == 0);  // right-hand child must already be empty
+    for (int j = i + 1; j < count(); ++j) {
+      *mutable_child(j) = child(j + 1);
+      child(j)->set_position(j);
+    }
+    *mutable_child(count()) = NULL;
+  }
+
+  set_count(count() - 1);
+  for (; i < count(); ++i) {
+    value_swap(i, this, i + 1);  // swap the removed value towards the old last slot
+  }
+  value_destroy(i);  // i == count() here: the slot just past the live values
+}
+
+template <typename P>  // Moves to_move values from right sibling src into this node.
+void btree_node<P>::rebalance_right_to_left(btree_node *src, int to_move) {
+  assert(parent() == src->parent());
+  assert(position() + 1 == src->position());  // src is the immediate right sibling
+  assert(src->count() >= count());
+  assert(to_move >= 1);
+  assert(to_move <= src->count());
+
+  // Make room in the left node (this) for the incoming values.
+  for (int i = 0; i < to_move; ++i) {
+    value_init(i + count());
+  }
+
+  // Rotate through the parent: the old delimiting value drops into the left
+  // node and src's value at to_move - 1 becomes the new delimiting value.
+  value_swap(count(), parent(), position());
+  parent()->value_swap(position(), src, to_move - 1);
+
+  // Move the remaining to_move - 1 values from the right to the left node.
+  for (int i = 1; i < to_move; ++i) {
+    value_swap(count() + i, src, i - 1);
+  }
+  // Shift the values in the right node to their correct position.
+  for (int i = to_move; i < src->count(); ++i) {
+    src->value_swap(i - to_move, src, i);
+  }
+  for (int i = 1; i <= to_move; ++i) {
+    src->value_destroy(src->count() - i);  // destroy the vacated tail slots
+  }
+
+  if (!leaf()) {
+    // Move the child pointers from the right to the left node.
+    for (int i = 0; i < to_move; ++i) {
+      set_child(1 + count() + i, src->child(i));
+    }
+    for (int i = 0; i <= src->count() - to_move; ++i) {
+      assert(i + to_move <= src->max_count());
+      src->set_child(i, src->child(i + to_move));
+      *src->mutable_child(i + to_move) = NULL;
+    }
+  }
+
+  // Fixup the counts on the src and dest nodes (counts were untouched above).
+  set_count(count() + to_move);
+  src->set_count(src->count() - to_move);
+}
+
+template <typename P>  // Moves to_move values from this node into right sibling dest.
+void btree_node<P>::rebalance_left_to_right(btree_node *dest, int to_move) {
+  assert(parent() == dest->parent());
+  assert(position() + 1 == dest->position());  // dest is the immediate right sibling
+  assert(count() >= dest->count());
+  assert(to_move >= 1);
+  assert(to_move <= count());
+
+  // Make room in the right node for the new values.
+  for (int i = 0; i < to_move; ++i) {
+    dest->value_init(i + dest->count());
+  }
+  for (int i = dest->count() - 1; i >= 0; --i) {
+    dest->value_swap(i, dest, i + to_move);  // slide existing values right by to_move
+  }
+
+  // Rotate through the parent: the old delimiting value lands in dest at
+  // to_move - 1 and this node's value at count() - to_move replaces it.
+  dest->value_swap(to_move - 1, parent(), position());
+  parent()->value_swap(position(), this, count() - to_move);
+  value_destroy(count() - to_move);
+
+  // Move the remaining to_move - 1 values from the left to the right node.
+  for (int i = 1; i < to_move; ++i) {
+    value_swap(count() - to_move + i, dest, i - 1);
+    value_destroy(count() - to_move + i);
+  }
+
+  if (!leaf()) {
+    // Move the child pointers from the left to the right node.
+    for (int i = dest->count(); i >= 0; --i) {
+      dest->set_child(i + to_move, dest->child(i));
+      *dest->mutable_child(i) = NULL;
+    }
+    for (int i = 1; i <= to_move; ++i) {
+      dest->set_child(i - 1, child(count() - to_move + i));
+      *mutable_child(count() - to_move + i) = NULL;
+    }
+  }
+
+  // Fixup the counts on the src and dest nodes (counts were untouched above).
+  set_count(count() - to_move);
+  dest->set_count(dest->count() + to_move);
+}
+
+template <typename P>  // Splits this node: upper values go to the empty node dest.
+void btree_node<P>::split(btree_node *dest, int insert_position) {
+  assert(dest->count() == 0);
+
+  // We bias the split based on the position being inserted. If we're
+  // inserting at the beginning of this (left) node then bias the split to put
+  // more values on the right node. If we're inserting at the end of this
+  // node then bias the split to keep the values on the left node.
+  if (insert_position == 0) {
+    dest->set_count(count() - 1);
+  } else if (insert_position == max_count()) {
+    dest->set_count(0);
+  } else {
+    dest->set_count(count() / 2);
+  }
+  set_count(count() - dest->count());
+  assert(count() >= 1);  // one value must remain to become the split key
+
+  // Move values from the left sibling to the right sibling.
+  for (int i = 0; i < dest->count(); ++i) {
+    dest->value_init(i);
+    value_swap(count() + i, dest, i);
+    value_destroy(count() + i);
+  }
+
+  // The split key is the largest value in the left sibling.
+  set_count(count() - 1);
+  parent()->insert_value(position(), value_type());  // placeholder, swapped out next
+  value_swap(count(), parent(), position());
+  value_destroy(count());
+  parent()->set_child(position() + 1, dest);
+
+  if (!leaf()) {
+    for (int i = 0; i <= dest->count(); ++i) {
+      assert(child(count() + i + 1) != NULL);
+      dest->set_child(i, child(count() + i + 1));
+      *mutable_child(count() + i + 1) = NULL;
+    }
+  }
+}
+
+template <typename P>  // Appends the delimiting value and all of src to this node.
+void btree_node<P>::merge(btree_node *src) {
+  assert(parent() == src->parent());
+  assert(position() + 1 == src->position());  // src is the immediate right sibling
+
+  // Move the delimiting value to the left node.
+  value_init(count());
+  value_swap(count(), parent(), position());
+
+  // Move the values from the right to the left node.
+  for (int i = 0; i < src->count(); ++i) {
+    value_init(1 + count() + i);  // 1 + skips the delimiting value placed above
+    value_swap(1 + count() + i, src, i);
+    src->value_destroy(i);
+  }
+
+  if (!leaf()) {
+    // Move the child pointers from the right to the left node.
+    for (int i = 0; i <= src->count(); ++i) {
+      set_child(1 + count() + i, src->child(i));
+      *src->mutable_child(i) = NULL;
+    }
+  }
+
+  // Fixup the counts on the src and dest nodes.
+  set_count(1 + count() + src->count());
+  src->set_count(0);  // remove_value() below asserts the right child is empty
+
+  // Remove the value on the parent node.
+  parent()->remove_value(position());
+}
+
+template <typename P>  // Exchanges values, child pointers and counts with node x.
+void btree_node<P>::swap(btree_node *x) {
+  assert(leaf() == x->leaf());
+
+  // Swap the values.
+  for (int i = count(); i < x->count(); ++i) {
+    value_init(i);  // pad the shorter node so every swapped slot is initialized
+  }
+  for (int i = x->count(); i < count(); ++i) {
+    x->value_init(i);
+  }
+  int n = std::max(count(), x->count());
+  for (int i = 0; i < n; ++i) {
+    value_swap(i, x, i);
+  }
+  for (int i = count(); i < x->count(); ++i) {
+    x->value_destroy(i);  // counts not swapped yet: x now holds the padding here
+  }
+  for (int i = x->count(); i < count(); ++i) {
+    value_destroy(i);
+  }
+
+  if (!leaf()) {
+    // Swap the child pointers.
+    for (int i = 0; i <= n; ++i) {
+      btree_swap_helper(*mutable_child(i), *x->mutable_child(i));
+    }
+    for (int i = 0; i <= count(); ++i) {
+      x->child(i)->fields_.parent = x;  // our former children now live in x
+    }
+    for (int i = 0; i <= x->count(); ++i) {
+      child(i)->fields_.parent = this;
+    }
+  }
+
+  // Swap the counts.
+  btree_swap_helper(fields_.count, x->fields_.count);
+}
+
+////
+// btree_iterator methods
+template <typename N, typename R, typename P>  // Advances when that means leaving the node.
+void btree_iterator<N, R, P>::increment_slow() {
+  if (node->leaf()) {
+    assert(position >= node->count());  // already past this leaf's last value
+    self_type save(*this);
+    while (position == node->count() && !node->is_root()) {
+      assert(node->parent()->child(node->position()) == node);
+      position = node->position();  // index of the parent value after this subtree
+      node = node->parent();
+    }
+    if (position == node->count()) {
+      *this = save;  // climbed past the root's last value: restore the leaf slot
+    }
+  } else {
+    assert(position < node->count());
+    node = node->child(position + 1);  // descend into the subtree right of the value
+    while (!node->leaf()) {
+      node = node->child(0);
+    }
+    position = 0;
+  }
+}
+
+template <typename N, typename R, typename P>  // Advances the iterator by count values.
+void btree_iterator<N, R, P>::increment_by(int count) {
+  while (count > 0) {
+    if (node->leaf()) {
+      int rest = node->count() - position;  // slots left in this leaf, incl. one-past-end
+      position += std::min(rest, count);
+      count = count - rest;  // may go <= 0; the return below or the loop test ends it
+      if (position < node->count()) {
+        return;
+      }
+    } else {
+      --count;  // the step off an internal value is performed by increment_slow()
+    }
+    increment_slow();
+  }
+}
+
+template <typename N, typename R, typename P>  // Steps back when that means leaving the node.
+void btree_iterator<N, R, P>::decrement_slow() {
+  if (node->leaf()) {
+    assert(position <= -1);  // already before this leaf's first value
+    self_type save(*this);
+    while (position < 0 && !node->is_root()) {
+      assert(node->parent()->child(node->position()) == node);
+      position = node->position() - 1;  // index of the parent value before this subtree
+      node = node->parent();
+    }
+    if (position < 0) {
+      *this = save;  // climbed past the root's first value: restore the leaf slot
+    }
+  } else {
+    assert(position >= 0);
+    node = node->child(position);  // descend into the subtree left of the value
+    while (!node->leaf()) {
+      node = node->child(node->count());
+    }
+    position = node->count() - 1;
+  }
+}
+
+////
+// btree methods
+template <typename P>
+btree<P>::btree(const key_compare &comp, const allocator_type &alloc)
+    : key_compare(comp), root_(alloc, NULL) {
+  // No nodes are allocated here: the root pointer starts out NULL.
+}
+
+template <typename P>
+btree<P>::btree(const self_type &x)
+    : key_compare(x.key_comp()), root_(x.internal_allocator(), NULL) {
+  // Start with a NULL root and x's comparator/allocator, then copy the values.
+  assign(x);
+}
+
+template <typename P> template <typename ValuePointer>  // Inserts *value unless key exists.
+std::pair<typename btree<P>::iterator, bool>
+btree<P>::insert_unique(const key_type &key, ValuePointer value) {
+  if (empty()) {
+    *mutable_root() = new_leaf_root_node(1);  // empty tree: create a leaf root first
+  }
+
+  std::pair<iterator, int> res = internal_locate(key, iterator(root(), 0));
+  iterator &iter = res.first;
+  if (res.second == kExactMatch) {
+    // The key already exists in the tree, do nothing.
+    return std::make_pair(internal_last(iter), false);
+  } else if (!res.second) {  // 0: plain comparator, equality is still unknown
+    iterator last = internal_last(iter);
+    if (last.node && !compare_keys(key, last.key())) {
+      // The key already exists in the tree, do nothing.
+      return std::make_pair(last, false);
+    }
+  }
+
+  return std::make_pair(internal_insert(iter, *value), true);  // true: inserted
+}
+
+template <typename P>  // Hinted unique insert; falls back to a full lookup on a bad hint.
+inline typename btree<P>::iterator
+btree<P>::insert_unique(iterator position, const value_type &v) {
+  if (!empty()) {
+    const key_type &key = params_type::key(v);
+    if (position == end() || compare_keys(key, position.key())) {
+      iterator prev = position;
+      if (position == begin() || compare_keys((--prev).key(), key)) {
+        // prev.key() < key < position.key()
+        return internal_insert(position, v);
+      }
+    } else if (compare_keys(position.key(), key)) {
+      iterator next = position;
+      ++next;
+      if (next == end() || compare_keys(key, next.key())) {
+        // position.key() < key < next.key()
+        return internal_insert(next, v);
+      }
+    } else {
+      // position.key() == key
+      return position;  // already present: return the existing element
+    }
+  }
+  return insert_unique(v).first;  // empty tree or unusable hint: ordinary insert
+}
+
+template <typename P> template <typename InputIterator>
+void btree<P>::insert_unique(InputIterator b, InputIterator e) {
+  // Every value is inserted with end() as the position hint.
+  for (InputIterator it = b; it != e; ++it)
+    insert_unique(end(), *it);
+}
+
+template <typename P> template <typename ValuePointer>
+typename btree<P>::iterator
+btree<P>::insert_multi(const key_type &key, ValuePointer value) {
+  // An empty tree first gets a leaf root node to insert into.
+  if (empty()) *mutable_root() = new_leaf_root_node(1);
+
+  // Insert at the upper bound of key, i.e. after any existing equal keys. A
+  // NULL-node result from internal_upper_bound() is turned into end() by
+  // internal_end().
+  const iterator where =
+      internal_end(internal_upper_bound(key, iterator(root(), 0)));
+  return internal_insert(where, *value);
+}
+
+template <typename P>  // Hinted multi insert; falls back to a full lookup on a bad hint.
+typename btree<P>::iterator
+btree<P>::insert_multi(iterator position, const value_type &v) {
+  if (!empty()) {
+    const key_type &key = params_type::key(v);
+    if (position == end() || !compare_keys(position.key(), key)) {
+      iterator prev = position;
+      if (position == begin() || !compare_keys(key, (--prev).key())) {
+        // prev.key() <= key <= position.key()
+        return internal_insert(position, v);
+      }
+    } else {
+      iterator next = position;
+      ++next;
+      if (next == end() || !compare_keys(next.key(), key)) {
+        // position.key() < key <= next.key()
+        return internal_insert(next, v);
+      }
+    }
+  }
+  return insert_multi(v);  // empty tree or unusable hint: ordinary insert
+}
+
+template <typename P> template <typename InputIterator>
+void btree<P>::insert_multi(InputIterator b, InputIterator e) {
+  // Every value is inserted with end() as the position hint.
+  for (InputIterator it = b; it != e; ++it)
+    insert_multi(end(), *it);
+}
+
+template <typename P>
+void btree<P>::assign(const self_type &x) {
+  clear();
+
+  *mutable_key_comp() = x.key_comp();
+  *mutable_internal_allocator() = x.internal_allocator();
+
+  // Values are visited in x's iteration order, which is the order they will
+  // be stored in, so assignment can avoid key comparisons: once the tree is
+  // non-empty each value is inserted directly at end(). Only the very first
+  // value goes through insert_multi().
+  for (const_iterator src = x.begin(); src != x.end(); ++src) {
+    if (!empty()) {
+      internal_insert(end(), *src);
+      continue;
+    }
+    insert_multi(*src);
+  }
+}
+
+template <typename P>  // Erases the value at iter; returns an iterator to its successor.
+typename btree<P>::iterator btree<P>::erase(iterator iter) {
+  bool internal_delete = false;
+  if (!iter.node->leaf()) {
+    // Deletion of a value on an internal node. Swap the key with the largest
+    // value of our left child. This is easy, we just decrement iter.
+    iterator tmp_iter(iter--);
+    assert(iter.node->leaf());
+    assert(!compare_keys(tmp_iter.key(), iter.key()));
+    iter.node->value_swap(iter.position, tmp_iter.node, tmp_iter.position);
+    internal_delete = true;
+    --*mutable_size();
+  } else if (!root()->leaf()) {
+    --*mutable_size();  // no explicit size update is made when the root is a leaf
+  }
+
+  // Delete the key from the leaf.
+  iter.node->remove_value(iter.position);
+
+  // We want to return the next value after the one we just erased. If we
+  // erased from an internal node (internal_delete == true), then the next
+  // value is ++(++iter). If we erased from a leaf node (internal_delete ==
+  // false) then the next value is ++iter. Note that ++iter may point to an
+  // internal node and the value in the internal node may move to a leaf node
+  // (iter.node) when rebalancing is performed at the leaf level.
+
+  // Merge/rebalance as we walk back up the tree.
+  iterator res(iter);
+  for (;;) {
+    if (iter.node == root()) {
+      try_shrink();
+      if (empty()) {
+        return end();
+      }
+      break;
+    }
+    if (iter.node->count() >= kMinNodeValues) {
+      break;  // node is still full enough: no merge or rebalance needed
+    }
+    bool merged = try_merge_or_rebalance(&iter);
+    if (iter.node->leaf()) {
+      res = iter;  // track where the leaf-level position ended up
+    }
+    if (!merged) {
+      break;
+    }
+    iter.node = iter.node->parent();  // a merge removed a parent value: check the parent
+  }
+
+  // Adjust our return value. If we're pointing at the end of a node, advance
+  // the iterator.
+  if (res.position == res.node->count()) {
+    res.position = res.node->count() - 1;  // back onto the last value so ++ can move on
+    ++res;
+  }
+  // If we erased from an internal node, advance the iterator.
+  if (internal_delete) {
+    ++res;
+  }
+  return res;
+}
+
+template <typename P>
+int btree<P>::erase(iterator b, iterator e) {
+  const int num_erased = distance(b, e);  // e is not consulted again after this
+  for (int left = num_erased; left > 0; --left) {
+    b = erase(b);
+  }
+  return num_erased;
+}
+
+template <typename P>
+int btree<P>::erase_unique(const key_type &key) {
+  // internal_find_unique() reports a miss with a NULL-node iterator.
+  const iterator found = internal_find_unique(key, iterator(root(), 0));
+  if (found.node != NULL) {
+    erase(found);
+    return 1;  // exactly one value removed
+  }
+  return 0;  // key not present, nothing done
+}
+
+template <typename P>
+int btree<P>::erase_multi(const key_type &key) {
+  const iterator first = internal_lower_bound(key, iterator(root(), 0));
+  if (first.node == NULL) {
+    // The lower bound came back with a NULL node: nothing to erase.
+    return 0;
+  }
+  // Erase the range [lower_bound(key), upper_bound(key)); a NULL-node upper
+  // bound is mapped to end() by internal_end().
+  const iterator upper = internal_upper_bound(key, iterator(root(), 0));
+  return erase(first, internal_end(upper));
+}
+
+template <typename P>
+void btree<P>::clear() {
+  // Delete every node (if there are any), then reset the root pointer to NULL.
+  node_type *const old_root = root();
+  if (old_root) internal_clear(old_root);
+  *mutable_root() = NULL;
+}
+
+template <typename P>  // Exchanges the comparator and the root handle with x.
+void btree<P>::swap(self_type &x) {
+  std::swap(static_cast<key_compare&>(*this), static_cast<key_compare&>(x));  // base subobject
+  std::swap(root_, x.root_);  // no per-node work is done here
+}
+
+template <typename P>  // Checks tree invariants with assert(); nothing else is done.
+void btree<P>::verify() const {
+  if (root() != NULL) {
+    assert(size() == internal_verify(root(), NULL, NULL));  // recount every value
+    assert(leftmost() == (++const_iterator(root(), -1)).node);
+    assert(rightmost() == (--const_iterator(root(), root()->count())).node);
+    assert(leftmost()->leaf());
+    assert(rightmost()->leaf());
+  } else {
+    assert(size() == 0);  // a NULL root must mean an empty tree
+    assert(leftmost() == NULL);
+    assert(rightmost() == NULL);
+  }
+}
+
+template <typename P>  // Makes room in the full node at *iter; may retarget *iter.
+void btree<P>::rebalance_or_split(iterator *iter) {
+  node_type *&node = iter->node;  // references: updates below change the caller's iterator
+  int &insert_position = iter->position;
+  assert(node->count() == node->max_count());
+
+  // First try to make room on the node by rebalancing.
+  node_type *parent = node->parent();
+  if (node != root()) {
+    if (node->position() > 0) {
+      // Try rebalancing with our left sibling.
+      node_type *left = parent->child(node->position() - 1);
+      if (left->count() < left->max_count()) {
+        // We bias rebalancing based on the position being inserted. If we're
+        // inserting at the end of the right node then we bias rebalancing to
+        // fill up the left node.
+        int to_move = (left->max_count() - left->count()) /
+            (1 + (insert_position < left->max_count()));
+        to_move = std::max(1, to_move);
+
+        if (((insert_position - to_move) >= 0) ||
+            ((left->count() + to_move) < left->max_count())) {
+          left->rebalance_right_to_left(node, to_move);
+
+          assert(node->max_count() - node->count() == to_move);
+          insert_position = insert_position - to_move;
+          if (insert_position < 0) {
+            insert_position = insert_position + left->count() + 1;
+            node = left;  // the insertion point moved into the left sibling
+          }
+
+          assert(node->count() < node->max_count());
+          return;
+        }
+      }
+    }
+
+    if (node->position() < parent->count()) {
+      // Try rebalancing with our right sibling.
+      node_type *right = parent->child(node->position() + 1);
+      if (right->count() < right->max_count()) {
+        // We bias rebalancing based on the position being inserted. If we're
+        // inserting at the beginning of the left node then we bias rebalancing
+        // to fill up the right node.
+        int to_move = (right->max_count() - right->count()) /
+            (1 + (insert_position > 0));
+        to_move = std::max(1, to_move);
+
+        if ((insert_position <= (node->count() - to_move)) ||
+            ((right->count() + to_move) < right->max_count())) {
+          node->rebalance_left_to_right(right, to_move);
+
+          if (insert_position > node->count()) {
+            insert_position = insert_position - node->count() - 1;
+            node = right;  // the insertion point moved into the right sibling
+          }
+
+          assert(node->count() < node->max_count());
+          return;
+        }
+      }
+    }
+
+    // Rebalancing failed, make sure there is room on the parent node for a new
+    // value.
+    if (parent->count() == parent->max_count()) {
+      iterator parent_iter(node->parent(), node->position());
+      rebalance_or_split(&parent_iter);  // recurse: the parent needs room for the split key
+    }
+  } else {
+    // Rebalancing not possible because this is the root node.
+    if (root()->leaf()) {
+      // The root node is currently a leaf node: create a new root node and set
+      // the current root node as the child of the new root.
+      parent = new_internal_root_node();
+      parent->set_child(0, root());
+      *mutable_root() = parent;
+      assert(*mutable_rightmost() == parent->child(0));
+    } else {
+      // The root node is an internal node. We do not want to create a new root
+      // node because the root node is special and holds the size of the tree
+      // and a pointer to the rightmost node. So we create a new internal node
+      // and move all of the items on the current root into the new node.
+      parent = new_internal_node(parent);
+      parent->set_child(0, parent);
+      parent->swap(root());
+      node = parent;  // the old root's contents now live in this new node
+    }
+  }
+
+  // Split the node.
+  node_type *split_node;
+  if (node->leaf()) {
+    split_node = new_leaf_node(parent);
+    node->split(split_node, insert_position);
+    if (rightmost() == node) {
+      *mutable_rightmost() = split_node;  // the new right half is now the rightmost leaf
+    }
+  } else {
+    split_node = new_internal_node(parent);
+    node->split(split_node, insert_position);
+  }
+
+  if (insert_position > node->count()) {
+    insert_position = insert_position - node->count() - 1;
+    node = split_node;  // the insertion point landed in the new right half
+  }
+}
+
+template <typename P>
+void btree<P>::merge_nodes(node_type *left, node_type *right) {
+  left->merge(right);  // everything in right (plus the delimiter) is now in left
+  if (!right->leaf()) {
+    delete_internal_node(right);
+    return;
+  }
+  // If right was the tree's rightmost leaf, left takes over that role before
+  // right is freed.
+  if (rightmost() == right) *mutable_rightmost() = left;
+  delete_leaf_node(right);
+}
+
+template <typename P>  // Returns true only if iter->node was merged with a sibling.
+bool btree<P>::try_merge_or_rebalance(iterator *iter) {
+  node_type *parent = iter->node->parent();
+  if (iter->node->position() > 0) {
+    // Try merging with our left sibling.
+    node_type *left = parent->child(iter->node->position() - 1);
+    if ((1 + left->count() + iter->node->count()) <= left->max_count()) {
+      iter->position += 1 + left->count();  // skip left's values and the delimiter
+      merge_nodes(left, iter->node);
+      iter->node = left;  // the old node was deleted by merge_nodes()
+      return true;
+    }
+  }
+  if (iter->node->position() < parent->count()) {
+    // Try merging with our right sibling.
+    node_type *right = parent->child(iter->node->position() + 1);
+    if ((1 + iter->node->count() + right->count()) <= right->max_count()) {
+      merge_nodes(iter->node, right);
+      return true;
+    }
+    // Try rebalancing with our right sibling. We don't perform rebalancing if
+    // we deleted the first element from iter->node and the node is not
+    // empty. This is a small optimization for the common pattern of deleting
+    // from the front of the tree.
+    if ((right->count() > kMinNodeValues) &&
+        ((iter->node->count() == 0) ||
+         (iter->position > 0))) {
+      int to_move = (right->count() - iter->node->count()) / 2;
+      to_move = std::min(to_move, right->count() - 1);  // leave right at least one value
+      iter->node->rebalance_right_to_left(right, to_move);
+      return false;
+    }
+  }
+  if (iter->node->position() > 0) {
+    // Try rebalancing with our left sibling. We don't perform rebalancing if
+    // we deleted the last element from iter->node and the node is not
+    // empty. This is a small optimization for the common pattern of deleting
+    // from the back of the tree.
+    node_type *left = parent->child(iter->node->position() - 1);
+    if ((left->count() > kMinNodeValues) &&
+        ((iter->node->count() == 0) ||
+         (iter->position < iter->node->count()))) {
+      int to_move = (left->count() - iter->node->count()) / 2;
+      to_move = std::min(to_move, left->count() - 1);  // leave left at least one value
+      left->rebalance_left_to_right(iter->node, to_move);
+      iter->position += to_move;  // values were prepended, so our position shifts right
+      return false;
+    }
+  }
+  return false;
+}
+
+template <typename P>  // Removes the root node when it holds no values.
+void btree<P>::try_shrink() {
+  if (root()->count() > 0) {
+    return;  // root still holds values: nothing to shrink
+  }
+  // Deleted the last item on the root node, shrink the height of the tree.
+  if (root()->leaf()) {
+    assert(size() == 0);
+    delete_leaf_node(root());
+    *mutable_root() = NULL;  // the tree is now empty
+  } else {
+    node_type *child = root()->child(0);  // the only child of a value-less root
+    if (child->leaf()) {
+      // The child is a leaf node so simply make it the root node in the tree.
+      child->make_root();
+      delete_internal_root_node();
+      *mutable_root() = child;
+    } else {
+      // The child is an internal node. We want to keep the existing root node
+      // so we move all of the values from the child node into the existing
+      // (empty) root node.
+      child->swap(root());
+      delete_internal_node(child);
+    }
+  }
+}
+
+template <typename P> template <typename IterType>
+inline IterType btree<P>::internal_last(IterType iter) {
+  for (;;) {  // climb while iter sits one past its node's last value
+    if (!iter.node || iter.position != iter.node->count()) break;
+    iter.position = iter.node->position();
+    iter.node = iter.node->parent();
+    // Reaching a leaf through parent() ends the climb with a NULL node.
+    if (iter.node->leaf()) iter.node = NULL;
+  }
+  return iter;
+}
+
+template <typename P>  // Inserts v immediately before iter; returns the new value's iterator.
+inline typename btree<P>::iterator
+btree<P>::internal_insert(iterator iter, const value_type &v) {
+  if (!iter.node->leaf()) {
+    // We can't insert on an internal node. Instead, we'll insert after the
+    // previous value which is guaranteed to be on a leaf node.
+    --iter;
+    ++iter.position;
+  }
+  if (iter.node->count() == iter.node->max_count()) {
+    // Make room in the leaf for the new item.
+    if (iter.node->max_count() < kNodeValues) {
+      // Insertion into the root where the root is smaller than the full node
+      // size. Simply grow the size of the root node.
+      assert(iter.node == root());
+      iter.node = new_leaf_root_node(
+          std::min<int>(kNodeValues, 2 * iter.node->max_count()));
+      iter.node->swap(root());  // move the old root's values into the bigger node
+      delete_leaf_node(root());
+      *mutable_root() = iter.node;
+    } else {
+      rebalance_or_split(&iter);  // may retarget iter to a sibling or new node
+      ++*mutable_size();
+    }
+  } else if (!root()->leaf()) {
+    ++*mutable_size();  // no explicit size update is made when the root is a leaf
+  }
+  iter.node->insert_value(iter.position, v);
+  return iter;
+}
+
+template <typename P> template <typename IterType>  // Forwards to internal_locate_type.
+inline std::pair<IterType, int> btree<P>::internal_locate(
+    const key_type &key, IterType iter) const {
+  return internal_locate_type::dispatch(key, *this, iter);  // picks plain vs compare-to
+}
+
+template <typename P> template <typename IterType>
+inline std::pair<IterType, int> btree<P>::internal_locate_plain_compare(
+    const key_type &key, IterType iter) const {
+  // Descend one level per pass, following the child at key's lower bound,
+  // until a leaf is reached.
+  iter.position = iter.node->lower_bound(key, key_comp());
+  while (!iter.node->leaf()) {
+    iter.node = iter.node->child(iter.position);
+    iter.position = iter.node->lower_bound(key, key_comp());
+  }
+  return std::make_pair(iter, 0);  // 0: caller must test for equality itself
+}
+
+template <typename P> template <typename IterType>
+inline std::pair<IterType, int> btree<P>::internal_locate_compare_to(
+    const key_type &key, IterType iter) const {
+  // The node's lower_bound() result carries a position (kMatchMask bits) and
+  // an exact-match flag (kExactMatch bit) in a single int. Stop at the first
+  // exact match, otherwise keep descending until a leaf is reached.
+  for (;;) {
+    const int packed = iter.node->lower_bound(key, key_comp());
+    iter.position = packed & kMatchMask;
+    if ((packed & kExactMatch) != 0) {
+      return std::make_pair(iter, static_cast<int>(kExactMatch));
+    }
+    if (iter.node->leaf()) return std::make_pair(iter, -kExactMatch);
+    iter.node = iter.node->child(iter.position);
+  }
+}
+
+template <typename P> template <typename IterType>
+IterType btree<P>::internal_lower_bound(
+    const key_type &key, IterType iter) const {
+  // A NULL-node iterator is passed straight back to the caller.
+  if (!iter.node) return iter;
+
+  // Walk down to a leaf; kMatchMask keeps only the position bits of the
+  // node-level lower_bound() result.
+  for (;;) {
+    iter.position = iter.node->lower_bound(key, key_comp()) & kMatchMask;
+    if (iter.node->leaf()) break;
+    iter.node = iter.node->child(iter.position);
+  }
+  // The leaf slot may be one past the end; internal_last() moves it up.
+  return internal_last(iter);
+}
+
+template <typename P> template <typename IterType>
+IterType btree<P>::internal_upper_bound(
+    const key_type &key, IterType iter) const {
+  if (!iter.node) return iter;  // NULL-node input is passed straight back
+
+  // Walk down to a leaf along the child at each node's upper bound for key.
+  for (;;) {
+    iter.position = iter.node->upper_bound(key, key_comp());
+    if (iter.node->leaf()) break;
+    iter.node = iter.node->child(iter.position);
+  }
+  // The leaf slot may be one past the node's last value; internal_last()
+  // moves it up to a valid location or yields a NULL node.
+  return internal_last(iter);
+}
+
+template <typename P> template <typename IterType>
+IterType btree<P>::internal_find_unique(
+    const key_type &key, IterType iter) const {
+  // A NULL-node iterator is both the "empty input" and the "not found" value.
+  if (!iter.node) return IterType(NULL, 0);
+
+  const std::pair<IterType, int> located = internal_locate(key, iter);
+  if (located.second == kExactMatch) return located.first;
+  if (located.second != 0) return IterType(NULL, 0);  // compare-to: no match
+
+  // Plain comparator (second == 0): the located slot is a lower bound, so
+  // the key matches only if it does not compare less than the value there.
+  iter = internal_last(located.first);
+  if (iter.node && !compare_keys(key, iter.key())) return iter;
+
+  return IterType(NULL, 0);
+}
+
+template <typename P> template <typename IterType>
+IterType btree<P>::internal_find_multi(
+    const key_type &key, IterType iter) const {
+  // Locate the lower bound of key, then accept it only when key does not
+  // compare less than the value found there. Each step is skipped once the
+  // iterator has a NULL node.
+  if (iter.node) iter = internal_lower_bound(key, iter);
+  if (iter.node) iter = internal_last(iter);
+  if (iter.node && !compare_keys(key, iter.key())) {
+    return iter;
+  }
+
+  return IterType(NULL, 0);  // NULL node signals "not found"
+}
+
+template <typename P>
+void btree<P>::internal_clear(node_type *node) {
+  if (node->leaf()) {
+    delete_leaf_node(node);
+    return;
+  }
+  // Post-order: delete all count() + 1 child subtrees before the node itself.
+  for (int c = 0; c <= node->count(); ++c) {
+    internal_clear(node->child(c));
+  }
+  // An internal root is released through its own routine; every other
+  // internal node goes through delete_internal_node().
+  if (node == root()) delete_internal_root_node();
+  else delete_internal_node(node);
+}
+
+template <typename P>
+void btree<P>::internal_dump(
+    std::ostream &os, const node_type *node, int level) const {
+  // In-order walk: child k is dumped before key k and the last child after
+  // all keys. Each key is indented by two spaces per level.
+  const bool has_children = !node->leaf();
+  const int num_keys = node->count();
+  for (int k = 0; k < num_keys; ++k) {
+    if (has_children) internal_dump(os, node->child(k), level + 1);
+    for (int pad = level; pad > 0; --pad) {
+      os << "  ";
+    }
+    os << node->key(k) << " [" << level << "]\n";
+  }
+  if (has_children) internal_dump(os, node->child(num_keys), level + 1);
+}
+
+template <typename P>  // Asserts ordering/links below node; returns the number of values.
+int btree<P>::internal_verify(
+    const node_type *node, const key_type *lo, const key_type *hi) const {
+  assert(node->count() > 0);
+  assert(node->count() <= node->max_count());
+  if (lo) {
+    assert(!compare_keys(node->key(0), *lo));  // first key is not below the lower bound
+  }
+  if (hi) {
+    assert(!compare_keys(*hi, node->key(node->count() - 1)));  // last key not above hi
+  }
+  for (int i = 1; i < node->count(); ++i) {
+    assert(!compare_keys(node->key(i), node->key(i - 1)));  // keys are non-decreasing
+  }
+  int count = node->count();
+  if (!node->leaf()) {
+    for (int i = 0; i <= node->count(); ++i) {
+      assert(node->child(i) != NULL);
+      assert(node->child(i)->parent() == node);
+      assert(node->child(i)->position() == i);
+      count += internal_verify(
+          node->child(i),
+          (i == 0) ? lo : &node->key(i - 1),  // child i is bounded by its neighbour keys
+          (i == node->count()) ? hi : &node->key(i));
+    }
+  }
+  return count;
+}
+
+} // namespace btree
+
+#endif  // UTIL_BTREE_BTREE_H__
diff --git a/third-party/xdelta3/xdelta3/cpp-btree/btree_bench.cc b/third-party/xdelta3/xdelta3/cpp-btree/btree_bench.cc
new file mode 100644
index 0000000000..6eaed99470
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/cpp-btree/btree_bench.cc
@@ -0,0 +1,593 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <algorithm>
+#include <functional>
+#include <map>
+#include <set>
+#include <string>
+#include <sys/time.h>
+#include <type_traits>
+#include <vector>
+
+#include "gflags/gflags.h"
+#include "btree_map.h"
+#include "btree_set.h"
+#include "btree_test.h"
+
+DEFINE_int32(test_random_seed, 123456789, "Seed for srand()");
+DEFINE_int32(benchmark_max_iters, 10000000, "Maximum test iterations");
+DEFINE_int32(benchmark_min_iters, 100, "Minimum test iterations");
+DEFINE_int32(benchmark_target_seconds, 1,
+	     "Attempt to benchmark for this many seconds");
+
+using std::allocator;
+using std::less;
+using std::map;
+using std::max;
+using std::min;
+using std::multimap;
+using std::multiset;
+using std::set;
+using std::string;
+using std::vector;
+
+namespace btree {
+namespace {
+
+struct RandGen {
+  typedef ptrdiff_t result_type;
+  // Seeds the C library generator through srand().
+  RandGen(result_type seed) { srand(seed); }
+  // Returns rand() reduced modulo |bound|, so |bound| must not be zero.
+  result_type operator()(result_type bound) {
+    return rand() % bound;
+  }
+};
+
+struct BenchmarkRun {  // One registered benchmark plus its stopwatch state.
+  BenchmarkRun(const char *name, void (*func)(int));  // links into the list
+  void Run();    // repeats benchmark_func until the time or iteration cap
+  void Stop();   // adds the elapsed time to accum_micros
+  void Start();  // records the current time in last_started
+  void Reset();  // zeroes both timing fields
+  // Data members; the constructor initializes them in this order.
+  BenchmarkRun *next_benchmark;  // next entry in the list, or NULL
+  const char *benchmark_name;    // label printed with the result
+  void (*benchmark_func)(int);   // body to time; receives the iteration count
+  int64_t accum_micros;          // microseconds measured so far
+  int64_t last_started;          // start timestamp, 0 while stopped
+};
+
+BenchmarkRun *first_benchmark;    // head of the list built by the constructor
+BenchmarkRun *current_benchmark;  // benchmark inside Run(), NULL otherwise
+
+int64_t get_micros () {  // gettimeofday() reading converted to microseconds
+  timeval tv;
+  gettimeofday(&tv, NULL);  // widen below: 32-bit tv_sec * 1000000 overflows
+  return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
+}
+
+// Registers the new benchmark by pushing it onto the front of the global list.
+BenchmarkRun::BenchmarkRun(const char *name, void (*func)(int))
+    : next_benchmark(first_benchmark), benchmark_name(name),
+      benchmark_func(func), accum_micros(0), last_started(0) {
+  // Static-initialization order decides list order, so later definitions in
+  // this file end up earlier in the list.
+  first_benchmark = this;
+}
+
+#define BTREE_BENCHMARK(name) \
+  BTREE_BENCHMARK2(#name, name, __COUNTER__)  // stringized label + unique id
+#define BTREE_BENCHMARK2(name, func, counter)	\
+  BTREE_BENCHMARK3(name, func, counter)  // extra hop so __COUNTER__ expands
+#define BTREE_BENCHMARK3(name, func, counter)	\
+  BenchmarkRun bench ## counter (name, func)  // defines one BenchmarkRun object
+
+void StopBenchmarkTiming() {  // Stops the clock of the benchmark being run.
+  current_benchmark->Stop();  // current_benchmark is only set inside Run()
+}
+
+void StartBenchmarkTiming() {  // Starts the clock of the benchmark being run.
+  current_benchmark->Start();  // current_benchmark is only set inside Run()
+}
+
+void RunBenchmarks() {
+  // Visit the registration list head-first, running each entry in turn.
+  BenchmarkRun *cur = first_benchmark;
+  for (; cur != NULL; cur = cur->next_benchmark)
+    cur->Run();
+}
+
+void BenchmarkRun::Start() {
+  assert(last_started == 0);  // starting an already-running clock is a bug
+  last_started = get_micros();
+}
+
+void BenchmarkRun::Stop() {
+  // Zero means the clock is not running, in which case this is a no-op.
+  if (last_started != 0) {
+    accum_micros += get_micros() - last_started;
+    last_started = 0;
+  }
+}
+
+void BenchmarkRun::Reset() {
+  accum_micros = 0;  // discard time accumulated by earlier attempts
+  last_started = 0;  // and mark the clock as not running
+}
+
+void BenchmarkRun::Run() {
+  // Grow iters until a run fills the time budget, then print one result row.
+  assert(current_benchmark == NULL);
+  current_benchmark = this;
+  // 64-bit on purpose: a 32-bit seconds * 1000000 product can overflow.
+  const int64_t target_micros =
+      static_cast<int64_t>(FLAGS_benchmark_target_seconds) * 1000000;
+  int iters = FLAGS_benchmark_min_iters;
+  for (;;) {
+    Reset();
+    Start();
+    benchmark_func(iters);
+    Stop();
+    if (accum_micros > target_micros ||
+        iters >= FLAGS_benchmark_max_iters) break;
+    // Scale up in 64 bits and clamp before narrowing back to int.
+    const int64_t next = (accum_micros == 0)
+        ? int64_t(iters) * 100 : target_micros * iters / accum_micros;
+    iters = static_cast<int>(min<int64_t>(next, FLAGS_benchmark_max_iters));
+  }
+  // End the row with a newline so consecutive results do not run together.
+  std::cout << benchmark_name << "\t" << accum_micros * 1000 / iters << "\t"
+            << iters << "\n";
+  current_benchmark = NULL;
+}
+
+// Used to avoid compiler optimizations for these benchmarks.
+template <typename T>
+void sink(const T& t0) {
+  volatile T t = t0;  // copy into a volatile local; the value is never read
+}
+
+// Benchmark insertion of values into a container.
+template <typename T>
+void BM_Insert(int n) {
+  typedef typename std::remove_const<typename T::value_type>::type V;
+  typename KeyOfValue<typename T::key_type, V>::type key_of_value;
+  // |n| is the total number of timed insertions to perform.
+  // Disable timing while we perform some initialization.
+  StopBenchmarkTiming();
+
+  T container;
+  vector<V> values = GenerateValues<V>(FLAGS_benchmark_values);
+  for (size_t i = 0; i < values.size(); i++) {
+    container.insert(values[i]);
+  }
+
+  for (int i = 0; i < n; ) {
+    // Remove and re-insert 10% of the keys. The batch is at least one key:
+    // with fewer than 10 values a zero-sized batch would never advance i.
+    int m = min(n - i, max(1, FLAGS_benchmark_values / 10));
+    for (int j = i; j < i + m; j++) {
+      int x = j % FLAGS_benchmark_values;
+      container.erase(key_of_value(values[x]));
+    }
+
+    // Only the re-insertion below is timed.
+    StartBenchmarkTiming();
+    for (int j = i; j < i + m; j++) {
+      int x = j % FLAGS_benchmark_values;
+      container.insert(values[x]);
+    }
+
+    StopBenchmarkTiming();
+
+    i += m;
+  }
+}
+
+// Benchmark lookup of values in a container.
+template <typename T>
+void BM_Lookup(int n) {
+  typedef typename std::remove_const<typename T::value_type>::type V;
+  typename KeyOfValue<typename T::key_type, V>::type key_of_value;
+
+  // Setup below is excluded from the measurement.
+  StopBenchmarkTiming();
+
+  T container;
+  vector<V> values = GenerateValues<V>(FLAGS_benchmark_values);
+
+  for (size_t pos = 0; pos < values.size(); ++pos) {
+    container.insert(values[pos]);
+  }
+
+  V found = V();
+
+  // Timed section: n lookups, cycling through the values in generation order.
+  StartBenchmarkTiming();
+  for (int i = 0; i < n; i++) {
+    int m = i % values.size();
+    found = *container.find(key_of_value(values[m]));
+  }
+  StopBenchmarkTiming();
+
+  // Consume the result so the lookups cannot be optimized away.
+  sink(found);
+}
+
+// Benchmark lookup of values in a full container, meaning that values
+// are inserted in-order to take advantage of biased insertion, which
+// yields a full tree.
+template <typename T>
+void BM_FullLookup(int n) {
+  typedef typename std::remove_const<typename T::value_type>::type V;
+  typename KeyOfValue<typename T::key_type, V>::type key_of_value;
+
+  // Setup below is excluded from the measurement.
+  StopBenchmarkTiming();
+
+  T container;
+  vector<V> values = GenerateValues<V>(FLAGS_benchmark_values);
+  // Insert a sorted copy so the tree is built from in-order insertions.
+  vector<V> sorted = values;
+  sort(sorted.begin(), sorted.end());
+  for (size_t pos = 0; pos < sorted.size(); ++pos) {
+    container.insert(sorted[pos]);
+  }
+
+  V found = V();
+
+  // Timed section: n lookups, cycling through the values in generation order.
+  StartBenchmarkTiming();
+  for (int i = 0; i < n; i++) {
+    int m = i % values.size();
+    found = *container.find(key_of_value(values[m]));
+  }
+  StopBenchmarkTiming();
+
+  // Consume the result so the lookups cannot be optimized away.
+  sink(found);
+}
+
+// Benchmark deletion of values from a container.
+template <typename T>
+void BM_Delete(int n) {
+  typedef typename std::remove_const<typename T::value_type>::type V;
+  typename KeyOfValue<typename T::key_type, V>::type key_of_value;
+  // |n| is the total number of timed erasures to perform.
+  // Disable timing while we perform some initialization.
+  StopBenchmarkTiming();
+
+  T container;
+  vector<V> values = GenerateValues<V>(FLAGS_benchmark_values);
+  for (size_t i = 0; i < values.size(); i++) {
+    container.insert(values[i]);
+  }
+
+  for (int i = 0; i < n; ) {
+    // Remove and re-insert 10% of the keys. The batch is at least one key:
+    // with fewer than 10 values a zero-sized batch would never advance i.
+    int m = min(n - i, max(1, FLAGS_benchmark_values / 10));
+    // Only the erasure below is timed.
+    StartBenchmarkTiming();
+    for (int j = i; j < i + m; j++) {
+      int x = j % FLAGS_benchmark_values;
+      container.erase(key_of_value(values[x]));
+    }
+
+    StopBenchmarkTiming();
+
+    for (int j = i; j < i + m; j++) {
+      int x = j % FLAGS_benchmark_values;
+      container.insert(values[x]);
+    }
+
+    i += m;
+  }
+}
+
+// Benchmark steady-state insert (into first half of range) and remove
+// (from the second half of range), treating the container
+// approximately like a queue with log-time access for all elements.
+// This benchmark does not test the case where insertion and removal
+// happen in the same region of the tree.  This benchmark counts two
+// value constructors.
+template <typename T>
+void BM_QueueAddRem(int n) {
+  typedef typename std::remove_const<typename T::value_type>::type V;
+  typename KeyOfValue<typename T::key_type, V>::type key_of_value;
+
+  // Disable timing while we perform some initialization.
+  StopBenchmarkTiming();
+  assert(FLAGS_benchmark_values % 2 == 0);
+
+  T container;
+
+  const int half = FLAGS_benchmark_values / 2;
+  vector<int> remove_keys(half);
+  vector<int> add_keys(half);
+  // Both vectors start as 0..half-1; they hold offsets, not generated values.
+  for (int i = 0; i < half; i++) {
+    remove_keys[i] = i;
+    add_keys[i] = i;
+  }
+
+  RandGen rand(FLAGS_test_random_seed);
+  // NOTE(review): random_shuffle was removed in C++17; confirm the build mode.
+  random_shuffle(remove_keys.begin(), remove_keys.end(), rand);
+  random_shuffle(add_keys.begin(), add_keys.end(), rand);
+
+  Generator<V> g(FLAGS_benchmark_values + FLAGS_benchmark_max_iters);
+  // Pre-fill generator indices [0, half) and [half, 2 * half).
+  for (int i = 0; i < half; i++) {
+    container.insert(g(add_keys[i]));
+    container.insert(g(half + remove_keys[i]));
+  }
+
+  // There are three parts each of size "half":
+  // 1 is being deleted from  [offset - half, offset)
+  // 2 is standing            [offset, offset + half)
+  // 3 is being inserted into [offset + half, offset + 2 * half)
+  int offset = 0;
+
+  StartBenchmarkTiming();
+
+  for (int i = 0; i < n; i++) {
+    int idx = i % half;
+    // Every |half| steps: reshuffle untimed and slide the window up by half.
+    if (idx == 0) {
+      StopBenchmarkTiming();
+      random_shuffle(remove_keys.begin(), remove_keys.end(), rand);
+      random_shuffle(add_keys.begin(), add_keys.end(), rand);
+      offset += half;
+      StartBenchmarkTiming();
+    }
+
+    int e = container.erase(key_of_value(g(offset - half + remove_keys[idx])));
+    assert(e == 1);  // the erased key must have been present exactly once
+    container.insert(g(offset + half + add_keys[idx]));
+  }
+
+  StopBenchmarkTiming();
+}
+
+// Mixed insertion and deletion in the same range using pre-constructed values.
+template <typename T>
+void BM_MixedAddRem(int n) {
+  typedef typename std::remove_const<typename T::value_type>::type V;
+  typename KeyOfValue<typename T::key_type, V>::type key_of_value;
+
+  // Disable timing while we perform some initialization.
+  StopBenchmarkTiming();
+  assert(FLAGS_benchmark_values % 2 == 0);
+
+  T container;
+  RandGen rand(FLAGS_test_random_seed);
+  // Twice as many values as the container holds at any one time.
+  vector<V> values = GenerateValues<V>(FLAGS_benchmark_values * 2);
+
+  // Create two random shuffles
+  vector<int> remove_keys(FLAGS_benchmark_values);
+  vector<int> add_keys(FLAGS_benchmark_values);
+
+  // Insert the first half of the values (already in random order)
+  for (int i = 0; i < FLAGS_benchmark_values; i++) {
+    container.insert(values[i]);
+
+    // remove_keys and add_keys will be swapped before each round,
+    // therefore fill add_keys here w/ the keys being inserted, so
+    // they'll be the first to be removed.
+    remove_keys[i] = i + FLAGS_benchmark_values;
+    add_keys[i] = i;
+  }
+
+  StartBenchmarkTiming();
+
+  for (int i = 0; i < n; i++) {
+    int idx = i % FLAGS_benchmark_values;
+    // Start of a round: swap the two index sets and reshuffle them, untimed.
+    if (idx == 0) {
+      StopBenchmarkTiming();
+      remove_keys.swap(add_keys);
+      random_shuffle(remove_keys.begin(), remove_keys.end(), rand);
+      random_shuffle(add_keys.begin(), add_keys.end(), rand);
+      StartBenchmarkTiming();
+    }
+
+    int e = container.erase(key_of_value(values[remove_keys[idx]]));
+    assert(e == 1);  // the erased key must have been present exactly once
+    container.insert(values[add_keys[idx]]);
+  }
+
+  StopBenchmarkTiming();
+}
+
+// Insertion at end, removal from the beginning.  This benchmark
+// counts two value constructors.
+template <typename T>
+void BM_Fifo(int n) {
+  typedef typename std::remove_const<typename T::value_type>::type V;
+
+  // Setup below is excluded from the measurement.
+  StopBenchmarkTiming();
+
+  T container;
+  Generator<V> gen(FLAGS_benchmark_values + FLAGS_benchmark_max_iters);
+  // Pre-fill with the first FLAGS_benchmark_values generated values.
+  for (int seq = 0; seq < FLAGS_benchmark_values; ++seq) {
+    container.insert(gen(seq));
+  }
+
+  // Timed section: erase the first element, then insert with an end() hint.
+  StartBenchmarkTiming();
+  for (int step = 0; step < n; ++step) {
+    container.erase(container.begin());
+    container.insert(container.end(), gen(step + FLAGS_benchmark_values));
+  }
+
+  StopBenchmarkTiming();
+}
+
+// Iteration (forward) through the tree
+template <typename T>
+void BM_FwdIter(int n) {
+  typedef typename std::remove_const<typename T::value_type>::type V;
+
+  // Setup below is excluded from the measurement.
+  StopBenchmarkTiming();
+
+  T container;
+  vector<V> values = GenerateValues<V>(FLAGS_benchmark_values);
+
+  for (int k = 0; k < FLAGS_benchmark_values; ++k) {
+    container.insert(values[k]);
+  }
+
+  typename T::iterator cursor;
+  V last_seen = V();
+
+  // Timed section: read one element per step, rewinding to begin() every
+  // FLAGS_benchmark_values steps.
+  StartBenchmarkTiming();
+
+  for (int step = 0; step < n; ++step) {
+    if (step % FLAGS_benchmark_values == 0) {
+      cursor = container.begin();
+    }
+    last_seen = *cursor;
+    ++cursor;
+  }
+
+  StopBenchmarkTiming();
+
+  // Consume the value so the reads cannot be optimized away.
+  sink(last_seen);
+}
+
+using stl_set_int32 = set<int32_t>;
+using stl_set_int64 = set<int64_t>;
+using stl_set_string = set<string>;
+// Unique-key maps; the mapped value is an intptr_t.
+using stl_map_int32 = map<int32_t, intptr_t>;
+using stl_map_int64 = map<int64_t, intptr_t>;
+using stl_map_string = map<string, intptr_t>;
+// Multisets over the same three key types.
+using stl_multiset_int32 = multiset<int32_t>;
+using stl_multiset_int64 = multiset<int64_t>;
+using stl_multiset_string = multiset<string>;
+// Multimaps over the same three key types, again mapping to intptr_t.
+using stl_multimap_int32 = multimap<int32_t, intptr_t>;
+using stl_multimap_int64 = multimap<int64_t, intptr_t>;
+using stl_multimap_string = multimap<string, intptr_t>;
+
+#define MY_BENCHMARK_TYPES2(value, name, size)                                \
+  typedef btree ## _set<value, less<value>, allocator<value>, size>           \
+    btree ## _ ## size ## _set_ ## name;                                      \
+  typedef btree ## _map<value, int, less<value>, allocator<value>, size>      \
+    btree ## _ ## size ## _map_ ## name;                                      \
+  typedef btree ## _multiset<value, less<value>, allocator<value>, size>      \
+    btree ## _ ## size ## _multiset_ ## name;                                 \
+  typedef btree ## _multimap<value, int, less<value>, allocator<value>, size> \
+    btree ## _ ## size ## _multimap_ ## name  // NOTE(review): maps to int here but the stl_* maps use intptr_t - confirm
+
+#define MY_BENCHMARK_TYPES(value, name)  \
+  MY_BENCHMARK_TYPES2(value, name, 128); \
+  MY_BENCHMARK_TYPES2(value, name, 160); \
+  MY_BENCHMARK_TYPES2(value, name, 192); \
+  MY_BENCHMARK_TYPES2(value, name, 224); \
+  MY_BENCHMARK_TYPES2(value, name, 256); \
+  MY_BENCHMARK_TYPES2(value, name, 288); \
+  MY_BENCHMARK_TYPES2(value, name, 320); \
+  MY_BENCHMARK_TYPES2(value, name, 352); \
+  MY_BENCHMARK_TYPES2(value, name, 384); \
+  MY_BENCHMARK_TYPES2(value, name, 416); \
+  MY_BENCHMARK_TYPES2(value, name, 448); \
+  MY_BENCHMARK_TYPES2(value, name, 480); \
+  MY_BENCHMARK_TYPES2(value, name, 512); \
+  MY_BENCHMARK_TYPES2(value, name, 1024); \
+  MY_BENCHMARK_TYPES2(value, name, 1536); \
+  MY_BENCHMARK_TYPES2(value, name, 2048)
+
+MY_BENCHMARK_TYPES(int32_t, int32);
+MY_BENCHMARK_TYPES(int64_t, int64);
+MY_BENCHMARK_TYPES(string, string);
+
+#define MY_BENCHMARK4(type, name, func)                            \
+  void BM_ ## type ## _ ## name(int n) { BM_ ## func <type>(n); }  \
+  BTREE_BENCHMARK(BM_ ## type ## _ ## name)
+
+// Define NODESIZE_TESTING when running btree_perf.py to cover every node size.
+
+#ifdef NODESIZE_TESTING
+#define MY_BENCHMARK3(tree, type, name, func) \
+  MY_BENCHMARK4(tree ## _128_ ## type, name, func); \
+  MY_BENCHMARK4(tree ## _160_ ## type, name, func); \
+  MY_BENCHMARK4(tree ## _192_ ## type, name, func); \
+  MY_BENCHMARK4(tree ## _224_ ## type, name, func); \
+  MY_BENCHMARK4(tree ## _256_ ## type, name, func); \
+  MY_BENCHMARK4(tree ## _288_ ## type, name, func); \
+  MY_BENCHMARK4(tree ## _320_ ## type, name, func); \
+  MY_BENCHMARK4(tree ## _352_ ## type, name, func); \
+  MY_BENCHMARK4(tree ## _384_ ## type, name, func); \
+  MY_BENCHMARK4(tree ## _416_ ## type, name, func); \
+  MY_BENCHMARK4(tree ## _448_ ## type, name, func); \
+  MY_BENCHMARK4(tree ## _480_ ## type, name, func); \
+  MY_BENCHMARK4(tree ## _512_ ## type, name, func); \
+  MY_BENCHMARK4(tree ## _1024_ ## type, name, func); \
+  MY_BENCHMARK4(tree ## _1536_ ## type, name, func); \
+  MY_BENCHMARK4(tree ## _2048_ ## type, name, func)
+#else
+#define MY_BENCHMARK3(tree, type, name, func) \
+  MY_BENCHMARK4(tree ## _256_ ## type, name, func); \
+  MY_BENCHMARK4(tree ## _2048_ ## type, name, func)
+#endif
+
+#define MY_BENCHMARK2(type, name, func)    \
+  MY_BENCHMARK4(stl_ ## type, name, func); \
+  MY_BENCHMARK3(btree, type, name, func)
+
+#define MY_BENCHMARK(type)                        \
+  MY_BENCHMARK2(type, insert, Insert);            \
+  MY_BENCHMARK2(type, lookup, Lookup);            \
+  MY_BENCHMARK2(type, fulllookup, FullLookup);    \
+  MY_BENCHMARK2(type, delete, Delete);            \
+  MY_BENCHMARK2(type, queueaddrem, QueueAddRem);  \
+  MY_BENCHMARK2(type, mixedaddrem, MixedAddRem);  \
+  MY_BENCHMARK2(type, fifo, Fifo);                \
+  MY_BENCHMARK2(type, fwditer, FwdIter)
+
+MY_BENCHMARK(set_int32);
+MY_BENCHMARK(map_int32);
+MY_BENCHMARK(set_int64);
+MY_BENCHMARK(map_int64);
+MY_BENCHMARK(set_string);
+MY_BENCHMARK(map_string);
+
+MY_BENCHMARK(multiset_int32);
+MY_BENCHMARK(multimap_int32);
+MY_BENCHMARK(multiset_int64);
+MY_BENCHMARK(multimap_int64);
+MY_BENCHMARK(multiset_string);
+MY_BENCHMARK(multimap_string);
+
+} // namespace
+} // namespace btree
+
+int main(int argc, char **argv) {  // NOTE(review): argc/argv are never parsed,
+  btree::RunBenchmarks();          // so the gflags above keep their defaults
+  return 0;
+}
diff --git a/third-party/xdelta3/xdelta3/cpp-btree/btree_container.h b/third-party/xdelta3/xdelta3/cpp-btree/btree_container.h
new file mode 100644
index 0000000000..fb617abe8e
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/cpp-btree/btree_container.h
@@ -0,0 +1,349 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef UTIL_BTREE_BTREE_CONTAINER_H__
+#define UTIL_BTREE_BTREE_CONTAINER_H__
+
+#include <iosfwd>
+#include <utility>
+
+#include "btree.h"
+
+namespace btree {
+
+// A common base class for btree_set, btree_map, btree_multiset and
+// btree_multimap.
+template <typename Tree>
+class btree_container {
+  typedef btree_container<Tree> self_type;
+
+ public:
+  typedef typename Tree::params_type params_type;
+  typedef typename Tree::key_type key_type;
+  typedef typename Tree::value_type value_type;
+  typedef typename Tree::key_compare key_compare;
+  typedef typename Tree::allocator_type allocator_type;
+  typedef typename Tree::pointer pointer;
+  typedef typename Tree::const_pointer const_pointer;
+  typedef typename Tree::reference reference;
+  typedef typename Tree::const_reference const_reference;
+  typedef typename Tree::size_type size_type;
+  typedef typename Tree::difference_type difference_type;
+  typedef typename Tree::iterator iterator;
+  typedef typename Tree::const_iterator const_iterator;
+  typedef typename Tree::reverse_iterator reverse_iterator;
+  typedef typename Tree::const_reverse_iterator const_reverse_iterator;
+
+ public:
+  // Constructor; both the comparator and the allocator must be supplied.
+  btree_container(const key_compare &comp, const allocator_type &alloc)
+      : tree_(comp, alloc) {
+  }
+
+  // Copy constructor.
+  btree_container(const self_type &x)
+      : tree_(x.tree_) {
+  }
+
+  // Iterator routines.
+  iterator begin() { return tree_.begin(); }
+  const_iterator begin() const { return tree_.begin(); }
+  iterator end() { return tree_.end(); }
+  const_iterator end() const { return tree_.end(); }
+  reverse_iterator rbegin() { return tree_.rbegin(); }
+  const_reverse_iterator rbegin() const { return tree_.rbegin(); }
+  reverse_iterator rend() { return tree_.rend(); }
+  const_reverse_iterator rend() const { return tree_.rend(); }
+
+  // Lookup routines.
+  iterator lower_bound(const key_type &key) {
+    return tree_.lower_bound(key);
+  }
+  const_iterator lower_bound(const key_type &key) const {
+    return tree_.lower_bound(key);
+  }
+  iterator upper_bound(const key_type &key) {
+    return tree_.upper_bound(key);
+  }
+  const_iterator upper_bound(const key_type &key) const {
+    return tree_.upper_bound(key);
+  }
+  std::pair<iterator,iterator> equal_range(const key_type &key) {
+    return tree_.equal_range(key);
+  }
+  std::pair<const_iterator,const_iterator> equal_range(const key_type &key) const {
+    return tree_.equal_range(key);
+  }
+
+  // Utility routines.
+  void clear() {
+    tree_.clear();
+  }
+  void swap(self_type &x) {
+    tree_.swap(x.tree_);
+  }
+  void dump(std::ostream &os) const {
+    tree_.dump(os);
+  }
+  void verify() const {
+    tree_.verify();
+  }
+
+  // Size and statistics routines, each forwarded to the tree.
+  size_type size() const { return tree_.size(); }
+  size_type max_size() const { return tree_.max_size(); }
+  bool empty() const { return tree_.empty(); }
+  size_type height() const { return tree_.height(); }
+  size_type internal_nodes() const { return tree_.internal_nodes(); }
+  size_type leaf_nodes() const { return tree_.leaf_nodes(); }
+  size_type nodes() const { return tree_.nodes(); }
+  size_type bytes_used() const { return tree_.bytes_used(); }
+  static double average_bytes_per_value() {
+    return Tree::average_bytes_per_value();
+  }
+  double fullness() const { return tree_.fullness(); }
+  double overhead() const { return tree_.overhead(); }
+
+  bool operator==(const self_type& x) const {  // element-wise, in iteration order
+    if (size() != x.size()) {
+      return false;
+    }
+    for (const_iterator i = begin(), xi = x.begin(); i != end(); ++i, ++xi) {
+      if (*i != *xi) {  // sizes are equal, so xi is valid whenever i is
+        return false;
+      }
+    }
+    return true;
+  }
+
+  bool operator!=(const self_type& other) const {
+    return !operator==(other);
+  }
+
+
+ protected:
+  Tree tree_;  // the underlying tree; the methods above forward to it
+};
+
+template <typename T>
+inline std::ostream& operator<<(std::ostream &os, const btree_container<T> &b) {
+  b.dump(os);  // streaming a container writes the same text as dump()
+  return os;
+}
+
+// A common base class for btree_set and safe_btree_set.
+template <typename Tree>
+class btree_unique_container : public btree_container<Tree> {
+  typedef btree_unique_container<Tree> self_type;
+  typedef btree_container<Tree> super_type;
+
+ public:
+  typedef typename Tree::key_type key_type;
+  typedef typename Tree::value_type value_type;
+  typedef typename Tree::size_type size_type;
+  typedef typename Tree::key_compare key_compare;
+  typedef typename Tree::allocator_type allocator_type;
+  typedef typename Tree::iterator iterator;
+  typedef typename Tree::const_iterator const_iterator;
+
+ public:
+  // Default constructor.
+  btree_unique_container(const key_compare &comp = key_compare(),
+                         const allocator_type &alloc = allocator_type())
+      : super_type(comp, alloc) {
+  }
+
+  // Copy constructor.
+  btree_unique_container(const self_type &x)
+      : super_type(x) {
+  }
+
+  // Range constructor: starts empty, then inserts every element of [b, e).
+  template <class InputIterator>
+  btree_unique_container(InputIterator b, InputIterator e,
+                         const key_compare &comp = key_compare(),
+                         const allocator_type &alloc = allocator_type())
+      : super_type(comp, alloc) {
+    insert(b, e);
+  }
+
+  // Lookup routines, forwarded to the tree's *_unique entry points.
+  iterator find(const key_type &key) {
+    return this->tree_.find_unique(key);
+  }
+  const_iterator find(const key_type &key) const {
+    return this->tree_.find_unique(key);
+  }
+  size_type count(const key_type &key) const {
+    return this->tree_.count_unique(key);
+  }
+
+  // Insertion routines; |position| in the second overload is passed through.
+  std::pair<iterator,bool> insert(const value_type &x) {
+    return this->tree_.insert_unique(x);
+  }
+  iterator insert(iterator position, const value_type &x) {
+    return this->tree_.insert_unique(position, x);
+  }
+  template <typename InputIterator>
+  void insert(InputIterator b, InputIterator e) {
+    this->tree_.insert_unique(b, e);
+  }
+
+  // Deletion routines. The key overload returns the tree's erase_unique result.
+  int erase(const key_type &key) {
+    return this->tree_.erase_unique(key);
+  }
+  // Erase the specified iterator from the btree. The iterator must be valid
+  // (i.e. not equal to end()).  Return an iterator pointing to the node after
+  // the one that was erased (or end() if none exists).
+  iterator erase(const iterator &iter) {
+    return this->tree_.erase(iter);
+  }
+  void erase(const iterator &first, const iterator &last) {
+    this->tree_.erase(first, last);
+  }
+};
+
+// A common base class for btree_map and safe_btree_map.
+template <typename Tree>
+class btree_map_container : public btree_unique_container<Tree> {
+  typedef btree_map_container<Tree> self_type;
+  typedef btree_unique_container<Tree> super_type;
+
+ public:
+  typedef typename Tree::key_type key_type;
+  typedef typename Tree::data_type data_type;
+  typedef typename Tree::value_type value_type;
+  typedef typename Tree::mapped_type mapped_type;
+  typedef typename Tree::key_compare key_compare;
+  typedef typename Tree::allocator_type allocator_type;
+
+ private:
+  // A pointer-like object which only generates its value when
+  // dereferenced. Used by operator[] to avoid constructing an empty data_type
+  // if the key already exists in the map.
+  struct generate_value {
+    generate_value(const key_type &k)
+        : key(k) {
+    }
+    value_type operator*() const {
+      return std::make_pair(key, data_type());
+    }
+    const key_type &key;  // held by reference: the key must outlive this object
+  };
+
+ public:
+  // Default constructor.
+  btree_map_container(const key_compare &comp = key_compare(),
+                      const allocator_type &alloc = allocator_type())
+      : super_type(comp, alloc) {
+  }
+
+  // Copy constructor.
+  btree_map_container(const self_type &x)
+      : super_type(x) {
+  }
+
+  // Range constructor; the insertion is done by the base class constructor.
+  template <class InputIterator>
+  btree_map_container(InputIterator b, InputIterator e,
+                      const key_compare &comp = key_compare(),
+                      const allocator_type &alloc = allocator_type())
+      : super_type(b, e, comp, alloc) {
+  }
+
+  // Insertion routines. Returns a reference to the mapped data for |key|.
+  data_type& operator[](const key_type &key) {
+    return this->tree_.insert_unique(key, generate_value(key)).first->second;
+  }
+};
+
+// A common base class for btree_multiset and btree_multimap.
+template <typename Tree>
+class btree_multi_container : public btree_container<Tree> {
+  typedef btree_multi_container<Tree> self_type;
+  typedef btree_container<Tree> super_type;
+
+ public:
+  typedef typename Tree::key_type key_type;
+  typedef typename Tree::value_type value_type;
+  typedef typename Tree::size_type size_type;
+  typedef typename Tree::key_compare key_compare;
+  typedef typename Tree::allocator_type allocator_type;
+  typedef typename Tree::iterator iterator;
+  typedef typename Tree::const_iterator const_iterator;
+
+ public:
+  // Default constructor.
+  btree_multi_container(const key_compare &comp = key_compare(),
+                        const allocator_type &alloc = allocator_type())
+      : super_type(comp, alloc) {
+  }
+
+  // Copy constructor.
+  btree_multi_container(const self_type &x)
+      : super_type(x) {
+  }
+
+  // Range constructor: starts empty, then inserts every element of [b, e).
+  template <class InputIterator>
+  btree_multi_container(InputIterator b, InputIterator e,
+                        const key_compare &comp = key_compare(),
+                        const allocator_type &alloc = allocator_type())
+      : super_type(comp, alloc) {
+    insert(b, e);
+  }
+
+  // Lookup routines, forwarded to the tree's *_multi entry points.
+  iterator find(const key_type &key) {
+    return this->tree_.find_multi(key);
+  }
+  const_iterator find(const key_type &key) const {
+    return this->tree_.find_multi(key);
+  }
+  size_type count(const key_type &key) const {
+    return this->tree_.count_multi(key);
+  }
+
+  // Insertion routines; unlike the unique container, insert returns no bool.
+  iterator insert(const value_type &x) {
+    return this->tree_.insert_multi(x);
+  }
+  iterator insert(iterator position, const value_type &x) {
+    return this->tree_.insert_multi(position, x);
+  }
+  template <typename InputIterator>
+  void insert(InputIterator b, InputIterator e) {
+    this->tree_.insert_multi(b, e);
+  }
+
+  // Deletion routines. The key overload returns the tree's erase_multi result.
+  int erase(const key_type &key) {
+    return this->tree_.erase_multi(key);
+  }
+  // Erase the specified iterator from the btree. The iterator must be valid
+  // (i.e. not equal to end()).  Return an iterator pointing to the node after
+  // the one that was erased (or end() if none exists).
+  iterator erase(const iterator &iter) {
+    return this->tree_.erase(iter);
+  }
+  void erase(const iterator &first, const iterator &last) {
+    this->tree_.erase(first, last);
+  }
+};
+
+} // namespace btree
+
+#endif  // UTIL_BTREE_BTREE_CONTAINER_H__
diff --git a/third-party/xdelta3/xdelta3/cpp-btree/btree_map.h b/third-party/xdelta3/xdelta3/cpp-btree/btree_map.h
new file mode 100644
index 0000000000..b83489f07c
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/cpp-btree/btree_map.h
@@ -0,0 +1,130 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// A btree_map<> implements the STL unique sorted associative container
+// interface and the pair associative container interface (a.k.a map<>) using a
+// btree. A btree_multimap<> implements the STL multiple sorted associative
+// container interface and the pair associative container interface (a.k.a
+// multimap<>) using a btree. See btree.h for details of the btree
+// implementation and caveats.
+
+#ifndef UTIL_BTREE_BTREE_MAP_H__
+#define UTIL_BTREE_BTREE_MAP_H__
+
+#include <algorithm>
+#include <functional>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "btree.h"
+#include "btree_container.h"
+
+namespace btree {
+
+// The btree_map class is needed mainly for its constructors.
+template <typename Key, typename Value,
+          typename Compare = std::less<Key>,
+          typename Alloc = std::allocator<std::pair<const Key, Value> >,
+          int TargetNodeSize = 256>
+class btree_map : public btree_map_container<
+  btree<btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize> > > {
+
+  typedef btree_map<Key, Value, Compare, Alloc, TargetNodeSize> self_type;
+  typedef btree_map_params<
+    Key, Value, Compare, Alloc, TargetNodeSize> params_type;
+  typedef btree<params_type> btree_type;
+  typedef btree_map_container<btree_type> super_type;
+
+ public:
+  typedef typename btree_type::key_compare key_compare;
+  typedef typename btree_type::allocator_type allocator_type;
+
+ public:
+  // Default constructor.
+  btree_map(const key_compare &comp = key_compare(),
+            const allocator_type &alloc = allocator_type())
+      : super_type(comp, alloc) {
+  }
+
+  // Copy constructor.
+  btree_map(const self_type &x)
+      : super_type(x) {
+  }
+
+  // Range constructor; [b, e) is handed to the base class constructor.
+  template <class InputIterator>
+  btree_map(InputIterator b, InputIterator e,
+            const key_compare &comp = key_compare(),
+            const allocator_type &alloc = allocator_type())
+      : super_type(b, e, comp, alloc) {
+  }
+};
+
+template <typename K, typename V, typename C, typename A, int N>
+inline void swap(btree_map<K, V, C, A, N> &x,
+                 btree_map<K, V, C, A, N> &y) {
+  x.swap(y);  // free-function form of the member swap, for btree_map
+}
+
+// The btree_multimap class is needed mainly for its constructors.
+template <typename Key, typename Value,
+          typename Compare = std::less<Key>,
+          typename Alloc = std::allocator<std::pair<const Key, Value> >,
+          int TargetNodeSize = 256>
+class btree_multimap : public btree_multi_container<
+  btree<btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize> > > {
+
+  typedef btree_multimap<Key, Value, Compare, Alloc, TargetNodeSize> self_type;
+  typedef btree_map_params<
+    Key, Value, Compare, Alloc, TargetNodeSize> params_type;
+  typedef btree<params_type> btree_type;
+  typedef btree_multi_container<btree_type> super_type;
+
+ public:
+  typedef typename btree_type::key_compare key_compare;
+  typedef typename btree_type::allocator_type allocator_type;
+  typedef typename btree_type::data_type data_type;
+  typedef typename btree_type::mapped_type mapped_type;
+
+ public:
+  // Default constructor.
+  btree_multimap(const key_compare &comp = key_compare(),
+                 const allocator_type &alloc = allocator_type())
+      : super_type(comp, alloc) {
+  }
+
+  // Copy constructor.
+  btree_multimap(const self_type &x)
+      : super_type(x) {
+  }
+
+  // Range constructor; [b, e) is handed to the base class constructor.
+  template <class InputIterator>
+  btree_multimap(InputIterator b, InputIterator e,
+                 const key_compare &comp = key_compare(),
+                 const allocator_type &alloc = allocator_type())
+      : super_type(b, e, comp, alloc) {
+  }
+};
+
+template <typename K, typename V, typename C, typename A, int N>
+inline void swap(btree_multimap<K, V, C, A, N> &x,
+                 btree_multimap<K, V, C, A, N> &y) {
+  x.swap(y);  // free-function form of the member swap, for btree_multimap
+}
+
+} // namespace btree
+
+#endif  // UTIL_BTREE_BTREE_MAP_H__
diff --git a/third-party/xdelta3/xdelta3/cpp-btree/btree_set.h b/third-party/xdelta3/xdelta3/cpp-btree/btree_set.h
new file mode 100644
index 0000000000..f9b2e75d8e
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/cpp-btree/btree_set.h
@@ -0,0 +1,121 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// A btree_set<> implements the STL unique sorted associative container
+// interface (a.k.a set<>) using a btree. A btree_multiset<> implements the STL
+// multiple sorted associative container interface (a.k.a multiset<>) using a
+// btree. See btree.h for details of the btree implementation and caveats.
+
+#ifndef UTIL_BTREE_BTREE_SET_H__
+#define UTIL_BTREE_BTREE_SET_H__
+
+#include <functional>
+#include <memory>
+#include <string>
+
+#include "btree.h"
+#include "btree_container.h"
+
+namespace btree {
+
+// btree_set only adds typedefs and constructors on top of btree_unique_container.
+template <typename Key,
+          typename Compare = std::less<Key>,
+          typename Alloc = std::allocator<Key>,
+          int TargetNodeSize = 256>
+class btree_set : public btree_unique_container<
+  btree<btree_set_params<Key, Compare, Alloc, TargetNodeSize> > > {
+
+  typedef btree_set<Key, Compare, Alloc, TargetNodeSize> self_type;
+  typedef btree_set_params<Key, Compare, Alloc, TargetNodeSize> params_type;
+  typedef btree<params_type> btree_type;
+  typedef btree_unique_container<btree_type> super_type;
+
+ public:
+  typedef typename btree_type::key_compare key_compare;  // re-exported from the underlying btree
+  typedef typename btree_type::allocator_type allocator_type;
+
+ public:
+  // Default constructor: forwards the comparator and allocator to the base container.
+  btree_set(const key_compare &comp = key_compare(),
+            const allocator_type &alloc = allocator_type())
+      : super_type(comp, alloc) {
+  }
+
+  // Copy constructor: delegates to the base container's copy constructor.
+  btree_set(const self_type &x)
+      : super_type(x) {
+  }
+
+  // Range constructor: forwards the iterator pair (b, e), comparator and allocator to the base.
+  template <class InputIterator>
+  btree_set(InputIterator b, InputIterator e,
+            const key_compare &comp = key_compare(),
+            const allocator_type &alloc = allocator_type())
+      : super_type(b, e, comp, alloc) {
+  }
+};
+
+template <typename K, typename C, typename A, int N>
+inline void swap(btree_set<K, C, A, N> &x, btree_set<K, C, A, N> &y) {
+  x.swap(y);  // free-function swap for btree_set; forwards to the member swap
+}
+
+// btree_multiset only adds typedefs and constructors on top of btree_multi_container.
+template <typename Key,
+          typename Compare = std::less<Key>,
+          typename Alloc = std::allocator<Key>,
+          int TargetNodeSize = 256>
+class btree_multiset : public btree_multi_container<
+  btree<btree_set_params<Key, Compare, Alloc, TargetNodeSize> > > {
+
+  typedef btree_multiset<Key, Compare, Alloc, TargetNodeSize> self_type;
+  typedef btree_set_params<Key, Compare, Alloc, TargetNodeSize> params_type;
+  typedef btree<params_type> btree_type;
+  typedef btree_multi_container<btree_type> super_type;
+
+ public:
+  typedef typename btree_type::key_compare key_compare;  // re-exported from the underlying btree
+  typedef typename btree_type::allocator_type allocator_type;
+
+ public:
+  // Default constructor: forwards the comparator and allocator to the base container.
+  btree_multiset(const key_compare &comp = key_compare(),
+                 const allocator_type &alloc = allocator_type())
+      : super_type(comp, alloc) {
+  }
+
+  // Copy constructor: delegates to the base container's copy constructor.
+  btree_multiset(const self_type &x)
+      : super_type(x) {
+  }
+
+  // Range constructor: forwards the iterator pair (b, e), comparator and allocator to the base.
+  template <class InputIterator>
+  btree_multiset(InputIterator b, InputIterator e,
+                 const key_compare &comp = key_compare(),
+                 const allocator_type &alloc = allocator_type())
+      : super_type(b, e, comp, alloc) {
+  }
+};
+
+template <typename K, typename C, typename A, int N>
+inline void swap(btree_multiset<K, C, A, N> &x,
+                 btree_multiset<K, C, A, N> &y) {
+  x.swap(y);  // free-function swap for btree_multiset; forwards to the member swap
+}
+
+} // namespace btree
+
+#endif  // UTIL_BTREE_BTREE_SET_H__
diff --git a/third-party/xdelta3/xdelta3/cpp-btree/btree_test.cc b/third-party/xdelta3/xdelta3/cpp-btree/btree_test.cc
new file mode 100644
index 0000000000..6b1837d334
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/cpp-btree/btree_test.cc
@@ -0,0 +1,270 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "gtest/gtest.h"
+#include "btree_map.h"
+#include "btree_set.h"
+#include "btree_test.h"
+
+namespace btree {
+namespace {
+
+template <typename K, int N>  // K: key type; N: TargetNodeSize given to btree_set
+void SetTest() {
+  typedef TestAllocator<K> TestAlloc;
+  ASSERT_EQ(sizeof(btree_set<K>), sizeof(void*));  // container object is one pointer wide (default node size, not N)
+  BtreeTest<btree_set<K, std::less<K>, std::allocator<K>, N>, std::set<K> >();  // std::set is the second template argument
+  BtreeAllocatorTest<btree_set<K, std::less<K>, TestAlloc, N> >();  // same set type, but with TestAllocator
+}
+
+template <typename K, int N>  // K: used as both key and mapped type; N: TargetNodeSize given to btree_map
+void MapTest() {
+  typedef TestAllocator<K> TestAlloc;
+  ASSERT_EQ(sizeof(btree_map<K, K>), sizeof(void*));  // container object is one pointer wide (default node size, not N)
+  BtreeTest<btree_map<K, K, std::less<K>, std::allocator<K>, N>, std::map<K, K> >();  // std::map is the second template argument
+  BtreeAllocatorTest<btree_map<K, K, std::less<K>, TestAlloc, N> >();  // same map type, but with TestAllocator
+  BtreeMapTest<btree_map<K, K, std::less<K>, std::allocator<K>, N> >();  // map-only helper; NOTE(review): presumably defined in btree_test.h
+}
+
+TEST(Btree, set_int32_32)   { SetTest<int32_t, 32>(); }  // test name encodes container, key type and node size
+TEST(Btree, set_int32_64)   { SetTest<int32_t, 64>(); }
+TEST(Btree, set_int32_128)  { SetTest<int32_t, 128>(); }
+TEST(Btree, set_int32_256)  { SetTest<int32_t, 256>(); }
+TEST(Btree, set_int64_256)  { SetTest<int64_t, 256>(); }
+TEST(Btree, set_string_256) { SetTest<std::string, 256>(); }
+TEST(Btree, set_pair_256)   { SetTest<std::pair<int, int>, 256>(); }
+TEST(Btree, map_int32_256)  { MapTest<int32_t, 256>(); }
+TEST(Btree, map_int64_256)  { MapTest<int64_t, 256>(); }
+TEST(Btree, map_string_256) { MapTest<std::string, 256>(); }
+TEST(Btree, map_pair_256)   { MapTest<std::pair<int, int>, 256>(); }
+
+// Large-node tests: node sizes of 1024 and above, including sizes that are not powers of two.
+TEST(Btree, map_int32_1024)   { MapTest<int32_t, 1024>(); }
+TEST(Btree, map_int32_1032)   { MapTest<int32_t, 1032>(); }
+TEST(Btree, map_int32_1040)   { MapTest<int32_t, 1040>(); }
+TEST(Btree, map_int32_1048)   { MapTest<int32_t, 1048>(); }
+TEST(Btree, map_int32_1056)   { MapTest<int32_t, 1056>(); }
+
+TEST(Btree, map_int32_2048)   { MapTest<int32_t, 2048>(); }
+TEST(Btree, map_int32_4096)   { MapTest<int32_t, 4096>(); }
+TEST(Btree, set_int32_1024)   { SetTest<int32_t, 1024>(); }
+TEST(Btree, set_int32_2048)   { SetTest<int32_t, 2048>(); }
+TEST(Btree, set_int32_4096)   { SetTest<int32_t, 4096>(); }
+TEST(Btree, map_string_1024)   { MapTest<std::string, 1024>(); }
+TEST(Btree, map_string_2048)   { MapTest<std::string, 2048>(); }
+TEST(Btree, map_string_4096)   { MapTest<std::string, 4096>(); }
+TEST(Btree, set_string_1024)   { SetTest<std::string, 1024>(); }
+TEST(Btree, set_string_2048)   { SetTest<std::string, 2048>(); }
+TEST(Btree, set_string_4096)   { SetTest<std::string, 4096>(); }
+
+template <typename K, int N>  // K: key type; N: TargetNodeSize given to btree_multiset
+void MultiSetTest() {
+  typedef TestAllocator<K> TestAlloc;
+  ASSERT_EQ(sizeof(btree_multiset<K>), sizeof(void*));  // container object is one pointer wide (default node size, not N)
+  BtreeMultiTest<btree_multiset<K, std::less<K>, std::allocator<K>, N>,
+      std::multiset<K> >();  // std::multiset is the second template argument
+  BtreeAllocatorTest<btree_multiset<K, std::less<K>, TestAlloc, N> >();  // same multiset type, but with TestAllocator
+}
+
+template <typename K, int N>  // K: used as both key and mapped type; N: TargetNodeSize given to btree_multimap
+void MultiMapTest() {
+  typedef TestAllocator<K> TestAlloc;
+  ASSERT_EQ(sizeof(btree_multimap<K, K>), sizeof(void*));  // container object is one pointer wide (default node size, not N)
+  BtreeMultiTest<btree_multimap<K, K, std::less<K>, std::allocator<K>, N>,
+      std::multimap<K, K> >();  // std::multimap is the second template argument
+  BtreeMultiMapTest<btree_multimap<K, K, std::less<K>, std::allocator<K>, N> >();  // multimap-only helper
+  BtreeAllocatorTest<btree_multimap<K, K, std::less<K>, TestAlloc, N> >();  // same multimap type, but with TestAllocator
+}
+
+TEST(Btree, multiset_int32_256)  { MultiSetTest<int32_t, 256>(); }  // test name encodes container, key type and node size
+TEST(Btree, multiset_int64_256)  { MultiSetTest<int64_t, 256>(); }
+TEST(Btree, multiset_string_256) { MultiSetTest<std::string, 256>(); }
+TEST(Btree, multiset_pair_256)   { MultiSetTest<std::pair<int, int>, 256>(); }
+TEST(Btree, multimap_int32_256)  { MultiMapTest<int32_t, 256>(); }
+TEST(Btree, multimap_int64_256)  { MultiMapTest<int64_t, 256>(); }
+TEST(Btree, multimap_string_256) { MultiMapTest<std::string, 256>(); }
+TEST(Btree, multimap_pair_256)   { MultiMapTest<std::pair<int, int>, 256>(); }
+
+// Large-node tests: node sizes 1024, 2048 and 4096 for the multi containers.
+TEST(Btree, multimap_int32_1024)   { MultiMapTest<int32_t, 1024>(); }
+TEST(Btree, multimap_int32_2048)   { MultiMapTest<int32_t, 2048>(); }
+TEST(Btree, multimap_int32_4096)   { MultiMapTest<int32_t, 4096>(); }
+TEST(Btree, multiset_int32_1024)   { MultiSetTest<int32_t, 1024>(); }
+TEST(Btree, multiset_int32_2048)   { MultiSetTest<int32_t, 2048>(); }
+TEST(Btree, multiset_int32_4096)   { MultiSetTest<int32_t, 4096>(); }
+TEST(Btree, multimap_string_1024)   { MultiMapTest<std::string, 1024>(); }
+TEST(Btree, multimap_string_2048)   { MultiMapTest<std::string, 2048>(); }
+TEST(Btree, multimap_string_4096)   { MultiMapTest<std::string, 4096>(); }
+TEST(Btree, multiset_string_1024)   { MultiSetTest<std::string, 1024>(); }
+TEST(Btree, multiset_string_2048)   { MultiSetTest<std::string, 2048>(); }
+TEST(Btree, multiset_string_4096)   { MultiSetTest<std::string, 4096>(); }
+
+// Verify that swapping btrees swaps the key comparison functors.
+struct SubstringLess {
+  SubstringLess() : n(2) {}  // default: compare only the first 2 characters
+  SubstringLess(size_t length)
+      : n(length) {
+  }
+  bool operator()(const std::string &a, const std::string &b) const {
+    std::string as(a.data(), std::min(n, a.size()));  // prefix of a, at most n characters
+    std::string bs(b.data(), std::min(n, b.size()));  // prefix of b, at most n characters
+    return as < bs;
+  }
+  size_t n;  // number of leading characters that take part in the comparison
+};
+
+TEST(Btree, SwapKeyCompare) {
+  typedef btree_set<std::string, SubstringLess> SubstringSet;
+  SubstringSet s1(SubstringLess(1), SubstringSet::allocator_type());  // s1 compares 1-character prefixes
+  SubstringSet s2(SubstringLess(2), SubstringSet::allocator_type());  // s2 compares 2-character prefixes
+
+  ASSERT_TRUE(s1.insert("a").second);
+  ASSERT_FALSE(s1.insert("aa").second);  // "aa" and "a" share the 1-character prefix "a"
+
+  ASSERT_TRUE(s2.insert("a").second);
+  ASSERT_TRUE(s2.insert("aa").second);
+  ASSERT_FALSE(s2.insert("aaa").second);  // "aaa" and "aa" share the 2-character prefix "aa"
+
+  swap(s1, s2);  // the comparators must travel with the contents
+
+  ASSERT_TRUE(s1.insert("b").second);
+  ASSERT_TRUE(s1.insert("bb").second);
+  ASSERT_FALSE(s1.insert("bbb").second);  // s1 now behaves like the 2-character set
+
+  ASSERT_TRUE(s2.insert("b").second);
+  ASSERT_FALSE(s2.insert("bb").second);  // s2 now behaves like the 1-character set
+}
+
+TEST(Btree, UpperBoundRegression) {
+  // Regression test for a bug where upper_bound would default-construct a new
+  // key_compare instead of copying the existing one.
+  typedef btree_set<std::string, SubstringLess> SubstringSet;
+  SubstringSet my_set(SubstringLess(3));  // non-default comparator: 3-character prefixes
+  my_set.insert("aab");
+  my_set.insert("abb");
+  // We call upper_bound("aaa").  If this correctly uses the length 3
+  // comparator, aaa < aab < abb, so we should get aab as the result.
+  // If it instead uses the default-constructed length 2 comparator,
+  // aa == aa < ab, so we'll get abb as our result.
+  SubstringSet::iterator it = my_set.upper_bound("aaa");
+  ASSERT_TRUE(it != my_set.end());  // guard the dereference below
+  EXPECT_EQ("aab", *it);
+}
+
+
+TEST(Btree, IteratorIncrementBy) {
+  // Test that increment_by returns the same position as increment.
+  const int kSetSize = 2341;
+  btree_set<int32_t> my_set;
+  for (int i = 0; i < kSetSize; ++i) {
+    my_set.insert(i);  // set holds 0 .. kSetSize - 1
+  }
+
+  {
+    // Simple increment vs. increment by: one step from begin() either way.
+    btree_set<int32_t>::iterator a = my_set.begin();
+    btree_set<int32_t>::iterator b = my_set.begin();
+    a.increment();
+    b.increment_by(1);
+    EXPECT_EQ(*a, *b);
+  }
+
+  btree_set<int32_t>::iterator a = my_set.begin();
+  for (int i = 1; i < kSetSize; ++i) {
+    ++a;  // a has been advanced i times, one step per loop iteration
+    // b jumps i positions from begin() in a single increment_by call.
+    btree_set<int32_t>::iterator b = my_set.begin();
+    b.increment_by(i);
+    EXPECT_EQ(*a, *b) << ": i=" << i;
+  }
+}
+
+TEST(Btree, Comparison) {
+  const int kSetSize = 1201;
+  btree_set<int64_t> my_set;
+  for (int i = 0; i < kSetSize; ++i) {
+    my_set.insert(i);
+  }
+  btree_set<int64_t> my_set_copy(my_set);  // a fresh copy must compare equal in both directions
+  EXPECT_TRUE(my_set_copy == my_set);
+  EXPECT_TRUE(my_set == my_set_copy);
+  EXPECT_FALSE(my_set_copy != my_set);
+  EXPECT_FALSE(my_set != my_set_copy);
+
+  my_set.insert(kSetSize);  // my_set now has one more element than the copy
+  EXPECT_FALSE(my_set_copy == my_set);
+  EXPECT_FALSE(my_set == my_set_copy);
+  EXPECT_TRUE(my_set_copy != my_set);
+  EXPECT_TRUE(my_set != my_set_copy);
+
+  my_set.erase(kSetSize - 1);  // sizes match again, but the last element differs
+  EXPECT_FALSE(my_set_copy == my_set);
+  EXPECT_FALSE(my_set == my_set_copy);
+  EXPECT_TRUE(my_set_copy != my_set);
+  EXPECT_TRUE(my_set != my_set_copy);
+
+  btree_map<std::string, int64_t> my_map;
+  for (int i = 0; i < kSetSize; ++i) {
+    my_map[std::string(i, 'a')] = i;  // key is i copies of 'a' (empty string for i == 0)
+  }
+  btree_map<std::string, int64_t> my_map_copy(my_map);
+  EXPECT_TRUE(my_map_copy == my_map);
+  EXPECT_TRUE(my_map == my_map_copy);
+  EXPECT_FALSE(my_map_copy != my_map);
+  EXPECT_FALSE(my_map != my_map_copy);
+
+  ++my_map_copy[std::string(7, 'a')];  // same keys, one mapped value differs
+  EXPECT_FALSE(my_map_copy == my_map);
+  EXPECT_FALSE(my_map == my_map_copy);
+  EXPECT_TRUE(my_map_copy != my_map);
+  EXPECT_TRUE(my_map != my_map_copy);
+
+  my_map_copy = my_map;  // resynchronize the copy before the next mutation
+  my_map["hello"] = kSetSize;  // my_map gains a key the copy lacks
+  EXPECT_FALSE(my_map_copy == my_map);
+  EXPECT_FALSE(my_map == my_map_copy);
+  EXPECT_TRUE(my_map_copy != my_map);
+  EXPECT_TRUE(my_map != my_map_copy);
+
+  my_map.erase(std::string(kSetSize - 1, 'a'));  // sizes match again, but the key sets differ
+  EXPECT_FALSE(my_map_copy == my_map);
+  EXPECT_FALSE(my_map == my_map_copy);
+  EXPECT_TRUE(my_map_copy != my_map);
+  EXPECT_TRUE(my_map != my_map_copy);
+}
+
+TEST(Btree, RangeCtorSanity) {
+  typedef btree_set<int, std::less<int>, std::allocator<int>, 256> test_set;
+  typedef btree_map<int, int, std::less<int>, std::allocator<int>, 256> 
+      test_map;
+  typedef btree_multiset<int, std::less<int>, std::allocator<int>, 256> 
+      test_mset;
+  typedef btree_multimap<int, int, std::less<int>, std::allocator<int>, 256> 
+      test_mmap;
+  std::vector<int> ivec;
+  ivec.push_back(1);  // single-element source range for the set types
+  std::map<int, int> imap;
+  imap.insert(std::make_pair(1, 2));  // single-element source range for the map types
+  test_mset tmset(ivec.begin(), ivec.end());  // each of the four container types is built via its range constructor
+  test_mmap tmmap(imap.begin(), imap.end());
+  test_set tset(ivec.begin(), ivec.end());
+  test_map tmap(imap.begin(), imap.end());
+  EXPECT_EQ(1, tmset.size());  // exactly the one source element must have been inserted
+  EXPECT_EQ(1, tmmap.size());
+  EXPECT_EQ(1, tset.size());
+  EXPECT_EQ(1, tmap.size());
+}
+
+} // namespace
+} // namespace btree
diff --git a/third-party/xdelta3/xdelta3/cpp-btree/btree_test.h b/third-party/xdelta3/xdelta3/cpp-btree/btree_test.h
new file mode 100644
index 0000000000..413dc3c784
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/cpp-btree/btree_test.h
@@ -0,0 +1,940 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef UTIL_BTREE_BTREE_TEST_H__
+#define UTIL_BTREE_BTREE_TEST_H__
+
+#include <stdio.h>
+#include <algorithm>
+#include <functional>
+#include <type_traits>
+#include <iosfwd>
+#include <map>
+#include <set>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "gflags/gflags.h"
+#include "btree_container.h"
+
+DECLARE_int32(test_values);
+DECLARE_int32(benchmark_values);
+
+namespace std {  // NOTE(review): adding overloads/specializations here is formally undefined behavior; kept as in upstream
+
+// Provide operator<< support for std::pair<T, U>; prints the pair as "(first,second)".
+template <typename T, typename U>
+ostream& operator<<(ostream &os, const std::pair<T, U> &p) {
+  os << "(" << p.first << "," << p.second << ")";
+  return os;
+}
+
+// Provide pair equality testing that works as long as x.first is comparable to
+// y.first and x.second is comparable to y.second. Needed in the test for
+// comparing std::pair<T, U> to std::pair<const T, U>.
+template <typename T, typename U, typename V, typename W>
+bool operator==(const std::pair<T, U> &x, const std::pair<V, W> &y) {
+  return x.first == y.first && x.second == y.second;  // member-wise comparison across the two pair types
+}
+
+// Partial specialization of remove_const that propagates the removal through
+// std::pair, i.e. pair<const T, U> maps to pair<T, U>.
+template <typename T, typename U>
+struct remove_const<pair<T, U> > {
+  typedef pair<typename remove_const<T>::type,
+               typename remove_const<U>::type> type;
+};
+
+} // namespace std
+
+namespace btree {
+
+// Select the first member of a pair. Declares the functor typedefs itself: std::unary_function was removed in C++17.
+template <class _Pair>
+struct select1st {
+  typedef _Pair argument_type;                      // formerly inherited from std::unary_function
+  typedef typename _Pair::first_type result_type;   // formerly inherited from std::unary_function
+  const typename _Pair::first_type& operator()(const _Pair& __x) const { return __x.first; }
+};
+
+// Utility class to provide an accessor for a key given a value. The default
+// behavior is to treat the value as a pair and return the first element.
+template <typename K, typename V>
+struct KeyOfValue {
+  typedef select1st<V> type;  // functor type; calling it on a V yields V::first
+};
+
+template <typename T>
+struct identity {  // functor that returns its argument unchanged
+  inline const T& operator()(const T& t) const { return t; }
+};
+
+// Partial specialization of KeyOfValue class for when the key and value are
+// the same type such as in set<> and btree_set<>.
+template <typename K>
+struct KeyOfValue<K, K> {
+  typedef identity<K> type;  // the value is its own key, so no extraction is needed
+};
+
+// Counts the number of occurrences of "c" in the buffer [buf_begin, buf_end).
+inline ptrdiff_t strcount(const char* buf_begin, const char* buf_end, char c) {
+  if (buf_begin == NULL)
+    return 0;  // no buffer at all
+  if (buf_end <= buf_begin)
+    return 0;  // empty or inverted range
+  ptrdiff_t num = 0;
+  for (const char* bp = buf_begin; bp != buf_end; bp++) {
+    if (*bp == c)
+      num++;
+  }
+  return num;
+}
+
+// Overload for when the string is not null-terminated: counts "c" in the first len bytes of buf.
+inline ptrdiff_t strcount(const char* buf, size_t len, char c) {
+  return strcount(buf, buf + len, c);
+}
+
+inline ptrdiff_t strcount(const std::string& buf, char c) {
+  return strcount(buf.c_str(), buf.size(), c);  // std::string overload; forwards to the pointer + length overload
+}
+
+// The base class for a sorted associative container checker. TreeType is the
+// container type to check and CheckerType is the container type to check
+// against. TreeType is expected to be btree_{set,map,multiset,multimap} and
+// CheckerType is expected to be {set,map,multiset,multimap}.
+template <typename TreeType, typename CheckerType>
+class base_checker {
+  typedef base_checker<TreeType, CheckerType> self_type;
+
+ public:
+  typedef typename TreeType::key_type key_type;
+  typedef typename TreeType::value_type value_type;
+  typedef typename TreeType::key_compare key_compare;
+  typedef typename TreeType::pointer pointer;
+  typedef typename TreeType::const_pointer const_pointer;
+  typedef typename TreeType::reference reference;
+  typedef typename TreeType::const_reference const_reference;
+  typedef typename TreeType::size_type size_type;
+  typedef typename TreeType::difference_type difference_type;
+  typedef typename TreeType::iterator iterator;
+  typedef typename TreeType::const_iterator const_iterator;
+  typedef typename TreeType::reverse_iterator reverse_iterator;
+  typedef typename TreeType::const_reverse_iterator const_reverse_iterator;
+
+ public:
+  // Default constructor. const_tree_ is bound to this object's own tree_.
+  base_checker()
+      : const_tree_(tree_) {
+  }
+  // Copy constructor. Copies both containers; const_tree_ refers to the new tree_, not x's.
+  base_checker(const self_type &x)
+      : tree_(x.tree_),
+        const_tree_(tree_),
+        checker_(x.checker_) {
+  }
+  // Range constructor. Builds both containers from the same iterator pair.
+  template <typename InputIterator>
+  base_checker(InputIterator b, InputIterator e)
+      : tree_(b, e),
+        const_tree_(tree_),
+        checker_(b, e) {
+  }
+
+  // Iterator routines. These expose the btree's iterators only.
+  iterator begin() { return tree_.begin(); }
+  const_iterator begin() const { return tree_.begin(); }
+  iterator end() { return tree_.end(); }
+  const_iterator end() const { return tree_.end(); }
+  reverse_iterator rbegin() { return tree_.rbegin(); }
+  const_reverse_iterator rbegin() const { return tree_.rbegin(); }
+  reverse_iterator rend() { return tree_.rend(); }
+  const_reverse_iterator rend() const { return tree_.rend(); }
+
+  // Helper routines. iter_check/riter_check expect both iterators to be at the
+  // end together, or to refer to equal values; the tree iterator is returned.
+  template <typename IterType, typename CheckerIterType>
+  IterType iter_check(
+      IterType tree_iter, CheckerIterType checker_iter) const {
+    if (tree_iter == tree_.end()) {
+      EXPECT_EQ(checker_iter, checker_.end());
+    } else {
+      EXPECT_EQ(*tree_iter, *checker_iter);
+    }
+    return tree_iter;
+  }
+  template <typename IterType, typename CheckerIterType>
+  IterType riter_check(
+      IterType tree_iter, CheckerIterType checker_iter) const {
+    if (tree_iter == tree_.rend()) {
+      EXPECT_EQ(checker_iter, checker_.rend());
+    } else {
+      EXPECT_EQ(*tree_iter, *checker_iter);
+    }
+    return tree_iter;
+  }
+  void value_check(const value_type &x) {
+    typename KeyOfValue<typename TreeType::key_type,
+        typename TreeType::value_type>::type key_of_value;
+    const key_type &key = key_of_value(x);
+    EXPECT_EQ(*find(key), x);
+    lower_bound(key);  // called for the cross-checks they perform; results unused
+    upper_bound(key);
+    equal_range(key);
+    count(key);
+  }
+  void erase_check(const key_type &key) {
+    EXPECT_TRUE(tree_.find(key) == const_tree_.end());  // mixes const and non-const lookups on purpose
+    EXPECT_TRUE(const_tree_.find(key) == tree_.end());
+    EXPECT_TRUE(tree_.equal_range(key).first ==
+                const_tree_.equal_range(key).second);  // an absent key has an empty range
+  }
+
+  // Lookup routines. Each runs the lookup on both containers and cross-checks the results.
+  iterator lower_bound(const key_type &key) {
+    return iter_check(tree_.lower_bound(key), checker_.lower_bound(key));
+  }
+  const_iterator lower_bound(const key_type &key) const {
+    return iter_check(tree_.lower_bound(key), checker_.lower_bound(key));
+  }
+  iterator upper_bound(const key_type &key) {
+    return iter_check(tree_.upper_bound(key), checker_.upper_bound(key));
+  }
+  const_iterator upper_bound(const key_type &key) const {
+    return iter_check(tree_.upper_bound(key), checker_.upper_bound(key));
+  }
+  std::pair<iterator,iterator> equal_range(const key_type &key) {
+    std::pair<typename CheckerType::iterator,
+        typename CheckerType::iterator> checker_res =
+        checker_.equal_range(key);
+    std::pair<iterator, iterator> tree_res = tree_.equal_range(key);
+    iter_check(tree_res.first, checker_res.first);
+    iter_check(tree_res.second, checker_res.second);
+    return tree_res;
+  }
+  std::pair<const_iterator,const_iterator> equal_range(const key_type &key) const {
+    std::pair<typename CheckerType::const_iterator,
+        typename CheckerType::const_iterator> checker_res =
+        checker_.equal_range(key);
+    std::pair<const_iterator, const_iterator> tree_res = tree_.equal_range(key);
+    iter_check(tree_res.first, checker_res.first);
+    iter_check(tree_res.second, checker_res.second);
+    return tree_res;
+  }
+  iterator find(const key_type &key) {
+    return iter_check(tree_.find(key), checker_.find(key));
+  }
+  const_iterator find(const key_type &key) const {
+    return iter_check(tree_.find(key), checker_.find(key));
+  }
+  size_type count(const key_type &key) const {
+    size_type res = checker_.count(key);
+    EXPECT_EQ(res, tree_.count(key));
+    return res;
+  }
+
+  // Assignment operator. const_tree_ is a reference to this object's tree_ and is left untouched.
+  self_type& operator=(const self_type &x) {
+    tree_ = x.tree_;
+    checker_ = x.checker_;
+    return *this;
+  }
+
+  // Deletion routines. Each erases from both containers and checks sizes and counts afterwards.
+  int erase(const key_type &key) {
+    int size = tree_.size();
+    int res = checker_.erase(key);  // number of elements the reference container removed
+    EXPECT_EQ(res, tree_.count(key));
+    EXPECT_EQ(res, tree_.erase(key));
+    EXPECT_EQ(tree_.count(key), 0);
+    EXPECT_EQ(tree_.size(), size - res);
+    erase_check(key);
+    return res;
+  }
+  iterator erase(iterator iter) {
+    key_type key = iter.key();
+    int size = tree_.size();
+    int count = tree_.count(key);
+    typename CheckerType::iterator checker_iter = checker_.find(key);
+    for (iterator tmp(tree_.find(key)); tmp != iter; ++tmp) {
+      ++checker_iter;  // step checker_iter once per tree step from find(key) to iter
+    }
+    typename CheckerType::iterator checker_next = checker_iter;
+    ++checker_next;  // remembered before the erase below
+    checker_.erase(checker_iter);
+    iter = tree_.erase(iter);
+    EXPECT_EQ(tree_.size(), checker_.size());
+    EXPECT_EQ(tree_.size(), size - 1);
+    EXPECT_EQ(tree_.count(key), count - 1);
+    if (count == 1) {
+      erase_check(key);  // that was the last element with this key
+    }
+    return iter_check(iter, checker_next);
+  }
+
+  void erase(iterator begin, iterator end) {
+    int size = tree_.size();
+    int count = distance(begin, end);  // unqualified call, resolved via argument-dependent lookup
+    typename CheckerType::iterator checker_begin = checker_.find(begin.key());
+    for (iterator tmp(tree_.find(begin.key())); tmp != begin; ++tmp) {
+      ++checker_begin;  // step checker_begin once per tree step from find() to begin
+    }
+    typename CheckerType::iterator checker_end =
+        end == tree_.end() ? checker_.end() : checker_.find(end.key());
+    if (end != tree_.end()) {
+      for (iterator tmp(tree_.find(end.key())); tmp != end; ++tmp) {
+        ++checker_end;  // same lock-step advance for the end of the range
+      }
+    }
+    checker_.erase(checker_begin, checker_end);
+    tree_.erase(begin, end);
+    EXPECT_EQ(tree_.size(), checker_.size());
+    EXPECT_EQ(tree_.size(), size - count);
+  }
+
+  // Utility routines. Both containers are always modified together.
+  void clear() {
+    tree_.clear();
+    checker_.clear();
+  }
+  void swap(self_type &x) {
+    tree_.swap(x.tree_);
+    checker_.swap(x.checker_);
+  }
+
+  void verify() const {
+    tree_.verify();  // the btree's own verify(), followed by element-wise comparison
+    EXPECT_EQ(tree_.size(), checker_.size());
+
+    // Move through the forward iterators using increment.
+    typename CheckerType::const_iterator
+        checker_iter(checker_.begin());
+    const_iterator tree_iter(tree_.begin());
+    for (; tree_iter != tree_.end();
+         ++tree_iter, ++checker_iter) {
+      EXPECT_EQ(*tree_iter, *checker_iter);
+    }
+
+    // Move through the forward iterators using decrement, starting from end().
+    for (int n = tree_.size() - 1; n >= 0; --n) {
+      iter_check(tree_iter, checker_iter);
+      --tree_iter;
+      --checker_iter;
+    }
+    EXPECT_TRUE(tree_iter == tree_.begin());
+    EXPECT_TRUE(checker_iter == checker_.begin());
+
+    // Move through the reverse iterators using increment.
+    typename CheckerType::const_reverse_iterator
+        checker_riter(checker_.rbegin());
+    const_reverse_iterator tree_riter(tree_.rbegin());
+    for (; tree_riter != tree_.rend();
+         ++tree_riter, ++checker_riter) {
+      EXPECT_EQ(*tree_riter, *checker_riter);
+    }
+
+    // Move through the reverse iterators using decrement, starting from rend().
+    for (int n = tree_.size() - 1; n >= 0; --n) {
+      riter_check(tree_riter, checker_riter);
+      --tree_riter;
+      --checker_riter;
+    }
+    EXPECT_EQ(tree_riter, tree_.rbegin());
+    EXPECT_EQ(checker_riter, checker_.rbegin());
+  }
+
+  // Access to the underlying btree.
+  const TreeType& tree() const { return tree_; }
+
+  // Size routines. size() and empty() also cross-check against the reference container.
+  size_type size() const {
+    EXPECT_EQ(tree_.size(), checker_.size());
+    return tree_.size();
+  }
+  size_type max_size() const { return tree_.max_size(); }
+  bool empty() const {
+    EXPECT_EQ(tree_.empty(), checker_.empty());
+    return tree_.empty();
+  }
+  size_type height() const { return tree_.height(); }
+  size_type internal_nodes() const { return tree_.internal_nodes(); }
+  size_type leaf_nodes() const { return tree_.leaf_nodes(); }
+  size_type nodes() const { return tree_.nodes(); }
+  size_type bytes_used() const { return tree_.bytes_used(); }
+  double fullness() const { return tree_.fullness(); }
+  double overhead() const { return tree_.overhead(); }
+
+ protected:
+  TreeType tree_;  // container under test
+  const TreeType &const_tree_;  // const view of tree_, used by erase_check to call the const overloads
+  CheckerType checker_;  // reference container the btree is compared against
+};
+
+// A checker for unique sorted associative containers. TreeType is expected to
+// be btree_{set,map} and CheckerType is expected to be {set,map}.
+template <typename TreeType, typename CheckerType>
+class unique_checker : public base_checker<TreeType, CheckerType> {
+  typedef base_checker<TreeType, CheckerType> super_type;
+  typedef unique_checker<TreeType, CheckerType> self_type;
+
+ public:
+  typedef typename super_type::iterator iterator;
+  typedef typename super_type::value_type value_type;
+
+ public:
+  // Default constructor.
+  unique_checker()
+      : super_type() {
+  }
+  // Copy constructor.
+  unique_checker(const self_type &x)
+      : super_type(x) {
+  }
+  // Range constructor.
+  template <class InputIterator>
+  unique_checker(InputIterator b, InputIterator e)
+      : super_type(b, e) {
+  }
+
+  // Insertion routines. Each inserts into both containers and compares the outcomes.
+  std::pair<iterator,bool> insert(const value_type &x) {
+    int size = this->tree_.size();
+    std::pair<typename CheckerType::iterator,bool> checker_res =
+        this->checker_.insert(x);
+    std::pair<iterator,bool> tree_res = this->tree_.insert(x);
+    EXPECT_EQ(*tree_res.first, *checker_res.first);
+    EXPECT_EQ(tree_res.second, checker_res.second);  // both must agree on whether x was new
+    EXPECT_EQ(this->tree_.size(), this->checker_.size());
+    EXPECT_EQ(this->tree_.size(), size + tree_res.second);  // bool adds 1 only on a real insert
+    return tree_res;
+  }
+  iterator insert(iterator position, const value_type &x) {
+    int size = this->tree_.size();
+    std::pair<typename CheckerType::iterator,bool> checker_res =
+        this->checker_.insert(x);  // the reference container is inserted into without the hint
+    iterator tree_res = this->tree_.insert(position, x);
+    EXPECT_EQ(*tree_res, *checker_res.first);
+    EXPECT_EQ(this->tree_.size(), this->checker_.size());
+    EXPECT_EQ(this->tree_.size(), size + checker_res.second);
+    return tree_res;
+  }
+  template <typename InputIterator>
+  void insert(InputIterator b, InputIterator e) {
+    for (; b != e; ++b) {
+      insert(*b);  // goes through the checked single-value insert above
+    }
+  }
+};
+
+// A checker for multiple sorted associative containers. TreeType is expected
+// to be btree_{multiset,multimap} and CheckerType is expected to be
+// {multiset,multimap}.
+template <typename TreeType, typename CheckerType>
+class multi_checker : public base_checker<TreeType, CheckerType> {
+  typedef base_checker<TreeType, CheckerType> super_type;
+  typedef multi_checker<TreeType, CheckerType> self_type;
+
+ public:
+  typedef typename super_type::iterator iterator;
+  typedef typename super_type::value_type value_type;
+
+ public:
+  // Default constructor.
+  multi_checker()
+      : super_type() {
+  }
+  // Copy constructor.
+  multi_checker(const self_type &x)
+      : super_type(x) {
+  }
+  // Range constructor.
+  template <class InputIterator>
+  multi_checker(InputIterator b, InputIterator e)
+      : super_type(b, e) {
+  }
+
+  // Insertion routines. Every insert is expected to grow both containers by exactly one element.
+  iterator insert(const value_type &x) {
+    int size = this->tree_.size();
+    typename CheckerType::iterator checker_res = this->checker_.insert(x);
+    iterator tree_res = this->tree_.insert(x);
+    EXPECT_EQ(*tree_res, *checker_res);
+    EXPECT_EQ(this->tree_.size(), this->checker_.size());
+    EXPECT_EQ(this->tree_.size(), size + 1);
+    return tree_res;
+  }
+  iterator insert(iterator position, const value_type &x) {
+    int size = this->tree_.size();
+    typename CheckerType::iterator checker_res = this->checker_.insert(x);  // reference insert ignores the hint
+    iterator tree_res = this->tree_.insert(position, x);
+    EXPECT_EQ(*tree_res, *checker_res);
+    EXPECT_EQ(this->tree_.size(), this->checker_.size());
+    EXPECT_EQ(this->tree_.size(), size + 1);
+    return tree_res;
+  }
+  template <typename InputIterator>
+  void insert(InputIterator b, InputIterator e) {
+    for (; b != e; ++b) {
+      insert(*b);  // goes through the checked single-value insert above
+    }
+  }
+};
+
+// Writes val in decimal, zero-padded to maxval's digit count, at the tail of buf; returns the first digit.
+// Declared inline: a non-inline definition in a header breaks the one-definition rule across translation units.
+inline char* GenerateDigits(char buf[16], int val, int maxval) {
+  EXPECT_LE(val, maxval);
+  int p = 15;
+  buf[p--] = 0;  // NUL terminator in the last slot; digits are filled right to left
+  for (; maxval > 0; val /= 10, maxval /= 10) {  // one digit per decimal digit of maxval
+    buf[p--] = '0' + (val % 10);
+  }
+  return buf + p + 1;  // p sits one slot before the most significant digit
+}
+
+// Primary generator: the key for index i is i itself, converted to K.
+template <typename K>
+struct Generator {
+  int maxval;  // upper bound that operator() checks each index against
+  Generator(int max_index) : maxval(max_index) {}
+
+  K operator()(int i) const {
+    EXPECT_LE(i, maxval);
+    return i;
+  }
+};
+
+// String keys: the index rendered as decimal text by GenerateDigits.
+template <>
+struct Generator<std::string> {
+  int maxval;  // forwarded to GenerateDigits for every generated key
+  Generator(int max_index) : maxval(max_index) {}
+
+  std::string operator()(int i) const {
+    char digits[16];
+    return std::string(GenerateDigits(digits, i, maxval));
+  }
+};
+
+// Pair values: each half comes from the generator for its own element type
+// (with const stripped), and both halves are driven by the same index.
+template <typename T, typename U>
+struct Generator<std::pair<T, U> > {
+  Generator<typename std::remove_const<T>::type> tgen;
+  Generator<typename std::remove_const<U>::type> ugen;
+
+  Generator(int max_index) : tgen(max_index), ugen(max_index) {}
+
+  std::pair<T, U> operator()(int i) const {
+    return std::pair<T, U>(tgen(i), ugen(i));
+  }
+};
+
+// Returns a cache, shared by all callers, that holds at least n distinct
+// pseudo-random ints; callers read only the first n. Each value is drawn from
+// [0, maxval] of the call that created it. n must not exceed maxval + 1:
+// beyond that no unused value remains and the retry loop below cannot finish.
+// Defined inline so the header can be included by several translation units.
+inline const std::vector<int>& GenerateNumbers(int n, int maxval) {
+  static std::vector<int> values;
+  static std::set<int> unique_values;
+
+  // Signed comparison on purpose: a negative n must leave the cache untouched.
+  while (static_cast<int>(values.size()) < n) {
+    int value;
+    do {
+      value = rand() % (maxval + 1);
+    } while (!unique_values.insert(value).second);  // taken already: redraw
+    values.push_back(value);
+  }
+
+  return values;
+}
+
+// Generates n values of type V from distinct random numbers in the range
+// [0, 4 * max(FLAGS_benchmark_values, FLAGS_test_values)].
+template <typename V>
+std::vector<V> GenerateValues(int n) {
+  int two_times_max = 2 * std::max(FLAGS_benchmark_values, FLAGS_test_values);
+  int four_times_max = 2 * two_times_max;
+  EXPECT_LE(n, two_times_max);
+  const std::vector<int> &nums = GenerateNumbers(n, four_times_max);
+  Generator<V> gen(four_times_max);
+  std::vector<V> vec;
+  if (n > 0) vec.reserve(n);  // one allocation instead of repeated regrowth
+  for (int i = 0; i < n; i++) {
+    vec.push_back(gen(nums[i]));
+  }
+
+  return vec;
+}
+
+template <typename T, typename V>
+void DoTest(const char *name, T *b, const std::vector<V> &values) {
+  typename KeyOfValue<typename T::key_type, V>::type key_of_value;
+  // Exercises *b with values; name only labels the printed statistics line.
+  T &mutable_b = *b;
+  const T &const_b = *b;
+
+  // Test insert: add each value and call value_check() on it right away.
+  for (int i = 0; i < values.size(); ++i) {
+    mutable_b.insert(values[i]);
+    mutable_b.value_check(values[i]);
+  }
+  assert(mutable_b.size() == values.size());
+
+  const_b.verify();
+  printf("    %s fullness=%0.2f  overhead=%0.2f  bytes-per-value=%0.2f\n",
+         name, const_b.fullness(), const_b.overhead(),
+         double(const_b.bytes_used()) / const_b.size());
+
+  // Test copy constructor; the copy must not be taller or use more nodes.
+  T b_copy(const_b);
+  EXPECT_EQ(b_copy.size(), const_b.size());
+  EXPECT_LE(b_copy.height(), const_b.height());
+  EXPECT_LE(b_copy.internal_nodes(), const_b.internal_nodes());
+  EXPECT_LE(b_copy.leaf_nodes(), const_b.leaf_nodes());
+  for (int i = 0; i < values.size(); ++i) {
+    EXPECT_EQ(*b_copy.find(key_of_value(values[i])), values[i]);
+  }
+
+  // Test range constructor; same size and node-count bounds as the copy.
+  T b_range(const_b.begin(), const_b.end());
+  EXPECT_EQ(b_range.size(), const_b.size());
+  EXPECT_LE(b_range.height(), const_b.height());
+  EXPECT_LE(b_range.internal_nodes(), const_b.internal_nodes());
+  EXPECT_LE(b_range.leaf_nodes(), const_b.leaf_nodes());
+  for (int i = 0; i < values.size(); ++i) {
+    EXPECT_EQ(*b_range.find(key_of_value(values[i])), values[i]);
+  }
+
+  // Test range insertion for values that already exist.
+  b_range.insert(b_copy.begin(), b_copy.end());
+  b_range.verify();
+
+  // Test range insertion for new values; the shape must match b_copy exactly.
+  b_range.clear();
+  b_range.insert(b_copy.begin(), b_copy.end());
+  EXPECT_EQ(b_range.size(), b_copy.size());
+  EXPECT_EQ(b_range.height(), b_copy.height());
+  EXPECT_EQ(b_range.internal_nodes(), b_copy.internal_nodes());
+  EXPECT_EQ(b_range.leaf_nodes(), b_copy.leaf_nodes());
+  for (int i = 0; i < values.size(); ++i) {
+    EXPECT_EQ(*b_range.find(key_of_value(values[i])), values[i]);
+  }
+
+  // Test assignment to self. Nothing should change.
+  b_range.operator=(b_range);
+  EXPECT_EQ(b_range.size(), b_copy.size());
+  EXPECT_EQ(b_range.height(), b_copy.height());
+  EXPECT_EQ(b_range.internal_nodes(), b_copy.internal_nodes());
+  EXPECT_EQ(b_range.leaf_nodes(), b_copy.leaf_nodes());
+
+  // Test assignment of new values.
+  b_range.clear();
+  b_range = b_copy;
+  EXPECT_EQ(b_range.size(), b_copy.size());
+  EXPECT_EQ(b_range.height(), b_copy.height());
+  EXPECT_EQ(b_range.internal_nodes(), b_copy.internal_nodes());
+  EXPECT_EQ(b_range.leaf_nodes(), b_copy.leaf_nodes());
+
+  // Test swap: after swapping with the cleared b_range, b_copy is empty.
+  b_range.clear();
+  b_range.swap(b_copy);
+  EXPECT_EQ(b_copy.size(), 0);
+  EXPECT_EQ(b_range.size(), const_b.size());
+  for (int i = 0; i < values.size(); ++i) {
+    EXPECT_EQ(*b_range.find(key_of_value(values[i])), values[i]);
+  }
+  b_range.swap(b_copy);  // swap back so b_copy holds the values again
+
+  // Test erase via values.
+  for (int i = 0; i < values.size(); ++i) {
+    mutable_b.erase(key_of_value(values[i]));
+    // Erasing a non-existent key should have no effect.
+    EXPECT_EQ(mutable_b.erase(key_of_value(values[i])), 0);
+  }
+
+  const_b.verify();
+  EXPECT_EQ(const_b.internal_nodes(), 0);
+  EXPECT_EQ(const_b.leaf_nodes(), 0);
+  EXPECT_EQ(const_b.size(), 0);
+
+  // Test erase via iterators.
+  mutable_b = b_copy;  // refill from the saved copy
+  for (int i = 0; i < values.size(); ++i) {
+    mutable_b.erase(mutable_b.find(key_of_value(values[i])));
+  }
+
+  const_b.verify();
+  EXPECT_EQ(const_b.internal_nodes(), 0);
+  EXPECT_EQ(const_b.leaf_nodes(), 0);
+  EXPECT_EQ(const_b.size(), 0);
+
+  // Test insert with hint; upper_bound of the key is used as the hint.
+  for (int i = 0; i < values.size(); i++) {
+    mutable_b.insert(mutable_b.upper_bound(key_of_value(values[i])), values[i]);
+  }
+
+  const_b.verify();
+
+  // Test dumping of the btree to an ostream. There should be 1 line for each
+  // value.
+  std::stringstream strm;
+  strm << mutable_b.tree();
+  EXPECT_EQ(mutable_b.size(), strcount(strm.str(), '\n'));
+
+  // Test range erase over the whole container.
+  mutable_b.erase(mutable_b.begin(), mutable_b.end());
+  EXPECT_EQ(mutable_b.size(), 0);
+  const_b.verify();
+
+  // Range erase of the first half.
+  mutable_b = b_copy;
+  typename T::iterator mutable_iter_end = mutable_b.begin();
+  for (int i = 0; i < values.size() / 2; ++i) ++mutable_iter_end;
+  mutable_b.erase(mutable_b.begin(), mutable_iter_end);
+  EXPECT_EQ(mutable_b.size(), values.size() - values.size() / 2);
+  const_b.verify();
+
+  // Range erase of the second half.
+  mutable_b = b_copy;
+  typename T::iterator mutable_iter_begin = mutable_b.begin();
+  for (int i = 0; i < values.size() / 2; ++i) ++mutable_iter_begin;
+  mutable_b.erase(mutable_iter_begin, mutable_b.end());
+  EXPECT_EQ(mutable_b.size(), values.size() / 2);
+  const_b.verify();
+
+  // Range erase of the second quarter.
+  mutable_b = b_copy;
+  mutable_iter_begin = mutable_b.begin();
+  for (int i = 0; i < values.size() / 4; ++i) ++mutable_iter_begin;
+  mutable_iter_end = mutable_iter_begin;
+  for (int i = 0; i < values.size() / 4; ++i) ++mutable_iter_end;
+  mutable_b.erase(mutable_iter_begin, mutable_iter_end);
+  EXPECT_EQ(mutable_b.size(), values.size() - values.size() / 4);
+  const_b.verify();
+
+  mutable_b.clear();  // leave *b empty for the next DoTest run
+}
+
+template <typename T>
+void ConstTest() {
+  typedef typename T::value_type value_type;
+  typename KeyOfValue<typename T::key_type, value_type>::type key_of_value;
+  // Checks lookups, iterator conversions and statistics through a const ref.
+  T mutable_b;
+  const T &const_b = mutable_b;
+
+  // Insert a single value into the container and test looking it up.
+  value_type value = Generator<value_type>(2)(2);
+  mutable_b.insert(value);
+  EXPECT_TRUE(mutable_b.find(key_of_value(value)) != const_b.end());
+  EXPECT_TRUE(const_b.find(key_of_value(value)) != mutable_b.end());
+  EXPECT_EQ(*const_b.lower_bound(key_of_value(value)), value);
+  EXPECT_TRUE(const_b.upper_bound(key_of_value(value)) == const_b.end());
+  EXPECT_EQ(*const_b.equal_range(key_of_value(value)).first, value);
+
+  // We can only create a non-const iterator from a non-const container.
+  typename T::iterator mutable_iter(mutable_b.begin());
+  EXPECT_TRUE(mutable_iter == const_b.begin());
+  EXPECT_TRUE(mutable_iter != const_b.end());
+  EXPECT_TRUE(const_b.begin() == mutable_iter);
+  EXPECT_TRUE(const_b.end() != mutable_iter);
+  typename T::reverse_iterator mutable_riter(mutable_b.rbegin());
+  EXPECT_TRUE(mutable_riter == const_b.rbegin());
+  EXPECT_TRUE(mutable_riter != const_b.rend());
+  EXPECT_TRUE(const_b.rbegin() == mutable_riter);
+  EXPECT_TRUE(const_b.rend() != mutable_riter);
+
+  // We can create a const iterator from a non-const iterator.
+  typename T::const_iterator const_iter(mutable_iter);
+  EXPECT_TRUE(const_iter == mutable_b.begin());
+  EXPECT_TRUE(const_iter != mutable_b.end());
+  EXPECT_TRUE(mutable_b.begin() == const_iter);
+  EXPECT_TRUE(mutable_b.end() != const_iter);
+  typename T::const_reverse_iterator const_riter(mutable_riter);
+  EXPECT_EQ(const_riter, mutable_b.rbegin());
+  EXPECT_TRUE(const_riter != mutable_b.rend());
+  EXPECT_EQ(mutable_b.rbegin(), const_riter);
+  EXPECT_TRUE(mutable_b.rend() != const_riter);
+
+  // Make sure various methods can be invoked on a const container. The
+  // expected values are those of a tree holding exactly one element.
+  const_b.verify();
+  EXPECT_FALSE(const_b.empty());
+  EXPECT_EQ(const_b.size(), 1);
+  EXPECT_GT(const_b.max_size(), 0);
+  EXPECT_EQ(const_b.height(), 1);
+  EXPECT_EQ(const_b.count(key_of_value(value)), 1);
+  EXPECT_EQ(const_b.internal_nodes(), 0);
+  EXPECT_EQ(const_b.leaf_nodes(), 1);
+  EXPECT_EQ(const_b.nodes(), 1);
+  EXPECT_GT(const_b.bytes_used(), 0);
+  EXPECT_GT(const_b.fullness(), 0);
+  EXPECT_GT(const_b.overhead(), 0);
+}
+
+// Runs ConstTest and then DoTest on one unique_checker<T, C> with the same
+// generated values in ascending, descending and generated (random) order.
+template <typename T, typename C>
+void BtreeTest() {
+  typedef typename std::remove_const<typename T::value_type>::type V;
+
+  ConstTest<T>();
+
+  const std::vector<V> shuffled = GenerateValues<V>(FLAGS_test_values);
+  std::vector<V> ordered = shuffled;
+  unique_checker<T, C> checker;
+
+  // Ascending keys.
+  sort(ordered.begin(), ordered.end());
+  DoTest("sorted:    ", &checker, ordered);
+  // Descending keys: the sorted sequence reversed in place.
+  reverse(ordered.begin(), ordered.end());
+  DoTest("rsorted:   ", &checker, ordered);
+  // Keys in the order GenerateValues produced them.
+  DoTest("random:    ", &checker, shuffled);
+}
+
+// Multi-container counterpart of BtreeTest: runs ConstTest, then DoTest on one
+// multi_checker<T, C> with sorted, reversed, random, duplicated and identical
+// value sequences.
+template <typename T, typename C>
+void BtreeMultiTest() {
+  typedef typename std::remove_const<typename T::value_type>::type V;
+
+  ConstTest<T>();
+
+  const std::vector<V> shuffled = GenerateValues<V>(FLAGS_test_values);
+  std::vector<V> ordered = shuffled;
+  multi_checker<T, C> checker;
+
+  // Ascending keys.
+  sort(ordered.begin(), ordered.end());
+  DoTest("sorted:    ", &checker, ordered);
+
+  // Descending keys: the sorted sequence reversed in place.
+  reverse(ordered.begin(), ordered.end());
+  DoTest("rsorted:   ", &checker, ordered);
+
+  // Keys in the order GenerateValues produced them.
+  DoTest("random:    ", &checker, shuffled);
+
+  // Every key twice: the random sequence followed by a second copy of itself.
+  std::vector<V> doubled = shuffled;
+  doubled.insert(doubled.end(), shuffled.begin(), shuffled.end());
+  DoTest("duplicates:", &checker, doubled);
+
+  // One hundred copies of the value generated for index 2.
+  DoTest("identical: ", &checker, std::vector<V>(100, Generator<V>(2)(2)));
+}
+
+template <typename T, typename Alloc = std::allocator<T> >
+class TestAllocator : public Alloc {  // tallies allocated bytes in *bytes_used_
+ public:
+  typedef typename Alloc::pointer pointer;
+  typedef typename Alloc::size_type size_type;
+
+  TestAllocator() : bytes_used_(NULL) { }  // no counter attached
+  TestAllocator(int64_t *bytes_used) : bytes_used_(bytes_used) { }
+
+  // Constructor used for rebinding; the new allocator shares x's counter.
+  template <class U>
+  TestAllocator(const TestAllocator<U>& x)
+      : Alloc(x),
+        bytes_used_(x.bytes_used()) {
+  }
+
+  pointer allocate(size_type n, std::allocator<void>::const_pointer hint = 0) {
+    EXPECT_TRUE(bytes_used_ != NULL);
+    *bytes_used_ += n * sizeof(T);  // charge the request, then delegate
+    return Alloc::allocate(n, hint);
+  }
+
+  void deallocate(pointer p, size_type n) {
+    Alloc::deallocate(p, n);
+    EXPECT_TRUE(bytes_used_ != NULL);
+    *bytes_used_ -= n * sizeof(T);  // credit back what allocate() charged
+  }
+  // NOTE(review): Alloc::rebind/pointer are deprecated since C++17 - confirm.
+  // Rebind allows an allocator<T> to be used for a different type
+  template <class U> struct rebind {
+    typedef TestAllocator<U, typename Alloc::template rebind<U>::other> other;
+  };
+
+  int64_t* bytes_used() const { return bytes_used_; }
+
+ private:
+  int64_t *bytes_used_;  // not owned; may be NULL when default-constructed
+};
+
+template <typename T>
+void BtreeAllocatorTest() {
+  typedef typename T::value_type value_type;
+  // Checks that swapping two trees also swaps which counter they charge.
+  int64_t alloc1 = 0;  // passed to b1 as its allocator argument
+  int64_t alloc2 = 0;  // passed to b2 as its allocator argument
+  T b1(typename T::key_compare(), &alloc1);
+  T b2(typename T::key_compare(), &alloc2);
+
+  // This should swap the allocators!
+  swap(b1, b2);
+
+  for (int i = 0; i < 1000; i++) {
+    b1.insert(Generator<value_type>(1000)(i));
+  }
+
+  // We should have allocated out of alloc2!
+  EXPECT_LE(b1.bytes_used(), alloc2 + sizeof(b1));
+  EXPECT_GT(alloc2, alloc1);
+}
+
+template <typename T>
+void BtreeMapTest() {
+  typedef typename T::value_type value_type;
+  typedef typename T::mapped_type mapped_type;
+  // Map-only checks: operator[] insertion and operator-> on iterators.
+  mapped_type m = Generator<mapped_type>(0)(0);
+  (void) m;  // m is not used again; the cast silences unused warnings
+
+  T b;
+
+  // Verify we can insert using operator[].
+  for (int i = 0; i < 1000; i++) {
+    value_type v = Generator<value_type>(1000)(i);
+    b[v.first] = v.second;
+  }
+  EXPECT_EQ(b.size(), 1000);
+
+  // Test whether we can use the "->" operator on iterators and
+  // reverse_iterators. This stresses the btree_map_params::pair_pointer
+  // mechanism.
+  EXPECT_EQ(b.begin()->first, Generator<value_type>(1000)(0).first);
+  EXPECT_EQ(b.begin()->second, Generator<value_type>(1000)(0).second);
+  EXPECT_EQ(b.rbegin()->first, Generator<value_type>(1000)(999).first);
+  EXPECT_EQ(b.rbegin()->second, Generator<value_type>(1000)(999).second);
+}
+
+// Only instantiates T::mapped_type through a Generator; asserts nothing.
+template <typename T>
+void BtreeMultiMapTest() {
+  typename T::mapped_type m = Generator<typename T::mapped_type>(0)(0);
+  static_cast<void>(m);
+}
+
+} // namespace btree
+
+#endif  // UTIL_BTREE_BTREE_TEST_H__
diff --git a/third-party/xdelta3/xdelta3/cpp-btree/btree_test_flags.cc b/third-party/xdelta3/xdelta3/cpp-btree/btree_test_flags.cc
new file mode 100644
index 0000000000..bf608a9b28
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/cpp-btree/btree_test_flags.cc
@@ -0,0 +1,20 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "gflags/gflags.h"
+
+DEFINE_int32(test_values, 10000,  // read as FLAGS_test_values
+             "The number of values to use for tests.");
+DEFINE_int32(benchmark_values, 1000000,  // read as FLAGS_benchmark_values
+             "The number of values to use for benchmarks.");
diff --git a/third-party/xdelta3/xdelta3/cpp-btree/safe_btree.h b/third-party/xdelta3/xdelta3/cpp-btree/safe_btree.h
new file mode 100644
index 0000000000..2d85c70be8
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/cpp-btree/safe_btree.h
@@ -0,0 +1,395 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// A safe_btree<> wraps around a btree<> and removes the caveat that insertion
+// and deletion invalidate iterators. A safe_btree<> maintains a generation
+// number that is incremented on every mutation. A safe_btree<>::iterator keeps
+// a pointer to the safe_btree<> it came from, the generation of the tree when
+// it was last validated and the key the underlying btree<>::iterator points
+// to. If an iterator is accessed and its generation differs from the tree
+// generation it is revalidated.
+//
+// References and pointers returned by safe_btree iterators are not safe.
+//
+// See the incorrect usage examples mentioned in safe_btree_set.h and
+// safe_btree_map.h.
+
+#ifndef UTIL_BTREE_SAFE_BTREE_H__
+#define UTIL_BTREE_SAFE_BTREE_H__
+
+#include <stddef.h>
+#include <iosfwd>
+#include <utility>
+
+#include "btree.h"
+
+namespace btree {
+
+template <typename Tree, typename Iterator>
+class safe_btree_iterator {  // iterator that re-finds its position by key
+ public:
+  typedef typename Iterator::key_type key_type;
+  typedef typename Iterator::value_type value_type;
+  typedef typename Iterator::size_type size_type;
+  typedef typename Iterator::difference_type difference_type;
+  typedef typename Iterator::pointer pointer;
+  typedef typename Iterator::reference reference;
+  typedef typename Iterator::const_pointer const_pointer;
+  typedef typename Iterator::const_reference const_reference;
+  typedef typename Iterator::iterator_category iterator_category;
+  typedef typename Tree::iterator iterator;
+  typedef typename Tree::const_iterator const_iterator;
+  typedef safe_btree_iterator<Tree, Iterator> self_type;
+  // Stamps the tree's generation (negated for end()) and caches the key.
+  void update() const {
+    if (iter_ != tree_->internal_btree()->end()) {
+      // A positive generation indicates a valid key.
+      generation_ = tree_->generation();
+      key_ = iter_.key();
+    } else {
+      // Use a negative generation to indicate iter_ points to end().
+      generation_ = -tree_->generation();
+    }
+  }
+
+ public:
+  safe_btree_iterator()  // detached: generation 0 and no tree
+      : generation_(0),
+        key_(),
+        iter_(),
+        tree_(NULL) {
+  }
+  safe_btree_iterator(const iterator &x)  // from the tree's iterator type
+      : generation_(x.generation()),
+        key_(x.key()),
+        iter_(x.iter()),
+        tree_(x.tree()) {
+  }
+  safe_btree_iterator(Tree *tree, const Iterator &iter)  // wraps a raw iterator
+      : generation_(),
+        key_(),
+        iter_(iter),
+        tree_(tree) {
+    update();
+  }
+
+  Tree* tree() const { return tree_; }
+  int64_t generation() const { return generation_; }
+  // Returns the raw iterator, re-locating it first if the tree has changed.
+  Iterator* mutable_iter() const {
+    if (generation_ != tree_->generation()) {
+      if (generation_ > 0) {
+        // This does the wrong thing for a multi{set,map}. If my iter was
+        // pointing to the 2nd of 2 values with the same key, then this will
+        // reset it to point to the first. This is why we don't provide a
+        // safe_btree_multi{set,map}.
+        iter_ = tree_->internal_btree()->lower_bound(key_);
+        update();
+      } else if (-generation_ != tree_->generation()) {  // stale end()
+        iter_ = tree_->internal_btree()->end();
+        generation_ = -tree_->generation();
+      }
+    }
+    return &iter_;
+  }
+  const Iterator& iter() const {
+    return *mutable_iter();
+  }
+
+  // Equality/inequality operators; both sides are revalidated via iter().
+  bool operator==(const const_iterator &x) const {
+    return iter() == x.iter();
+  }
+  bool operator!=(const const_iterator &x) const {
+    return iter() != x.iter();
+  }
+
+  // Accessors for the key/value the iterator is pointing at. key() returns
+  // the cached copy and does not revalidate.
+  const key_type& key() const {
+    return key_;
+  }
+  // This reference value is potentially invalidated by any non-const
+  // method on the tree; it is NOT safe.
+  reference operator*() const {
+    assert(generation_ > 0);
+    return iter().operator*();
+  }
+  // This pointer value is potentially invalidated by any non-const
+  // method on the tree; it is NOT safe.
+  pointer operator->() const {
+    assert(generation_ > 0);
+    return iter().operator->();
+  }
+
+  // Increment/decrement operators; each step re-stamps through update().
+  self_type& operator++() {
+    ++(*mutable_iter());
+    update();
+    return *this;
+  }
+  self_type& operator--() {
+    --(*mutable_iter());
+    update();
+    return *this;
+  }
+  self_type operator++(int) {
+    self_type tmp = *this;
+    ++*this;
+    return tmp;
+  }
+  self_type operator--(int) {
+    self_type tmp = *this;
+    --*this;
+    return tmp;
+  }
+
+ private:
+  // The generation of the tree when "iter" was updated.
+  mutable int64_t generation_;
+  // The key the iterator points to.
+  mutable key_type key_;
+  // The underlying iterator.
+  mutable Iterator iter_;
+  // The tree the iterator is associated with.
+  Tree *tree_;
+};
+
+template <typename Params>
+class safe_btree {  // btree plus a generation counter read by its iterators
+  typedef safe_btree<Params> self_type;
+
+  typedef btree<Params> btree_type;
+  typedef typename btree_type::iterator tree_iterator;
+  typedef typename btree_type::const_iterator tree_const_iterator;
+
+ public:
+  typedef typename btree_type::params_type params_type;
+  typedef typename btree_type::key_type key_type;
+  typedef typename btree_type::data_type data_type;
+  typedef typename btree_type::mapped_type mapped_type;
+  typedef typename btree_type::value_type value_type;
+  typedef typename btree_type::key_compare key_compare;
+  typedef typename btree_type::allocator_type allocator_type;
+  typedef typename btree_type::pointer pointer;
+  typedef typename btree_type::const_pointer const_pointer;
+  typedef typename btree_type::reference reference;
+  typedef typename btree_type::const_reference const_reference;
+  typedef typename btree_type::size_type size_type;
+  typedef typename btree_type::difference_type difference_type;
+  typedef safe_btree_iterator<self_type, tree_iterator> iterator;
+  typedef safe_btree_iterator<
+    const self_type, tree_const_iterator> const_iterator;
+  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+  typedef std::reverse_iterator<iterator> reverse_iterator;
+
+ public:
+  // Constructor (both arguments are required); the generation starts at 1.
+  safe_btree(const key_compare &comp, const allocator_type &alloc)
+      : tree_(comp, alloc),
+        generation_(1) {
+  }
+
+  // Copy constructor; the copy starts its own generation count at 1.
+  safe_btree(const self_type &x)
+      : tree_(x.tree_),
+        generation_(1) {
+  }
+
+  iterator begin() {
+    return iterator(this, tree_.begin());
+  }
+  const_iterator begin() const {
+    return const_iterator(this, tree_.begin());
+  }
+  iterator end() {
+    return iterator(this, tree_.end());
+  }
+  const_iterator end() const {
+    return const_iterator(this, tree_.end());
+  }
+  reverse_iterator rbegin() {
+    return reverse_iterator(end());
+  }
+  const_reverse_iterator rbegin() const {
+    return const_reverse_iterator(end());
+  }
+  reverse_iterator rend() {
+    return reverse_iterator(begin());
+  }
+  const_reverse_iterator rend() const {
+    return const_reverse_iterator(begin());
+  }
+
+  // Lookup routines. Each wraps the underlying btree result in a safe iterator.
+  iterator lower_bound(const key_type &key) {
+    return iterator(this, tree_.lower_bound(key));
+  }
+  const_iterator lower_bound(const key_type &key) const {
+    return const_iterator(this, tree_.lower_bound(key));
+  }
+  iterator upper_bound(const key_type &key) {
+    return iterator(this, tree_.upper_bound(key));
+  }
+  const_iterator upper_bound(const key_type &key) const {
+    return const_iterator(this, tree_.upper_bound(key));
+  }
+  std::pair<iterator, iterator> equal_range(const key_type &key) {
+    std::pair<tree_iterator, tree_iterator> p = tree_.equal_range(key);
+    return std::make_pair(iterator(this, p.first),
+                     iterator(this, p.second));
+  }
+  std::pair<const_iterator, const_iterator> equal_range(const key_type &key) const {
+    std::pair<tree_const_iterator, tree_const_iterator> p = tree_.equal_range(key);
+    return std::make_pair(const_iterator(this, p.first),
+                     const_iterator(this, p.second));
+  }
+  iterator find_unique(const key_type &key) {
+    return iterator(this, tree_.find_unique(key));
+  }
+  const_iterator find_unique(const key_type &key) const {
+    return const_iterator(this, tree_.find_unique(key));
+  }
+  iterator find_multi(const key_type &key) {
+    return iterator(this, tree_.find_multi(key));
+  }
+  const_iterator find_multi(const key_type &key) const {
+    return const_iterator(this, tree_.find_multi(key));
+  }
+  size_type count_unique(const key_type &key) const {
+    return tree_.count_unique(key);
+  }
+  size_type count_multi(const key_type &key) const {
+    return tree_.count_multi(key);
+  }
+
+  // Insertion routines. The generation advances when the tree may have changed.
+  template <typename ValuePointer>
+  std::pair<iterator, bool> insert_unique(const key_type &key, ValuePointer value) {
+    std::pair<tree_iterator, bool> p = tree_.insert_unique(key, value);
+    generation_ += p.second;  // advances only when p.second is true
+    return std::make_pair(iterator(this, p.first), p.second);
+  }
+  std::pair<iterator, bool> insert_unique(const value_type &v) {
+    std::pair<tree_iterator, bool> p = tree_.insert_unique(v);
+    generation_ += p.second;  // advances only when p.second is true
+    return std::make_pair(iterator(this, p.first), p.second);
+  }
+  iterator insert_unique(iterator position, const value_type &v) {
+    tree_iterator tree_pos = position.iter();  // resolve the hint first
+    ++generation_;
+    return iterator(this, tree_.insert_unique(tree_pos, v));
+  }
+  template <typename InputIterator>
+  void insert_unique(InputIterator b, InputIterator e) {
+    for (; b != e; ++b) {
+      insert_unique(*b);
+    }
+  }
+  iterator insert_multi(const value_type &v) {
+    ++generation_;
+    return iterator(this, tree_.insert_multi(v));
+  }
+  iterator insert_multi(iterator position, const value_type &v) {
+    tree_iterator tree_pos = position.iter();  // resolve the hint first
+    ++generation_;
+    return iterator(this, tree_.insert_multi(tree_pos, v));
+  }
+  template <typename InputIterator>
+  void insert_multi(InputIterator b, InputIterator e) {
+    for (; b != e; ++b) {
+      insert_multi(*b);
+    }
+  }
+  self_type& operator=(const self_type &x) {
+    if (&x == this) {
+      // Don't copy onto ourselves.
+      return *this;
+    }
+    ++generation_;
+    tree_ = x.tree_;
+    return *this;
+  }
+
+  // Deletion routines.
+  void erase(const iterator &begin, const iterator &end) {
+    tree_.erase(begin.iter(), end.iter());
+    ++generation_;
+  }
+  // Erase the specified iterator from the btree. The iterator must be valid
+  // (i.e. not equal to end()).  Return an iterator pointing to the node after
+  // the one that was erased (or end() if none exists).
+  iterator erase(iterator iter) {
+    tree_iterator res = tree_.erase(iter.iter());
+    ++generation_;
+    return iterator(this, res);
+  }
+  int erase_unique(const key_type &key) {
+    int res = tree_.erase_unique(key);
+    generation_ += res;  // stays put when res is 0
+    return res;
+  }
+  int erase_multi(const key_type &key) {
+    int res = tree_.erase_multi(key);
+    generation_ += res;  // stays put when res is 0
+    return res;
+  }
+
+  // Access to the underlying btree.
+  btree_type* internal_btree() { return &tree_; }
+  const btree_type* internal_btree() const { return &tree_; }
+
+  // Utility routines.
+  void clear() {
+    ++generation_;
+    tree_.clear();
+  }
+  void swap(self_type &x) {
+    ++generation_;  // both trees change, so both generations advance
+    ++x.generation_;
+    tree_.swap(x.tree_);
+  }
+  void dump(std::ostream &os) const {
+    tree_.dump(os);
+  }
+  void verify() const {
+    tree_.verify();
+  }
+  int64_t generation() const {
+    return generation_;
+  }
+  key_compare key_comp() const { return tree_.key_comp(); }
+
+  // Size routines; all of them forward to the underlying btree.
+  size_type size() const { return tree_.size(); }
+  size_type max_size() const { return tree_.max_size(); }
+  bool empty() const { return tree_.empty(); }
+  size_type height() const { return tree_.height(); }
+  size_type internal_nodes() const { return tree_.internal_nodes(); }
+  size_type leaf_nodes() const { return tree_.leaf_nodes(); }
+  size_type nodes() const { return tree_.nodes(); }
+  size_type bytes_used() const { return tree_.bytes_used(); }
+  static double average_bytes_per_value() {
+    return btree_type::average_bytes_per_value();
+  }
+  double fullness() const { return tree_.fullness(); }
+  double overhead() const { return tree_.overhead(); }
+
+ private:
+  btree_type tree_;  // the wrapped btree
+  int64_t generation_;  // starts at 1; compared by safe_btree_iterator
+};
+
+}  // namespace btree
+
+#endif  // UTIL_BTREE_SAFE_BTREE_H__
diff --git a/third-party/xdelta3/xdelta3/cpp-btree/safe_btree_map.h b/third-party/xdelta3/xdelta3/cpp-btree/safe_btree_map.h
new file mode 100644
index 0000000000..a0668f1677
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/cpp-btree/safe_btree_map.h
@@ -0,0 +1,89 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// The safe_btree_map<> is like btree_map<> except that it removes the caveat
+// about insertion and deletion invalidating existing iterators at a small cost
+// in making iterators larger and slower.
+//
+// Revalidation occurs whenever an iterator is accessed.  References
+// and pointers returned by safe_btree_map<> iterators are not stable,
+// they are potentially invalidated by any non-const method on the map.
+//
+// BEGIN INCORRECT EXAMPLE
+//   for (auto i = safe_map->begin(); i != safe_map->end(); ++i) {
+//     const T *value = &i->second;  // DO NOT DO THIS
+//     [code that modifies safe_map and uses value];
+//   }
+// END INCORRECT EXAMPLE
+#ifndef UTIL_BTREE_SAFE_BTREE_MAP_H__
+#define UTIL_BTREE_SAFE_BTREE_MAP_H__
+
+#include <functional>
+#include <memory>
+#include <utility>
+
+#include "btree_container.h"
+#include "btree_map.h"
+#include "safe_btree.h"
+
+namespace btree {
+
+// Map flavor of safe_btree. The container interface is inherited from
+// btree_map_container; this class only adds the constructors.
+template <typename Key, typename Value,
+          typename Compare = std::less<Key>,
+          typename Alloc = std::allocator<std::pair<const Key, Value> >,
+          int TargetNodeSize = 256>
+class safe_btree_map
+    : public btree_map_container<safe_btree<
+          btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize> > > {
+
+  typedef btree_map_params<Key, Value, Compare, Alloc, TargetNodeSize>
+      params_type;
+  typedef safe_btree<params_type> btree_type;
+  typedef btree_map_container<btree_type> super_type;
+  typedef safe_btree_map<Key, Value, Compare, Alloc, TargetNodeSize> self_type;
+
+ public:
+  // Comparator and allocator types, taken from the underlying safe_btree.
+  typedef typename btree_type::key_compare key_compare;
+  typedef typename btree_type::allocator_type allocator_type;
+
+  // Default constructor; both arguments default to default-constructed
+  // objects of their types.
+  safe_btree_map(const key_compare &cmp = key_compare(),
+                 const allocator_type &allocator = allocator_type())
+      : super_type(cmp, allocator) {}
+
+  // Copy constructor: hands the source map to the base class.
+  safe_btree_map(const self_type &other) : super_type(other) {}
+
+  // Range constructor: forwards [first, last) plus comparator and allocator
+  // to the base class.
+  template <class InputIterator>
+  safe_btree_map(InputIterator first, InputIterator last,
+                 const key_compare &cmp = key_compare(),
+                 const allocator_type &allocator = allocator_type())
+      : super_type(first, last, cmp, allocator) {}
+};
+
+// Non-member swap for two safe_btree_map objects of the same type; it simply
+// delegates to the member swap.
+template <typename K, typename V, typename C, typename A, int N>
+inline void swap(safe_btree_map<K, V, C, A, N> &lhs,
+                 safe_btree_map<K, V, C, A, N> &rhs) { lhs.swap(rhs); }
+
+} // namespace btree
+
+#endif  // UTIL_BTREE_SAFE_BTREE_MAP_H__
diff --git a/third-party/xdelta3/xdelta3/cpp-btree/safe_btree_set.h b/third-party/xdelta3/xdelta3/cpp-btree/safe_btree_set.h
new file mode 100644
index 0000000000..a6cd541859
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/cpp-btree/safe_btree_set.h
@@ -0,0 +1,88 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// The safe_btree_set<> is like btree_set<> except that it removes the caveat
+// about insertion and deletion invalidating existing iterators at a small cost
+// in making iterators larger and slower.
+//
+// Revalidation occurs whenever an iterator is accessed.  References
+// and pointers returned by safe_btree_set<> iterators are not stable;
+// they are potentially invalidated by any non-const method on the set.
+//
+// BEGIN INCORRECT EXAMPLE
+//   for (auto i = safe_set->begin(); i != safe_set->end(); ++i) {
+//     const T &value = *i;  // DO NOT DO THIS
+//     [code that modifies safe_set and uses value];
+//   }
+// END INCORRECT EXAMPLE
+
+#ifndef UTIL_BTREE_SAFE_BTREE_SET_H__
+#define UTIL_BTREE_SAFE_BTREE_SET_H__
+
+#include <functional>
+#include <memory>
+
+#include "btree_container.h"
+#include "btree_set.h"
+#include "safe_btree.h"
+
+namespace btree {
+
+// safe_btree_set: a btree_unique_container layered over safe_btree.  The
+// class body only adds forwarding constructors; the rest of the interface
+// is inherited from the container base.
+template <typename Key,
+          typename Compare = std::less<Key>,
+          typename Alloc = std::allocator<Key>,
+          int TargetNodeSize = 256>
+class safe_btree_set
+    : public btree_unique_container<
+          safe_btree<btree_set_params<Key, Compare, Alloc, TargetNodeSize> > > {
+  // Private shorthands for the parameter bundle, the tree and the base.
+  typedef btree_set_params<Key, Compare, Alloc, TargetNodeSize> params_type;
+  typedef safe_btree<params_type> btree_type;
+  typedef btree_unique_container<btree_type> super_type;
+  typedef safe_btree_set<Key, Compare, Alloc, TargetNodeSize> self_type;
+
+ public:
+  // Comparator and allocator types, taken from the underlying tree.
+  typedef typename btree_type::allocator_type allocator_type;
+  typedef typename btree_type::key_compare key_compare;
+
+  // Default constructor: passes comparator and allocator to the base.
+  safe_btree_set(const key_compare &comp = key_compare(),
+                 const allocator_type &alloc = allocator_type())
+      : super_type(comp, alloc) {}
+
+  // Copy constructor: initializes the base from x.
+  safe_btree_set(const self_type &x) : super_type(x) {}
+
+  // Range constructor: forwards the iterator pair b, e together with the
+  // comparator and allocator to the base.
+  template <typename InputIt>
+  safe_btree_set(InputIt b, InputIt e,
+                 const key_compare &comp = key_compare(),
+                 const allocator_type &alloc = allocator_type())
+      : super_type(b, e, comp, alloc) {}
+};
+
+// Non-member swap overload for safe_btree_set; it simply forwards to the
+// member function x.swap(y).
+template <typename K, typename C, typename A, int N>
+inline void swap(safe_btree_set<K, C, A, N> &x,
+                 safe_btree_set<K, C, A, N> &y) { x.swap(y); }
+
+} // namespace btree
+
+#endif  // UTIL_BTREE_SAFE_BTREE_SET_H__
diff --git a/third-party/xdelta3/xdelta3/cpp-btree/safe_btree_test.cc b/third-party/xdelta3/xdelta3/cpp-btree/safe_btree_test.cc
new file mode 100644
index 0000000000..0d77ae060f
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/cpp-btree/safe_btree_test.cc
@@ -0,0 +1,116 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// TODO(pmattis): Add some tests that iterators are not invalidated by
+// insertion and deletion.
+
+#include <functional>
+#include <map>
+#include <set>
+#include <string>
+#include <utility>
+
+#include "gtest/gtest.h"
+#include "btree_test.h"
+#include "safe_btree_map.h"
+#include "safe_btree_set.h"
+
+class UnsafeArena;
+
+namespace btree {
+namespace {
+
+// Runs BtreeTest (against std::set<K>) and BtreeAllocatorTest on
+// safe_btree_set<K> instantiated with target node size N.
+template <typename K, int N> void SetTest() {
+  typedef safe_btree_set<K, std::less<K>, std::allocator<K>, N> StdAllocSet;
+  BtreeTest<StdAllocSet, std::set<K> >();
+  BtreeAllocatorTest<safe_btree_set<K, std::less<K>, TestAllocator<K>, N> >(); }
+
+// Runs BtreeTest (against std::map<K, K>), BtreeAllocatorTest and
+// BtreeMapTest on safe_btree_map<K, K> with target node size N.
+template <typename K, int N> void MapTest() {
+  typedef safe_btree_map<K, K, std::less<K>, std::allocator<K>, N> StdAllocMap;
+  BtreeTest<StdAllocMap, std::map<K, K> >();
+  BtreeAllocatorTest<safe_btree_map<K, K, std::less<K>, TestAllocator<K>, N> >();
+  BtreeMapTest<StdAllocMap>(); }
+
+TEST(SafeBtree, set_int32_32)   { SetTest<int32_t, 32>(); }  // int32_t sets, N = 32..256
+TEST(SafeBtree, set_int32_64)   { SetTest<int32_t, 64>(); }
+TEST(SafeBtree, set_int32_128)  { SetTest<int32_t, 128>(); }
+TEST(SafeBtree, set_int32_256)  { SetTest<int32_t, 256>(); }
+TEST(SafeBtree, set_int64_256)  { SetTest<int64_t, 256>(); }  // other key types, N = 256
+TEST(SafeBtree, set_string_256) { SetTest<std::string, 256>(); }
+TEST(SafeBtree, set_pair_256)   { SetTest<std::pair<int, int>, 256>(); }
+TEST(SafeBtree, map_int32_256)  { MapTest<int32_t, 256>(); }  // maps from K to K, N = 256
+TEST(SafeBtree, map_int64_256)  { MapTest<int64_t, 256>(); }
+TEST(SafeBtree, map_string_256) { MapTest<std::string, 256>(); }
+TEST(SafeBtree, map_pair_256)   { MapTest<std::pair<int, int>, 256>(); }
+
+TEST(SafeBtree, Comparison) {  // operator== / operator!= on safe sets and maps
+  const int kSetSize = 1201;
+  safe_btree_set<int64_t> my_set;
+  for (int i = 0; i < kSetSize; ++i) {
+    my_set.insert(i);
+  }
+  safe_btree_set<int64_t> my_set_copy(my_set);  // a fresh copy must compare equal
+  EXPECT_TRUE(my_set_copy == my_set);
+  EXPECT_TRUE(my_set == my_set_copy);
+  EXPECT_FALSE(my_set_copy != my_set);
+  EXPECT_FALSE(my_set != my_set_copy);
+
+  my_set.insert(kSetSize);  // my_set gains an element the copy lacks
+  EXPECT_FALSE(my_set_copy == my_set);
+  EXPECT_FALSE(my_set == my_set_copy);
+  EXPECT_TRUE(my_set_copy != my_set);
+  EXPECT_TRUE(my_set != my_set_copy);
+
+  my_set.erase(kSetSize - 1);  // element counts match again, contents differ
+  EXPECT_FALSE(my_set_copy == my_set);
+  EXPECT_FALSE(my_set == my_set_copy);
+  EXPECT_TRUE(my_set_copy != my_set);
+  EXPECT_TRUE(my_set != my_set_copy);
+
+  safe_btree_map<std::string, int64_t> my_map;
+  for (int i = 0; i < kSetSize; ++i) {
+    my_map[std::string(i, 'a')] = i;  // key is i copies of 'a', value is i
+  }
+  safe_btree_map<std::string, int64_t> my_map_copy(my_map);
+  EXPECT_TRUE(my_map_copy == my_map);
+  EXPECT_TRUE(my_map == my_map_copy);
+  EXPECT_FALSE(my_map_copy != my_map);
+  EXPECT_FALSE(my_map != my_map_copy);
+
+  ++my_map_copy[std::string(7, 'a')];  // same keys, one mapped value differs
+  EXPECT_FALSE(my_map_copy == my_map);
+  EXPECT_FALSE(my_map == my_map_copy);
+  EXPECT_TRUE(my_map_copy != my_map);
+  EXPECT_TRUE(my_map != my_map_copy);
+
+  my_map_copy = my_map;  // resync the copy before the next mutation
+  my_map["hello"] = kSetSize;  // my_map gains a key the copy lacks
+  EXPECT_FALSE(my_map_copy == my_map);
+  EXPECT_FALSE(my_map == my_map_copy);
+  EXPECT_TRUE(my_map_copy != my_map);
+  EXPECT_TRUE(my_map != my_map_copy);
+
+  my_map.erase(std::string(kSetSize - 1, 'a'));  // equal counts, different keys
+  EXPECT_FALSE(my_map_copy == my_map);
+  EXPECT_FALSE(my_map == my_map_copy);
+  EXPECT_TRUE(my_map_copy != my_map);
+  EXPECT_TRUE(my_map != my_map_copy);
+}
+
+} // namespace
+} // namespace btree
diff --git a/third-party/xdelta3/xdelta3/draft-korn-vcdiff.txt b/third-party/xdelta3/xdelta3/draft-korn-vcdiff.txt
new file mode 100644
index 0000000000..1487deb853
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/draft-korn-vcdiff.txt
@@ -0,0 +1,1322 @@
+                                                     David G. Korn, AT&T Labs
+				             Joshua P. MacDonald, UC Berkeley
+                                                 Jeffrey C. Mogul, Compaq WRL
+Internet-Draft                                       Kiem-Phong Vo, AT&T Labs
+Expires: 09 November 2002                                    09 November 2001
+
+
+        The VCDIFF Generic Differencing and Compression Data Format
+
+                         draft-korn-vcdiff-06.txt
+
+
+
+Status of this Memo
+
+    This document is an Internet-Draft and is in full conformance
+    with all provisions of Section 10 of RFC2026.
+
+    Internet-Drafts are working documents of the Internet Engineering
+    Task Force (IETF), its areas, and its working groups.  Note that
+    other groups may also distribute working documents as
+    Internet-Drafts.
+
+    Internet-Drafts are draft documents valid for a maximum of six
+    months and may be updated, replaced, or obsoleted by other
+    documents at any time.  It is inappropriate to use Internet-
+    Drafts as reference material or to cite them other than as
+    "work in progress."
+
+    The list of current Internet-Drafts can be accessed at
+    http://www.ietf.org/ietf/1id-abstracts.txt
+
+    The list of Internet-Draft Shadow Directories can be accessed at
+    http://www.ietf.org/shadow.html.
+
+
+Abstract
+
+    This memo describes a general, efficient and portable data format
+    suitable for encoding compressed and/or differencing data so that
+    they can be easily transported among computers.
+
+
+Table of Contents:
+
+    1.  EXECUTIVE SUMMARY ............................................  2
+    2.  CONVENTIONS ..................................................  3
+    3.  DELTA INSTRUCTIONS ...........................................  4
+    4.  DELTA FILE ORGANIZATION ......................................  5
+    5.  DELTA INSTRUCTION ENCODING ...................................  9
+    6.  DECODING A TARGET WINDOW ..................................... 14
+    7.  APPLICATION-DEFINED CODE TABLES .............................. 16
+    8.  PERFORMANCE .................................................. 16
+    9.  FURTHER ISSUES ............................................... 17
+   10.  SUMMARY ...................................................... 18
+   11.  ACKNOWLEDGEMENTS ............................................. 18
+   12.  SECURITY CONSIDERATIONS ...................................... 18
+   13.  SOURCE CODE AVAILABILITY ..................................... 18
+   14.  INTELLECTUAL PROPERTY RIGHTS ................................. 18
+   15.  IANA CONSIDERATIONS .......................................... 19
+   16.  REFERENCES ................................................... 19
+   17.  AUTHOR'S ADDRESS ............................................. 20
+
+
+1.  EXECUTIVE SUMMARY
+
+    Compression and differencing techniques can greatly improve storage
+    and transmission of files and file versions.  Since files are often
+    transported across machines with distinct architectures and performance
+    characteristics, such data should be encoded in a form that is portable
+    and can be decoded with little or no knowledge of the encoders.
+    This document describes Vcdiff, a compact portable encoding format
+    designed for these purposes.
+
+    Data differencing is the process of computing a compact and invertible
+    encoding of a "target file" given a "source file".  Data compression
+    is similar but without the use of source data.  The UNIX utilities diff,
+    compress, and gzip are well-known examples of data differencing and
+    compression tools.  For data differencing, the computed encoding is
+    called a "delta file", and, for data compression, it is called
+    a "compressed file".  Delta and compressed files are good for storage
+    and transmission as they are often smaller than the originals.
+
+    Data differencing and data compression are traditionally treated
+    as distinct types of data processing.  However, as shown in the Vdelta
+    technique by Korn and Vo [1], compression can be thought of as a special
+    case of differencing in which the source data is empty. The basic idea
+    is to unify the string parsing scheme used in the Lempel-Ziv'77 style
+    compressors [2], and the block-move technique of Tichy [3].  Loosely
+    speaking, this works as follows:
+
+        a. Concatenate source and target data.
+        b. Parse the data from left to right as in LZ'77 but
+	   make sure that a parsed segment starts the target data.
+        c. Start to output when reaching target data.
+
+    Parsing is based on string matching algorithms such as suffix trees [4]
+    or hashing with different time and space performance characteristics.
+    Vdelta uses a fast string matching algorithm that requires less memory
+    than other techniques [5,6].  However, even with this algorithm, the
+    memory requirement can still be prohibitive for large files.  A common
+    way to deal with memory limitation is to partition an input file into
+    chunks called "windows" and process them separately. Here, except for
+    unpublished work by Vo, little has been done on designing effective
+    windowing schemes. Current techniques, including Vdelta, simply use
+    source and target windows with corresponding addresses across source
+    and target files.
+
+    String matching and windowing algorithms have large influence on the
+    compression rate of delta and compressed files. However, it is desirable
+    to have a portable encoding format that is independent of such algorithms.
+    This enables construction of client-server applications in which a server
+    may serve clients with unknown computing characteristics.  Unfortunately,
+    all current differencing and compressing tools, including Vdelta, fall
+    short in this respect. Their storage formats are closely intertwined
+    with the implemented string matching and/or windowing algorithms.
+
+    The encoding format Vcdiff proposed here addresses the above issues.
+    Vcdiff achieves the following characteristics:
+
+	Output compactness:
+            The basic encoding format compactly represents compressed or delta
+	    files. Applications can further extend the basic encoding format
+	    with "secondary encoders" to achieve more compression.
+
+	Data portability:
+	    The basic encoding format is free from machine byte order and
+	    word size issues. This allows data to be encoded on one machine
+	    and decoded on a different machine with different architecture.
+
+    	Algorithm genericity:
+	    The decoding algorithm is independent from string matching and
+	    windowing algorithms. This allows competition among implementations
+	    of the encoder while keeping the same decoder.
+
+    	Decoding efficiency:
+	    Except for secondary encoder issues, the decoding algorithm runs
+	    in time proportional to the size of the target file and uses space
+	    proportional to the maximal window size.  Vcdiff differs from more
+	    conventional compressors in that it uses only byte-aligned
+	    data, thus avoiding bit-level operations, which improves
+	    decoding speed at the slight cost of compression efficiency.
+
+    The Vcdiff data format and the algorithms for decoding data shall be
+    described next.  Since Vcdiff treats compression as a special case of
+    differencing, we shall use the term "delta file" to indicate the
+    compressed output for both cases.
+
+
+2. CONVENTIONS
+
+    The basic data unit is a byte.  For portability, Vcdiff shall limit
+    a byte to its lower eight bits even on machines with larger bytes.
+    The bits in a byte are ordered from right to left so that the least
+    significant bit (LSB) has value 1, and the most significant bit (MSB),
+    has value 128.
+
+    For purposes of exposition in this document, we adopt the convention
+    that the LSB is numbered 0, and the MSB is numbered 7.  Bit numbers
+    never appear in the encoded format itself.
+
+    Vcdiff encodes unsigned integer values using a portable variable-sized
+    format (originally introduced in the Sfio library [7]). This encoding
+    treats an integer as a number in base 128. Then, each digit in this
+    representation is encoded in the lower seven bits of a byte. Except for
+    the least significant byte, other bytes have their most significant bit
+    turned on to indicate that there are still more digits in the encoding.
+    The two key properties of this integer encoding that are beneficial
+    to a data compression format are:
+
+	a. The encoding is portable among systems using 8-bit bytes, and
+        b. Small values are encoded compactly.
+
+    For example, consider the value 123456789 which can be represented with
+    four 7-bit digits whose values are 58, 111, 26, 21 in order from most
+    to least significant. Below is the 8-bit byte encoding of these digits.
+    Note that the MSBs of 58, 111 and 26 are on.
+
+                 +-------------------------------------------+
+                 | 10111010 | 11101111 | 10011010 | 00010101 |
+                 +-------------------------------------------+
+                   MSB+58     MSB+111    MSB+26     0+21
+
+
+    Henceforth, the terms "byte" and "integer" will refer to a byte and an
+    unsigned integer as described.
+
+
+    From time to time, algorithms are exhibited to clarify the descriptions
+    of parts of the Vcdiff format. On such occasions, the C language will be
+    used to make precise the algorithms.  The C code shown in this
+    document is meant for clarification only, and is not part of the
+    actual specification of the Vcdiff format.
+
+    The key words "MUST", "MUST NOT", "SHOULD", "SHOULD NOT", and
+    "MAY" in this document are to be interpreted as described in
+    RFC2119 [12].
+
+
+3.  DELTA INSTRUCTIONS
+
+    A large target file is partitioned into non-overlapping sections
+    called "target windows". These target windows are processed separately
+    and sequentially based on their order in the target file.
+
+    A target window T of length t may be compared against some source data
+    segment S of length s. By construction, this source data segment S
+    comes either from the source file, if one is used, or from a part of
+    the target file earlier than T.  In this way, during decoding, S is
+    completely known when T is being decoded.
+
+    The choices of T, t, S and s are made by some window selection algorithm
+    which can greatly affect the size of the encoding. However, as seen later,
+    these choices are encoded so that no knowledge of the window selection
+    algorithm is needed during decoding.
+
+    Assume that S[j] represents the jth byte in S, and T[k] represents
+    the kth byte in T.  Then, for the delta instructions, we treat the data
+    windows S and T as substrings of a superstring U formed by concatenating
+    them like this:
+
+        S[0]S[1]...S[s-1]T[0]T[1]...T[t-1]
+
+    The "address" of a byte in S or T is referred to by its location in U.
+    For example, the address of T[k] is s+k.
+
+    The instructions to encode and direct the reconstruction of a target
+    window are called delta instructions. There are three types:
+
+	ADD: This instruction has two arguments, a size x and a sequence of
+	    x bytes to be copied.
+	COPY: This instruction has two arguments, a size x and an address p
+	    in the string U. The arguments specify the substring of U that
+	    must be copied. We shall assert that such a substring must be
+	    entirely contained in either S or T.
+	RUN: This instruction has two arguments, a size x and a byte b that
+	    will be repeated x times.
+
+    Below are example source and target windows and the delta instructions
+    that encode the target window in terms of the source window.
+
+        a b c d e f g h i j k l m n o p
+        a b c d w x y z e f g h e f g h e f g h e f g h z z z z
+
+        COPY  4, 0
+        ADD   4, w x y z
+        COPY  4, 4
+        COPY 12, 24
+	RUN   4, z
+
+
+    Thus, the first letter 'a' in the target window is at location 16
+    in the superstring. Note that the fourth instruction, "COPY 12, 24",
+    copies data from T itself since address 24 is position 8 in T.
+    This instruction also shows that it is fine to overlap the data to be
+    copied with the data being copied from as long as the latter starts
+    earlier. This enables efficient encoding of periodic sequences,
+    i.e., sequences with regularly repeated subsequences. The RUN instruction
+    is a compact way to encode a sequence repeating the same byte even though
+    such a sequence can be thought of as a periodic sequence with period 1.
+
+    To reconstruct the target window, one simply processes one delta
+    instruction at a time and copies the data either from the source window
+    or from the target window being reconstructed, based on the type of
+    the instruction and the associated address, if any.
+
+
+4.  DELTA FILE ORGANIZATION
+
+    A Vcdiff delta file starts with a Header section followed by a sequence
+    of Window sections. The Header section includes magic bytes to identify
+    the file type, and information concerning data processing beyond the
+    basic encoding format. The Window sections encode the target windows.
+
+    Below is the overall organization of a delta file. The indented items
+    refine the ones immediately above them. An item in square brackets may
+    or may not be present in the file depending on the information encoded
+    in the Indicator byte above it.
+
+        Header
+	    Header1                                  - byte
+	    Header2                                  - byte
+	    Header3                                  - byte
+	    Header4                                  - byte
+	    Hdr_Indicator                            - byte
+	    [Secondary compressor ID]                - byte
+
+[@@@ Why is compressor ID not an integer? ]
+[@@@ If we aren't defining any secondary compressors yet, then it seems
+that defining the [Secondary compressor ID] and the corresponding
+VCD_DECOMPRESS Hdr_Indicator bit in this draft has no real value.  An
+implementation of this specification won't be able to decode a VCDIFF
+encoded with this option if it doesn't know about any secondary
+compressors.  It seems that you should specify the bits related to
+secondary compressors once you have defined the first secondary
+compressor.  I can imagine a secondary-compressor might want to supply
+extra information, such as a dictionary of some kind, in which case
+this speculative treatment wouldn't go far enough.]
+
+	    [Length of code table data]              - integer
+	    [Code table data]
+	      	Size of near cache                   - byte
+	        Size of same cache                   - byte
+	        Compressed code table data
+	Window1
+	    Win_Indicator                            - byte
+	    [Source segment size]                    - integer
+	    [Source segment position]                - integer
+            The delta encoding of the target window
+	        Length of the delta encoding         - integer
+	        The delta encoding
+	            Size of the target window        - integer
+	            Delta_Indicator                  - byte
+	            Length of data for ADDs and RUNs - integer
+	            Length of instructions and sizes - integer
+	            Length of addresses for COPYs    - integer
+	            Data section for ADDs and RUNs   - array of bytes
+	            Instructions and sizes section   - array of bytes
+	            Addresses section for COPYs      - array of bytes
+	Window2
+	...
+
+
+
+4.1 The Header Section
+
+    Each delta file starts with a header section organized as below.
+    Note the convention that square-brackets enclose optional items.
+
+	    Header1                                  - byte = 0xD6
+	    Header2                                  - byte = 0xC3
+	    Header3                                  - byte = 0xC4
+
+[@@@ HMMM: the draft text gave 0xE6, 0xD3, 0xD4 here, but ASCII 'V', 'C',
+'D' (0x56, 0x43, 0x44) with the most significant bit turned on are:
+0xD6
+0xC3
+0xC4 ]
+
+	    Header4                                  - byte
+	    Hdr_Indicator                            - byte
+	    [Secondary compressor ID]                - byte
+	    [Length of code table data]              - integer
+	    [Code table data]
+
+    The first three Header bytes are the ASCII characters 'V', 'C' and 'D'
+    with their most significant bits turned on (in hexadecimal, the values
+    are 0xD6, 0xC3, and 0xC4). The fourth Header byte is currently set to
+    zero. In the future, it might be used to indicate the version of Vcdiff.
+
+    The Hdr_Indicator byte shows if there are any initialization data
+    required to aid in the reconstruction of data in the Window sections.
+    This byte MAY have non-zero values for either, both, or neither of
+    the two bits VCD_DECOMPRESS and VCD_CODETABLE below:
+
+	    7 6 5 4 3 2 1 0
+	   +-+-+-+-+-+-+-+-+
+	   | | | | | | | | |
+	   +-+-+-+-+-+-+-+-+
+	                ^ ^
+	                | |
+	                | +-- VCD_DECOMPRESS
+	                +---- VCD_CODETABLE
+
+    If bit 0 (VCD_DECOMPRESS) is non-zero, this indicates that a secondary
+    compressor may have been used to further compress certain parts of the
+    delta encoding data as described in Sections 4.3 and 6. In that case,
+    the ID of the secondary compressor is given next. If this bit is zero,
+    the compressor ID byte is not included.
+
+[@@@ If we aren't defining any secondary compressors yet, then it seems
+this bit has no real value yet..]
+
+    If bit 1 (VCD_CODETABLE) is non-zero, this indicates that an
+    application-defined code table is to be used for decoding the delta
+    instructions. This table itself is compressed.  The length of the data
+    comprising this compressed code table and the data follow next. Section 7
+    discusses application-defined code tables.  If this bit is zero, the code
+    table data length and the code table data are not included.
+
+    If both bits are set, then the compressor ID byte is included
+    before the code table data length and the code table data.
+
+
+4.2 The Format of a Window Section
+
+    Each Window section is organized as follows:
+
+	    Win_Indicator                            - byte
+	    [Source segment length]                  - integer
+	    [Source segment position]                - integer
+            The delta encoding of the target window
+
+
+    Below are the details of the various items:
+
+[@@@ Here, I want to replace the Win_Indicator with a source-count,
+followed by source-count length/position pairs?]
+
+        Win_Indicator:
+	    This byte is a set of bits, as shown:
+
+	    7 6 5 4 3 2 1 0
+	   +-+-+-+-+-+-+-+-+
+	   | | | | | | | | |
+	   +-+-+-+-+-+-+-+-+
+	                ^ ^
+	                | |
+	                | +-- VCD_SOURCE
+	                +---- VCD_TARGET
+
+
+	    If bit 0 (VCD_SOURCE) is non-zero, this indicates that a segment
+            of data from the "source" file was used as the corresponding
+            source window of data to encode the target window. The decoder
+	    will use this same source data segment to decode the target window.
+
+	    If bit 1 (VCD_TARGET) is non-zero, this indicates that a segment
+            of data from the "target" file was used as the corresponding
+	    source window of data to encode the target window. As above, this
+	    same source data segment is used to decode the target window.
+
+	    The Win_Indicator byte MUST NOT have more than one of the bits
+	    set (non-zero).  It MAY have none of these bits set.
+
+	    If one of these bits is set, the byte is followed by two
+            integers to indicate respectively the length and position of
+            the source data segment in the relevant file.  If the
+            indicator byte is zero, the target window was compressed
+            by itself without comparing against another data segment,
+            and these two integers are not included.
+
+        The delta encoding of the target window:
+            This contains the delta encoding of the target window either
+            in terms of the source data segment (i.e., VCD_SOURCE
+            or VCD_TARGET was set) or by itself if no source window
+            is specified. This data format is discussed next.
+
+
+4.3 The Delta Encoding of a Target Window
+
+    The delta encoding of a target window is organized as follows:
+
+	Length of the delta encoding            - integer
+	The delta encoding
+	    Length of the target window         - integer
+	    Delta_Indicator                     - byte
+	    Length of data for ADDs and RUNs    - integer
+	    Length of instructions section      - integer
+	    Length of addresses for COPYs       - integer
+	    Data section for ADDs and RUNs      - array of bytes
+	    Instructions and sizes section      - array of bytes
+	    Addresses section for COPYs         - array of bytes
+
+
+	Length of the delta encoding:
+	    This integer gives the total number of remaining bytes that
+	    comprise data of the delta encoding for this target window.
+
+        The delta encoding:
+	    This contains the data representing the delta encoding which
+	    is described next.
+
+    	Length of the target window:
+	    This integer indicates the actual size of the target window
+            after decompression. A decoder can use this value to allocate
+            memory to store the uncompressed data.
+
+	Delta_Indicator:
+	    This byte is a set of bits, as shown:
+
+	    7 6 5 4 3 2 1 0
+	   +-+-+-+-+-+-+-+-+
+	   | | | | | | | | |
+	   +-+-+-+-+-+-+-+-+
+	              ^ ^ ^
+	              | | |
+	              | | +-- VCD_DATACOMP
+	              | +---- VCD_INSTCOMP
+	              +------ VCD_ADDRCOMP
+
+		VCD_DATACOMP:	bit value 1.
+		VCD_INSTCOMP:	bit value 2.
+		VCD_ADDRCOMP:	bit value 4.
+
+            As discussed, the delta encoding consists of COPY, ADD and RUN
+            instructions. The ADD and RUN instructions have accompanying
+            unmatched data (that is, data that does not specifically match
+            any data in the source window or in some earlier part of the
+            target window) and the COPY instructions have addresses of where
+	    the matches occur. OPTIONALLY, these types of data MAY be further
+	    compressed using a secondary compressor. Thus, Vcdiff separates
+            the encoding of the delta instructions into three parts:
+
+	        a. The unmatched data in the ADD and RUN instructions,
+	        b. The delta instructions and accompanying sizes, and
+                c. The addresses of the COPY instructions.
+
+            If the bit VCD_DECOMPRESS (Section 4.1) was on, each of these
+            sections may have been compressed using the specified secondary
+            compressor. The bit positions 0 (VCD_DATACOMP), 1 (VCD_INSTCOMP),
+            and 2 (VCD_ADDRCOMP) respectively indicate, if non-zero, that
+            the corresponding parts are compressed. Then, these parts MUST
+	    be decompressed before decoding the delta instructions.
+
+	Length of data for ADDs and RUNs:
+	    This is the length (in bytes) of the section of data storing
+            the unmatched data accompanying the ADD and RUN instructions.
+
+	Length of instructions section:
+	    This is the length (in bytes) of the delta instructions and
+            accompanying sizes.
+
+	Length of addresses for COPYs:
+	    This is the length (in bytes) of the section storing
+            the addresses of the COPY instructions.
+
+    	Data section for ADDs and RUNs:
+	    This sequence of bytes encodes the unmatched data for the ADD
+            and RUN instructions.
+
+	Instructions and sizes section:
+	    This sequence of bytes encodes the instructions and their sizes.
+
+	Addresses section for COPYs:
+	    This sequence of bytes encodes the addresses of the COPY
+	    instructions.
+
+
+5. DELTA INSTRUCTION ENCODING
+
+    The delta instructions described in Section 3 represent the results of
+    string matching. For many data differencing applications in which the
+    changes between source and target data are small, any straightforward
+    representation of these instructions would be adequate.  However, for
+    applications including data compression, it is important to encode
+    these instructions well to achieve good compression rates.  From our
+    experience, the following observations can be made:
+
+    a. The addresses in COPY instructions are locations of matches and
+       often occur close by or even exactly equal to one another. This is
+       because data in local regions are often replicated with minor changes.
+       In turn, this means that coding a newly matched address against some
+       set of recently matched addresses can be beneficial.
+
+    b. The matches are often short in length and separated by small amounts
+       of unmatched data. That is, the lengths of COPY and ADD instructions
+       are often small. This is particularly true of binary data such as
+       executable files or structured data such as HTML or XML. In such cases,
+       compression can be improved by combining the encoding of the sizes
+       and the instruction types as well as combining the encoding of adjacent
+       delta instructions with sufficiently small data sizes.
+
+    The below subsections discuss how the Vcdiff data format provides
+    mechanisms enabling encoders to use the above observations to improve
+    compression rates.
+
+
+5.1 Address Encoding Modes of COPY Instructions
+
+    As mentioned earlier, addresses of COPY instructions often occur close
+    to one another or are exactly equal. To take advantage of this phenomenon
+    and encode addresses of COPY instructions more efficiently, the Vcdiff
+    data format supports the use of two different types of address caches.
+    Both the encoder and decoder maintain these caches, so that the decoder's
+    caches remain synchronized with the encoder's caches.
+
+    a. A "near" cache is an array with "s_near" slots, each containing an
+       address used for encoding addresses nearby to previously encoded
+       addresses (in the positive direction only).  The near cache also
+       maintains a "next_slot" index to the near cache.  New entries to the
+       near cache are always inserted in the next_slot index, which maintains
+       a circular buffer of the s_near most recent addresses.
+
+    b. A "same" cache is an array with "s_same" times 256 slots, each
+       containing an address.  The same cache maintains a hash table of recent
+       addresses used for repeated encoding of the exact same address.
+
+
+    By default, the parameters s_near and s_same are respectively set to 4
+    and 3. An encoder MAY modify these values, but then it MUST encode the
+    new values in the encoding itself, as discussed in Section 7, so that
+    the decoder can properly set up its own caches.
+
+    At the start of processing a target window, an implementation
+    (encoder or decoder) initializes all of the slots in both caches
+    to zero.  The next_slot pointer of the near cache is set
+    to point to slot zero.
+
+    Each time a COPY instruction is processed by the encoder or
+    decoder, the implementation's caches are updated as follows, where
+    "addr" is the address in the COPY instruction.
+
+    a. The slot in the near cache referenced by the next_slot
+       index is set to addr.  The next_slot index is then incremented
+       modulo s_near.
+
+    b. The slot in the same cache whose index is addr%(s_same*256)
+       is set to addr. [We use the C notations of % for modulo and
+       * for multiplication.]
+
+
+5.2 Example code for maintaining caches
+
+    To make clear the above description, below are example cache data
+    structures and algorithms to initialize and update them:
+
+        typedef struct _cache_s
+        {
+	    int*  near;      /* array of size s_near        */
+            int   s_near;
+            int   next_slot; /* the circular index for near */
+            int*  same;      /* array of size s_same*256    */
+            int   s_same;
+        } Cache_t;
+
+        cache_init(Cache_t* ka)
+        {
+	    int   i;
+
+            ka->next_slot = 0;
+            for(i = 0; i < ka->s_near; ++i)
+                 ka->near[i] = 0;
+
+            for(i = 0; i < ka->s_same*256; ++i)
+                 ka->same[i] = 0;
+        }
+
+        cache_update(Cache_t* ka, int addr)
+        {
+	    if(ka->s_near > 0)
+            {   ka->near[ka->next_slot] = addr;
+                ka->next_slot = (ka->next_slot + 1) % ka->s_near;
+            }
+
+            if(ka->s_same > 0)
+                ka->same[addr % (ka->s_same*256)] = addr;
+        }
+
+
+5.3 Encoding of COPY instruction addresses
+
+    The address of a COPY instruction is encoded using different modes
+    depending on the type of cached address used, if any.
+
+    Let "addr" be the address of a COPY instruction to be decoded and "here"
+    be the current location in the target data (i.e., the start of the data
+    about to be encoded or decoded).  Let near[j] be the jth element in
+    the near cache, and same[k] be the kth element in the same cache.
+    Below are the possible address modes:
+
+	VCD_SELF: This mode has value 0. The address was encoded by itself
+            as an integer.
+
+	VCD_HERE: This mode has value 1. The address was encoded as
+	    the integer value "here - addr".
+
+	Near modes: The "near modes" are in the range [2,s_near+1]. Let m
+	    be the mode of the address encoding. The address was encoded
+	    as the integer value "addr - near[m-2]".
+
+	Same modes: The "same modes" are in the range
+	    [s_near+2,s_near+s_same+1]. Let m be the mode of the encoding.
+	    The address was encoded as a single byte b such that
+	    "addr == same[(m - (s_near+2))*256 + b]".
+
+
+5.3.1 Example code for encoding and decoding of COPY instruction addresses
+
+    We show example algorithms below to demonstrate use of address modes more
+    clearly. The encoder has freedom to choose address modes; the sample
+    addr_encode() algorithm merely shows one way of picking the address
+    mode. The decoding algorithm addr_decode() will uniquely decode addresses
+    regardless of the encoder's algorithm choice.
+
+    Note that the address caches are updated immediately after an address is
+    encoded or decoded. In this way, the decoder is always synchronized with
+    the encoder.
+
+        int addr_encode(Cache_t* ka, int addr, int here, int* mode)
+        {
+	    int  i, d, bestd, bestm;
+
+	    /* Attempt to find the address mode that yields the
+	     * smallest integer value for "d", the encoded address
+	     * value, thereby minimizing the encoded size of the
+	     * address. */
+
+            bestd = addr; bestm = VCD_SELF;      /* VCD_SELF == 0 */
+
+            if((d = here-addr) < bestd)
+                { bestd = d; bestm = VCD_HERE; } /* VCD_HERE == 1 */
+
+            for(i = 0; i < ka->s_near; ++i)
+                if((d = addr - ka->near[i]) >= 0 && d < bestd)
+                    { bestd = d; bestm = i+2; }
+
+            if(ka->s_same > 0 && ka->same[d = addr%(ka->s_same*256)] == addr)
+                { bestd = d%256; bestm = ka->s_near + 2 + d/256; }
+
+            cache_update(ka,addr);
+
+            *mode = bestm; /* this returns the address encoding mode */
+            return  bestd; /* this returns the encoded address       */
+        }
+
+    Note that the addr_encode() algorithm chooses the best address mode using a
+    local optimization, but that may not lead to the best encoding efficiency
+    because different modes lead to different instruction encodings, as
+    described below.
+
+    The functions addrint() and addrbyte() used in addr_decode() obtain from
+    the "Addresses section for COPYs" (Section 4.3) an integer or a byte,
+    respectively. These utilities will not be described here.  We simply
+    recall that an integer is represented as a compact variable-sized string
+    of bytes as described in Section 2 (i.e., base 128).
+
+        int addr_decode(Cache_t* ka, int here, int mode)
+        {   int  addr, m;
+
+            if(mode == VCD_SELF)
+                 addr = addrint();
+            else if(mode == VCD_HERE)
+                 addr = here - addrint();
+            else if((m = mode - 2) >= 0 && m < ka->s_near) /* near cache */
+                 addr = ka->near[m] + addrint();
+            else /* same cache */
+            {    m = mode - (2 + ka->s_near);
+                 addr = ka->same[m*256 + addrbyte()];
+            }
+
+            cache_update(ka, addr);
+
+            return addr;
+        }
+
+
+5.4 Instruction Codes
+
+    As noted, the data sizes associated with delta instructions are often
+    small. Thus, compression efficiency can be improved by combining the sizes
+    and instruction types in a single encoding, as well as by combining certain
+    pairs of adjacent delta instructions. Effective choices of when to perform
+    such combinations depend on many factors including the data being processed
+    and the string matching algorithm in use. For example, if many COPY
+    instructions have the same data sizes, it may be worthwhile to encode these
+    instructions more compactly than others.
+
+    The Vcdiff data format is designed so that a decoder does not need to be
+    aware of the choices made in encoding algorithms. This is achieved with the
+    notion of an "instruction code table" containing 256 entries. Each entry
+    defines either a single delta instruction or a pair of instructions that
+    have been combined.  Note that the code table itself only exists in main
+    memory, not in the delta file (unless using an application-defined code
+    table, described in Section 7). The encoded data simply includes the index
+    of each instruction and, since there are only 256 indices, each index
+    can be represented as a single byte.
+
+    Each instruction code entry contains six fields, each of which
+    is a single byte with unsigned value:
+
+            +-----------------------------------------------+
+	    | inst1 | size1 | mode1 | inst2 | size2 | mode2 |
+	    +-----------------------------------------------+
+
+@@@ could be more compact
+
+    Each triple (inst,size,mode) defines a delta instruction. The meanings
+    of these fields are as follows:
+
+    inst: An "inst" field can have one of the four values: NOOP (0), ADD (1),
+	RUN (2) or COPY (3) to indicate the instruction types. NOOP means
+	that no instruction is specified. In this case, both the corresponding
+	size and mode fields will be zero.
+
+    size: A "size" field is zero or positive. A value zero means that the
+	size associated with the instruction is encoded separately as
+	an integer in the "Instructions and sizes section" (Section 6).
+	A positive value for "size" defines the actual data size.
+	Note that since the size is restricted to a byte, the maximum
+	value for any instruction with size implicitly defined in the code
+	table is 255.
+
+    mode: A "mode" field is significant only when the associated delta
+	instruction is a COPY. It defines the mode used to encode the
+	associated addresses. For other instructions, this is always zero.
+
+
+5.5 The Code Table
+
+    Following the discussions on address modes and instruction code tables,
+    we define a "Code Table" to have the data below:
+
+	s_near: the size of the near cache,
+	s_same: the size of the same cache,
+	i_code: the 256-entry instruction code table.
+
+    Vcdiff itself defines a "default code table" in which s_near is 4
+    and s_same is 3. Thus, there are 9 address modes for a COPY instruction.
+    The first two are VCD_SELF (0) and VCD_HERE (1). Modes 2, 3, 4 and 5
+    are for addresses coded against the near cache. And, modes 6, 7  and 8
+    are for addresses coded against the same cache.
+
+    The default instruction code table is depicted below, in a compact
+    representation that we use only for descriptive purposes.  See section 7
+    for the specification of how an instruction code table is represented
+    in the Vcdiff encoding format.  In the depiction, a zero value for
+    size indicates that the size is separately coded. The mode of non-COPY
+    instructions is represented as 0 even though they are not used.
+
+
+         TYPE      SIZE     MODE    TYPE     SIZE     MODE     INDEX
+        ---------------------------------------------------------------
+     1.  RUN         0        0     NOOP       0        0        0
+     2.  ADD    0, [1,17]     0     NOOP       0        0      [1,18]
+     3.  COPY   0, [4,18]     0     NOOP       0        0     [19,34]
+     4.  COPY   0, [4,18]     1     NOOP       0        0     [35,50]
+     5.  COPY   0, [4,18]     2     NOOP       0        0     [51,66]
+     6.  COPY   0, [4,18]     3     NOOP       0        0     [67,82]
+     7.  COPY   0, [4,18]     4     NOOP       0        0     [83,98]
+     8.  COPY   0, [4,18]     5     NOOP       0        0     [99,114]
+     9.  COPY   0, [4,18]     6     NOOP       0        0    [115,130]
+    10.  COPY   0, [4,18]     7     NOOP       0        0    [131,146]
+    11.  COPY   0, [4,18]     8     NOOP       0        0    [147,162]
+    12.  ADD       [1,4]      0     COPY     [4,6]      0    [163,174]
+    13.  ADD       [1,4]      0     COPY     [4,6]      1    [175,186]
+    14.  ADD       [1,4]      0     COPY     [4,6]      2    [187,198]
+    15.  ADD       [1,4]      0     COPY     [4,6]      3    [199,210]
+    16.  ADD       [1,4]      0     COPY     [4,6]      4    [211,222]
+    17.  ADD       [1,4]      0     COPY     [4,6]      5    [223,234]
+    18.  ADD       [1,4]      0     COPY       4        6    [235,238]
+    19.  ADD       [1,4]      0     COPY       4        7    [239,242]
+    20.  ADD       [1,4]      0     COPY       4        8    [243,246]
+    21.  COPY        4      [0,8]   ADD        1        0    [247,255]
+        ---------------------------------------------------------------
+
+    In the above depiction, each numbered line represents one or more
+    entries in the actual instruction code table (recall that an entry in
+    the instruction code table may represent up to two combined delta
+    instructions.) The last column ("INDEX") shows the index value or
+    range of index values of the entries covered by that line. The notation
+    [i,j] means values from i through j, inclusive. The first 6 columns of
+    a line in the depiction describe the pairs of instructions used for
+    the corresponding index value(s).
+
+    If a line in the depiction includes a column entry using the [i,j]
+    notation, this means that the line is instantiated for each value
+    in the range from i to j, inclusive.  The notation "0, [i,j]" means
+    that the line is instantiated for the value 0 and for each value
+    in the range from i to j, inclusive.
+
+    If a line in the depiction includes more than one entry using the [i,j]
+    notation, implying a "nested loop" to convert the line to a range of
+    table entries, the first such [i,j] range specifies the outer loop,
+    and the second specifies the inner loop.
+
+    The below examples should make clear the above description:
+
+    Line 1 shows the single RUN instruction with index 0. As the size field
+    is 0, this RUN instruction always has its actual size encoded separately.
+
+    Line 2 shows the 18 single ADD instructions. The ADD instruction with
+    size field 0 (i.e., the actual size is coded separately) has index 1.
+    ADD instructions with sizes from 1 to 17 use code indices 2 to 18 and
+    their sizes are as given (so they will not be separately encoded.)
+
+    Following the single ADD instructions are the single COPY instructions
+    ordered by their address encoding modes. For example, line 11 shows the
+    COPY instructions with mode 8, i.e., the last of the same cache.
+    In this case, the COPY instruction with size field 0 has index 147.
+    Again, the actual size of this instruction will be coded separately.
+
+    Lines 12 to 21 show the pairs of instructions that are combined together.
+    For example, line 12 depicts the 12 entries in which an ADD instruction
+    is combined with an immediately following COPY instruction. The entries
+    with indices 163, 164, 165 represent the pairs in which the ADD
+    instructions all have size 1 while the COPY instructions have mode
+    0 (VCD_SELF) and sizes 4, 5 and 6 respectively.
+
+    The last line, line 21, shows the nine instruction pairs where the first
+    instruction is a COPY and the second is an ADD. In this case, all COPY
+    instructions have size 4 with mode ranging from 0 to 8 and all the ADD
+    instructions have size 1. Thus, the entry with largest index 255
+    combines a COPY instruction of size 4 and mode 8 with an ADD instruction
+    of size 1.
+
+    The choice of the minimum size 4 for COPY instructions in the default code
+    table was made from experiments that showed that excluding small matches
+    (less than 4 bytes long) improved the compression rates.
+
+
+6. DECODING A TARGET WINDOW
+
+    Section 4.3 discusses that the delta instructions and associated data
+    are encoded in three arrays of bytes:
+
+        Data section for ADDs and RUNs,
+        Instructions and sizes section, and
+        Addresses section for COPYs.
+
+
+    Further, these data sections may have been compressed by some
+    secondary compressor. We assume that any such compressed data has been
+    decompressed, so that we now have three arrays:
+
+	inst: bytes coding the instructions and sizes.
+        data: unmatched data associated with ADDs and RUNs.
+	addr: bytes coding the addresses of COPYs.
+
+    These arrays are organized as follows:
+
+	inst:
+	    a sequence of (index, [size1], [size2]) tuples, where "index"
+            is an index into the instruction code table, and size1 and size2
+            are integers that MAY or MAY NOT be included in the tuple as
+            follows. The entry with the given "index" in the instruction
+            code table potentially defines two delta instructions. If the
+            first delta instruction is not a VCD_NOOP and its size is zero,
+            then size1 MUST be present. Otherwise, size1 MUST be omitted and
+            the size of the instruction (if it is not VCD_NOOP) is as defined
+            in the table. The presence or absence of size2 is defined
+            similarly with respect to the second delta instruction.
+
+	data:
+	    a sequence of data values, encoded as bytes.
+
+	addr:
+	    a sequence of address values. Addresses are normally encoded as
+            integers as described in Section 2 (i.e., base 128).
+	    Since the same cache emits addresses in the range [0,255],
+	    however, same cache addresses are always encoded as a
+	    single byte.
+
+    To summarize, each tuple in the "inst" array includes an index to some
+    entry in the instruction code table that determines:
+
+    a. Whether one or two instructions were encoded and their types.
+
+    b. If the instructions have their sizes encoded separately, these
+       sizes will follow, in order, in the tuple.
+
+    c. If the instructions have accompanying data, i.e., ADDs or RUNs,
+       their data will be in the array "data".
+
+    d. Similarly, if the instructions are COPYs, the coded addresses are
+       found in the array "addr".
+
+    The decoding procedure simply processes the arrays by reading one code
+    index at a time, looking up the corresponding instruction code entry,
+    then consuming the respective sizes, data and addresses following the
+    directions in this entry. In other words, the decoder maintains an implicit
+    next-element pointer for each array; "consuming" an instruction tuple,
+    data, or address value implies incrementing the associated pointer.
+
+    For example, if during the processing of the target window, the next
+    unconsumed tuple in the inst array has index value 19, then the first
+    instruction is a COPY, whose size is found as the immediately following
+    integer in the inst array.  Since the mode of this COPY instruction is
+    VCD_SELF, the corresponding address is found by consuming the next
+    integer in the addr array.  The data array is left intact. As the second
+    instruction for code index 19 is a NOOP, this tuple is finished.
+
+
+7. APPLICATION-DEFINED CODE TABLES
+
+    Although the default code table used in Vcdiff is good for general
+    purpose encoders, there are times when other code tables may perform
+    better. For example, to code a file with many identical segments of data,
+    it may be advantageous to have a COPY instruction with the specific size
+    of these data segments so that the instruction can be encoded in a single
+    byte. Such a special code table MUST then be encoded in the delta file
+    so that the decoder can reconstruct it before decoding the data.
+
+    Vcdiff allows an application-defined code table to be specified
+    in a delta file with the following data:
+
+	Size of near cache            - byte
+	Size of same cache            - byte
+	Compressed code table data
+
+    The "compressed code table data" encodes the delta between the default
+    code table (source) and the new code table (target) in the same manner as
+    described in Section 4.3 for encoding a target window in terms of a
+    source window. This delta is computed using the following steps:
+
+    a.  Convert the new instruction code table into a string, "code", of
+	1536 bytes using the below steps in order:
+
+        i. Add in order the 256 bytes representing the types of the first
+	   instructions in the instruction pairs.
+       ii. Add in order the 256 bytes representing the types of the second
+	   instructions in the instruction pairs.
+      iii. Add in order the 256 bytes representing the sizes of the first
+	   instructions in the instruction pairs.
+       iv. Add in order the 256 bytes representing the sizes of the second
+	   instructions in the instruction pairs.
+        v. Add in order the 256 bytes representing the modes of the first
+	   instructions in the instruction pairs.
+       vi. Add in order the 256 bytes representing the modes of the second
+	   instructions in the instruction pairs.
+
+    b.  Similarly, convert the default instruction code table into
+	a string "dflt".
+
+    c.  Treat the string "code" as a target window and "dflt" as the
+	corresponding source data and apply an encoding algorithm to
+	compute the delta encoding of "code" in terms of "dflt".
+	This computation MUST use the default code table for encoding
+	the delta instructions.
+
+    The decoder can then reverse the above steps to decode the compressed
+    table data using the method of Section 6, employing the default code
+    table, to generate the new code table.  Note that the decoder does not
+    need to know anything about the details of the encoding algorithm used
+    in step (c). The decoder is still able to decode the new code table
+    because the Vcdiff format is independent from the choice of encoding
+    algorithm, and because the encoder in step (c) uses the known, default
+    code table.
+
+
+8. PERFORMANCE
+
+    The encoding format is compact. For compression only, using the LZ-77
+    string parsing strategy and without any secondary compressors, the typical
+    compression rate is better than Unix compress and close to gzip.  For
+    differencing, the data format is better than all known methods in
+    terms of its stated goal, which is primarily decoding speed and
+    encoding efficiency.
+
+    We compare the performance of compress, gzip and Vcdiff using the
+    archives of three versions of the Gnu C compiler, gcc-2.95.1.tar,
+    gcc-2.95.2.tar and gcc-2.95.3.tar. The experiments were done on an
+    SGI-MIPS3, 400MHZ. Gzip was used at its default compression level.
+    Vcdiff timings were done using the Vcodex/Vcdiff software (Section 13).
+    As string and window matching typically dominates the computation during
+    compression, the Vcdiff compression times were directly due to the
+    algorithms used in the Vcodex/Vcdiff software. However, the decompression
+    times should be generic and representative of any good implementation
+    of the Vcdiff data format. Timing was done by running each program
+    three times and taking the average of the total cpu+system times.
+
+    Below are the different Vcdiff runs:
+
+	Vcdiff: vcdiff is used as compressor only.
+
+	Vcdiff-d: vcdiff is used as a differencer only. That is, it only
+		compares target data against source data.  Since the files
+		involved are large, they are broken into windows. In this
+		case, each target window starting at some file offset in
+		the target file is compared against a source window with
+		the same file offset (in the source file). The source
+		window is also slightly larger than the target window
+		to increase matching opportunities. The -d option also gives
+		a hint to the string matching algorithm of Vcdiff that
+		the two files are very similar with long stretches of matches.
+		The algorithm takes advantage of this to minimize its
+		processing of source data and save time.
+
+	Vcdiff-dc: This is similar to Vcdiff-d but vcdiff can also compare
+		target data against target data as applicable. Thus, vcdiff
+		both computes differences and compresses data. The windowing
+		algorithm is the same as above. However, the above hint is
+		rescinded in this case.
+
+	Vcdiff-dcs: This is similar to Vcdiff-dc but the windowing algorithm
+		uses a content-based heuristic to select source data segments
+		that are more likely to match with a given target window.
+		Thus, the source data segment selected for a target window
+		often will not be aligned with the file offsets of this
+		target window.
+
+
+                gcc-2.95.1    gcc-2.95.2    compression   decompression
+    raw size      55746560      55797760
+    compress         -          19939390       13.85s	      7.09s
+    gzip             -          12973443       42.99s         5.35s
+    Vcdiff           -          15358786       20.04s         4.65s
+    Vcdiff-d         -            100971       10.93s         1.92s
+    Vcdiff-dc        -             97246       20.03s         1.84s
+    Vcdiff-dcs       -            256445       44.81s         1.84s
+
+		TABLE 1. Compressing gcc-2.95.2.tar given gcc-2.95.1
+
+
+    TABLE 1 shows the raw sizes of gcc-2.95.1.tar and gcc-2.95.2.tar and the
+    sizes of the compressed results. As a pure compressor, the compression
+    rate for Vcdiff is worse than gzip and better than compress. The last
+    three rows show that when two file versions are very similar, differencing
+    can have dramatically good compression rates. Vcdiff-d and Vcdiff-dc use
+    the same simple window selection method but Vcdiff-dc also does compression
+    so its output is slightly smaller. Vcdiff-dcs uses a heuristic based on
+    data content to search for source data that likely will match a given target
+    window. Although it does a good job, the heuristic did not always find the
+    best matches which are given by the simple algorithm of Vcdiff-d.  As a
+    result, the output size is slightly larger. Note also that there is a large
+    cost in computing matching windows this way. Finally, the compression time
+    of Vcdiff-d is nearly half of that of Vcdiff-dc. It is tempting to conclude
+    that the compression feature causes the additional time in Vcdiff-dc
+    relative to Vcdiff-d.  However, this is not the case. The hint given to
+    the Vcdiff string matching algorithm that the two files are likely to
+    have very long stretches of matches helps the algorithm to minimize
+    processing of the "source data", thus saving half the time. However, as we
+    shall see below, when this hint is wrong, the result is an even longer time.
+
+
+                gcc-2.95.2    gcc-2.95.3    compression   decompression
+    raw size      55797760      55787520
+    compress         -          19939453       13.54s	      7.00s
+    gzip             -          12998097       42.63s         5.62s
+    Vcdiff           -          15371737       20.09s         4.74s
+    Vcdiff-d         -          26383849       71.41s         6.41s
+    Vcdiff-dc        -          14461203       42.48s         4.82s
+    Vcdiff-dcs       -           1248543       61.18s         1.99s
+
+		TABLE 2. Compressing gcc-2.95.3.tar given gcc-2.95.2
+
+
+    TABLE 2 shows the raw sizes of gcc-2.95.2.tar and gcc-2.95.3.tar and
+    the sizes of the compressed results. In this case, the tar file of
+    gcc-2.95.3 is rearranged in a way that makes the straightforward method
+    of matching file offsets for source and target windows fail. As a
+    result, Vcdiff-d performs rather dismally both in time and output size.
+    The large time for Vcdiff-d is directly due to the fact that the string
+    matching algorithm has to work much harder to find matches when the hint
+    that two files have long matching stretches fails to hold. On the other
+    hand, Vcdiff-dc does both differencing and compression resulting in good
+    output size. Finally, the window searching heuristic used in Vcdiff-dcs is
+    effective in finding the right matching source windows for target windows
+    resulting in a small output size. This shows why the data format needs to
+    have a way to specify matching windows to gain performance. Finally,
+    we note that the decoding times are always good regardless of how
+    the string matching or window searching algorithms perform.
+
+
+9. FURTHER ISSUES
+
+    This document does not address a few issues:
+
+    Secondary compressors:
+        As discussed in Section 4.3, certain sections in the delta encoding
+	of a window may be further compressed by a secondary compressor.
+	In our experience, the basic Vcdiff format is adequate for most
+	purposes so that secondary compressors are seldom needed. In
+        particular, for normal use of data differencing where the files to
+	be compared have long stretches of matches, much of the gain in
+	compression rate is already achieved by normal string matching.
+	Thus, the use of secondary compressors is seldom needed in this case.
+	However, for applications beyond differencing of such nearly identical
+	files, secondary compressors may be needed to achieve maximal
+	compressed results.
+
+        Therefore, we recommend leaving the Vcdiff data format defined
+	as in this document so that the use of secondary compressors
+ 	can be implemented when they become needed in the future.
+        The formats of the compressed data via such compressors or any
+	compressors that may be defined in the future are left open to
+	their implementations.  These could include Huffman encoding,
+	arithmetic encoding, and splay tree encoding [8,9].
+
+    Large file system vs. small file system:
+	As discussed in Section 4, a target window in a large file may be
+	compared against some source window in another file or in the same
+	file (from some earlier part). In that case, the file offset of the
+	source window is specified as a variable-sized integer in the delta
+	encoding. There is a possibility that the encoding was computed on
+	a system supporting much larger files than in a system where
+	the data may be decoded (e.g., 64-bit file systems vs. 32-bit file
+	systems). In that case, some target data may not be recoverable.
+	This problem could afflict any compression format, and ought
+	to be resolved with a generic negotiation mechanism in the
+	appropriate protocol(s).
+
+
+10.  SUMMARY
+
+    We have described Vcdiff, a general and portable encoding format for
+    compression and differencing. The format is good in that it allows
+    implementing a decoder without knowledge of the encoders. Further,
+    ignoring the use of secondary compressors not defined within the format,
+    the decoding algorithms runs in linear time and requires working space
+    proportional to window sizes.
+
+
+
+11. ACKNOWLEDGEMENTS
+
+    Thanks are due to Balachander Krishnamurthy, Jeff Mogul and Arthur Van Hoff
+    who provided much encouragement to publicize Vcdiff. In particular, Jeff
+    helped clarifying the description of the data format presented here.
+
+
+
+12. SECURITY CONSIDERATIONS
+
+    Vcdiff only provides a format to encode compressed and differenced data.
+    It does not address any issues concerning how such data are, in fact,
+    stored in a given file system or the run-time memory of a computer system.
+    Therefore, we do not anticipate any security issues with respect to Vcdiff.
+
+
+
+13. SOURCE CODE AVAILABILITY
+
+    Vcdiff is implemented as a data transforming method in Phong Vo's
+    Vcodex library. AT&T Corp. has made the source code for Vcodex available
+    for anyone to use to transmit data via HTTP/1.1 Delta Encoding [10,11].
+    The source code and according license is accessible at the below URL:
+
+          http://www.research.att.com/sw/tools
+
+
+14. INTELLECTUAL PROPERTY RIGHTS
+
+   The IETF has been notified of intellectual property rights claimed in
+   regard to some or all of the specification contained in this
+   document.  For more information consult the online list of claimed
+   rights, at <http://www.ietf.org/ipr.html>.
+
+   The IETF takes no position regarding the validity or scope of any
+   intellectual property or other rights that might be claimed to
+   pertain to the implementation or use of the technology described in
+   this document or the extent to which any license under such rights
+   might or might not be available; neither does it represent that it
+   has made any effort to identify any such rights.  Information on the
+   IETF's procedures with respect to rights in standards-track and
+   standards-related documentation can be found in BCP-11.  Copies of
+   claims of rights made available for publication and any assurances of
+   licenses to be made available, or the result of an attempt made to
+   obtain a general license or permission for the use of such
+   proprietary rights by implementors or users of this specification can
+   be obtained from the IETF Secretariat.
+
+
+
+15. IANA CONSIDERATIONS
+
+   The Internet Assigned Numbers Authority (IANA) administers the number
+   space for Secondary Compressor ID values.  Values and their meaning
+   must be documented in an RFC or other peer-reviewed, permanent, and
+   readily available reference, in sufficient detail so that
+   interoperability between independent implementations is possible.
+   Subject to these constraints, name assignments are First Come, First
+   Served - see RFC2434 [13].  Legal ID values are in the range 1..255.
+
+   This document does not define any values in this number space.
+
+
+16. REFERENCES
+
+    [1] D.G. Korn and K.P. Vo, Vdelta: Differencing and Compression,
+        Practical Reusable Unix Software, Editor B. Krishnamurthy,
+        John Wiley & Sons, Inc., 1995.
+
+    [2] J. Ziv and A. Lempel, A Universal Algorithm for Sequential Data
+        Compression, IEEE Trans. on Information Theory, 23(3):337-343, 1977.
+
+    [3] W. Tichy, The String-to-String Correction Problem with Block Moves,
+        ACM Transactions on Computer Systems, 2(4):309-321, November 1984.
+
+    [4] E.M. McCreight, A Space-Economical Suffix Tree Construction
+        Algorithm, Journal of the ACM, 23:262-272, 1976.
+
+    [5] J.J. Hunt, K.P. Vo, W. Tichy, An Empirical Study of Delta Algorithms,
+        IEEE Software Configuration and Maintenance Workshop, 1996.
+
+    [6] J.J. Hunt, K.P. Vo, W. Tichy, Delta Algorithms: An Empirical Analysis,
+        ACM Trans. on Software Engineering and Methodology, 7:192-214, 1998.
+
+    [7] D.G. Korn, K.P. Vo, Sfio: A buffered I/O Library,
+        Proc. of the Summer '91 Usenix Conference, 1991.
+
+    [8] D. W. Jones, Application of Splay Trees to Data Compression,
+        CACM, 31(8):996:1007.
+
+    [9] M. Nelson, J. Gailly, The Data Compression Book, ISBN 1-55851-434-1,
+        M&T Books, New York, NY, 1995.
+
+   [10] J.C. Mogul, F. Douglis, A. Feldmann, and B. Krishnamurthy,
+        Potential benefits of delta encoding and data compression for HTTP,
+        SIGCOMM '97, Cannes, France, 1997.
+
+   [11] J.C. Mogul, B. Krishnamurthy, F. Douglis, A. Feldmann,
+        Y. Goland, and A. Van Hoff, Delta Encoding in HTTP,
+        IETF, draft-mogul-http-delta-10, 2001.
+
+   [12] S. Bradner, Key words for use in RFCs to Indicate Requirement Levels,
+        RFC 2119, March 1997.
+
+   [13] T. Narten, H. Alvestrand, Guidelines for Writing an IANA
+        Considerations Section in RFCs, RFC2434, October 1998.
+
+
+
+17. AUTHOR'S ADDRESS
+
+    Kiem-Phong Vo (main contact)
+    AT&T Labs, Room D223
+    180 Park Avenue
+    Florham Park, NJ 07932
+    Email: kpv@research.att.com
+    Phone: 1 973 360 8630
+
+    David G. Korn
+    AT&T Labs, Room D237
+    180 Park Avenue
+    Florham Park, NJ 07932
+    Email: dgk@research.att.com
+    Phone: 1 973 360 8602
+
+    Jeffrey C. Mogul
+    Western Research Laboratory
+    Compaq Computer Corporation
+    250 University Avenue
+    Palo Alto, California, 94305, U.S.A.
+    Email: JeffMogul@acm.org
+    Phone: 1 650 617 3304 (email preferred)
+
+    Joshua P. MacDonald
+    Computer Science Division
+    University of California, Berkeley
+    345 Soda Hall
+    Berkeley, CA 94720
+    Email: jmacd@cs.berkeley.edu
diff --git a/third-party/xdelta3/xdelta3/examples/README.md b/third-party/xdelta3/xdelta3/examples/README.md
new file mode 100644
index 0000000000..ebaf522834
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/examples/README.md
@@ -0,0 +1,8 @@
+Files in this directory demonstrate how to use the Xdelta3 API.  Copyrights
+are held by the respective authors.
+
+small_page_test.c -- how to use xdelta3 in an environment such as the kernel
+for small pages with little memory
+
+encode_decode_test.c -- how to use xdelta3 to process (encode/decode) data in
+multiple windows with the non-blocking API
diff --git a/third-party/xdelta3/xdelta3/examples/compare_test.c b/third-party/xdelta3/xdelta3/examples/compare_test.c
new file mode 100644
index 0000000000..24f1cb8e4c
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/examples/compare_test.c
@@ -0,0 +1,138 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <string.h>
+#include <assert.h>
+
+#include "xdelta3.h"
+
+#define NUM (1<<20)
+#define ITERS 100
+
+/* RDTSC cycle counter (from Wikipedia); 'static' so C99 inline still links. */
+static inline uint64_t rdtsc() {
+  uint32_t lo, hi;  /* outputs bound to the "a" and "d" register constraints */
+  asm volatile ("rdtsc" : "=a" (lo), "=d" (hi));
+  return (uint64_t)hi << 32 | lo;  /* hi supplies bits 63..32, lo bits 31..0 */
+}
+
+typedef int (*test_func)(const char *s1, const char *s2, int n);
+
+void run_test(const char *buf1, const char *buf2,
+	      const char *name, test_func func) {
+  uint64_t start, end;
+  uint64_t accum = 0;  /* total cycles spent inside func over all iterations */
+  int i, x;
+  /* Time ITERS calls of func on the two buffers and print the mean cost. */
+  for (i = 0; i < ITERS; i++) {
+    start = rdtsc();
+    x = func(buf1, buf2, NUM);
+    end = rdtsc();
+    accum += end - start;
+    assert(x == NUM - 1);  /* every comparator must report the mismatch at NUM-1 */
+  }
+
+  accum /= ITERS;  /* mean cycles per call */
+
+  printf("%s : %llu cycles\n", name, (unsigned long long)accum);  /* %llu, not BSD-only %qu */
+}
+
+/* memcmp() baseline: build w/ -fno-builtin for this to be fast.  Assumes the
+ * only difference is at s1[n-1] with s1 < s2, so it simply reports n - 1. */
+int memcmp_fake(const char *s1, const char *s2, int n) {
+  int x = memcmp(s1, s2, n);
+  return x < 0 ? n - 1 : n + 1;  /* n + 1 is never a valid index: trips run_test's assert */
+}
+
+#define UNALIGNED_OK 1
+static inline int
+test2(const char *s1c, const char *s2c, int n)  /* returns length of the common prefix */
+{
+  int i = 0;
+#if UNALIGNED_OK
+  int nint = n / sizeof(int);  /* number of whole ints contained in n bytes */
+
+  if (nint >> 3)  /* word-wise path only when at least 8 ints are available */
+    {
+      int j = 0;
+      const int *s1 = (const int*)s1c;
+      const int *s2 = (const int*)s2c;
+      int nint_8 = nint - 8;  /* highest start index that still leaves 8 ints */
+      /* Compare 8 ints per pass; after a mismatch i is one past the differing int. */
+      while (i <= nint_8 &&
+	     s1[i++] == s2[j++] &&
+	     s1[i++] == s2[j++] &&
+	     s1[i++] == s2[j++] &&
+	     s1[i++] == s2[j++] &&
+	     s1[i++] == s2[j++] &&
+	     s1[i++] == s2[j++] &&
+	     s1[i++] == s2[j++] &&	 
+	     s1[i++] == s2[j++]) { }
+
+      i = (i - 1) * sizeof(int);  /* step back one int and convert to a byte offset */
+    }
+#endif
+  /* Finish (or, without the fast path, do everything) one byte at a time. */
+  while (i < n && s1c[i] == s2c[i])
+    {
+      i++;
+    }
+  return i;
+}
+
+static inline int
+test1(const char *s1c, const char *s2c, int n) {  /* byte-at-a-time reference version */
+  int i = 0;
+  while (i < n && s1c[i] == s2c[i])  /* stop at the first differing byte or at n */
+    {
+      i++;
+    }
+  return i;  /* number of leading bytes that match */
+}
+
+int main(/*int argc, char **argv*/) {  /* benchmark driver: aligned, then unaligned */
+  char *buf1 = malloc(NUM+1);
+  char *buf2 = malloc(NUM+1);  /* one spare byte so buf2+1 can hold NUM bytes */
+  int i;
+  if (!buf1 || !buf2) { free(buf1); free(buf2); return 1; }  /* allocation failed */
+  for (i = 0; i < NUM; i++) {
+    buf1[i] = buf2[i] = rand();
+  }
+
+  buf2[NUM-1]++;  /* make only the last byte differ */
+
+  printf ("ALIGNED\n");
+
+  run_test(buf1, buf2, "memcmp", &memcmp_fake);
+  run_test(buf1, buf2, "test1", &test1);
+  run_test(buf1, buf2, "test2", &test2);
+
+  for (i = 0; i < NUM; i++) {
+    buf1[i] = buf2[i+1] = rand();  /* same data, shifted by one byte in buf2 */
+  }
+
+  buf2[NUM]++;  /* last byte of the shifted copy, i.e. (buf2+1)[NUM-1] */
+
+  printf ("UNALIGNED\n");
+
+  run_test(buf1, buf2+1, "memcmp", &memcmp_fake);
+  run_test(buf1, buf2+1, "test1", &test1);
+  run_test(buf1, buf2+1, "test2", &test2);
+  free(buf2); free(buf1);
+  return 0;
+}
diff --git a/third-party/xdelta3/xdelta3/examples/encode_decode_test.c b/third-party/xdelta3/xdelta3/examples/encode_decode_test.c
new file mode 100644
index 0000000000..dc4fefaa74
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/examples/encode_decode_test.c
@@ -0,0 +1,203 @@
+// Permission to distribute this example by
+// Copyright (C) 2007 Ralf Junker
+// Ralf Junker <delphi@yunqa.de>
+// http://www.yunqa.de/delphi/
+
+//---------------------------------------------------------------------------
+
+#include <stdio.h>
+#include <sys/stat.h>
+#include "xdelta3.h"
+#include "xdelta3.c"
+
+//---------------------------------------------------------------------------
+
+int code (
+  int encode,        /* nonzero = encode, zero = decode */
+  FILE*  InFile,     /* data to encode, or delta to decode */
+  FILE*  SrcFile ,   /* optional source file; may be NULL */
+  FILE* OutFile,     /* receives every XD3_OUTPUT chunk */
+  int BufSize )      /* window/block size; raised to XD3_ALLOCSIZE if smaller */
+{
+  int r, ret;
+  struct stat statbuf;
+  xd3_stream stream;
+  xd3_config config;
+  xd3_source source;
+  void* Input_Buf;
+  int Input_Buf_Read;
+  /* Returns 0 on success, otherwise a nonzero stdio/xdelta3 error value. */
+  if (BufSize < XD3_ALLOCSIZE)
+    BufSize = XD3_ALLOCSIZE;
+
+  memset (&stream, 0, sizeof (stream));
+  memset (&source, 0, sizeof (source));  /* also leaves source.curblk NULL for free() */
+
+  xd3_init_config(&config, XD3_ADLER32);
+  config.winsize = BufSize;
+  xd3_config_stream(&stream, &config);
+
+  if (SrcFile)
+  {
+    r = fstat(fileno(SrcFile), &statbuf);
+    if (r)
+      return r;  /* NOTE(review): error returns skip the cleanup at the bottom */
+
+    source.blksize = BufSize;
+    source.curblk = malloc(source.blksize);
+    if (!source.curblk) return -1;  /* out of memory */
+    /* Load 1st block of stream. */
+    r = fseek(SrcFile, 0, SEEK_SET);
+    if (r)
+      return r;
+    source.onblk = fread((void*)source.curblk, 1, source.blksize, SrcFile);
+    source.curblkno = 0;
+    /* Set the stream. */
+    xd3_set_source(&stream, &source);
+  }
+
+  Input_Buf = malloc(BufSize);
+  if (!Input_Buf) return -1;  /* out of memory */
+  fseek(InFile, 0, SEEK_SET);
+  do
+  {
+    Input_Buf_Read = fread(Input_Buf, 1, BufSize, InFile);
+    if (Input_Buf_Read < BufSize)  /* short read: this is the final chunk */
+    {
+      xd3_set_flags(&stream, XD3_FLUSH | stream.flags);
+    }
+    xd3_avail_input(&stream, Input_Buf, Input_Buf_Read);
+
+process:
+    if (encode)
+      ret = xd3_encode_input(&stream);
+    else
+      ret = xd3_decode_input(&stream);
+
+    switch (ret)
+    {
+    case XD3_INPUT:
+      {
+        fprintf (stderr,"XD3_INPUT\n");
+        continue;  /* jumps to the loop condition, then reads the next chunk */
+      }
+
+    case XD3_OUTPUT:
+      {
+        fprintf (stderr,"XD3_OUTPUT\n");
+        r = fwrite(stream.next_out, 1, stream.avail_out, OutFile);
+        if (r != (int)stream.avail_out)
+          return -1;  /* short write; r itself may be 0, which would read as success */
+	xd3_consume_output(&stream);
+        goto process;
+      }
+
+    case XD3_GETSRCBLK:
+      {
+        fprintf (stderr,"XD3_GETSRCBLK %lld\n", (long long)source.getblkno);
+        if (SrcFile)
+        {
+          r = fseek(SrcFile, source.blksize * source.getblkno, SEEK_SET);
+          if (r)
+            return r;
+          source.onblk = fread((void*)source.curblk, 1,
+			       source.blksize, SrcFile);
+          source.curblkno = source.getblkno;
+        }
+        goto process;
+      }
+
+    case XD3_GOTHEADER:
+      {
+        fprintf (stderr,"XD3_GOTHEADER\n");
+        goto process;
+      }
+
+    case XD3_WINSTART:
+      {
+        fprintf (stderr,"XD3_WINSTART\n");
+        goto process;
+      }
+
+    case XD3_WINFINISH:
+      {
+        fprintf (stderr,"XD3_WINFINISH\n");
+        goto process;
+      }
+
+    default:
+      {
+        fprintf (stderr,"!!! INVALID %s %d !!!\n",
+		stream.msg, ret);
+        return ret;
+      }
+
+    }
+
+  }
+  while (Input_Buf_Read == BufSize);  /* a short read above ends the loop */
+
+  free(Input_Buf);
+
+  free((void*)source.curblk);
+  xd3_close_stream(&stream);
+  xd3_free_stream(&stream);
+
+  return 0;
+
+}
+
+
+int main(int argc, char* argv[])  /* encode input against source, then decode it back */
+{
+  FILE*  InFile;
+  FILE*  SrcFile;
+  FILE* OutFile;
+  int r;
+
+  if (argc != 3) {
+    fprintf (stderr, "usage: %s source input\n", argv[0]);
+    return 1;
+  }
+
+  char *input = argv[2];
+  char *source = argv[1];
+  const char *output = "encoded.testdata";
+  const char *decoded = "decoded.testdata";
+
+  /* Encode */
+
+  InFile = fopen(input, "rb");
+  SrcFile = fopen(source, "rb");
+  OutFile = fopen(output, "wb");
+  if (!InFile || !SrcFile || !OutFile) { perror ("fopen"); return 1; }
+  r = code (1, InFile, SrcFile, OutFile, 0x1000);
+
+  fclose(OutFile);
+  fclose(SrcFile);
+  fclose(InFile);
+
+  if (r) {
+    fprintf (stderr, "Encode error: %d\n", r);
+    return r;
+  }
+
+  /* Decode */
+
+  InFile = fopen(output, "rb");  /* the delta written by the encode pass */
+  SrcFile = fopen(source, "rb");
+  OutFile = fopen(decoded, "wb");
+  if (!InFile || !SrcFile || !OutFile) { perror ("fopen"); return 1; }
+  r = code (0, InFile, SrcFile, OutFile, 0x1000);
+
+  fclose(OutFile);
+  fclose(SrcFile);
+  fclose(InFile);
+
+  if (r) {
+    fprintf (stderr, "Decode error: %d\n", r);
+    return r;
+  }
+
+  return 0;
+}
diff --git a/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test.xcodeproj/project.pbxproj b/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test.xcodeproj/project.pbxproj
new file mode 100644
index 0000000000..d50d9f755b
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test.xcodeproj/project.pbxproj
@@ -0,0 +1,389 @@
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 46;
+	objects = {
+
+/* Begin PBXBuildFile section */
+		B9001B65158D008900B9E855 /* xdelta3.c in Sources */ = {isa = PBXBuildFile; fileRef = B9001B63158D008900B9E855 /* xdelta3.c */; };
+		B9313C3C158D11BA001C1F28 /* file_v1_to_v2.bin in Resources */ = {isa = PBXBuildFile; fileRef = B9313C39158D11BA001C1F28 /* file_v1_to_v2.bin */; };
+		B9313C3D158D11BA001C1F28 /* file_v1.bin in Resources */ = {isa = PBXBuildFile; fileRef = B9313C3A158D11BA001C1F28 /* file_v1.bin */; };
+		B9313C3E158D11BA001C1F28 /* file_v2.bin in Resources */ = {isa = PBXBuildFile; fileRef = B9313C3B158D11BA001C1F28 /* file_v2.bin */; };
+		B9ADC6BF158CFD36007EF999 /* UIKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = B9ADC6BE158CFD36007EF999 /* UIKit.framework */; };
+		B9ADC6C1158CFD36007EF999 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = B9ADC6C0158CFD36007EF999 /* Foundation.framework */; };
+		B9ADC6C3158CFD36007EF999 /* CoreGraphics.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = B9ADC6C2158CFD36007EF999 /* CoreGraphics.framework */; };
+		B9ADC6C9158CFD36007EF999 /* InfoPlist.strings in Resources */ = {isa = PBXBuildFile; fileRef = B9ADC6C7158CFD36007EF999 /* InfoPlist.strings */; };
+		B9ADC6CB158CFD36007EF999 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = B9ADC6CA158CFD36007EF999 /* main.m */; };
+		B9ADC6CF158CFD36007EF999 /* Xd3iOSAppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = B9ADC6CE158CFD36007EF999 /* Xd3iOSAppDelegate.m */; };
+		B9ADC6D2158CFD36007EF999 /* MainStoryboard_iPhone.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = B9ADC6D0158CFD36007EF999 /* MainStoryboard_iPhone.storyboard */; };
+		B9ADC6D5158CFD36007EF999 /* MainStoryboard_iPad.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = B9ADC6D3158CFD36007EF999 /* MainStoryboard_iPad.storyboard */; };
+		B9ADC6D8158CFD36007EF999 /* Xd3iOSViewController.m in Sources */ = {isa = PBXBuildFile; fileRef = B9ADC6D7158CFD36007EF999 /* Xd3iOSViewController.m */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXFileReference section */
+		B9001B56158D008900B9E855 /* xdelta3-blkcache.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-blkcache.h"; path = "../../../../xdelta3-blkcache.h"; sourceTree = "<group>"; };
+		B9001B57158D008900B9E855 /* xdelta3-cfgs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-cfgs.h"; path = "../../../../xdelta3-cfgs.h"; sourceTree = "<group>"; };
+		B9001B58158D008900B9E855 /* xdelta3-decode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-decode.h"; path = "../../../../xdelta3-decode.h"; sourceTree = "<group>"; };
+		B9001B59158D008900B9E855 /* xdelta3-djw.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-djw.h"; path = "../../../../xdelta3-djw.h"; sourceTree = "<group>"; };
+		B9001B5A158D008900B9E855 /* xdelta3-fgk.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-fgk.h"; path = "../../../../xdelta3-fgk.h"; sourceTree = "<group>"; };
+		B9001B5B158D008900B9E855 /* xdelta3-hash.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-hash.h"; path = "../../../../xdelta3-hash.h"; sourceTree = "<group>"; };
+		B9001B5C158D008900B9E855 /* xdelta3-internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-internal.h"; path = "../../../../xdelta3-internal.h"; sourceTree = "<group>"; };
+		B9001B5D158D008900B9E855 /* xdelta3-list.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-list.h"; path = "../../../../xdelta3-list.h"; sourceTree = "<group>"; };
+		B9001B5E158D008900B9E855 /* xdelta3-main.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-main.h"; path = "../../../../xdelta3-main.h"; sourceTree = "<group>"; };
+		B9001B5F158D008900B9E855 /* xdelta3-merge.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-merge.h"; path = "../../../../xdelta3-merge.h"; sourceTree = "<group>"; };
+		B9001B60158D008900B9E855 /* xdelta3-python.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-python.h"; path = "../../../../xdelta3-python.h"; sourceTree = "<group>"; };
+		B9001B61158D008900B9E855 /* xdelta3-second.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-second.h"; path = "../../../../xdelta3-second.h"; sourceTree = "<group>"; };
+		B9001B62158D008900B9E855 /* xdelta3-test.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "xdelta3-test.h"; path = "../../../../xdelta3-test.h"; sourceTree = "<group>"; };
+		B9001B63158D008900B9E855 /* xdelta3.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = xdelta3.c; path = ../../../../xdelta3.c; sourceTree = "<group>"; };
+		B9001B64158D008900B9E855 /* xdelta3.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = xdelta3.h; path = ../../../../xdelta3.h; sourceTree = "<group>"; };
+		B9313C39158D11BA001C1F28 /* file_v1_to_v2.bin */ = {isa = PBXFileReference; lastKnownFileType = archive.macbinary; path = file_v1_to_v2.bin; sourceTree = "<group>"; };
+		B9313C3A158D11BA001C1F28 /* file_v1.bin */ = {isa = PBXFileReference; lastKnownFileType = archive.macbinary; path = file_v1.bin; sourceTree = "<group>"; };
+		B9313C3B158D11BA001C1F28 /* file_v2.bin */ = {isa = PBXFileReference; lastKnownFileType = archive.macbinary; path = file_v2.bin; sourceTree = "<group>"; };
+		B9ADC6BA158CFD36007EF999 /* xdelta3-ios-test.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "xdelta3-ios-test.app"; sourceTree = BUILT_PRODUCTS_DIR; };
+		B9ADC6BE158CFD36007EF999 /* UIKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = UIKit.framework; path = System/Library/Frameworks/UIKit.framework; sourceTree = SDKROOT; };
+		B9ADC6C0158CFD36007EF999 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; };
+		B9ADC6C2158CFD36007EF999 /* CoreGraphics.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreGraphics.framework; path = System/Library/Frameworks/CoreGraphics.framework; sourceTree = SDKROOT; };
+		B9ADC6C6158CFD36007EF999 /* xdelta3-ios-test-Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = "xdelta3-ios-test-Info.plist"; sourceTree = "<group>"; };
+		B9ADC6C8158CFD36007EF999 /* en */ = {isa = PBXFileReference; lastKnownFileType = text.plist.strings; name = en; path = en.lproj/InfoPlist.strings; sourceTree = "<group>"; };
+		B9ADC6CA158CFD36007EF999 /* main.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = main.m; sourceTree = "<group>"; };
+		B9ADC6CC158CFD36007EF999 /* xdelta3-ios-test-Prefix.pch */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "xdelta3-ios-test-Prefix.pch"; sourceTree = "<group>"; };
+		B9ADC6CD158CFD36007EF999 /* Xd3iOSAppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = Xd3iOSAppDelegate.h; sourceTree = "<group>"; };
+		B9ADC6CE158CFD36007EF999 /* Xd3iOSAppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = Xd3iOSAppDelegate.m; sourceTree = "<group>"; };
+		B9ADC6D1158CFD36007EF999 /* en */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = en; path = en.lproj/MainStoryboard_iPhone.storyboard; sourceTree = "<group>"; };
+		B9ADC6D4158CFD36007EF999 /* en */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = en; path = en.lproj/MainStoryboard_iPad.storyboard; sourceTree = "<group>"; };
+		B9ADC6D6158CFD36007EF999 /* Xd3iOSViewController.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = Xd3iOSViewController.h; sourceTree = "<group>"; };
+		B9ADC6D7158CFD36007EF999 /* Xd3iOSViewController.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = Xd3iOSViewController.m; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		B9ADC6B7158CFD36007EF999 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				B9ADC6BF158CFD36007EF999 /* UIKit.framework in Frameworks */,
+				B9ADC6C1158CFD36007EF999 /* Foundation.framework in Frameworks */,
+				B9ADC6C3158CFD36007EF999 /* CoreGraphics.framework in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		B9ADC6AF158CFD36007EF999 = {
+			isa = PBXGroup;
+			children = (
+				B9ADC6C4158CFD36007EF999 /* xdelta3-ios-test */,
+				B9ADC6BD158CFD36007EF999 /* Frameworks */,
+				B9ADC6BB158CFD36007EF999 /* Products */,
+			);
+			sourceTree = "<group>";
+		};
+		B9ADC6BB158CFD36007EF999 /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				B9ADC6BA158CFD36007EF999 /* xdelta3-ios-test.app */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		B9ADC6BD158CFD36007EF999 /* Frameworks */ = {
+			isa = PBXGroup;
+			children = (
+				B9ADC6BE158CFD36007EF999 /* UIKit.framework */,
+				B9ADC6C0158CFD36007EF999 /* Foundation.framework */,
+				B9ADC6C2158CFD36007EF999 /* CoreGraphics.framework */,
+			);
+			name = Frameworks;
+			sourceTree = "<group>";
+		};
+		B9ADC6C4158CFD36007EF999 /* xdelta3-ios-test */ = {
+			isa = PBXGroup;
+			children = (
+				B9001B56158D008900B9E855 /* xdelta3-blkcache.h */,
+				B9001B57158D008900B9E855 /* xdelta3-cfgs.h */,
+				B9001B58158D008900B9E855 /* xdelta3-decode.h */,
+				B9001B59158D008900B9E855 /* xdelta3-djw.h */,
+				B9001B5A158D008900B9E855 /* xdelta3-fgk.h */,
+				B9001B5B158D008900B9E855 /* xdelta3-hash.h */,
+				B9001B5C158D008900B9E855 /* xdelta3-internal.h */,
+				B9001B5D158D008900B9E855 /* xdelta3-list.h */,
+				B9001B5E158D008900B9E855 /* xdelta3-main.h */,
+				B9001B5F158D008900B9E855 /* xdelta3-merge.h */,
+				B9001B60158D008900B9E855 /* xdelta3-python.h */,
+				B9001B61158D008900B9E855 /* xdelta3-second.h */,
+				B9001B62158D008900B9E855 /* xdelta3-test.h */,
+				B9001B63158D008900B9E855 /* xdelta3.c */,
+				B9001B64158D008900B9E855 /* xdelta3.h */,
+				B9ADC6CD158CFD36007EF999 /* Xd3iOSAppDelegate.h */,
+				B9ADC6CE158CFD36007EF999 /* Xd3iOSAppDelegate.m */,
+				B9ADC6D0158CFD36007EF999 /* MainStoryboard_iPhone.storyboard */,
+				B9ADC6D3158CFD36007EF999 /* MainStoryboard_iPad.storyboard */,
+				B9ADC6D6158CFD36007EF999 /* Xd3iOSViewController.h */,
+				B9ADC6D7158CFD36007EF999 /* Xd3iOSViewController.m */,
+				B9ADC6C5158CFD36007EF999 /* Supporting Files */,
+			);
+			path = "xdelta3-ios-test";
+			sourceTree = "<group>";
+		};
+		B9ADC6C5158CFD36007EF999 /* Supporting Files */ = {
+			isa = PBXGroup;
+			children = (
+				B9313C39158D11BA001C1F28 /* file_v1_to_v2.bin */,
+				B9313C3A158D11BA001C1F28 /* file_v1.bin */,
+				B9313C3B158D11BA001C1F28 /* file_v2.bin */,
+				B9ADC6C6158CFD36007EF999 /* xdelta3-ios-test-Info.plist */,
+				B9ADC6C7158CFD36007EF999 /* InfoPlist.strings */,
+				B9ADC6CA158CFD36007EF999 /* main.m */,
+				B9ADC6CC158CFD36007EF999 /* xdelta3-ios-test-Prefix.pch */,
+			);
+			name = "Supporting Files";
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXNativeTarget section */
+		B9ADC6B9158CFD36007EF999 /* xdelta3-ios-test */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = B9ADC6DB158CFD36007EF999 /* Build configuration list for PBXNativeTarget "xdelta3-ios-test" */;
+			buildPhases = (
+				B9ADC6B6158CFD36007EF999 /* Sources */,
+				B9ADC6B7158CFD36007EF999 /* Frameworks */,
+				B9ADC6B8158CFD36007EF999 /* Resources */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = "xdelta3-ios-test";
+			productName = "xdelta3-ios-test";
+			productReference = B9ADC6BA158CFD36007EF999 /* xdelta3-ios-test.app */;
+			productType = "com.apple.product-type.application";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		B9ADC6B1158CFD36007EF999 /* Project object */ = {
+			isa = PBXProject;
+			attributes = {
+				LastUpgradeCheck = 0420;
+			};
+			buildConfigurationList = B9ADC6B4158CFD36007EF999 /* Build configuration list for PBXProject "xdelta3-ios-test" */;
+			compatibilityVersion = "Xcode 3.2";
+			developmentRegion = English;
+			hasScannedForEncodings = 0;
+			knownRegions = (
+				en,
+			);
+			mainGroup = B9ADC6AF158CFD36007EF999;
+			productRefGroup = B9ADC6BB158CFD36007EF999 /* Products */;
+			projectDirPath = "";
+			projectRoot = "";
+			targets = (
+				B9ADC6B9158CFD36007EF999 /* xdelta3-ios-test */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXResourcesBuildPhase section */
+		B9ADC6B8158CFD36007EF999 /* Resources */ = {
+			isa = PBXResourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				B9ADC6C9158CFD36007EF999 /* InfoPlist.strings in Resources */,
+				B9ADC6D2158CFD36007EF999 /* MainStoryboard_iPhone.storyboard in Resources */,
+				B9ADC6D5158CFD36007EF999 /* MainStoryboard_iPad.storyboard in Resources */,
+				B9313C3C158D11BA001C1F28 /* file_v1_to_v2.bin in Resources */,
+				B9313C3D158D11BA001C1F28 /* file_v1.bin in Resources */,
+				B9313C3E158D11BA001C1F28 /* file_v2.bin in Resources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXResourcesBuildPhase section */
+
+/* Begin PBXSourcesBuildPhase section */
+		B9ADC6B6158CFD36007EF999 /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				B9ADC6CB158CFD36007EF999 /* main.m in Sources */,
+				B9ADC6CF158CFD36007EF999 /* Xd3iOSAppDelegate.m in Sources */,
+				B9ADC6D8158CFD36007EF999 /* Xd3iOSViewController.m in Sources */,
+				B9001B65158D008900B9E855 /* xdelta3.c in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin PBXVariantGroup section */
+		B9ADC6C7158CFD36007EF999 /* InfoPlist.strings */ = {
+			isa = PBXVariantGroup;
+			children = (
+				B9ADC6C8158CFD36007EF999 /* en */,
+			);
+			name = InfoPlist.strings;
+			sourceTree = "<group>";
+		};
+		B9ADC6D0158CFD36007EF999 /* MainStoryboard_iPhone.storyboard */ = {
+			isa = PBXVariantGroup;
+			children = (
+				B9ADC6D1158CFD36007EF999 /* en */,
+			);
+			name = MainStoryboard_iPhone.storyboard;
+			sourceTree = "<group>";
+		};
+		B9ADC6D3158CFD36007EF999 /* MainStoryboard_iPad.storyboard */ = {
+			isa = PBXVariantGroup;
+			children = (
+				B9ADC6D4158CFD36007EF999 /* en */,
+			);
+			name = MainStoryboard_iPad.storyboard;
+			sourceTree = "<group>";
+		};
+/* End PBXVariantGroup section */
+
+/* Begin XCBuildConfiguration section */
+		B9ADC6D9158CFD36007EF999 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				ARCHS = "$(ARCHS_STANDARD_32_BIT)";
+				CLANG_ENABLE_OBJC_ARC = YES;
+				"CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer";
+				COPY_PHASE_STRIP = NO;
+				GCC_C_LANGUAGE_STANDARD = gnu99;
+				GCC_DYNAMIC_NO_PIC = NO;
+				GCC_INPUT_FILETYPE = sourcecode.c.objc;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				GCC_PREPROCESSOR_DEFINITIONS = (
+					"XD3_USE_LARGEFILE64=0",
+					"XD3_POSIX=1",
+					"EXTERNAL_COMPRESSION=0",
+					"NOT_MAIN=1",
+					"XD3_MAIN=1",
+					"SECONDARY_DJW=1",
+					"XD3_DEBUG=1",
+					"REGRESSION_TEST=1",
+					"SHELL_TESTS=0",
+					"SECONDARY_FGK=1",
+					"DEBUG=1",
+					"$(inherited)",
+				);
+				GCC_SYMBOLS_PRIVATE_EXTERN = NO;
+				GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+				GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 5.0;
+				OTHER_CFLAGS = (
+					"-DXD3_USE_LARGEFILE64=0",
+					"-DXD3_POSIX=1",
+					"-DEXTERNAL_COMPRESSION=0",
+					"-DNOT_MAIN=1",
+					"-DXD3_MAIN=1",
+					"-DSECONDARY_DJW=1",
+					"-DXD3_DEBUG=1",
+					"-DREGRESSION_TEST=1",
+					"-DSHELL_TESTS=0",
+					"-DSECONDARY_FGK=1",
+				);
+				SDKROOT = iphoneos;
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Debug;
+		};
+		B9ADC6DA158CFD36007EF999 /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				ARCHS = "$(ARCHS_STANDARD_32_BIT)";
+				CLANG_ENABLE_OBJC_ARC = YES;
+				"CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer";
+				COPY_PHASE_STRIP = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu99;
+				GCC_INPUT_FILETYPE = sourcecode.c.objc;
+				GCC_PREPROCESSOR_DEFINITIONS = (
+					"XD3_USE_LARGEFILE64=0",
+					"XD3_POSIX=1",
+					"EXTERNAL_COMPRESSION=0",
+					"NOT_MAIN=1",
+					"XD3_MAIN=1",
+					"SECONDARY_DJW=1",
+					"XD3_DEBUG=1",
+					"REGRESSION_TEST=1",
+					"SHELL_TESTS=0",
+					"SECONDARY_FGK=1",
+				);
+				GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+				GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 5.0;
+				OTHER_CFLAGS = (
+					"-DXD3_USE_LARGEFILE64=0",
+					"-DXD3_POSIX=1",
+					"-DEXTERNAL_COMPRESSION=0",
+					"-DNOT_MAIN=1",
+					"-DXD3_MAIN=1",
+					"-DSECONDARY_DJW=1",
+					"-DXD3_DEBUG=1",
+					"-DREGRESSION_TEST=1",
+					"-DSHELL_TESTS=0",
+					"-DSECONDARY_FGK=1",
+				);
+				SDKROOT = iphoneos;
+				TARGETED_DEVICE_FAMILY = "1,2";
+				VALIDATE_PRODUCT = YES;
+			};
+			name = Release;
+		};
+		B9ADC6DC158CFD36007EF999 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_PRECOMPILE_PREFIX_HEADER = YES;
+				GCC_PREFIX_HEADER = "xdelta3-ios-test/xdelta3-ios-test-Prefix.pch";
+				INFOPLIST_FILE = "xdelta3-ios-test/xdelta3-ios-test-Info.plist";
+				OTHER_CFLAGS = "";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				WRAPPER_EXTENSION = app;
+			};
+			name = Debug;
+		};
+		B9ADC6DD158CFD36007EF999 /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_PRECOMPILE_PREFIX_HEADER = YES;
+				GCC_PREFIX_HEADER = "xdelta3-ios-test/xdelta3-ios-test-Prefix.pch";
+				INFOPLIST_FILE = "xdelta3-ios-test/xdelta3-ios-test-Info.plist";
+				OTHER_CFLAGS = "";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				WRAPPER_EXTENSION = app;
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		B9ADC6B4158CFD36007EF999 /* Build configuration list for PBXProject "xdelta3-ios-test" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				B9ADC6D9158CFD36007EF999 /* Debug */,
+				B9ADC6DA158CFD36007EF999 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		B9ADC6DB158CFD36007EF999 /* Build configuration list for PBXNativeTarget "xdelta3-ios-test" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				B9ADC6DC158CFD36007EF999 /* Debug */,
+				B9ADC6DD158CFD36007EF999 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = B9ADC6B1158CFD36007EF999 /* Project object */;
+}
diff --git a/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSAppDelegate.h b/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSAppDelegate.h
new file mode 100644
index 0000000000..b421071bac
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSAppDelegate.h
@@ -0,0 +1,23 @@
+/* xdelta3 - delta compression tools and library -*- Mode: objc *-*
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+#import <UIKit/UIKit.h>
+
+@interface Xd3iOSAppDelegate : UIResponder <UIApplicationDelegate>
+// Application delegate of the xdelta3 iOS test app.
+@property (strong, nonatomic) UIWindow *window;  // the application's window
+
+@end
diff --git a/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSAppDelegate.m b/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSAppDelegate.m
new file mode 100644
index 0000000000..629faea8f4
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSAppDelegate.m
@@ -0,0 +1,68 @@
+/* xdelta3 - delta compression tools and library -*- Mode: objc *-*
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+#import "Xd3iOSAppDelegate.h"
+
+@implementation Xd3iOSAppDelegate
+// Every lifecycle hook below is left at its template default: none of them does any work.
+@synthesize window = _window;
+
+- (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions
+{
+    // Override point for customization after application launch.
+    return YES;
+}
+
+- (void)applicationWillResignActive:(UIApplication *)application
+{
+    /*
+     Sent when the application is about to move from active to inactive state. This can occur for certain types of temporary interruptions (such as an incoming phone call or SMS message) or when the user quits the application and it begins the transition to the background state.
+     Use this method to pause ongoing tasks, disable timers, and throttle down OpenGL ES frame rates. Games should use this method to pause the game.
+     */
+}
+
+- (void)applicationDidEnterBackground:(UIApplication *)application
+{
+    /*
+     Use this method to release shared resources, save user data, invalidate timers, and store enough application state information to restore your application to its current state in case it is terminated later.
+     If your application supports background execution, this method is called instead of applicationWillTerminate: when the user quits.
+     */
+}
+
+- (void)applicationWillEnterForeground:(UIApplication *)application
+{
+    /*
+     Called as part of the transition from the background to the inactive state; here you can undo many of the changes made on entering the background.
+     */
+}
+
+- (void)applicationDidBecomeActive:(UIApplication *)application
+{
+    /*
+     Restart any tasks that were paused (or not yet started) while the application was inactive. If the application was previously in the background, optionally refresh the user interface.
+     */
+}
+
+- (void)applicationWillTerminate:(UIApplication *)application
+{
+    /*
+     Called when the application is about to terminate.
+     Save data if appropriate.
+     See also applicationDidEnterBackground:.
+     */
+}
+
+@end
diff --git a/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSViewController.h b/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSViewController.h
new file mode 100644
index 0000000000..287a4be58e
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSViewController.h
@@ -0,0 +1,28 @@
+/* xdelta3 - delta compression tools and library -*- Mode: objc *-*
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+#import <UIKit/UIKit.h>
+
+// Controller of the test screen. It acts as delegate for both the seed text field and the output text view.
+@interface Xd3iOSViewController : UIViewController <UITextFieldDelegate, UITextViewDelegate> {
+    NSString *inputSeed;
+}
+- (IBAction)startTest:(id)sender;                             // starts a test run
+@property (weak, nonatomic) IBOutlet UITextField *theSeed;    // seed entry field
+@property (weak, nonatomic) IBOutlet UITextView *theView;     // view that displays theOutput
+@property (atomic, retain) NSMutableString *theOutput;        // accumulated test output
+@property (nonatomic) BOOL inTest;                            // YES while a run is in progress
+@end
diff --git a/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSViewController.m b/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSViewController.m
new file mode 100644
index 0000000000..0db7e390bd
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/Xd3iOSViewController.m
@@ -0,0 +1,177 @@
+/* xdelta3 - delta compression tools and library -*- Mode: objc *-*
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+#import "Xd3iOSViewController.h"
+#include "xdelta3.h"
+#include "dispatch/queue.h"
+#include "Foundation/NSBundle.h"
+
+extern void (*xprintf_message_func)(const char* msg);
+void print_to_view(const char* buf);
+int xd3_main_cmdline(int argc, char **argv);
+void do_localfile_test(void);
+int compare_files(const char* file1, const char* file2);
+Xd3iOSViewController *static_ptr;
+
+@implementation Xd3iOSViewController
+@synthesize theSeed = _theSeed;
+@synthesize theView = _theView;
+@synthesize theOutput = _theOutput;
+@synthesize inTest = _inTest;
+
+- (void)didReceiveMemoryWarning
+{
+    [super didReceiveMemoryWarning];  // no extra handling; only the superclass implementation runs
+}
+
+#pragma mark - View lifecycle
+
+- (void)viewDidLoad
+{
+    [super viewDidLoad];  // no extra setup; only the superclass implementation runs
+}
+
+- (void)viewDidUnload
+{
+    // Clear both outlets once; the original cleared theView twice.
+    [self setTheSeed:nil];
+    [self setTheView:nil];
+    [super viewDidUnload];
+}
+
+- (void)viewWillAppear:(BOOL)animated
+{
+    [super viewWillAppear:animated];  // pass-through to the superclass
+}
+
+- (void)viewDidAppear:(BOOL)animated
+{
+    [super viewDidAppear:animated];  // pass-through to the superclass
+}
+
+- (void)viewWillDisappear:(BOOL)animated
+{
+	[super viewWillDisappear:animated];  // pass-through to the superclass
+}
+
+- (void)viewDidDisappear:(BOOL)animated
+{
+	[super viewDidDisappear:animated];  // pass-through to the superclass
+}
+
+/*
+ Accepts the two portrait orientations and refuses every other
+ orientation.
+ */
+- (BOOL)shouldAutorotateToInterfaceOrientation:(UIInterfaceOrientation)interfaceOrientation
+{
+    BOOL isPortrait = (interfaceOrientation == UIInterfaceOrientationPortrait);
+    BOOL isUpsideDown = (interfaceOrientation == UIInterfaceOrientationPortraitUpsideDown);
+
+    return (isPortrait || isUpsideDown) ? YES : NO;
+}
+/* Text-field delegate hook: gives up first responder when the field is the seed field. */
+- (BOOL)textFieldShouldReturn:(UITextField*)theTextField {
+    BOOL isSeedField = (theTextField == self.theSeed);
+    if (isSeedField) [theTextField resignFirstResponder];
+    return YES;
+}
+/* Button action: runs the local-file test and then the xdelta3 "test" command on a background
+   queue, with output routed into theView.  Ignored while a run is already in progress. */
+- (IBAction)startTest:(id)sender {
+    if (self.inTest) return;
+    self.inTest = YES;
+    /* NOTE(review): the seed is only echoed in the banner; nothing here passes it to the test. */
+    NSString *seedString = self.theSeed.text;
+    if ([seedString length] == 0) seedString = @"RFC3284";
+    static_ptr = self;
+    xprintf_message_func = &print_to_view;  /* presumably xdelta3's message hook -- confirm */
+    self.theOutput = [[NSMutableString alloc] initWithFormat:@"Starting test (seed=%@)\n", seedString];
+    self.theView.text = self.theOutput;
+    dispatch_queue_t mq = dispatch_get_main_queue();
+    dispatch_queue_t dq = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
+    dispatch_async(dq, ^{
+        do_localfile_test();
+        char *argv[] = { "xdelta3", "test", NULL };
+        /* NOTE(review): assumes 0 means success (usual exit-status convention) -- confirm. */
+        int status = xd3_main_cmdline(2, argv);
+        print_to_view(status == 0 ? "Finished unittest: success" : "Finished unittest: FAILED");
+        dispatch_async(mq, ^{
+            self.inTest = NO;  /* cleared on the main queue */
+        });
+    });
+}
+
+void printns_to_view(NSString* ns);
+/* Appends ns to the on-screen log from the main queue; once the log holds 25000+ chars it restarts from ns. */
+void printns_to_view(NSString* ns) {
+    if (ns == nil) return;  /* appendString: and initWithString: both raise on nil */
+    dispatch_async(dispatch_get_main_queue(), ^{
+        if ([static_ptr.theOutput length] < 25000) {
+            [static_ptr.theOutput appendString:ns];
+        } else {
+            static_ptr.theOutput = [[NSMutableString alloc] initWithString:ns];
+        }
+        static_ptr.theView.text = static_ptr.theOutput;
+        [static_ptr.theView scrollRectToVisible:CGRectMake(0, static_ptr.theView.contentSize.height - 1, 1, 1) animated:NO];
+    });
+}
+
+/* C-string entry point: converts buf (read as ASCII) to an NSString and forwards it to printns_to_view. */
+void print_to_view(const char* buf) {
+    printns_to_view([NSString stringWithCString:buf encoding:NSASCIIStringEncoding]);
+}
+
+/* Applies the bundled file_v1_to_v2 delta to file_v1 and checks the result against file_v2. */
+void do_localfile_test(void) {
+    NSBundle *bundle = [NSBundle mainBundle];
+    NSString *localfile1 = [bundle pathForResource:@"file_v1" ofType:@"bin"];
+    NSString *localfile2 = [bundle pathForResource:@"file_v2" ofType:@"bin"];
+    NSString *localfiled = [bundle pathForResource:@"file_v1_to_v2" ofType:@"bin"];
+    if (localfile1 == nil || localfile2 == nil || localfiled == nil) {
+        /* A missing resource yields nil, and UTF8String on nil would put NULL into argv. */
+        printns_to_view(@"Local test files are missing from the bundle\n");
+        return;
+    }
+    printns_to_view([localfile1 stringByAppendingString:@"\n"]);
+    printns_to_view([localfile2 stringByAppendingString:@"\n"]);
+    printns_to_view([localfiled stringByAppendingString:@"\n"]);
+    NSString *tmpfile = [NSTemporaryDirectory() stringByAppendingPathComponent:@"delta.tmp"];
+    printns_to_view([tmpfile stringByAppendingString:@"\n"]);
+    /* Command line: xdelta3 -dfvv -s file_v1 file_v1_to_v2 delta.tmp (argv is NULL-terminated). */
+    char *argv[] = {
+        "xdelta3", "-dfvv", "-s",
+        (char*)[localfile1 UTF8String],
+        (char*)[localfiled UTF8String],
+        (char*)[tmpfile UTF8String], NULL };
+    xd3_main_cmdline(6, argv);
+    NSFileManager *filemgr = [NSFileManager defaultManager];
+    if ([filemgr contentsEqualAtPath: localfile2 andPath: tmpfile] == YES) {
+        printns_to_view(@"File contents match\n");
+    } else {
+        NSError *err1 = NULL;
+        NSDictionary *d1 = [filemgr attributesOfItemAtPath: tmpfile error: &err1];
+        if (err1 != NULL) {
+            printns_to_view([NSString stringWithFormat:@"File localfile2 could not stat %@\n", tmpfile]);
+        } else {
+            printns_to_view([NSString stringWithFormat:@"File contents do not match!!!! tmpfile size=%llu\n", [d1 fileSize]]);
+        }
+        compare_files([localfile2 UTF8String], [tmpfile UTF8String]);
+    }
+    print_to_view("Finished localfile test.\n");
+}
+
+@end
diff --git a/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/InfoPlist.strings b/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/InfoPlist.strings
new file mode 100644
index 0000000000..477b28ff8f
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/InfoPlist.strings
@@ -0,0 +1,2 @@
+/* Localized versions of Info.plist keys */
+
diff --git a/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/MainStoryboard_iPad.storyboard b/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/MainStoryboard_iPad.storyboard
new file mode 100644
index 0000000000..7581bbee0f
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/MainStoryboard_iPad.storyboard
@@ -0,0 +1,77 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="1.0" toolsVersion="1938" systemVersion="11C74" targetRuntime="iOS.CocoaTouch.iPad" propertyAccessControl="none" initialViewController="2">
+    <dependencies>
+        <development defaultVersion="4200" identifier="xcode"/>
+        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="933"/>
+    </dependencies>
+    <scenes>
+        <scene sceneID="4">
+            <objects>
+                <placeholder placeholderIdentifier="IBFirstResponder" id="3" sceneMemberID="firstResponder"/>
+                <viewController id="2" customClass="Xd3iOSViewController" sceneMemberID="viewController">
+                    <view key="view" contentMode="scaleToFill" id="5">
+                        <rect key="frame" x="0.0" y="20" width="768" height="1004"/>
+                        <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
+                        <subviews>
+                            <button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" lineBreakMode="middleTruncation" id="d7Y-KS-zOa">
+                                <rect key="frame" x="258" y="28" width="197" height="37"/>
+                                <autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
+                                <fontDescription key="fontDescription" type="boldSystem" pointSize="15"/>
+                                <state key="normal" title="Start test">
+                                    <color key="titleColor" red="0.19607843459999999" green="0.30980393290000002" blue="0.52156865600000002" alpha="1" colorSpace="calibratedRGB"/>
+                                    <color key="titleShadowColor" white="0.5" alpha="1" colorSpace="calibratedWhite"/>
+                                </state>
+                                <state key="highlighted">
+                                    <color key="titleColor" white="1" alpha="1" colorSpace="calibratedWhite"/>
+                                </state>
+                                <connections>
+                                    <action selector="startTest:" destination="2" eventType="touchUpInside" id="f4X-jg-ZsU"/>
+                                </connections>
+                            </button>
+                            <textField opaque="NO" clipsSubviews="YES" contentMode="scaleToFill" contentHorizontalAlignment="left" contentVerticalAlignment="center" borderStyle="roundedRect" placeholder="Random seed" minimumFontSize="17" id="TZ8-OW-wjf">
+                                <rect key="frame" x="27" y="28" width="197" height="31"/>
+                                <autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
+                                <fontDescription key="fontDescription" type="system" pointSize="14"/>
+                                <textInputTraits key="textInputTraits" autocorrectionType="no"/>
+                                <connections>
+                                    <outlet property="delegate" destination="2" id="hjY-Ym-Fcw"/>
+                                </connections>
+                            </textField>
+                            <textView clipsSubviews="YES" multipleTouchEnabled="YES" contentMode="scaleToFill" showsHorizontalScrollIndicator="NO" editable="NO" id="LHz-h6-ZBC">
+                                <rect key="frame" x="27" y="88" width="721" height="887"/>
+                                <autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
+                                <color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="calibratedRGB"/>
+                                <fontDescription key="fontDescription" type="system" pointSize="14"/>
+                                <textInputTraits key="textInputTraits" autocapitalizationType="sentences"/>
+                                <connections>
+                                    <outlet property="delegate" destination="2" id="fwY-fT-bCV"/>
+                                </connections>
+                            </textView>
+                        </subviews>
+                        <color key="backgroundColor" red="0.13337372065218178" green="0.1801924475036723" blue="0.21739130434782605" alpha="1" colorSpace="calibratedRGB"/>
+                    </view>
+                    <connections>
+                        <outlet property="theSeed" destination="TZ8-OW-wjf" id="QuA-uT-5IR"/>
+                        <outlet property="theView" destination="LHz-h6-ZBC" id="s64-32-fBA"/>
+                    </connections>
+                </viewController>
+            </objects>
+            <point key="canvasLocation" x="-601" y="-1021"/>
+        </scene>
+    </scenes>
+    <classes>
+        <class className="Xd3iOSViewController" superclassName="UIViewController">
+            <source key="sourceIdentifier" type="project" relativePath="./Classes/Xd3iOSViewController.h"/>
+            <relationships>
+                <relationship kind="action" name="startTest:"/>
+                <relationship kind="outlet" name="theSeed" candidateClass="UITextField"/>
+                <relationship kind="outlet" name="theView" candidateClass="UITextView"/>
+            </relationships>
+        </class>
+    </classes>
+    <simulatedMetricsContainer key="defaultSimulatedMetrics">
+        <simulatedStatusBarMetrics key="statusBar" statusBarStyle="blackTranslucent"/>
+        <simulatedOrientationMetrics key="orientation"/>
+        <simulatedScreenMetrics key="destination"/>
+    </simulatedMetricsContainer>
+</document>
diff --git a/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/MainStoryboard_iPhone.storyboard b/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/MainStoryboard_iPhone.storyboard
new file mode 100644
index 0000000000..08b2175e08
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/en.lproj/MainStoryboard_iPhone.storyboard
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="1.0" toolsVersion="1906" systemVersion="11A511" targetRuntime="iOS.CocoaTouch" nextObjectID="6" propertyAccessControl="none" initialViewController="2">
+    <dependencies>
+        <development defaultVersion="4200" identifier="xcode"/>
+        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="902"/>
+    </dependencies>
+    <scenes>
+        <scene sceneID="5">
+            <objects>
+                <placeholder placeholderIdentifier="IBFirstResponder" id="4" sceneMemberID="firstResponder"/>
+                <viewController id="2" customClass="Xd3iOSViewController" sceneMemberID="viewController">
+                    <view key="view" contentMode="scaleToFill" id="3">
+                        <rect key="frame" x="0.0" y="20" width="320" height="460"/>
+                        <autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMaxY="YES"/>
+                        <subviews/>
+                        <color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="calibratedWhite"/>
+                    </view>
+                </viewController>
+            </objects>
+        </scene>
+    </scenes>
+    <simulatedMetricsContainer key="defaultSimulatedMetrics">
+        <simulatedStatusBarMetrics key="statusBar"/>
+        <simulatedOrientationMetrics key="orientation"/>
+        <simulatedScreenMetrics key="destination"/>
+    </simulatedMetricsContainer>
+</document>
\ No newline at end of file
diff --git a/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/main.m b/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/main.m
new file mode 100644
index 0000000000..67f1e3d997
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/main.m
@@ -0,0 +1,25 @@
+/* xdelta3 - delta compression tools and library -*- Mode: objc *-*
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+#import <UIKit/UIKit.h>
+
+#import "Xd3iOSAppDelegate.h"
+
+/* Process entry point: calls UIApplicationMain with Xd3iOSAppDelegate named as the delegate class. */
+int main(int argc, char *argv[]) {
+    @autoreleasepool {
+        return UIApplicationMain(argc, argv, nil, NSStringFromClass([Xd3iOSAppDelegate class]));
+    }
+}
diff --git a/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/xdelta3-ios-test-Info.plist b/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/xdelta3-ios-test-Info.plist
new file mode 100644
index 0000000000..d0e8a58880
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/xdelta3-ios-test-Info.plist
@@ -0,0 +1,52 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>CFBundleDevelopmentRegion</key>
+	<string>en</string>
+	<key>CFBundleDisplayName</key>
+	<string>${PRODUCT_NAME}</string>
+	<key>CFBundleExecutable</key>
+	<string>${EXECUTABLE_NAME}</string>
+	<key>CFBundleIconFiles</key>
+	<array/>
+	<key>CFBundleIdentifier</key>
+	<string>Joshua-MacDonald.${PRODUCT_NAME:rfc1034identifier}</string>
+	<key>CFBundleInfoDictionaryVersion</key>
+	<string>6.0</string>
+	<key>CFBundleName</key>
+	<string>${PRODUCT_NAME}</string>
+	<key>CFBundlePackageType</key>
+	<string>APPL</string>
+	<key>CFBundleShortVersionString</key>
+	<string>1.0</string>
+	<key>CFBundleSignature</key>
+	<string>????</string>
+	<key>CFBundleVersion</key>
+	<string>1.0</string>
+	<key>LSRequiresIPhoneOS</key>
+	<true/>
+	<key>UIMainStoryboardFile</key>
+	<string>MainStoryboard_iPhone</string>
+	<key>UIMainStoryboardFile~ipad</key>
+	<string>MainStoryboard_iPad</string>
+	<key>UIRequiredDeviceCapabilities</key>
+	<array>
+		<string>armv7</string>
+	</array>
+	<key>UISupportedInterfaceOrientations</key>
+	<array>
+		<string>UIInterfaceOrientationPortrait</string>
+		<string>UIInterfaceOrientationLandscapeLeft</string>
+		<string>UIInterfaceOrientationLandscapeRight</string>
+		<string>UIInterfaceOrientationPortraitUpsideDown</string>
+	</array>
+	<key>UISupportedInterfaceOrientations~ipad</key>
+	<array>
+		<string>UIInterfaceOrientationPortrait</string>
+		<string>UIInterfaceOrientationPortraitUpsideDown</string>
+		<string>UIInterfaceOrientationLandscapeLeft</string>
+		<string>UIInterfaceOrientationLandscapeRight</string>
+	</array>
+</dict>
+</plist>
diff --git a/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/xdelta3-ios-test-Prefix.pch b/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/xdelta3-ios-test-Prefix.pch
new file mode 100644
index 0000000000..69f0135573
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/examples/iOS/xdelta3-ios-test/xdelta3-ios-test/xdelta3-ios-test-Prefix.pch
@@ -0,0 +1,14 @@
+//
+// Prefix header for all source files of the 'xdelta3-ios-test' target in the 'xdelta3-ios-test' project
+//
+
+#import <Availability.h>
+
+#ifndef __IPHONE_5_0
+#warning "This project uses features only available in iOS SDK 5.0 and later."
+#endif
+
+#ifdef __OBJC__
+    #import <UIKit/UIKit.h>
+    #import <Foundation/Foundation.h>
+#endif
diff --git a/third-party/xdelta3/xdelta3/examples/small_page_test.c b/third-party/xdelta3/xdelta3/examples/small_page_test.c
new file mode 100644
index 0000000000..0e33547694
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/examples/small_page_test.c
@@ -0,0 +1,215 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+#include <stdio.h>
+
+#define PAGE_SIZE 4096
+
+#define SPACE_MAX 131072   // how much memory per process
+#define OUTPUT_MAX 1024    // max size for output
+#define XD3_ALLOCSIZE 256  // internal size for various buffers
+#define IOPT_SIZE 128      // instruction buffer
+
+// SPACE_MAX of 32K is sufficient for most inputs with XD3_COMPLEVEL_1
+// XD3_COMPLEVEL_9 requires about 4x more space than XD3_COMPLEVEL_1
+
+#include "xdelta3.h"
+#include "xdelta3.c"
+
+typedef struct _context {
+  uint8_t *buffer;  /* base of the arena; process_page points it at the context itself */
+  int allocated;    /* bytes handed out so far; starts at sizeof(context_t) */
+} context_t;
+
+static int max_allocated = 0;
+
+/* Bump allocator handed to xdelta3: carves items*size bytes out of ctx->buffer, or
+   returns NULL once the SPACE_MAX budget would be exceeded.  Nothing is aligned. */
+void*
+process_alloc (void* opaque, usize_t items, usize_t size)
+{
+  context_t *ctx = (context_t*) opaque;
+  usize_t remaining = (usize_t) (SPACE_MAX - ctx->allocated);
+  void *ret;
+
+  /* Divide rather than multiply so an overflowing items*size is rejected too. */
+  if (size != 0 && items > remaining / size)
+    return NULL;
+  ret = ctx->buffer + ctx->allocated;
+  ctx->allocated += (int) (items * size);
+  return ret;
+}
+
+void
+process_free (void* opaque, void *ptr)
+{ /* no-op: blocks are carved from one buffer that process_page frees as a whole */
+}
+
+/* Runs FUNC over INPUT against the one-page SOURCE, serving every xdelta3 allocation from a
+   SPACE_MAX arena.  Returns 0, -1 if the arena cannot be allocated, or the xdelta3 error. */
+int
+process_page (int            is_encode,
+	      int          (*func) (xd3_stream *),
+	      const uint8_t *input,
+	      usize_t        input_size,
+	      const uint8_t *source,
+	      uint8_t       *output,
+	      usize_t       *output_size,
+	      usize_t        output_size_max,
+	      int            flags) {
+  /* On my x86 this is 1072 of objects on the stack */
+  xd3_stream stream;
+  xd3_config config;
+  xd3_source src;
+  context_t *ctx = calloc(SPACE_MAX, 1);
+  int ret;
+
+  if (ctx == NULL)
+    {
+      printf("calloc failed\n");
+      return -1;
+    }
+
+  /* Zero both structs so no field reaches xdelta3 uninitialized (src used to be left as-is). */
+  memset (&config, 0, sizeof(config));
+  memset (&src, 0, sizeof(src));
+  ctx->buffer = (uint8_t*)ctx;
+  ctx->allocated = sizeof(*ctx);
+
+  config.flags = flags;
+  config.winsize = PAGE_SIZE;
+  config.sprevsz = PAGE_SIZE;
+  config.srcwin_maxsz = PAGE_SIZE;
+  config.iopt_size = IOPT_SIZE;
+  config.alloc = &process_alloc;
+  config.freef = &process_free;
+  config.opaque = (void*) ctx;
+
+  src.blksize = PAGE_SIZE;
+  src.onblk = PAGE_SIZE;
+  src.curblk = source;
+  src.curblkno = 0;
+
+  if ((ret = xd3_config_stream (&stream, &config)) != 0 ||
+      (ret = xd3_set_source_and_size (&stream, &src, PAGE_SIZE)) != 0 ||
+      (ret = xd3_process_stream (is_encode,
+				 &stream,
+				 func, 1,
+				 input, input_size,
+				 output, output_size,
+				 output_size_max)) != 0)
+    {
+      if (stream.msg != NULL)
+	fprintf(stderr, "stream message: %s\n", stream.msg);
+    }
+
+  xd3_free_stream (&stream);
+  if (max_allocated < ctx->allocated)
+    {
+      max_allocated = ctx->allocated;
+      fprintf(stderr, "max allocated %d\n", max_allocated);
+    }
+
+  free(ctx);
+  return ret;
+}
+
+/* Round-trips one random page: flips a byte every STRIDE bytes, encodes the changed page
+   against the original, decodes it again and compares.  Returns 0 when the pages match. */
+int test(int stride, int encode_flags)
+{
+  uint8_t frompg[PAGE_SIZE];
+  uint8_t topg[PAGE_SIZE];
+  uint8_t output[OUTPUT_MAX];
+  uint8_t reout[PAGE_SIZE];
+  usize_t output_size;
+  usize_t re_size;
+  int i, j, ret;
+
+  for (i = 0; i < PAGE_SIZE; i++)
+    topg[i] = frompg[i] = (rand() >> 3 ^ rand() >> 6 ^ rand() >> 9);
+
+  // change 1 byte every stride
+  if (stride > 0)
+    {
+      for (j = stride; j <= PAGE_SIZE; j += stride)
+	{
+	  topg[j - 1] ^= 0xff;
+	}
+    }
+
+  if ((ret = process_page (1, xd3_encode_input,
+			   topg, PAGE_SIZE,
+			   frompg, output,
+			   &output_size, OUTPUT_MAX,
+			   encode_flags)) != 0)
+    {
+      fprintf (stderr, "encode failed: stride %d flags 0x%x\n",
+	       stride, encode_flags);
+      return ret;
+    }
+
+  if ((ret = process_page (0, xd3_decode_input,
+			   output, output_size,
+			   frompg, reout,
+			   &re_size, PAGE_SIZE,
+			   0)) != 0)
+    {
+      fprintf (stderr, "decode failed: stride %d output_size %u flags 0x%x\n",
+	       stride, (unsigned) output_size, encode_flags);
+      return ret;
+    }
+
+  if (output_size > OUTPUT_MAX || re_size != PAGE_SIZE)
+    {
+      fprintf (stderr, "internal error: %u != %u\n", (unsigned) output_size, (unsigned) re_size);
+      return -1;
+    }
+
+  for (i = 0; i < PAGE_SIZE; i++)
+    {
+      if (reout[i] != topg[i])
+	{
+	  fprintf (stderr, "encode-decode error: position %d\n", i);
+	  return -1;
+	}
+    }
+
+  fprintf(stderr, "stride %d flags 0x%x size %u ",
+	  stride, encode_flags, (unsigned) output_size);  /* cast: usize_t width is not fixed here */
+  fprintf(stderr, "%s\n", (ret == 0) ? "OK" : "FAIL");
+
+  return 0;
+}
+
+/* Sweeps compression levels 1, 3, 6, 9 and every even stride up to PAGE_SIZE, with and
+   without XD3_SEC_DJW.  Exits non-zero if any test() call failed (it used to always exit 0). */
+int main()
+{
+  int stride;
+  int level;
+  int failures = 0;
+  for (level = 1; level < 10; level = (level == 1 ? 3 : level + 3))
+    {
+      int lflag = level << XD3_COMPLEVEL_SHIFT;
+      for (stride = 2; stride <= PAGE_SIZE; stride += 2)
+	{
+	  failures += (test(stride, lflag) != 0);
+	  failures += (test(stride, lflag | XD3_SEC_DJW) != 0);
+	}
+    }
+  return failures != 0;
+}
diff --git a/third-party/xdelta3/xdelta3/examples/speed_test.c b/third-party/xdelta3/xdelta3/examples/speed_test.c
new file mode 100644
index 0000000000..f8fbbb7509
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/examples/speed_test.c
@@ -0,0 +1,87 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+#include "test.h"
+
+usize_t bench_speed(const uint8_t *from_buf, const size_t from_len,
+		 const uint8_t *to_buf, const size_t to_len,
+		 uint8_t *delta_buf, const size_t delta_alloc,
+		 int flags) {
+  usize_t delta_size;
+  int ret = xd3_encode_memory(to_buf, to_len, from_buf, from_len,
+			      delta_buf, &delta_size, delta_alloc, flags);
+  if (ret != 0) {
+    fprintf(stderr, "encode failure: %d: %s\n", ret, xd3_strerror(ret));
+    abort();
+  }
+  return delta_size;
+}
+
+/* Benchmark driver: speed_test LEVEL COUNT FROM TO.  Encodes TO against FROM (FROM is not
+   read when it is the literal "null") COUNT times and reports the mean time per encode. */
+int main(int argc, char **argv) {
+  int repeat, level;
+  char *from, *to;
+  uint8_t *from_buf = NULL, *to_buf = NULL, *delta_buf = NULL;
+  size_t from_len = 0, to_len, delta_alloc, delta_size = 0;
+  long start, finish;
+  int i, ret;
+  int flags;
+
+  /* COUNT is the divisor of the reported average, so it must be positive. */
+  if (argc != 5 || (repeat = atoi(argv[2])) <= 0) {
+    fprintf(stderr, "usage: speed_test LEVEL COUNT FROM TO\n");
+    return 1;
+  }
+
+  level = atoi(argv[1]);
+  from = argv[3];
+  to = argv[4];
+  flags = (level << XD3_COMPLEVEL_SHIFT) & XD3_COMPLEVEL_MASK;
+
+  if ((strcmp(from, "null") != 0 &&
+       (ret = read_whole_file(from, &from_buf, &from_len))) ||
+      (ret = read_whole_file(to, &to_buf, &to_len))) {
+    fprintf(stderr, "read_whole_file error\n");
+    goto exit;
+  }
+
+  delta_alloc = to_len * 11 / 10;
+  delta_buf = main_malloc(delta_alloc);
+  if (delta_buf == NULL) {
+    fprintf(stderr, "out of memory\n");
+    goto exit;
+  }
+
+  start = get_millisecs_now();
+  for (i = 0; i < repeat; ++i) {
+    delta_size = bench_speed(from_buf, from_len,
+			     to_buf, to_len, delta_buf, delta_alloc, flags);
+  }
+  finish = get_millisecs_now();
+  fprintf(stderr,
+	  "STAT: encode %3.2f ms from %s to %s repeat %d %zubit delta %zu\n",
+	  (double)(finish - start) / repeat, from, to, repeat, sizeof (xoff_t) * 8, delta_size);
+  ret = 0;
+
+  if (0) {
+  exit:
+    ret = 1;
+  }
+  main_free(to_buf);
+  main_free(from_buf);
+  main_free(delta_buf);
+  return ret;
+}
diff --git a/third-party/xdelta3/xdelta3/examples/test.h b/third-party/xdelta3/xdelta3/examples/test.h
new file mode 100644
index 0000000000..f7082f2614
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/examples/test.h
@@ -0,0 +1,56 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+#define NOT_MAIN 1
+
+#include "xdelta3.h"
+#include "xdelta3.c"
+
+/* Reads the whole file NAME into a buffer obtained from main_malloc().
+
+   On success returns 0, with *buf_ptr pointing at the buffer and *buf_len
+   holding the size reported by main_file_stat().  On failure returns a
+   non-zero code and prints a short diagnostic on stderr.  When the failure
+   happens after the allocation, the buffer is left in *buf_ptr; this
+   function does not free it. */
+static int read_whole_file(const char *name,
+                           uint8_t **buf_ptr,
+                           size_t *buf_len) {
+  main_file file;
+  int ret;
+  xoff_t len;
+  usize_t nread;
+  main_file_init(&file);
+  file.filename = name;
+  ret = main_file_open(&file, name, XO_READ);
+  if (ret != 0) {
+    fprintf(stderr, "open failed\n");
+    goto exit;
+  }
+  ret = main_file_stat(&file, &len);
+  if (ret != 0) {
+    fprintf(stderr, "stat failed\n");
+    goto exit;
+  }
+
+  (*buf_len) = (size_t)len;
+  (*buf_ptr) = (uint8_t*) main_malloc(*buf_len);
+  /* A NULL result for a non-empty file is an allocation failure; reading
+     into it would dereference a null pointer.  A NULL result for a
+     zero-length request is not treated as an error. */
+  if (*buf_ptr == NULL && *buf_len != 0) {
+    fprintf(stderr, "malloc failed\n");
+    ret = ENOMEM;
+    goto exit;
+  }
+  ret = main_file_read(&file, *buf_ptr, *buf_len, &nread,
+                       "read failed");
+  /* A short read is reported as an error as well. */
+  if (ret == 0 && *buf_len == nread) {
+    ret = 0;
+  } else {
+    fprintf(stderr, "invalid read\n");
+    ret = XD3_INTERNAL;
+  }
+ exit:
+  main_file_cleanup(&file);
+  return ret;
+}
+
diff --git a/third-party/xdelta3/xdelta3/generate_build_files.sh b/third-party/xdelta3/xdelta3/generate_build_files.sh
new file mode 100644
index 0000000000..a01cb1e240
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/generate_build_files.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+# Regenerate the autotools build files.  The steps run in order and the
+# script stops at the first one that fails, exiting with its status.
+set -e
+
+aclocal
+autoreconf --install
+libtoolize
+autoconf
+automake --add-missing
+automake
diff --git a/third-party/xdelta3/xdelta3/go/src/regtest.go b/third-party/xdelta3/xdelta3/go/src/regtest.go
new file mode 100644
index 0000000000..9d91f6902e
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/go/src/regtest.go
@@ -0,0 +1,274 @@
+package main
+
+import (
+	"fmt"
+	"io"
+	"path"
+	"os"
+	"sort"
+	"time"
+
+	"xdelta"
+)
+
+// Hard-coded test inputs: the dataset directory, the two xdelta3 binaries
+// and the random seed used by offsetTest.
+const (
+	// Directory whose regular files are paired up by datasetTest.
+	xdataset = "/volume/home/jmacd/src/testdata"
+	// Binary used as the reference program ("comp" in main).
+	xcompare = "/volume/home/jmacd/src/xdelta-devel/xdelta3/build/x86_64-pc-linux-gnu-m64/xoff64/xdelta3"
+	// Binary used as the program under test ("prog" in main).
+	xdelta3  = "/volume/home/jmacd/src/xdelta-64bithash/xdelta3/build/x86_64-pc-linux-gnu-m64/usize64/xoff64/xdelta3"
+	// Seed passed to WriteRstreams by offsetTest.
+	seed = 1422253499919909358
+)
+
+// Config carries the size parameters shared by the regression tests.
+type Config struct {
+	// Passed to xdelta3 with the -B flag.
+	srcbuf_size int64
+	// Passed to xdelta3 with the -W flag.
+	window_size int64
+	blocksize   int
+}
+
+// NewC returns the default configuration: a 1<<26 source buffer, a 1<<22
+// window and a 1<<16 block size.
+func NewC() Config {
+	// TODO make these (and above) flags
+	return Config{
+		srcbuf_size: 1 << 26,
+		window_size: 1 << 22,
+		blocksize:   1 << 16,
+	}
+}
+
+// smokeTest round-trips a short target string through an encoder and a
+// decoder process started from p, both fed the same source string, and
+// records a failure when the decoded bytes differ from the target.
+func (c Config) smokeTest(t *xdelta.TestGroup, p xdelta.Program) {
+	target := "Hello world!"
+	source := "Hello world, nice to meet you!"
+
+	// Both processes are started with a source FIFO (third argument true).
+	enc, err := t.Exec("encode", p, true, []string{"-e"})
+	if err != nil {
+		t.Panic(err)
+	}
+	dec, err := t.Exec("decode", p, true, []string{"-d"})
+	if err != nil {
+		t.Panic(err)
+	}
+
+	// Start the readers before any writer so that the outputs are drained
+	// while the inputs are being written.
+	encodeout := t.Drain(enc.Stdout, "encode.stdout")
+	decodeout := t.Drain(dec.Stdout, "decode.stdout")
+
+	// Anything the processes print on stderr is echoed by Empty.
+	t.Empty(enc.Stderr, "encode")
+	t.Empty(dec.Stderr, "decode")
+
+	t.TestWrite("encode.stdin", enc.Stdin, []byte(target))
+	t.TestWrite("encode.srcin", enc.Srcin, []byte(source))
+
+	// The decoder input is the complete encoder output.
+	t.TestWrite("decode.stdin", dec.Stdin, <-encodeout)
+	t.TestWrite("decode.srcin", dec.Srcin, []byte(source))
+
+	if do := string(<-decodeout); do != target {
+		t.Panic(fmt.Errorf("It's not working! %s\n!=\n%s\n", do, target))
+	}
+	t.Wait(enc, dec)
+}
+
+// PairTest describes one encode/decode run of a program over a
+// (source, target) file pair together with the measurements it produced.
+type PairTest struct {
+	// Input
+	Config
+	program xdelta.Program
+	source, target string
+
+	// Output
+	TestOutput
+}
+
+// TestOutput holds the measurements of one encode/decode run, or the sum of
+// several runs when accumulated through Add.
+type TestOutput struct {
+	// Number of bytes copied from the encoder output to the decoder input.
+	encoded int64
+	// User CPU time of the encoder and decoder processes.
+	encDuration time.Duration
+	decDuration time.Duration
+	// System CPU time of the encoder and decoder processes.
+	encSysDuration time.Duration
+	decSysDuration time.Duration
+}
+
+// Add accumulates every field of a into to.
+func (to *TestOutput) Add(a TestOutput) {
+	to.encoded += a.encoded
+	to.encDuration += a.encDuration
+	to.decDuration += a.decDuration
+	to.encSysDuration += a.encSysDuration
+	to.decSysDuration += a.decSysDuration
+}
+
+// String formats the totals as SIZE, encoder user/system time (T, TSYS) and
+// decoder user/system time (DT, DTSYS), separated by tabs.
+func (to *TestOutput) String() string {
+	// DTSYS reports the decoder system time; it previously repeated the
+	// encoder system time.
+	return fmt.Sprintf("SIZE: %v\tT: %v\tTSYS: %v\tDT: %v\tDTSYS: %v",
+		to.encoded, to.encDuration, to.encSysDuration, to.decDuration, to.decSysDuration)
+}
+
+// datasetTest runs every ordered pair of distinct regular files found
+// directly in the xdataset directory through datasetPairTest, once with the
+// test program and once with the reference program, for three source buffer
+// sizes.  It prints the per-pair differences in encoded size and CPU time,
+// followed by the totals per buffer size.
+//
+// P is the test program, Q is the reference version.
+func (cfg Config) datasetTest(t *xdelta.TestGroup, p, q xdelta.Program) {
+	dir, err := os.Open(xdataset)
+	if err != nil {
+		t.Panic(err)
+		return
+	}
+	defer dir.Close()
+	dents, err := dir.Readdir(-1)
+	if err != nil {
+		t.Panic(err)
+		return
+	}
+	// Keep regular files only.  Appending (rather than indexing by the
+	// directory position) avoids leaving empty path strings behind for the
+	// entries that are skipped.
+	paths := make([]string, 0, len(dents))
+	var total int64
+	for _, d := range dents {
+		if !d.Mode().IsRegular() {
+			continue
+		}
+		paths = append(paths, fmt.Sprint(xdataset, "/", d.Name()))
+		total += d.Size()
+	}
+	if len(paths) == 0 {
+		// Nothing to compare, and the mean below would divide by zero.
+		t.Panic(fmt.Errorf("no regular files in %s", xdataset))
+		return
+	}
+	meansize := total / int64(len(paths))
+	// Smallest power of two in [2^20, 2^32] that is >= the mean file size.
+	largest := uint(20)
+	for ; largest <= 31 && 1<<largest < meansize; largest++ {
+	}
+
+	sort.Strings(paths)
+
+	testSum := map[uint]*TestOutput{}
+	compSum := map[uint]*TestOutput{}
+
+	for _, in1 := range paths {
+		for _, in2 := range paths {
+			if in1 == in2 {
+				continue
+			}
+
+			// 1/4, 1/2, and 1 of the power-of-2 rounded-up mean size
+			for b := largest - 2; b <= largest; b++ {
+				if _, has := testSum[b]; !has {
+					testSum[b] = &TestOutput{}
+					compSum[b] = &TestOutput{}
+				}
+				c1 := cfg
+				c1.srcbuf_size = 1 << b
+				ptest := &PairTest{c1, p, in1, in2, TestOutput{-1, 0, 0, 0, 0}}
+				ptest.datasetPairTest(t, 1<<b)
+				qtest := &PairTest{c1, q, in1, in2, TestOutput{-1, 0, 0, 0, 0}}
+				qtest.datasetPairTest(t, 1<<b)
+
+				testSum[b].Add(ptest.TestOutput)
+				compSum[b].Add(qtest.TestOutput)
+
+				// Each "x%/d" pair is the relative change of the test
+				// program against the reference, followed by the
+				// reference value.  E: is the encoder, D: the decoder;
+				// user time comes first, system time in parentheses.
+				fmt.Printf("%s, %s: %.2f%% %+d/%d\n\tE:%.2f%%/%s(%.2f%%/%s) D:%.2f%%/%s(%.2f%%/%s) [B=%d]\n",
+					path.Base(in1), path.Base(in2),
+					float64(ptest.encoded-qtest.encoded)*100.0/float64(qtest.encoded),
+					ptest.encoded-qtest.encoded,
+					qtest.encoded,
+					(ptest.encDuration-qtest.encDuration).Seconds()*100.0/qtest.encDuration.Seconds(),
+					qtest.encDuration,
+					(ptest.encSysDuration-qtest.encSysDuration).Seconds()*100.0/qtest.encSysDuration.Seconds(),
+					qtest.encSysDuration,
+					(ptest.decDuration-qtest.decDuration).Seconds()*100.0/qtest.decDuration.Seconds(),
+					qtest.decDuration,
+					(ptest.decSysDuration-qtest.decSysDuration).Seconds()*100.0/qtest.decSysDuration.Seconds(),
+					qtest.decSysDuration,
+					1<<b)
+			}
+		}
+	}
+	// Print the totals in ascending buffer-size order; ranging over the map
+	// would produce a different order on every run.
+	for b := largest - 2; b <= largest; b++ {
+		if _, has := testSum[b]; !has {
+			continue
+		}
+		fmt.Printf("B=%v\nTEST: %v\nCOMP: %v\n", 1<<b, testSum[b], compSum[b])
+	}
+}
+
+// datasetPairTest encodes pt.target against pt.source with pt.program, pipes
+// the delta straight into a decoder, and compares the decoder output with
+// the target file.  The encoded size and the user/system CPU times of both
+// processes are stored in pt.  The meanSize argument is not used.
+func (pt *PairTest) datasetPairTest(t *xdelta.TestGroup, meanSize int64) {
+	cfg := pt.Config
+	eargs := []string{"-e", fmt.Sprint("-B", cfg.srcbuf_size), // "-q",
+		fmt.Sprint("-W", cfg.window_size), "-s", pt.source,
+		"-I0", "-S", "none", pt.target}
+	enc, err := t.Exec("encode", pt.program, false, eargs)
+	if err != nil {
+		t.Panic(err)
+	}
+
+	dargs := []string{"-dc", fmt.Sprint("-B", cfg.srcbuf_size), //"-q",
+		fmt.Sprint("-W", cfg.window_size), "-s", pt.source,
+		"-S", "none"}
+
+	dec, err := t.Exec("decode", pt.program, false, dargs)
+	if err != nil {
+		t.Panic(err)
+	}
+	tgt_check, err := os.Open(pt.target)
+	if err != nil {
+		t.Panic(err)
+	}
+	// Release the descriptor when the test ends; one is opened per file
+	// pair and nothing else closes it.
+	defer tgt_check.Close()
+	tgt_info, err := tgt_check.Stat()
+	if err != nil {
+		t.Panic(err)
+	}
+	t.Empty(enc.Stderr, "encode")
+	t.Empty(dec.Stderr, "decode")
+	t.CopyStreams(enc.Stdout, dec.Stdin, &pt.encoded)
+	t.CompareStreams(dec.Stdout, tgt_check, tgt_info.Size())
+
+	t.Wait(enc, dec)
+
+	// The process state is read only after Wait has waited for both
+	// processes.
+	pt.decDuration = dec.Cmd.ProcessState.UserTime()
+	pt.encDuration = enc.Cmd.ProcessState.UserTime()
+	pt.decSysDuration = dec.Cmd.ProcessState.SystemTime()
+	pt.encSysDuration = enc.Cmd.ProcessState.SystemTime()
+}
+
+// offsetTest feeds an encoder and a decoder with generated streams of the
+// given length whose source is shifted by offset, checks that the decoder
+// reproduces the target, and fails when the encoded size is not within 5%
+// of cfg.srcbuf_size - offset.
+func (cfg Config) offsetTest(t *xdelta.TestGroup, p xdelta.Program, offset, length int64) {
+	eargs := []string{"-e", "-0", fmt.Sprint("-B", cfg.srcbuf_size), "-q",
+		fmt.Sprint("-W", cfg.window_size)}
+	enc, err := t.Exec("encode", p, true, eargs)
+	if err != nil {
+		t.Panic(err)
+	}
+	
+	dargs := []string{"-d", fmt.Sprint("-B", cfg.srcbuf_size), "-q",
+		fmt.Sprint("-W", cfg.window_size)}
+	dec, err := t.Exec("decode", p, true, dargs)
+	if err != nil {
+		t.Panic(err)
+	}
+
+	// The pipe used to read the decoder output and compare
+	// against the target.
+	read, write := io.Pipe()
+
+	t.Empty(enc.Stderr, "encode")
+	t.Empty(dec.Stderr, "decode")
+
+	// The readers are started before the writers below.
+	var encoded_size int64
+	t.CopyStreams(enc.Stdout, dec.Stdin, &encoded_size)
+	t.CompareStreams(dec.Stdout, read, length)
+
+	// The decoder output ("read", above) is compared with the
+	// test-provided output ("write", below).  The following
+	// generates two identical inputs.
+	t.WriteRstreams("encode", seed, offset, length, enc.Srcin, enc.Stdin)
+	t.WriteRstreams("decode", seed, offset, length, dec.Srcin, write)
+	t.Wait(enc, dec)
+
+	// Accept an encoded size within +/-5% of the expected value.
+	expect := cfg.srcbuf_size - offset
+	if float64(encoded_size) < (0.95 * float64(expect)) ||
+		float64(encoded_size) > (1.05 * float64(expect)) {
+		t.Fail("encoded size should be ~=", expect, ", actual ", encoded_size)
+	}
+}
+
+// main runs the smoke test, the offset tests for i = 29..33 and finally the
+// dataset comparison of the test binary against the reference binary.
+func main() {
+	r, err := xdelta.NewRunner()
+	if err != nil {
+		panic(err)
+	}
+	defer r.Cleanup()
+
+	cfg := NewC()
+
+	prog := xdelta.Program{xdelta3}
+
+	r.RunTest("smoketest", func(t *xdelta.TestGroup) { cfg.smokeTest(t, prog) })
+
+	for i := uint(29); i <= 33; i += 1 {
+		// The arguments to offsetTest are offset, source
+		// window size, and file size. The source window size
+		// is (2 << i) and (in the 3.0x release branch) is
+		// limited to 2^31, so the greatest value of i is
+		// 30.
+		// NOTE(review): the loop above runs up to i = 33, which does
+		// not match the limit of 30 stated here - confirm which one is
+		// intended for this branch.
+		cfg.srcbuf_size = 2 << i
+		r.RunTest(fmt.Sprint("offset", i), func(t *xdelta.TestGroup) {
+			cfg.offsetTest(t, prog, 1 << i, 3 << i) })
+	}
+	
+	comp := xdelta.Program{xcompare}
+
+	r.RunTest("dataset", func(t *xdelta.TestGroup) { cfg.datasetTest(t, prog, comp) })
+}
diff --git a/third-party/xdelta3/xdelta3/go/src/xdelta/rstream.go b/third-party/xdelta3/xdelta3/go/src/xdelta/rstream.go
new file mode 100644
index 0000000000..99c3d1783c
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/go/src/xdelta/rstream.go
@@ -0,0 +1,71 @@
+package xdelta
+
+
+import (
+	"io"
+	"math/rand"
+)
+
+const (
+	// Size in bytes of the buffers used by writeRand and CompareStreams.
+	blocksize = 1<<17
+)
+
+// WriteRstreams starts two goroutines that write generated data: one writes
+// len bytes generated from seed to tgt, the other writes offset bytes of
+// other data followed by len-offset bytes generated from seed to src.  Both
+// streams are closed by their writer.
+func (t *TestGroup) WriteRstreams(desc string, seed, offset, len int64,
+	src, tgt io.WriteCloser) {
+	// The goroutine names appear in error reports, so each one names the
+	// stream it actually writes (they used to be swapped).
+	t.Go("tgt-write:"+desc, func(g *Goroutine) {
+		writeOne(g, seed, 0, len, tgt, false)
+	})
+	t.Go("src-write:"+desc, func(g *Goroutine) {
+		writeOne(g, seed, offset, len, src, true)
+	})
+}
+
+// writeOne writes len bytes to stream and closes it: first offset bytes
+// generated from the complemented seed (when offset is non-zero), then
+// len-offset bytes generated from seed.  Any write or close error is
+// reported through g.Panic.  When readall is false the goroutine is marked
+// finished before writing starts.
+func writeOne(g *Goroutine, seed, offset, len int64, stream io.WriteCloser, readall bool) {
+	if !readall {
+		// Allow the source-read to fail or block until the process terminates.
+		// This behavior is reserved for the decoder, which is not required to
+		// read the entire source.
+		g.OK()
+	}
+	if offset != 0 {
+		// Fill with other random data until the offset
+		if err := writeRand(g, rand.New(rand.NewSource(^seed)), offset, stream); err != nil {
+			g.Panic(err)
+		}
+	}
+	if err := writeRand(g, rand.New(rand.NewSource(seed)),
+		len - offset, stream); err != nil {
+		g.Panic(err)
+	}
+	if err := stream.Close(); err != nil {
+		g.Panic(err)
+	}
+	// A second call after the early OK above is handled by finish: the
+	// goroutine is already marked done, so the wait group is not touched.
+	g.OK()
+}
+
+// writeRand writes len bytes drawn from r to s in chunks of at most
+// blocksize bytes and returns the first write error, if any.  The g
+// argument is not used.
+func writeRand(g *Goroutine, r *rand.Rand, len int64, s io.Writer) error {
+	buf := make([]byte, blocksize)
+	for remaining := len; remaining > 0; {
+		// The whole buffer is refilled for every chunk, including a
+		// shorter final one.
+		fillRand(r, buf)
+		chunk := blocksize
+		if remaining < blocksize {
+			chunk = int(remaining)
+		}
+		_, err := s.Write(buf[:chunk])
+		if err != nil {
+			return err
+		}
+		remaining -= int64(chunk)
+	}
+	return nil
+}
+
+func fillRand(r *rand.Rand, blk []byte) {
+	for p := 0; p < len(blk); {
+		v := r.Int63()
+		for i := 7; i != 0 && p < len(blk); i-- {
+			blk[p] = byte(v)
+			p++
+			v >>= 8
+		}
+	}
+}
diff --git a/third-party/xdelta3/xdelta3/go/src/xdelta/run.go b/third-party/xdelta3/xdelta3/go/src/xdelta/run.go
new file mode 100644
index 0000000000..448fabeba2
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/go/src/xdelta/run.go
@@ -0,0 +1,71 @@
+package xdelta
+
+import (
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"os/exec"
+)
+
+// Program identifies an executable to run by its path.
+type Program struct {
+	Path string
+}
+
+// Run is one started process together with the pipes connected to it.
+type Run struct {
+	Cmd exec.Cmd
+	// Path of the source FIFO; set by Exec only when a FIFO was requested.
+	Srcfile string
+	Stdin io.WriteCloser
+	// Write end of the pipe that feeds the source FIFO.
+	Srcin io.WriteCloser
+	Stdout io.ReadCloser
+	Stderr io.ReadCloser
+}
+
+// Runner owns the temporary directory in which the tests run.
+type Runner struct {
+	Testdir string
+}
+
+// Wait waits for the process to exit and returns the result of Cmd.Wait.
+func (r *Run) Wait() error {
+	return r.Cmd.Wait()
+}
+
+// NewRunner creates a fresh temporary directory with the "xrt" prefix under
+// tmpDir and returns a Runner that uses it as its test directory.
+func NewRunner() (*Runner, error) {
+	dir, err := ioutil.TempDir(tmpDir, "xrt")
+	if err != nil {
+		return nil, err
+	}
+	return &Runner{Testdir: dir}, nil
+}
+
+// newTestGroup returns a TestGroup bound to r whose main goroutine record,
+// named after the test, is already registered with the wait group.
+func (r *Runner) newTestGroup(name string) (*TestGroup) {
+	group := &TestGroup{Runner: r}
+	mainG := &Goroutine{TestGroup: group, name: name, done: false}
+	// One wait-group slot is reserved for the main goroutine record.
+	group.WaitGroup.Add(1)
+	group.running = append(group.running, mainG)
+	group.main = mainG
+	return group
+}
+
+// Cleanup removes the test directory and everything in it.  The error
+// returned by os.RemoveAll is ignored.
+func (r *Runner) Cleanup() {
+	os.RemoveAll(r.Testdir)
+}
+
+// RunTest runs f with a new TestGroup in its own goroutine, waits for it to
+// return or panic, and prints "Success:" when no error was recorded and no
+// panic occurred, or "FAILED:" with the recorded errors and the recovered
+// panic value otherwise.
+func (r *Runner) RunTest(name string, f func (t *TestGroup)) {
+	t := r.newTestGroup(name)
+	// Exactly one value is sent per test, by the deferred function below;
+	// recover() yields nil when f returned normally.  The test goroutine
+	// used to send a second value that nobody received, which left it
+	// blocked forever after every successful test.
+	c := make(chan interface{}, 1)
+	go func() {
+		defer func() {
+			c <- recover()
+		}()
+		fmt.Println("Testing", name, "...")
+		f(t)
+	}()
+	rec := <- c
+	// Goroutines of the group may still append under the lock.
+	t.Lock()
+	errs := t.errors
+	t.Unlock()
+	if errs == nil && rec == nil {
+		fmt.Println("Success:", name)
+	} else {
+		fmt.Println("FAILED:", name, errs, rec)
+	}
+}
diff --git a/third-party/xdelta3/xdelta3/go/src/xdelta/test.go b/third-party/xdelta3/xdelta3/go/src/xdelta/test.go
new file mode 100644
index 0000000000..72106983ed
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/go/src/xdelta/test.go
@@ -0,0 +1,164 @@
+package xdelta
+
+import (
+	"bufio"
+	"bytes"
+	"errors"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"path"
+	"sync/atomic"
+
+	"golang.org/x/sys/unix"
+)
+
+var (
+	// Parent directory for the temporary test directory made by NewRunner.
+	tmpDir = "/tmp"
+	// Counter incremented atomically by Exec to name each source FIFO.
+	srcSeq int64
+)
+
+// Drain reads f to the end in a new goroutine and delivers the bytes on the
+// returned channel.  The channel is closed when the goroutine exits, so a
+// receiver gets a nil slice instead of blocking forever when the read fails.
+func (t *TestGroup) Drain(f io.ReadCloser, desc string) <-chan []byte {
+	c := make(chan []byte)
+	t.Go(desc, func(g *Goroutine) {
+		// Deferred calls also run when g.Panic ends the goroutine with
+		// runtime.Goexit.
+		defer close(c)
+		if b, err := ioutil.ReadAll(f); err != nil {
+			g.Panic(err)
+		} else {
+			c <- b
+		}
+		g.OK()
+	})
+	return c
+}
+
+// Empty copies f line by line to standard error, prefixing every line with
+// desc, then closes f.  A scanner error is reported through g.Panic.
+func (t *TestGroup) Empty(f io.ReadCloser, desc string) *Goroutine {
+	return t.Go("empty:"+desc, func (g *Goroutine) {
+		lines := bufio.NewScanner(f)
+		for lines.Scan() {
+			fmt.Fprint(os.Stderr, desc, ": ", lines.Text(), "\n")
+		}
+		scanErr := lines.Err()
+		f.Close()
+		if scanErr != nil {
+			g.Panic(scanErr)
+		}
+		g.OK()
+	})
+}
+
+// TestWrite writes b to f and closes f in a new goroutine.  The goroutine is
+// named after what, so that a failure report says which stream was being
+// written.
+func (t *TestGroup) TestWrite(what string, f io.WriteCloser, b []byte) *Goroutine {
+	return t.Go("write:"+what, func(g *Goroutine) {
+		if _, err := f.Write(b); err != nil {
+			g.Panic(err)
+		}
+		if err := f.Close(); err != nil {
+			g.Panic(err)
+		}
+		g.OK()
+	})
+}
+
+// CopyStreams copies r to w in a new goroutine, closes both, and stores the
+// number of bytes copied in *written.
+func (t *TestGroup) CopyStreams(r io.ReadCloser, w io.WriteCloser, written *int64) *Goroutine {
+	return t.Go("copy", func(g *Goroutine) {
+		nwrite, err := io.Copy(w, r)
+		if err != nil {
+			g.Panic(err)
+		}
+		err = r.Close()
+		if err != nil {
+			g.Panic(err)
+		}
+		err = w.Close()
+		if err != nil {
+			g.Panic(err)
+		}
+		// Publish the count before g.OK(): OK releases the wait group, and
+		// callers read *written as soon as TestGroup.Wait returns.
+		*written = nwrite
+		g.OK()
+	})
+}
+
+// CompareStreams reads length bytes from both r1 and r2 in a new goroutine,
+// at most blocksize bytes at a time, and reports an error through g.Panic at
+// the first chunk that differs or cannot be read in full.
+func (t *TestGroup) CompareStreams(r1 io.ReadCloser, r2 io.ReadCloser, length int64) *Goroutine {
+	return t.Go("compare", func(g *Goroutine) {
+		buf1 := make([]byte, blocksize)
+		buf2 := make([]byte, blocksize)
+		var offset int64
+		for remaining := length; remaining > 0; {
+			chunk := blocksize
+			if remaining < blocksize {
+				chunk = int(remaining)
+			}
+			part1, part2 := buf1[:chunk], buf2[:chunk]
+			if _, err := io.ReadFull(r1, part1); err != nil {
+				g.Panic(err)
+			}
+			if _, err := io.ReadFull(r2, part2); err != nil {
+				g.Panic(err)
+			}
+			if !bytes.Equal(part1, part2) {
+				fmt.Println("B1 is", string(part1))
+				fmt.Println("B2 is", string(part2))
+				g.Panic(errors.New(fmt.Sprint("Bytes do not compare at ", offset)))
+			}
+			remaining -= int64(chunk)
+			offset += int64(chunk)
+		}
+		g.OK()
+	})
+}
+
+// Exec starts program p in the runner's test directory with the given flags
+// and returns a Run holding its stdin, stdout and stderr pipes.  When
+// srcfifo is true a FIFO is created in the test directory, passed to the
+// program with "-s", and fed from the pipe whose write end is run.Srcin.
+func (t *TestGroup) Exec(desc string, p Program, srcfifo bool, flags []string) (*Run, error) {
+	var err error
+	run := &Run{}
+	args := []string{p.Path}
+	if srcfifo {
+		// The sequence number gives every FIFO a distinct name.
+		num := atomic.AddInt64(&srcSeq, 1)
+		run.Srcfile = path.Join(t.Runner.Testdir, fmt.Sprint("source", num))
+		if err = unix.Mkfifo(run.Srcfile, 0600); err != nil {
+			return nil, err
+		}
+		// writeFifo copies everything written to run.Srcin into the FIFO.
+		read, write := io.Pipe()
+		t.writeFifo(run.Srcfile, read)
+		run.Srcin = write
+		args = append(args, "-s")
+		args = append(args, run.Srcfile)
+	}
+	if run.Stdin, err = run.Cmd.StdinPipe(); err != nil {
+		return nil, err
+	}
+	if run.Stdout, err = run.Cmd.StdoutPipe(); err != nil {
+		return nil, err
+	}
+	if run.Stderr, err = run.Cmd.StderrPipe(); err != nil {
+		return nil, err
+	}
+
+	// The command is a zero-value exec.Cmd filled in field by field; the
+	// source arguments, if any, come before the caller's flags.
+	run.Cmd.Path = p.Path
+	run.Cmd.Args = append(args, flags...)
+	run.Cmd.Dir = t.Runner.Testdir
+	if serr := run.Cmd.Start(); serr != nil {
+		return nil, serr
+	}
+	return run, nil
+}
+
+// Fail panics with the space-separated rendering of v.
+func (t *TestGroup) Fail(v ...interface{}) {
+	panic(fmt.Sprintln(v...))
+}
+
+// writeFifo opens the FIFO srcfile for writing in a new goroutine, copies
+// everything from read into it and closes it.  Errors are reported through
+// g.Panic.
+func (t *TestGroup) writeFifo(srcfile string, read io.Reader) *Goroutine {
+	// Named after the FIFO rather than the copy-pasted "compare" label, so
+	// that error reports point at the right goroutine.
+	return t.Go("fifo:"+srcfile, func(g *Goroutine) {
+		fifo, err := os.OpenFile(srcfile, os.O_WRONLY, 0600)
+		if err != nil {
+			// Nothing to close: the file is nil when the open fails.
+			g.Panic(err)
+		}
+		if _, err := io.Copy(fifo, read); err != nil {
+			fifo.Close()
+			g.Panic(err)
+		}
+		if err := fifo.Close(); err != nil {
+			g.Panic(err)
+		}
+		g.OK()
+	})
+}
diff --git a/third-party/xdelta3/xdelta3/go/src/xdelta/tgroup.go b/third-party/xdelta3/xdelta3/go/src/xdelta/tgroup.go
new file mode 100644
index 0000000000..602b1e108c
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/go/src/xdelta/tgroup.go
@@ -0,0 +1,97 @@
+package xdelta
+
+import (
+	"fmt"
+	"runtime"
+	"sync"
+)
+
+// TestGroup tracks the goroutines that make up one test and the errors they
+// report.  The embedded mutex guards the slices below; the embedded wait
+// group counts the goroutines that have not finished yet.
+type TestGroup struct {
+	*Runner
+	// Record for the goroutine that runs the test function itself.
+	main *Goroutine
+	sync.Mutex
+	sync.WaitGroup
+	running []*Goroutine
+	errors []error
+	nonerrors []error  // For tolerated / expected conditions
+}
+
+// Goroutine is the bookkeeping record of one goroutine in a TestGroup.
+type Goroutine struct {
+	*TestGroup
+	name string
+	// Set by finish on the first OK or Panic call.
+	done bool
+}
+
+// String returns the goroutine name wrapped in square brackets.
+func (g *Goroutine) String() string {
+	return "[" + g.name + "]"
+}
+
+// finish records the outcome of g.  The first call marks g as done, stores
+// err (if any) in the group's errors and releases one wait-group slot.
+// Later calls only store a non-nil err in the tolerated nonerrors list.
+// A non-nil err is annotated with the goroutine name and a stack trace.
+func (g *Goroutine) finish(err error) {
+	if err != nil {
+		// The stack is captured only when there is an error to annotate;
+		// the success path needs neither the buffer nor the trace.
+		sbuf := make([]byte, 4096)
+		sbuf = sbuf[0:runtime.Stack(sbuf, false)]
+		err = fmt.Errorf("%v:%v:%v", g.name, err, string(sbuf))
+	}
+	tg := g.TestGroup
+	tg.Lock()
+	first := !g.done
+	if first {
+		g.done = true
+		if err != nil {
+			tg.errors = append(tg.errors, err)
+		}
+	} else if err != nil {
+		tg.nonerrors = append(tg.nonerrors, err)
+	}
+	tg.Unlock()
+	// Done is called outside the lock, and only once per goroutine.
+	if first {
+		tg.WaitGroup.Done()
+	}
+}
+
+// OK marks the goroutine as finished without an error.
+func (g *Goroutine) OK() {
+	g.finish(nil)
+}
+
+// Panic records err for the goroutine and then terminates the calling
+// goroutine with runtime.Goexit, unless g is the group's main goroutine
+// record, in which case Panic returns to the caller.
+func (g *Goroutine) Panic(err error) {
+	g.finish(err)
+	if g != g.TestGroup.main {
+		runtime.Goexit()
+	}
+}
+
+// Main returns the record of the group's main goroutine.
+func (t *TestGroup) Main() *Goroutine { return t.main }
+
+// Panic records err against the main goroutine.  It returns to the caller;
+// see Goroutine.Panic.
+func (t *TestGroup) Panic(err error) { t.Main().Panic(err) }
+
+// Go registers a new goroutine record called name with the group, starts f
+// on it in a new goroutine and returns the record.
+func (t *TestGroup) Go(name string, f func(*Goroutine)) *Goroutine {
+	worker := &Goroutine{TestGroup: t, name: name, done: false}
+	t.Lock()
+	t.running = append(t.running, worker)
+	t.WaitGroup.Add(1)
+	t.Unlock()
+	go f(worker)
+	return worker
+}
+
+func (t *TestGroup) Wait(procs... *Run) {
+	t.Main().OK()
+	t.WaitGroup.Wait()
+	for _, p := range procs {
+		if err := p.Wait(); err != nil {
+			t.errors = append(t.errors, err)
+		}
+	}
+	for _, err := range t.errors {
+		fmt.Println(":ERROR:", err)
+	}
+	for _, err := range t.nonerrors {
+		fmt.Println("(ERROR)", err)
+	}
+	if len(t.errors) != 0 {
+		t.Fail("Test failed with", len(t.errors), "errors")
+	}
+}
diff --git a/third-party/xdelta3/xdelta3/linkxd3lib.c b/third-party/xdelta3/xdelta3/linkxd3lib.c
new file mode 100644
index 0000000000..0f7f7396bf
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/linkxd3lib.c
@@ -0,0 +1,42 @@
+#include "xdelta3.h"
+
+/* Externally visible sink for the results passed to use(). */
+extern int VVV;
+
+int VVV;
+
+/* Stores r in VVV so that the calls whose results are passed here count as
+   used. */
+void use(int r)
+{
+  VVV = r;
+  return;
+}
+
+/* References each listed xd3_* library function once, passing results that
+   have a value through use().  The config, stream and source objects get no
+   set-up beyond xd3_init_config() and xd3_config_stream().
+   NOTE(review): judging by the file name this exists to check that the
+   library links, not to be run for its results - confirm. */
+int main() {
+  xd3_config config;
+  xd3_stream stream;
+  xd3_source source;
+
+  xd3_init_config (& config, 0);
+  use (xd3_config_stream (&stream, &config));
+  use (xd3_close_stream (&stream));
+  xd3_abort_stream (&stream);
+  xd3_free_stream (&stream);
+  
+  xd3_avail_input (& stream, NULL, 0);
+  xd3_consume_output (& stream);
+  
+  use (xd3_set_source (& stream, & source));
+  xd3_set_flags (& stream, 0);
+  
+  use (xd3_decode_stream (& stream, NULL, 0, NULL, NULL, 0));
+  use (xd3_decode_input (&stream));
+  use (xd3_get_appheader (& stream, NULL, NULL));
+  
+  /* The encoder entry points are referenced only when XD3_ENCODER is set. */
+#if XD3_ENCODER
+  use (xd3_encode_input (&stream));
+  use (xd3_encode_stream (& stream, NULL, 0, NULL, NULL, 0));
+  use (xd3_set_appheader (& stream));
+  use (xd3_encoder_used_source (& stream));
+  use (xd3_encoder_srcbase (& stream));
+  use (xd3_encoder_srclen (& stream));
+#endif
+  return 0;
+}
diff --git a/third-party/xdelta3/xdelta3/m4/ax_check_aligned_access_required.m4 b/third-party/xdelta3/xdelta3/m4/ax_check_aligned_access_required.m4
new file mode 100644
index 0000000000..b07827554c
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/m4/ax_check_aligned_access_required.m4
@@ -0,0 +1,84 @@
+# ====================================================================================
+#  http://www.gnu.org/software/autoconf-archive/ax_check_aligned_access_required.html
+# ====================================================================================
+#
+# SYNOPSIS
+#
+#   AX_CHECK_ALIGNED_ACCESS_REQUIRED
+#
+# DESCRIPTION
+#
+#   While x86 CPUs allow unaligned access to memory objects, most modern
+#   designs require objects to be aligned and fail with a bus error
+#   otherwise. This behaviour is well known from big-endian machines (sparc,
+#   etc.), but it is not tied to endianness: the alpha CPU, for example, is
+#   little-endian.
+#
+#   The following function will test for aligned access to be required and
+#   set a config.h define HAVE_ALIGNED_ACCESS_REQUIRED (name derived by
+#   standard usage). Structures loaded from a file (or mmapped to memory)
+#   should be accessed per-byte in that case to avoid segfault type errors.
+#
+# LICENSE
+#
+#   Copyright (c) 2008 Guido U. Draheim <guidod@gmx.de>
+#
+#   This program is free software; you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by the
+#   Free Software Foundation; either version 3 of the License, or (at your
+#   option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+#   Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License along
+#   with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#   As a special exception, the respective Autoconf Macro's copyright owner
+#   gives unlimited permission to copy, distribute and modify the configure
+#   scripts that are the output of Autoconf when processing the Macro. You
+#   need not follow the terms of the GNU General Public License when using
+#   or distributing such scripts, even though portions of the text of the
+#   Macro appear in them. The GNU General Public License (GPL) does govern
+#   all other use of the material that constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the Autoconf
+#   Macro released by the Autoconf Archive. When you make and distribute a
+#   modified version of the Autoconf Macro, you may extend this special
+#   exception to the GPL to apply to your modified version as well.
+
+#serial 7
+
+# AX_CHECK_ALIGNED_ACCESS_REQUIRED
+# --------------------------------
+# Compiles and runs a small program that reads an int through two pointers
+# one byte apart and exits with status 1 when both reads return the same
+# value, 0 otherwise.  Exit status 0 sets the cached result
+# ax_cv_have_aligned_access_required to yes; any other outcome, and
+# cross-compilation, sets it to no.  A yes result defines
+# HAVE_ALIGNED_ACCESS_REQUIRED in config.h.
+# NOTE(review): a machine where both reads work and return different values
+# exits 0 and is therefore reported as yes - confirm that this mapping of
+# the exit status is the intended one.
+AC_DEFUN([AX_CHECK_ALIGNED_ACCESS_REQUIRED],
+[AC_CACHE_CHECK([if pointers to integers require aligned access],
+  [ax_cv_have_aligned_access_required],
+  [AC_TRY_RUN([
+#include <stdio.h>
+#include <stdlib.h>
+
+int main()
+{
+  char* string = malloc(40);
+  int i;
+  for (i=0; i < 40; i++) string[[i]] = i;
+  {
+     void* s = string;
+     int* p = s+1;
+     int* q = s+2;
+
+     if (*p == *q) { return 1; }
+  }
+  return 0;
+}
+              ],
+     [ax_cv_have_aligned_access_required=yes],
+     [ax_cv_have_aligned_access_required=no],
+     [ax_cv_have_aligned_access_required=no])
+  ])
+if test "$ax_cv_have_aligned_access_required" = yes ; then
+  AC_DEFINE([HAVE_ALIGNED_ACCESS_REQUIRED], [1],
+    [Define if pointers to integers require aligned access])
+fi
+])
diff --git a/third-party/xdelta3/xdelta3/m4/ax_pkg_swig.m4 b/third-party/xdelta3/xdelta3/m4/ax_pkg_swig.m4
new file mode 100644
index 0000000000..e112f3d3fa
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/m4/ax_pkg_swig.m4
@@ -0,0 +1,135 @@
+# ===========================================================================
+#        http://www.gnu.org/software/autoconf-archive/ax_pkg_swig.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_PKG_SWIG([major.minor.micro], [action-if-found], [action-if-not-found])
+#
+# DESCRIPTION
+#
+#   This macro searches for a SWIG installation on your system. If found,
+#   then SWIG is AC_SUBST'd; if not found, then $SWIG is empty.  If SWIG is
+#   found, then SWIG_LIB is set to the SWIG library path, and AC_SUBST'd.
+#
+#   You can use the optional first argument to check if the version of the
+#   available SWIG is greater than or equal to the value of the argument. It
+#   should have the format: N[.N[.N]] (N is a number between 0 and 999. Only
+#   the first N is mandatory.) If the version argument is given (e.g.
+#   1.3.17), AX_PKG_SWIG checks that the swig package is this version number
+#   or higher.
+#
+#   As usual, action-if-found is executed if SWIG is found, otherwise
+#   action-if-not-found is executed.
+#
+#   In configure.in, use as:
+#
+#     AX_PKG_SWIG(1.3.17, [], [ AC_MSG_ERROR([SWIG is required to build..]) ])
+#     AX_SWIG_ENABLE_CXX
+#     AX_SWIG_MULTI_MODULE_SUPPORT
+#     AX_SWIG_PYTHON
+#
+# LICENSE
+#
+#   Copyright (c) 2008 Sebastian Huber <sebastian-huber@web.de>
+#   Copyright (c) 2008 Alan W. Irwin <irwin@beluga.phys.uvic.ca>
+#   Copyright (c) 2008 Rafael Laboissiere <rafael@laboissiere.net>
+#   Copyright (c) 2008 Andrew Collier <colliera@ukzn.ac.za>
+#   Copyright (c) 2011 Murray Cumming <murrayc@openismus.com>
+#
+#   This program is free software; you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by the
+#   Free Software Foundation; either version 2 of the License, or (at your
+#   option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+#   Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License along
+#   with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#   As a special exception, the respective Autoconf Macro's copyright owner
+#   gives unlimited permission to copy, distribute and modify the configure
+#   scripts that are the output of Autoconf when processing the Macro. You
+#   need not follow the terms of the GNU General Public License when using
+#   or distributing such scripts, even though portions of the text of the
+#   Macro appear in them. The GNU General Public License (GPL) does govern
+#   all other use of the material that constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the Autoconf
+#   Macro released by the Autoconf Archive. When you make and distribute a
+#   modified version of the Autoconf Macro, you may extend this special
+#   exception to the GPL to apply to your modified version as well.
+
+#serial 8
+
+# AX_PKG_SWIG
+# -----------
+# Looks for a program called swig or swig2.0 and stores its path in SWIG.
+# When a minimum version is given as the first argument, the version printed
+# by the program is split into major, minor and patch numbers and compared
+# with the required one as major*10000 + minor*100 + patch.  On success
+# SWIG_LIB is set from the -swiglib output and the second argument is run.
+# When the version is too old or cannot be determined, SWIG is emptied and
+# the third argument is run; the third argument is also run when no program
+# is found.  SWIG_LIB is always substituted.
+AC_DEFUN([AX_PKG_SWIG],[
+        # Ubuntu has swig 2.0 as /usr/bin/swig2.0
+        AC_PATH_PROGS([SWIG],[swig swig2.0])
+        if test -z "$SWIG" ; then
+                m4_ifval([$3],[$3],[:])
+        elif test -n "$1" ; then
+                AC_MSG_CHECKING([SWIG version])
+                [swig_version=`$SWIG -version 2>&1 | grep 'SWIG Version' | sed 's/.*\([0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\).*/\1/g'`]
+                AC_MSG_RESULT([$swig_version])
+                if test -n "$swig_version" ; then
+                        # Calculate the required version number components
+                        [required=$1]
+                        [required_major=`echo $required | sed 's/[^0-9].*//'`]
+                        if test -z "$required_major" ; then
+                                [required_major=0]
+                        fi
+                        [required=`echo $required | sed 's/[0-9]*[^0-9]//'`]
+                        [required_minor=`echo $required | sed 's/[^0-9].*//'`]
+                        if test -z "$required_minor" ; then
+                                [required_minor=0]
+                        fi
+                        [required=`echo $required | sed 's/[0-9]*[^0-9]//'`]
+                        [required_patch=`echo $required | sed 's/[^0-9].*//'`]
+                        if test -z "$required_patch" ; then
+                                [required_patch=0]
+                        fi
+                        # Calculate the available version number components
+                        [available=$swig_version]
+                        [available_major=`echo $available | sed 's/[^0-9].*//'`]
+                        if test -z "$available_major" ; then
+                                [available_major=0]
+                        fi
+                        [available=`echo $available | sed 's/[0-9]*[^0-9]//'`]
+                        [available_minor=`echo $available | sed 's/[^0-9].*//'`]
+                        if test -z "$available_minor" ; then
+                                [available_minor=0]
+                        fi
+                        [available=`echo $available | sed 's/[0-9]*[^0-9]//'`]
+                        [available_patch=`echo $available | sed 's/[^0-9].*//'`]
+                        if test -z "$available_patch" ; then
+                                [available_patch=0]
+                        fi
+                        # Convert the version tuple into a single number for easier comparison.
+                        # Using base 100 should be safe since SWIG internally uses BCD values
+                        # to encode its version number.
+                        required_swig_vernum=`expr $required_major \* 10000 \
+                            \+ $required_minor \* 100 \+ $required_patch`
+                        available_swig_vernum=`expr $available_major \* 10000 \
+                            \+ $available_minor \* 100 \+ $available_patch`
+
+                        if test $available_swig_vernum -lt $required_swig_vernum; then
+                                AC_MSG_WARN([SWIG version >= $1 is required.  You have $swig_version.])
+                                SWIG=''
+                                m4_ifval([$3],[$3],[])
+                        else
+                                AC_MSG_CHECKING([for SWIG library])
+                                SWIG_LIB=`$SWIG -swiglib`
+                                AC_MSG_RESULT([$SWIG_LIB])
+                                m4_ifval([$2],[$2],[])
+                        fi
+                else
+                        AC_MSG_WARN([cannot determine SWIG version])
+                        SWIG=''
+                        m4_ifval([$3],[$3],[])
+                fi
+        fi
+        AC_SUBST([SWIG_LIB])
+])
diff --git a/third-party/xdelta3/xdelta3/m4/ax_python_devel.m4 b/third-party/xdelta3/xdelta3/m4/ax_python_devel.m4
new file mode 100644
index 0000000000..a62b860de3
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/m4/ax_python_devel.m4
@@ -0,0 +1,325 @@
+# ===========================================================================
+#      http://www.gnu.org/software/autoconf-archive/ax_python_devel.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_PYTHON_DEVEL([version])
+#
+# DESCRIPTION
+#
+#   Note: Defines as a precious variable "PYTHON_VERSION". Don't override it
+#   in your configure.ac.
+#
+#   This macro checks for Python and tries to get the include path to
+#   'Python.h'. It provides the $(PYTHON_CPPFLAGS) and $(PYTHON_LDFLAGS)
+#   output variables. It also exports $(PYTHON_EXTRA_LIBS) and
+#   $(PYTHON_EXTRA_LDFLAGS) for embedding Python in your code.
+#
+#   You can search for some particular version of Python by passing a
+#   parameter to this macro, for example ">= '2.3.1'", or "== '2.4'". Please
+#   note that you *have* to pass also an operator along with the version to
+#   match, and pay special attention to the single quotes surrounding the
+#   version number. Don't use "PYTHON_VERSION" for this: that environment
+#   variable is declared as precious and thus reserved for the end-user.
+#
+#   This macro should work for all versions of Python >= 2.1.0. As an end
+#   user, you can disable the check for the python version by setting the
+#   PYTHON_NOVERSIONCHECK environment variable to something else than the
+#   empty string.
+#
+#   If you need to use this macro for an older Python version, please
+#   contact the authors. We're always open for feedback.
+#
+# LICENSE
+#
+#   Copyright (c) 2009 Sebastian Huber <sebastian-huber@web.de>
+#   Copyright (c) 2009 Alan W. Irwin <irwin@beluga.phys.uvic.ca>
+#   Copyright (c) 2009 Rafael Laboissiere <rafael@laboissiere.net>
+#   Copyright (c) 2009 Andrew Collier <colliera@ukzn.ac.za>
+#   Copyright (c) 2009 Matteo Settenvini <matteo@member.fsf.org>
+#   Copyright (c) 2009 Horst Knorr <hk_classes@knoda.org>
+#
+#   This program is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by the
+#   Free Software Foundation, either version 3 of the License, or (at your
+#   option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+#   Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License along
+#   with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#   As a special exception, the respective Autoconf Macro's copyright owner
+#   gives unlimited permission to copy, distribute and modify the configure
+#   scripts that are the output of Autoconf when processing the Macro. You
+#   need not follow the terms of the GNU General Public License when using
+#   or distributing such scripts, even though portions of the text of the
+#   Macro appear in them. The GNU General Public License (GPL) does govern
+#   all other use of the material that constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the Autoconf
+#   Macro released by the Autoconf Archive. When you make and distribute a
+#   modified version of the Autoconf Macro, you may extend this special
+#   exception to the GPL to apply to your modified version as well.
+
+#serial 8
+
+AU_ALIAS([AC_PYTHON_DEVEL], [AX_PYTHON_DEVEL])
+AC_DEFUN([AX_PYTHON_DEVEL],[
+	#
+	# Allow the use of a (user set) custom python version
+	#
+	AC_ARG_VAR([PYTHON_VERSION],[The installed Python
+		version to use, for example '2.3'. This string
+		will be appended to the Python interpreter
+		canonical name.])
+
+	AC_PATH_PROG([PYTHON],[python[$PYTHON_VERSION]])
+	if test -z "$PYTHON"; then
+	   AC_MSG_ERROR([Cannot find python$PYTHON_VERSION in your system path])
+	   PYTHON_VERSION=""
+	fi
+
+	#
+	# Check for a version of Python >= 2.1.0
+	#
+	AC_MSG_CHECKING([for a version of Python >= '2.1.0'])
+	ac_supports_python_ver=`$PYTHON -c "import sys; \
+		ver = sys.version.split ()[[0]]; \
+		print (ver >= '2.1.0')"`
+	if test "$ac_supports_python_ver" != "True"; then
+		if test -z "$PYTHON_NOVERSIONCHECK"; then
+			AC_MSG_RESULT([no])
+			AC_MSG_FAILURE([
+This version of the AC@&t@_PYTHON_DEVEL macro
+doesn't work properly with versions of Python before
+2.1.0. You may need to re-run configure, setting the
+variables PYTHON_CPPFLAGS, PYTHON_LDFLAGS, PYTHON_SITE_PKG,
+PYTHON_EXTRA_LIBS and PYTHON_EXTRA_LDFLAGS by hand.
+Moreover, to disable this check, set PYTHON_NOVERSIONCHECK
+to something else than an empty string.
+])
+		else
+			AC_MSG_RESULT([skip at user request])
+		fi
+	else
+		AC_MSG_RESULT([yes])
+	fi
+
+	#
+	# if the macro parameter ``version'' is set, honour it
+	#
+	if test -n "$1"; then
+		AC_MSG_CHECKING([for a version of Python $1])
+		ac_supports_python_ver=`$PYTHON -c "import sys; \
+			ver = sys.version.split ()[[0]]; \
+			print (ver $1)"`
+		if test "$ac_supports_python_ver" = "True"; then
+		   AC_MSG_RESULT([yes])
+		else
+			AC_MSG_RESULT([no])
+			AC_MSG_ERROR([this package requires Python $1.
+If you have it installed, but it isn't the default Python
+interpreter in your system path, please pass the PYTHON_VERSION
+variable to configure. See ``configure --help'' for reference.
+])
+			PYTHON_VERSION=""
+		fi
+	fi
+
+	#
+	# Check if you have distutils, else fail
+	#
+	AC_MSG_CHECKING([for the distutils Python package])
+	ac_distutils_result=`$PYTHON -c "import distutils" 2>&1`
+	if test -z "$ac_distutils_result"; then
+		AC_MSG_RESULT([yes])
+	else
+		AC_MSG_RESULT([no])
+		AC_MSG_ERROR([cannot import Python module "distutils".
+Please check your Python installation. The error was:
+$ac_distutils_result])
+		PYTHON_VERSION=""
+	fi
+
+	#
+	# Check for Python include path
+	#
+	AC_MSG_CHECKING([for Python include path])
+	if test -z "$PYTHON_CPPFLAGS"; then
+		python_path=`$PYTHON -c "import distutils.sysconfig; \
+			print (distutils.sysconfig.get_python_inc ());"`
+		if test -n "${python_path}"; then
+			python_path="-I$python_path"
+		fi
+		PYTHON_CPPFLAGS=$python_path
+	fi
+	AC_MSG_RESULT([$PYTHON_CPPFLAGS])
+	AC_SUBST([PYTHON_CPPFLAGS])
+
+	#
+	# Check for Python library path
+	#
+	AC_MSG_CHECKING([for Python library path])
+	if test -z "$PYTHON_LDFLAGS"; then
+		# (makes two attempts to ensure we've got a version number
+		# from the interpreter)
+		ac_python_version=`cat<<EOD | $PYTHON -
+
+# join all versioning strings, on some systems
+# major/minor numbers could be in different list elements
+from distutils.sysconfig import *
+ret = ''
+for e in get_config_vars ('VERSION'):
+	if (e != None):
+		ret += e
+print (ret)
+EOD`
+
+		if test -z "$ac_python_version"; then
+			if test -n "$PYTHON_VERSION"; then
+				ac_python_version=$PYTHON_VERSION
+			else
+				ac_python_version=`$PYTHON -c "import sys; \
+					print ('%d.%d' % sys.version_info[[:2]])"`
+			fi
+		fi
+
+		# Make the versioning information available to the compiler
+		AC_DEFINE_UNQUOTED([HAVE_PYTHON], ["$ac_python_version"],
+                                   [If available, contains the Python version number currently in use.])
+
+		# First, the library directory:
+		ac_python_libdir=`cat<<EOD | $PYTHON -
+
+# There should be only one
+import distutils.sysconfig
+for e in distutils.sysconfig.get_config_vars ('LIBDIR'):
+	if e != None:
+		print (e)
+		break
+EOD`
+
+		# Before checking for libpythonX.Y, we need to know
+		# the extension the OS we're on uses for libraries
+		# (we take the first one, if there's more than one fix me!):
+		ac_python_soext=`$PYTHON -c \
+		  "import distutils.sysconfig; \
+		  print (distutils.sysconfig.get_config_vars('SO')[[0]])"`
+
+		# Now, for the library:
+		ac_python_soname=`$PYTHON -c \
+		  "import distutils.sysconfig; \
+		  print (distutils.sysconfig.get_config_vars('LDLIBRARY')[[0]])"`
+
+		# Strip away extension from the end to canonicalize its name:
+		ac_python_library=`echo "$ac_python_soname" | sed "s/${ac_python_soext}$//"`
+
+		# This small piece shamelessly adapted from PostgreSQL python macro;
+		# credits goes to momjian, I think. I'd like to put the right name
+		# in the credits, if someone can point me in the right direction... ?
+		#
+		if test -n "$ac_python_libdir" -a -n "$ac_python_library" \
+			-a x"$ac_python_library" != x"$ac_python_soname"
+		then
+			# use the official shared library
+			ac_python_library=`echo "$ac_python_library" | sed "s/^lib//"`
+			PYTHON_LDFLAGS="-L$ac_python_libdir -l$ac_python_library"
+		else
+			# old way: use libpython from python_configdir
+			ac_python_libdir=`$PYTHON -c \
+			  "from distutils.sysconfig import get_python_lib as f; \
+			  import os; \
+			  print (os.path.join(f(plat_specific=1, standard_lib=1), 'config'));"`
+			PYTHON_LDFLAGS="-L$ac_python_libdir -lpython$ac_python_version"
+		fi
+		# The variable has to be expanded here: testing the bare name is never empty.
+		if test -z "$PYTHON_LDFLAGS"; then
+			AC_MSG_ERROR([
+  Cannot determine location of your Python DSO. Please check it was installed with
+  dynamic libraries enabled, or try setting PYTHON_LDFLAGS by hand.
+			])
+		fi
+	fi
+	AC_MSG_RESULT([$PYTHON_LDFLAGS])
+	AC_SUBST([PYTHON_LDFLAGS])
+
+	#
+	# Check for site packages
+	#
+	AC_MSG_CHECKING([for Python site-packages path])
+	if test -z "$PYTHON_SITE_PKG"; then
+		PYTHON_SITE_PKG=`$PYTHON -c "import distutils.sysconfig; \
+			print (distutils.sysconfig.get_python_lib(0,0));"`
+	fi
+	AC_MSG_RESULT([$PYTHON_SITE_PKG])
+	AC_SUBST([PYTHON_SITE_PKG])
+
+	#
+	# libraries which must be linked in when embedding
+	#
+	AC_MSG_CHECKING(python extra libraries)
+	if test -z "$PYTHON_EXTRA_LIBS"; then
+	   PYTHON_EXTRA_LIBS=`$PYTHON -c "import distutils.sysconfig; \
+                conf = distutils.sysconfig.get_config_var; \
+                print (conf('LOCALMODLIBS') + ' ' + conf('LIBS'))"`
+	fi
+	AC_MSG_RESULT([$PYTHON_EXTRA_LIBS])
+	AC_SUBST(PYTHON_EXTRA_LIBS)
+
+	#
+	# linking flags needed when embedding
+	#
+	AC_MSG_CHECKING(python extra linking flags)
+	if test -z "$PYTHON_EXTRA_LDFLAGS"; then
+		PYTHON_EXTRA_LDFLAGS=`$PYTHON -c "import distutils.sysconfig; \
+			conf = distutils.sysconfig.get_config_var; \
+			print (conf('LINKFORSHARED'))"`
+	fi
+	AC_MSG_RESULT([$PYTHON_EXTRA_LDFLAGS])
+	AC_SUBST(PYTHON_EXTRA_LDFLAGS)
+
+	#
+	# final check to see if everything compiles alright
+	#
+	AC_MSG_CHECKING([consistency of all components of python development environment])
+	# save current global flags
+	ac_save_LIBS="$LIBS"
+	ac_save_CPPFLAGS="$CPPFLAGS"
+	LIBS="$ac_save_LIBS $PYTHON_LDFLAGS $PYTHON_EXTRA_LDFLAGS $PYTHON_EXTRA_LIBS"
+	CPPFLAGS="$ac_save_CPPFLAGS $PYTHON_CPPFLAGS"
+	AC_LANG_PUSH([C])
+	AC_LINK_IFELSE([
+		AC_LANG_PROGRAM([[#include <Python.h>]],
+				[[Py_Initialize();]])
+		],[pythonexists=yes],[pythonexists=no])
+	AC_LANG_POP([C])
+	# turn back to default flags
+	CPPFLAGS="$ac_save_CPPFLAGS"
+	LIBS="$ac_save_LIBS"
+
+	AC_MSG_RESULT([$pythonexists])
+
+        if test ! "x$pythonexists" = "xyes"; then
+	   AC_MSG_FAILURE([
+  Could not link test program to Python. Maybe the main Python library has been
+  installed in some non-standard library path. If so, pass it to configure,
+  via the LDFLAGS environment variable.
+  Example: ./configure LDFLAGS="-L/usr/non-standard-path/python/lib"
+  ============================================================================
+   ERROR!
+   You probably have to install the development version of the Python package
+   for your distribution.  The exact name of this package varies among them.
+  ============================================================================
+	   ])
+	  PYTHON_VERSION=""
+	fi
+
+	#
+	# all done!
+	#
+])
diff --git a/third-party/xdelta3/xdelta3/m4/ax_swig_python.m4 b/third-party/xdelta3/xdelta3/m4/ax_swig_python.m4
new file mode 100644
index 0000000000..8fd3df5a80
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/m4/ax_swig_python.m4
@@ -0,0 +1,64 @@
+# ===========================================================================
+#      http://www.gnu.org/software/autoconf-archive/ax_swig_python.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_SWIG_PYTHON([use-shadow-classes = {no, yes}])
+#
+# DESCRIPTION
+#
+#   Checks for Python and provides the $(AX_SWIG_PYTHON_CPPFLAGS), and
+#   $(AX_SWIG_PYTHON_OPT) output variables.
+#
+#   $(AX_SWIG_PYTHON_OPT) contains all necessary SWIG options to generate
+#   code for Python. Shadow classes are enabled unless the value of the
+#   optional first argument is exactly 'no'. If you need multi module
+#   support (provided by the AX_SWIG_MULTI_MODULE_SUPPORT macro) use
+#   $(AX_SWIG_PYTHON_LIBS) to link against the appropriate library. It
+#   contains the SWIG Python runtime library that is needed by the type
+#   check system for example.
+#
+# LICENSE
+#
+#   Copyright (c) 2008 Sebastian Huber <sebastian-huber@web.de>
+#   Copyright (c) 2008 Alan W. Irwin <irwin@beluga.phys.uvic.ca>
+#   Copyright (c) 2008 Rafael Laboissiere <rafael@laboissiere.net>
+#   Copyright (c) 2008 Andrew Collier <colliera@ukzn.ac.za>
+#
+#   This program is free software; you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by the
+#   Free Software Foundation; either version 2 of the License, or (at your
+#   option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+#   Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License along
+#   with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#   As a special exception, the respective Autoconf Macro's copyright owner
+#   gives unlimited permission to copy, distribute and modify the configure
+#   scripts that are the output of Autoconf when processing the Macro. You
+#   need not follow the terms of the GNU General Public License when using
+#   or distributing such scripts, even though portions of the text of the
+#   Macro appear in them. The GNU General Public License (GPL) does govern
+#   all other use of the material that constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the Autoconf
+#   Macro released by the Autoconf Archive. When you make and distribute a
+#   modified version of the Autoconf Macro, you may extend this special
+#   exception to the GPL to apply to your modified version as well.
+
+#serial 7
+
+AU_ALIAS([SWIG_PYTHON], [AX_SWIG_PYTHON])
+AC_DEFUN([AX_SWIG_PYTHON],[
+        AC_REQUIRE([AX_PKG_SWIG])
+        AC_REQUIRE([AX_PYTHON_DEVEL])
+        test "x$1" != "xno" || swig_shadow=" -noproxy"	# only an argument of exactly "no" adds -noproxy
+        AC_SUBST([AX_SWIG_PYTHON_OPT],[-python$swig_shadow])
+        AC_SUBST([AX_SWIG_PYTHON_CPPFLAGS],[$PYTHON_CPPFLAGS])
+])
diff --git a/third-party/xdelta3/xdelta3/plot.sh b/third-party/xdelta3/xdelta3/plot.sh
new file mode 100644
index 0000000000..8370ae7158
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/plot.sh
@@ -0,0 +1,25 @@
+#!/bin/sh
+# Usage: plot.sh DATAFILE OUTNAME
+G=/usr/bin/gnuplot
+# Plot columns 1:2 of DATAFILE to $D/OUTNAME as PNG, then move DATAFILE into $D.
+D=./output_dir
+[ $# -ge 2 ] || { echo "usage: $0 datafile outname" >&2; exit 2; }
+I=$1
+O=$D/$2
+# Quote every path so names with spaces or glob characters survive.
+"$G" > "$O" <<EOF
+
+#set terminal jpeg
+set terminal png
+
+f(x) = 1331000 + 30000 * (1 / (x - 2.45))
+
+# plot [x=1:10] [1:10] f(x)
+# plot sin(x), cos(x)
+# , f(x)
+
+plot "$I" using 1:2
+
+EOF
+
+mv -- "$I" "$D"
diff --git a/third-party/xdelta3/xdelta3/rcs_junk.cc b/third-party/xdelta3/xdelta3/rcs_junk.cc
new file mode 100644
index 0000000000..ac49644cb4
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/rcs_junk.cc
@@ -0,0 +1,1861 @@
+typedef struct _RcsWalker               RcsWalker;     /* table of per-file/per-version callbacks */
+typedef struct _RcsFile                 RcsFile;       /* one RCS file and its versions */
+typedef struct _RcsVersion              RcsVersion;    /* one revision of an RcsFile */
+typedef struct _RcsStats                RcsStats;      /* collection of statistic accumulators */
+typedef struct _IntStat                 IntStat;
+typedef struct _DblStat                 DblStat;
+typedef struct _BinCounter              BinCounter;
+typedef struct _ConfigOption            ConfigOption;  /* one row of an option table */
+
+struct _RcsWalker {  /* callbacks may be NULL; rcswalk_one tests dateorder/delta_orig/delta_date before use */
+  void*    (* initialize)    (void);
+  int      (* finalize)      (RcsStats* stats, void* data);
+  int      (* onefile)       (RcsFile* rcs, RcsStats* stats, void* data);
+  int      (* dateorder)     (RcsFile* rcs, RcsVersion* v, void* data);  /* called once per version by rcswalk_dateorder */
+  int      (* delta_orig)    (RcsFile* rcs, RcsVersion* from, RcsVersion *to, void* data);  /* called per parent/child pair by rcswalk_delta_orig */
+  int      (* delta_date)    (RcsFile* rcs, RcsVersion* from, RcsVersion *to, void* data);  /* called per date-adjacent pair by rcswalk_delta_date */
+  int      min_versions;  /* rcswalk_one skips files with fewer versions */
+  int      max_versions;  /* rcswalk_one skips files with more versions */
+  gboolean write_files;   /* when set, rcswalk_checkout also writes the contents to a temp file */
+};
+
+struct _RcsVersion {
+  RcsFile    *rcs;           /* owning file, set in rcswalk_build_graph */
+  time_t      date;          /* parsed from the rlog "date: " line via str2time */
+  int         dateseq;       /* index of this version in rcs->versions_date */
+  int         chain_length;  /* -1 until the graph passes assign it */
+  char       *vname;         /* revision string from the rlog "revision " line (heap copy) */
+  off_t       size;          /* number of bytes checked out into segment */
+  int         cc;            /* number of children (g_slist_length of children) */
+  guint8*     segment;       /* checked-out contents, NULL when not loaded */
+  char       *filename;      /* temp file holding the contents, when written */
+  RcsVersion *parent;        /* set by rcswalk_find_parent */
+  GSList     *children;      /* list of RcsVersion*, prepended by rcswalk_find_parent */
+  guint       on_trunk : 1;  /* set when vname contains exactly one '.' */
+};
+
+struct _RcsFile {
+  char       *filename;  /* heap copy of the RCS file path (g_strdup in rcswalk_one) */
+  char       *copyname;  /* caller's pointer, stored as-is; not freed by rcswalk_free */
+  char       *headname;  /* revision named on the rlog "head: " line, or NULL */
+  /* Counters: version_count comes from rlog, the rest from rcswalk_traverse_graph. */
+  int         version_count;
+  int         forward_count;
+  int         reverse_count;
+  int         branch_count;
+
+  RcsVersion *versions;        /* array of version_count entries, in rlog order */
+  RcsVersion **versions_date;  /* pointers into versions, sorted by date */
+
+  RcsVersion *head_version;  /* entry whose vname equals headname (may be NULL) */
+  RcsVersion *root_version;  /* set by rcswalk_find_parent for the parentless version */
+
+  off_t       total_size;  /* sum of all version sizes, accumulated in rcswalk_one */
+
+  guint       atflag : 1;
+};
+
+struct _RcsStats {
+  BinCounter *avg_version_size;  /* fed (date position, size) by rcswalk_dateorder */
+  IntStat* version_stat;         /* per-file version_count, added in rcswalk_one */
+  IntStat* forward_stat;         /* per-file forward_count, added in rcswalk_one */
+  IntStat* reverse_stat;         /* per-file reverse_count, added in rcswalk_one */
+  IntStat* branch_stat;          /* per-file branch_count, added in rcswalk_one */
+  IntStat* unencoded_stat;
+  IntStat* literal_stat;
+};
+
+struct _IntStat {  /* integer statistic; presumably updated by stat_int_add_item -- TODO confirm */
+  const char* name;
+  int count;
+  long long sum;
+  long long min;
+  long long max;
+
+  GArray *values;
+};
+
+struct _DblStat {  /* double-valued twin of _IntStat; presumably updated by stat_dbl_add_item -- TODO confirm */
+  const char* name;
+  int count;
+  double sum;
+  double min;
+  double max;
+
+  GArray *values;
+};
+
+struct _BinCounter {  /* named set of bins; filled through stat_bincount_add_item (bin, val) */
+  const char *name;
+  GPtrArray  *bins;
+};
+
+enum _ConfigArgument {  /* stored in ConfigOption.arg; names suggest whether an option takes a value -- TODO confirm */
+  CO_Required,
+  CO_Optional,
+  CO_None
+};
+
+typedef enum _ConfigArgument ConfigArgument;
+
+enum _ConfigOptionType {  /* stored in ConfigOption.type; presumably the type behind ConfigOption.value -- TODO confirm */
+  CD_Bool,
+  CD_Int32,
+  CD_Double,
+  CD_String
+};
+
+typedef enum _ConfigOptionType ConfigOptionType;
+
+enum _ConfigStyle {  /* stored in ConfigOption.style; exact meaning is defined by code outside this view */
+  CS_Ignore,
+  CS_UseAsFile,
+  CS_Use
+};
+
+typedef enum _ConfigStyle ConfigStyle;
+
+struct _ConfigOption {  /* the option tables in this file initialise the first six fields in order */
+  const char       *name;    /* long option name, e.g. "rcs_input_dir" */
+  const char       *abbrev;  /* short name, e.g. "id" */
+  ConfigStyle       style;
+  ConfigArgument    arg;
+  ConfigOptionType  type;
+  void             *value;   /* address of the variable bound to the option */
+  gboolean          found;   /* left zero by the static tables in this file */
+};
+
+/* RCS inspection: walker entry points and the statistics helpers.
+ */
+
+void                rcswalk_init   (void);
+int            rcswalk        (RcsWalker *walker, const char* copy_base);
+void                rcswalk_report (RcsStats* stats);
+
+IntStat*            stat_int_new      (const char* name);
+void                stat_int_add_item (IntStat* stat, long long v);  /* rcswalk_one adds one value per processed file */
+void                stat_int_report   (IntStat* stat);
+
+DblStat*            stat_dbl_new      (const char* name);
+void                stat_dbl_add_item (DblStat* stat, double v);
+void                stat_dbl_report   (DblStat* stat);
+
+BinCounter*         stat_bincount_new      (const char* name);
+void                stat_bincount_add_item (BinCounter* bc, int bin, double val);  /* rcswalk_dateorder passes (date position, size) */
+void                stat_bincount_report   (BinCounter* bc);
+
+/* Experiment configuration: option registration, parsing and output.
+ */
+
+void                config_register   (ConfigOption *opts, int nopts);
+int            config_parse      (const char* config_file);
+int            config_done       (void);
+void                config_help       (void);
+void                config_set_string (const char* var, const char* val);
+int            config_clear_dir  (const char* dir);
+int            config_create_dir (const char* dir);
+FILE*               config_output     (const char* fmt, ...);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+#include "rcswalk.h"
+#include "edsio.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <math.h>
+
+#undef BUFSIZE
+#define BUFSIZE (1<<14)  /* size of readbuf and initial segment allocation in rcswalk_checkout */
+
+char       *tmp_file_1;              /* first shared temp-file name handed out by rcswalk_checkout */
+gboolean    tmp_file_1_free = TRUE;  /* TRUE while no version is using tmp_file_1 */
+char       *tmp_file_2;              /* second shared temp-file name */
+gboolean    tmp_file_2_free = TRUE;  /* TRUE while no version is using tmp_file_2 */
+
+int         skip_count;     /* files skipped by rcswalk_load (too many versions / no head) */
+int         small_count;    /* files with fewer than walker->min_versions versions */
+int         large_count;    /* files with more than walker->max_versions versions */
+int         process_count;  /* files actually processed by rcswalk_one */
+
+extern time_t str2time (char const *, time_t, long);  /* defined elsewhere; used to parse rlog dates */
+
+static guint8 readbuf[BUFSIZE];  /* scratch buffer for each fread in rcswalk_checkout */
+
+static const char* rcswalk_input_dir = NULL;   /* bound to option "rcs_input_dir" below */
+static const char* config_output_base = NULL;  /* bound to option "config_output_base" below */
+static const char* config_output_dir = NULL;
+static const char* rcswalk_experiment = NULL;  /* bound to option "rcswalk_experiment" below */
+
+static ConfigOption rcswalk_options[] = {  /* fields: name, abbrev, style, arg, type, value */
+  { "rcswalk_experiment", "ex", CS_Use,       CO_Required, CD_String, & rcswalk_experiment },
+  { "rcs_input_dir",      "id", CS_UseAsFile, CO_Required, CD_String, & rcswalk_input_dir }
+};
+
+static ConfigOption config_options[] = {
+  { "config_output_base", "ob", CS_Ignore, CO_Required, CD_String, & config_output_base }
+};
+
+
+void
+rcswalk_free_segment (RcsVersion *v)
+{
+  /* Release V's contents; a shared temp-file slot is recycled, not freed. */
+  char *name = v->filename;
+  g_free (v->segment);		/* g_free is a no-op on NULL */
+  v->segment = NULL;
+  v->filename = NULL;
+
+  if (name == tmp_file_1)
+    tmp_file_1_free = TRUE;
+  else if (name == tmp_file_2)
+    tmp_file_2_free = TRUE;
+  else
+    g_free (name);
+}
+
+int
+rcswalk_checkout (RcsFile* rcs, RcsWalker* walker, RcsVersion *v)
+{
+  /* Check out revision V of RCS with co(1) into v->segment / v->size and,
+   * when walker->write_files is set, into whichever temp file is free.
+   * Returns 0 on success, a non-zero errno-style code on failure. */
+  FILE* out;
+  char cmdbuf[1024];
+  size_t nread;
+  int alloc = BUFSIZE;
+  int pos = 0;
+  int err;
+
+  /* NOTE(review): vname and filename reach the shell unquoted. */
+  if (snprintf (cmdbuf, sizeof cmdbuf, "co -ko -p%s %s 2>/dev/null\n",
+		v->vname, rcs->filename) >= (int) sizeof cmdbuf)
+    return ENAMETOOLONG;
+
+  g_assert (! v->segment);
+
+  if (! (out = popen (cmdbuf, "r")))
+    {
+      g_warning ("popen failed: %s: %s", cmdbuf, g_strerror (errno));
+      return errno;
+    }
+
+  v->segment = g_malloc (alloc);
+  while ((nread = fread (readbuf, 1, BUFSIZE, out)) > 0)
+    {
+      if (pos + (int) nread > alloc)
+	{
+	  alloc *= 2;
+	  v->segment = g_realloc (v->segment, alloc);
+	}
+      memcpy (v->segment + pos, readbuf, nread);
+      pos += (int) nread;
+    }
+
+  /* fread returns size_t, so failure shows up in ferror, never as < 0. */
+  if (ferror (out))
+    {
+      err = errno;
+      g_warning ("fread failed: %s", g_strerror (err));
+      pclose (out);
+      return err ? err : EIO;
+    }
+
+  if (pclose (out) < 0)
+    {
+      g_warning ("pclose failed");
+      return errno;
+    }
+
+  v->size = pos;
+
+  if (walker->write_files)
+    {
+      char* file = NULL;
+      if (! file && tmp_file_1_free)
+	{
+	  file = tmp_file_1;
+	  tmp_file_1_free = FALSE;
+	}
+      if (! file && tmp_file_2_free)
+	{
+	  file = tmp_file_2;
+	  tmp_file_2_free = FALSE;
+	}
+      g_assert (file);
+      v->filename = file;
+
+      if (! (out = fopen (file, "w")))
+	{
+	  g_warning ("fopen failed: %s\n", file);
+	  return errno;
+	}
+
+      /* A zero-size fwrite returns 0, so only non-empty data is written. */
+      if (v->size > 0 && fwrite (v->segment, v->size, 1, out) != 1)
+	{
+	  err = errno;
+	  g_warning ("fwrite failed: %s\n", file);
+	  fclose (out);
+	  return err ? err : EIO;
+	}
+
+      if (fclose (out) < 0)
+	{
+	  g_warning ("fclose failed: %s\n", file);
+	  return errno;
+	}
+    }
+  return 0;
+}
+
+int
+rcswalk_delta_date (RcsFile* rcs, RcsWalker* walker, void* data)
+{
+  /* Run walker->delta_date on each pair of date-adjacent versions (from =
+   * entry i+1, to = entry i); the first non-zero result is returned. */
+  RcsVersion *from = NULL;
+  RcsVersion *to = NULL;
+  int idx = 0;
+  int rc;
+
+  while (idx < (rcs->version_count-1))
+    {
+      from = rcs->versions_date[idx+1];
+      to = rcs->versions_date[idx];
+      idx += 1;
+      if (! to->segment)
+	{
+	  rc = rcswalk_checkout (rcs, walker, to);
+	  if (rc) return rc;
+	}
+      rc = rcswalk_checkout (rcs, walker, from);
+      if (rc) return rc;
+
+      rc = walker->delta_date (rcs, from, to, data);
+      if (rc) return rc;
+
+      rcswalk_free_segment (to);
+    }
+
+  if (from) rcswalk_free_segment (from);
+  if (to) rcswalk_free_segment (to);
+  return 0;
+}
+
+int
+rcswalk_delta_orig (RcsFile* rcs, RcsWalker* walker, RcsVersion* version, int *count, void* data)
+{
+  int ret;
+  GSList *c;
+  RcsVersion *child;
+  /* Depth-first walk: hand each (VERSION, child) pair to walker->delta_orig, bumping *COUNT per pair. */
+  for (c = version->children; c; c = c->next)
+    {
+      gboolean reverse;
+
+      child = c->data;
+
+      if (! version->segment)  /* reload: the segment is released after each child below */
+	{
+	  if ((ret = rcswalk_checkout (rcs, walker, version))) {
+	    return ret;
+	  }
+	}
+
+      if ((ret = rcswalk_checkout (rcs, walker, child))) {
+	return ret;
+      }
+
+      reverse = version->on_trunk && child->on_trunk;  /* both on trunk: pass (child, version) instead */
+
+      (* count) += 1;
+
+      if ((ret = walker->delta_orig (rcs, reverse ? child : version, reverse ? version : child, data))) {
+	return ret;
+      }
+
+      rcswalk_free_segment (version);  /* child keeps its segment for the recursive call */
+
+      if ((ret = rcswalk_delta_orig (rcs, walker, child, count, data))) {
+	return ret;
+      }
+    }
+
+  rcswalk_free_segment (version);  /* also covers a version without children */
+  return 0;
+}
+
+int
+rcswalk_dateorder (RcsFile* rcs, RcsWalker *walker, RcsStats *stats, void* data)
+{
+  /* Check out each version in date order, record its size under its
+   * position, run walker->dateorder on it, then release it. */
+  int seq = 0;
+  int rc;
+
+  while (seq < rcs->version_count)
+    {
+      RcsVersion *cur = rcs->versions_date[seq];
+      rc = rcswalk_checkout (rcs, walker, cur);
+      if (rc) return rc;
+
+      stat_bincount_add_item (stats->avg_version_size, seq, cur->size);
+
+      rc = walker->dateorder (rcs, cur, data);
+      if (rc) return rc;
+      rcswalk_free_segment (cur);
+      seq += 1;
+    }
+
+  return 0;
+}
+
+gboolean
+rcswalk_match (char** line_p, char* str)
+{
+  /* If *LINE_P begins with STR, advance *LINE_P past it and return TRUE;
+   * otherwise leave *LINE_P untouched and return FALSE. */
+  int prefix_len = strlen (str);
+  char *line = *line_p;
+
+  if (strncmp (line, str, prefix_len) != 0)
+    return FALSE;
+  *line_p = line + prefix_len;
+  return TRUE;
+}
+
+void
+rcswalk_find_parent (RcsFile *rcs, GHashTable* hash, RcsVersion *v)
+{
+  char *lastdot;
+  char  mbuf[1024];
+  int   lastn;
+  RcsVersion *p;
+  /* Derive V's parent from its revision string, looking candidates up in HASH (vname -> RcsVersion*). */
+  strcpy (mbuf, v->vname);
+
+  if (! (lastdot = strchr (mbuf, '.')))
+    abort ();  /* a revision string without any '.' is not handled */
+
+  if (! (lastdot = strchr (lastdot+1, '.')))
+    v->on_trunk = TRUE;  /* exactly one dot, e.g. "1.4" */
+
+  lastdot = strrchr (mbuf, '.');
+  lastn = atoi (lastdot + 1);
+
+  do  /* rewrite mbuf to the next candidate parent until the hash knows it */
+    {
+      if (lastn == 1)
+	{
+	  (*lastdot) = 0;  /* drop the trailing ".1" */
+
+	  if (strcmp (mbuf, "1") == 0)
+	    {
+	      /* Assuming the first version is always "1.1".
+	       */
+	      rcs->root_version = v;
+	      return;
+	    }
+	  else if (! (lastdot = strrchr (mbuf, '.')))
+	    {
+	      int i = 1;
+	      int br = atoi (mbuf) - 1;
+	      RcsVersion *p2 = NULL;
+
+	      /* Now we have something like "2.1" and need to
+	       * search for the highest "1.x" version.
+	       */
+
+	      do
+		{
+		  sprintf (mbuf, "%d.%d", br, i++);
+		  p = p2;  /* remembers the last version that was found */
+		}
+	      while ((p2 = g_hash_table_lookup (hash, mbuf)));
+
+	      if (p == NULL)  /* nothing on the previous major number: V is the root */
+		{
+		  rcs->root_version = v;
+		  return;
+		}
+
+	      break;
+	    }
+	  else
+	    {
+	      /* 1.2.3.1 => 1.2 */
+	      (*lastdot) = 0;
+	      lastdot = strrchr (mbuf, '.');
+	      lastn = atoi (lastdot + 1);
+	    }
+	}
+      else
+	{
+	  lastn -= 1;  /* try the previous revision on the same branch */
+	  sprintf (lastdot, ".%d", lastn);
+	}
+    }
+  while (! (p = g_hash_table_lookup (hash, mbuf)));
+
+  g_assert (p);
+
+  v->parent = p;
+
+  p->children = g_slist_prepend (p->children, v);
+}
+
+int
+rcswalk_traverse_graph (RcsFile* rcs, RcsVersion* version, RcsVersion *parent)
+{
+  GSList *c;
+  int distance = -1;
+  /* Recursive pass over VERSION's subtree; returns chain_length + 1 if the head is in it, else -1. */
+  version->cc = g_slist_length (version->children);
+
+  if (version->cc > 1)
+    rcs->branch_count += (version->cc - 1);  /* every child beyond the first counts as a branch */
+
+  if (parent)
+    {
+      /* Ensure that there is proper date ordering. */
+      if (version->date <= parent->date)
+	version->date = parent->date + 1;
+
+      if (parent->on_trunk && version->on_trunk)  /* trunk-to-trunk edges count as reverse */
+	rcs->reverse_count += 1;
+      else
+	rcs->forward_count += 1;
+    }
+
+  for (c = version->children; c; c = c->next)
+    {
+      int c_dist = rcswalk_traverse_graph (rcs, c->data, version);
+
+      distance = MAX (distance, c_dist);
+    }
+
+  if (version == rcs->head_version)
+    distance = 0;  /* the head itself is zero steps from the head */
+
+  if (distance >= 0)
+    {
+      version->chain_length = distance;
+
+      return distance + 1;
+    }
+
+  return -1;  /* head not reachable below this version; chain_length left untouched */
+}
+
+void
+rcswalk_compute_chain_length (RcsFile* rcs, RcsVersion* version, RcsVersion *parent)
+{
+  /* Top-down pass: a version still at -1 gets its parent's length + 1. */
+  GSList *kid = version->children;
+
+  if (parent == NULL)
+    {
+      g_assert (version->chain_length >= 0);
+    }
+  else if (version->chain_length < 0)
+    version->chain_length = parent->chain_length + 1;
+
+  while (kid != NULL)
+    {
+      rcswalk_compute_chain_length (rcs, kid->data, version);
+      kid = kid->next;
+    }
+}
+
+int
+rcswalk_date_compare (const void* a, const void* b)
+{
+  /* qsort order by date; compare, as a time_t difference can overflow int. */
+  time_t da = (*(RcsVersion * const *) a)->date;
+  time_t db = (*(RcsVersion * const *) b)->date;
+  return (da > db) - (da < db);
+}
+
+int
+rcswalk_build_graph (RcsFile* rcs)
+{
+  /* Link the loaded versions into a tree, compute chain lengths and fill
+   * versions_date sorted by date.  Returns 0, or -1 if no root was found. */
+  GHashTable* hash = g_hash_table_new (g_str_hash, g_str_equal);
+  int i;
+
+  for (i = 0; i < rcs->version_count; i += 1)
+    g_hash_table_insert (hash, rcs->versions[i].vname, rcs->versions + i);
+
+  for (i = 0; i < rcs->version_count; i += 1)
+    {
+      RcsVersion *v = rcs->versions + i;
+      v->chain_length = -1;
+      v->rcs = rcs;
+      rcswalk_find_parent (rcs, hash, v);
+    }
+
+  rcs->head_version = g_hash_table_lookup (hash, rcs->headname);
+  g_hash_table_destroy (hash);	/* the table is not needed past this point */
+
+  if (! rcs->root_version)	/* the traversals below would dereference NULL */
+    {
+      g_warning ("no root version: %s", rcs->filename);
+      return -1;
+    }
+
+  rcswalk_traverse_graph (rcs, rcs->root_version, NULL);
+  rcswalk_compute_chain_length (rcs, rcs->root_version, NULL);
+
+  for (i = 0; i < rcs->version_count; i += 1)
+    rcs->versions_date[i] = rcs->versions + i;
+
+  qsort (rcs->versions_date, rcs->version_count, sizeof (RcsVersion*), & rcswalk_date_compare);
+
+  for (i = 0; i < rcs->version_count; i += 1)
+    rcs->versions_date[i]->dateseq = i;
+
+  return 0;
+}
+
+#define HEAD_STATE 0	/* reading the rlog header ("head: ", "total revisions: ") */
+#define BAR_STATE 1	/* waiting for the dashed separator line */
+#define REV_STATE 2	/* waiting for a "revision " line */
+#define DATE_STATE 3	/* waiting for the matching "date: " line */
+
+int
+rcswalk_load (RcsFile *rcs, gboolean *skip)
+{
+  FILE* rlog;
+  char cmdbuf[1024];
+  char oneline[1024], *oneline_p;
+  char rbuf[1024];
+  int version_i = 0, ret;
+  int read_state = HEAD_STATE;
+  /* Parse `rlog` output for RCS, then build its version graph; sets *SKIP for unusable files. */
+  if (snprintf (cmdbuf, sizeof cmdbuf, "rlog %s", rcs->filename) >= (int) sizeof cmdbuf)
+    return ENAMETOOLONG;	/* an unbounded sprintf would overrun cmdbuf here */
+  if (! (rlog = popen (cmdbuf, "r")))
+    {
+      g_warning ("popen failed: %s", cmdbuf);
+      return errno;
+    }
+
+  rcs->headname = NULL;
+
+  while (fgets (oneline, 1024, rlog))
+    {
+      oneline_p = oneline;
+
+      if (read_state == HEAD_STATE && rcswalk_match (& oneline_p, "total revisions: "))
+	{
+	  if (sscanf (oneline_p, "%d", & rcs->version_count) != 1)
+	    goto badscan;
+
+	  rcs->versions = g_new0 (RcsVersion, rcs->version_count);
+	  rcs->versions_date = g_new (RcsVersion*, rcs->version_count);
+	  read_state = BAR_STATE;
+	}
+      else if (read_state == HEAD_STATE && rcswalk_match (& oneline_p, "head: "))
+	{
+	  if (sscanf (oneline_p, "%s", rbuf) != 1)
+	    goto badscan;
+
+	  rcs->headname = g_strdup (rbuf);
+	  read_state = HEAD_STATE; /* no change */
+	}
+      else if (read_state == BAR_STATE && rcswalk_match (& oneline_p, "----------------------------"))
+	{
+	  read_state = REV_STATE;
+	}
+      else if (read_state == REV_STATE && rcswalk_match (& oneline_p, "revision "))
+	{
+	  if (version_i >= rcs->version_count)
+	    {
+	      /* jkh likes to insert the rlog of one RCS file into the log
+	       * message of another, and this can confuse things.  Why, oh why,
+	       * doesn't rlog have an option to not print the log?
+	       */
+	      fprintf (stderr, "rcswalk: too many versions: skipping file %s\n", rcs->filename);
+	      *skip = TRUE;
+	      skip_count += 1;
+	      pclose (rlog);
+	      return 0;
+	    }
+
+	  if (sscanf (oneline_p, "%s", rbuf) != 1)
+	    goto badscan;
+
+	  rcs->versions[version_i].vname = g_strdup (rbuf);
+	  read_state = DATE_STATE;
+
+	  g_assert (rcs->versions[version_i].vname);
+	}
+      else if (read_state == DATE_STATE && rcswalk_match (& oneline_p, "date: "))
+	{
+	  char* semi = strchr (oneline_p, ';');
+
+	  if (! semi)
+	    goto badscan;
+
+	  strncpy (rbuf, oneline_p, semi - oneline_p);
+
+	  rbuf[semi - oneline_p] = 0;
+
+	  rcs->versions[version_i].date = str2time (rbuf, 0, 0);
+
+	  version_i += 1;
+	  read_state = BAR_STATE;
+	}
+    }
+
+  if (! rcs->headname)
+    {
+      fprintf (stderr, "rcswalk: no head version: skipping file %s\n", rcs->filename);
+      *skip = TRUE;
+      skip_count += 1;
+      pclose (rlog);
+      return 0;
+    }
+
+  if (pclose (rlog) < 0)
+    {
+      g_warning ("pclose failed: %s", cmdbuf);
+      return errno;
+    }
+
+  if ((ret = rcswalk_build_graph (rcs))) {
+    return ret;
+  }
+
+  return 0;
+
+ badscan:
+
+  pclose (rlog);
+
+  g_warning ("rlog syntax error");
+  return -1;
+}
+
+/* Free RCS: per-version names and child lists, then its strings and arrays. */
+void
+rcswalk_free (RcsFile* rcs)
+{
+  int n;
+  for (n = 0; n < rcs->version_count; n += 1)
+    {
+      RcsVersion* ver = rcs->versions + n;
+      g_free (ver->vname);
+      g_slist_free (ver->children);
+    }
+  g_free (rcs->versions_date);
+  g_free (rcs->versions);
+  g_free (rcs->headname);
+  g_free (rcs->filename);
+  g_free (rcs);
+}
+
+/* Load one RCS file, check it against the walker's version-count limits and,
+ * unless it is skipped, run the enabled walker passes and record its stats.
+ * COPYFILE is stored, not copied, and is not freed here.  Returns 0 on success
+ * (skipped files included) or the first non-zero code from a failing step;
+ * the RcsFile is now released on every path instead of leaking on errors. */
+int
+rcswalk_one (char* rcsfile, char* copyfile, RcsWalker* walker, RcsStats* stats, void* data)
+{
+  RcsFile* rcs;
+  int i, ret;
+  long long maxsize = 0;
+  gboolean skip = FALSE;
+
+  rcs = g_new0 (RcsFile, 1);
+  rcs->filename = g_strdup (rcsfile);
+  rcs->copyname = copyfile;
+
+  if ((ret = rcswalk_load (rcs, & skip)))
+    goto done;
+
+  if (walker->min_versions > rcs->version_count)
+    {
+      small_count += 1;
+      skip = TRUE;
+    }
+
+  if (walker->max_versions < rcs->version_count)
+    {
+      large_count += 1;
+      skip = TRUE;
+    }
+
+  if (! skip)
+    {
+      process_count += 1;
+
+      if (walker->dateorder && (ret = rcswalk_dateorder (rcs, walker, stats, data)))
+	goto done;
+
+      if (walker->delta_orig)
+	{
+	  int count = 0;
+
+	  if ((ret = rcswalk_delta_orig (rcs, walker, rcs->root_version, & count, data)))
+	    goto done;
+
+	  g_assert (count == (rcs->version_count - 1));
+	}
+
+      if (walker->delta_date && (ret = rcswalk_delta_date (rcs, walker, data)))
+	goto done;
+
+      /* Accumulate the total size and the largest single version size.  */
+      for (i = 0; i < rcs->version_count; i += 1)
+	{
+	  rcs->total_size += rcs->versions[i].size;
+	  maxsize = MAX (rcs->versions[i].size, maxsize);
+	}
+
+      stat_int_add_item (stats->version_stat, rcs->version_count);
+      stat_int_add_item (stats->forward_stat, rcs->forward_count);
+      stat_int_add_item (stats->reverse_stat, rcs->reverse_count);
+      stat_int_add_item (stats->branch_stat, rcs->branch_count);
+      stat_int_add_item (stats->unencoded_stat, rcs->total_size);
+      stat_int_add_item (stats->literal_stat, maxsize);
+
+      if (walker->onefile && (ret = walker->onefile (rcs, stats, data)))
+	goto done;
+    }
+
+ done:
+  rcswalk_free (rcs);
+  return ret; /* still 0 when every step above succeeded */
+}
+
+/* Walk DIR recursively.  Entries named "*,v" (more than two characters) go to
+ * rcswalk_one; any other entry that is a directory is walked in turn.  When
+ * COPY_DIR is non-NULL it is created first and each entry's path below it is
+ * passed along as the copy name.
+ *
+ * Returns 0 on success, config_create_dir's code, errno after an opendir or
+ * closedir failure, or -1 as soon as any entry fails.  The per-entry path
+ * strings are now freed on that failure path too; they used to leak.  */
+int
+rcswalk_dir (const char* dir, RcsWalker* walker, RcsStats* stats, void* data, const char* copy_dir)
+{
+  int ret;
+  DIR* thisdir;
+  struct dirent* ent;
+  char* fullname = NULL;
+  char* copyname = NULL;
+
+  if (copy_dir && (ret = config_create_dir (copy_dir)))
+    return ret;
+
+  if (! (thisdir = opendir (dir)))
+    {
+      g_warning ("opendir failed: %s", dir);
+      return errno;
+    }
+
+  while ((ent = readdir (thisdir)))
+    {
+      char* name = ent->d_name;
+      int len;
+      struct stat buf;
+
+      if (strcmp (name, ".") == 0 || strcmp (name, "..") == 0)
+	continue;
+
+      len = strlen (name);
+
+      fullname = g_strdup_printf ("%s/%s", dir, name);
+      copyname = copy_dir ? g_strdup_printf ("%s/%s", copy_dir, name) : NULL;
+
+      /* RCS files end in ",v"; other entries matter only as directories.  */
+      if (len > 2 && strcmp (name + len - 2, ",v") == 0)
+	{
+	  if ((ret = rcswalk_one (fullname, copyname, walker, stats, data)))
+	    goto abort;
+	}
+      else
+	{
+	  if (stat (fullname, & buf) < 0)
+	    {
+	      g_warning ("stat failed: %s\n", fullname);
+	      goto abort;
+	    }
+
+	  if (S_ISDIR (buf.st_mode) &&
+	      (ret = rcswalk_dir (fullname, walker, stats, data, copyname)))
+	    goto abort;
+	}
+
+      g_free (fullname);
+      g_free (copyname);
+      fullname = copyname = NULL;
+    }
+
+  if (closedir (thisdir) < 0)
+    {
+      g_warning ("closedir failed: %s", dir);
+      return errno;
+    }
+
+  return 0;
+
+ abort:
+
+  /* g_free accepts NULL, so this is safe wherever the jump came from.  */
+  g_free (fullname);
+  g_free (copyname);
+  closedir (thisdir);
+
+  return -1;
+}
+/* Register the rcswalk_options table with the config module.  */
+void
+rcswalk_init (void)
+{
+  config_register (rcswalk_options, ARRAY_SIZE (rcswalk_options));
+}
+
+/* Run WALKER over every RCS file below rcswalk_input_dir, mirroring names
+ * under COPY_BASE when it is non-NULL.  Resets the global counters, sets up
+ * the statistics, calls the walker's optional initialize/finalize hooks and
+ * prints a summary to stderr on success.  Returns 0 or the first failing
+ * step's code; the temporary files are now unlinked on failure as well. */
+int
+rcswalk (RcsWalker *walker, const char* copy_base)
+{
+  void* data = NULL;
+  RcsStats stats;
+  int ret;
+
+  skip_count = 0;
+  small_count = 0;
+  process_count = 0;
+  large_count = 0;
+
+  memset (& stats, 0, sizeof (stats));
+
+  stats.avg_version_size = stat_bincount_new ("AvgVersionSize"); /* @@@ leak */
+  stats.version_stat = stat_int_new ("Version"); /* @@@ leak */
+  stats.forward_stat = stat_int_new ("Forward"); /* @@@ leak */
+  stats.reverse_stat = stat_int_new ("Reverse"); /* @@@ leak */
+  stats.branch_stat  = stat_int_new ("Branch"); /* @@@ leak */
+  stats.unencoded_stat = stat_int_new ("Unencoded"); /* @@@ leak */
+  stats.literal_stat   = stat_int_new ("Literal"); /* @@@ leak */
+
+  tmp_file_1 = g_strdup_printf ("%s/rcs1.%d", g_get_tmp_dir (), (int) getpid ());
+  tmp_file_2 = g_strdup_printf ("%s/rcs2.%d", g_get_tmp_dir (), (int) getpid ());
+
+  if (walker->initialize)
+    data = walker->initialize ();
+
+  if ((ret = rcswalk_dir (rcswalk_input_dir, walker, & stats, data, copy_base)))
+    goto done;
+
+  if (walker->finalize && (ret = walker->finalize (& stats, data)))
+    goto done;
+
+  fprintf (stderr, "rcswalk: processed %d files: too small %d; too large: %d; damaged: %d\n", process_count, small_count, large_count, skip_count);
+
+ done:
+  unlink (tmp_file_1);
+  unlink (tmp_file_2);
+  return ret;
+}
+
+/* Statistics
+ */
+/* Emit the report for every statistic in SET, in a fixed order.  */
+void
+rcswalk_report (RcsStats* set)
+{
+  stat_bincount_report (set->avg_version_size);
+  stat_int_report (set->version_stat);
+  stat_int_report (set->forward_stat);
+  stat_int_report (set->reverse_stat);
+  stat_int_report (set->branch_stat);
+  stat_int_report (set->unencoded_stat);
+  stat_int_report (set->literal_stat);
+}
+
+/* Int stat
+ */
+/* Allocate a zeroed IntStat labelled NAME (pointer kept, not copied) with an
+ * empty array of long long samples. */
+IntStat*
+stat_int_new (const char* name)
+{
+  IntStat* fresh = g_new0 (IntStat, 1);
+  fresh->values = g_array_new (FALSE, FALSE, sizeof (long long));
+  fresh->name = name;
+  return fresh;
+}
+
+/* Add sample V to STAT: update min, max, count and sum, then append V to the
+ * value array.  The first sample now seeds max as well as min; previously
+ * max started from 0, so all-negative data reported a max of 0. */
+void
+stat_int_add_item (IntStat* stat, long long v)
+{
+  stat->min = stat->count ? MIN (v, stat->min) : v;
+  stat->max = stat->count ? MAX (v, stat->max) : v;
+  stat->count += 1;
+  stat->sum += v;
+  g_array_append_val (stat->values, v);
+}
+
+/* Population standard deviation of STAT's samples: the square root of the
+ * mean squared distance from the mean (sum / count). */
+double
+stat_int_stddev (IntStat *stat)
+{
+  const double mean = (double) stat->sum / (double) stat->count;
+  double sq_total = 0;
+  int k = 0;
+
+  while (k < stat->count)
+    {
+      const double dev = mean - (double) g_array_index (stat->values, long long, k);
+
+      sq_total += dev * dev;
+      k += 1;
+    }
+
+  return sqrt (sq_total / (double) stat->count);
+}
+
+/* qsort comparator; compares instead of subtracting, which truncated to int. */
+int
+ll_comp (const void* a, const void* b)
+{
+  const long long lla = * (const long long*) a, llb = * (const long long*) b;
+  return (lla > llb) - (lla < llb);
+}
+/* Write STAT's cumulative population and sum histograms (sorts the samples).  */
+void
+stat_int_histogram (IntStat *stat)
+{
+  int i, consec;
+  long long cum = 0;
+
+  FILE* p_out;
+  FILE* s_out;
+
+  if (! (p_out = config_output ("%s.pop.hist", stat->name)))
+    abort ();
+
+  if (! (s_out = config_output ("%s.sum.hist", stat->name)))
+    abort ();
+  /* Sort in place so that equal values form consecutive runs.  */
+  qsort (stat->values->data, stat->count, sizeof (long long), ll_comp);
+
+  for (i = 0; i < stat->count; i += consec)
+    {
+      long long ix = g_array_index (stat->values, long long, i);
+      /* consec ends up as the number of samples equal to ix.  */
+      for (consec = 1; (i+consec) < stat->count; consec += 1)
+	{
+	  long long jx = g_array_index (stat->values, long long, i+consec);
+
+	  if (ix != jx)
+	    break;
+	}
+
+      cum += consec * g_array_index (stat->values, long long, i);
+      /* Per distinct value: cumulative share of samples (p_out), of sum (s_out).  */
+      fprintf (p_out, "%qd, %0.3f\n", g_array_index (stat->values, long long, i), (double) (i+consec) / (double) stat->count);
+      fprintf (s_out, "%qd, %0.3f\n", g_array_index (stat->values, long long, i), (double) cum / (double) stat->sum);
+    }
+
+  if (fclose (p_out) < 0 || fclose (s_out) < 0)
+    {
+      g_error ("fclose failed\n");
+    }
+}
+/* Write STAT's summary figures to "<name>.stat", then its histogram files.  */
+void
+stat_int_report (IntStat* stat)
+{
+  FILE* out;
+
+  if (! (out = config_output ("%s.stat", stat->name)))
+    abort ();
+
+  fprintf (out, "Name: %s\n", stat->name);
+  fprintf (out, "Count: %d\n", stat->count);
+  fprintf (out, "Min: %qd\n", stat->min);
+  fprintf (out, "Max: %qd\n", stat->max);
+  fprintf (out, "Sum: %qd\n", stat->sum);
+  fprintf (out, "Mean: %0.2f\n", (double) stat->sum / (double) stat->count);
+  fprintf (out, "Stddev: %0.2f\n", stat_int_stddev (stat));
+
+  if (fclose (out) < 0)
+    g_error ("fclose failed");
+
+  stat_int_histogram (stat);
+}
+
+/* Dbl stat
+ */
+
+/* Allocate a zeroed DblStat labelled NAME (pointer kept, not copied) with an
+ * empty array of double samples. */
+DblStat*
+stat_dbl_new (const char* name)
+{
+  DblStat* fresh = g_new0 (DblStat, 1);
+  fresh->values = g_array_new (FALSE, FALSE, sizeof (double));
+  fresh->name = name;
+  return fresh;
+}
+
+/* Add sample V to STAT: update min, max, count and sum, then append V to the
+ * value array.  The first sample now seeds max as well as min; previously
+ * max started from 0, so all-negative data reported a max of 0. */
+void
+stat_dbl_add_item (DblStat* stat, double v)
+{
+  stat->min = stat->count ? MIN (v, stat->min) : v;
+  stat->max = stat->count ? MAX (v, stat->max) : v;
+  stat->count += 1;
+  stat->sum += v;
+  g_array_append_val (stat->values, v);
+}
+
+/* Population standard deviation of STAT's samples: the square root of the
+ * mean squared distance from the mean (sum / count). */
+double
+stat_dbl_stddev (DblStat *stat)
+{
+  const double mean = stat->sum / stat->count;
+  double sq_total = 0;
+  int k = 0;
+
+  while (k < stat->count)
+    {
+      const double dev = mean - g_array_index (stat->values, double, k);
+
+      sq_total += dev * dev;
+      k += 1;
+    }
+
+  return sqrt (sq_total / stat->count);
+}
+
+/* qsort comparator for doubles.
+ *
+ * Returns 1, -1 or 0 according to the sign of *a - *b; a difference that is
+ * neither positive nor negative (zero, or NaN) compares as equal.
+ */
+int
+dbl_comp (const void* a, const void* b)
+{
+  const double* lhs = a;
+  const double* rhs = b;
+  const double delta = (*lhs) - (*rhs);
+
+  return (delta > 0.0) ? 1 : ((delta < 0.0) ? -1 : 0);
+}
+/* Write STAT's cumulative population and sum histograms (sorts the samples).  */
+void
+stat_dbl_histogram (DblStat *stat)
+{
+  int i, consec;
+  double cum = 0.0;
+
+  FILE* p_out;
+  FILE* s_out;
+
+  if (! (p_out = config_output ("%s.pop.hist", stat->name)))
+    abort ();
+
+  if (! (s_out = config_output ("%s.sum.hist", stat->name)))
+    abort ();
+  /* Sort in place so that equal values form consecutive runs.  */
+  qsort (stat->values->data, stat->count, sizeof (double), dbl_comp);
+
+  for (i = 0; i < stat->count; i += consec)
+    {
+      double ix = g_array_index (stat->values, double, i);
+      /* consec ends up as the number of samples equal to ix.  */
+      for (consec = 1; (i+consec) < stat->count; consec += 1)
+	{
+	  double jx = g_array_index (stat->values, double, i+consec);
+
+	  if (ix != jx)
+	    break;
+	}
+
+      cum += ((double) consec) * g_array_index (stat->values, double, i);
+      /* Per distinct value: cumulative share of samples (p_out), of sum (s_out).  */
+      fprintf (p_out, "%0.6f, %0.3f\n", g_array_index (stat->values, double, i), (double) (i+consec) / (double) stat->count);
+      fprintf (s_out, "%0.6f, %0.3f\n", g_array_index (stat->values, double, i), cum / stat->sum);
+    }
+
+  if (fclose (p_out) < 0 || fclose (s_out) < 0)
+    {
+      g_error ("fclose failed\n");
+    }
+}
+/* Write STAT's summary figures to "<name>.stat", then its histogram files.  */
+void
+stat_dbl_report (DblStat* stat)
+{
+  FILE* out;
+
+  if (! (out = config_output ("%s.stat", stat->name)))
+    abort ();
+
+  fprintf (out, "Name:   %s\n", stat->name);
+  fprintf (out, "Count:  %d\n", stat->count);
+  fprintf (out, "Min:    %0.6f\n", stat->min);
+  fprintf (out, "Max:    %0.6f\n", stat->max);
+  fprintf (out, "Sum:    %0.6f\n", stat->sum);
+  fprintf (out, "Mean:   %0.6f\n", stat->sum / stat->count);
+  fprintf (out, "Stddev: %0.6f\n", stat_dbl_stddev (stat));
+
+  if (fclose (out) < 0)
+    g_error ("fclose failed");
+
+  stat_dbl_histogram (stat);
+}
+
+/* Bincount
+ */
+/* Allocate a zeroed BinCounter labelled NAME (pointer kept, not copied) with
+ * an empty table of bins. */
+BinCounter*
+stat_bincount_new (const char* name)
+{
+  BinCounter* fresh = g_new0 (BinCounter, 1);
+  fresh->bins = g_ptr_array_new ();
+  fresh->name = name;
+  return fresh;
+}
+
+/* Append VAL to bin number BIN of BC, growing the bin table and creating the
+ * bin's array of doubles on first use. */
+void
+stat_bincount_add_item (BinCounter* bc, int bin, double val)
+{
+  GArray* samples;
+  int slot;
+
+  if (bin >= bc->bins->len)
+    g_ptr_array_set_size (bc->bins, bin+1);
+
+  samples = bc->bins->pdata[bin];
+  if (! samples)
+    {
+      samples = g_array_new (FALSE, TRUE, sizeof (double));
+      bc->bins->pdata[bin] = samples;
+    }
+
+  g_assert (samples);
+
+  slot = samples->len;
+  g_array_set_size (samples, slot + 1);
+  g_array_index (samples, double, slot) = val;
+}
+
+/* Write BC to "<name>.avg" (mean and count per bin) and "<name>.raw" (all
+ * samples per bin), one line per bin.  Bins that never received a sample are
+ * NULL slots; they are now reported as empty instead of being dereferenced. */
+void
+stat_bincount_report (BinCounter* bc)
+{
+  FILE *avg_out;
+  FILE *raw_out;
+  int i;
+
+  if (! (avg_out = config_output ("%s.avg", bc->name)))
+    abort ();
+
+  if (! (raw_out = config_output ("%s.raw", bc->name)))
+    abort ();
+
+  for (i = 0; i < bc->bins->len; i += 1)
+    {
+      GArray* one = bc->bins->pdata[i];
+      int n = one ? (int) one->len : 0;
+      double sum = 0.0;
+      int j;
+
+      for (j = 0; j < n; j += 1)
+	{
+	  double d = g_array_index (one, double, j);
+	  sum += d;
+	  fprintf (raw_out, "%e ", d);
+	}
+
+      fprintf (raw_out, "\n");
+      fprintf (avg_out, "%e %d\n", n ? sum / n : 0.0, n);
+    }
+
+  if (fclose (avg_out) < 0)
+    g_error ("fclose failed");
+  if (fclose (raw_out) < 0)
+    g_error ("fclose failed");
+}
+
+/* Config stuff
+ */
+
+/* Make sure DIRNAME exists as a directory, calling mkdir (mode 0777) when
+ * stat fails.  Returns 0 on success, mkdir's errno, or ENOTDIR when the path
+ * exists but is no directory (that case used to return a stale errno). */
+int
+config_create_dir (const char* dirname)
+{
+  struct stat buf;
+
+  if (stat (dirname, & buf) < 0)
+    {
+      if (mkdir (dirname, 0777) < 0)
+	{
+	  int err = errno; /* saved first: fprintf may overwrite errno */
+	  fprintf (stderr, "mkdir failed: %s\n", dirname);
+	  return err;
+	}
+    }
+  else if (! S_ISDIR (buf.st_mode))
+    {
+      fprintf (stderr, "not a directory: %s\n", dirname);
+      return ENOTDIR;
+    }
+  return 0;
+}
+
+/* Run "rm -rf DIR" through the shell (no-op for NULL); always returns 0.  The
+ * command is heap-built now: sprintf into char[1024] overflowed on long paths.
+ * NOTE(review): DIR still reaches the shell unquoted. */
+int
+config_clear_dir (const char* dir)
+{
+  if (dir)
+    {
+      char* cmd = g_strdup_printf ("rm -rf %s", dir);
+      system (cmd);
+      g_free (cmd);
+    }
+  return 0;
+}
+
+static ConfigOption all_options[64];
+static int          option_count;
+
+/* Register the built-in config_options table on the first call only. */
+void
+config_init ()
+{
+  static gboolean registered = FALSE;
+  if (registered)
+    return;
+  registered = TRUE;
+  config_register (config_options, ARRAY_SIZE (config_options));
+}
+
+/* Append OPTS[0..NOPTS) to the global option table (built-ins are registered
+ * first); g_error instead of overrunning the fixed-size table as before. */
+void
+config_register (ConfigOption *opts, int nopts)
+{
+  int i;
+  config_init ();
+  if (nopts > (int) ARRAY_SIZE (all_options) - option_count)
+    g_error ("too many config options registered");
+  for (i = 0; i < nopts; i += 1)
+    all_options[option_count++] = opts[i];
+}
+
+/* Point the first option named VAR at VAL (pointer stored, not copied) and
+ * mark it found; a name that matches no option is silently ignored. */
+void
+config_set_string (const char* var, const char* val)
+{
+  ConfigOption *opt = all_options;
+  ConfigOption *end = all_options + option_count;
+
+  for (; opt < end; opt += 1)
+    {
+      if (strcmp (opt->name, var) != 0)
+	continue;
+      (* (const char**) opt->value) = val;
+      opt->found = TRUE;
+      break;
+    }
+}
+
+/* Read whitespace-separated "name value" pairs from CONFIG_FILE into the
+ * registered options.  A CO_None option takes no value and is simply set to
+ * TRUE; a pair whose name is not registered is skipped.  Returns 0 on
+ * success, errno if the file cannot be opened, -1 on a missing or malformed
+ * value.
+ *
+ * Fixed here: CD_Bool values are stored through opt->value (the old code
+ * assigned to the cast pointer), and both token reads are width-limited. */
+int
+config_parse (const char* config_file)
+{
+  FILE *in;
+  char oname[1024], value[1024];
+  int i;
+
+  if (! (in = fopen (config_file, "r")))
+    {
+      fprintf (stderr, "fopen failed: %s\n", config_file);
+      return errno;
+    }
+
+  for (;;)
+    {
+      ConfigOption *opt = NULL;
+
+      if (fscanf (in, "%1023s", oname) != 1)
+	break;
+
+      for (i = 0; i < option_count; i += 1)
+	{
+	  if (strcmp (oname, all_options[i].name) == 0)
+	    {
+	      opt = all_options + i;
+	      break;
+	    }
+	}
+
+      if (opt && opt->arg == CO_None)
+	{
+	  (* (gboolean*) opt->value) = TRUE;
+	  opt->found = TRUE;
+	  continue;
+	}
+
+      if (fscanf (in, "%1023s", value) != 1)
+	{
+	  fprintf (stderr, "no value for option: %s; file: %s\n", oname, config_file);
+	  goto abort;
+	}
+
+      if (! opt)
+	{
+	  /*fprintf (stderr, "unrecognized option: %s\n", oname);*/
+	  continue;
+	}
+
+      switch (opt->type)
+	{
+	case CD_Bool:
+	  if (strcasecmp (value, "yes") == 0 ||
+	      strcasecmp (value, "true") == 0 ||
+	      strcmp     (value, "1") == 0 ||
+	      strcasecmp (value, "on") == 0)
+	    {
+	      (* (gboolean*) opt->value) = TRUE;
+	    }
+	  else
+	    {
+	      (* (gboolean*) opt->value) = FALSE;
+	    }
+	  break;
+	case CD_Int32:
+	  if (sscanf (value, "%d", (gint32*) opt->value) != 1)
+	    {
+	      fprintf (stderr, "parse error for option: %s; file: %s\n", oname, config_file);
+	      goto abort;
+	    }
+	  break;
+	case CD_Double:
+	  if (sscanf (value, "%lf", (double*) opt->value) != 1)
+	    {
+	      fprintf (stderr, "parse error for option: %s; file: %s\n", oname, config_file);
+	      goto abort;
+	    }
+	  break;
+	case CD_String:
+	  (* (const char**) opt->value) = g_strdup (value);
+	  break;
+	}
+
+      opt->found = TRUE;
+    }
+
+  fclose (in);
+
+  return 0;
+
+ abort:
+
+  fclose (in);
+
+  return -1;
+}
+
+/* Build config_output_dir as "<config_output_base>/<key>".  The key joins
+ * "abbrev=value" for every option that was found and whose style is not
+ * CS_Ignore, separated by commas; a CS_UseAsFile string contributes only the
+ * part after its last '/'.  Always returns 0.
+ *
+ * The key is accumulated in a growable GString: the old fixed 1024-byte
+ * buffer could be overrun by strcat when option strings were long.  */
+int
+config_compute_output_dir ()
+{
+  char tmp[1024];
+  GString* key = g_string_new ("");
+  int i;
+  gboolean last = FALSE;
+
+  for (i = 0; i < option_count; i += 1)
+    {
+      ConfigOption *opt = all_options + i;
+
+      if (opt->style == CS_Ignore)
+	continue;
+
+      if (! opt->found)
+	continue;
+
+      if (last)
+	g_string_append (key, ",");
+
+      last = TRUE;
+
+      g_string_append (key, opt->abbrev);
+      g_string_append (key, "=");
+
+      switch (opt->type)
+	{
+	case CD_Bool:
+	  if (* (gboolean*) opt->value)
+	    g_string_append (key, "true");
+	  else
+	    g_string_append (key, "false");
+
+	  break;
+	case CD_Int32:
+	  sprintf (tmp, "%d", (* (gint32*) opt->value));
+	  g_string_append (key, tmp);
+	  break;
+	case CD_Double:
+	  sprintf (tmp, "%0.2f", (* (double*) opt->value));
+	  g_string_append (key, tmp);
+	  break;
+	case CD_String:
+	  if (opt->style == CS_UseAsFile)
+	    {
+	      const char* str = (* (const char**) opt->value);
+	      const char* ls = strrchr (str, '/');
+
+	      g_string_append (key, ls ? (ls + 1) : str);
+	    }
+	  else
+	    {
+	      g_string_append (key, (* (const char**) opt->value));
+	    }
+
+	  break;
+	}
+    }
+
+  config_output_dir = g_strdup_printf ("%s/%s", config_output_base, key->str);
+  g_string_free (key, TRUE);
+
+  return 0;
+}
+/* Check required options, recreate the output directory, write "Options".  */
+int
+config_done (void)
+{
+  int i, ret;
+  FILE *out;
+
+  for (i = 0; i < option_count; i += 1)
+    {
+      ConfigOption *opt = all_options + i;
+
+      if (! opt->found && opt->arg == CO_Required)
+	{
+	  fprintf (stderr, "required option not found: %s\n", all_options[i].name);
+	  return -1;
+	}
+    }
+  /* Compute the output directory's name, wipe it, then create it afresh.  */
+  if ((ret = config_compute_output_dir ())) {
+    return ret;
+  }
+
+  if ((ret = config_clear_dir (config_output_dir))) {
+    return ret;
+  }
+
+  if ((ret = config_create_dir (config_output_dir))) {
+    return ret;
+  }
+  /* Record every registered option and its current value.  */
+  if (! (out = config_output ("Options")))
+    abort ();
+
+  for (i = 0; i < option_count; i += 1)
+    {
+      ConfigOption *opt = all_options + i;
+
+      fprintf (out, "option: %s; value: ", all_options[i].name);
+
+      switch (opt->type)
+	{
+	case CD_Bool:
+
+	  fprintf (out, "%s", (* (gboolean*) opt->value) ? "TRUE" : "FALSE");
+
+	  break;
+	case CD_Int32:
+
+	  fprintf (out, "%d", (* (gint32*) opt->value));
+
+	  break;
+	case CD_Double:
+
+	  fprintf (out, "%0.2f", (* (double*) opt->value));
+
+	  break;
+	case CD_String:
+
+	  fprintf (out, "%s", (* (const char**) opt->value));
+
+	  break;
+	}
+
+      fprintf (out, "\n");
+    }
+
+  if (fclose (out))
+    {
+      fprintf (stderr, "fclose failed\n");
+      return errno;
+    }
+
+  return 0;
+}
+
+/* Describe OPT's argument requirement for the help listing: "required",
+ * "optional" or "no value", and "unknown" for any other arg code. */
+const char*
+config_help_arg (ConfigOption *opt)
+{
+  if (opt->arg == CO_Required)
+    return "required";
+
+  if (opt->arg == CO_Optional)
+    return "optional";
+
+  if (opt->arg == CO_None)
+    return "no value";
+  return "unknown";
+}
+
+/* Name OPT's value type for the help listing.  An option that takes no
+ * argument (CO_None) is always "boolean"; otherwise the name follows
+ * opt->type: "boolean", "int", "double" or "string", with "unknown" for any
+ * other type code. */
+const char*
+config_help_type (ConfigOption *opt)
+{
+  const char* label;
+
+  if (opt->arg == CO_None)
+    return "boolean";
+
+  if (opt->type == CD_Bool)
+    label = "boolean";
+  else if (opt->type == CD_Int32)
+    label = "int";
+  else if (opt->type == CD_Double)
+    label = "double";
+  else if (opt->type == CD_String)
+    label = "string";
+  else
+    label = "unknown";
+
+  return label;
+}
+
+/* Print every registered option to stderr as "name: <arg> <type>", after a
+ * one-line banner. */
+void
+config_help (void)
+{
+  ConfigOption *opt = all_options;
+  int left = option_count;
+
+  fprintf (stderr, "Expecting the following options in one or more config files on the command line:\n");
+
+  for (; left > 0; left -= 1, opt += 1)
+    {
+      const char* arg = config_help_arg (opt);
+      const char* type = config_help_type (opt);
+      fprintf (stderr, "%s: %s %s\n", opt->name, arg, type);
+    }
+}
+
+/* Open, for writing, the file named by the printf-style FORMAT and arguments
+ * inside config_output_dir.  A failed fopen is reported through g_error;
+ * otherwise the open stream is returned. */
+FILE*
+config_output (const char* format, ...)
+{
+  va_list ap;
+  gchar *leaf;
+  gchar *path;
+  FILE *stream;
+
+  va_start (ap, format);
+  leaf = g_strdup_vprintf (format, ap);
+  va_end (ap);
+
+  path = g_strdup_printf ("%s/%s", config_output_dir, leaf);
+  stream = fopen (path, "w");
+  if (! stream)
+    g_error ("fopen failed: %s\n", leaf);
+  g_free (path);
+  g_free (leaf);
+  return stream;
+}
+
+
+#include <edsio.h>
+#include <edsiostdio.h>
+#include <ctype.h>
+#include "xdfs.h"
+
+/* Warning: very cheesy!
+ */
+
+#ifdef DEBUG_EXTRACT
+  FileHandle *fh2 = handle_read_file (filename);
+
+  guint8* debug_buf = g_malloc (buflen);
+
+  if (! handle_read (fh2, debug_buf, buflen))
+    g_error ("read failed");
+#endif
+/* Add up the sizes of the @-quoted "text" strings in FILENAME; always TRUE.  */
+gboolean
+rcs_count (const char* filename, guint *encoded_size)
+{
+  char *readbuf0, *readbuf;
+  gboolean in_string = FALSE;
+  gboolean in_text = FALSE;
+  guint string_start = 0;
+  guint string_end = 0;
+  guint current_pos = 0;
+  /*char *current_delta = NULL;*/
+  FileHandle *fh = handle_read_file (filename);
+  guint buflen = handle_length (fh);
+
+  (* encoded_size) = 0;
+
+  readbuf0 = g_new (guint8, buflen);
+  /* NOTE(review): fh is never checked for failure -- confirm handle_read_file's error convention.  */
+  for (;;)
+    {
+      int c = handle_gets (fh, readbuf0, buflen);
+
+      readbuf = readbuf0;
+
+      if (c < 0)
+	break;
+      /* Only a string that follows a line starting with "text" is counted.  */
+      if (strncmp (readbuf, "text", 4) == 0)
+	in_text = TRUE;
+      /* An '@' in column 0 outside a string opens one; step past it.  */
+      if (! in_string && readbuf[0] == '@')
+	{
+	  string_start = current_pos + 1;
+	  in_string = TRUE;
+	  readbuf += 1;
+	}
+
+      current_pos += c;
+      /* Inside a string "@@" is skipped as a pair; a lone '@' ends the string.  */
+      if (in_string)
+	{
+	  while ((readbuf = strchr (readbuf, '@')))
+	    {
+	      if (readbuf[1] == '@')
+		{
+		  string_start += 1; /* @@@ bogus, just counting. */
+		  readbuf += 2;
+		  continue;
+		}
+
+	      in_string = FALSE;
+	      break;
+	    }
+
+	  string_end = current_pos - 2;
+
+	  if (in_text && ! in_string)
+	    {
+	      in_text = FALSE;
+
+	      /*g_free (current_delta);
+		current_delta = NULL;*/
+
+	      (* encoded_size) += (string_end - string_start);
+	    }
+
+	  continue;
+	}
+      /* Lines starting with a digit are recognised but currently ignored.  */
+      if (isdigit (readbuf[0]))
+	{
+#if 0
+	  (* strchr (readbuf, '\n')) = 0;
+	  if (current_delta)
+	    g_free (current_delta);
+	  current_delta = g_strdup (readbuf);
+#endif
+	}
+    }
+
+  handle_close (fh);
+
+  g_free (readbuf0);
+
+#if 0
+  if (current_delta)
+    g_free (current_delta);
+#endif
+
+  return TRUE;
+}
+/* Disabled stand-alone driver: runs rcs_count on the file named by argv[1].  */
+#if 0
+int
+main (int argc, char** argv)
+{
+  guint size;
+
+  if (argc != 2)
+    g_error ("usage: %s RCS_file\n", argv[0]);
+
+  if (! rcs_count (argv[1], &size))
+    g_error ("rcs_parse failed");
+
+  return 0;
+}
+#endif
diff --git a/third-party/xdelta3/xdelta3/run_release.sh b/third-party/xdelta3/xdelta3/run_release.sh
new file mode 100644
index 0000000000..4f76d09f2a
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/run_release.sh
@@ -0,0 +1,288 @@
+#!/bin/bash
+# Configures every xdelta3 build variant and writes Makefile.test to drive them.
+# Run from the source dir.
+SRCDIR=${PWD}
+
+# TODO replace w/ wget
+LZMA="xz-5.2.1"
+LZMA_FILE="${SRCDIR}/../${LZMA}.tar.gz"
+
+MAKEFLAGS="-j 10"
+
+BUILDDIR=${SRCDIR}/build
+LZMASRC=${BUILDDIR}/${LZMA}
+
+NONWIN_CFLAGS=""
+MINGW_CFLAGS="-DEXTERNAL_COMPRESSION=0 -DXD3_WIN32=1 -DSHELL_TESTS=0"
+
+MYOS=`uname`
+DATE=`date`
+# Make target lists; buildit appends to them, the final Makefile.test uses them.
+CLEAN=""
+
+LINUXTGTS=""
+LINUXTEST1=""
+LINUXTEST2=""
+
+WINTGTS=""
+WINTEST1=""
+WINTEST2=""
+
+OSXTGTS=""
+OSXTEST1=""
+OSXTEST2=""
+# Directory for test output: TMPDIR if set, else TMP, else /tmp.
+XTMP="/tmp"
+if [ "${TMP}" != "" ]; then
+    XTMP="${TMP}"
+fi
+if [ "${TMPDIR}" != "" ]; then
+    XTMP="${TMPDIR}"
+fi
+# Refuse to reuse a non-empty build directory; create it when it is missing.
+BUILDFILES=`ls -A ${BUILDDIR} 2> /dev/null`
+if [ -d "${BUILDDIR}" ]; then
+    if [ -n "${BUILDFILES}" ]; then
+	echo "Directory ${BUILDDIR} should be empty"
+	exit 1
+    fi
+else
+    mkdir "${BUILDDIR}"
+fi
+
+function setup {
+    libtoolize || glibtoolize
+    automake --add-missing
+    aclocal -I m4
+    autoheader
+    automake
+    autoconf
+}
+
+function try {
+    local w=$1
+    shift
+    local dir=$1
+    shift
+    echo -n "	${w} ... "
+    (cd "${dir}" && "$@" >${w}.stdout 2>${w}.stderr)
+    local s=$?
+    if [ ${s} -eq 0 ]; then
+	echo " success"
+    else
+	echo " failed!"
+	echo "Error $1 in ${dir}" >&2
+    fi
+    return ${s}
+}
+
+function buildlzma {
+    host=$1
+    march=$2
+    local target="${BUILDDIR}/lib-${host}${march}"
+
+    echo "	... liblzma"
+    
+    mkdir -p ${target}
+
+    try configure-lzma ${target} ${LZMASRC}/configure \
+	--host=${host} \
+	--prefix=${target} \
+	--disable-shared \
+	"CC=${CC}" \
+	"CXX=${CXX}" \
+	"CFLAGS=${march}" \
+	"CXXFLAGS=${march}" \
+	"LDFLAGS=${march}"
+    if [ $? -ne 0 ]; then
+	return
+    fi
+
+    try build-lzma ${target} make ${MAKEFLAGS}
+    if [ $? -ne 0 ]; then
+    	return
+    fi
+    try install-lzma ${target} make install
+    if [ $? -ne 0 ]; then
+    	return
+    fi
+}
+# buildit HOST MARCH USIZEBITS OFFSETBITS CARGS AFL: add Makefile.test rules for one variant, then configure it.
+function buildit {
+    local host=$1
+    local march=$2
+    local usizebits=$3
+    local offsetbits=$4
+    local cargs=$5
+    local afl=$6
+    local BM="${host}${march}"
+    local USECC="${CC}"
+    local USECXX="${CXX}"
+    local LIBBM="${BM}"
+    # afl builds use the afl compiler wrappers and a "-afl" build directory.
+    if [ "${afl}" = "1" ]; then
+	USECC="afl-gcc"
+	USECXX="afl-g++"
+	BM="${BM}-afl"
+    fi
+
+    local D="build/${BM}/usize${usizebits}/xoff${offsetbits}"
+    local BMD="${BM}-${usizebits}-${offsetbits}"
+
+    local FULLD="${SRCDIR}/${D}"
+    local CFLAGS="${march} ${cargs} -I${SRCDIR}/build/lib-${LIBBM}/include"
+    local CXXFLAGS="${march} ${cargs} -I${SRCDIR}/build/lib-${LIBBM}/include"
+    local CPPFLAGS="-I${SRCDIR}/build/lib-${LIBBM}/include"
+    local LDFLAGS="${march} -L${SRCDIR}/build/lib-${LIBBM}/lib"
+
+    local EXEC_PREAMBLE=""
+    local EXEC_SUFFIX=""
+    # mingw binaries are run through wine and carry an .exe suffix.
+    case ${host} in
+	*mingw*)
+	    EXEC_PREAMBLE="wine"
+	    EXEC_SUFFIX=".exe"
+	    ;;
+    esac
+    
+    mkdir -p ${D}
+
+    echo "	... ${BMD}"
+    # Append this variant's build/clean/test rules to Makefile.test.
+    cat >> Makefile.test <<EOF
+
+# ${BMD}
+# ${CFLAGS}
+.PHONY: build-${BMD}
+build-${BMD}:
+	(cd ${D} && make all && make install)
+
+.PHONY: clean-${BMD}
+clean-${BMD}:
+	(cd ${D} && make clean)
+
+.PHONY: regtest-${BMD}
+regtest-${BMD}:
+	(cd ${D} && ${EXEC_PREAMBLE} ./bin/xdelta3regtest${EXEC_SUFFIX} 1> \${TMP}/regtest.${BMD}.stdout 2> \${TMP}/regtest.${BMD}.stderr)
+
+.PHONY: selftest-${BMD}
+selftest-${BMD}:
+	(cd ${D} && ${EXEC_PREAMBLE} ./bin/xdelta3${EXEC_SUFFIX} test 1> \${TMP}/selftest.${BMD}.stdout 2> \${TMP}/selftest.${BMD}.stderr)
+
+
+EOF
+    # File the new targets under the platform matched by the host triple.
+    case ${host} in
+	*linux*)
+	    LINUXTGTS="${LINUXTGTS} build-${BMD}"
+	    LINUXTEST1="${LINUXTEST1} selftest-${BMD}"
+	    LINUXTEST2="${LINUXTEST2} regtest-${BMD}"
+	    ;;
+	*mingw*)
+	    WINTGTS="${WINTGTS} build-${BMD}"
+	    WINTEST1="${WINTEST1} selftest-${BMD}"
+	    WINTEST2="${WINTEST2} regtest-${BMD}"
+	    ;;
+	*apple*)
+	    OSXTGTS="${OSXTGTS} build-${BMD}"
+	    OSXTEST1="${OSXTEST1} selftest-${BMD}"
+	    OSXTEST2="${OSXTEST2} regtest-${BMD}"
+	    ;;
+    esac
+    CLEAN="${CLEAN} clean-${BMD}"
+
+    try configure-xdelta ${FULLD} ${SRCDIR}/configure \
+    		  --host=${host} \
+    		  --prefix=${FULLD} \
+    		  --enable-static \
+    		  --disable-shared \
+    		  --enable-debug-symbols \
+		  "CFLAGS=${CFLAGS}" \
+		  "CXXFLAGS=${CXXFLAGS}" \
+		  "CPPFLAGS=${CPPFLAGS}" \
+		  "LDFLAGS=${LDFLAGS}" \
+		  "CC=${USECC}" \
+		  "CXX=${USECXX}"
+    if [ $? -ne 0 ]; then
+	return
+    fi
+    # Building/installing is left to the generated build-* make targets.
+    # try build-xdelta ${FULLD} make ${MAKEFLAGS} all
+    # if [ $? -ne 0 ]; then
+    # 	return
+    # fi
+
+    # try install-xdelta ${FULLD} make install
+}
+# buildall HOST MARCH CFLAGS AFL: build liblzma, then set up the three usize/xoff width variants.
+function buildall {
+    echo ""
+    echo "Host $1$2 afl=$4"
+    echo ""
+
+    buildlzma "$1" "$2"
+    buildit "$1" "$2" 32 32 "-DXD3_USE_LARGESIZET=0 -DXD3_USE_LARGEFILE64=0 $3" "$4"
+    buildit "$1" "$2" 32 64 "-DXD3_USE_LARGESIZET=0 -DXD3_USE_LARGEFILE64=1 $3" "$4"
+    buildit "$1" "$2" 64 64 "-DXD3_USE_LARGESIZET=1 -DXD3_USE_LARGEFILE64=1 $3" "$4"
+}
+
+setup
+
+try untar-lzma ${BUILDDIR} tar -xvf "${LZMA_FILE}"
+if [ $? -ne 0 ]; then
+    exit $?
+fi
+
+cat > Makefile.test <<EOF
+# Auto-generated ${DATE} -*- Mode: Makefile -*-
+TMP = ${XTMP}
+
+all: linux windows apple
+
+EOF
+
+# Native compiles
+if [ "${MYOS}" == "Linux" ]; then
+    # Linux
+    buildall x86_64-pc-linux-gnu -m32 "${NONWIN_CFLAGS}" "0"
+    buildall x86_64-pc-linux-gnu -m32 "${NONWIN_CFLAGS}" "1"  # "1": afl-gcc/afl-g++ build
+    buildall x86_64-pc-linux-gnu -m64 "${NONWIN_CFLAGS}" "0"
+    buildall x86_64-pc-linux-gnu -m64 "${NONWIN_CFLAGS}" "1"  # "1": afl-gcc/afl-g++ build
+fi
+
+if [ "${MYOS}" == "Darwin" ]; then
+    # OS X
+    buildall x86_64-apple-darwin -m32 "${NONWIN_CFLAGS}" "0"
+    buildall x86_64-apple-darwin -m64 "${NONWIN_CFLAGS}" "0"
+fi
+
+# Cross compile (run on every host OS; there is no MYOS check here)
+buildall i686-w64-mingw32 -mconsole "${MINGW_CFLAGS}" "0"
+buildall x86_64-w64-mingw32 -mconsole "${MINGW_CFLAGS}" "0"
+
+cat >> Makefile.test <<EOF
+
+clean: ${CLEAN}
+
+.PHONY: linux windows apple
+.PHONY: linux-build windows-build apple-build
+.PHONY: linux-selftest windows-selftest apple-selftest
+.PHONY: linux-regtest windows-regtest apple-regtest
+
+linux: linux-build linux-selftest linux-regtest
+windows: windows-build windows-selftest windows-regtest
+apple: apple-build apple-selftest apple-regtest
+
+linux-build: ${LINUXTGTS}
+linux-selftest: ${LINUXTEST1}
+linux-regtest: ${LINUXTEST2}
+
+windows-build: ${WINTGTS}
+windows-selftest: ${WINTEST1}
+windows-regtest: ${WINTEST2}
+
+apple-build: ${OSXTGTS}
+apple-selftest: ${OSXTEST1}
+apple-regtest: ${OSXTEST2}
+
+EOF
diff --git a/third-party/xdelta3/xdelta3/testing/checksum_test.cc b/third-party/xdelta3/xdelta3/testing/checksum_test.cc
new file mode 100644
index 0000000000..9cd37407d1
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/testing/checksum_test.cc
@@ -0,0 +1,770 @@
+/* xdelta3 - delta compression tools and library -*- Mode: C++ -*-
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+#include "test.h"
+#include <assert.h>
+#include <list>
+#include <vector>
+#include <algorithm>
+
+#include "../cpp-btree/btree_map.h"
+
+extern "C" {
+uint32_t xd3_large32_cksum_old (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look);
+uint32_t xd3_large32_cksum_update_old (xd3_hash_cfg *cfg, uint32_t cksum, 
+				       const uint8_t *base, const usize_t look);
+
+uint64_t xd3_large64_cksum_old (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look);
+uint64_t xd3_large64_cksum_update_old (xd3_hash_cfg *cfg, uint64_t cksum, 
+				       const uint8_t *base, const usize_t look);
+}
+
+using btree::btree_map;
+using std::list;
+using std::vector;
+
+// MLCG parameters
+// a, a*
+uint32_t good_32bit_values[] = {
+  1597334677U, // ...
+  741103597U, 887987685U,
+};
+
+// a, a*
+uint64_t good_64bit_values[] = {
+  1181783497276652981ULL, 4292484099903637661ULL,
+  7664345821815920749ULL, // ...
+};
+
+void print_header() {  // emits the column titles on calls 0, 20, 40, ...
+  static int calls = 0;
+  const bool due = (calls++ % 20 == 0);
+  if (!due) return;
+  printf("%-32sConf\t\tCount\tUniq\tFull\tCover\tColls"
+         "\tMB/s\tIters\t#Colls\n", "Name");
+}
+
+struct true_type { };
+struct false_type { };
+
+template <typename Word>
+usize_t bitsof();  // width of Word in bits; only the specializations exist
+
+template<>
+usize_t bitsof<unsigned int>() {  // counts 8 bits per byte
+  return 8 * sizeof(unsigned int);
+}
+
+template<>
+usize_t bitsof<unsigned long>() {  // counts 8 bits per byte
+  return 8 * sizeof(unsigned long);
+}
+
+template<>
+usize_t bitsof<unsigned long long>() {  // counts 8 bits per byte
+  return 8 * sizeof(unsigned long long);
+}
+
+template <typename Word>
+struct hhash {  // Reduces checksum "t" to a table index.  The bits kept by
+		// shifting right "s" places are the most "distant" under the
+		// spectral test for the rabin_karp MLCG; short windows need
+		// more than those, so the "mask" low bits are XORed in.
+  Word operator()(const Word t, const Word s, const Word mask) {
+    return (t & mask) ^ (t >> s);
+  }
+};
+
+template <typename Word>
+Word good_word();  // multiplier used by rabin_karp; specialized per width
+
+template<>
+uint32_t good_word<uint32_t>() {  // first entry of the 32-bit table
+  return *good_32bit_values;
+}
+
+template<>
+uint64_t good_word<uint64_t>() {  // first entry of the 64-bit table
+  return *good_64bit_values;
+}
+
+// CLASSES
+
+#define SELF Word, CksumSize, CksumSkip, Hash, Compaction
+#define MEMBER template <typename Word,		\
+			 int CksumSize,		\
+			 int CksumSkip,		\
+			 typename Hash,		\
+                         int Compaction>
+
+MEMBER
+struct cksum_params {  // republishes the template arguments under fixed names
+  static const int compaction = Compaction;
+  static const int cksum_skip = CksumSkip;
+  static const int cksum_size = CksumSize;
+
+  typedef Hash hash_type;
+  typedef Word word_type;
+};
+
+MEMBER
+struct rabin_karp : public cksum_params<SELF> {
+  // Polynomial checksum: (a^(cksum_size-1) c_0) + (a^(cksum_size-2) c_1) ...
+  rabin_karp()
+    : powers(make_powers()),
+      product(good_word<Word>() * powers[0]),
+      incr_state(0) { }
+
+  static Word* make_powers() {  // caller owns the returned array
+    Word *table = new Word[CksumSize];
+    table[CksumSize - 1] = 1;
+    for (int k = CksumSize - 1; k-- > 0; ) {
+      table[k] = good_word<Word>() * table[k + 1];
+    }
+    return table;
+  }
+
+  ~rabin_karp() {
+    delete [] powers;
+  }
+
+  Word step(const uint8_t *ptr) {  // checksum of ptr[0 .. CksumSize-1]
+    Word sum = 0;
+    for (int k = CksumSize; k-- > 0; ) {
+      sum += powers[k] * (ptr[k]);
+    }
+    return sum;
+  }
+
+  Word state0(const uint8_t *ptr) {
+    const Word fresh = step(ptr);
+    return incr_state = fresh;
+  }
+
+  Word incr(const uint8_t *ptr) {  // ptr[-1] leaves, ptr[CksumSize-1] enters
+    const Word leaving = product * (ptr[-1]);
+    incr_state = good_word<Word>() * incr_state - leaving + (ptr[CksumSize - 1]);
+    return incr_state;
+  }
+
+  const Word *const powers;      // powers[i] = a^(CksumSize - 1 - i)
+  const Word  product;           // a^CksumSize
+  Word        incr_state;        // value last returned by state0()/incr()
+};
+
+MEMBER
+struct with_stream : public cksum_params<SELF> {
+  xd3_stream stream;  // configured by the constructor, freed by the destructor
+
+  // Configures the stream with XD3_SMATCH_SOFT, using CksumSize/CksumSkip as
+  // large_look/large_step, then sizes the stream's large hash table.
+  with_stream() {
+    memset (&stream, 0, sizeof (stream));
+
+    xd3_config cfg;
+    xd3_init_config (&cfg, 0);
+    cfg.smatch_cfg = XD3_SMATCH_SOFT;
+    cfg.smatcher_soft.large_look = CksumSize;
+    cfg.smatcher_soft.large_step = CksumSkip;
+    cfg.smatcher_soft.small_look = cfg.smatcher_soft.small_chain = 4;
+    cfg.smatcher_soft.small_lchain = cfg.smatcher_soft.max_lazy = 4;
+    cfg.smatcher_soft.long_enough = 4;
+    CHECK_EQ(0, xd3_config_stream (&stream, &cfg));
+
+    const int ignored_slots = 1<<10;  /* ignored */
+    CHECK_EQ(0, xd3_size_hashtable (&stream, ignored_slots,
+                                    stream.smatcher.large_look,
+                                    & stream.large_hash));
+  }
+
+  ~with_stream() {
+    xd3_free_stream (&stream);
+  }
+};
+
+MEMBER
+struct large_cksum : public with_stream<SELF> {
+  Word incr_state;  // value last returned by state0()/incr()
+
+  // Checksum of the CksumSize bytes at ptr, computed from scratch.
+  Word step(const uint8_t *ptr) {
+    return xd3_large_cksum (&this->stream.large_hash, ptr, CksumSize);
+  }
+
+  // Begins a rolling sequence with the window at ptr.
+  Word state0(const uint8_t *ptr) { return incr_state = step(ptr); }
+
+  // Updates the state held for the window at ptr - 1 to the window at ptr.
+  Word incr(const uint8_t *ptr) {
+    incr_state = xd3_large_cksum_update (&this->stream.large_hash,
+                                         incr_state, ptr - 1, CksumSize);
+    return incr_state;
+  }
+};
+
+#if SIZEOF_USIZE_T == 4
+#define xd3_large_cksum_old         xd3_large32_cksum_old
+#define xd3_large_cksum_update_old  xd3_large32_cksum_update_old
+#elif SIZEOF_USIZE_T == 8
+#define xd3_large_cksum_old         xd3_large64_cksum_old
+#define xd3_large_cksum_update_old  xd3_large64_cksum_update_old
+#endif
+
+MEMBER
+struct large_cksum_old : public with_stream<SELF> {
+  Word incr_state;  // value last returned by state0()/incr()
+
+  // Checksum of the CksumSize bytes at ptr, computed from scratch.
+  Word step(const uint8_t *ptr) {
+    return xd3_large_cksum_old (&this->stream.large_hash, ptr, CksumSize);
+  }
+
+  // Begins a rolling sequence with the window at ptr.
+  Word state0(const uint8_t *ptr) { return incr_state = step(ptr); }
+
+  // Updates the state held for the window at ptr - 1 to the window at ptr.
+  Word incr(const uint8_t *ptr) {
+    incr_state = xd3_large_cksum_update_old (&this->stream.large_hash,
+                                             incr_state, ptr - 1, CksumSize);
+    return incr_state;
+  }
+};
+
+// TESTS
+
+template <typename Word>
+struct file_stats {
+  typedef Word word_type;
+  typedef const uint8_t* ptr_type;
+  typedef btree::btree_multimap<word_type, ptr_type> table_type;
+  typedef typename table_type::iterator table_iterator;
+
+  usize_t cksum_size;     // bytes compared when two windows share a checksum
+  usize_t cksum_skip;     // stored by the constructor; not read in this struct
+  usize_t unique;         // distinct windows recorded by update()
+  usize_t unique_values;  // distinct checksum values recorded by update()
+  usize_t count;          // total update() calls
+  table_type table;       // checksum -> window start, one entry per distinct window
+
+  file_stats(usize_t size, usize_t skip)
+    : cksum_size(size),
+      cksum_skip(skip),
+      unique(0),
+      unique_values(0),
+      count(0) {
+  }
+
+  void reset() {
+    table.clear();
+    count = 0;
+    unique_values = 0;
+    unique = 0;
+  }
+
+  void update(word_type word, ptr_type ptr) {
+    ++count;
+    table_iterator first = table.find(word);
+    const bool seen_value = (first != table.end());
+
+    if (!seen_value) {
+      ++unique_values;
+    } else {
+      int collisions = 0;
+      table_iterator it = first;
+      while (it != table.end() && it->first == word) {
+        if (memcmp(it->second, ptr, cksum_size) == 0) {
+          return;  // an identical window is already recorded
+        }
+        ++collisions;
+        ++it;
+      }
+      if (collisions >= 1000) {
+        fprintf(stderr, "Something is not right, lots of collisions=%d\n",
+                collisions);
+        abort();
+      }
+    }
+    ++unique;
+    table.insert(std::make_pair(word, ptr));
+  }
+
+  void freeze() {
+    table.clear();
+  }
+};
+
+struct test_result_base;
+
+static vector<test_result_base*> all_tests;
+
+struct test_result_base {
+  virtual ~test_result_base() { }
+
+  // Called by main() in this order: reset(), get() repeatedly, stat(), print().
+  virtual void reset() = 0;
+  virtual void get(const uint8_t* buf, const size_t buf_size, usize_t iters) = 0;
+  virtual void stat() = 0;
+  virtual void print() = 0;
+  virtual usize_t count() = 0;
+  virtual usize_t dups() = 0;
+  virtual usize_t total_count() = 0;
+  virtual usize_t total_dups() = 0;
+  virtual double uniqueness() = 0;
+  virtual double fullness() = 0;
+  virtual double collisions() = 0;
+  virtual double coverage() = 0;
+  virtual double compression() = 0;
+  virtual double time() = 0;
+  virtual double total_time() = 0;
+};
+
+// Runs one Checksum configuration over a file and gathers its hash
+// quality and timing figures; instances register themselves in all_tests.
+template <typename Checksum>
+struct test_result : public test_result_base {
+  Checksum cksum;
+  const char *test_name;
+  file_stats<typename Checksum::word_type> fstats;
+  usize_t test_size;                    // bytes in the buffer passed to get()
+  usize_t n_steps;                      // windows visited at cksum_skip stride
+  usize_t n_incrs;                      // windows visited at stride one
+  typename Checksum::word_type s_bits;  // hash shift, set by new_table()
+  typename Checksum::word_type s_mask;  // hash mask, set by new_table()
+  usize_t t_entries;                    // entry count passed to new_table()
+  usize_t h_bits;                       // log2 of the bitmap size in bits
+  usize_t h_buckets_full;               // set bits found by summarize_table()
+  char *hash_table;                     // bitmap allocated by new_table()
+  long accum_millis;                    // time spent in get() timing loops
+  usize_t accum_iters;                  // iterations requested from get()
+
+  // These are not reset
+  double accum_time;
+  usize_t accum_count;
+  usize_t accum_dups;
+  usize_t accum_colls;
+  size_t accum_size;
+
+  test_result(const char *name)
+    : test_name(name),
+      fstats(Checksum::cksum_size, Checksum::cksum_skip),
+      hash_table(NULL),
+      accum_millis(0),
+      accum_iters(0),
+      accum_time(0.0),
+      accum_count(0),
+      accum_dups(0),
+      accum_colls(0),
+      accum_size(0) {
+    all_tests.push_back(this);  // main() iterates over all_tests
+  }
+
+  ~test_result() { reset(); }  // reset() releases hash_table
+
+  // Returns the per-file state to its initial values; the accum_time/count/
+  // dups/colls/size totals are deliberately left alone.
+  void reset() {
+    // size of file
+    test_size = 0;
+
+    // count
+    n_steps = 0;
+    n_incrs = 0;
+
+    // values used by new_table()/summarize_table()
+    s_bits = 0;
+    s_mask = 0;
+    t_entries = 0;
+    h_bits = 0;
+    h_buckets_full = 0;
+
+    accum_millis = 0;
+    accum_iters = 0;
+    fstats.reset();
+
+    // hash_table comes from new char[], so it needs the array form of
+    // delete; deleting NULL is a no-op, so no guard is required.
+    delete [] hash_table;
+    hash_table = NULL;
+  }
+
+  usize_t count() {
+    if (Checksum::cksum_skip == 1) {
+      return n_incrs;
+    } else {
+      return n_steps;
+    }
+  }
+
+  usize_t dups() {
+    return fstats.count - fstats.unique;
+  }
+
+  /* Fraction of distinct strings of length cksum_size which are not
+   * represented in the hash table. */
+  double collisions() {
+    return (fstats.unique - fstats.unique_values) / (double) fstats.unique;
+  }
+  usize_t colls() {
+    return (fstats.unique - fstats.unique_values);
+  }
+
+  double uniqueness() {
+    return 1.0 - (double) dups() / count();
+  }
+
+  double fullness() {  // shift in usize_t so h_bits >= 31 cannot overflow int
+    return (double) h_buckets_full / ((usize_t) 1 << h_bits);
+  }
+
+  double coverage() {
+    return (double) h_buckets_full / uniqueness() / count();
+  }
+
+  double compression() {
+    return 1.0 - coverage();
+  }
+
+  double time() {
+    return (double) accum_millis / accum_iters;
+  }
+
+  double total_time() {
+    return accum_time;
+  }
+
+  usize_t total_count() {
+    return accum_count;
+  }
+
+  usize_t total_dups() {
+    return accum_dups;
+  }
+
+  usize_t total_colls() {  // accumulated by stat() from colls()
+    return accum_colls;
+  }
+
+  void stat() {  // folds the current file's figures into the running totals
+    accum_time += time();
+    accum_count += count();
+    accum_dups += dups();
+    accum_colls += colls();
+    accum_size += test_size;
+  }
+
+  // Prints one result row (after the periodic header).  Aborts if fstats saw
+  // a different number of windows than count() reports.
+  void print() {
+    if (fstats.count != count()) {
+      // "u", not "d": the same usize_t values are printed unsigned below.
+      fprintf(stderr, "internal error: %" W "u != %" W "u\n",
+              fstats.count, count());
+      abort();
+    }
+    print_header();
+    printf("%-32s%d/%d 2^%" W "u\t%" W "u\t%0.4f\t%.4f\t%.4f\t%.1e\t%.2f\t"
+           "%" W "u\t%" W "u\n",
+           test_name,
+           Checksum::cksum_size,
+           Checksum::cksum_skip,
+           h_bits, count(),
+           uniqueness(), fullness(), coverage(), collisions(),
+           0.001 * accum_iters * test_size / accum_millis,  // MB/s column
+           accum_iters,
+           colls());
+  }
+
+  // Smallest i >= 3 with slots <= 2^i, minus Checksum::compaction (else bits).
+  usize_t size_log2 (usize_t slots) {
+    const usize_t bits = bitsof<typename Checksum::word_type>() - 1;
+
+    for (usize_t i = 3; i <= bits; i += 1) {
+      // Shift a usize_t: "1U << i" is undefined once i reaches int's width.
+      if (slots <= ((usize_t) 1 << i)) {
+        return i - Checksum::compaction;
+      }
+    }
+    return bits;
+  }
+
+  // Allocates a zeroed bitmap of 2^h_bits bits sized for `entries` checksums.
+  void new_table(usize_t entries) {
+    t_entries = entries;
+    h_bits = size_log2(entries);
+    const usize_t n = (usize_t) 1 << h_bits;
+    const usize_t n_bytes = (n + 7) / 8;  // n can be below 8; never allocate 0
+    s_bits = bitsof<typename Checksum::word_type>() - h_bits;
+    s_mask = n - 1U;
+    delete [] hash_table;  // get() calls this every round; drop the old bitmap
+    hash_table = new char[n_bytes];
+    memset(hash_table, 0, n_bytes);
+  }
+
+  int get_table_bit(usize_t i) {  // nonzero when bit i of the bitmap is set
+    return hash_table[i/8] & (1 << i%8);
+  }
+
+  int set_table_bit(usize_t i) {  // sets bit i; returns the updated byte
+    return hash_table[i/8] |= (1 << i%8);
+  }
+
+  // Counts the set bits of the bitmap into h_buckets_full.
+  void summarize_table() {
+    const usize_t n = (usize_t) 1 << h_bits;
+    usize_t full = 0;
+    for (usize_t i = 0; i < n; i++) {
+      if (get_table_bit(i)) full++;
+    }
+
+    h_buckets_full = full;
+  }
+
+  // Runs one measurement round over buf.  The first round after reset()
+  // also records every window in fstats.  Each round then repeats the
+  // checksum pass test_iters times into a fresh bitmap and adds the elapsed
+  // time to accum_millis.  A buffer shorter than one window has no windows.
+  void get(const uint8_t* buf, const size_t buf_size, usize_t test_iters) {
+    typename Checksum::hash_type hash;
+    const uint8_t *ptr;
+    const uint8_t *end;
+    usize_t periods;
+    int64_t stop;
+
+    test_size = buf_size;
+    // Subtract as int64_t: in size_t a short buffer would wrap to a huge value.
+    const int64_t last_offset = (int64_t) buf_size - Checksum::cksum_size;
+
+    if (last_offset < 0) {
+      periods = 0;
+      n_steps = 0;
+      n_incrs = 0;
+      stop = last_offset;  // start == end, so the strided loops below are empty
+    } else {
+      periods = last_offset / Checksum::cksum_skip;
+      n_steps = periods + 1;
+      n_incrs = last_offset + 1;
+      stop = last_offset - (periods + 1) * Checksum::cksum_skip;
+    }
+
+    // Compute file stats once.
+    if (fstats.unique_values == 0) {
+      if (Checksum::cksum_skip == 1) {
+        for (size_t i = 0; i < n_incrs; i++) {  // n_incrs is 0 for short buffers
+          fstats.update(hash(cksum.step(buf + i), s_bits, s_mask), buf + i);
+        }
+      } else {
+        ptr = buf + last_offset;
+        end = buf + stop;
+        for (; ptr != end; ptr -= Checksum::cksum_skip) {
+          fstats.update(hash(cksum.step(ptr), s_bits, s_mask), ptr);
+        }
+      }
+      fstats.freeze();
+    }
+
+    long start_test = get_millisecs_now();
+
+    if (Checksum::cksum_skip != 1) {
+      new_table(n_steps);
+
+      for (usize_t i = 0; i < test_iters; i++) {
+        ptr = buf + last_offset;
+        end = buf + stop;
+        for (; ptr != end; ptr -= Checksum::cksum_skip) {
+          set_table_bit(hash(cksum.step(ptr), s_bits, s_mask));
+        }
+      }
+
+      summarize_table();
+    }
+
+    stop = (int64_t) buf_size - Checksum::cksum_size + 1;
+    if (stop < 0) {
+      stop = 0;
+    }
+
+    if (Checksum::cksum_skip == 1) {
+      new_table(n_incrs);
+
+      for (usize_t i = 0; i < test_iters; i++) {
+        ptr = buf;
+        end = buf + stop;
+        if (ptr != end) {
+          set_table_bit(hash(cksum.state0(ptr++), s_bits, s_mask));
+        }
+        for (; ptr != end; ptr++) {
+          typename Checksum::word_type w = cksum.incr(ptr);
+          CHECK_EQ(w, cksum.step(ptr));  // rolling value must match a fresh one
+          set_table_bit(hash(w, s_bits, s_mask));
+        }
+      }
+
+      summarize_table();
+    }
+
+    accum_iters += test_iters;
+    accum_millis += get_millisecs_now() - start_test;
+  }
+};
+
+// Reads the whole of file `name` into a main_malloc() buffer returned through
+// *buf_ptr, with its length in *buf_len.  Returns 0 on success, otherwise a
+// non-zero error code.  main() releases the buffer with main_free().
+static int read_whole_file(const char *name,
+                           uint8_t **buf_ptr,
+                           size_t *buf_len) {
+  main_file file;
+  int ret;
+  xoff_t len;
+  size_t nread;
+
+  main_file_init(&file);
+  file.filename = name;
+
+  if ((ret = main_file_open(&file, name, XO_READ)) != 0) {
+    fprintf(stderr, "open failed\n");
+  } else if ((ret = main_file_stat(&file, &len)) != 0) {
+    fprintf(stderr, "stat failed\n");
+  } else {
+    // Size the buffer from the stat result and fill it with a single read.
+    (*buf_len) = (size_t)len;
+    (*buf_ptr) = (uint8_t*) main_malloc(*buf_len);
+    ret = main_file_read(&file, *buf_ptr, *buf_len, &nread,
+                         "read failed");
+    if (ret != 0 || *buf_len != nread) {
+      fprintf(stderr, "invalid read\n");
+      ret = XD3_INTERNAL;
+    }
+  }
+
+  // Every path ends here, so the file is always cleaned up.
+  main_file_cleanup(&file);
+  return ret;
+}
+
+int main(int argc, char** argv) {  // runs every registered test on each file argument
+  int i;
+  uint8_t *buf = NULL;
+  size_t buf_len = 0;
+  int ret;
+
+  if (argc <= 1) {
+    fprintf(stderr, "usage: %s file ...\n", argv[0]);
+    return 1;
+  }
+
+// TODO: The xdelta3-hash.h code is identical now; add sameness test.
+// using rabin_karp<> template.
+#define TEST(T,Z,S,C)					\
+  test_result<large_cksum<T,Z,S,hhash<T>,C>>		\
+    _xck_ ## T ## _ ## Z ## _ ## S ## _ ## C		\
+    ("xck_" #T "_" #Z "_" #S "_" #C);			\
+  test_result<large_cksum_old<T,Z,S,hhash<T>,C>>	\
+    _old_ ## T ## _ ## Z ## _ ## S ## _ ## C		\
+    ("old_" #T "_" #Z "_" #S "_" #C)
+
+#define TESTS(SIZE, SKIP)	 \
+  TEST(usize_t, SIZE, SKIP, 1);  \
+  TEST(usize_t, SIZE, SKIP, 2)
+   
+  TESTS(5, 1);  // each TESTS() declares new+old checksum tests at compaction 1 and 2
+  TESTS(6, 1);
+  TESTS(7, 1);
+  TESTS(8, 1);
+  TESTS(9, 1);
+  TESTS(10, 1);
+  TESTS(11, 1);
+  TESTS(12, 1);
+  TESTS(13, 1);
+  TESTS(14, 1);
+  TESTS(15, 1);
+  TESTS(16, 1);
+  TESTS(17, 1);
+  TESTS(18, 1);
+  TESTS(19, 1);
+  TESTS(20, 1);
+  TESTS(21, 1);
+  TESTS(22, 1);
+  TESTS(23, 1);
+  TESTS(24, 1);
+  TESTS(25, 1);
+  TESTS(26, 1);
+  TESTS(27, 1);
+  TESTS(28, 1);
+  TESTS(29, 1);
+  TESTS(30, 1);
+  TESTS(31, 1);
+  TESTS(32, 1);
+  TESTS(33, 1);
+  TESTS(34, 1);
+  TESTS(35, 1);
+  TESTS(36, 1);
+  TESTS(37, 1);
+  TESTS(38, 1);
+  TESTS(39, 1);
+
+
+  for (i = 1; i < argc; i++) {
+    if ((ret = read_whole_file(argv[i],
+			       & buf,
+			       & buf_len))) {
+      return 1;
+    }
+
+    fprintf(stderr, "file %s is %zu bytes\n",
+	    argv[i], buf_len);
+
+    double min_time = -1.0;  // NOTE(review): tracked below but never reported
+    double min_compression = 0.0;  // NOTE(review): tracked below but never reported
+
+    for (vector<test_result_base*>::iterator iter = all_tests.begin();
+	 iter != all_tests.end(); ++iter) {
+      test_result_base *test = *iter;
+      test->reset();
+
+      usize_t iters = 1;
+      long start_test = get_millisecs_now();
+
+      do {
+	test->get(buf, buf_len, iters);
+	iters *= 3;  // together with the division below: grow by 1.5x per round
+	iters /= 2;
+      } while (get_millisecs_now() - start_test < 2000);  // repeat until 2000 have elapsed
+
+      test->stat();
+
+      if (min_time < 0.0) {
+	min_compression = test->compression();
+	min_time = test->time();
+      }
+
+      if (min_time > test->time()) {
+	min_time = test->time();
+      }
+
+      if (min_compression > test->compression()) {
+	min_compression = test->compression();
+      }
+
+      test->print();
+    }
+
+    main_free(buf);
+    buf = NULL;
+  }
+
+  return 0;      
+}
diff --git a/third-party/xdelta3/xdelta3/testing/checksum_test_c.c b/third-party/xdelta3/xdelta3/testing/checksum_test_c.c
new file mode 100644
index 0000000000..7b2ab4499c
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/testing/checksum_test_c.c
@@ -0,0 +1,189 @@
+/* xdelta3 - delta compression tools and library -*- Mode: C++ -*-
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+#include "../xdelta3.c"
+
+// OLD CHECKSUM CODE
+
+#define PERMUTE32(x) (__single_hash32[x])
+#define PERMUTE64(x) (__single_hash64[x])
+
+const uint16_t __single_hash32[256] =
+{
+  /* This hashes the input alphabet (Scheme SLIB pseudo-random). */
+  0xbcd1, 0xbb65, 0x42c2, 0xdffe, 0x9666, 0x431b, 0x8504, 0xeb46,
+  0x6379, 0xd460, 0xcf14, 0x53cf, 0xdb51, 0xdb08, 0x12c8, 0xf602,
+  0xe766, 0x2394, 0x250d, 0xdcbb, 0xa678, 0x02af, 0xa5c6, 0x7ea6,
+  0xb645, 0xcb4d, 0xc44b, 0xe5dc, 0x9fe6, 0x5b5c, 0x35f5, 0x701a,
+  0x220f, 0x6c38, 0x1a56, 0x4ca3, 0xffc6, 0xb152, 0x8d61, 0x7a58,
+  0x9025, 0x8b3d, 0xbf0f, 0x95a3, 0xe5f4, 0xc127, 0x3bed, 0x320b,
+  0xb7f3, 0x6054, 0x333c, 0xd383, 0x8154, 0x5242, 0x4e0d, 0x0a94,
+  0x7028, 0x8689, 0x3a22, 0x0980, 0x1847, 0xb0f1, 0x9b5c, 0x4176,
+  0xb858, 0xd542, 0x1f6c, 0x2497, 0x6a5a, 0x9fa9, 0x8c5a, 0x7743,
+  0xa8a9, 0x9a02, 0x4918, 0x438c, 0xc388, 0x9e2b, 0x4cad, 0x01b6,
+  0xab19, 0xf777, 0x365f, 0x1eb2, 0x091e, 0x7bf8, 0x7a8e, 0x5227,
+  0xeab1, 0x2074, 0x4523, 0xe781, 0x01a3, 0x163d, 0x3b2e, 0x287d,
+  0x5e7f, 0xa063, 0xb134, 0x8fae, 0x5e8e, 0xb7b7, 0x4548, 0x1f5a,
+  0xfa56, 0x7a24, 0x900f, 0x42dc, 0xcc69, 0x02a0, 0x0b22, 0xdb31,
+  0x71fe, 0x0c7d, 0x1732, 0x1159, 0xcb09, 0xe1d2, 0x1351, 0x52e9,
+  0xf536, 0x5a4f, 0xc316, 0x6bf9, 0x8994, 0xb774, 0x5f3e, 0xf6d6,
+  0x3a61, 0xf82c, 0xcc22, 0x9d06, 0x299c, 0x09e5, 0x1eec, 0x514f,
+  0x8d53, 0xa650, 0x5c6e, 0xc577, 0x7958, 0x71ac, 0x8916, 0x9b4f,
+  0x2c09, 0x5211, 0xf6d8, 0xcaaa, 0xf7ef, 0x287f, 0x7a94, 0xab49,
+  0xfa2c, 0x7222, 0xe457, 0xd71a, 0x00c3, 0x1a76, 0xe98c, 0xc037,
+  0x8208, 0x5c2d, 0xdfda, 0xe5f5, 0x0b45, 0x15ce, 0x8a7e, 0xfcad,
+  0xaa2d, 0x4b5c, 0xd42e, 0xb251, 0x907e, 0x9a47, 0xc9a6, 0xd93f,
+  0x085e, 0x35ce, 0xa153, 0x7e7b, 0x9f0b, 0x25aa, 0x5d9f, 0xc04d,
+  0x8a0e, 0x2875, 0x4a1c, 0x295f, 0x1393, 0xf760, 0x9178, 0x0f5b,
+  0xfa7d, 0x83b4, 0x2082, 0x721d, 0x6462, 0x0368, 0x67e2, 0x8624,
+  0x194d, 0x22f6, 0x78fb, 0x6791, 0xb238, 0xb332, 0x7276, 0xf272,
+  0x47ec, 0x4504, 0xa961, 0x9fc8, 0x3fdc, 0xb413, 0x007a, 0x0806,
+  0x7458, 0x95c6, 0xccaa, 0x18d6, 0xe2ae, 0x1b06, 0xf3f6, 0x5050,
+  0xc8e8, 0xf4ac, 0xc04c, 0xf41c, 0x992f, 0xae44, 0x5f1b, 0x1113,
+  0x1738, 0xd9a8, 0x19ea, 0x2d33, 0x9698, 0x2fe9, 0x323f, 0xcde2,
+  0x6d71, 0xe37d, 0xb697, 0x2c4f, 0x4373, 0x9102, 0x075d, 0x8e25,
+  0x1672, 0xec28, 0x6acb, 0x86cc, 0x186e, 0x9414, 0xd674, 0xd1a5
+};
+
+const uint32_t __single_hash64[256] =
+{
+  /* http://random.org 2014.10.24 */
+  0xd25e9f0a, 0xb1af9d5e, 0xb753dfa2, 0x157050f7,  /* 0 */
+  0xc84b072c, 0xdd14fe7c, 0xf92208c3, 0xdf08a0c0,
+  0x63a5c118, 0x76f5d90f, 0xa2f8b93e, 0xb6c12d22,
+  0xaf074957, 0x966fb7d9, 0x62f7b785, 0xb40e8a09,
+  0x0a811d5d, 0x323a6daa, 0xb62f7c5b, 0xfdcb9a53,
+  0xf25a9067, 0x4506bc7a, 0xff58a74b, 0x5ae62817,
+  0x74097675, 0x722c0fd9, 0x116a2a66, 0x65f76728,
+  0x72c79651, 0xe043cf9d, 0x64b867c7, 0x6604834f,
+  0xcdca58a6, 0x0f164e2d, 0x24515f05, 0x632cdbf8,
+  0x18091d4a, 0x3eff4128, 0x673d1c33, 0xd8e10c71,
+  0x1a3edf11, 0xba52892f, 0xa56949e0, 0xf3e1dd77,  /* 10 */
+  0x86fcbe3e, 0x138d66d0, 0x4fc98359, 0xc22e5dd6,
+  0xc59f2267, 0x6c6dd739, 0xe03da190, 0x07e8469c,
+  0xadcfb02c, 0x00d3b0d9, 0xa1f44918, 0x8bd84d87,
+  0x08ec9ec1, 0xbbcd156f, 0xb57718e3, 0x3177e752,
+  0xf52a4d70, 0xde7aaad9, 0x075f1da0, 0x21ba00c6,
+  0xb9469a5c, 0xcf08d5ba, 0x91ac9edc, 0xc6167b63,
+  0xc1974919, 0xc8c8d195, 0x4b1996dd, 0xeff8991c,
+  0xf7f66c6b, 0x25b012e2, 0x59d12a98, 0xea40d3cc,
+  0x41f9970b, 0xec48101a, 0xa3bdcf90, 0x99f16905,
+  0x27af6c97, 0xc849af37, 0x49cad89b, 0xf48c2278,  /* 20 */
+  0x5529c3d8, 0x9e7d6dce, 0x16feb52d, 0xf1b0aca1,
+  0xaf28fccb, 0x48e4ce3c, 0xc4436617, 0x64524e3e,
+  0x61806681, 0x6384f2d7, 0x1172880f, 0x34a5ef5f,
+  0xcc8cc0a8, 0x66e8f100, 0x2866085f, 0xba9b1b2d,
+  0x51285949, 0x2be4b574, 0x889b1ef5, 0x3dbe920d,
+  0x9277a62f, 0x0584a9f6, 0x085d8fc4, 0x4b5d403d,
+  0x4e46ca78, 0x3294c2f9, 0x29313e70, 0xe4f09b24,
+  0xe73b331c, 0x072f5552, 0x2e390b78, 0xea0021ca,
+  0xd8f40320, 0xed0e16fd, 0x7de9cf7a, 0xf17e3d6c,
+  0x8df1bd85, 0x052cae67, 0x3486e512, 0x3a1c09b8,  /* 30 */
+  0x6c2a7b4e, 0x83455753, 0xbc0353ac, 0x0ffe20b6,
+  0x5fdcef85, 0x010f506c, 0x595ce972, 0xe28680d0,
+  0xa7e216b2, 0xa392ee0f, 0x25b73faa, 0x2b1f4983,
+  0xeeaefe98, 0x1d3d9cbc, 0x6aebe97b, 0x8b7b3584,
+  0x9e6a9a07, 0xd37f1e99, 0x4ac2a441, 0x8ae9a213,
+  0x7d0e27d7, 0x5de54b9a, 0x8621de1f, 0xf0f2f866,
+  0xcb08d275, 0x49c3f87e, 0xd5ee68c1, 0x9802fc77,
+  0x68be6c5e, 0x65aa8c27, 0xf423d5f7, 0x10ec5502,
+  0x9909bce1, 0x509cdf1b, 0x338fea72, 0x2733e9bf,
+  0xf92f4fd7, 0x87738ea2, 0x931a8bbc, 0x0a5c9155,  /* 40 */
+  0xbe5edd9b, 0xadbf5838, 0x0338f8d2, 0x290da210,
+  0x390c37d8, 0xe7cffae8, 0x20617ebe, 0x464322dd,
+  0x7b3c4e78, 0xac142dcb, 0x2d5cef76, 0xd8fe49fc,
+  0x60f4e9a9, 0x7473816f, 0x0dc35f39, 0x5eed80c1,
+  0x0cb55ab6, 0x1d3ac541, 0x13c7f529, 0x7bffdf4a,
+  0xe334785b, 0x85263ec1, 0xd132ae56, 0x7c868b9e,
+  0x47f60638, 0x1012b979, 0x81c31dd3, 0x1af868c8,
+  0x0c5d0742, 0xd1b3e1a2, 0x5873200a, 0xf848465c,
+  0x0fc4d596, 0x609c18af, 0xc9f5a480, 0xd1a94a84,
+  0xa1431a3f, 0x7de8bb1a, 0x25f1256b, 0x1dcc732c,  /* 50 */
+  0x6aa1549a, 0xa2367281, 0x32f2a77e, 0x82e62a0f,
+  0x045cbb56, 0x74b2027c, 0xd71a32d9, 0x022e7cb5,
+  0xe99be177, 0x60222fdf, 0xd69681ca, 0x9008ee2c,
+  0x32923db4, 0xcf82bf97, 0x38960a5b, 0xb3503d5b,
+  0x9bd4c7f2, 0x33c029c8, 0x1ef504a3, 0xdb249d3b,
+  0x91e89676, 0x4ca43b36, 0x9191433c, 0x465d5dc4,
+  0xf4dcb118, 0x9d11dd00, 0xb592f058, 0xdbe5ce30,
+  0x74790d92, 0x779850a8, 0x7180d25b, 0xfa951d99,
+  0x5990935a, 0x921cb022, 0x3b7c39bc, 0x6a38a7c7,
+  0xdc22703b, 0x142bab3b, 0x4e3d9479, 0x44bb8482,  /* 60 */
+  0x8043abce, 0xfebe832a, 0x8e6a2f98, 0x4d43c4fe,
+  0xd192a70a, 0x802f3c3a, 0x5d11bbab, 0x2665d241,
+  0xb3f3a680, 0x3a8d223f, 0xcf82cdb4, 0x4ed28743,
+};
+
+/* Old 64-bit large checksum: a running sum of the permuted bytes and a running
+   sum of those sums, packed as (sum-of-sums & kMask) << 32 | (sum & kMask). */
+uint64_t
+xd3_large64_cksum_old (xd3_hash_cfg *ignore, const uint8_t *base, const usize_t look)
+{
+  static const uint64_t kMask = 0xffffffff;
+  static const uint64_t kBits = 32;
+  const uint8_t *const stop = base + look;
+  uint64_t sum = 0, sum_of_sums = 0;
+
+  while (base != stop)
+    {
+      sum += PERMUTE64(*base++);
+      sum_of_sums += sum;
+    }
+  return (sum & kMask) | ((sum_of_sums & kMask) << kBits);
+}
+
+/* Slides the window at base forward one byte: base[0] leaves, base[look] enters. */
+uint64_t
+xd3_large64_cksum_update_old (xd3_hash_cfg *ignore, const uint64_t cksum,
+                              const uint8_t *base, const usize_t look)
+{
+  static const uint64_t kMask = 0xffffffff, kBits = 32;
+  const uint64_t leaving  = PERMUTE64(base[0]);
+  const uint64_t entering = PERMUTE64(base[look]);
+  const uint64_t low  = ((cksum & kMask) - leaving + entering) & kMask;
+  const uint64_t high = ((cksum >> kBits) - (leaving * look) + low) & kMask;
+  return low | (high << kBits);
+}
+
+/* Old 32-bit large checksum: a running sum of the permuted bytes and a running
+   sum of those sums, packed as (sum-of-sums & kMask) << 16 | (sum & kMask). */
+uint32_t
+xd3_large32_cksum_old (xd3_hash_cfg *ignore, const uint8_t *base, const usize_t look)
+{
+  static const uint32_t kMask = 0xffff;
+  static const uint32_t kBits = 16;
+  const uint8_t *const stop = base + look;
+  uint32_t sum = 0, sum_of_sums = 0;
+
+  while (base != stop)
+    {
+      sum += PERMUTE32(*base++);
+      sum_of_sums += sum;
+    }
+  return (sum & kMask) | ((sum_of_sums & kMask) << kBits);
+}
+
+/* Slides the window at base forward one byte: base[0] leaves, base[look] enters. */
+uint32_t
+xd3_large32_cksum_update_old (xd3_hash_cfg *ignore, const uint32_t cksum,
+                              const uint8_t *base, const usize_t look)
+{
+  static const uint32_t kMask = 0xffff, kBits = 16;
+  const uint32_t leaving  = PERMUTE32(base[0]);
+  const uint32_t entering = PERMUTE32(base[look]);
+  const uint32_t low  = ((cksum & kMask) - leaving + entering) & kMask;
+  const uint32_t high = ((cksum >> kBits) - (leaving * look) + low) & kMask;
+  return low | (high << kBits);
+}
diff --git a/third-party/xdelta3/xdelta3/testing/cmp.h b/third-party/xdelta3/xdelta3/testing/cmp.h
new file mode 100644
index 0000000000..60748cb89e
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/testing/cmp.h
@@ -0,0 +1,67 @@
+/* xdelta3 - delta compression tools and library -*- Mode: C++ -*-
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+// Counts the positions at which a and b differ; bytes that only the longer
+// block has all count as differences.
+static size_t CmpDifferentBlockBytes(const Block &a, const Block &b) {
+  const size_t a_size = a.Size();
+  const size_t b_size = b.Size();
+  const size_t common = min(a_size, b_size);
+
+  // Start from the combined length of the two unmatched tails.
+  size_t differing = (a_size - common) + (b_size - common);
+  for (size_t pos = 0; pos < common; pos++) {
+    if (a[pos] != b[pos]) {
+      differing++;
+    }
+  }
+  return differing;
+}
+
+// Sums block-by-block differences; leftover blocks of either file count fully.
+static xoff_t CmpDifferentBytes(const FileSpec &a, const FileSpec &b) {
+  Block block_a, block_b;
+  xoff_t differing = 0;
+  typename FileSpec::iterator a_i(a), b_i(b);
+  while (!a_i.Done() && !b_i.Done()) {
+    a_i.Get(&block_a);
+    b_i.Get(&block_b);
+    differing += CmpDifferentBlockBytes(block_a, block_b);
+    a_i.Next();
+    b_i.Next();
+  }
+  while (!a_i.Done()) {
+    differing += a_i.BytesOnBlock();
+    a_i.Next();
+  }
+  while (!b_i.Done()) {
+    differing += b_i.BytesOnBlock();
+    b_i.Next();
+  }
+  return differing;
+}
+
+// Compares a with the bytes of b_spec starting at offset (at most a.Size()).
+static size_t CmpDifferentBlockBytesAtOffset(const Block &a,
+                                             const FileSpec &b_spec,
+                                             xoff_t offset) {
+  CHECK_LE(offset, b_spec.Size());
+  const bool runs_past_end = b_spec.Size() < offset + a.Size();
+  const size_t wanted = runs_past_end ? b_spec.Size() - offset : a.Size();
+
+  Block b;
+  b_spec.Get(&b, offset, wanted);
+  return CmpDifferentBlockBytes(a, b);
+}
diff --git a/third-party/xdelta3/xdelta3/testing/delta.h b/third-party/xdelta3/xdelta3/testing/delta.h
new file mode 100644
index 0000000000..bd38c6c767
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/testing/delta.h
@@ -0,0 +1,87 @@
+/* xdelta3 - delta compression tools and library -*- Mode: C++ -*-
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+// Decodes the delta held in a Block with XD3_SKIP_EMIT | XD3_ADLER32_NOVER
+// and exposes totals read from the stream's whole_target state.
+class Delta {
+public:
+  Delta(const Block &block) {
+    xd3_config config;
+    memset(&stream_, 0, sizeof (stream_));
+    memset(&config, 0, sizeof (config));
+    xd3_init_config(&config, XD3_SKIP_EMIT | XD3_ADLER32_NOVER);
+    CHECK_EQ(0, xd3_config_stream (&stream_, &config));
+
+    xd3_avail_input (&stream_, block.Data(), block.Size());
+
+    bool done = false;
+    while (!done) {
+      const int ret = xd3_decode_input(&stream_);
+      switch (ret) {
+      case XD3_INPUT:
+        done = true;  // the whole block was supplied up front, so stop here
+        break;
+      case XD3_OUTPUT:
+        CHECK_EQ(0, xd3_whole_append_window (&stream_));
+        break;
+      case XD3_GOTHEADER:
+      case XD3_WINSTART:
+      case XD3_WINFINISH:
+        break;
+      default:
+        // Report the decoder's return code; "done" is always false here.
+        cerr << "decode: " << ret << "\n";
+        abort();
+      }
+    }
+  }
+
+  ~Delta() {
+    xd3_free_stream(&stream_);
+  }
+
+  // Value of whole_target.addslen after decoding.
+  xoff_t AddedBytes() const {
+    return stream_.whole_target.addslen;
+  }
+
+  xoff_t Windows() const {
+    return stream_.whole_target.wininfolen;
+  }
+
+// Note: This does not benefit from -Wformat= checking, due to the
+// enclosing template. Further, it was not used.
+// void Print() const {
+//     for (size_t i = 0; i < stream_.whole_target.instlen; i++) {
+//       xd3_winst &winst = stream_.whole_target.inst[i];
+//       switch (winst.type) {
+//       case XD3_RUN: 
+// 	DP(RINT, "%" Q "u run %" W "u\n", winst.position, winst.size);
+// 	break;
+//       case XD3_ADD: 
+// 	DP(RINT "%" Q "u add %" W "u\n", winst.position, winst.size);
+// 	break;
+//       default:
+// 	DP(RINT "%" Q "u copy %" W "u @ %" Q "u (mode %u)\n", 
+// 	   winst.position, winst.size, winst.addr, winst.mode);
+// 	break;
+//       }
+//     }
+//   }
+
+private:
+  xd3_stream stream_;
+};
diff --git a/third-party/xdelta3/xdelta3/testing/file.h b/third-party/xdelta3/xdelta3/testing/file.h
new file mode 100644
index 0000000000..d1828cfab7
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/testing/file.h
@@ -0,0 +1,399 @@
+/* xdelta3 - delta compression tools and library -*- Mode: C++ -*-
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+class Block;
+class BlockIterator;
+class TmpFile;
+
+class Block {  // Growable byte buffer: size_ bytes in use out of data_size_ allocated.
+public:
+  Block()
+    : data_(NULL),
+      data_size_(0),
+      size_(0) { }
+
+  ~Block() {
+    if (data_) {
+      delete [] data_;
+    }
+  }
+
+  size_t Size() const {
+    return size_;
+  }
+
+  uint8_t operator[](size_t i) const {  // bounds-checked read
+    CHECK_LT(i, size_);
+    return data_[i];
+  }
+
+  uint8_t* Data() const {
+    if (data_ == NULL) {  // never hand out NULL: lazily allocate one byte
+      CHECK_EQ(0, size_);
+      data_size_ = 1;
+      data_ = new uint8_t[1];
+    }
+    return data_;
+  }
+
+  // For writing to blocks
+  void Append(const uint8_t *data, size_t size) {
+    if (data_ == NULL) {
+      CHECK_EQ(0, size_);
+      CHECK_EQ(0, data_size_);
+      data_ = new uint8_t[Constants::BLOCK_SIZE];
+      data_size_ = Constants::BLOCK_SIZE;
+    }
+
+    if (size_ + size > data_size_) {  // grow by doubling until the append fits
+      uint8_t *tmp = data_;
+      while (size_ + size > data_size_) {
+	data_size_ *= 2;
+      }
+      data_ = new uint8_t[data_size_];
+      memcpy(data_, tmp, size_);
+      delete [] tmp;
+    }
+
+    memcpy(data_ + size_, data, size);
+    size_ += size;
+  }
+
+  // For clearing a block
+  void Reset() {  // keeps the allocation, only forgets the contents
+    size_ = 0;
+  }
+
+  // Note: This does not benefit from -Wformat= checking, due to the
+  // enclosing template. Further, it was not used.
+  // void Print() const {
+  //   xoff_t pos = 0;
+  //   for (size_t i = 0; i < Size(); i++) {
+  //     if (pos % 16 == 0) {
+  // 	DP(RINT "%5" Q "x: ", pos);
+  //     }
+  //     DP(RINT "%02x ", (*this)[i]);
+  //     if (pos % 16 == 15) {
+  // 	DP(RINT "\n");
+  //     }
+  //     pos++;
+  //   }
+  //   DP(RINT "\n");
+  // }
+
+  void WriteTmpFile(TmpFile *f) const {
+    f->Append(this);
+  }
+
+  void SetSize(size_t size) {  // resize; existing bytes survive a reallocation
+    uint8_t *t = NULL;
+    if (data_size_ < size) {
+      if (data_) {
+	t = data_;
+      }
+      data_ = new uint8_t[size];
+      data_size_ = size;
+    }
+    if (t && size_ > 0) {  // reallocated: size_ <= old capacity < size
+      memcpy(data_, t, size_);
+    }
+    delete [] t;
+    size_ = size;
+  }
+
+private:
+  friend class BlockIterator;
+
+  mutable uint8_t *data_;  // mutable so that Data() const can allocate lazily
+  mutable size_t data_size_;
+  size_t size_;
+};
+
+class FileSpec {  // A synthetic file described as an offset -> Segment table.
+ public:
+  FileSpec(MTRandom *rand)
+    : rand_(rand) {
+  }
+
+  // Generates a file with a known size
+  void GenerateFixedSize(xoff_t size) {
+    Reset();
+
+    for (xoff_t p = 0; p < size; ) {  // one segment per BLOCK_SIZE chunk
+      xoff_t t = min(Constants::BLOCK_SIZE, size - p);
+      table_.insert(make_pair(p, Segment(t, rand_)));
+      p += t;
+    }
+  }
+
+  // Generates a file with exponential-random distributed size
+  void GenerateRandomSize(xoff_t mean) {
+    GenerateFixedSize(rand_->ExpRand(mean));
+  }
+
+  // Returns the size of the file
+  xoff_t Size() const {
+    if (table_.empty()) {
+      return 0;
+    }
+    ConstSegmentMapIterator i = --table_.end();  // last segment
+    return i->first + i->second.Size();
+  }
+
+  // Returns the number of blocks
+  xoff_t Blocks(size_t blksize = Constants::BLOCK_SIZE) const {
+    if (table_.empty()) {
+      return 0;
+    }
+    return ((Size() - 1) / blksize) + 1;  // Size() / blksize, rounded up
+  }
+
+  // Returns the number of segments
+  xoff_t Segments() const {
+    return table_.size();
+  }
+
+  // Create a mutation according to "what".
+  void ModifyTo(const Mutator &mutator,
+		FileSpec *modify) const {
+    modify->Reset();
+    mutator.Mutate(&modify->table_, &table_, rand_);
+    modify->CheckSegments();
+  }
+
+  void CheckSegments() const {  // each segment must end where the next begins
+    for (ConstSegmentMapIterator iter(table_.begin());
+	 iter != table_.end(); ) {
+      ConstSegmentMapIterator iter0(iter++);
+      if (iter == table_.end()) {
+	break;
+      }
+      CHECK_EQ(iter0->first + iter0->second.Size(), iter->first);
+    }
+  }
+
+  void Reset() {
+    table_.clear();
+  }
+
+  void Print() const {  // one line per segment on cerr
+    for (ConstSegmentMapIterator iter(table_.begin());
+	 iter != table_.end();
+	 ++iter) {
+      const Segment &seg = iter->second;
+      cerr << "Segment at " << iter->first
+	   << " (" << seg.ToString() << ")" << endl;
+    }
+  }
+
+  void PrintData() const {
+    Block block;
+    for (BlockIterator iter(*this); !iter.Done(); iter.Next()) {
+      iter.Get(&block);
+      block.Print();  // NOTE(review): Block::Print() is commented out in this file
+    }
+  }
+
+  void WriteTmpFile(TmpFile *f) const {  // appends every block, in order, to f
+    Block block;
+    for (BlockIterator iter(*this); !iter.Done(); iter.Next()) {
+      iter.Get(&block);
+      f->Append(&block);
+    }
+  }
+
+  void Get(Block *block, xoff_t offset, size_t size) const {  // fill block with [offset, offset+size)
+    size_t got = 0;
+    block->SetSize(size);
+
+    ConstSegmentMapIterator pos = table_.upper_bound(offset);
+    if (pos == table_.begin()) {  // no segment starts at or before offset
+      CHECK_EQ(0, Size());
+      return;
+    }
+    --pos;  // step back to the segment that starts at or before offset
+
+    while (got < size) {
+      CHECK(pos != table_.end());
+      CHECK_GE(offset, pos->first);
+
+      const Segment &seg = pos->second;
+
+      // The position of this segment may start before this block starts,
+      // and then the position of the data may be offset from the seeding
+      // position.
+      size_t seg_offset = offset - pos->first;
+      size_t advance = min(seg.Size() - seg_offset,
+			   size - got);
+
+      seg.Fill(seg_offset, advance, block->Data() + got);
+
+      got += advance;
+      offset += advance;
+      ++pos;
+    }
+  }
+
+  typedef BlockIterator iterator;
+
+ private:
+  friend class BlockIterator;
+
+  MTRandom *rand_;  // stored as given; never deleted by this class
+  SegmentMap table_;  // keyed by each segment's starting offset
+};
+
+class BlockIterator {  // Walks a FileSpec in fixed-size blocks.
+public:
+  explicit BlockIterator(const FileSpec& spec)
+    : spec_(spec),
+      blkno_(0),
+      blksize_(Constants::BLOCK_SIZE) { }
+
+  BlockIterator(const FileSpec& spec,
+		size_t blksize)
+    : spec_(spec),
+      blkno_(0),
+      blksize_(blksize) { }
+
+  bool Done() const {  // true once blkno_ reaches the block count
+    return blkno_ >= spec_.Blocks(blksize_);
+  }
+
+  void Next() {
+    blkno_++;
+  }
+
+  xoff_t Blkno() const {
+    return blkno_;
+  }
+
+  xoff_t Blocks() const {
+    return spec_.Blocks(blksize_);
+  }
+
+  xoff_t Offset() const {  // byte offset of the current block
+    return blkno_ * blksize_;
+  }
+
+  void SetBlock(xoff_t blkno) {  // blkno == Blocks() is accepted
+    CHECK_LE(blkno, Blocks());
+    blkno_ = blkno;
+  }
+
+  void Get(Block *block) const {  // read the current block's bytes into *block
+    spec_.Get(block, blkno_ * blksize_, BytesOnBlock());
+  }
+
+  size_t BytesOnBlock() const {  // bytes the current block actually holds
+    xoff_t blocks = spec_.Blocks(blksize_);
+    xoff_t size = spec_.Size();
+
+    DCHECK((blkno_ < blocks) ||
+	   (blkno_ == blocks && size % blksize_ == 0));
+
+    if (blkno_ == blocks) {  // positioned one past the last block
+      return 0;
+    }
+    if (blkno_ + 1 == blocks) {  // last block: may be partial
+      return ((size - 1) % blksize_) + 1;
+    }
+    return blksize_;
+  }
+
+  size_t BlockSize() const {
+    return blksize_;
+  }
+
+private:
+  const FileSpec& spec_;
+  xoff_t blkno_;
+  size_t blksize_;
+};
+
+class ExtFile {  // A uniquely named path under /tmp, unlinked on construction and destruction.
+public:
+  ExtFile() {
+    static int static_counter = 0;  // distinguishes files created by one process
+    pid_t pid = getpid();
+    char buf[64];
+    xoff_t xpid = pid;  // widened to match the "%" Q "u" conversion below
+    snprintf(buf, 64, "/tmp/regtest.%" Q "u.%d", xpid, static_counter++);
+    filename_.append(buf);
+    unlink(filename_.c_str());
+  }
+
+  ~ExtFile() {
+    unlink(filename_.c_str());
+  }
+
+  const char* Name() const {
+    return filename_.c_str();
+  }
+
+  // Check whether a real file matches a file spec.
+  bool EqualsSpec(const FileSpec &spec) const {  // mismatches trip a CHECK; otherwise returns true
+    main_file t;
+    main_file_init(&t);
+    CHECK_EQ(0, main_file_open(&t, Name(), XO_READ));
+
+    Block tblock;
+    Block sblock;
+    for (BlockIterator iter(spec); !iter.Done(); iter.Next()) {
+      iter.Get(&sblock);
+      tblock.SetSize(sblock.Size());
+      size_t tread;  // NOTE(review): never compared with tblock.Size()
+      CHECK_EQ(0, main_file_read(&t,
+				 tblock.Data(),
+				 tblock.Size(), &tread, "read failed"));
+      CHECK_EQ(0, CmpDifferentBlockBytes(tblock, sblock));
+    }
+
+    CHECK_EQ(0, main_file_close(&t));
+    main_file_cleanup(&t);
+    return true;
+  }
+
+protected:
+  string filename_;
+};
+
+class TmpFile : public ExtFile {  // An ExtFile that is opened for writing on construction.
+public:
+  TmpFile() {
+    main_file_init(&file_);
+    CHECK_EQ(0, main_file_open(&file_, Name(), XO_WRITE));
+  }
+
+  ~TmpFile() {
+    main_file_cleanup(&file_);
+  }
+
+  void Append(const Block *block) {  // writes the block's bytes to the file
+    CHECK_EQ(0, main_file_write(&file_,
+				block->Data(), block->Size(),
+				"tmpfile write failed"));
+  }
+
+  const char* Name() const {  // side effect: closes the file if it is still open
+    if (main_file_isopen(&file_)) {
+      CHECK_EQ(0, main_file_close(&file_));
+    }
+    return ExtFile::Name();
+  }
+
+private:
+  mutable main_file file_;  // mutable so that Name() const can close it
+};
diff --git a/third-party/xdelta3/xdelta3/testing/modify.h b/third-party/xdelta3/xdelta3/testing/modify.h
new file mode 100644
index 0000000000..6590ccdbbc
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/testing/modify.h
@@ -0,0 +1,400 @@
+/* xdelta3 - delta compression tools and library -*- Mode: C++ -*-
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+class Mutator {  // Interface: builds *table from *source_table.
+public:
+  virtual ~Mutator() { }
+  virtual void Mutate(SegmentMap *table,
+		      const SegmentMap *source_table,
+		      MTRandom *rand) const = 0;
+};
+
+class Change {  // One edit operation applied by ChangeListMutator.
+public:
+  enum Kind {
+    MODIFY = 1,     // Mutate a certain range w/ random or supplied data
+    ADD = 2,        // Insert random or supplied data
+    DELRANGE = 3,     // Delete a specified range of data
+    COPY = 4,       // Copy from one region, inserting elsewhere
+    MOVE = 5,       // Copy then delete copied-from range
+    COPYOVER = 6    // Copy then delete copied-to range
+
+    // ADD, DELRANGE, and COPY change the file size
+    // MODIFY, MOVE, COPYOVER preserve the file size
+  };
+
+  // Constructor for modify, add, delete.
+  Change(Kind kind0, xoff_t size0, xoff_t addr1_0)
+    : kind(kind0),
+      size(size0),
+      addr1(addr1_0),
+      addr2(0),
+      insert(NULL) {
+    CHECK(kind != MOVE && kind != COPY && kind != COPYOVER);
+  }
+
+  // Constructor for modify, add w/ provided data.
+  Change(Kind kind0, xoff_t size0, xoff_t addr1_0, Segment *insert0)
+    : kind(kind0),
+      size(size0),
+      addr1(addr1_0),
+      addr2(0),
+      insert(insert0) {
+    CHECK(kind != MOVE && kind != COPY && kind != COPYOVER);
+  }
+
+  // Constructor for move, copy, overwrite
+  Change(Kind kind0, xoff_t size0, xoff_t addr1_0, xoff_t addr2_0)
+    : kind(kind0),
+      size(size0),
+      addr1(addr1_0),
+      addr2(addr2_0),
+      insert(NULL) {
+    CHECK(kind == MOVE || kind == COPY || kind == COPYOVER);
+  }
+
+  Kind kind;
+  xoff_t size;     // Length of the affected range
+  xoff_t addr1;    // Edit position; the copy-from offset for copy/move/overwrite
+  xoff_t addr2;    // Copy-to offset; 0 for the single-address kinds
+  Segment *insert;  // For modify and/or add
+};
+
+typedef list<Change> ChangeList;
+typedef typename ChangeList::const_iterator ConstChangeListIterator;
+typedef typename ChangeList::iterator ChangeListIterator;
+
+class ChangeListMutator : public Mutator {  // Applies a list of Changes in order.
+public:
+  ChangeListMutator(const ChangeList &cl)
+    : cl_(cl) { }
+
+  ChangeListMutator() { }
+
+  void Mutate(SegmentMap *table,
+	      const SegmentMap *source_table,
+	      MTRandom *rand) const {
+    // The speed of processing gigabytes of data is so slow compared with
+    // these table-copy operations, no attempt to make this fast.
+    SegmentMap tmp;
+
+    for (ConstChangeListIterator iter(cl_.begin());
+	 iter != cl_.end(); ++iter) {
+      const Change &ch = *iter;
+      tmp.clear();
+      Mutate(ch, &tmp, source_table, rand);
+      tmp.swap(*table);
+      source_table = table;  // each change reads the result of the previous one
+    }
+  }
+
+  static void Mutate(const Change &ch,
+		     SegmentMap *table,
+		     const SegmentMap *source_table,
+		     MTRandom *rand) {  // dispatch a single change by kind
+    switch (ch.kind) {
+    case Change::ADD:
+      AddChange(ch, table, source_table, rand);
+      break;
+    case Change::MODIFY:
+      ModifyChange(ch, table, source_table, rand);
+      break;
+    case Change::DELRANGE:
+      DeleteChange(ch, table, source_table, rand);
+      break;
+    case Change::COPY:
+      CopyChange(ch, table, source_table, rand);
+      break;
+    case Change::MOVE:
+      MoveChange(ch, table, source_table, rand);
+      break;
+    case Change::COPYOVER:
+      OverwriteChange(ch, table, source_table, rand);
+      break;
+    }
+  }
+
+  static void ModifyChange(const Change &ch,
+			   SegmentMap *table,
+			   const SegmentMap *source_table,
+			   MTRandom *rand) {
+    xoff_t m_start = ch.addr1;
+    xoff_t m_end = m_start + ch.size;
+    xoff_t i_start = 0;
+    xoff_t i_end = 0;
+
+    for (ConstSegmentMapIterator iter(source_table->begin());
+	 iter != source_table->end();
+	 ++iter) {
+      const Segment &seg = iter->second;
+      i_start = iter->first;
+      i_end = i_start + seg.Size();
+
+      if (i_end <= m_start || i_start >= m_end) {  // no overlap: copy as is
+	table->insert(table->end(), make_pair(i_start, seg));
+	continue;
+      }
+
+      if (i_start < m_start) {  // keep the part before the modified range
+	table->insert(table->end(),
+		      make_pair(i_start,
+				seg.Subseg(0, m_start - i_start)));
+      }
+
+      // Insert the entire segment, even though it may extend into later
+      // segments.  This condition avoids inserting it during later
+      // segments.
+      if (m_start >= i_start) {
+	if (ch.insert != NULL) {
+	  table->insert(table->end(), make_pair(m_start, *ch.insert));
+	} else {
+	  Segment part(m_end - m_start, rand);
+	  table->insert(table->end(), make_pair(m_start, part));
+	}
+      }
+
+      if (i_end > m_end) {  // keep the part after the modified range
+	table->insert(table->end(),
+		      make_pair(m_end,
+				seg.Subseg(m_end - i_start, i_end - m_end)));
+      }
+    }
+
+    // This check verifies that the modify does not extend past the
+    // source_table EOF.
+    CHECK_LE(m_end, i_end);
+  }
+
+  static void AddChange(const Change &ch,
+			SegmentMap *table,
+			const SegmentMap *source_table,
+			MTRandom *rand) {
+    xoff_t m_start = ch.addr1;
+    xoff_t i_start = 0;
+    xoff_t i_end = 0;
+
+    for (ConstSegmentMapIterator iter(source_table->begin());
+	 iter != source_table->end();
+	 ++iter) {
+      const Segment &seg = iter->second;
+      i_start = iter->first;
+      i_end = i_start + seg.Size();
+
+      if (i_end <= m_start) {  // entirely before the insertion point
+	table->insert(table->end(), make_pair(i_start, seg));
+	continue;
+      }
+
+      if (i_start > m_start) {  // entirely after: shift by the added size
+	table->insert(table->end(), make_pair(i_start + ch.size, seg));
+	continue;
+      }
+
+      if (i_start < m_start) {
+	table->insert(table->end(),
+		      make_pair(i_start,
+				seg.Subseg(0, m_start - i_start)));
+      }
+
+      if (ch.insert != NULL) {
+	table->insert(table->end(), make_pair(m_start, *ch.insert));
+      } else {
+	Segment addseg(ch.size, rand);
+	table->insert(table->end(), make_pair(m_start, addseg));
+      }
+
+      if (m_start < i_end) {
+	table->insert(table->end(),
+		      make_pair(m_start + ch.size,
+				seg.Subseg(m_start - i_start,
+					   i_end - m_start)));
+      }
+    }
+
+    CHECK_LE(m_start, i_end);
+
+    // Special case for add at end-of-input; supplied data is honored here too.
+    if (m_start == i_end) {
+      Segment addseg(ch.size, rand);
+      table->insert(table->end(), make_pair(m_start, ch.insert != NULL ? *ch.insert : addseg));
+    }
+  }
+
+  static void DeleteChange(const Change &ch,
+			   SegmentMap *table,
+			   const SegmentMap *source_table,
+			   MTRandom *rand) {
+    xoff_t m_start = ch.addr1;
+    xoff_t m_end = m_start + ch.size;
+    xoff_t i_start = 0;
+    xoff_t i_end = 0;
+
+    for (ConstSegmentMapIterator iter(source_table->begin());
+	 iter != source_table->end();
+	 ++iter) {
+      const Segment &seg = iter->second;
+      i_start = iter->first;
+      i_end = i_start + seg.Size();
+
+      if (i_end <= m_start) {  // entirely before the deleted range
+	table->insert(table->end(), make_pair(i_start, seg));
+	continue;
+      }
+
+      if (i_start >= m_end) {  // entirely after: shift back by the deleted size
+	table->insert(table->end(), make_pair(i_start - ch.size, seg));
+	continue;
+      }
+
+      if (i_start < m_start) {
+	table->insert(table->end(),
+		      make_pair(i_start,
+				seg.Subseg(0, m_start - i_start)));
+      }
+
+      if (i_end > m_end) {
+	table->insert(table->end(),
+		      make_pair(m_end - ch.size,
+				seg.Subseg(m_end - i_start, i_end - m_end)));
+      }
+    }
+
+    CHECK_LT(m_start, i_end);
+    CHECK_LE(m_end, i_end);
+  }
+
+  // A move is a copy followed by delete of the copied-from range.
+  static void MoveChange(const Change &ch,
+			 SegmentMap *table,
+			 const SegmentMap *source_table,
+			 MTRandom *rand) {
+    SegmentMap tmp;
+    CHECK_NE(ch.addr1, ch.addr2);
+    CopyChange(ch, &tmp, source_table, rand);
+    Change d(Change::DELRANGE, ch.size,
+	     ch.addr1 < ch.addr2 ? ch.addr1 : ch.addr1 + ch.size);
+    DeleteChange(d, table, &tmp, rand);
+  }
+
+  // An overwrite is a copy followed by a delete of the copied-to range.
+  static void OverwriteChange(const Change &ch,
+			      SegmentMap *table,
+			      const SegmentMap *source_table,
+			      MTRandom *rand) {
+    SegmentMap tmp;
+    CHECK_NE(ch.addr1, ch.addr2);
+    CopyChange(ch, &tmp, source_table, rand);
+    Change d(Change::DELRANGE, ch.size, ch.addr2 + ch.size);
+    DeleteChange(d, table, &tmp, rand);
+  }
+
+  static void CopyChange(const Change &ch,
+			 SegmentMap *table,
+			 const SegmentMap *source_table,
+			 MTRandom *ignore) {
+    xoff_t m_start = ch.addr2;  // where the copy is inserted
+    xoff_t c_start = ch.addr1;  // where the copy is read from
+    xoff_t i_start = 0;
+    xoff_t i_end = 0;
+
+    // Like AddChange() with AppendCopy instead of a random segment.
+    for (ConstSegmentMapIterator iter(source_table->begin());
+	 iter != source_table->end();
+	 ++iter) {
+      const Segment &seg = iter->second;
+      i_start = iter->first;
+      i_end = i_start + seg.Size();
+
+      if (i_end <= m_start) {
+	table->insert(table->end(), make_pair(i_start, seg));
+	continue;
+      }
+
+      if (i_start > m_start) {
+	table->insert(table->end(), make_pair(i_start + ch.size, seg));
+	continue;
+      }
+
+      if (i_start < m_start) {
+	table->insert(table->end(),
+		      make_pair(i_start,
+				seg.Subseg(0, m_start - i_start)));
+      }
+
+      AppendCopy(table, source_table, c_start, m_start, ch.size);
+
+      if (m_start < i_end) {
+	table->insert(table->end(),
+		      make_pair(m_start + ch.size,
+				seg.Subseg(m_start - i_start, i_end - m_start)));
+      }
+    }
+
+    CHECK_LE(m_start, i_end);
+
+    // Special case for copy to end-of-input.
+    if (m_start == i_end) {
+      AppendCopy(table, source_table, c_start, m_start, ch.size);
+    }
+  }
+
+  static void AppendCopy(SegmentMap *table,
+			 const SegmentMap *source_table,
+			 xoff_t copy_offset,
+			 xoff_t append_offset,
+			 xoff_t length) {
+    ConstSegmentMapIterator pos(source_table->upper_bound(copy_offset));
+    --pos;  // segment starting at or before copy_offset; source must be non-empty
+    xoff_t got = 0;
+
+    while (got < length) {
+      size_t seg_offset = copy_offset - pos->first;
+      size_t advance = min(pos->second.Size() - seg_offset,
+			   (size_t)(length - got));
+
+      table->insert(table->end(),
+		    make_pair(append_offset,
+			      pos->second.Subseg(seg_offset,
+						 advance)));
+
+      got += advance;
+      copy_offset += advance;
+      append_offset += advance;
+      ++pos;
+    }
+  }
+
+  ChangeList* Changes() {
+    return &cl_;
+  }
+
+  const ChangeList* Changes() const {
+    return &cl_;
+  }
+
+private:
+  ChangeList cl_;
+};
+
+class Modify1stByte : public Mutator {  // Applies a one-byte MODIFY at address 0.
+public:
+  void Mutate(SegmentMap *table,
+	      const SegmentMap *source_table,
+	      MTRandom *rand) const {
+    const Change first_byte(Change::MODIFY, 1, 0);  // size 1, address 0
+    ChangeListMutator::Mutate(first_byte, table, source_table, rand);
+  }
+};
diff --git a/third-party/xdelta3/xdelta3/testing/random.h b/third-party/xdelta3/xdelta3/testing/random.h
new file mode 100644
index 0000000000..6087f16d71
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/testing/random.h
@@ -0,0 +1,157 @@
+/* xdelta3 - delta compression tools and library -*- Mode: C++ -*-
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+#undef MT_LEN
+#undef MT_IA
+class MTRandom {  // Mersenne Twister generator with a fixed default seed.
+ public:
+  enum Constants { 
+    MT_LEN = 624,
+    MT_IA = 397
+  };
+
+  static const uint32_t TEST_SEED1;
+  static const uint32_t UPPER_MASK;
+  static const uint32_t LOWER_MASK;
+  static const uint32_t MATRIX_A;
+
+  MTRandom() {
+    Init(TEST_SEED1);
+  }
+
+  explicit MTRandom(uint32_t seed) {
+    Init(seed);
+  }
+
+  /* This Mersenne Twister code is attributed to Michael Brundage. Thanks!
+   * http://www.qbrundage.com/michaelb/pubs/essays/random_number_generation.html
+   */
+  uint32_t Rand32 () {
+    uint32_t y;
+    static unsigned long mag01[2] = { 
+      0 , MATRIX_A
+    };
+
+    if (mt_index_ >= MT_LEN) {  // buffer exhausted: regenerate all MT_LEN words
+      int kk;
+
+      for (kk = 0; kk < MT_LEN - MT_IA; kk++) {
+	y = (mt_buffer_[kk] & UPPER_MASK) | (mt_buffer_[kk + 1] & LOWER_MASK);
+	mt_buffer_[kk] = mt_buffer_[kk + MT_IA] ^ (y >> 1) ^ mag01[y & 0x1UL];
+      }
+      for (;kk < MT_LEN - 1; kk++) {
+	y = (mt_buffer_[kk] & UPPER_MASK) | (mt_buffer_[kk + 1] & LOWER_MASK);
+	mt_buffer_[kk] = mt_buffer_[kk + (MT_IA - MT_LEN)] ^ (y >> 1) ^ mag01[y & 0x1UL];
+      }
+      y = (mt_buffer_[MT_LEN - 1] & UPPER_MASK) | (mt_buffer_[0] & LOWER_MASK);
+      mt_buffer_[MT_LEN - 1] = mt_buffer_[MT_IA - 1] ^ (y >> 1) ^ mag01[y & 0x1UL];
+
+      mt_index_ = 0;
+    }
+  
+    y = mt_buffer_[mt_index_++];
+
+    y ^= (y >> 11);
+    y ^= (y << 7) & 0x9d2c5680UL;
+    y ^= (y << 15) & 0xefc60000UL;
+    y ^= (y >> 18);
+
+    return y;
+  }
+
+  uint32_t ExpRand32(uint32_t mean) {  // exponentially distributed, rounded to nearest
+    double mean_d = mean;
+    double erand  = log (1.0 / (Rand32() / (double)UINT32_MAX));
+    uint32_t x = (uint32_t) (mean_d * erand + 0.5);
+    return x;
+  }
+
+  uint64_t Rand64() {  // two 32-bit draws, the first in the high half
+    return ((uint64_t)Rand32() << 32) | Rand32();
+  }
+
+  uint64_t ExpRand64(uint64_t mean) {  // scale by the 64-bit range so the ratio stays <= 1
+    double mean_d = mean;
+    double erand  = log (1.0 / (Rand64() / (double)UINT64_MAX));
+    uint64_t x = (uint64_t) (mean_d * erand + 0.5);
+    return x;
+  }
+
+  template <typename T>
+  T Rand() {  // picks Rand32 or Rand64 from sizeof(T); aborts on any other width
+    switch (sizeof(T)) {
+    case sizeof(uint32_t):
+      return Rand32();
+    case sizeof(uint64_t):
+      return Rand64();
+    default:
+      cerr << "Invalid sizeof T" << endl;
+      abort();
+    }
+  }
+
+  template <typename T>
+  T ExpRand(T mean) {  // picks ExpRand32 or ExpRand64 from sizeof(T)
+    switch (sizeof(T)) {
+    case sizeof(uint32_t):
+      return ExpRand32(mean);
+    case sizeof(uint64_t):
+      return ExpRand64(mean);
+    default:
+      cerr << "Invalid sizeof T" << endl;
+      abort();
+    }
+  }
+
+ private:
+  void Init(uint32_t seed) {
+    mt_buffer_[0] = seed;
+    mt_index_ = MT_LEN;  // forces a regeneration on the first Rand32() call
+    for (int i = 1; i < MT_LEN; i++) {
+      /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
+      /* In the previous versions, MSBs of the seed affect   */
+      /* only MSBs of the array mt[].                        */
+      /* 2002/01/09 modified by Makoto Matsumoto             */
+      mt_buffer_[i] = 
+	(1812433253UL * (mt_buffer_[i-1] ^ (mt_buffer_[i-1] >> 30)) + i);
+    }
+  }
+
+  int mt_index_;
+  uint32_t mt_buffer_[MT_LEN];
+};
+
+const uint32_t MTRandom::TEST_SEED1 = 5489UL;
+const uint32_t MTRandom::UPPER_MASK = 0x80000000;
+const uint32_t MTRandom::LOWER_MASK = 0x7FFFFFFF;
+const uint32_t MTRandom::MATRIX_A = 0x9908B0DF;
+
+class MTRandom8 {  // Byte-sized draws on top of an existing MTRandom.
+public:
+  MTRandom8(MTRandom *rand) : rand_(rand) { }
+
+  // Draws one 32-bit value and folds it down to a single byte.
+  // TODO: make this use a single byte at a time?
+  uint8_t Rand8() {
+    const uint32_t word = rand_->Rand32();
+    const uint32_t folded =
+      (word & 0xff) ^ (word >> 7) ^ (word >> 15) ^ (word >> 21);
+    return folded;
+  }
+
+private:
+  MTRandom *rand_;  // stored as given; never deleted here
+};
diff --git a/third-party/xdelta3/xdelta3/testing/regtest.cc b/third-party/xdelta3/xdelta3/testing/regtest.cc
new file mode 100644
index 0000000000..daddc0d9d4
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/testing/regtest.cc
@@ -0,0 +1,1321 @@
+/* xdelta3 - delta compression tools and library -*- Mode: C++ -*-
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+#include "test.h"
+#include "random.h"
+#include "sizes.h"
+
+template <typename Constants>
+class Regtest {
+public:
+  typedef typename Constants::Sizes Sizes;
+
+  struct Options {  // Per-test knobs; the constructor supplies the defaults.
+    Options()
+      : encode_srcwin_maxsz(1<<20),
+	block_size(Constants::BLOCK_SIZE),
+	window_size(Constants::WINDOW_SIZE),
+	size_known(false),
+	iopt_size(XD3_DEFAULT_IOPT_SIZE),
+	smatch_cfg(XD3_SMATCH_DEFAULT) { }
+
+    xoff_t encode_srcwin_maxsz;  // copied into xd3_source::max_winsize
+    size_t block_size;  // copied into xd3_source::blksize
+    xoff_t window_size;  // copied into xd3_config::winsize for the encoder
+    bool size_known;  // selects xd3_set_source_and_size over xd3_set_source
+    usize_t iopt_size;
+    xd3_smatch_cfg smatch_cfg;
+  };
+
+#include "segment.h"
+#include "modify.h"
+#include "file.h"
+#include "cmp.h"
+#include "delta.h"
+
+  void InMemoryEncodeDecode(const FileSpec &source_file,  // encode, decode and verify in memory
+			    const FileSpec &target_file,
+			    Block *coded_data,
+			    const Options &options) {
+    xd3_stream encode_stream;
+    xd3_config encode_config;
+    xd3_source encode_source;
+
+    xd3_stream decode_stream;
+    xd3_config decode_config;
+    xd3_source decode_source;
+    xoff_t verified_bytes = 0;
+    xoff_t encoded_bytes = 0;
+
+    if (coded_data) {
+      coded_data->Reset();
+    }
+
+    memset(&encode_stream, 0, sizeof (encode_stream));
+    memset(&encode_source, 0, sizeof (encode_source));
+
+    memset(&decode_stream, 0, sizeof (decode_stream));
+    memset(&decode_source, 0, sizeof (decode_source));
+
+    xd3_init_config(&encode_config, XD3_ADLER32);
+    xd3_init_config(&decode_config, XD3_ADLER32);
+
+    encode_config.winsize = options.window_size;
+    encode_config.iopt_size = options.iopt_size;
+    encode_config.smatch_cfg = options.smatch_cfg;
+
+    CHECK_EQ(0, xd3_config_stream (&encode_stream, &encode_config));
+    CHECK_EQ(0, xd3_config_stream (&decode_stream, &decode_config));
+
+    encode_source.blksize = options.block_size;
+    decode_source.blksize = options.block_size;
+
+    encode_source.max_winsize = options.encode_srcwin_maxsz;
+    decode_source.max_winsize = options.encode_srcwin_maxsz;
+
+    if (!options.size_known)
+      {
+	xd3_set_source (&encode_stream, &encode_source);
+	xd3_set_source (&decode_stream, &decode_source);
+      }
+    else
+      {
+	xd3_set_source_and_size (&encode_stream, &encode_source,
+				 source_file.Size());
+	xd3_set_source_and_size (&decode_stream, &decode_source,
+				 source_file.Size());
+      }
+
+    BlockIterator source_iterator(source_file, options.block_size);
+    BlockIterator target_iterator(target_file, Constants::WINDOW_SIZE);  // NOTE(review): not options.window_size
+    Block encode_source_block, decode_source_block;
+    Block decoded_block, target_block;
+    bool encoding = true;  // which of the two streams the next step drives
+    bool done = false;
+    bool done_after_input = false;  // set once a flushed encoder window finishes
+
+    IF_DEBUG1 (XPR(NTR "source %" Q "u[%" Z "u] target %" Q "u winsize %" Z "u\n",
+		   source_file.Size(), options.block_size,
+		   target_file.Size(),
+		   Constants::WINDOW_SIZE));
+
+    while (!done) {
+      target_iterator.Get(&target_block);
+
+      xoff_t blks = target_iterator.Blocks();
+
+      IF_DEBUG2(XPR(NTR "target in %s: %" Q "u[%" Z "u] %" Q "u(%" Q "u) "
+		    "verified %" Q "u\n",
+		    encoding ? "encoding" : "decoding",
+		    target_iterator.Offset(),
+		    target_block.Size(),
+		    target_iterator.Blkno(),
+		    blks,
+		    verified_bytes));
+
+      if (blks == 0 || target_iterator.Blkno() == (blks - 1)) {  // last (or only) block
+	xd3_set_flags(&encode_stream, XD3_FLUSH | encode_stream.flags);
+      }
+
+      xd3_avail_input(&encode_stream, target_block.Data(), target_block.Size());
+      encoded_bytes += target_block.Size();
+
+    process:
+      int ret;
+      const char *msg;
+      if (encoding) {
+	ret = xd3_encode_input(&encode_stream);
+	msg = encode_stream.msg;
+      } else {
+	ret = xd3_decode_input(&decode_stream);
+	msg = decode_stream.msg;
+      }
+      (void) msg;
+
+      switch (ret) {
+      case XD3_OUTPUT:
+	if (encoding) {
+	  if (coded_data != NULL) {
+	    // Optional encoded-output to the caller
+	    coded_data->Append(encode_stream.next_out,
+			       encode_stream.avail_out);
+	  }
+	  // Feed this data to the decoder.
+	  xd3_avail_input(&decode_stream,
+			  encode_stream.next_out,
+			  encode_stream.avail_out);
+	  xd3_consume_output(&encode_stream);
+	  encoding = false;
+	} else {
+	  decoded_block.Append(decode_stream.next_out,
+			       decode_stream.avail_out);
+	  xd3_consume_output(&decode_stream);
+	}
+	goto process;
+
+      case XD3_GETSRCBLK: {  // supply the requested source block to whichever side asked
+	xd3_source *src = (encoding ? &encode_source : &decode_source);
+	Block *block = (encoding ? &encode_source_block : &decode_source_block);
+	if (encoding) {
+	  IF_DEBUG2(XPR(NTR "[srcblock] %" Q "u last srcpos %" Q "u "
+			"encodepos %" Q "u\n",
+			encode_source.getblkno,
+			encode_stream.match_last_srcpos,
+			encode_stream.input_position + encode_stream.total_in));
+	}
+
+	source_iterator.SetBlock(src->getblkno);
+	source_iterator.Get(block);
+	src->curblkno = src->getblkno;
+	src->onblk = block->Size();
+	src->curblk = block->Data();
+
+	goto process;
+      }
+
+      case XD3_INPUT:
+	if (!encoding) {  // decoder wants input: hand control back to the encoder
+	  encoding = true;
+	  goto process;
+	} else {
+	  if (done_after_input) {
+	    done = true;
+	    continue;
+	  }
+
+	  if (target_block.Size() < target_iterator.BlockSize()) {  // short block: do not advance
+	    encoding = false;
+	  } else {
+	    target_iterator.Next();
+	  }
+	  continue;
+	}
+
+      case XD3_WINFINISH:
+	if (encoding) {
+	  if (encode_stream.flags & XD3_FLUSH) {
+	    done_after_input = true;
+	  }
+	  encoding = false;
+	} else {
+	 CHECK_EQ(0, CmpDifferentBlockBytesAtOffset(decoded_block,
+						    target_file,
+						    verified_bytes));
+	 verified_bytes += decoded_block.Size();
+	 decoded_block.Reset();
+	 encoding = true;
+       }
+       goto process;
+
+     case XD3_WINSTART:
+     case XD3_GOTHEADER:
+       goto process;
+
+     default:
+       XPR(NTR "%s = %s %s\n", encoding ? "E " : " D",
+	   xd3_strerror(ret),
+	   msg == NULL ? "" : msg);
+
+       CHECK_EQ(0, ret);
+       CHECK_EQ(-1, ret);  // the two checks together fail for every value of ret
+     }
+   }
+
+   CHECK_EQ(target_file.Size(), encoded_bytes);
+   CHECK_EQ(target_file.Size(), verified_bytes);
+   CHECK_EQ(0, xd3_close_stream(&decode_stream));
+   CHECK_EQ(0, xd3_close_stream(&encode_stream));
+   xd3_free_stream(&encode_stream);
+   xd3_free_stream(&decode_stream);
+ }
+
+  void MainEncodeDecode(const TmpFile &source_file,
+			const TmpFile &target_file,
+			ExtFile *coded_data,
+			const Options &options) {  // CLI round trip: encode, decode, compare.
+    vector<const char*> ecmd;
+    char bbuf[32];  // room for "-B" plus a full 64-bit decimal value and NUL
+    snprintf(bbuf, sizeof(bbuf), "-B%" Q "u", options.encode_srcwin_maxsz);
+    ecmd.push_back("xdelta3");
+    ecmd.push_back(bbuf);
+    ecmd.push_back("-s");
+    ecmd.push_back(source_file.Name());
+    ecmd.push_back(target_file.Name());
+    ecmd.push_back(coded_data->Name());
+    ecmd.push_back(NULL);
+    // Encode target against source into coded_data; argc excludes the NULL.
+    CHECK_EQ(0, xd3_main_cmdline(ecmd.size() - 1,
+				 const_cast<char**>(&ecmd[0])));
+    // Decode into a fresh file, passing the same -B value as the encoder.
+    vector<const char*> dcmd;
+    ExtFile recon_file;
+    dcmd.push_back("xdelta3");
+    dcmd.push_back(bbuf);
+    dcmd.push_back("-d");
+    dcmd.push_back("-s");
+    dcmd.push_back(source_file.Name());
+    dcmd.push_back(coded_data->Name());
+    dcmd.push_back(recon_file.Name());
+    dcmd.push_back(NULL);
+
+    CHECK_EQ(0, xd3_main_cmdline(dcmd.size() - 1,
+				 const_cast<char**>(&dcmd[0])));
+    // The reconstruction must compare equal to the original target file.
+    CHECK_EQ(0, test_compare_files(recon_file.Name(),
+				   target_file.Name()));
+  }
+
+  // Similar to xd3_process_memory, with support for test Options (iopt_size).
+  // Exercises xd3_process_stream; returns 0 or the first nonzero xd3 result.
+  int TestProcessMemory (int            is_encode,
+			 int          (*func) (xd3_stream *),
+			 const uint8_t *input,
+			 usize_t        input_size,
+			 const uint8_t *source,
+			 usize_t        source_size,
+			 uint8_t       *output,
+			 usize_t       *output_size,
+			 usize_t        output_size_max,
+			 const Options &options) {
+    xd3_stream stream;
+    xd3_config config;
+    xd3_source src;
+    int ret;
+
+    memset (& stream, 0, sizeof (stream));
+    memset (& config, 0, sizeof (config));
+    // Only the encoder overrides the zeroed config: one window sized to the input.
+    if (is_encode)
+      {
+	config.winsize = input_size;
+	config.iopt_size = options.iopt_size;
+	config.sprevsz = xd3_pow2_roundup (config.winsize);
+      }
+
+    if ((ret = xd3_config_stream (&stream, &config)) != 0)
+      {
+	goto exit;
+      }
+
+    if (source != NULL)
+      {
+	memset (& src, 0, sizeof (src));
+	// Present the entire source buffer as a single, already-current block 0.
+	src.blksize = source_size;
+	src.onblk = source_size;
+	src.curblk = source;
+	src.curblkno = 0;
+	src.max_winsize = source_size;
+
+	if ((ret = xd3_set_source_and_size (&stream, &src, source_size)) != 0)
+	  {
+	    goto exit;
+	  }
+      }
+
+    if ((ret = xd3_process_stream (is_encode,
+				   & stream,
+				   func, 1,
+				   input, input_size,
+				   output,
+				   output_size,
+				   output_size_max)) != 0)
+      {
+	goto exit;
+      }
+
+  exit:
+    if (ret != 0)
+      {
+	IF_DEBUG2 (DP(RINT "test_process_memory: %d: %s\n", ret, stream.msg));
+      }
+    xd3_free_stream(&stream);  // reached on success and on every error path
+    return ret;
+  }
+
+  void EncodeDecodeAPI(const FileSpec &spec0, const FileSpec &spec1,
+		       Block *delta, const Options &options) {
+    Block from;
+    Block to;
+    spec0.Get(&from, 0, spec0.Size());
+    spec1.Get(&to, 0, spec1.Size());
+    // Encode spec1 against spec0 into *delta; buffer is 1.5x the target size.
+    delta->SetSize(to.Size() * 1.5);
+    usize_t out_size = 0;
+    int enc_ret = TestProcessMemory(true,
+				    &xd3_encode_input,
+				    to.Data(),
+				    to.Size(),
+				    from.Data(),
+				    from.Size(),
+				    delta->Data(),
+				    &out_size,
+				    delta->Size(),
+				    options);
+    CHECK_EQ(0, enc_ret);
+    delta->SetSize(out_size);
+    // Decode through the plain memory API and verify length as well as bytes.
+    Block recon;
+    recon.SetSize(to.Size());
+    usize_t recon_size = 0;
+    int dec_ret = xd3_decode_memory(delta->Data(), delta->Size(),
+				    from.Data(),
+				    from.Size(),
+				    recon.Data(),
+				    &recon_size,
+				    recon.Size(),
+				    0);
+    CHECK_EQ(0, dec_ret);
+    CHECK_EQ(recon.Size(), recon_size);
+    CHECK_EQ(0, CmpDifferentBlockBytes(to, recon));
+  }
+
+//////////////////////////////////////////////////////////////////////
+
+void TestPrintf() {  // snprintf_func with the Q length modifier must print XOFF_T_MAX exactly.
+  char buf[64];
+  xoff_t x = XOFF_T_MAX;
+  snprintf_func (buf, sizeof(buf), "%" Q "u", x);
+  const char *expect = XD3_USE_LARGEFILE64 ?
+    "18446744073709551615" : "4294967295";
+  CHECK(strcmp (buf, expect) == 0);  // CHECK, like every other test here, so the check is always active
+}
+
+void TestRandomNumbers() {  // Sample means of Rand32/ExpRand32 must land within 1% of expectation.
+  MTRandom rand;
+  int rounds = 1<<20;
+  uint64_t usum = 0;
+  uint64_t esum = 0;
+
+  for (int i = 0; i < rounds; i++) {
+    usum += rand.Rand32();
+    esum += rand.ExpRand32(1024);
+  }
+
+  double allowed_error = 0.01;
+
+  uint32_t umean = usum / rounds;
+  uint32_t emean = esum / rounds;
+  // Expected means: half the 32-bit range for Rand32, and 1024 for ExpRand32(1024).
+  uint32_t uexpect = UINT32_MAX / 2;
+  uint32_t eexpect = 1024;
+
+  if (umean < uexpect * (1.0 - allowed_error) ||
+      umean > uexpect * (1.0 + allowed_error)) {
+    XPR(NT "uniform mean error: %u != %u\n", umean, uexpect);
+    abort();
+  }
+
+  if (emean < eexpect * (1.0 - allowed_error) ||
+      emean > eexpect * (1.0 + allowed_error)) {
+    XPR(NT "exponential mean error: %u != %u\n", emean, eexpect);
+    abort();
+  }
+}
+
+void TestRandomFile() {  // Checks FileSpec size/segment/block counts and per-block byte counts.
+  MTRandom rand1;
+  FileSpec spec1(&rand1);
+  BlockIterator bi(spec1);
+  // Size 0: no segments, no blocks, and block 0 holds no bytes.
+  spec1.GenerateFixedSize(0);
+  CHECK_EQ(0, spec1.Size());
+  CHECK_EQ(0, spec1.Segments());
+  CHECK_EQ(0, spec1.Blocks());
+  bi.SetBlock(0);
+  CHECK_EQ(0, bi.BytesOnBlock());
+  // Size 1: one segment, one block holding a single byte.
+  spec1.GenerateFixedSize(1);
+  CHECK_EQ(1, spec1.Size());
+  CHECK_EQ(1, spec1.Segments());
+  CHECK_EQ(1, spec1.Blocks());
+  bi.SetBlock(0);
+  CHECK_EQ(1, bi.BytesOnBlock());
+  // Exactly BLOCK_SIZE: one full block; block 1 reports zero bytes.
+  spec1.GenerateFixedSize(Constants::BLOCK_SIZE);
+  CHECK_EQ(Constants::BLOCK_SIZE, spec1.Size());
+  CHECK_EQ(1, spec1.Segments());
+  CHECK_EQ(1, spec1.Blocks());
+  bi.SetBlock(0);
+  CHECK_EQ(Constants::BLOCK_SIZE, bi.BytesOnBlock());
+  bi.SetBlock(1);
+  CHECK_EQ(0, bi.BytesOnBlock());
+  // BLOCK_SIZE + 1: the second block holds the one spill-over byte.
+  spec1.GenerateFixedSize(Constants::BLOCK_SIZE + 1);
+  CHECK_EQ(Constants::BLOCK_SIZE + 1, spec1.Size());
+  CHECK_EQ(2, spec1.Segments());
+  CHECK_EQ(2, spec1.Blocks());
+  bi.SetBlock(0);
+  CHECK_EQ(Constants::BLOCK_SIZE, bi.BytesOnBlock());
+  bi.SetBlock(1);
+  CHECK_EQ(1, bi.BytesOnBlock());
+  // Two full blocks.
+  spec1.GenerateFixedSize(Constants::BLOCK_SIZE * 2);
+  CHECK_EQ(Constants::BLOCK_SIZE * 2, spec1.Size());
+  CHECK_EQ(2, spec1.Segments());
+  CHECK_EQ(2, spec1.Blocks());
+  bi.SetBlock(0);
+  CHECK_EQ(Constants::BLOCK_SIZE, bi.BytesOnBlock());
+  bi.SetBlock(1);
+  CHECK_EQ(Constants::BLOCK_SIZE, bi.BytesOnBlock());
+}
+
+void TestFirstByte() {  // Checks CmpDifferentBytes and Modify1stByte, then round-trips such edits.
+  MTRandom rand;
+  FileSpec spec0(&rand);
+  FileSpec spec1(&rand);
+  // A file equals itself; an empty file and a 1-byte file differ by one byte.
+  spec0.GenerateFixedSize(0);
+  spec1.GenerateFixedSize(1);
+  CHECK_EQ(0, CmpDifferentBytes(spec0, spec0));
+  CHECK_EQ(0, CmpDifferentBytes(spec1, spec1));
+  CHECK_EQ(1, CmpDifferentBytes(spec0, spec1));
+  CHECK_EQ(1, CmpDifferentBytes(spec1, spec0));
+  // Modify1stByte must produce exactly one differing byte.
+  spec0.GenerateFixedSize(1);
+  spec0.ModifyTo(Modify1stByte(), &spec1);
+  CHECK_EQ(1, CmpDifferentBytes(spec0, spec1));
+  // Same expectation when the file spans more than one block.
+  spec0.GenerateFixedSize(Constants::BLOCK_SIZE + 1);
+  spec0.ModifyTo(Modify1stByte(), &spec1);
+  CHECK_EQ(1, CmpDifferentBytes(spec0, spec1));
+
+  SizeIterator<size_t, Sizes> si(&rand, Constants::TEST_ROUNDS);
+  // Encode/decode a first-byte edit for every non-zero size the iterator yields.
+  for (; !si.Done(); si.Next()) {
+    size_t size = si.Get();
+    if (size == 0) {
+      continue;
+    }
+    spec0.GenerateFixedSize(size);
+    spec0.ModifyTo(Modify1stByte(), &spec1);
+    InMemoryEncodeDecode(spec0, spec1, NULL, Options());
+  }
+}
+
+void TestModifyMutator() {  // MODIFY keeps the size and changes roughly `size` bytes.
+  MTRandom rand;
+  FileSpec spec0(&rand);
+  FileSpec spec1(&rand);
+
+  spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 3);
+  // Each case is (bytes to modify, starting address) within the 3-block file.
+  struct {
+    size_t size;
+    size_t addr;
+  } test_cases[] = {
+    { Constants::BLOCK_SIZE, 0 },
+    { Constants::BLOCK_SIZE / 2, 1 },
+    { Constants::BLOCK_SIZE, 1 },
+    { Constants::BLOCK_SIZE * 2, 1 },
+  };
+
+  for (size_t i = 0; i < SIZEOF_ARRAY(test_cases); i++) {
+    ChangeList cl1;
+    cl1.push_back(Change(Change::MODIFY, test_cases[i].size,
+			 test_cases[i].addr));
+    spec0.ModifyTo(ChangeListMutator(cl1), &spec1);
+    CHECK_EQ(spec0.Size(), spec1.Size());
+    // Upper bound: no more bytes may differ than were modified.
+    size_t diff = CmpDifferentBytes(spec0, spec1);
+    CHECK_LE(diff, test_cases[i].size);
+
+    // There is a 1/256 probability of the changed byte matching the
+    // original value.  The following allows double the probability to
+    // pass.
+    CHECK_GE(diff, test_cases[i].size - (2 * test_cases[i].size / 256));
+
+    InMemoryEncodeDecode(spec0, spec1, NULL, Options());
+  }
+}
+
+void TestAddMutator() {  // ADD grows the file by `size`; checks the delta's AddedBytes() per case.
+  MTRandom rand;
+  FileSpec spec0(&rand);
+  FileSpec spec1(&rand);
+
+  spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 2);
+  // TODO: fix this test (for all block sizes)!  it's broken because
+  // the same byte could be added?
+  // Each case is (bytes to add, insertion address, expected added bytes in the delta).
+  struct {
+    size_t size;
+    size_t addr;
+    size_t expected_adds;
+  } test_cases[] = {
+    { 1, 0,                         2 /* 1st byte, last byte (short block) */ },
+    { 1, 1,                         3 /* 1st 2 bytes, last byte */ },
+    { 1, Constants::BLOCK_SIZE - 1, 2 /* changed, last */ },
+    { 1, Constants::BLOCK_SIZE,     2 /* changed, last */ },
+    { 1, Constants::BLOCK_SIZE + 1, 3 /* changed + 1st of 2nd block, last */ },
+    { 1, 2 * Constants::BLOCK_SIZE, 1 /* last byte */ },
+  };
+
+  for (size_t i = 0; i < SIZEOF_ARRAY(test_cases); i++) {
+    ChangeList cl1;
+    cl1.push_back(Change(Change::ADD, test_cases[i].size, test_cases[i].addr));
+    spec0.ModifyTo(ChangeListMutator(cl1), &spec1);
+    CHECK_EQ(spec0.Size() + test_cases[i].size, spec1.Size());
+    // Round-trip and keep the encoded delta so it can be inspected.
+    Block coded;
+    InMemoryEncodeDecode(spec0, spec1, &coded, Options());
+
+    Delta delta(coded);
+    CHECK_EQ(test_cases[i].expected_adds,
+	     delta.AddedBytes());
+  }
+}
+
+void TestDeleteMutator() {  // DELRANGE shrinks the file; the delta must need zero added bytes.
+  MTRandom rand;
+  FileSpec spec0(&rand);
+  FileSpec spec1(&rand);
+
+  spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 4);
+  // Each case is (bytes to delete, starting address) within the 4-block file.
+  struct {
+    size_t size;
+    size_t addr;
+  } test_cases[] = {
+    // Note: an entry { Constants::BLOCK_SIZE, 0 },
+    // does not work because the xd3_srcwin_move_point logic won't
+    // find a copy if it occurs >= double its size into the file.
+    { Constants::BLOCK_SIZE / 2, 0 },
+    { Constants::BLOCK_SIZE / 2, Constants::BLOCK_SIZE / 2 },
+    { Constants::BLOCK_SIZE, Constants::BLOCK_SIZE / 2 },
+    { Constants::BLOCK_SIZE * 2, Constants::BLOCK_SIZE * 3 / 2 },
+    { Constants::BLOCK_SIZE, Constants::BLOCK_SIZE * 2 },
+  };
+
+  for (size_t i = 0; i < SIZEOF_ARRAY(test_cases); i++) {
+    ChangeList cl1;
+    cl1.push_back(Change(Change::DELRANGE, test_cases[i].size,
+			 test_cases[i].addr));
+    spec0.ModifyTo(ChangeListMutator(cl1), &spec1);
+    CHECK_EQ(spec0.Size() - test_cases[i].size, spec1.Size());
+    // Everything left in the target exists in the source, so no adds are expected.
+    Block coded;
+    InMemoryEncodeDecode(spec0, spec1, &coded, Options());
+
+    Delta delta(coded);
+    CHECK_EQ(0, delta.AddedBytes());
+  }
+}
+
+void TestCopyMutator() {  // COPY grows the file by `size`; the delta must need zero added bytes.
+  MTRandom rand;
+  FileSpec spec0(&rand);
+  FileSpec spec1(&rand);
+
+  spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 3);
+  // Each case is (bytes to copy, source offset, destination offset).
+  struct {
+    size_t size;
+    size_t from;
+    size_t to;
+  } test_cases[] = {
+    // Copy is difficult to write tests for because where Xdelta finds
+    // copies, it does not enter checksums.  So these tests copy data from
+    // later to earlier so that checksumming will start.
+    { Constants::BLOCK_SIZE / 2, Constants::BLOCK_SIZE / 2, 0 },
+    { Constants::BLOCK_SIZE, 2 * Constants::BLOCK_SIZE,
+      Constants::BLOCK_SIZE, },
+  };
+
+  for (size_t i = 0; i < SIZEOF_ARRAY(test_cases); i++) {
+    ChangeList cl1;
+    cl1.push_back(Change(Change::COPY, test_cases[i].size,
+			 test_cases[i].from, test_cases[i].to));
+    spec0.ModifyTo(ChangeListMutator(cl1), &spec1);
+    CHECK_EQ(spec0.Size() + test_cases[i].size, spec1.Size());
+    // Round-trip and inspect the delta: copied data should cost no added bytes.
+    Block coded;
+    InMemoryEncodeDecode(spec0, spec1, &coded, Options());
+
+    Delta delta(coded);
+    CHECK_EQ(0, delta.AddedBytes());
+  }
+}
+
+void TestMoveMutator() {  // MOVE keeps the size; the delta must need zero added bytes.
+  MTRandom rand;
+  FileSpec spec0(&rand);
+  FileSpec spec1(&rand);
+
+  spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 3);
+  // Each case is (bytes to move, source offset, destination offset).
+  struct {
+    size_t size;
+    size_t from;
+    size_t to;
+  } test_cases[] = {
+    // This is easier to test than Copy but has the same trouble as Delete.
+    { Constants::BLOCK_SIZE / 2, Constants::BLOCK_SIZE / 2, 0 },
+    { Constants::BLOCK_SIZE / 2, 0, Constants::BLOCK_SIZE / 2 },
+    { Constants::BLOCK_SIZE, Constants::BLOCK_SIZE, 2 *
+      Constants::BLOCK_SIZE },
+    { Constants::BLOCK_SIZE, 2 * Constants::BLOCK_SIZE,
+      Constants::BLOCK_SIZE },
+    { Constants::BLOCK_SIZE * 3 / 2, Constants::BLOCK_SIZE,
+      Constants::BLOCK_SIZE * 3 / 2 },
+
+    // This is a no-op
+    { Constants::BLOCK_SIZE, Constants::BLOCK_SIZE * 2,
+      3 * Constants::BLOCK_SIZE },
+  };
+
+  for (size_t i = 0; i < SIZEOF_ARRAY(test_cases); i++) {
+    ChangeList cl1;
+    cl1.push_back(Change(Change::MOVE, test_cases[i].size,
+			 test_cases[i].from, test_cases[i].to));
+    spec0.ModifyTo(ChangeListMutator(cl1), &spec1);
+    CHECK_EQ(spec0.Size(), spec1.Size());
+    // Round-trip and inspect the delta: moved data should cost no added bytes.
+    Block coded;
+    InMemoryEncodeDecode(spec0, spec1, &coded, Options());
+
+    Delta delta(coded);
+    CHECK_EQ(0, delta.AddedBytes());
+  }
+}
+
+void TestOverwriteMutator() {  // COPYOVER overwrites in place: same size, only the target range changes.
+  MTRandom rand;
+  FileSpec spec0(&rand);
+  FileSpec spec1(&rand);
+
+  spec0.GenerateFixedSize(Constants::BLOCK_SIZE);
+  // Case 1: copy 10 bytes from offset 0 over offset 20.
+  ChangeList cl1;
+  cl1.push_back(Change(Change::COPYOVER, 10, 0, 20));
+  spec0.ModifyTo(ChangeListMutator(cl1), &spec1);
+  CHECK_EQ(spec0.Size(), spec1.Size());
+
+  Block b0, b1;
+  BlockIterator(spec0).Get(&b0);
+  BlockIterator(spec1).Get(&b1);
+
+  CHECK(memcmp(b0.Data(), b1.Data() + 20, 10) == 0);  // spec0[0,10) now sits at spec1[20,30)
+  CHECK(memcmp(b0.Data(), b1.Data(), 20) == 0);  // bytes before the target range are untouched
+  CHECK(memcmp(b0.Data() + 30, b1.Data() + 30,
+	       Constants::BLOCK_SIZE - 30) == 0);
+  // Case 2: copy 10 bytes from offset 20 over offset 0.
+  xoff_t zero = 0;
+  cl1.clear();
+  cl1.push_back(Change(Change::COPYOVER, 10, 20, zero));
+  spec0.ModifyTo(ChangeListMutator(cl1), &spec1);
+  CHECK_EQ(spec0.Size(), spec1.Size());
+
+  BlockIterator(spec0).Get(&b0);
+  BlockIterator(spec1).Get(&b1);
+
+  CHECK(memcmp(b0.Data() + 20, b1.Data(), 10) == 0);  // spec0[20,30) now sits at spec1[0,10)
+  CHECK(memcmp(b0.Data() + 10, b1.Data() + 10,
+	       Constants::BLOCK_SIZE - 10) == 0);
+}
+
+// Note: this test is written to expose a problem, but the problem was
+// only exposed with BLOCK_SIZE = 128.  It encodes spec1 -> spec2, both derived from spec0.
+void TestNonBlocking() {
+  MTRandom rand;
+  FileSpec spec0(&rand);
+  FileSpec spec1(&rand);
+  FileSpec spec2(&rand);
+
+  spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 3);
+
+  // This is a lazy target match
+  Change ct(Change::COPYOVER, 22,
+	    Constants::BLOCK_SIZE + 50,
+	    Constants::BLOCK_SIZE + 20);
+
+  // This is a source match just after the block boundary, shorter
+  // than the lazy target match.
+  Change cs1(Change::COPYOVER, 16,
+	     Constants::BLOCK_SIZE + 51,
+	     Constants::BLOCK_SIZE - 1);
+
+  // This overwrites the original source bytes.
+  Change cs2(Change::MODIFY, 108,
+	     Constants::BLOCK_SIZE + 20);
+
+  // This changes the first blocks
+  Change c1st(Change::MODIFY, Constants::BLOCK_SIZE - 2, 0);
+  // Source side (spec1): cs1, cs2 and c1st applied to spec0.
+  ChangeList csl;
+  csl.push_back(cs1);
+  csl.push_back(cs2);
+  csl.push_back(c1st);
+
+  spec0.ModifyTo(ChangeListMutator(csl), &spec1);
+  // Target side (spec2): ct and c1st applied to spec0.
+  ChangeList ctl;
+  ctl.push_back(ct);
+  ctl.push_back(c1st);
+
+  spec0.ModifyTo(ChangeListMutator(ctl), &spec2);
+
+  InMemoryEncodeDecode(spec1, spec2, NULL, Options());
+}
+
+void TestEmptyInMemory() {  // Empty source and target must still yield a non-empty, one-window delta.
+  MTRandom rand;
+  FileSpec spec0(&rand);
+  FileSpec spec1(&rand);
+  Block block;
+
+  spec0.GenerateFixedSize(0);
+  spec1.GenerateFixedSize(0);
+
+  InMemoryEncodeDecode(spec0, spec1, &block, Options());
+
+  Delta delta(block);
+  CHECK_LT(0, block.Size());  // the encoded output itself is not empty
+  CHECK_EQ(1, delta.Windows());
+}
+
+void TestBlockInMemory() {  // One-block files: window count must equal spec1.Blocks(WINDOW_SIZE).
+  MTRandom rand;
+  FileSpec spec0(&rand);
+  FileSpec spec1(&rand);
+  Block block;
+  // Source and target are generated separately, each exactly one block long.
+  spec0.GenerateFixedSize(Constants::BLOCK_SIZE);
+  spec1.GenerateFixedSize(Constants::BLOCK_SIZE);
+
+  InMemoryEncodeDecode(spec0, spec1, &block, Options());
+
+  Delta delta(block);
+  CHECK_EQ(spec1.Blocks(Constants::WINDOW_SIZE), delta.Windows());
+}
+
+void TestSmallStride() {  // One-byte edits every `s` bytes; added bytes must stay near the edit count.
+  MTRandom rand;
+  FileSpec spec0(&rand);
+  usize_t size = Constants::BLOCK_SIZE * 4;
+  spec0.GenerateFixedSize(size);
+
+  // Note: Not very good performance due to hash collisions, note 3x
+  // multiplier below.
+  for (int s = 15; s < 101; s++) {
+    usize_t changes = 0;
+    ChangeList cl;
+    for (usize_t j = s; j < size; j += s, ++changes)
+      {
+	cl.push_back(Change(Change::MODIFY, 1, j));
+      }
+
+    FileSpec spec1(&rand);
+    spec0.ModifyTo(ChangeListMutator(cl), &spec1);
+    // Source window covers the whole file; slow matcher; size_known disabled.
+    Options options;
+    options.encode_srcwin_maxsz = size;
+    options.iopt_size = 128;
+    options.smatch_cfg = XD3_SMATCH_SLOW;
+    options.size_known = false;
+
+    Block block;
+    InMemoryEncodeDecode(spec0, spec1, &block, options);
+    Delta delta(block);
+
+    IF_DEBUG1(DP(RINT "[stride=%d] changes=%" W "u adds=%" Q "u\n",
+		 s, changes, delta.AddedBytes()));
+    double allowance = Constants::BLOCK_SIZE < 8192 || s < 30 ? 3.0 : 1.1;  // 3x for small blocks or strides
+    CHECK_GE(allowance * changes, (double)delta.AddedBytes());
+  }
+}
+
+void TestCopyWindow() {
+  // Construct an input that has many copies, to fill the IOPT buffer
+  // and force a source window decision.  "srclen" may be set to a
+  // value that goes beyond the end-of-source.
+  const int clen = 16;
+  const int size = 4096;
+  const int nmov = size / clen;
+  const int iters = 16;
+  uint32_t added_01 = 0;
+  uint32_t added_10 = 0;
+  for (int i = 1; i <= iters; i++) {
+    MTRandom rand(MTRandom::TEST_SEED1 * i);
+    FileSpec spec0(&rand);
+    ChangeList cl;
+
+    spec0.GenerateFixedSize(size);
+    // Queue a MOVE for every other clen-sized chunk: chunk j+1 moves to chunk j's offset.
+    for (int j = 0; j < nmov; j += 2)
+      {
+	cl.push_back(Change(Change::MOVE,
+			    clen, (j + 1) * clen, j * clen));
+      }
+
+    FileSpec spec1(&rand);
+    spec0.ModifyTo(ChangeListMutator(cl), &spec1);
+
+    Options options;
+    options.encode_srcwin_maxsz = size;
+    options.iopt_size = 128;
+    options.smatch_cfg = XD3_SMATCH_SLOW;
+
+    Block block1;
+    InMemoryEncodeDecode(spec0, spec1, &block1, options);
+    Delta delta1(block1);
+    // Accumulate only; the aggregate bound is checked after the loop.
+    added_01 += delta1.AddedBytes();
+
+    Block block2;
+    InMemoryEncodeDecode(spec1, spec0, &block2, options);
+    Delta delta2(block2);
+    // Accumulate only; the aggregate bound is checked after the loop.
+    added_10 += delta2.AddedBytes();
+
+    Block block3;
+    Block block4;
+    EncodeDecodeAPI(spec0, spec1, &block3, options);
+    EncodeDecodeAPI(spec1, spec0, &block4, options);
+  }
+  // Average less than 0.5 misses (of length clen) per iteration.
+  CHECK_GE(clen * iters / 2, added_01);
+  CHECK_GE(clen * iters / 2, added_10);
+}
+
+void TestCopyFromEnd() {
+  // Copies from the end of the source buffer, which reach a block
+  // boundary end-of-file.
+  const int size = 4096;
+  const int clen = 16;
+  const int nmov = (size / 2) / clen;
+  const int iters = 16;
+  uint32_t added_01 = 0;
+  uint32_t added_10 = 0;
+  for (int i = 1; i <= iters; i++) {
+    MTRandom rand(MTRandom::TEST_SEED1 * i);
+    FileSpec spec0(&rand);
+    ChangeList cl;
+
+    spec0.GenerateFixedSize(size);
+    // Rewrite 2012 bytes starting at offset 2048 (second half of the file).
+    cl.push_back(Change(Change::MODIFY, 2012, 2048));
+    // Shuffle the first half: every other clen-sized chunk moves back by one chunk.
+    for (int j = 0; j < nmov; j += 2)
+      {
+	cl.push_back(Change(Change::MOVE,
+			    clen, (j + 1) * clen, j * clen));
+      }
+    // Three copies whose source ranges all end exactly at offset 4096 (== size).
+    cl.push_back(Change(Change::COPYOVER, 28, 4068, 3000));
+    cl.push_back(Change(Change::COPYOVER, 30, 4066, 3100));
+    cl.push_back(Change(Change::COPYOVER, 32, 4064, 3200));
+
+    FileSpec spec1(&rand);
+    spec0.ModifyTo(ChangeListMutator(cl), &spec1);
+
+    Options options;
+    options.encode_srcwin_maxsz = size;
+    options.iopt_size = 128;
+    options.smatch_cfg = XD3_SMATCH_SLOW;
+
+    Block block1;
+    InMemoryEncodeDecode(spec0, spec1, &block1, options);
+    Delta delta1(block1);
+    added_01 += delta1.AddedBytes();
+
+    Block block2;
+    InMemoryEncodeDecode(spec1, spec0, &block2, options);
+    Delta delta2(block2);
+    added_10 += delta2.AddedBytes();
+
+    Block block3;
+    Block block4;
+    EncodeDecodeAPI(spec0, spec1, &block3, options);
+    EncodeDecodeAPI(spec1, spec0, &block4, options);
+  }
+  CHECK_GE(2000 * iters, added_01);  // spec0 -> spec1: at most 2000 added bytes per iteration on average
+  CHECK_LE(2000 * iters, added_10);  // spec1 -> spec0: at least 2000 added bytes per iteration on average
+}
+
+void TestHalfBlockCopy() {
+  // Create a half-block copy, 7.5 blocks apart, in a pair of files:
+  //       0             1     ...     6             7
+  // spec0 [bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb][ccccc][bbbb_]
+  // spec1 [aaaaa][ccccc][aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_]
+  // where stage=
+  // 0: the final block is full
+  //   a. (source)spec1->(target)spec0 copies block C: reads 8 source
+  //      blocks during target block 0.
+  //   b. (source)spec0->(target)spec1 does not copy block C b/c attempt
+  //      to read past EOF empties block 0 from (virtual) block cache
+  // 1: the final block is less than full.
+  //   a. (same) copies block C
+  //   b. (same) copies block C, unlike 0a, no attempt to read past EOF
+  //
+  // "virtual" above refers to XD3_TOOFARBACK, since there is no caching
+  // in the API, there is simply a promise not to request blocks that are
+  // beyond source->max_winsize from the last known source file position.
+  for (int stage = 0; stage < 2; stage++)
+    {
+      IF_DEBUG1 (DP(RINT "half_block_copy stage %d\n", stage));
+
+      MTRandom rand;
+      FileSpec spec0(&rand);
+      FileSpec spec1(&rand);
+
+      spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 8 - stage);
+      // spec1 = spec0 with everything rewritten except the half block copied from block 7.
+      ChangeList cl1;
+      cl1.push_back(Change(Change::MODIFY,
+			   Constants::BLOCK_SIZE / 2,  // size
+			   0));
+      cl1.push_back(Change(Change::COPYOVER,
+			   Constants::BLOCK_SIZE / 2,  // size
+			   Constants::BLOCK_SIZE * 7,  // offset
+			   Constants::BLOCK_SIZE / 2));
+      cl1.push_back(Change(Change::MODIFY,
+			   Constants::BLOCK_SIZE * 7,
+			   Constants::BLOCK_SIZE - stage));
+      spec0.ModifyTo(ChangeListMutator(cl1), &spec1);
+
+      Options options;
+      options.encode_srcwin_maxsz = Constants::BLOCK_SIZE * 8;
+
+      Block block0;
+      Block block1;
+      InMemoryEncodeDecode(spec0, spec1, &block0, options);
+      InMemoryEncodeDecode(spec1, spec0, &block1, options);
+
+      Delta delta0(block0);
+      Delta delta1(block1);
+      // Expected added bytes: the whole 8 blocks minus the half block that can be copied.
+      const int yes =
+	Constants::BLOCK_SIZE * 8 - Constants::BLOCK_SIZE / 2;
+      const int no =
+	Constants::BLOCK_SIZE * 8 - Constants::BLOCK_SIZE / 2;  // NOTE(review): identical to `yes`; confirm whether case 0b should expect a different value
+
+      if (stage == 0)
+	{
+	  CHECK_EQ(yes, delta0.AddedBytes());
+	  CHECK_EQ(no, delta1.AddedBytes());
+	}
+      else
+	{
+	  CHECK_EQ(yes, delta0.AddedBytes());
+	  CHECK_EQ(yes, delta1.AddedBytes());
+	}
+    }
+}
+
+void FourWayMergeTest(const FileSpec &spec0,
+		      const FileSpec &spec1,
+		      const FileSpec &spec2,
+		      const FileSpec &spec3) {  // Builds deltas 0->1, 1->2, 2->3 and checks 2- and 3-way merges.
+  TmpFile f0, f1, f2, f3;
+  ExtFile d01, d12, d23;
+  Options options;
+  options.encode_srcwin_maxsz =
+    std::max(spec0.Size(), options.encode_srcwin_maxsz);
+  // Write all four specs to temp files so the command-line tool can read them.
+  spec0.WriteTmpFile(&f0);
+  spec1.WriteTmpFile(&f1);
+  spec2.WriteTmpFile(&f2);
+  spec3.WriteTmpFile(&f3);
+  // Each call encodes, decodes and verifies one step; the deltas stay in d01/d12/d23.
+  MainEncodeDecode(f0, f1, &d01, options);
+  MainEncodeDecode(f1, f2, &d12, options);
+  MainEncodeDecode(f2, f3, &d23, options);
+
+  // Merge 2: combine d01 and d12 into `out`.
+  ExtFile out;
+  vector<const char*> mcmd;
+  mcmd.push_back("xdelta3");
+  mcmd.push_back("merge");
+  mcmd.push_back("-m");
+  mcmd.push_back(d01.Name());
+  mcmd.push_back(d12.Name());
+  mcmd.push_back(out.Name());
+  mcmd.push_back(NULL);
+
+  // XPR(NTR "Running one merge: %s\n", CommandToString(mcmd).c_str());
+  CHECK_EQ(0, xd3_main_cmdline(mcmd.size() - 1,
+			       const_cast<char**>(&mcmd[0])));
+  // Applying the merged delta to f0 must reproduce spec2.
+  ExtFile recon;
+  vector<const char*> tcmd;
+  tcmd.push_back("xdelta3");
+  tcmd.push_back("-d");
+  tcmd.push_back("-s");
+  tcmd.push_back(f0.Name());
+  tcmd.push_back(out.Name());
+  tcmd.push_back(recon.Name());
+  tcmd.push_back(NULL);
+
+  // XPR(NTR "Running one recon! %s\n", CommandToString(tcmd).c_str());
+  CHECK_EQ(0, xd3_main_cmdline(tcmd.size() - 1,
+			       const_cast<char**>(&tcmd[0])));
+  // XPR(NTR "Should equal! %s\n", f2.Name());
+
+  CHECK(recon.EqualsSpec(spec2));
+
+  // Merge 3: combine d01, d12 and d23 into `out3`.
+  ExtFile out3;
+  vector<const char*> mcmd3;
+  mcmd3.push_back("xdelta3");
+  mcmd3.push_back("merge");
+  mcmd3.push_back("-m");
+  mcmd3.push_back(d01.Name());
+  mcmd3.push_back("-m");
+  mcmd3.push_back(d12.Name());
+  mcmd3.push_back(d23.Name());
+  mcmd3.push_back(out3.Name());
+  mcmd3.push_back(NULL);
+
+  // XPR(NTR "Running one 3-merge: %s\n", CommandToString(mcmd3).c_str());
+  CHECK_EQ(0, xd3_main_cmdline(mcmd3.size() - 1,
+			       const_cast<char**>(&mcmd3[0])));
+  // Applying the 3-way merged delta to f0 must reproduce spec3.
+  ExtFile recon3;
+  vector<const char*> tcmd3;
+  tcmd3.push_back("xdelta3");
+  tcmd3.push_back("-d");
+  tcmd3.push_back("-s");
+  tcmd3.push_back(f0.Name());
+  tcmd3.push_back(out3.Name());
+  tcmd3.push_back(recon3.Name());
+  tcmd3.push_back(NULL);
+
+  // XPR(NTR "Running one 3-recon %s\n", CommandToString(tcmd3).c_str());
+  CHECK_EQ(0, xd3_main_cmdline(tcmd3.size() - 1,
+			       const_cast<char**>(&tcmd3[0])));
+  // XPR(NTR "Should equal %s\n", f3.Name());
+
+  CHECK(recon3.EqualsSpec(spec3));
+}
+
+void TestMergeCommand1() {
+  /* Repeat random-input testing for a number of iterations.
+   * Test 2, 3, and 4-file scenarios (i.e., 1, 2, and 3-delta merges). */
+  MTRandom rand;
+  FileSpec spec0(&rand);
+  FileSpec spec1(&rand);
+  FileSpec spec2(&rand);
+  FileSpec spec3(&rand);
+
+  SizeIterator<size_t, Sizes> si0(&rand, 10);
+  // Outer loop picks the base file size; inner loop picks the change size.
+  for (; !si0.Done(); si0.Next()) {
+    size_t size0 = si0.Get();
+
+    SizeIterator<size_t, Sizes> si1(&rand, 10);
+    for (; !si1.Done(); si1.Next()) {
+      size_t change1 = si1.Get();
+
+      if (change1 == 0) {
+	continue;
+      }
+
+      // XPR(NTR "S0 = %lu\n", size0);
+      // XPR(NTR "C1 = %lu\n", change1);
+      // XPR(NTR ".");
+      // Random positions inside the base file; 0 when the base file is empty.
+      size_t add1_pos = size0 ? rand.Rand32() % size0 : 0;
+      size_t del2_pos = size0 ? rand.Rand32() % size0 : 0;
+
+      spec0.GenerateFixedSize(size0);
+
+      ChangeList cl1, cl2, cl3;
+      // The MODIFY is clamped to at most size0 bytes and positioned to fit within size0.
+      size_t change3 = change1;
+      size_t change3_pos;
+
+      if (change3 >= size0) {
+	change3 = size0;
+	change3_pos = 0;
+      } else {
+	change3_pos = rand.Rand32() % (size0 - change3);
+      }
+
+      cl1.push_back(Change(Change::ADD, change1, add1_pos));
+      cl2.push_back(Change(Change::DELRANGE, change1, del2_pos));
+      cl3.push_back(Change(Change::MODIFY, change3, change3_pos));
+      // Chain the edits: spec0 -ADD-> spec1 -DELRANGE-> spec2 -MODIFY-> spec3.
+      spec0.ModifyTo(ChangeListMutator(cl1), &spec1);
+      spec1.ModifyTo(ChangeListMutator(cl2), &spec2);
+      spec2.ModifyTo(ChangeListMutator(cl3), &spec3);
+      // Run the merge test in both directions along the chain.
+      FourWayMergeTest(spec0, spec1, spec2, spec3);
+      FourWayMergeTest(spec3, spec2, spec1, spec0);
+    }
+  }
+}
+
+void TestMergeCommand2() {
+  /* Same as above, different mutation pattern. */
+  /* TODO: run this with large sizes too */
+  /* TODO: run this with small sizes too */
+  MTRandom rand;
+  FileSpec spec0(&rand);
+  FileSpec spec1(&rand);
+  FileSpec spec2(&rand);
+  FileSpec spec3(&rand);
+
+  SizeIterator<size_t, Sizes> si0(&rand, 10);
+  for (; !si0.Done(); si0.Next()) {
+    size_t size0 = si0.Get();
+
+    SizeIterator<size_t, Sizes> si1(&rand, 10);
+    for (; !si1.Done(); si1.Next()) {
+      size_t size1 = si1.Get();
+
+      SizeIterator<size_t, Sizes> si2(&rand, 10);
+      for (; !si2.Done(); si2.Next()) {
+	size_t size2 = si2.Get();
+
+	SizeIterator<size_t, Sizes> si3(&rand, 10);
+	for (; !si3.Done(); si3.Next()) {
+	  size_t size3 = si3.Get();
+
+	  // We're only interested in four sizes, strictly decreasing (size0 > size1 > size2 > size3).
+	  if (size3 >= size2 || size2 >= size1 || size1 >= size0) {
+	    continue;
+	  }
+
+	  // XPR(NTR "S0 = %lu\n", size0);
+	  // XPR(NTR "S1 = %lu\n", size1);
+	  // XPR(NTR "S2 = %lu\n", size2);
+	  // XPR(NTR "S3 = %lu\n", size3);
+	  // XPR(NTR ".");
+
+	  spec0.GenerateFixedSize(size0);
+
+	  ChangeList cl1, cl2, cl3;
+	  // Each spec is spec0 with a prefix deleted so that it ends up with sizeN bytes.
+	  cl1.push_back(Change(Change::DELRANGE, size0 - size1, 0));
+	  cl2.push_back(Change(Change::DELRANGE, size0 - size2, 0));
+	  cl3.push_back(Change(Change::DELRANGE, size0 - size3, 0));
+	  // Unlike TestMergeCommand1, all three mutations are applied to spec0 directly.
+	  spec0.ModifyTo(ChangeListMutator(cl1), &spec1);
+	  spec0.ModifyTo(ChangeListMutator(cl2), &spec2);
+	  spec0.ModifyTo(ChangeListMutator(cl3), &spec3);
+
+	  FourWayMergeTest(spec0, spec1, spec2, spec3);
+	  FourWayMergeTest(spec3, spec2, spec1, spec0);
+	}
+      }
+    }
+  }
+}
+
+void TestLastFrontierBlock() {
+  // This test constructs an input that can expose
+  // https://github.com/jmacd/xdelta/issues/188
+  // when run through the command-line with source via a FIFO.
+  // That is not tested here, but the test stays.
+  if (Constants::WINDOW_SIZE < XD3_ALLOCSIZE)
+    {
+      return;
+    }
+
+  MTRandom rand;
+  FileSpec spec0(&rand);
+  FileSpec spec1(&rand);
+  const xoff_t size = XD3_ALLOCSIZE * 64;  // == XD3_MINSRCWINSZ * 2
+  const xoff_t edit = XD3_ALLOCSIZE;
+  // These options override the Constants-derived defaults for block and window size.
+  Options options;
+  options.encode_srcwin_maxsz = XD3_MINSRCWINSZ;
+  options.block_size = XD3_ALLOCSIZE;
+  options.window_size = XD3_MINSRCWINSZ;
+  options.size_known = false;
+
+  spec0.GenerateFixedSize(size);
+
+  ChangeList cl;
+
+  // Modify one byte (at offset edit * 31) in order to induce indexing of
+  // subsequent bytes, but allow copying most of the file to keep the test fast.
+  cl.push_back(Change(Change::MODIFY, 1, edit * 31));
+  cl.push_back(Change(Change::COPYOVER, edit, edit * 31, edit * 63));
+
+  spec0.ModifyTo(ChangeListMutator(cl), &spec1);
+  // Round-trip in both directions; the encoded output itself is not inspected.
+  Block noblock;
+  InMemoryEncodeDecode(spec0, spec1, &noblock, options);
+  InMemoryEncodeDecode(spec1, spec0, &noblock, options);
+}
+
+};  // class Regtest<Constants>
+
+#define TEST(x) XPR(NTR #x "...\n"); regtest.x()
+
+// These tests are primarily tests of the testing framework itself; main() runs them once, with SmallBlock.
+template <class T>
+void UnitTest() {
+  Regtest<T> regtest;  // TEST(x) below logs the name and calls regtest.x()
+  TEST(TestPrintf);
+  TEST(TestRandomNumbers);
+  TEST(TestRandomFile);
+  TEST(TestFirstByte);
+  TEST(TestModifyMutator);
+  TEST(TestAddMutator);
+  TEST(TestDeleteMutator);
+  TEST(TestCopyMutator);
+  TEST(TestMoveMutator);
+  TEST(TestOverwriteMutator);
+}
+
+// These are Xdelta tests; main() runs them once per Constants type (SmallBlock, MixedBlock, ...).
+template <class T>
+void MainTest() {
+  XPR(NT "Blocksize %" Q "u windowsize %" Z "u\n",
+      T::BLOCK_SIZE, T::WINDOW_SIZE);
+  Regtest<T> regtest;  // TEST(x) below logs the name and calls regtest.x()
+  TEST(TestEmptyInMemory);
+  TEST(TestBlockInMemory);
+  TEST(TestSmallStride);
+  TEST(TestCopyWindow);
+  TEST(TestCopyFromEnd);
+  TEST(TestNonBlocking);
+  TEST(TestHalfBlockCopy);
+  TEST(TestLastFrontierBlock);
+  TEST(TestMergeCommand1);
+  TEST(TestMergeCommand2);
+}
+
+#undef TEST
+
+int main(int argc, char **argv)  // Runs every regression suite, then the built-in "xdelta3 test" command.
+{
+  vector<const char*> mcmd;
+  string pn;
+  const char *sp = strrchr(argv[0], '/');  // last '/' in our own path, if any
+  if (sp != NULL) {
+    pn.append(argv[0], sp - argv[0] + 1);  // keep the directory prefix including the '/'
+  }
+  pn.append("xdelta3");
+  mcmd.push_back(pn.c_str());
+  mcmd.push_back("test");
+  mcmd.push_back(NULL);
+  // Framework self-tests run once; the Xdelta tests run for each Constants type.
+  UnitTest<SmallBlock>();
+  MainTest<SmallBlock>();
+  MainTest<MixedBlock>();
+  MainTest<OversizeBlock>();
+  MainTest<LargeBlock>();
+  // Finally invoke the command-line entry point as "<dir>/xdelta3 test".
+  CHECK_EQ(0, xd3_main_cmdline(mcmd.size() - 1,
+  			       const_cast<char**>(&mcmd[0])));
+
+  return 0;
+}
+
diff --git a/third-party/xdelta3/xdelta3/testing/regtest_c.c b/third-party/xdelta3/xdelta3/testing/regtest_c.c
new file mode 100644
index 0000000000..42e32ce80c
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/testing/regtest_c.c
@@ -0,0 +1,17 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+#include "../xdelta3.c"
diff --git a/third-party/xdelta3/xdelta3/testing/run_release.sh b/third-party/xdelta3/xdelta3/testing/run_release.sh
new file mode 100644
index 0000000000..85ed1f7449
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/testing/run_release.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+(cd .. && ./run_release.sh)
diff --git a/third-party/xdelta3/xdelta3/testing/segment.h b/third-party/xdelta3/xdelta3/testing/segment.h
new file mode 100644
index 0000000000..a242ad8596
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/testing/segment.h
@@ -0,0 +1,112 @@
+/* xdelta3 - delta compression tools and library -*- Mode: C++ -*-
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+class Segment {
+ public:
+  Segment(size_t size, MTRandom *rand)
+    : size_(size),
+      seed_(rand->Rand32()),
+      seed_offset_(0),
+      data_(NULL) {
+    CHECK_GT(size_, 0);
+  }
+
+  Segment(size_t size, uint32_t seed)
+    : size_(size),
+      seed_(seed),
+      seed_offset_(0),
+      data_(NULL) {
+    CHECK_GT(size_, 0);
+  }
+
+  Segment(size_t size, uint8_t *data)
+    : size_(size),
+      seed_(0),
+      seed_offset_(0),
+      data_(data) {
+    CHECK_GT(size_, 0);
+  }
+
+  size_t Size() const {
+    return size_;
+  }
+
+  Segment Subseg(size_t start, size_t size) const {
+    CHECK_LE(start + size, size_);
+    if (data_) {
+      return Segment(size, data_ + start);
+    } else {
+      return Segment(size, seed_, seed_offset_ + start);
+    }
+  }
+
+  void Fill(size_t seg_offset, size_t size, uint8_t *data) const {
+    CHECK_LE(seg_offset + size, size_);
+    if (data_) {
+      memcpy(data, data_ + seg_offset, size);
+    } else {
+      size_t skip = seg_offset + seed_offset_;
+      MTRandom gen(seed_);
+      MTRandom8 gen8(&gen);
+      while (skip--) {
+	gen8.Rand8();
+      }
+      for (size_t i = 0; i < size; i++) {
+	data[i] = gen8.Rand8();
+      }
+    }
+  }
+
+  string ToString() const {  // Debug rendering of this segment.
+    string r;
+    if (data_) {  // Literal segment: dump every byte as two hex digits.
+      for (size_t i = 0; i < size_; i++) {
+        char buf[10];
+        snprintf(buf, sizeof(buf), "%02x ", data_[i]);
+        r.append(buf);
+      }
+    } else {  // Random segment: size_t needs %zu; seed is 32-bit unsigned.
+      char buf[256];
+      snprintf(buf, sizeof(buf), "size=%zu,seed=%u,skip=%zu", size_, static_cast<unsigned>(seed_), seed_offset_);
+      r.append(buf);
+    }
+    return r;
+  }
+
+private:
+  // Used by Subseg()
+  Segment(size_t size, uint32_t seed, size_t seed_offset)
+    : size_(size),
+      seed_(seed),
+      seed_offset_(seed_offset),
+      data_(NULL) {
+    CHECK_GT(size_, 0);
+  }
+
+  size_t size_;  // Size of this segment
+
+  // For random segments
+  uint32_t seed_;  // Seed used for generating byte sequence
+  size_t seed_offset_;  // Seed positions the sequence this many bytes
+                        // before its beginning.
+
+  // For literal segments (data is not owned)
+  uint8_t *data_;
+};
+
+typedef map<xoff_t, Segment> SegmentMap;
+typedef typename SegmentMap::const_iterator ConstSegmentMapIterator;
+typedef typename SegmentMap::iterator SegmentMapIterator;
diff --git a/third-party/xdelta3/xdelta3/testing/sizes.h b/third-party/xdelta3/xdelta3/testing/sizes.h
new file mode 100644
index 0000000000..637208bd45
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/testing/sizes.h
@@ -0,0 +1,126 @@
+/* xdelta3 - delta compression tools and library -*- Mode: C++ -*-
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+template <typename T, typename U>
+class SizeIterator {
+ public:
+  SizeIterator(MTRandom *rand, size_t howmany)
+    : rand_(rand),
+      count_(0),
+      fixed_(U::sizes),
+      fixed_size_(SIZEOF_ARRAY(U::sizes)),
+      howmany_(howmany) { }
+
+  T Get() {
+    if (count_ < fixed_size_) {
+      return fixed_[count_];
+    }
+    return rand_->Rand<T>() % U::max_value;
+  }
+
+  bool Done() {
+    return count_ >= fixed_size_ && count_ >= howmany_;
+  }
+
+  void Next() {
+    count_++;
+  }
+
+ private:
+  MTRandom *rand_;
+  size_t count_;
+  T* fixed_;
+  size_t fixed_size_;
+  size_t howmany_;
+};
+
+// Small sizes
+class SmallSizes {
+public:
+  static size_t sizes[];
+  static size_t max_value;
+};
+
+size_t SmallSizes::sizes[] = {
+  0, 1, 128 / 4, 3333, 
+  128 - (128 / 3),
+  128,
+  128 + (128 / 3),
+  2 * 128 - (128 / 3),
+  2 * 128,
+  2 * 128 + (128 / 3),
+};
+
+size_t SmallSizes::max_value = 128 * 3;
+
+// Large sizes
+class LargeSizes {
+public:
+  static size_t sizes[];
+  static size_t max_value;
+};
+
+size_t LargeSizes::sizes[] = {
+  1 << 20,
+  1 << 18,
+  1 << 16,
+};
+
+size_t LargeSizes::max_value = 1<<20;
+
+// Base constants
+struct BaseConstants {
+  static const size_t TEST_ROUNDS;
+};
+
+const size_t BaseConstants::TEST_ROUNDS = 10;
+
+// Regtest<> arguments
+struct SmallBlock : public BaseConstants {
+  static const xoff_t BLOCK_SIZE;
+  static const size_t WINDOW_SIZE;
+  typedef SmallSizes Sizes;
+};
+
+const xoff_t SmallBlock::BLOCK_SIZE = 1<<7;
+const size_t SmallBlock::WINDOW_SIZE = 1<<7;
+
+struct LargeBlock : public BaseConstants {
+  static const xoff_t BLOCK_SIZE;
+  static const size_t WINDOW_SIZE;
+  typedef LargeSizes Sizes;
+};
+
+const xoff_t LargeBlock::BLOCK_SIZE = (1 << 13);
+const size_t LargeBlock::WINDOW_SIZE = (1 << 13);
+
+struct MixedBlock : public BaseConstants {
+  static const xoff_t BLOCK_SIZE;
+  static const size_t WINDOW_SIZE;
+  typedef SmallSizes Sizes;
+};
+
+const xoff_t MixedBlock::BLOCK_SIZE = 1<<7;
+const size_t MixedBlock::WINDOW_SIZE = 1<<8;
+
+struct OversizeBlock : public BaseConstants {
+  static const xoff_t BLOCK_SIZE;
+  static const size_t WINDOW_SIZE;
+  typedef SmallSizes Sizes;
+};
+
+const xoff_t OversizeBlock::BLOCK_SIZE = 1<<8;
+const size_t OversizeBlock::WINDOW_SIZE = 1<<7;
diff --git a/third-party/xdelta3/xdelta3/testing/test.h b/third-party/xdelta3/xdelta3/testing/test.h
new file mode 100644
index 0000000000..628fb75842
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/testing/test.h
@@ -0,0 +1,84 @@
+/* xdelta3 - delta compression tools and library -*- Mode: C++ -*-
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+extern "C" {
+#include "../xdelta3.h"
+#include "../xdelta3-internal.h"
+}
+
+#include <unistd.h>
+#include <math.h>
+#include <string>
+
+#define CHECK_EQ(x,y) CHECK_OP(x,y,==)
+#define CHECK_NE(x,y) CHECK_OP(x,y,!=)
+#define CHECK_LT(x,y) CHECK_OP(x,y,<)
+#define CHECK_GT(x,y) CHECK_OP(x,y,>)
+#define CHECK_LE(x,y) CHECK_OP(x,y,<=)
+#define CHECK_GE(x,y) CHECK_OP(x,y,>=)
+
+#define CHECK_OP(x,y,OP) \
+  do { \
+    __typeof__(x) _x(x); \
+    __typeof__(x) _y(y); \
+    if (!(_x OP _y)) { \
+      cerr << __FILE__ << ":" << __LINE__ << " Check failed: " << #x " " #OP " " #y << endl; \
+      cerr << __FILE__ << ":" << __LINE__ << " {0} " << _x << endl; \
+      cerr << __FILE__ << ":" << __LINE__ << " {1} " << _y << endl; \
+    abort(); \
+    } } while (false)
+#undef CHECK
+#define CHECK(x) \
+  do {if (!(x)) {				       \
+  cerr << __FILE__ << ":" << __LINE__ << " Check failed: " << #x << endl; \
+  abort(); \
+    } } while (false)
+
+#define DCHECK(x)
+
+using std::string;
+
+#include <vector>
+using std::vector;
+
+inline string CommandToString(const vector<const char*> &v) {
+  string s(v[0]);
+  for (size_t i = 1; i < v.size() && v[i] != NULL; i++) {
+    s.append(" ");
+    s.append(v[i]);
+  }
+  return s;
+}
+
+#include <iostream>
+using std::cerr;
+using std::endl;
+using std::ostream;
+
+#include <map> 
+using std::map;
+using std::pair;
+
+#include <list>
+using std::list;
+
+template <typename T, typename U>
+pair<T, U> make_pair(const T& t, const U& u) {
+  return pair<T, U>(t, u);
+}
+
+using std::min;
+using std::max;
diff --git a/third-party/xdelta3/xdelta3/testing/xdelta3-regtest.py b/third-party/xdelta3/xdelta3/testing/xdelta3-regtest.py
new file mode 100644
index 0000000000..aa54c4624a
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/testing/xdelta3-regtest.py
@@ -0,0 +1,1264 @@
+#!/usr/bin/python2.7
+# xdelta3 - delta compression tools and library -*- Mode: Python -*-
+# Copyright 2016 Joshua MacDonald
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# TODO This code is no longer maintained :(
+
+import os, sys, math, re, time, types, array, random
+import xdelta3
+
+RCSDIR = '/tmp/rcs'
+SAMPLEDIR = "/tmp/diff"
+
+#
+MIN_SIZE       = 0
+
+TIME_TOO_SHORT = 0.050
+
+SKIP_TRIALS    = 2
+MIN_TRIALS     = 3
+MAX_TRIALS     = 15
+
+# 10 = fast 1.5 = slow
+MIN_STDDEV_PCT = 1.5
+
+# How many results per round
+MAX_RESULTS = 500
+TEST_ROUNDS = 10
+KEEP_P = (0.5)
+
+# For RCS testing, what percent to select
+FILE_P = (0.50)
+
+# For run-speed tests
+MIN_RUN = 1000 * 1000 * 1
+MAX_RUN = 1000 * 1000 * 10
+
+# Testwide defaults
+ALL_ARGS = [
+    '-q'  # '-vv'
+    ]
+
+# The first 7 args go to -C
+SOFT_CONFIG_CNT = 7
+
+CONFIG_ORDER = [ 'large_look',
+                 'large_step',
+                 'small_look',
+                 'small_chain',
+                 'small_lchain',
+                 'max_lazy',
+                 'long_enough',
+
+                 # > SOFT_CONFIG_CNT
+                 'nocompress',
+                 'winsize',
+                 'srcwinsize',
+                 'sprevsz',
+                 'iopt',
+                 'djw',
+                 'altcode',
+                 ]
+
+CONFIG_ARGMAP = {
+    'winsize'    : '-W',
+    'srcwinsize' : '-B',
+    'sprevsz'    : '-P',
+    'iopt'       : '-I',
+    'nocompress' : '-N',
+    'djw'        : '-Sdjw',
+    'altcode'    : '-T',
+    }
+
+def INPUT_SPEC(rand):
+    return {
+
+    # Time/space costs:
+
+    # -C 1,2,3,4,5,6,7
+    'large_look' : lambda d: rand.choice([9, 10, 11, 12]),
+    'large_step' : lambda d: rand.choice([25, 26, 27, 28, 29, 30]),
+    'small_look'   : lambda d: rand.choice([4]),
+    'small_chain'  : lambda d: rand.choice([1]),
+    'small_lchain' : lambda d: rand.choice([1]),
+    'max_lazy'     : lambda d: rand.choice([4, 5, 6, 7, 8, 9, 10 ]),
+
+    # Note: long_enough only refers to small matching and has no effect if
+    # small_chain == 1.
+    'long_enough'  : lambda d: rand.choice([4]),
+
+    # -N
+    'nocompress'   : lambda d: rand.choice(['false']),
+
+    # -T
+    'altcode'      : lambda d: rand.choice(['false']),
+
+    # -S djw
+    'djw'          : lambda d: rand.choice(['false']),
+
+    # Memory costs:
+
+    # -W
+    'winsize'      : lambda d: 8 * (1<<20),
+
+    # -B
+    'srcwinsize'   : lambda d: 64 * (1<<20),
+
+    # -I 0 is unlimited
+    'iopt'         : lambda d: 0,
+
+    # -P only powers of two
+    'sprevsz'      : lambda d: rand.choice([x * (1<<16) for x in [4]]),
+  }
+#end
+
+#
+TMPDIR = '/tmp/xd3regtest.%d' % os.getpid()
+
+RUNFILE = os.path.join(TMPDIR, 'run')
+DFILE   = os.path.join(TMPDIR, 'output')
+RFILE   = os.path.join(TMPDIR, 'recon')
+CMPTMP1 = os.path.join(TMPDIR, 'cmptmp1')
+CMPTMP2 = os.path.join(TMPDIR, 'cmptmp2')
+
+HEAD_STATE = 0
+BAR_STATE  = 1
+REV_STATE  = 2
+DATE_STATE = 3
+
+#
+IGNORE_FILENAME  = re.compile('.*\\.(gif|jpg).*')
+
+# rcs output
+RE_TOTREV  = re.compile('total revisions: (\\d+)')
+RE_BAR     = re.compile('----------------------------')
+RE_REV     = re.compile('revision (.+)')
+RE_DATE    = re.compile('date: ([^;]+);.*')
+# xdelta output
+RE_HDRSZ   = re.compile('VCDIFF header size: +(\\d+)')
+RE_EXTCOMP = re.compile('XDELTA ext comp.*')
+
+def c2str(c):  # Render each element of c with '%s' and join with spaces.
+    return ' '.join('%s' % item for item in c)
+#end
+
+def SumList(l):
+    return reduce(lambda x,y: x+y, l)
+#end
+
+# returns (total, mean, stddev, q2 (median),
+#          (q3-q1)/2 ("semi-interquartile range"), max-min (spread))
+class StatList:
+    def __init__(self,l,desc):  # l: list of numbers (sorted in place); desc: label for .str
+        cnt = len(l)
+        assert(cnt > 1)  # the sample stddev below divides by cnt-1
+        l.sort()
+        self.cnt    = cnt
+        self.l      = l
+        self.total  = SumList(l)
+        self.mean   = self.total / float(self.cnt)
+        self.s      = math.sqrt(SumList([(x-self.mean) * 
+                                         (x - self.mean) for x in l]) / 
+                                float(self.cnt-1))
+        self.q0     = l[0]  # minimum
+        self.q1     = l[int(self.cnt/4.0+0.5)]
+        self.q2     = l[int(self.cnt/2.0+0.5)]
+        self.q3     = l[min(self.cnt-1,int((3.0*self.cnt)/4.0+0.5))]
+        self.q4     = l[self.cnt-1]  # maximum
+        self.siqr   = (self.q3-self.q1)/2.0;
+        self.spread = (self.q4-self.q0)
+        if len(l) == 1:  # unreachable given the assert above; kept as-is
+            self.str = '%s %s' % (desc, l[0])
+        else:  # '%%' is required: a bare '%-i' would parse as a conversion
+            self.str = '%s mean %.1f: 25%%-ile %d %d %d %d %d' % \
+                (desc, self.mean, self.q0, self.q1, self.q2, self.q3, self.q4)
+    #end
+#end
+
+def RunCommand(args, ok = [0]):
+    #print 'run command %s' % (' '.join(args))
+    p = os.spawnvp(os.P_WAIT, args[0], args)
+    if p not in ok:
+        raise CommandError(args, 'exited %d' % p)
+    #end
+#end
+
+def RunCommandIO(args,infn,outfn):
+    p = os.fork()
+    if p == 0:
+        os.dup2(os.open(infn,os.O_RDONLY),0)
+        os.dup2(os.open(outfn,os.O_CREAT|os.O_TRUNC|os.O_WRONLY),1)
+        os.execvp(args[0], args)
+    else:
+        s = os.waitpid(p,0)
+        o = os.WEXITSTATUS(s[1])
+        if not os.WIFEXITED(s[1]) or o != 0:
+            raise CommandError(args, 'exited %d' % o)
+        #end
+    #end
+#end
+
+class TimedTest:
+    def __init__(self, target, source, runnable,
+                 skip_trials = SKIP_TRIALS,
+                 min_trials = MIN_TRIALS,
+                 max_trials = MAX_TRIALS,
+                 min_stddev_pct = MIN_STDDEV_PCT):
+        self.target = target
+        self.source = source
+        self.runnable = runnable
+
+        self.skip_trials = skip_trials
+        self.min_trials = min(min_trials, max_trials)
+        self.max_trials = max_trials
+        self.min_stddev_pct = min_stddev_pct
+
+        self.encode_time = self.DoTest(DFILE,
+                                       lambda x: x.Encode(self.target, 
+                                                          self.source, DFILE))
+        self.encode_size = runnable.EncodeSize(DFILE)
+
+        self.decode_time = self.DoTest(RFILE,
+                                       lambda x: x.Decode(DFILE, 
+                                                          self.source, RFILE),
+                                       )
+        runnable.Verify(self.target, RFILE)
+    #end
+
+    def DoTest(self, fname, func):
+        trials   = 0
+        measured = []
+
+        while 1:
+            try:
+                os.remove(fname)
+            except OSError:
+                pass
+
+            start_time  = time.time()
+            start_clock = time.clock()
+
+            func(self.runnable)
+
+            total_clock = (time.clock() - start_clock)
+            total_time  = (time.time() - start_time)
+
+            elap_time  = max(total_time,  0.0000001)
+            elap_clock = max(total_clock, 0.0000001)
+
+            trials = trials + 1
+
+            # skip some of the first trials
+            if trials > self.skip_trials:
+                measured.append((elap_clock, elap_time))
+                #print 'measurement total: %.1f ms' % (total_time * 1000.0)
+
+            # at least so many
+            if trials < (self.skip_trials + self.min_trials):
+                #print 'continue: need more trials: %d' % trials
+                continue
+
+            # compute %variance
+            done = 0
+            if self.skip_trials + self.min_trials <= 2:
+                measured = measured + measured;
+                done = 1
+            #end
+
+            time_stat = StatList([x[1] for x in measured], 'elap time')
+            sp = float(time_stat.s) / float(time_stat.mean)
+
+            # what if MAX_TRIALS is exceeded?
+            too_many = (trials - self.skip_trials) >= self.max_trials
+            good = (100.0 * sp) < self.min_stddev_pct
+            if done or too_many or good:
+                trials = trials - self.skip_trials
+                if not done and not good:
+                    #print 'too many trials: %d' % trials
+                    pass
+                #clock = StatList([x[0] for x in measured], 'elap clock')
+                return time_stat
+            #end
+        #end
+    #end
+#end
+
+def Decimals(start, end):
+    # For each decade: unit, 2*unit, ..., 9*unit; finish with the last 10*unit.
+    values = []
+    unit = start
+    while True:
+        values = values + range(unit, unit * 10, unit)
+        if unit * 10 >= end:
+            values.append(unit * 10)
+            return values
+        unit = unit * 10
+    #end
+#end
+
+# This tests the raw speed on zero-filled inputs of increasing length
+def RunSpeedTest():
+    for L in Decimals(MIN_RUN, MAX_RUN):
+        SetFileSize(RUNFILE, L)
+
+        trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<20)]))
+        ReportSpeed(L, trx, '1MB ')
+
+        trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<19)]))
+        ReportSpeed(L, trx, '512k')
+
+        trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<18)]))
+        ReportSpeed(L, trx, '256k')
+
+        trm = TimedTest(RUNFILE, None, Xdelta3Mod1(RUNFILE))
+        ReportSpeed(L, trm, 'swig')
+
+        trg = TimedTest(RUNFILE, None, GzipRun1())
+        ReportSpeed(L,trg,'gzip')
+    #end
+#end
+
+def SetFileSize(F,L):
+    fd = os.open(F, os.O_CREAT | os.O_WRONLY)
+    os.ftruncate(fd,L)
+    assert os.fstat(fd).st_size == L
+    os.close(fd)
+#end
+
+def ReportSpeed(L,tr,desc):
+    print '%s run length %u: size %u: time %.3f ms: decode %.3f ms' % \
+          (desc, L,
+           tr.encode_size,
+           tr.encode_time.mean * 1000.0,
+           tr.decode_time.mean * 1000.0)
+#end
+
+class Xdelta3RunClass:
+    def __init__(self, extra):
+        self.extra = extra
+    #end
+
+    def __str__(self):
+        return ' '.join(self.extra)
+    #end
+
+    def New(self):
+        return Xdelta3Runner(self.extra)
+    #end
+#end
+
+class Xdelta3Runner:
+    # Use "forkexec" to get special command-line only features like
+    # external compression support.
+    def __init__(self, extra, forkexec=False):
+        self.forkexec = forkexec
+        self.extra = extra
+    #end
+
+    def Encode(self, target, source, output):
+        args = (ALL_ARGS +
+                self.extra +
+                ['-e'])
+        if source:
+            args.append('-s')
+            args.append(source)
+        #end
+        args = args + [target, output]
+        self.Main(args)
+    #end
+
+    def Decode(self, input, source, output):
+        args = (ALL_ARGS +
+                ['-d'])
+        if source:
+            args.append('-s')
+            args.append(source)
+        #end
+        args = args + [input, output]
+        self.Main(args)
+    #end
+
+    def Verify(self, target, recon):
+        if target[-3:] == ".gz":
+            RunCommandIO(('gzip', '-dc'), target, CMPTMP1)
+            RunCommandIO(('gzip', '-dc'), recon, CMPTMP2)
+            RunCommand(('cmp', CMPTMP1, CMPTMP2))
+        else:
+            RunCommand(('cmp', target, recon))
+    #end
+
+    def EncodeSize(self, output):
+        return os.stat(output).st_size
+    #end
+
+    def Main(self, args):
+        try:
+            if self.forkexec:
+                RunCommand(['../xdelta3'] + args)
+            else:
+                xdelta3.xd3_main_cmdline(args)
+        except Exception, e:
+            raise CommandError(args, "xdelta3.main exception: %s" % e)
+        #end
+    #end
+#end
+
+class Xdelta3Mod1:
+    def __init__(self, file):
+        self.target_data = open(file, 'r').read()
+    #end
+
+    def Encode(self, ignore1, ignore2, ignore3):
+        r1, encoded = xdelta3.xd3_encode_memory(self.target_data, None, 1000000, 1<<10)
+        if r1 != 0:
+            raise CommandError('memory', 'encode failed: %s' % r1)
+        #end
+        self.encoded = encoded
+    #end
+
+    def Decode(self, ignore1, ignore2, ignore3):  # in-memory decode of self.encoded
+        r2, data1 = xdelta3.xd3_decode_memory(self.encoded, None, len(self.target_data))
+        if r2 != 0:
+            raise CommandError('memory', 'decode failed: %s' % r2)
+        #end
+        self.decoded = data1
+    #end
+
+    def Verify(self, ignore1, ignore2):
+        if self.target_data != self.decoded:
+            raise CommandError('memory', 'bad decode')
+        #end
+    #end
+
+    def EncodeSize(self, ignore1):
+        return len(self.encoded)
+    #end
+#end
+
+class GzipRun1:
+    def Encode(self, target, source, output):
+        assert source == None
+        RunCommandIO(['gzip', '-cf'], target, output)
+    #end
+
+    def Decode(self, input, source, output):
+        assert source == None
+        RunCommandIO(['gzip', '-dcf'], input, output)
+    #end
+
+    def Verify(self, target, recon):
+        RunCommand(('cmp', target, recon))
+    #end
+
+    def EncodeSize(self, output):
+        return os.stat(output).st_size
+    #end
+#end
+
+class Xdelta1RunClass:
+    def __str__(self):
+        return 'xdelta1'
+    #end
+
+    def New(self):
+        return Xdelta1Runner()
+    #end
+#end
+
+class Xdelta1Runner:
+    def Encode(self, target, source, output):
+        assert source != None
+        args = ['xdelta1', 'delta', '-q', source, target, output]
+        # Note: for dumb historical reasons, xdelta1 returns 1 or 0
+        RunCommand(args, [0, 1])
+    #end
+
+    def Decode(self, input, source, output):
+        assert source != None
+        args = ['xdelta1', 'patch', '-q', input, source, output]
+        RunCommand(args)
+    #end
+
+    def Verify(self, target, recon):
+        RunCommand(('cmp', target, recon))
+    #end
+
+    def EncodeSize(self, output):
+        return os.stat(output).st_size
+    #end
+#end
+
+# exceptions
+class SkipRcsException:
+    def __init__(self,reason):
+        self.reason = reason
+    #end
+#end
+
+class NotEnoughVersions:
+    def __init__(self):
+        pass
+    #end
+#end
+
+class CommandError:
+    def __init__(self,cmd,str):
+        if type(cmd) is types.TupleType or \
+           type(cmd) is types.ListType:
+            cmd = reduce(lambda x,y: '%s %s' % (x,y),cmd)
+        #end
+        print 'command was: ',cmd
+        print 'command failed: ',str
+        print 'have fun debugging'
+    #end
+#end
+
+class RcsVersion:
+    def __init__(self,vstr):
+        self.vstr = vstr
+    #end
+    def __cmp__(self,other):
+        return cmp(self.date, other.date)
+    #end
+    def __str__(self):
+        return str(self.vstr)
+    #end
+#end
+
+class RcsFile:
+
+    def __init__(self, fname):
+        self.fname    = fname
+        self.versions = []
+        self.state    = HEAD_STATE
+    #end
+
+    def SetTotRev(self,s):
+        self.totrev = int(s)
+    #end
+
+    def Rev(self,s):
+        self.rev = RcsVersion(s)
+        if len(self.versions) >= self.totrev:
+            raise SkipRcsException('too many versions (in log messages)')
+        #end
+        self.versions.append(self.rev)
+    #end
+
+    def Date(self,s):
+        self.rev.date = s
+    #end
+
+    def Match(self, line, state, rx, gp, newstate, f):
+        if state == self.state:
+            m = rx.match(line)
+            if m:
+                if f:
+                    f(m.group(gp))
+                #end
+                self.state = newstate
+                return 1
+            #end
+        #end
+        return None
+    #end
+
+    def Sum1Rlog(self):  # Parse `rlog` output line by line through Match().
+        f = os.popen('rlog '+self.fname, "r")
+        l = f.readline()
+        while l:
+            if self.Match(l, HEAD_STATE, RE_TOTREV, 1, BAR_STATE, self.SetTotRev):
+                pass
+            elif self.Match(l, BAR_STATE, RE_BAR, 1, REV_STATE, None):
+                pass
+            elif self.Match(l, REV_STATE, RE_REV, 1, DATE_STATE, self.Rev):
+                pass
+            elif self.Match(l, DATE_STATE, RE_DATE, 1, BAR_STATE, self.Date):
+                pass
+            #end
+            l = f.readline()
+        #end
+        c = f.close()  # None on success, otherwise the child's wait status
+        if c != None:  # an int cannot be raised, so wrap it in CommandError
+            raise CommandError('rlog ' + self.fname, 'rlog failed: status %d' % c)
+        #end
+    #end
+
+    def Sum1(self):
+        st = os.stat(self.fname)
+        self.rcssize = st.st_size
+        self.Sum1Rlog()
+        if self.totrev != len(self.versions):
+            raise SkipRcsException('wrong version count')
+        #end
+        self.versions.sort()
+    #end
+
+    def Checkout(self,n):
+        v      = self.versions[n]
+        out    = open(self.Verf(n), "w")
+        cmd    = 'co -ko -p%s %s' % (v.vstr, self.fname)
+        total  = 0
+        (inf,
+         stream,
+         err)  = os.popen3(cmd, "r")
+        inf.close()
+        buf    = stream.read()
+        while buf:
+            total = total + len(buf)
+            out.write(buf)
+            buf = stream.read()
+        #end
+        v.vsize = total
+        estr = ''
+        buf = err.read()
+        while buf:
+            estr = estr + buf
+            buf = err.read()
+        #end
+        if stream.close():
+            raise CommandError(cmd, 'checkout failed: %s\n%s\n%s' % (v.vstr, self.fname, estr))
+        #end
+        out.close()
+        err.close()
+    #end
+
+    def Vdate(self,n):
+        return self.versions[n].date
+    #end
+
+    def Vstr(self,n):
+        return self.versions[n].vstr
+    #end
+
+    def Verf(self,n):
+        return os.path.join(TMPDIR, 'input.%d' % n)
+    #end
+
+    def FilePairsByDate(self, runclass):  # TimedTest each consecutive version pair
+        if self.totrev < 2:
+            raise NotEnoughVersions()
+        #end
+        self.Checkout(0)
+        ntrials = []
+        if self.totrev < 2:  # already excluded by the raise above
+            return ntrials
+        #end
+        for v in range(0,self.totrev-1):
+            if v > 1:
+                os.remove(self.Verf(v-1))
+            #end
+            self.Checkout(v+1)
+            if os.stat(self.Verf(v)).st_size < MIN_SIZE or \
+               os.stat(self.Verf(v+1)).st_size < MIN_SIZE:
+                continue
+            #end
+
+            result = TimedTest(self.Verf(v+1),
+                               self.Verf(v),
+                               runclass.New())
+
+            target_size = os.stat(self.Verf(v+1)).st_size
+
+            ntrials.append(result)
+        #end
+
+        os.remove(self.Verf(self.totrev-1))
+        os.remove(self.Verf(self.totrev-2))
+        return ntrials
+    #end
+
+    def AppendVersion(self, f, n):
+        self.Checkout(n)
+        rf = open(self.Verf(n), "r")
+        data = rf.read()
+        f.write(data)
+        rf.close()
+        return len(data)
+    #end
+
+class RcsFinder:
+    def __init__(self):
+        self.subdirs  = []
+        self.rcsfiles = []
+        self.others   = []
+        self.skipped  = []
+        self.biground = 0
+    #end
+
+    def Scan1(self,dir):
+        dents = os.listdir(dir)
+        subdirs  = []
+        rcsfiles = []
+        others   = []
+        for dent in dents:
+            full = os.path.join(dir, dent)
+            if os.path.isdir(full):
+                subdirs.append(full)
+            elif dent[len(dent)-2:] == ",v":
+                rcsfiles.append(RcsFile(full))
+            else:
+                others.append(full)
+            #end
+        #end
+        self.subdirs  = self.subdirs  + subdirs
+        self.rcsfiles = self.rcsfiles + rcsfiles
+        self.others   = self.others   + others
+        return subdirs
+    #end
+
+    def Crawl(self, dir):
+        subdirs = [dir]
+        while subdirs:
+            s1 = self.Scan1(subdirs[0])
+            subdirs = subdirs[1:] + s1
+        #end
+    #end
+
+    def Summarize(self):
+        good = []
+        for rf in self.rcsfiles:
+            try:
+                rf.Sum1()
+                if rf.totrev < 2:
+                    raise SkipRcsException('too few versions (< 2)')
+                #end
+            except SkipRcsException, e:
+                #print 'skipping file %s: %s' % (rf.fname, e.reason)
+                self.skipped.append(rf)
+            else:
+                good.append(rf)
+            #end
+        self.rcsfiles = good
+    #end
+
+    def AllPairsByDate(self, runclass):
+        results = []
+        good = []
+        for rf in self.rcsfiles:
+            try:
+                results = results + rf.FilePairsByDate(runclass)
+            except SkipRcsException:
+                print 'file %s has compressed versions: skipping' % (rf.fname)
+            except NotEnoughVersions:
+                print 'testing %s on %s: not enough versions' % (runclass, rf.fname)
+            else:
+                good.append(rf)
+            #end
+        self.rcsfiles = good
+        self.ReportPairs(runclass, results)
+        return results
+    #end
+
+    def ReportPairs(self, name, results):
+        encode_time = 0
+        decode_time = 0
+        encode_size = 0
+        for r in results:
+            encode_time += r.encode_time.mean
+            decode_time += r.decode_time.mean
+            encode_size += r.encode_size
+        #end
+        print '%s rcs: encode %.2f s: decode %.2f s: size %d' % \
+              (name, encode_time, decode_time, encode_size)
+    #end
+
+    def MakeBigFiles(self, rand):
+        f1 = open(TMPDIR + "/big.1", "w")
+        f2 = open(TMPDIR + "/big.2", "w")
+        population = []
+        for file in self.rcsfiles:
+            if len(file.versions) < 2:
+                continue
+            population.append(file)
+        #end
+        f1sz = 0
+        f2sz = 0
+        fcount = int(len(population) * FILE_P)
+        assert fcount > 0
+        for file in rand.sample(population, fcount):
+            m = IGNORE_FILENAME.match(file.fname)
+            if m != None:
+                continue
+            #end
+            r1, r2 = rand.sample(xrange(0, len(file.versions)), 2)
+            f1sz += file.AppendVersion(f1, r1)
+            f2sz += file.AppendVersion(f2, r2)
+            #m.update('%s,%s,%s ' % (file.fname[len(RCSDIR):], 
+            #file.Vstr(r1), file.Vstr(r2)))
+        #end
+        testkey = 'rcs%d' % self.biground
+        self.biground = self.biground + 1
+
+        print '%s; source %u bytes; target %u bytes' % (testkey, f1sz, f2sz)
+        f1.close()
+        f2.close()
+        return (TMPDIR + "/big.1",
+                TMPDIR + "/big.2",
+                testkey)
+    #end
+
+    def Generator(self):  # returns a callable taking rand that builds a new big-file pair per call
+        return lambda rand: self.MakeBigFiles(rand)
+    #end
+#end
+
+# Crawl RCSDIR for RCS files, print summary statistics, and return the RcsFinder.
+def GetTestRcsFiles():
+    rcsf = RcsFinder()
+    rcsf.Crawl(RCSDIR)
+    if len(rcsf.rcsfiles) == 0:  # nothing to test with: report it as a CommandError
+        raise CommandError('', 'no RCS files')
+    #end
+    rcsf.Summarize()
+    print "rcsfiles: rcsfiles %d; subdirs %d; others %d; skipped %d" % (
+        len(rcsf.rcsfiles),
+        len(rcsf.subdirs),
+        len(rcsf.others),
+        len(rcsf.skipped))
+    print StatList([x.rcssize for x in rcsf.rcsfiles], "rcssize").str
+    print StatList([x.totrev for x in rcsf.rcsfiles], "totrev").str
+    return rcsf
+#end
+
+class SampleDataTest:  # collects (file, file, key) test pairs from directories of sample data
+    def __init__(self, dirs):  # dirs: list of directories; subdirectories are scanned as well
+        dirs_in = dirs  # keep the caller's list for the summary message; dirs is rebound below
+        self.pairs = []
+        while dirs:  # dirs is used as a queue of directories still to scan
+            d = dirs[0]
+            dirs = dirs[1:]  # slicing makes a new list, so the caller's list is never mutated
+            l = os.listdir(d)
+            files = []
+            for e in l:
+                p = os.path.join(d, e)
+                if os.path.isdir(p):
+                    dirs.append(p)  # scan this subdirectory later
+                else:
+                    files.append(p)
+                #end
+            #end
+            if len(files) > 1:
+                files.sort()
+                for x in xrange(len(files)):  # every ordered pair, including a file with itself
+                    for y in xrange(len(files)):
+                        self.pairs.append((files[x], files[y],
+                                           '%s-%s' % (files[x], files[y])))
+                    #end
+                #end
+            #end
+        #end
+        print "Sample data test using %d file pairs in %s" % (
+            len(self.pairs), dirs_in)
+    #end
+
+    def Generator(self):  # returns a callable taking rand that picks one stored pair
+        return lambda rand: rand.choice(self.pairs)
+    #end
+#end
+
+# Convert a config (values ordered as CONFIG_ORDER) into a list of argument
+# strings; the first SOFT_CONFIG_CNT values are joined with commas after '-C'.
+def ConfigToArgs(config):
+    args = [ '-C',
+             ','.join([str(x) for x in config[0:SOFT_CONFIG_CNT]])]
+    for i in range(SOFT_CONFIG_CNT, len(CONFIG_ORDER)):
+        key = CONFIG_ARGMAP[CONFIG_ORDER[i]]  # argument text registered for this config field
+        val = config[i]
+        if val == 'true' or val == 'false':  # boolean field: 'true' adds the bare key, 'false' adds nothing
+            if val == 'true':
+                args.append('%s' % key)
+            #end
+        else:
+            args.append('%s=%s' % (key, val))  # any other value is emitted as key=value
+        #end
+    #end
+    return args
+#end
+
+# Result of one configuration on one test input: either measured or synthesized.
+class RandomTest:
+    def __init__(self, tnum, tinput, config, syntuple = None):  # syntuple: optional (time, size)
+        self.mytinput = tinput[2]  # only the third element (the test key) is stored
+        self.myconfig = config
+        self.tnum = tnum
+
+        if syntuple != None:  # synthesized result: nothing is run, decode time is unknown
+            self.runtime = syntuple[0]
+            self.compsize = syntuple[1]
+            self.decodetime = None
+        else:
+            args = ConfigToArgs(config)
+            result = TimedTest(tinput[1], tinput[0], Xdelta3Runner(args))  # NOTE(review): tinput[1] is passed before tinput[0], unlike RunRegressionTest's (file1, file2) -- confirm
+
+            self.runtime = result.encode_time.mean
+            self.compsize = result.encode_size
+            self.decodetime = result.decode_time.mean
+        #end
+
+        self.score = None  # ScoreTests assigns score and the three *_pos ranks
+        self.time_pos = None
+        self.size_pos = None
+        self.score_pos = None
+    #end
+
+    def __str__(self):  # time and size, each with its rank when set, then config and decode time
+        decodestr = ' %s' % self.decodetime
+        return 'time %.6f%s size %d%s << %s >>%s' % (
+            self.time(), ((self.time_pos != None) and 
+                          (" (%s)" % self.time_pos) or ""),
+            self.size(), ((self.size_pos != None) and 
+                          (" (%s)" % self.size_pos) or ""),
+            c2str(self.config()),
+            decodestr)
+    #end
+
+    def time(self):  # encode time (mean of the timed runs, or the synthesized value)
+        return self.runtime
+    #end
+
+    def size(self):  # encoded size (or the synthesized value)
+        return self.compsize
+    #end
+
+    def config(self):  # the configuration this result belongs to
+        return self.myconfig
+    #end
+
+    def score(self):  # NOTE(review): __init__ sets an attribute named score, which hides this method on instances
+        return self.score
+    #end
+
+    def tinput(self):  # the test key taken from tinput[2]
+        return self.mytinput
+    #end
+#end
+
+def PosInAlist(l, e):  # index of the first pair in l whose second element equals e, else -1
+    for i in range(0, len(l)):
+        if l[i][1] == e:  # pairs are (key, item); only the item is compared
+            return i;
+        #end
+    #end
+    return -1
+#end
+
+# Extends input_configs with new, distinct random configurations until there are
+# num_results of them (or 100 draws in a row find nothing new); returns them sorted.
+def RandomTestConfigs(rand, input_configs, num_results):
+
+    outputs = input_configs[:]  # copy, so the caller's list is not modified
+    have_set = dict([(c,c) for c in input_configs])  # configs already chosen, used for duplicate checks
+
+    # Compute a random configuration
+    def RandomConfig():
+        config = []
+        cmap = {}
+        for key in CONFIG_ORDER:
+            val = cmap[key] = (INPUT_SPEC(rand)[key])(cmap)  # each chooser sees the values picked so far
+            config.append(val)
+        #end
+        return tuple(config)  # tuple so it can be a dictionary key
+    #end
+
+    while len(outputs) < num_results:
+        newc = None
+        for i in xrange(100):  # at most 100 attempts to draw an unseen config
+            c = RandomConfig()
+            if have_set.has_key(c):
+                continue
+            #end
+            have_set[c] = c
+            newc = c
+            break
+        if newc is None:
+            print 'stopped looking for configs at %d' % len(outputs)
+            break
+        #end
+        outputs.append(c)  # c is newc here: the search loop breaks right after newc = c
+    #end
+    outputs.sort()
+    return outputs
+#end
+
+def RunOptimizationLoop(rand, generator, rounds):  # run `rounds` rounds of random-config testing
+    configs = []
+    for rnum in xrange(rounds):
+        configs = RandomTestConfigs(rand, configs, MAX_RESULTS)  # keep survivors, top up with new configs
+        tinput = generator(rand)  # one test input per round, shared by every config
+        tests = []
+        for x in xrange(len(configs)):
+            t = RandomTest(x, tinput, configs[x])
+            print 'Round %d test %d: %s' % (rnum, x, t)
+            tests.append(t)
+        #end
+        results = ScoreTests(tests)  # sorted by ascending score
+
+        for r in results:  # record every result under its config in the module-level map
+            c = r.config()
+            if not test_all_config_results.has_key(c):
+                test_all_config_results[c] = [r]
+            else:
+                test_all_config_results[c].append(r)
+            #end
+        #end
+
+        #GraphResults('expt%d' % rnum, results)
+        #GraphSummary('sum%d' % rnum, results)
+
+        # re-test some fraction
+        configs = [r.config() for r in results[0:int(MAX_RESULTS * KEEP_P)]]  # lowest scores are kept
+    #end
+#end
+
+# TODO: cleanup
+test_all_config_results = {}
+
+def ScoreTests(results):  # set score and rank fields on each test; return tests by ascending score
+    scored = []
+    timed = []
+    sized = []
+
+    t_min = float(min([test.time() for test in results]))
+    #t_max = float(max([test.time() for test in results]))
+    s_min = float(min([test.size() for test in results]))
+    #s_max = float(max([test.size() for test in results]))
+
+    for test in results:
+
+        # Hyperbolic function. Smaller scores still better
+        red = 0.999  # minimum factors for each dimension are 1/1000
+        test.score = ((test.size() - s_min * red) *
+                      (test.time() - t_min * red))
+
+        scored.append((test.score, test))
+        timed.append((test.time(), test))
+        sized.append((test.size(), test))
+    #end
+
+    scored.sort()
+    timed.sort()
+    sized.sort()
+
+    best_by_size = []  # never used after this point
+    best_by_time = []  # never used after this point
+
+    pos = 0
+    for (score, test) in scored:
+        pos += 1
+        test.score_pos = pos  # score_pos starts at 1
+    #end
+
+    scored = [x[1] for x in scored]  # drop the sort keys, keep the tests in score order
+
+    for test in scored:
+        test.size_pos = PosInAlist(sized, test)  # list index, so size_pos/time_pos start at 0
+        test.time_pos = PosInAlist(timed, test)
+    #end
+
+    for test in scored:
+        c = test.config()  # c and s are assigned but not used
+        s = 0.0
+        print 'H-Score: %0.9f %s' % (test.score, test)
+    #end
+
+    return scored
+#end
+
+def GraphResults(desc, results):  # write data-<desc>.csv and run ./plot.sh on it
+    f = open("data-%s.csv" % desc, "w")
+    for r in results:
+        f.write("%0.9f\t%d\t# %s\n" % (r.time(), r.size(), r))  # tab-separated: time, size, description
+    #end
+    f.close()
+    os.system("./plot.sh data-%s.csv plot-%s.jpg" % (desc, desc))  # exit status is not checked
+#end
+
+def GraphSummary(desc, results_ignore):
+    test_population = 0
+    config_ordered = []
+
+    # drops duplicate test/config pairs (TODO: don't retest them)
+    for config, cresults in test_all_config_results.items():
+        input_config_map = {}
+        uniq = []
+        for test in cresults:
+            assert test.config() == config
+            test_population += 1
+            key = test.tinput()
+            if not input_config_map.has_key(key):
+                input_config_map[key] = {}
+            #end
+            if input_config_map[key].has_key(config):
+                print 'skipping repeat test %s vs. %s' % (input_config_map[key][config], test)
+                continue
+            #end
+            input_config_map[key][config] = test
+            uniq.append(test)
+        #end
+        config_ordered.append(uniq)
+    #end
+
+    # sort configs descending by number of tests
+    config_ordered.sort(lambda x, y: len(y) - len(x))
+
+    print 'population %d: %d configs %d results' % \
+          (test_population,
+           len(config_ordered),
+           len(config_ordered[0]))
+
+    if config_ordered[0] == 1:
+        return
+    #end
+
+    # a map from test-key to test-list w/ various configs
+    input_set = {}
+    osize = len(config_ordered)
+
+    for i in xrange(len(config_ordered)):
+        config = config_ordered[i][0].config()
+        config_tests = config_ordered[i]
+
+        #print '%s has %d tested inputs' % (config, len(config_tests))
+
+        if len(input_set) == 0:
+            input_set = dict([(t.tinput(), [t]) for t in config_tests])
+            continue
+        #end
+
+        # a map from test-key to test-list w/ various configs
+        update_set = {}
+        for r in config_tests:
+            t = r.tinput()
+            if input_set.has_key(t):
+                update_set[t] = input_set[t] + [r]
+            else:
+                #print 'config %s does not have test %s' % (config, t)
+                pass
+            #end
+        #end
+
+        if len(update_set) <= 1:
+            break
+        #end
+
+        input_set = update_set
+
+        # continue if there are more w/ the same number of inputs
+        if i < (len(config_ordered) - 1) and \
+           len(config_ordered[i + 1]) == len(config_tests):
+            continue
+        #end
+
+        # synthesize results for multi-test inputs
+        config_num = None
+
+        # map of config to sum(various test-keys)
+        smap = {}
+        for (key, tests) in input_set.items():
+            if config_num == None:
+                # config_num should be the same in all elements
+                config_num = len(tests)
+                smap = dict([(r.config(),
+                              (r.time(),
+                               r.size()))
+                             for r in tests])
+            else:
+                # compuate the per-config sum of time/size
+                assert config_num == len(tests)
+                smap = dict([(r.config(),
+                              (smap[r.config()][0] + r.time(),
+                               smap[r.config()][1] + r.size()))
+                             for r in tests])
+            #end
+        #end
+
+        if config_num == 1:
+            continue
+        #end
+
+        if len(input_set) == osize:
+            break
+        #end
+
+        summary = '%s-%d' % (desc, len(input_set))
+        osize = len(input_set)
+
+        print 'generate %s w/ %d configs' % (summary, config_num)
+        syn = [RandomTest(0, (None, None, summary), config,
+                          syntuple = (smap[config][0], smap[config][1]))
+               for config in smap.keys()]
+        syn = ScoreTests(syn)
+        #print 'smap is %s' % (smap,)
+        #print 'syn is %s' % (' and '.join([str(x) for x in syn]))
+        #GraphResults(summary, syn)
+    #end
+#end
+
+def RunRegressionTest(pairs, rounds):  # one timed encode/decode per pair per argument set; rounds is unused
+    for args in [
+        [],
+        ['-S=djw'],
+        ['-B=412907520'],
+        ['-B 412907520', ],  # flag and value passed as one argument string containing a space
+
+                 ]:
+        print "Args %s" % (args)
+        for (file1, file2, testkey) in pairs:
+            ttest = TimedTest(file1, file2, Xdelta3Runner(args, forkexec=True),
+                              skip_trials = 0,
+                              min_trials = 1,
+                              max_trials = 1)
+            print "Source %s\nTarget %s\nEncode %s\nDecode %s\nSize %s\n\n" % (
+                file1, file2,
+                ttest.encode_time.str,
+                ttest.decode_time.str,
+                ttest.encode_size)
+    #end
+#end
+
+if __name__ == "__main__":  # script entry point: runs the regression test over SAMPLEDIR
+    try:
+        RunCommand(['rm', '-rf', TMPDIR])  # start from an empty TMPDIR
+        os.mkdir(TMPDIR)
+
+        #rcsf = GetTestRcsFiles()
+        #generator = rcsf.Generator()
+
+        sample = SampleDataTest([SAMPLEDIR])
+        generator = sample.Generator()  # only used by the optimization loop, which is not called here
+
+        rand = random.Random(135135135135135)  # fixed seed
+
+        RunRegressionTest(sample.pairs, TEST_ROUNDS)
+
+        #RunSpeedTest()
+
+        # the idea below is to add the default configurations and
+        # xdelta1 to the optimization loop:
+        #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-1', '-3', '-6']))
+        #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9']))
+        #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-S', 'djw']))
+        #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-1', '-S', 'djw']))
+        #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-T']))
+        #x1r = rcsf.AllPairsByDate(Xdelta1RunClass())
+
+    except CommandError:  # a failed command ends the run silently and leaves TMPDIR in place
+        pass
+    else:
+        RunCommand(['rm', '-rf', TMPDIR])  # TMPDIR is removed only after a clean run
+        pass
+    #end
+#end
diff --git a/third-party/xdelta3/xdelta3/testing/xdelta3-test.py b/third-party/xdelta3/xdelta3/testing/xdelta3-test.py
new file mode 100644
index 0000000000..468db24fcf
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/testing/xdelta3-test.py
@@ -0,0 +1,153 @@
+#!/usr/bin/python2.7
+# xdelta3 - delta compression tools and library -*- Mode: C++ -*-
+# Copyright 2016 Joshua MacDonald
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import xdelta3
+
+# the test data section is expected to be len('target')
+source = 'source source input0 source source'
+target = 'source source target source source'
+
+# Encoder tests.  Each call returns a (result, output) pair; the asserts below
+# expect result 0 on success and output None when the call fails.
+
+print 'encode: basic ...'
+result, patch = xdelta3.xd3_encode_memory(target, source, 50)
+
+assert result == 0
+assert len(patch) < len(source)
+
+print 'encode: adler32 ...'
+result, patch_adler32 = xdelta3.xd3_encode_memory(target, source, 50,
+                                                  xdelta3.XD3_ADLER32)
+
+assert result == 0
+assert len(patch_adler32) < len(source)
+assert len(patch_adler32) > len(patch)  # the checksum makes the patch larger
+
+print 'encode: secondary ...'
+result, patch_djw = xdelta3.xd3_encode_memory(target, source, 50,
+                                              xdelta3.XD3_SEC_DJW)
+
+assert result == 0
+# secondary compression doesn't help
+assert len(patch_djw) > len(patch)
+
+print 'encode: exact ...'
+result, ignore = xdelta3.xd3_encode_memory(target, source, len(patch))  # limit equal to the patch size
+
+assert result == 0
+assert len(ignore) < len(source)
+
+print 'encode: out of space ...'
+result, ignore = xdelta3.xd3_encode_memory(target, source, len(patch) - 1)  # one byte too small
+
+assert result == 28  # presumably ENOSPC (28 on Linux) -- confirm
+assert ignore == None
+
+print 'encode: zero space ...'
+result, ignore = xdelta3.xd3_encode_memory(target, source, 0)
+
+assert result == 28
+assert ignore == None
+
+print 'encode: no source ...'
+result, zdata = xdelta3.xd3_encode_memory(target, None, 50)
+
+assert result == 0
+assert len(zdata) > len(patch)  # without a source the output is larger than the delta
+
+print 'encode: no input ...'
+result, ignore = xdelta3.xd3_encode_memory(None, None, 50)
+
+assert result != 0
+
+print 'decode: basic ...'
+result, target1 = xdelta3.xd3_decode_memory(patch, source, len(target))
+
+assert result == 0
+assert len(target1) == len(target)
+assert target1 == target
+
+print 'decode: out of space ...'
+result, ignore = xdelta3.xd3_decode_memory(patch, source, len(target) - 1)
+
+assert result == 28
+assert ignore == None
+
+print 'decode: zero space ...'
+result, ignore = xdelta3.xd3_decode_memory(patch, source, 0)
+
+assert result == 28
+assert ignore == None
+
+print 'decode: single byte error ...'
+# a few expected single-byte errors, e.g., unused address cache bits, see
+# xdelta3-test.h's single-bit error tests
+extra_count = 4
+noverify_count = 0
+for corrupt_pos in range(len(patch_adler32)):
+    input = ''.join([j == corrupt_pos and '\xff' or patch_adler32[j]
+                     for j in range(len(patch_adler32))])  # copy of the patch with one byte set to 0xff
+
+    result, ignore = xdelta3.xd3_decode_memory(input, source, len(target), 0)
+    assert result == -17712  # presumably XD3_INVALID_INPUT -- confirm against xdelta3.h
+    assert ignore == None
+
+    # without adler32 verification, the error may be in the data section which
+    # in this case is 6 bytes 'target'
+    result, corrupt = xdelta3.xd3_decode_memory(input, source, len(target),
+                                                xdelta3.XD3_ADLER32_NOVER)
+    if result == 0:
+        noverify_count = noverify_count + 1
+        #print "got %s" % corrupt
+    #end
+#end
+assert noverify_count == len('target') + extra_count
+
+print 'decode: no source ...'
+result, target2 = xdelta3.xd3_decode_memory(zdata, None, len(target))
+
+assert result == 0
+assert target == target2
+
+# Test compression level setting via flags.  assumes a 9 byte checksum
+# and that level 9 steps 2, level 1 steps 15:
+#         01234567890123456789012345678901
+# level 1 only indexes 2 checksums "abcdefghi" and "ABCDEFGHI"
+# outputs 43 vs. 23 bytes
+print 'encode: compression level ...'
+
+source = '_la_la_abcdefghi_la_la_ABCDEFGHI'
+target = 'la_la_ABCDEFGH__la_la_abcdefgh__'
+
+result1, level1 = xdelta3.xd3_encode_memory(target, source, 50, xdelta3.XD3_COMPLEVEL_1)
+result9, level9 = xdelta3.xd3_encode_memory(target, source, 50, xdelta3.XD3_COMPLEVEL_9)
+
+assert result1 == 0 and result9 == 0
+assert len(level1) > len(level9)
+
+#
+# Issue 65
+print 'encode: 65 ...'
+source = 'Hello World' 
+target = 'Hello everyone' 
+result, patch = xdelta3.xd3_encode_memory(target, source, len(target))  # limit of len(target) must fail
+assert result != 0
+
+result, patch = xdelta3.xd3_encode_memory(target, source, 2 * len(target))  # twice that must succeed
+assert result == 0
+
+print 'PASS'
diff --git a/third-party/xdelta3/xdelta3/xdelta3-blkcache.h b/third-party/xdelta3/xdelta3/xdelta3-blkcache.h
new file mode 100644
index 0000000000..f7b1d59a66
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/xdelta3-blkcache.h
@@ -0,0 +1,557 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+#include "xdelta3-internal.h"
+
+typedef struct _main_blklru      main_blklru;
+typedef struct _main_blklru_list main_blklru_list;
+
+
+#define XD3_INVALID_OFFSET XOFF_T_MAX
+
+struct _main_blklru_list  /* doubly-linked list node, embedded in main_blklru as "link" */
+{
+  main_blklru_list  *next;
+  main_blklru_list  *prev;
+};
+
+struct _main_blklru  /* one cached source block */
+{
+  uint8_t          *blk;    /* buffer holding the block's data */
+  xoff_t            blkno;  /* source block number, or XD3_INVALID_OFFSET */
+  usize_t           size;   /* number of bytes read into blk */
+  main_blklru_list  link;   /* node in lru_list */
+};
+
+XD3_MAKELIST(main_blklru_list,main_blklru,link);
+
+static usize_t           lru_size = 0;     /* entries of lru in use: 1 or MAX_LRU_SIZE */
+static main_blklru      *lru = NULL;  /* array of lru_size elts */
+static main_blklru_list  lru_list;         /* replacement order for the non-FIFO case */
+static int               do_src_fifo = 0;  /* set to avoid lru */
+
+static int lru_hits   = 0;  /* getblk calls answered from the cache */
+static int lru_misses = 0;  /* getblk calls that had to read the source */
+static int lru_filled = 0;  /* times a cache slot was handed out for filling */
+
+static void main_lru_reset (void)  /* return the cache globals to their initial values */
+{
+  lru_size = 0;
+  lru = NULL;  /* only forgets the pointer; main_lru_cleanup is what frees it */
+  do_src_fifo = 0;
+  lru_hits   = 0;
+  lru_misses = 0;
+  lru_filled = 0;
+}
+
+static void main_lru_cleanup (void)  /* free the cache memory and clear the counters */
+{
+  if (lru != NULL)
+    {
+      main_buffree (lru[0].blk);  /* lru[0].blk is the base of the one buffer all blocks share */
+    }
+
+  main_free (lru);
+  lru = NULL;
+
+  lru_hits = 0;
+  lru_misses = 0;
+  lru_filled = 0;
+}
+
+/* Opens SFILE, allocates the block cache and registers SOURCE with
+ * STREAM; returns 0 or an error code.  The encoder calls this
+ * immediately, the decoder waits until the application header arrives. */
+static int
+main_set_source (xd3_stream *stream, xd3_cmd cmd,  /* cmd is not used here */
+		 main_file *sfile, xd3_source *source)
+{
+  int ret = 0;
+  usize_t i;
+  xoff_t source_size = 0;
+  usize_t blksize;
+
+  XD3_ASSERT (lru == NULL);
+  XD3_ASSERT (stream->src == NULL);
+  XD3_ASSERT (option_srcwinsz >= XD3_MINSRCWINSZ);
+
+  /* TODO: this code needs refactoring into FIFO, LRU, FAKE.  Yuck!
+   * This is simplified from 3.0z which had issues with sizing the
+   * source buffer memory allocation and the source blocksize. */
+
+  /* LRU-specific */
+  main_blklru_list_init (& lru_list);
+
+  if (allow_fake_source)
+    {
+      /* TODO: refactor
+       * TOOLS/recode-specific: Check "allow_fake_source" mode looks
+       * broken now. */
+      sfile->mode = XO_READ;
+      sfile->realname = sfile->filename;
+      sfile->nread = 0;
+    }
+  else
+    {
+      /* Either a regular file (possibly compressed) or a FIFO
+       * (possibly compressed). */
+      if ((ret = main_file_open (sfile, sfile->filename, XO_READ)))
+	{
+	  return ret;
+	}
+
+      /* If the file is regular we know its size.  If the file turns
+       * out to be externally compressed, size_known may change. */
+      sfile->size_known = (main_file_stat (sfile, &source_size) == 0);
+    }
+
+  /* Note: The API requires a power-of-two blocksize and srcwinsz
+   * (-B).  The logic here will use a single block if the entire file
+   * is known to fit into srcwinsz. */
+  option_srcwinsz = xd3_xoff_roundup (option_srcwinsz);
+
+  /* Though called "lru", it is not LRU-specific.  We always allocate
+   * a maximum number of source block buffers.  If the entire file
+   * fits into srcwinsz, this buffer will stay as the only
+   * (lru_size==1) source block.  Otherwise, we know that at least
+   * option_srcwinsz bytes are available.  Split the source window
+   * into buffers. */
+  if ((lru = (main_blklru*) main_malloc (MAX_LRU_SIZE *
+					 sizeof (main_blklru))) == NULL)
+    {
+      ret = ENOMEM;
+      return ret;
+    }
+
+  memset (lru, 0, sizeof(lru[0]) * MAX_LRU_SIZE);
+
+  /* Allocate the entire buffer. */
+  if ((lru[0].blk = (uint8_t*) main_bufalloc (option_srcwinsz)) == NULL)
+    {
+      ret = ENOMEM;  /* lru stays allocated on this path */
+      return ret;
+    }
+
+  /* Main calls main_getblk_func() once before xd3_set_source().  This
+   * is the point at which external decompression may begin.  Set the
+   * system for a single block. */
+  lru_size = 1;
+  lru[0].blkno = XD3_INVALID_OFFSET;
+  blksize = option_srcwinsz;
+  main_blklru_list_push_back (& lru_list, & lru[0]);
+  XD3_ASSERT (blksize != 0);
+
+  /* Initialize xd3_source. */
+  source->blksize  = blksize;
+  source->name     = sfile->filename;
+  source->ioh      = sfile;
+  source->curblkno = XD3_INVALID_OFFSET;
+  source->curblk   = NULL;
+  source->max_winsize = option_srcwinsz;
+
+  if ((ret = main_getblk_func (stream, source, 0)) != 0)  /* reads block 0 with the full-window blocksize */
+    {
+      XPR(NT "error reading source: %s: %s\n",
+	  sfile->filename,
+	  xd3_mainerror (ret));
+      return ret;
+    }
+
+  source->onblk = lru[0].size;  /* xd3 sets onblk */
+
+  /* If the file is smaller than a block, size is known. */
+  if (!sfile->size_known && source->onblk < blksize)
+    {
+      source_size = source->onblk;
+      source->onlastblk = source_size;
+      sfile->size_known = 1;
+    }
+
+  /* If the size is not known or is greater than the buffer size, we
+   * split the buffer across MAX_LRU_SIZE blocks (already allocated in
+   * "lru"). */
+  if (!sfile->size_known || source_size > option_srcwinsz)
+    {
+      /* Modify block 0, change blocksize. */
+      blksize = option_srcwinsz / MAX_LRU_SIZE;
+      source->blksize = blksize;
+      source->onblk = blksize;
+      source->onlastblk = blksize;
+      source->max_blkno = MAX_LRU_SIZE - 1;
+
+      lru[0].size = blksize;
+      lru_size = MAX_LRU_SIZE;
+
+      /* Setup rest of blocks. */
+      for (i = 1; i < lru_size; i += 1)
+	{
+	  lru[i].blk = lru[0].blk + (blksize * i);  /* slice i of the single buffer */
+	  lru[i].blkno = i;  /* marked as already holding block i */
+	  lru[i].size = blksize;
+	  main_blklru_list_push_back (& lru_list, & lru[i]);
+	}
+    }
+
+  if (! sfile->size_known)
+    {
+      /* If the size is not known, we must use FIFO discipline. */
+      do_src_fifo = 1;
+    }
+
+  /* Call the appropriate set_source method, handle errors, print
+   * verbose message, etc. */
+  if (sfile->size_known)
+    {
+      ret = xd3_set_source_and_size (stream, source, source_size);
+    }
+  else
+    {
+      ret = xd3_set_source (stream, source);
+    }
+
+  if (ret)
+    {
+      XPR(NT XD3_LIB_ERRMSG (stream, ret));
+      return ret;
+    }
+
+  XD3_ASSERT (stream->src == source);
+  XD3_ASSERT (source->blksize == blksize);
+
+  if (option_verbose)
+    {
+      static shortbuf srcszbuf;
+      static shortbuf srccntbuf;
+      static shortbuf winszbuf;
+      static shortbuf blkszbuf;
+      static shortbuf nbufs;
+
+      if (sfile->size_known)
+	{
+	  short_sprintf (srcszbuf, "source size %s [%"Q"u]",
+			 main_format_bcnt (source_size, &srccntbuf),
+			 source_size);
+	}
+      else
+	{
+	  short_sprintf (srcszbuf, "%s", "source size unknown");
+	}
+
+      nbufs.buf[0] = 0;  /* empty unless verbosity is above 1 */
+
+      if (option_verbose > 1)
+	{
+	  short_sprintf (nbufs, " #bufs %"W"u", lru_size);
+	}
+
+      XPR(NT "source %s %s blksize %s window %s%s%s\n",
+	  sfile->filename,
+	  srcszbuf.buf,
+	  main_format_bcnt (blksize, &blkszbuf),
+	  main_format_bcnt (option_srcwinsz, &winszbuf),
+	  nbufs.buf,
+	  do_src_fifo ? " (FIFO)" : "");
+    }
+
+  return 0;
+}
+
+static int  /* 0, or XD3_TOOFARBACK when a FIFO source would have to go backwards */
+main_getblk_lru (xd3_source *source, xoff_t blkno,  /* find or assign the cache slot for blkno */
+		 main_blklru** blrup, int *is_new)  /* out: the slot, and 1 if the caller must fill it */
+{
+  main_blklru *blru = NULL;
+  usize_t i;
+
+  (*is_new) = 0;
+
+  if (do_src_fifo)
+    {
+      /* Direct lookup assumes sequential scan w/o skipping blocks. */
+      int idx = blkno % lru_size;
+      blru = & lru[idx];
+      if (blru->blkno == blkno)
+	{
+	  (*blrup) = blru;
+	  return 0;
+	}
+      /* No going backwards in a sequential scan. */
+      if (blru->blkno != XD3_INVALID_OFFSET && blru->blkno > blkno)
+	{
+	  return XD3_TOOFARBACK;
+	}
+    }
+  else
+    {
+      /* Sequential search through LRU. */
+      for (i = 0; i < lru_size; i += 1)
+	{
+	  blru = & lru[i];
+	  if (blru->blkno == blkno)
+	    {
+	      main_blklru_list_remove (blru);  /* a hit moves the entry to the back of the list */
+	      main_blklru_list_push_back (& lru_list, blru);
+	      (*blrup) = blru;
+	      IF_DEBUG1 (DP(RINT "[getblk_lru] HIT blkno = %"Q"u lru_size=%"W"u\n",
+		    blkno, lru_size));
+	      return 0;
+	    }
+	}
+      IF_DEBUG1 (DP(RINT "[getblk_lru] MISS blkno = %"Q"u lru_size=%"W"u\n",
+		    blkno, lru_size));
+    }
+
+  if (do_src_fifo)  /* miss: pick the slot that will be overwritten */
+    {
+      int idx = blkno % lru_size;
+      blru = & lru[idx];
+    }
+  else
+    {
+      XD3_ASSERT (! main_blklru_list_empty (& lru_list));
+      blru = main_blklru_list_pop_front (& lru_list);  /* front of the list is the replacement victim */
+      main_blklru_list_push_back (& lru_list, blru);
+    }
+
+  lru_filled += 1;
+  (*is_new) = 1;
+  (*blrup) = blru;
+  blru->blkno = XD3_INVALID_OFFSET;  /* the caller stores the real block number after reading */
+  return 0;
+}
+
+static int  /* 0 on success, otherwise an error code such as XD3_TOOFARBACK */
+main_read_seek_source (xd3_stream *stream,  /* move the source file position to the start of blkno */
+		       xd3_source *source,
+		       xoff_t      blkno) {
+  xoff_t pos = blkno * source->blksize;  /* byte offset of the requested block */
+  main_file *sfile = (main_file*) source->ioh;
+  main_blklru *blru;
+  int is_new;
+  size_t nread = 0;
+  int ret = 0;
+
+  if (!sfile->seek_failed)  /* once a seek has failed it is not attempted again */
+    {
+      ret = main_file_seek (sfile, pos);
+
+      if (ret == 0)
+	{
+	  sfile->source_position = pos;
+	}
+    }
+
+  if (sfile->seek_failed || ret != 0)
+    {
+      /* For an unseekable file (or other seek error, does it
+       * matter?) */
+      if (sfile->source_position > pos)
+	{
+	  /* Could assert !IS_ENCODE(), this shouldn't happen
+	   * because of do_src_fifo during encode. */
+	  if (!option_quiet)
+	    {
+	      XPR(NT "source can't seek backwards; requested block offset "
+		  "%"Q"u source position is %"Q"u\n",
+		  pos, sfile->source_position);
+	    }
+
+	  sfile->seek_failed = 1;
+	  stream->msg = "non-seekable source: "
+	    "copy is too far back (try raising -B)";
+	  return XD3_TOOFARBACK;
+	}
+
+      /* There's a chance here, that a genuine lseek error will cause
+       * xdelta3 to shift into non-seekable mode, entering a degraded
+       * condition.  */
+      if (!sfile->seek_failed && option_verbose)  /* report only the first failure */
+	{
+	  XPR(NT "source can't seek, will use FIFO for %s\n",
+	      sfile->filename);
+
+	  if (option_verbose > 1)
+	    {
+	      XPR(NT "seek error at offset %"Q"u: %s\n",
+		  pos, xd3_mainerror (ret));
+	    }
+	}
+
+      sfile->seek_failed = 1;
+
+      if (option_verbose > 1 && pos != sfile->source_position)
+	{
+	  XPR(NT "non-seekable source skipping %"Q"u bytes @ %"Q"u\n",
+	      pos - sfile->source_position,
+	      sfile->source_position);
+	}
+
+      while (sfile->source_position < pos)  /* read forward one whole block at a time */
+	{
+	  xoff_t skip_blkno;
+	  usize_t skip_offset;
+
+	  xd3_blksize_div (sfile->source_position, source,
+			   &skip_blkno, &skip_offset);
+
+	  /* Read past unused data */
+	  XD3_ASSERT (pos - sfile->source_position >= source->blksize);
+	  XD3_ASSERT (skip_offset == 0);
+
+	  if ((ret = main_getblk_lru (source, skip_blkno,
+				      & blru, & is_new)))
+	    {
+	      return ret;
+	    }
+
+	  XD3_ASSERT (is_new);
+	  blru->blkno = skip_blkno;  /* skipped data is kept in the cache under its block number */
+
+	  if ((ret = main_read_primary_input (sfile,
+					      (uint8_t*) blru->blk,
+					      source->blksize,
+					      & nread)))
+	    {
+	      return ret;
+	    }
+
+	  if (nread != source->blksize)  /* input ended before the requested position */
+	    {
+	      IF_DEBUG1 (DP(RINT "[getblk] short skip block nread = %"Z"u\n",
+			    nread));
+	      stream->msg = "non-seekable input is short";
+	      return XD3_INVALID_INPUT;
+	    }
+
+	  sfile->source_position += nread;
+	  blru->size = nread;
+
+	  IF_DEBUG1 (DP(RINT "[getblk] skip blkno %"Q"u size %"W"u\n",
+			skip_blkno, blru->size));
+
+	  XD3_ASSERT (sfile->source_position <= pos);
+	}
+    }
+
+  return 0;
+}
+
+/* This is the callback for reading a block of source.  This function
+ * is blocking and it implements a small LRU.
+ *
+ * Note that it is possible for main_input() to handle getblk requests
+ * in a non-blocking manner.  If the callback is NULL then the caller
+ * of xd3_*_input() must handle the XD3_GETSRCBLK return value and
+ * fill the source in the same way.  See xd3_getblk for details.  To
+ * see an example of non-blocking getblk, see xdelta-test.h. */
+static int
+main_getblk_func (xd3_stream *stream,
+		  xd3_source *source,
+		  xoff_t      blkno)
+{
+  int ret = 0;
+  xoff_t pos = blkno * source->blksize;  /* byte offset of the requested block */
+  main_file *sfile = (main_file*) source->ioh;
+  main_blklru *blru;
+  int is_new;
+  size_t nread = 0;
+
+  if (allow_fake_source)  /* fake source: report an empty block without reading anything */
+    {
+      source->curblkno = blkno;
+      source->onblk    = 0;
+      source->curblk   = lru[0].blk;
+      lru[0].size = 0;
+      return 0;
+    }
+
+  if ((ret = main_getblk_lru (source, blkno, & blru, & is_new)))
+    {
+      return ret;
+    }
+
+  if (!is_new)  /* cache hit: point the source at the cached data */
+    {
+      source->curblkno = blkno;
+      source->onblk    = blru->size;
+      source->curblk   = blru->blk;
+      lru_hits++;
+      return 0;
+    }
+
+  lru_misses += 1;
+
+  if (pos != sfile->source_position)
+    {
+      /* Only try to seek when the position is wrong.  This means the
+       * decoder will fail when the source buffer is too small, but
+       * only when the input is non-seekable. */
+      if ((ret = main_read_seek_source (stream, source, blkno)))
+	{
+	  return ret;
+	}
+    }
+
+  XD3_ASSERT (sfile->source_position == pos);
+
+  if ((ret = main_read_primary_input (sfile,
+				      (uint8_t*) blru->blk,
+				      source->blksize,
+				      & nread)))
+    {
+      return ret;
+    }
+
+  /* Save the last block read, used to handle non-seekable files. */
+  sfile->source_position = pos + nread;
+
+  if (option_verbose > 3)
+    {
+      if (blru->blkno != XD3_INVALID_OFFSET)  /* NOTE(review): main_getblk_lru marks new slots invalid, so this looks rarely taken -- confirm */
+	{
+	  if (blru->blkno != blkno)
+	    {
+	      XPR(NT "source block %"Q"u read %"Z"u ejects %"Q"u (lru_hits=%u, "
+		  "lru_misses=%u, lru_filled=%u)\n",
+		  blkno, nread, blru->blkno, lru_hits, lru_misses, lru_filled);
+	    }
+	  else
+	    {
+	      XPR(NT "source block %"Q"u read %"Z"u (lru_hits=%u, "
+		  "lru_misses=%u, lru_filled=%u)\n",
+		  blkno, nread, lru_hits, lru_misses, lru_filled);
+	    }
+	}
+      else
+	{
+	  XPR(NT "source block %"Q"u read %"Z"u (lru_hits=%u, lru_misses=%u, "
+	      "lru_filled=%u)\n", blkno, nread, 
+	      lru_hits, lru_misses, lru_filled);
+	}
+    }
+
+  source->curblk   = blru->blk;
+  source->curblkno = blkno;
+  source->onblk    = nread;  /* may be less than blksize after a short read */
+  blru->size       = nread;
+  blru->blkno      = blkno;  /* the slot now holds this block */
+
+  IF_DEBUG1 (DP(RINT "[main_getblk] blkno %"Q"u onblk %"Z"u pos %"Q"u "
+		"srcpos %"Q"u\n",
+		blkno, nread, pos, sfile->source_position));
+
+  return 0;
+}
diff --git a/third-party/xdelta3/xdelta3/xdelta3-cfgs.h b/third-party/xdelta3/xdelta3/xdelta3-cfgs.h
new file mode 100644
index 0000000000..84a2221439
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/xdelta3-cfgs.h
@@ -0,0 +1,171 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+/* SOFT string matcher: every tuning parameter is read at run time
+ * from stream->smatcher, and xdelta3.c is included with SOFTCFG
+ * defined as 1.  After this section SOFTCFG is defined as 0. */
+
+#if XD3_BUILD_SOFT
+
+#define SOFTCFG       1
+#define TEMPLATE      soft
+#define LLOOK         stream->smatcher.large_look
+#define LSTEP         stream->smatcher.large_step
+#define SLOOK         stream->smatcher.small_look
+#define SCHAIN        stream->smatcher.small_chain
+#define SLCHAIN       stream->smatcher.small_lchain
+#define MAXLAZY       stream->smatcher.max_lazy
+#define LONGENOUGH    stream->smatcher.long_enough
+
+#include "xdelta3.c"
+
+#undef  LONGENOUGH
+#undef  MAXLAZY
+#undef  SLCHAIN
+#undef  SCHAIN
+#undef  SLOOK
+#undef  LSTEP
+#undef  LLOOK
+#undef  TEMPLATE
+#undef  SOFTCFG
+#endif
+
+#define SOFTCFG 0
+
+/* FASTEST string matcher: all parameters are compile-time
+ * constants.  xdelta3.c is included with TEMPLATE defined as
+ * "fastest", then every macro defined here is removed again. */
+#if XD3_BUILD_FASTEST
+#define TEMPLATE      fastest
+#define LLOOK         9
+#define LSTEP         26
+#define SLOOK         4U
+#define SCHAIN        1
+#define SLCHAIN       1
+#define MAXLAZY       6
+#define LONGENOUGH    6
+
+#include "xdelta3.c"
+
+#undef  LONGENOUGH
+#undef  MAXLAZY
+#undef  SLCHAIN
+#undef  SCHAIN
+#undef  SLOOK
+#undef  LSTEP
+#undef  LLOOK
+#undef  TEMPLATE
+#endif
+
+/* FASTER string matcher: all parameters are compile-time
+ * constants.  xdelta3.c is included with TEMPLATE defined as
+ * "faster", then every macro defined here is removed again. */
+#if XD3_BUILD_FASTER
+#define TEMPLATE      faster
+#define LLOOK         9
+#define LSTEP         15
+#define SLOOK         4U
+#define SCHAIN        1
+#define SLCHAIN       1
+#define MAXLAZY       18
+#define LONGENOUGH    18
+
+#include "xdelta3.c"
+
+#undef  LONGENOUGH
+#undef  MAXLAZY
+#undef  SLCHAIN
+#undef  SCHAIN
+#undef  SLOOK
+#undef  LSTEP
+#undef  LLOOK
+#undef  TEMPLATE
+#endif
+
+/* FAST string matcher: all parameters are compile-time constants.
+ * xdelta3.c is included with TEMPLATE defined as "fast", then every
+ * macro defined here is removed again. */
+#if XD3_BUILD_FAST
+#define TEMPLATE      fast
+#define LLOOK         9
+#define LSTEP         8
+#define SLOOK         4U
+#define SCHAIN        4
+#define SLCHAIN       1
+#define MAXLAZY       18
+#define LONGENOUGH    35
+
+#include "xdelta3.c"
+
+#undef  LONGENOUGH
+#undef  MAXLAZY
+#undef  SLCHAIN
+#undef  SCHAIN
+#undef  SLOOK
+#undef  LSTEP
+#undef  LLOOK
+#undef  TEMPLATE
+#endif
+
+/* SLOW string matcher: all parameters are compile-time constants.
+ * xdelta3.c is included with TEMPLATE defined as "slow", then every
+ * macro defined here is removed again. */
+#if XD3_BUILD_SLOW
+#define TEMPLATE      slow
+#define LLOOK         9
+#define LSTEP         2
+#define SLOOK         4U
+#define SCHAIN        44
+#define SLCHAIN       13
+#define MAXLAZY       90
+#define LONGENOUGH    70
+
+#include "xdelta3.c"
+
+#undef  LONGENOUGH
+#undef  MAXLAZY
+#undef  SLCHAIN
+#undef  SCHAIN
+#undef  SLOOK
+#undef  LSTEP
+#undef  LLOOK
+#undef  TEMPLATE
+#endif
+
+/* DEFAULT string matcher: all parameters are compile-time
+ * constants.  xdelta3.c is included with TEMPLATE defined as
+ * "default", then every macro defined here is removed again. */
+#if XD3_BUILD_DEFAULT
+#define TEMPLATE      default
+#define LLOOK         9
+#define LSTEP         3
+#define SLOOK         4U
+#define SCHAIN        8
+#define SLCHAIN       2
+#define MAXLAZY       36
+#define LONGENOUGH    70
+
+#include "xdelta3.c"
+
+#undef  LONGENOUGH
+#undef  MAXLAZY
+#undef  SLCHAIN
+#undef  SCHAIN
+#undef  SLOOK
+#undef  LSTEP
+#undef  LLOOK
+#undef  TEMPLATE
+#endif
diff --git a/third-party/xdelta3/xdelta3/xdelta3-decode.h b/third-party/xdelta3/xdelta3/xdelta3-decode.h
new file mode 100644
index 0000000000..a329591b32
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/xdelta3-decode.h
@@ -0,0 +1,1219 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+#ifndef _XDELTA3_DECODE_H_
+#define _XDELTA3_DECODE_H_
+
+#include "xdelta3-internal.h"
+/* VCD_SOURCE or VCD_TARGET when (x) masked by VCD_SRCORTGT equals exactly that flag, otherwise 0. */
+#define SRCORTGT(x) \
+  ((((x) & VCD_SRCORTGT) == VCD_SOURCE) ? VCD_SOURCE : \
+   ((((x) & VCD_SRCORTGT) == VCD_TARGET) ? VCD_TARGET : 0))
+
+/* Consume one input byte into *val.  Returns XD3_INPUT (and sets
+ * stream->msg) when no input is available, otherwise returns 0. */
+static inline int
+xd3_decode_byte (xd3_stream *stream, usize_t *val)
+{
+  if (stream->avail_in != 0)
+    {
+      *val = *stream->next_in;
+      DECODE_INPUT (1);
+      return 0;
+    }
+  stream->msg = "further input required";
+  return XD3_INPUT;
+}
+
+/* Accumulate input into buf until *pos reaches size, resuming at
+ * *pos on each call.  Returns XD3_INPUT when the input runs out
+ * first, or 0 once size bytes are present.  When *pos == size on
+ * entry (a zero-length appheader or code table, which the standard
+ * does not forbid) nothing is read. */
+static inline int
+xd3_decode_bytes (xd3_stream *stream, uint8_t *buf, usize_t *pos, usize_t size)
+{
+  for (;;)
+    {
+      usize_t remaining;
+      usize_t chunk;
+      if (*pos >= size)
+        {
+          return 0;
+        }
+      if (stream->avail_in == 0)
+        {
+          stream->msg = "further input required";
+          return XD3_INPUT;
+        }
+      remaining = size - *pos;
+      chunk = xd3_min (remaining, stream->avail_in);
+      memcpy (buf + *pos, stream->next_in, (size_t) chunk);
+      DECODE_INPUT (chunk);
+      *pos += chunk;
+    }
+}
+
+/* Reset the per-window decoder state: the copy-window length and
+ * offset, the checksum byte count, and the address cache.  Neither
+ * dec_tgtlen nor dec_winstart is modified here: dec_tgtlen carries
+ * over between windows and dec_winstart is only advanced when the
+ * next window actually starts, which postpones any overflow error as
+ * long as possible (so exactly 4GB can pass through a 32-bit
+ * encoder).  Always returns 0. */
+static int
+xd3_decode_init_window (xd3_stream *stream)
+{
+  xd3_init_cache (& stream->acache);
+
+  stream->dec_cksumbytes = 0;
+  stream->dec_cpyoff = 0;
+  stream->dec_cpylen = 0;
+  return 0;
+}
+
+/* Ensure next_out points at a buffer of at least dec_tgtlen bytes and
+ * set dec_tgtaddrbase.  Returns 0, ENOMEM when allocation fails, or
+ * XD3_UNIMPLEMENTED for a VCD_TARGET window (see below). */
+static int
+xd3_decode_setup_buffers (xd3_stream *stream)
+{
+  /* If VCD_TARGET is set then the previous buffer may be reused. */
+  if (stream->dec_win_ind & VCD_TARGET)
+    {
+      /* Note: this implementation is untested, since Xdelta3 itself
+       * does not implement an encoder for VCD_TARGET mode. Thus, mark
+       * unimplemented until needed. */
+      if (1)
+	{
+	  stream->msg = "VCD_TARGET not implemented";
+	  return XD3_UNIMPLEMENTED;
+	}
+
+      /* But this implementation only supports copying from the last
+       * target window.  If the offset is outside that range, it can't
+       * be done. */
+      if (stream->dec_cpyoff < stream->dec_laststart)
+	{
+	  stream->msg = "unsupported VCD_TARGET offset";
+	  return XD3_INVALID_INPUT;
+	}
+
+      /* See if the two windows are the same.  This indicates the
+       * first time VCD_TARGET is used.  This causes a second buffer
+       * to be allocated, after that the two are swapped in the
+       * DEC_FINISH case. */
+      if (stream->dec_lastwin == stream->next_out)
+	{
+	  stream->next_out  = NULL;
+	  stream->space_out = 0;
+	}
+
+      /* TODO: (See note above, this looks incorrect) */
+      stream->dec_cpyaddrbase = stream->dec_lastwin +
+	(usize_t) (stream->dec_cpyoff - stream->dec_laststart);
+    }
+
+  /* Grow the output buffer when it cannot hold the target window. */
+  if (stream->space_out < stream->dec_tgtlen)
+    {
+      usize_t want = xd3_round_blksize (stream->dec_tgtlen, XD3_ALLOCSIZE);
+
+      /* Drop every reference to the old buffer before freeing it, so
+       * a failed allocation cannot leave next_out dangling or
+       * space_out describing memory that no longer exists. */
+      stream->next_out  = NULL;
+      stream->space_out = 0;
+      xd3_free (stream, stream->dec_buffer);
+      stream->dec_buffer = (uint8_t*) xd3_alloc (stream, want, 1);
+      if (stream->dec_buffer == NULL) { return ENOMEM; }
+      stream->space_out = want;
+      stream->next_out  = stream->dec_buffer;
+    }
+
+  /* dec_tgtaddrbase refers to an invalid base address, but it is
+   * always used with a sufficiently large instruction offset (i.e.,
+   * beyond the copy window).  This condition is enforced by
+   * xd3_decode_output_halfinst. */
+  stream->dec_tgtaddrbase = stream->next_out - stream->dec_cpylen;
+
+  return 0;
+}
+
+/* Make *buf_ptr refer to a buffer of at least size bytes, recording
+ * its capacity in *buf_alloc.  A buffer that is already big enough is
+ * kept; one that is too small is released and replaced (contents are
+ * not preserved).  Returns 0, or ENOMEM when allocation fails. */
+static int
+xd3_decode_allocate (xd3_stream  *stream,
+		     usize_t       size,
+		     uint8_t    **buf_ptr,
+		     usize_t      *buf_alloc)
+{
+  IF_DEBUG2 (DP(RINT "[xd3_decode_allocate] size %"W"u alloc %"W"u\n",
+		size, *buf_alloc));
+
+  if (*buf_ptr != NULL)
+    {
+      if (*buf_alloc >= size)
+        {
+          return 0;
+        }
+      xd3_free (stream, *buf_ptr);
+      *buf_ptr = NULL;
+    }
+
+  *buf_alloc = xd3_round_blksize (size, XD3_ALLOCSIZE);
+  *buf_ptr = (uint8_t*) xd3_alloc (stream, *buf_alloc, 1);
+  return (*buf_ptr == NULL) ? ENOMEM : 0;
+}
+/* Gather section->size input bytes for one section, resuming at section->pos on each call. */
+static int
+xd3_decode_section (xd3_stream *stream,
+		    xd3_desect *section,      /* section being gathered */
+		    xd3_decode_state nstate,  /* dec_state entered once the section is complete */
+		    int copy)                 /* 0: reference the input in place instead of copying */
+{
+  XD3_ASSERT (section->pos <= section->size);
+  XD3_ASSERT (stream->dec_state != nstate);
+  /* Returns 0 when complete, XD3_INPUT when more input is needed, or an allocation error. */
+  if (section->pos < section->size)
+    {
+      usize_t sect_take;
+
+      if (stream->avail_in == 0)
+	{
+	  return XD3_INPUT;
+	}
+      /* Zero-copy applies only to a section that has not been started. */
+      if ((copy == 0) && (section->pos == 0))
+	{
+	  /* No allocation/copy needed: the whole section is taken from next_in. */
+	  section->buf = stream->next_in;
+	  sect_take    = section->size;
+	  IF_DEBUG1 (DP(RINT "[xd3_decode_section] zerocopy %"W"u @ %"W"u avail %"W"u\n",
+			sect_take, section->pos, stream->avail_in));
+	}
+      else
+	{
+	  usize_t sect_need = section->size - section->pos;
+
+	  /* Allocate and copy: take what is available, up to what is missing. */
+	  sect_take = xd3_min (sect_need, stream->avail_in);
+	  /* The buffer is (re)sized only when the section is first started. */
+	  if (section->pos == 0)
+	    {
+	      int ret;
+
+	      if ((ret = xd3_decode_allocate (stream,
+					      section->size,
+					      & section->copied1,
+					      & section->alloc1)))
+		{
+		  return ret;
+		}
+
+	      section->buf = section->copied1;
+	    }
+
+	  IF_DEBUG2 (DP(RINT "[xd3_decode_section] take %"W"u @ %"W"u [need %"W"u] avail %"W"u\n",
+			sect_take, section->pos, sect_need, stream->avail_in));
+	  XD3_ASSERT (section->pos + sect_take <= section->alloc1);
+
+	  memcpy (section->copied1 + section->pos,
+		  stream->next_in,
+		  sect_take);
+	}
+
+      section->pos += sect_take;
+
+      stream->dec_winbytes += sect_take;
+
+      DECODE_INPUT (sect_take);
+    }
+  /* Still incomplete: ask the caller for more input and resume later. */
+  if (section->pos < section->size)
+    {
+      IF_DEBUG1 (DP(RINT "[xd3_decode_section] further input required %"W"u\n",
+		    section->size - section->pos));
+      stream->msg = "further input required";
+      return XD3_INPUT;
+    }
+
+  XD3_ASSERT (section->pos == section->size);
+  /* Complete: advance the state machine and rewind pos. */
+  stream->dec_state = nstate;
+  section->buf_max  = section->buf + section->size;
+  section->pos      = 0;
+  return 0;
+}
+
+/* Decode the size and address for half of an instruction (i.e., a
+ * single opcode) and advance stream->dec_position, the logical
+ * position before this instruction, by its size.  Sizes and copy
+ * addresses are bounds-checked here because the checks need
+ * dec_position.  Returns 0 or an error such as XD3_INVALID_INPUT. */
+static int
+xd3_decode_parse_halfinst (xd3_stream *stream, xd3_hinst *inst)
+{
+  int ret;
+
+  /* If the size from the instruction table is zero then read a size value. */
+  if ((inst->size == 0) &&
+      (ret = xd3_read_size (stream,
+			    & stream->inst_sect.buf,
+			      stream->inst_sect.buf_max,
+			    & inst->size)))
+    {
+      return XD3_INVALID_INPUT;
+    }
+
+  /* For copy instructions, read the address (traced once it is known). */
+  if (inst->type >= XD3_CPY)
+    {
+      if ((ret = xd3_decode_address (stream,
+				     stream->dec_position,
+				     inst->type - XD3_CPY,
+				     & stream->addr_sect.buf,
+				     stream->addr_sect.buf_max,
+				     & inst->addr)))
+	{
+	  return ret;
+	}
+
+      IF_DEBUG2 ({
+	static int cnt = 0;
+	XPR(NT "DECODE:%u: COPY at %"Q"u (winoffset %"W"u) "
+	    "size %"W"u winaddr %"W"u\n",
+	    cnt++,
+	    stream->total_out + (stream->dec_position -
+				 stream->dec_cpylen),
+	    (stream->dec_position - stream->dec_cpylen),
+	    inst->size,
+	    inst->addr);
+      });
+
+      /* Cannot copy an address before it is filled-in. */
+      if (inst->addr >= stream->dec_position)
+	{
+	  stream->msg = "address too large";
+	  return XD3_INVALID_INPUT;
+	}
+
+      /* Check: a VCD_TARGET or VCD_SOURCE copy cannot exceed the remaining
+       * buffer space in its own segment (compared without overflow). */
+      if (inst->addr < stream->dec_cpylen &&
+	  inst->size > stream->dec_cpylen - inst->addr)
+	{
+	  stream->msg = "size too large";
+	  return XD3_INVALID_INPUT;
+	}
+    }
+  else
+    {
+      IF_DEBUG2 ({
+	if (inst->type == XD3_ADD)
+	  {
+	    static int cnt;
+	    XPR(NT "DECODE:%d: ADD at %"Q"u (winoffset %"W"u) size %"W"u\n",
+	       cnt++,
+	       (stream->total_out + stream->dec_position - stream->dec_cpylen),
+	       stream->dec_position - stream->dec_cpylen,
+	       inst->size);
+	  }
+	else
+	  {
+	    static int cnt;
+	    XD3_ASSERT (inst->type == XD3_RUN);
+	    XPR(NT "DECODE:%d: RUN at %"Q"u (winoffset %"W"u) size %"W"u\n",
+	       cnt++,
+	       stream->total_out + stream->dec_position - stream->dec_cpylen,
+	       stream->dec_position - stream->dec_cpylen,
+	       inst->size);
+	  }
+      });
+    }
+
+  /* Check: the instruction fits in the output buffer; reject wraparound too. */
+  if (USIZE_T_OVERFLOW (stream->dec_position, inst->size) ||
+      stream->dec_position + inst->size > stream->dec_maxpos)
+    {
+      stream->msg = "size too large";
+      return XD3_INVALID_INPUT;
+    }
+  stream->dec_position += inst->size;
+  return 0;
+}
+
+/* Fetch the next opcode byte from the instruction section, load the
+ * matching code-table entry into dec_current1/dec_current2, and parse
+ * each half that is not XD3_NOOP (a table could hold NOOP in either
+ * slot, although that is unlikely).  Returns 0 on success,
+ * XD3_INVALID_INPUT when the instruction section is exhausted, or
+ * the error from xd3_decode_parse_halfinst. */
+static int
+xd3_decode_instruction (xd3_stream *stream)
+{
+  const xd3_dinst *entry;
+  int ret = 0;
+
+  if (stream->inst_sect.buf == stream->inst_sect.buf_max)
+    {
+      stream->msg = "instruction underflow";
+      return XD3_INVALID_INPUT;
+    }
+
+  entry = stream->code_table + *stream->inst_sect.buf;
+  stream->inst_sect.buf += 1;
+
+  stream->dec_current1.type = entry->type1;
+  stream->dec_current1.size = entry->size1;
+  stream->dec_current2.type = entry->type2;
+  stream->dec_current2.size = entry->size2;
+
+  if (entry->type1 != XD3_NOOP)
+    {
+      ret = xd3_decode_parse_halfinst (stream, & stream->dec_current1);
+    }
+  if (ret == 0 && entry->type2 != XD3_NOOP)
+    {
+      ret = xd3_decode_parse_halfinst (stream, & stream->dec_current2);
+    }
+  return ret;
+}
+
+/* Output the result of a single half-instruction.  OPT: This is the
+   decoder hotspot.  Modifies "inst" (type, size, addr), see below.  */
+static int
+xd3_decode_output_halfinst (xd3_stream *stream, xd3_hinst *inst)
+{
+  /* This method is reentrant for copy instructions which may return
+   * XD3_GETSRCBLK to the caller.  Each time through a copy takes the
+   * minimum of inst->size and the available space on whichever block
+   * supplies the data.  Returns 0 or an error code. */
+  usize_t take = inst->size;
+
+  if (USIZE_T_OVERFLOW (stream->avail_out, take) ||
+      stream->avail_out + take > stream->space_out)
+    {
+      stream->msg = "overflow while decoding";
+      return XD3_INVALID_INPUT;
+    }
+
+  XD3_ASSERT (inst->type != XD3_NOOP);
+
+  switch (inst->type)
+    {
+    case XD3_RUN:
+      {
+	/* Only require a single data byte. */
+	if (stream->data_sect.buf == stream->data_sect.buf_max)
+	  {
+	    stream->msg = "data underflow";
+	    return XD3_INVALID_INPUT;
+	  }
+
+	memset (stream->next_out + stream->avail_out,
+		stream->data_sect.buf[0],
+		take);
+
+	stream->data_sect.buf += 1;
+	stream->avail_out += take;
+	inst->type = XD3_NOOP;
+	break;
+      }
+    case XD3_ADD:
+      {
+	/* Require at least TAKE data bytes (no out-of-range pointer is formed). */
+	if (take > (usize_t) (stream->data_sect.buf_max - stream->data_sect.buf))
+	  {
+	    stream->msg = "data underflow";
+	    return XD3_INVALID_INPUT;
+	  }
+
+	memcpy (stream->next_out + stream->avail_out,
+		stream->data_sect.buf,
+		take);
+
+	stream->data_sect.buf += take;
+	stream->avail_out += take;
+	inst->type = XD3_NOOP;
+	break;
+      }
+    default:
+      {
+	usize_t i;
+	const uint8_t *src;
+	uint8_t *dst;
+	int overlap;
+
+	/* See if it copies from the VCD_TARGET/VCD_SOURCE window or
+	 * the target window.  Out-of-bounds checks for the addresses
+	 * and sizes are performed in xd3_decode_parse_halfinst.  This
+	 * if/else must set "overlap", "src", and "dst". */
+	if (inst->addr < stream->dec_cpylen)
+	  {
+	    /* In both branches we are copying from outside the
+	     * current decoder window, the first (VCD_TARGET) is
+	     * unimplemented. */
+	    overlap = 0;
+
+	    /* This branch sets "src".  As a side-effect, we modify
+	     * "inst" so that if we reenter this method after a
+	     * XD3_GETSRCBLK response the state is correct.  So if the
+	     * instruction can be fulfilled by a contiguous block of
+	     * memory then we will set:
+	     *
+	     *  inst->type = XD3_NOOP;
+	     *  inst->size = 0;
+	     */
+	    if (stream->dec_win_ind & VCD_TARGET)
+	      {
+		/* TODO: Users have requested long-distance copies of
+		 * similar material within a target (e.g., for dup
+		 * suppression in backups). This code path is probably
+		 * dead due to XD3_UNIMPLEMENTED in xd3_decode_setup_buffers */
+		inst->size = 0;
+		inst->type = XD3_NOOP;
+		stream->msg = "VCD_TARGET not implemented";
+		return XD3_UNIMPLEMENTED;
+	      }
+	    else
+	      {
+		/* In this case we have to read a source block, which
+		 * could return control to the caller.  We need to
+		 * know the first block number needed for this
+		 * copy. */
+		xd3_source *source = stream->src;
+		xoff_t block = source->cpyoff_blocks;
+		usize_t blkoff = source->cpyoff_blkoff;
+		const usize_t blksize = source->blksize;
+		int ret;
+
+		xd3_blksize_add (&block, &blkoff, source, inst->addr);
+		XD3_ASSERT (blkoff < blksize);
+
+		if ((ret = xd3_getblk (stream, block)))
+		  {
+		    /* could be a XD3_GETSRCBLK failure. */
+		    if (ret == XD3_TOOFARBACK)
+		      {
+			stream->msg = "non-seekable source in decode";
+			ret = XD3_INTERNAL;
+		      }
+		    return ret;
+		  }
+
+		src = source->curblk + blkoff;
+
+		/* This block is either full, or a partial block that
+		 * must contain enough bytes. */
+		if ((source->onblk != blksize) &&
+		    (blkoff + take > source->onblk))
+		  {
+		    IF_DEBUG1 (XPR(NT "[srcfile] short at blkno %"Q"u onblk "
+				   "%"W"u blksize %"W"u blkoff %"W"u take %"W"u\n",
+				   block,
+				   source->onblk,
+				   blksize,
+				   blkoff,
+				   take));
+		    stream->msg = "source file too short";
+		    return XD3_INVALID_INPUT;
+		  }
+
+		XD3_ASSERT (blkoff != blksize);
+
+		/* Check if we have enough data on this block to
+		 * finish the instruction. */
+		if (blkoff + take <= blksize)
+		  {
+		    inst->type = XD3_NOOP;
+		    inst->size = 0;
+		  }
+		else
+		  {
+		    take = blksize - blkoff;
+		    inst->size -= take;
+		    inst->addr += take;
+
+		    /* because (blkoff + take > blksize), above */
+		    XD3_ASSERT (inst->size != 0);
+		  }
+	      }
+	  }
+	else
+	  {
+	    /* TODO: the memcpy/overlap optimization, etc.  Overlap
+	     * here could be more specific, it's whether (inst->addr -
+	     * srclen) + inst->size > input_pos ?  And is the system
+	     * memcpy really any good? */
+	    overlap = 1;
+
+	    /* For a target-window copy, we know the entire range is
+	     * in-memory.  The dec_tgtaddrbase is negatively offset by
+	     * dec_cpylen because the addresses start beyond that
+	     * point. */
+	    src = stream->dec_tgtaddrbase + inst->addr;
+	    inst->type = XD3_NOOP;
+	    inst->size = 0;
+	  }
+
+	dst = stream->next_out + stream->avail_out;
+
+	stream->avail_out += take;
+
+	if (overlap)
+	  {
+	    /* Can't just memcpy here due to possible overlap. */
+	    for (i = take; i != 0; i -= 1)
+	      {
+		*dst++ = *src++;
+	      }
+	  }
+	else
+	  {
+	    memcpy (dst, src, take);
+	  }
+      }
+    }
+
+  return 0;
+}
+
+/* End the current window: zero dec_winbytes and the three section
+ * positions, switch to DEC_FINISH, and return XD3_OUTPUT. */
+static int
+xd3_decode_finish_window (xd3_stream *stream)
+{
+  stream->addr_sect.pos = 0;
+  stream->inst_sect.pos = 0;
+  stream->data_sect.pos = 0;
+  stream->dec_state     = DEC_FINISH;
+  stream->dec_winbytes  = 0;
+  return XD3_OUTPUT;
+}
+
+/* When secondary compression is compiled in (SECONDARY_ANY), run
+ * xd3_decode_secondary on each of the data, inst and addr sections
+ * whose VCD_*COMP bit is set in dec_del_ind, in that order, stopping
+ * at the first non-zero result.  Returns that result, else 0. */
+static int
+xd3_decode_secondary_sections (xd3_stream *secondary_stream)
+{
+#if SECONDARY_ANY
+  xd3_stream *const s = secondary_stream;
+  int ret = 0;
+  if (s->dec_del_ind & VCD_DATACOMP)
+    ret = xd3_decode_secondary (s, & s->data_sect, & xd3_sec_data (s));
+  if (ret == 0 && (s->dec_del_ind & VCD_INSTCOMP))
+    ret = xd3_decode_secondary (s, & s->inst_sect, & xd3_sec_inst (s));
+  if (ret == 0 && (s->dec_del_ind & VCD_ADDRCOMP))
+    ret = xd3_decode_secondary (s, & s->addr_sect, & xd3_sec_addr (s));
+  return ret;
+#else
+  return 0;
+#endif
+}
+/* Gather the data, inst and addr sections of the current window, then set up the output buffer. */
+static int
+xd3_decode_sections (xd3_stream *stream)
+{
+  usize_t need, more, take;
+  int copy, ret;
+  /* Returns 0 when ready to emit, XD3_INPUT for more input, or the xd3_decode_finish_window result. */
+  if ((stream->flags & XD3_JUST_HDR) != 0)
+    {
+      /* Nothing left to do. */
+      return xd3_decode_finish_window (stream);
+    }
+
+  /* To avoid extra copying, allocate three sections at once (but
+   * check for overflow). */
+  need = stream->inst_sect.size;
+
+  if (USIZE_T_OVERFLOW (need, stream->addr_sect.size))
+    {
+      stream->msg = "decoder section size overflow";
+      return XD3_INTERNAL;
+    }
+  need += stream->addr_sect.size;
+
+  if (USIZE_T_OVERFLOW (need, stream->data_sect.size))
+    {
+      stream->msg = "decoder section size overflow";
+      return XD3_INTERNAL;
+    }
+  need += stream->data_sect.size;
+
+  /* The window may be entirely processed. */
+  XD3_ASSERT (stream->dec_winbytes <= need);
+
+  /* Compute how much more input is needed. */
+  more = (need - stream->dec_winbytes);
+
+  /* How much to consume. */
+  take = xd3_min (more, stream->avail_in);
+
+  /* See if the input is completely available, to avoid copy. */
+  copy = (take != more);
+
+  /* If the window is skipped... */
+  if ((stream->flags & XD3_SKIP_WINDOW) != 0)
+    {
+      /* Skip the available input. */
+      DECODE_INPUT (take);
+
+      stream->dec_winbytes += take;
+      /* A non-zero copy means part of the window has not arrived yet. */
+      if (copy)
+	{
+	  stream->msg = "further input required";
+	  return XD3_INPUT;
+	}
+
+      return xd3_decode_finish_window (stream);
+    }
+
+  /* Read the sections in order, resuming at the one in progress. */
+  switch (stream->dec_state)
+    {
+    default:
+      stream->msg = "internal error";
+      return XD3_INVALID_INPUT;
+
+    case DEC_DATA:
+      if ((ret = xd3_decode_section (stream, & stream->data_sect,
+				     DEC_INST, copy))) { return ret; }
+    case DEC_INST: /* reached by fall-through as well */
+      if ((ret = xd3_decode_section (stream, & stream->inst_sect,
+				     DEC_ADDR, copy))) { return ret; }
+    case DEC_ADDR: /* reached by fall-through as well */
+      if ((ret = xd3_decode_section (stream, & stream->addr_sect,
+				     DEC_EMIT, copy))) { return ret; }
+    }
+
+  XD3_ASSERT (stream->dec_winbytes == need);
+
+  if ((ret = xd3_decode_secondary_sections (stream))) { return ret; }
+  /* With XD3_SKIP_EMIT the window is finished without producing output. */
+  if (stream->flags & XD3_SKIP_EMIT)
+    {
+      return xd3_decode_finish_window (stream);
+    }
+
+  /* OPT: A possible optimization is to avoid allocating memory in
+   * decode_setup_buffers and to avoid a large memcpy when the window
+   * consists of a single VCD_SOURCE copy instruction. */
+  if ((ret = xd3_decode_setup_buffers (stream))) { return ret; }
+
+  return 0;
+}
+/* Execute the window's instructions into the output buffer, then validate and finish the window. */
+static int
+xd3_decode_emit (xd3_stream *stream)
+{
+  int ret;
+
+  /* Produce output: originally structured to allow reentrant code
+   * that fills as much of the output buffer as possible, but VCDIFF
+   * semantics allows to copy from anywhere from the target window, so
+   * instead allocate a sufficiently sized buffer after the target
+   * window length is decoded.
+   *
+   * This code still needs to be reentrant to allow XD3_GETSRCBLK to
+   * return control.  This is handled by setting the
+   * stream->dec_currentN instruction types to XD3_NOOP after they
+   * have been processed. */
+  XD3_ASSERT (! (stream->flags & XD3_SKIP_EMIT));
+  XD3_ASSERT (stream->dec_tgtlen <= stream->space_out);
+  /* Loop until the instruction section is used up and no half-instruction is pending. */
+  while (stream->inst_sect.buf != stream->inst_sect.buf_max ||
+	 stream->dec_current1.type != XD3_NOOP ||
+	 stream->dec_current2.type != XD3_NOOP)
+    {
+      /* Decode next instruction pair. */
+      if ((stream->dec_current1.type == XD3_NOOP) &&
+	  (stream->dec_current2.type == XD3_NOOP) &&
+	  (ret = xd3_decode_instruction (stream))) { return ret; }
+
+      /* Output dec_current1 */
+      while ((stream->dec_current1.type != XD3_NOOP))
+	{
+	  if ((ret = xd3_decode_output_halfinst (stream, & stream->dec_current1)))
+	    {
+	      return ret;
+	    }
+	}
+      /* Output dec_current2 */
+      while (stream->dec_current2.type != XD3_NOOP)
+	{
+	  if ((ret = xd3_decode_output_halfinst (stream, & stream->dec_current2)))
+	    {
+	      return ret;
+	    }
+	}
+    }
+  /* The instructions must have produced exactly dec_tgtlen bytes. */
+  if (stream->avail_out != stream->dec_tgtlen)
+    {
+      IF_DEBUG2 (DP(RINT "AVAIL_OUT(%"W"u) != DEC_TGTLEN(%"W"u)\n",
+		    stream->avail_out, stream->dec_tgtlen));
+      stream->msg = "wrong window length";
+      return XD3_INVALID_INPUT;
+    }
+  /* Bytes left over in the data section are an error. */
+  if (stream->data_sect.buf != stream->data_sect.buf_max)
+    {
+      stream->msg = "extra data section";
+      return XD3_INVALID_INPUT;
+    }
+  /* Bytes left over in the address section are an error. */
+  if (stream->addr_sect.buf != stream->addr_sect.buf_max)
+    {
+      stream->msg = "extra address section";
+      return XD3_INVALID_INPUT;
+    }
+
+  /* OPT: Should cksum computation be combined with the above loop? */
+  if ((stream->dec_win_ind & VCD_ADLER32) != 0 &&
+      (stream->flags & XD3_ADLER32_NOVER) == 0)
+    {
+      uint32_t a32 = adler32 (1L, stream->next_out, stream->avail_out);
+
+      if (a32 != stream->dec_adler32)
+	{
+	  stream->msg = "target window checksum mismatch";
+	  return XD3_INVALID_INPUT;
+	}
+    }
+
+  /* Finished with a window. */
+  return xd3_decode_finish_window (stream);
+}
+
+int
+xd3_decode_input (xd3_stream *stream)
+{
+  int ret;
+
+  if (stream->enc_state != 0)
+    {
+      stream->msg = "encoder/decoder transition";
+      return XD3_INVALID_INPUT;
+    }
+
+#define BYTE_CASE(expr,x,nstate) \
+      do { \
+      if ( (expr) && \
+           ((ret = xd3_decode_byte (stream, & (x))) != 0) ) { return ret; } \
+      stream->dec_state = (nstate); \
+      } while (0)
+
+#define OFFSET_CASE(expr,x,nstate) \
+      do { \
+      if ( (expr) && \
+           ((ret = xd3_decode_offset (stream, & (x))) != 0) ) { return ret; } \
+      stream->dec_state = (nstate); \
+      } while (0)
+
+#define SIZE_CASE(expr,x,nstate) \
+      do { \
+      if ( (expr) && \
+           ((ret = xd3_decode_size (stream, & (x))) != 0) ) { return ret; } \
+      stream->dec_state = (nstate); \
+      } while (0)
+
+  switch (stream->dec_state)
+    {
+    case DEC_VCHEAD:
+      {
+	if ((ret = xd3_decode_bytes (stream, stream->dec_magic,
+				     & stream->dec_magicbytes, 4)))
+	  {
+	    return ret;
+	  }
+
+	if (stream->dec_magic[0] != VCDIFF_MAGIC1 ||
+	    stream->dec_magic[1] != VCDIFF_MAGIC2 ||
+	    stream->dec_magic[2] != VCDIFF_MAGIC3)
+	  {
+	    stream->msg = "not a VCDIFF input";
+	    return XD3_INVALID_INPUT;
+	  }
+
+	if (stream->dec_magic[3] != 0)
+	  {
+	    stream->msg = "VCDIFF input version > 0 is not supported";
+	    return XD3_INVALID_INPUT;
+	  }
+
+	stream->dec_state = DEC_HDRIND;
+      }
+    case DEC_HDRIND:
+      {
+	if ((ret = xd3_decode_byte (stream, & stream->dec_hdr_ind)))
+	  {
+	    return ret;
+	  }
+
+	if ((stream->dec_hdr_ind & VCD_INVHDR) != 0)
+	  {
+	    stream->msg = "unrecognized header indicator bits set";
+	    return XD3_INVALID_INPUT;
+	  }
+
+	stream->dec_state = DEC_SECONDID;
+      }
+
+    case DEC_SECONDID:
+      /* Secondary compressor ID: only if VCD_SECONDARY is set */
+      if ((stream->dec_hdr_ind & VCD_SECONDARY) != 0)
+	{
+	  BYTE_CASE (1, stream->dec_secondid, DEC_TABLEN);
+
+	  switch (stream->dec_secondid)
+	    {
+	    case VCD_FGK_ID:
+	      FGK_CASE (stream);
+	    case VCD_DJW_ID:
+	      DJW_CASE (stream);
+	    case VCD_LZMA_ID:
+	      LZMA_CASE (stream);
+	    default:
+	      stream->msg = "unknown secondary compressor ID";
+	      return XD3_INVALID_INPUT;
+	    }
+	}
+
+    case DEC_TABLEN:
+      /* Length of code table data: only if VCD_CODETABLE is set */
+      SIZE_CASE ((stream->dec_hdr_ind & VCD_CODETABLE) != 0,
+		 stream->dec_codetblsz, DEC_NEAR);
+
+      /* The codetblsz counts the two NEAR/SAME bytes */
+      if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0) {
+	if (stream->dec_codetblsz <= 2) {
+	  stream->msg = "invalid code table size";
+	  return ENOMEM;
+	}
+	stream->dec_codetblsz -= 2;
+      }
+    case DEC_NEAR:
+      /* Near modes: only if VCD_CODETABLE is set */
+      BYTE_CASE((stream->dec_hdr_ind & VCD_CODETABLE) != 0,
+		stream->acache.s_near, DEC_SAME);
+    case DEC_SAME:
+      /* Same modes: only if VCD_CODETABLE is set */
+      BYTE_CASE((stream->dec_hdr_ind & VCD_CODETABLE) != 0,
+		stream->acache.s_same, DEC_TABDAT);
+    case DEC_TABDAT:
+      /* Compressed code table data */
+
+      if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0)
+	{
+	  stream->msg = "VCD_CODETABLE support was removed";
+	  return XD3_UNIMPLEMENTED;
+	}
+      else
+	{
+	  /* Use the default table. */
+	  stream->acache.s_near = __rfc3284_code_table_desc.near_modes;
+	  stream->acache.s_same = __rfc3284_code_table_desc.same_modes;
+	  stream->code_table    = xd3_rfc3284_code_table ();
+	}
+
+      if ((ret = xd3_alloc_cache (stream))) { return ret; }
+
+      stream->dec_state = DEC_APPLEN;
+
+    case DEC_APPLEN:
+      /* Length of application data */
+      SIZE_CASE((stream->dec_hdr_ind & VCD_APPHEADER) != 0,
+		stream->dec_appheadsz, DEC_APPDAT);
+
+    case DEC_APPDAT:
+      /* Application data */
+      if (stream->dec_hdr_ind & VCD_APPHEADER)
+	{
+	  /* Note: we add an additional byte for padding, to allow
+	     0-termination. Check for overflow: */
+	  if (USIZE_T_OVERFLOW(stream->dec_appheadsz, 1))
+	    {
+	      stream->msg = "exceptional appheader size";
+	      return XD3_INVALID_INPUT;
+	    }
+
+	  if ((stream->dec_appheader == NULL) &&
+	      (stream->dec_appheader =
+	       (uint8_t*) xd3_alloc (stream,
+				     stream->dec_appheadsz+1, 1)) == NULL)
+	    {
+	      return ENOMEM;
+	    }
+
+	  stream->dec_appheader[stream->dec_appheadsz] = 0;
+
+	  if ((ret = xd3_decode_bytes (stream, stream->dec_appheader,
+				       & stream->dec_appheadbytes,
+				       stream->dec_appheadsz)))
+	    {
+	      return ret;
+	    }
+	}
+
+      /* xoff_t -> usize_t is safe because this is the first block. */
+      stream->dec_hdrsize = (usize_t) stream->total_in;
+      stream->dec_state = DEC_WININD;
+
+    case DEC_WININD:
+      {
+	/* Start of a window: the window indicator */
+	if ((ret = xd3_decode_byte (stream, & stream->dec_win_ind)))
+	  {
+	    return ret;
+	  }
+
+	stream->current_window = stream->dec_window_count;
+
+	if (XOFF_T_OVERFLOW (stream->dec_winstart, stream->dec_tgtlen))
+	  {
+	    stream->msg = "decoder file offset overflow";
+	    return XD3_INVALID_INPUT;
+	  }
+
+	stream->dec_winstart += stream->dec_tgtlen;
+
+	if ((stream->dec_win_ind & VCD_INVWIN) != 0)
+	  {
+	    stream->msg = "unrecognized window indicator bits set";
+	    return XD3_INVALID_INPUT;
+	  }
+
+	if ((ret = xd3_decode_init_window (stream))) { return ret; }
+
+	stream->dec_state = DEC_CPYLEN;
+
+	IF_DEBUG2 (DP(RINT "--------- TARGET WINDOW %"Q"u -----------\n",
+		      stream->current_window));
+      }
+
+    case DEC_CPYLEN:
+      /* Copy window length: only if VCD_SOURCE or VCD_TARGET is set */
+      SIZE_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpylen,
+		DEC_CPYOFF);
+
+      /* Set the initial, logical decoder position (HERE address) in
+       * dec_position.  This is set to just after the source/copy
+       * window, as we are just about to output the first byte of
+       * target window. */
+      stream->dec_position = stream->dec_cpylen;
+
+    case DEC_CPYOFF:
+      /* Copy window offset: only if VCD_SOURCE or VCD_TARGET is set */
+      OFFSET_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpyoff,
+		  DEC_ENCLEN);
+
+      /* Copy offset and copy length may not overflow. */
+      if (XOFF_T_OVERFLOW (stream->dec_cpyoff, stream->dec_cpylen))
+	{
+	  stream->msg = "decoder copy window overflows a file offset";
+	  return XD3_INVALID_INPUT;
+	}
+
+      /* Check copy window bounds: VCD_TARGET window may not exceed
+	 current position. */
+      if ((stream->dec_win_ind & VCD_TARGET) &&
+	  (stream->dec_cpyoff + stream->dec_cpylen >
+	   stream->dec_winstart))
+	{
+	  stream->msg = "VCD_TARGET window out of bounds";
+	  return XD3_INVALID_INPUT;
+	}
+
+    case DEC_ENCLEN:
+      /* Length of the delta encoding */
+      SIZE_CASE(1, stream->dec_enclen, DEC_TGTLEN);
+    case DEC_TGTLEN:
+      /* Length of target window */
+      SIZE_CASE(1, stream->dec_tgtlen, DEC_DELIND);
+
+      /* Set the maximum decoder position, beyond which we should not
+       * decode any data.  This is the maximum value for dec_position.
+       * This may not exceed the size of a usize_t. */
+      if (USIZE_T_OVERFLOW (stream->dec_cpylen, stream->dec_tgtlen))
+	{
+	  stream->msg = "decoder target window overflows a usize_t";
+	  return XD3_INVALID_INPUT;
+	}
+
+      /* Check for malicious files. */
+      if (stream->dec_tgtlen > XD3_HARDMAXWINSIZE)
+	{
+	  stream->msg = "hard window size exceeded";
+	  return XD3_INVALID_INPUT;
+	}
+
+      stream->dec_maxpos = stream->dec_cpylen + stream->dec_tgtlen;
+
+    case DEC_DELIND:
+      /* Delta indicator */
+      BYTE_CASE(1, stream->dec_del_ind, DEC_DATALEN);
+
+      if ((stream->dec_del_ind & VCD_INVDEL) != 0)
+	{
+	  stream->msg = "unrecognized delta indicator bits set";
+	  return XD3_INVALID_INPUT;
+	}
+
+      /* Delta indicator is only used with secondary compression. */
+      if ((stream->dec_del_ind != 0) && (stream->sec_type == NULL))
+	{
+	  stream->msg = "invalid delta indicator bits set";
+	  return XD3_INVALID_INPUT;
+	}
+
+      /* Section lengths */
+    case DEC_DATALEN:
+      SIZE_CASE(1, stream->data_sect.size, DEC_INSTLEN);
+    case DEC_INSTLEN:
+      SIZE_CASE(1, stream->inst_sect.size, DEC_ADDRLEN);
+    case DEC_ADDRLEN:
+      SIZE_CASE(1, stream->addr_sect.size, DEC_CKSUM);
+
+    case DEC_CKSUM:
+      /* Window checksum. */
+      if ((stream->dec_win_ind & VCD_ADLER32) != 0)
+	{
+	  int i;
+
+	  if ((ret = xd3_decode_bytes (stream, stream->dec_cksum,
+				       & stream->dec_cksumbytes, 4)))
+	    {
+	      return ret;
+	    }
+
+	  for (i = 0; i < 4; i += 1)
+	    {
+	      stream->dec_adler32 =
+		(stream->dec_adler32 << 8) | stream->dec_cksum[i];
+	    }
+	}
+
+      stream->dec_state = DEC_DATA;
+
+      /* Check dec_enclen for redundancy, otherwise it is not really used. */
+      {
+	usize_t enclen_check =
+	  (1 + (xd3_sizeof_size (stream->dec_tgtlen) +
+		xd3_sizeof_size (stream->data_sect.size) +
+		xd3_sizeof_size (stream->inst_sect.size) +
+		xd3_sizeof_size (stream->addr_sect.size)) +
+	   stream->data_sect.size +
+	   stream->inst_sect.size +
+	   stream->addr_sect.size +
+	   ((stream->dec_win_ind & VCD_ADLER32) ? 4 : 0));
+
+	if (stream->dec_enclen != enclen_check)
+	  {
+	    stream->msg = "incorrect encoding length (redundent)";
+	    return XD3_INVALID_INPUT;
+	  }
+      }
+
+      /* Returning here gives the application a chance to inspect the
+       * header, skip the window, etc. */
+      if (stream->current_window == 0) { return XD3_GOTHEADER; }
+      else                             { return XD3_WINSTART; }
+
+    case DEC_DATA:
+    case DEC_INST:
+    case DEC_ADDR:
+      /* Next read the three sections. */
+     if ((ret = xd3_decode_sections (stream))) { return ret; }
+
+    case DEC_EMIT:
+
+      /* To speed VCD_SOURCE block-address calculations, the source
+       * cpyoff_blocks and cpyoff_blkoff are pre-computed. */
+      if (stream->dec_win_ind & VCD_SOURCE)
+	{
+	  xd3_source *src = stream->src;
+
+	  if (src == NULL)
+	    {
+	      stream->msg = "source input required";
+	      return XD3_INVALID_INPUT;
+	    }
+
+	  xd3_blksize_div(stream->dec_cpyoff, src,
+			  &src->cpyoff_blocks,
+			  &src->cpyoff_blkoff);
+	  
+	  IF_DEBUG2(DP(RINT
+		       "[decode_cpyoff] %"Q"u "
+		       "cpyblkno %"Q"u "
+		       "cpyblkoff %"W"u "
+		       "blksize %"W"u\n",
+		       stream->dec_cpyoff,
+		       src->cpyoff_blocks,
+		       src->cpyoff_blkoff,
+		       src->blksize));
+	}
+
+      /* xd3_decode_emit returns XD3_OUTPUT on every success. */
+      if ((ret = xd3_decode_emit (stream)) == XD3_OUTPUT)
+	{
+	  stream->total_out += stream->avail_out;
+	}
+
+      return ret;
+
+    case DEC_FINISH:
+      {
+	if (stream->dec_win_ind & VCD_TARGET)
+	  {
+	    if (stream->dec_lastwin == NULL)
+	      {
+		stream->dec_lastwin   = stream->next_out;
+		stream->dec_lastspace = stream->space_out;
+	      }
+	    else
+	      {
+		xd3_swap_uint8p (& stream->dec_lastwin,
+				 & stream->next_out);
+		xd3_swap_usize_t (& stream->dec_lastspace,
+				  & stream->space_out);
+	      }
+	  }
+
+	stream->dec_lastlen   = stream->dec_tgtlen;
+	stream->dec_laststart = stream->dec_winstart;
+	stream->dec_window_count += 1;
+
+	/* Note: the updates to dec_winstart & current_window are
+	 * deferred until after the next DEC_WININD byte is read. */
+	stream->dec_state = DEC_WININD;
+	return XD3_WINFINISH;
+      }
+
+    default:
+      stream->msg = "invalid state";
+      return XD3_INVALID_INPUT;
+    }
+}
+
+#endif // _XDELTA3_DECODE_H_
diff --git a/third-party/xdelta3/xdelta3/xdelta3-djw.h b/third-party/xdelta3/xdelta3/xdelta3-djw.h
new file mode 100644
index 0000000000..e4a5d1f202
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/xdelta3-djw.h
@@ -0,0 +1,1835 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+#ifndef _XDELTA3_DJW_H_
+#define _XDELTA3_DJW_H_
+
+/* The following people deserve much credit for the algorithms and
+ * techniques contained in this file:
+
+ Julian Seward
+ Bzip2 sources, implementation of the multi-table Huffman technique.
+
+ Jean-loup Gailly and Mark Adler and L. Peter Deutsch
+ Zlib source code, RFC 1951
+
+ Daniel S. Hirschberg and Debra A. LeLewer
+ "Efficient Decoding of Prefix Codes"
+ Communications of the ACM, April 1990 33(4).
+
+ David J. Wheeler
+ Program bred3.c, bexp3 and accompanying documents bred3.ps, huff.ps.
+ This contains the idea behind the multi-table Huffman and 1-2 coding
+ techniques.
+ ftp://ftp.cl.cam.ac.uk/users/djw3/
+
+*/
+
+/* OPT: during the multi-table iteration, pick the worst-overall
+ * performing table and replace it with exactly the frequencies of the
+ * worst-overall performing sector or N-worst performing sectors. */
+
+/* REF: See xdfs-0.222 and xdfs-0.226 for some old experiments with
+ * the Bzip prefix coding strategy.  xdfs-0.256 contains the last of
+ * the other-format tests, including RFC1950 and the RFC1950+MTF
+ * tests. */
+
+#define DJW_MAX_CODELEN      20U /* Maximum length of an alphabet code. */
+
+/* Code lengths are themselves code-length encoded, so the total number of
+ * codes is: [RUN_0, RUN_1, 1-DJW_MAX_CODELEN] */
+#define DJW_TOTAL_CODES      (DJW_MAX_CODELEN+2)
+
+#define RUN_0                0U /* Symbols used in MTF+1/2 coding. */
+#define RUN_1                1U
+
+/* Number of code lengths always encoded (djw_encode_12basic array) */
+#define DJW_BASIC_CODES      5U  
+#define DJW_RUN_CODES        2U  /* Number of run codes (RUN_0, RUN_1) */
+
+/* Offset of extra codes: the run codes and basic codes come before them */
+#define DJW_EXTRA_12OFFSET   (DJW_BASIC_CODES + DJW_RUN_CODES)
+
+/* Number of optionally encoded code lengths (djw_encode_12extra array) */
+#define DJW_EXTRA_CODES      15U
+
+/* Number of bits to code [0-DJW_EXTRA_CODES] */
+#define DJW_EXTRA_CODE_BITS  4U  
+
+#define DJW_MAX_GROUPS       8U  /* Max number of group coding tables */
+#define DJW_GROUP_BITS       3U  /* Bits to code [1-DJW_MAX_GROUPS], as groups-1 */
+
+#define DJW_SECTORSZ_MULT     5U  /* Sector size is coded as (size/MULT)-1 */
+#define DJW_SECTORSZ_BITS     5U  /* Number of bits to code the sector size */
+#define DJW_SECTORSZ_MAX      ((1U << DJW_SECTORSZ_BITS) * DJW_SECTORSZ_MULT)
+
+/* Maximum number of iterations to find group tables. */
+#define DJW_MAX_ITER         6U
+/* Minimum number of bits an iteration must reduce coding by. */
+#define DJW_MIN_IMPROVEMENT  20U 
+
+/* Maximum code length of a prefix code length */
+#define DJW_MAX_CLCLEN       15U
+
+/* Number of bits to code [0-DJW_MAX_CLCLEN] */
+#define DJW_CLCLEN_BITS      4U  
+
+#define DJW_MAX_GBCLEN       7U  /* Maximum code length of a group selector */
+
+/* Number of bits to code [0-DJW_MAX_GBCLEN]
+ * TODO: Actually, should never have zero code lengths here, or else a group
+ * went unused.  Write a test for this: if a group goes unused, eliminate
+ * it? */
+#define DJW_GBCLEN_BITS      3U
+
+/* Coding must beat the raw input by at least this many bits to be used. */
+#define EFFICIENCY_BITS      16U
+
+typedef struct _djw_stream   djw_stream;
+typedef struct _djw_heapen   djw_heapen;
+typedef struct _djw_prefix   djw_prefix;
+typedef uint32_t             djw_weight; /* A symbol frequency count. */
+
+struct _djw_heapen /* One node of the prefix tree built in djw_build_prefix. */
+{
+  uint32_t depth;  /* 0 for a symbol; 1 + deeper child for a merged node. */
+  uint32_t freq;   /* Symbol frequency, or the sum of the two merged nodes. */
+  uint32_t parent; /* Index of the parent entry; 0 means no parent. */
+};
+
+struct _djw_prefix /* Symbols to be MTF + 1/2 coded, and the coded result. */
+{
+  usize_t   scount; /* Number of entries in symbol[]. */
+  uint8_t *symbol;  /* Input symbols. */
+  usize_t   mcount; /* Number of entries written to mtfsym[]. */
+  uint8_t *mtfsym;  /* Output: RUN_0/RUN_1 symbols and offset MTF positions. */
+  uint8_t *repcnt;  /* NOTE(review): not referenced in this part of the file. */
+};
+
+struct _djw_stream
+{
+  int unused; /* Placeholder member. */
+};
+
+/* Each Huffman table consists of 256 "code length" (CLEN) codes,
+ * which are themselves Huffman coded after eliminating repeats and
+ * move-to-front coding.  The prefix consists of all the CLEN codes in
+ * djw_encode_basic plus a 4-bit value stating how many of the
+ * djw_encode_extra codes are actually coded (the rest are presumed
+ * zero, or unused CLEN codes).
+ *
+ * The values of these two arrays were arrived at by studying the
+ * distribution of min and max clen over a collection of DATA, INST,
+ * and ADDR inputs.  The goal is to specify the order of
+ * djw_extra_codes that is most likely to minimize the number of extra
+ * codes that must be encoded.
+ *
+ * Results: 158896 sections were counted by compressing files (window
+ * size 512K) listed with: `find / -type f ( -user jmacd -o -perm +444
+ * )`
+ *
+ * The distribution of CLEN codes for each efficient invocation of the
+ * secondary compressor (taking the best number of groups/sector size)
+ * was recorded.  Then we look at the distribution of min and max clen
+ * values, counting the number of times the value C_low is less than
+ * the min and C_high is greater than the max.  Values >= C_high and
+ * <= C_low will not have their lengths coded.  The results are sorted
+ * and the least likely 15 are placed into the djw_encode_extra[]
+ * array in order.  These values are used as the initial MTF ordering.
+
+ clow[1] = 155119
+ clow[2] = 140325
+ clow[3] = 84072
+ ---
+ clow[4] = 7225
+ clow[5] = 1093
+ clow[6] = 215
+ ---
+ chigh[4] = 1
+ chigh[5] = 30
+ chigh[6] = 218
+ chigh[7] = 2060
+ chigh[8] = 13271
+ ---
+ chigh[9] = 39463
+ chigh[10] = 77360
+ chigh[11] = 118298
+ chigh[12] = 141360
+ chigh[13] = 154086
+ chigh[14] = 157967
+ chigh[15] = 158603
+ chigh[16] = 158864
+ chigh[17] = 158893
+ chigh[18] = 158895
+ chigh[19] = 158896
+ chigh[20] = 158896
+
+*/
+
+static const uint8_t djw_encode_12extra[DJW_EXTRA_CODES] =
+  { /* Optional code lengths; they follow the basic ones in the MTF list. */
+    9, 10, 3, 11, 2, 12, 13, 1, 14, 15, 16, 17, 18, 19, 20,
+  };
+
+static const uint8_t djw_encode_12basic[DJW_BASIC_CODES] =
+  { /* Code lengths placed right after 0 in the initial MTF list. */
+    4, 5, 6, 7, 8,
+  };
+
+/*********************************************************************/
+/*                              DECLS                                */
+/*********************************************************************/
+
+static djw_stream*     djw_alloc           (xd3_stream *stream);
+static int             djw_init            (xd3_stream *stream, 
+					    djw_stream *h,
+					    int is_encode);
+static void            djw_destroy         (xd3_stream *stream,
+					    djw_stream *h);
+
+#if XD3_ENCODER
+static int             xd3_encode_huff     (xd3_stream   *stream,
+					    djw_stream  *sec_stream,
+					    xd3_output   *input,
+					    xd3_output   *output,
+					    xd3_sec_cfg  *cfg);
+#endif
+
+static int             xd3_decode_huff     (xd3_stream     *stream,
+					    djw_stream    *sec_stream,
+					    const uint8_t **input,
+					    const uint8_t  *const input_end,
+					    uint8_t       **output,
+					    const uint8_t  *const output_end);
+
+/*********************************************************************/
+/*                             HUFFMAN                               */
+/*********************************************************************/
+
+static djw_stream*
+djw_alloc (xd3_stream *stream)
+{ /* Request room for one djw_stream from xd3_alloc; its result is returned. */
+  return (djw_stream*) xd3_alloc (stream, sizeof (djw_stream), 1);
+}
+
+static int
+djw_init (xd3_stream *stream, djw_stream *h, int is_encode)
+{
+  /* No setup: fields are initialized prior to use.  Always returns 0. */
+  return 0;
+}
+
+static void
+djw_destroy (xd3_stream *stream,
+	     djw_stream *h)
+{ /* Hand H back through xd3_free. */
+  xd3_free (stream, h);
+}
+
+
+/*********************************************************************/
+/*                               HEAP                                */
+/*********************************************************************/
+
+static inline int
+heap_less (const djw_heapen *a, const djw_heapen *b)
+{
+  /* Order by frequency; equal frequencies are ordered by depth. */
+  if (a->freq != b->freq) { return a->freq < b->freq; }
+  return a->depth < b->depth;
+}
+
+static inline void
+heap_insert (usize_t *heap, const djw_heapen *ents, usize_t p, const usize_t e)
+{
+  /* Sift ents[e] up from the free slot heap[p]: each ancestor that E is
+   * less than moves down one level, then E fills the hole.  The walk only
+   * stops at the root if heap[0] indexes an entry E is never less than. */
+  usize_t hole;
+
+  for (hole = p; heap_less (& ents[e], & ents[heap[hole/2]]); hole /= 2)
+    {
+      heap[hole] = heap[hole/2];
+    }
+
+  heap[hole] = e;
+}
+
+static inline djw_heapen*
+heap_extract (usize_t *heap, const djw_heapen *ents, usize_t heap_last)
+{ /* Remove and return the entry at the top (slot 1) of the heap. */
+  usize_t smallest = heap[1];
+  usize_t p, pc, t; /* Parent slot, child slot, swap temporary. */
+
+  /* Caller decrements heap_last, so heap_last+1 is the replacement elt. */
+  heap[1] = heap[heap_last+1];
+
+  /* Re-heapify: sift the replacement down from the top. */
+  for (p = 1; ; p = pc)
+    {
+      pc = p*2; /* First child of p. */
+
+      /* Reached bottom of heap */
+      if (pc > heap_last) { break; }
+
+      /* See if second child is smaller. */
+      if (pc < heap_last && heap_less (& ents[heap[pc+1]], & ents[heap[pc]]))
+	{
+	  pc += 1;
+	}
+
+      /* If pc is not smaller than p, heap property re-established. */
+      if (! heap_less (& ents[heap[pc]], & ents[heap[p]])) { break; }
+
+      t = heap[pc]; /* Swap parent with the smaller child and descend. */
+      heap[pc] = heap[p];
+      heap[p] = t;
+    }
+
+  return (djw_heapen*) & ents[smallest]; /* The cast drops the const. */
+}
+
+#if XD3_DEBUG
+static void
+heap_check (usize_t *heap, djw_heapen *ents, usize_t heap_last)
+{ /* Debug aid: assert the heap ordering for every slot 1..heap_last. */
+  usize_t i;
+  for (i = 1; i <= heap_last; i += 1)
+    {
+      /* Heap property: child not less than parent (slot 1 vs. heap[0]). */
+      XD3_ASSERT (! heap_less (& ents[heap[i]], & ents[heap[i/2]]));
+
+      IF_DEBUG2 (DP(RINT "heap[%"W"u] = %u\n", i, ents[heap[i]].freq));
+    }
+}
+#endif
+
+/*********************************************************************/
+/*                             MTF, 1/2                              */
+/*********************************************************************/
+
+static inline usize_t
+djw_update_mtf (uint8_t *mtf, usize_t mtf_i)
+{
+  /* Move mtf[mtf_i] to the front of the list and return it. */
+  const usize_t front = mtf[mtf_i];
+  int pos = mtf_i;
+
+  while (pos != 0) { mtf[pos] = mtf[pos-1]; pos -= 1; }
+  mtf[0] = front;
+  return front;
+}
+
+static inline void
+djw_update_1_2 (int *mtf_run, usize_t *mtf_i,
+		uint8_t *mtfsym, djw_weight *freq)
+{ /* Turn the pending run length *mtf_run into RUN_0/RUN_1 symbols. */
+  uint8_t code;
+  
+  do
+    {
+      /* Offset by 1, since any number of RUN_ symbols implies run>0... */
+      *mtf_run -= 1;
+
+      code = (*mtf_run & 1) ? RUN_1 : RUN_0; /* Low bit picks the symbol. */
+
+      mtfsym[(*mtf_i)++] = code; /* Append and advance the output index. */
+      freq[code] += 1;
+      *mtf_run >>= 1; /* Move on to the next bit of the run length. */
+    }
+  while (*mtf_run >= 1);
+
+  *mtf_run = 0; /* The run has been written out in full. */
+}
+
+static void
+djw_init_clen_mtf_1_2 (uint8_t *clmtf)
+{
+  /* Initial move-to-front order for code lengths: 0 first, then the
+   * basic lengths, then the extra lengths, each in table order.  Both
+   * tables hold uint8_t, so sizeof gives their element counts. */
+  uint8_t *const basic = clmtf + 1;
+  uint8_t *const extra = basic + sizeof (djw_encode_12basic);
+
+  clmtf[0] = 0;
+
+  memcpy (basic, djw_encode_12basic, sizeof (djw_encode_12basic));
+
+  memcpy (extra, djw_encode_12extra, sizeof (djw_encode_12extra));
+}
+
+/*********************************************************************/
+/*                           PREFIX CODES                            */
+/*********************************************************************/
+#if XD3_ENCODER
+static usize_t
+djw_build_prefix (const djw_weight *freq, uint8_t *clen, usize_t asize, usize_t maxlen)
+{ /* Fill CLEN[0..asize) with code lengths for FREQ; returns the total bits. */
+  /* Heap with 0th entry unused, prefix tree with up to ALPHABET_SIZE-1
+   * internal nodes, never more than ALPHABET_SIZE entries actually in the
+   * heap (minimum weight subtrees during prefix construction).  First
+   * ALPHABET_SIZE entries are the actual symbols, next ALPHABET_SIZE-1 are
+   * internal nodes. */
+  djw_heapen ents[ALPHABET_SIZE * 2];
+  usize_t heap[ALPHABET_SIZE + 1];
+
+  usize_t heap_last; /* Index of the last _valid_ heap entry. */
+  usize_t ents_size; /* Number of entries, including 0th fake entry */
+  usize_t  overflow;  /* Set to 1 when some code length exceeds maxlen */
+  usize_t total_bits; /* Sum of code length times frequency over symbols */
+  usize_t i;
+
+  IF_DEBUG (usize_t first_bits = 0);
+
+  /* Insert real symbol frequencies. */
+  for (i = 0; i < asize; i += 1)
+    {
+      ents[i+1].freq = freq[i];
+      IF_DEBUG2 (DP(RINT "ents[%"W"i] = freq[%"W"u] = %d\n",
+			i+1, i, freq[i]));
+    }
+
+ again:
+
+  /* The loop is re-entered each time an overflow occurs.  Re-initialize... */
+  heap_last = 0;
+  ents_size = 1;
+  overflow  = 0;
+  total_bits = 0;
+
+  /* 0th entry terminates the while loop in heap_insert (it's the parent of
+   * the smallest element, always less-than) */
+  heap[0] = 0;
+  ents[0].depth = 0;
+  ents[0].freq  = 0;
+
+  /* Initial heap: one leaf per symbol whose frequency is non-zero. */
+  for (i = 0; i < asize; i += 1, ents_size += 1)
+    {
+      ents[ents_size].depth  = 0;
+      ents[ents_size].parent = 0;
+
+      if (ents[ents_size].freq != 0)
+	{
+	  heap_insert (heap, ents, ++heap_last, ents_size);
+	}
+    }
+
+  IF_DEBUG (heap_check (heap, ents, heap_last));
+
+  /* Must be at least one symbol, or else we can't get here. */
+  XD3_ASSERT (heap_last != 0);
+
+  /* If there is only one symbol, fake a second to prevent zero-length
+   * codes. */
+  if (heap_last == 1)
+    {
+      /* Fake the last symbol if symbol 0 is in use, otherwise symbol 0. */
+      usize_t s = freq[0] ? asize-1 : 0;
+      ents[s+1].freq = 1;
+      goto again;
+    }
+
+  /* Build prefix tree: merge the two lightest subtrees under a new node. */
+  while (heap_last > 1)
+    {
+      djw_heapen *h1 = heap_extract (heap, ents, --heap_last);
+      djw_heapen *h2 = heap_extract (heap, ents, --heap_last);
+
+      ents[ents_size].freq   = h1->freq + h2->freq;
+      ents[ents_size].depth  = 1 + xd3_max (h1->depth, h2->depth);
+      ents[ents_size].parent = 0;
+
+      h1->parent = h2->parent = ents_size;
+
+      heap_insert (heap, ents, ++heap_last, ents_size++);
+    }
+
+  IF_DEBUG (heap_check (heap, ents, heap_last));
+
+  /* Now compute prefix code lengths, counting parents. */
+  for (i = 1; i < asize+1; i += 1)
+    {
+      usize_t b = 0; /* Code length of symbol i-1; stays 0 if unused. */
+
+      if (ents[i].freq != 0)
+	{
+	  usize_t p = i;
+
+	  while ((p = ents[p].parent) != 0) { b += 1; }
+
+	  if (b > maxlen) { overflow = 1; }
+
+	  total_bits += b * freq[i-1];
+	}
+
+      /* clen is 0-origin, unlike ents. */
+      IF_DEBUG2 (DP(RINT "clen[%"W"u] = %"W"u\n", i-1, b));
+      clen[i-1] = b;
+    }
+
+  IF_DEBUG (if (first_bits == 0) first_bits = total_bits);
+
+  if (! overflow)
+    {
+      IF_DEBUG2 (if (first_bits != total_bits)
+      {
+	DP(RINT "code length overflow changed %"W"u bits\n",
+	   total_bits - first_bits);
+      });
+      return total_bits;
+    }
+
+  /* OPT: There is a non-looping way to fix overflow shown in zlib, but this
+   * is easier (for now), as done in bzip2. */
+  for (i = 1; i < asize+1; i += 1)
+    {
+      ents[i].freq = ents[i].freq / 2 + 1; /* Halve, but never reach zero. */
+    }
+
+  goto again;
+}
+
+static void
+djw_build_codes (usize_t *codes, const uint8_t *clen, usize_t asize, usize_t abs_max)
+{
+  /* Assign prefix codes by length: lengths are visited from shortest to
+   * longest and, within one length, symbols in index order take
+   * consecutive values.  Symbols with a zero length get no code (their
+   * codes[] slot is left untouched). */
+  usize_t sym, len;
+  usize_t shortest = DJW_MAX_CODELEN;
+  usize_t longest = 0;
+  usize_t next_code = 0;
+
+  /* Range of code lengths in use (zero lengths never lower the minimum). */
+  for (sym = 0; sym < asize; sym += 1)
+    {
+      const usize_t cur = clen[sym];
+
+      if (cur != 0 && cur < shortest) { shortest = cur; }
+      if (cur > longest)              { longest = cur; }
+    }
+
+  XD3_ASSERT (longest <= abs_max);
+
+  /* Hand out codes one length at a time, doubling between lengths. */
+  for (len = shortest; len <= longest; len += 1, next_code <<= 1)
+    {
+      for (sym = 0; sym < asize; sym += 1)
+	{
+	  if (clen[sym] != len) { continue; }
+
+	  codes[sym] = next_code;
+	  next_code += 1;
+	}
+    }
+
+  IF_DEBUG2 ({
+      for (sym = 0; sym < asize; sym += 1)
+	{
+	  DP(RINT "code[%"W"u] = %"W"u\n", sym, codes[sym]);
+	}
+    });
+}
+
+/*********************************************************************/
+/*			      MOVE-TO-FRONT                          */
+/*********************************************************************/
+static void
+djw_compute_mtf_1_2 (djw_prefix  *prefix,
+		     uint8_t     *mtf,
+		     djw_weight  *freq_out,
+		     usize_t      nsym)
+{ /* MTF-code prefix->symbol into prefix->mtfsym; tally output in FREQ_OUT. */
+  size_t i, j, k;
+  usize_t sym;
+  usize_t size = prefix->scount;
+  usize_t mtf_i = 0; /* Next free slot in prefix->mtfsym. */
+  int mtf_run = 0;   /* Symbols found at MTF position 0 and not yet coded. */
+
+  /* This +2 is for the RUN_0, RUN_1 codes */
+  memset (freq_out, 0, sizeof (freq_out[0]) * (nsym+2));
+
+  for (i = 0; i < size; )
+    {
+      /* OPT: Bzip optimizes this algorithm a little by effectively checking
+       * j==0 before the MTF update. */
+      sym = prefix->symbol[i++];
+
+      for (j = 0; mtf[j] != sym; j += 1) { } /* Unbounded: sym must be in mtf. */
+
+      XD3_ASSERT (j <= nsym);
+
+      for (k = j; k >= 1; k -= 1) { mtf[k] = mtf[k-1]; } /* Shift up by one. */
+
+      mtf[0] = sym;
+
+      if (j == 0) /* Same symbol as the previous front: extend the run. */
+	{
+	  mtf_run += 1;
+	  continue;
+	}
+
+      if (mtf_run > 0) /* A run ended: write it before this symbol. */
+	{
+	  djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out);
+	}
+
+      /* Non-zero symbols are offset by RUN_1 */
+      prefix->mtfsym[mtf_i++] = (uint8_t)(j+RUN_1);
+      freq_out[j+RUN_1] += 1;
+    }
+
+  if (mtf_run > 0) /* Write out a run that reaches the end of the input. */
+    {
+      djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out);
+    }
+
+  prefix->mcount = mtf_i;
+}
+
+/* Tally how often each byte value occurs across every page of INPUT into
+ * FREQ (ALPHABET_SIZE counters, cleared first); returns the byte total. */
+static usize_t
+djw_count_freqs (djw_weight *freq, xd3_output *input)
+{
+  xd3_output *page = input;
+  usize_t total = 0;
+
+  memset (freq, 0, sizeof (freq[0]) * ALPHABET_SIZE);
+
+  while (page != NULL)
+    {
+      const uint8_t *cur = page->base;
+      const uint8_t *const end = cur + page->next;
+
+      /* NOTE(review): do/while tallies one byte even when page->next is
+       * 0; presumably pages are never empty here -- confirm. */
+      do { freq[*cur] += 1; cur += 1; } while (cur < end);
+
+      total += page->next;
+      page = page->next_page;
+    }
+
+  IF_DEBUG2 ({int b;
+  DP(RINT "freqs: ");
+  for (b = 0; b < ALPHABET_SIZE; b += 1)
+    {
+      DP(RINT "%u ", freq[b]);
+    }
+  DP(RINT "\n");});
+
+  return total;
+}
+
+static void
+djw_compute_multi_prefix (usize_t     groups,
+			  uint8_t     clen[DJW_MAX_GROUPS][ALPHABET_SIZE],
+			  djw_prefix *prefix)
+{
+  /* Flatten the per-group code lengths into prefix->symbol: group 0 is
+   * copied whole, later groups contribute only their non-zero lengths.
+   * prefix->scount ends up as the number of symbols stored. */
+  usize_t g, s;
+  usize_t stored = ALPHABET_SIZE;
+
+  memcpy (prefix->symbol, clen[0], ALPHABET_SIZE);
+
+  for (g = 1; g < groups; g += 1)
+    {
+      const uint8_t *lens = clen[g];
+      for (s = 0; s < ALPHABET_SIZE; s += 1)
+	{
+	  if (lens[s] != 0) { prefix->symbol[stored++] = lens[s]; }
+	}
+    }
+  prefix->scount = stored;
+}
+
+static void
+djw_compute_prefix_1_2 (djw_prefix *prefix, djw_weight *freq)
+{ /* MTF + 1/2 code the code lengths held in PREFIX, tallying into FREQ. */
+  /* This +1 is for the 0 code-length. */
+  uint8_t clmtf[DJW_MAX_CODELEN+1];
+
+  djw_init_clen_mtf_1_2 (clmtf); /* Start from the fixed initial ordering. */
+
+  djw_compute_mtf_1_2 (prefix, clmtf, freq, DJW_MAX_CODELEN);
+}
+
+static int
+djw_encode_prefix (xd3_stream   *stream,
+		   xd3_output  **output,
+		   bit_state    *bstate,
+		   djw_prefix   *prefix)
+{ /* Write PREFIX to OUTPUT; returns 0 or the first xd3_encode_bits error. */
+  int ret;
+  size_t i;
+  usize_t num_to_encode;
+  djw_weight clfreq[DJW_TOTAL_CODES]; /* Frequency of each MTF symbol. */
+  uint8_t    clclen[DJW_TOTAL_CODES]; /* Code length of each MTF symbol. */
+  usize_t    clcode[DJW_TOTAL_CODES]; /* Code value of each MTF symbol. */
+
+  /* Move-to-front encode prefix symbols, count frequencies */
+  djw_compute_prefix_1_2 (prefix, clfreq);
+
+  /* Compute codes (the bit total djw_build_prefix returns is not used) */
+  djw_build_prefix (clfreq, clclen, DJW_TOTAL_CODES, DJW_MAX_CLCLEN);
+  djw_build_codes  (clcode, clclen, DJW_TOTAL_CODES, DJW_MAX_CLCLEN);
+
+  /* Compute number of extra codes beyond basic ones for this template. */
+  num_to_encode = DJW_TOTAL_CODES;
+  while (num_to_encode > DJW_EXTRA_12OFFSET && clclen[num_to_encode-1] == 0)
+    {
+      num_to_encode -= 1; /* Drop trailing zero lengths. */
+    }
+  XD3_ASSERT (num_to_encode - DJW_EXTRA_12OFFSET < (1 << DJW_EXTRA_CODE_BITS));
+
+  /* Encode: # of extra codes */
+  if ((ret = xd3_encode_bits (stream, output, bstate, DJW_EXTRA_CODE_BITS,
+			      num_to_encode - DJW_EXTRA_12OFFSET)))
+    {
+      return ret;
+    }
+
+  /* Encode: MTF code lengths, DJW_CLCLEN_BITS bits each */
+  for (i = 0; i < num_to_encode; i += 1)
+    {
+      if ((ret = xd3_encode_bits (stream, output, bstate,
+				  DJW_CLCLEN_BITS, clclen[i])))
+	{
+	  return ret;
+	}
+    }
+
+  /* Encode: CLEN code lengths, each MTF symbol written with its own code */
+  for (i = 0; i < prefix->mcount; i += 1)
+    {
+      usize_t mtf_sym = prefix->mtfsym[i];
+      usize_t bits    = clclen[mtf_sym];
+      usize_t code    = clcode[mtf_sym];
+
+      if ((ret = xd3_encode_bits (stream, output, bstate, bits, code)))
+	{
+	  return ret;
+	}
+    }
+
+  return 0;
+}
+
+static void
+djw_compute_selector_1_2 (djw_prefix *prefix,
+			  usize_t     groups,
+			  djw_weight *gbest_freq)
+{
+  /* MTF list starts as the identity: group g sits at position g. */
+  uint8_t order[DJW_MAX_GROUPS];
+  usize_t g = 0;
+
+  while (g < groups) { order[g] = g; g += 1; }
+  djw_compute_mtf_1_2 (prefix, order, gbest_freq, groups);
+}
+
+static int
+xd3_encode_howmany_groups (xd3_stream *stream,
+			   xd3_sec_cfg *cfg,
+			   usize_t input_size,
+			   usize_t *ret_groups,
+			   usize_t *ret_sector_size)
+{ /* Pick group count and sector size; 0 on success, XD3_INTERNAL on bad CFG. */
+  usize_t cfg_groups = 0;       /* 0 means "not fixed by CFG". */
+  usize_t cfg_sector_size = 0;  /* 0 means "not fixed by CFG". */
+  usize_t sugg_groups = 0;
+  usize_t sugg_sector_size = 0;
+
+  if (cfg->ngroups != 0) /* CFG fixes the group count: validate it. */
+    {
+      if (cfg->ngroups > DJW_MAX_GROUPS)
+	{
+	  stream->msg = "invalid secondary encoder group number";
+	  return XD3_INTERNAL;
+	}
+
+      cfg_groups = cfg->ngroups;
+    }
+
+  if (cfg->sector_size != 0) /* CFG fixes the sector size: validate it. */
+    {
+      if (cfg->sector_size < DJW_SECTORSZ_MULT ||
+	  cfg->sector_size > DJW_SECTORSZ_MAX ||
+	  (cfg->sector_size % DJW_SECTORSZ_MULT) != 0)
+	{
+	  stream->msg = "invalid secondary encoder sector size";
+	  return XD3_INTERNAL;
+	}
+
+      cfg_sector_size = cfg->sector_size;
+    }
+
+  if (cfg_groups == 0 || cfg_sector_size == 0)
+    {
+      /* These values were found empirically using xdelta3-tune around version
+       * xdfs-0.256. */
+      switch (cfg->data_type)
+	{
+	case DATA_SECTION:
+	  if      (input_size < 1000)   { sugg_groups = 1; sugg_sector_size = 0; }
+	  else if (input_size < 4000)   { sugg_groups = 2; sugg_sector_size = 10; }
+	  else if (input_size < 7000)   { sugg_groups = 3; sugg_sector_size = 10; }
+	  else if (input_size < 10000)  { sugg_groups = 4; sugg_sector_size = 10; }
+	  else if (input_size < 25000)  { sugg_groups = 5; sugg_sector_size = 10; }
+	  else if (input_size < 50000)  { sugg_groups = 7; sugg_sector_size = 20; }
+	  else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 30; }
+	  else                          { sugg_groups = 8; sugg_sector_size = 70; }
+	  break;
+	case INST_SECTION:
+	  if      (input_size < 7000)   { sugg_groups = 1; sugg_sector_size = 0; }
+	  else if (input_size < 10000)  { sugg_groups = 2; sugg_sector_size = 50; }
+	  else if (input_size < 25000)  { sugg_groups = 3; sugg_sector_size = 50; }
+	  else if (input_size < 50000)  { sugg_groups = 6; sugg_sector_size = 40; }
+	  else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 40; }
+	  else                          { sugg_groups = 8; sugg_sector_size = 40; }
+	  break;
+	case ADDR_SECTION:
+	  if      (input_size < 9000)   { sugg_groups = 1; sugg_sector_size = 0; }
+	  else if (input_size < 25000)  { sugg_groups = 2; sugg_sector_size = 130; }
+	  else if (input_size < 50000)  { sugg_groups = 3; sugg_sector_size = 130; }
+	  else if (input_size < 100000) { sugg_groups = 5; sugg_sector_size = 130; }
+	  else                          { sugg_groups = 7; sugg_sector_size = 130; }
+	  break;
+	}
+
+      if (cfg_groups == 0) /* Only fill in what CFG left open. */
+	{
+	  cfg_groups = sugg_groups;
+	}
+
+      if (cfg_sector_size == 0)
+	{
+	  cfg_sector_size = sugg_sector_size;
+	}
+    }
+
+  if (cfg_groups != 1 && cfg_sector_size == 0) /* Still no sector size. */
+    {
+      switch (cfg->data_type)
+	{
+	case DATA_SECTION:
+	  cfg_sector_size = 20;
+	  break;
+	case INST_SECTION:
+	  cfg_sector_size = 50;
+	  break;
+	case ADDR_SECTION:
+	  cfg_sector_size = 130;
+	  break;
+	}
+    }
+
+  (*ret_groups)     = cfg_groups;
+  (*ret_sector_size) = cfg_sector_size;
+
+  XD3_ASSERT (cfg_groups > 0 && cfg_groups <= DJW_MAX_GROUPS);
+  XD3_ASSERT (cfg_groups == 1 ||
+	      (cfg_sector_size >= DJW_SECTORSZ_MULT &&
+	       cfg_sector_size <= DJW_SECTORSZ_MAX));
+
+  return 0;
+}
+
+static int /* 0, XD3_NOSECOND, ENOMEM, or an error from a helper */
+xd3_encode_huff (xd3_stream   *stream, /* handed to the alloc/encode helpers; msg set on nosecond */
+		 djw_stream   *h,      /* not referenced in this function */
+		 xd3_output   *input,  /* page list holding the bytes to encode */
+		 xd3_output   *output, /* receives the bits; ->next is reset on regroup */
+		 xd3_sec_cfg  *cfg)    /* ->inefficient disables the efficiency bail-outs */
+{ /* Huffman-code INPUT into OUTPUT: one table if groups == 1, else one table per group. */
+  int         ret;
+  usize_t     groups, sector_size;
+  bit_state   bstate = BIT_STATE_ENCODE_INIT;
+  xd3_output *in;
+  usize_t     output_bits;
+  usize_t     input_bits;
+  usize_t     input_bytes;
+  usize_t     initial_offset = output->next;
+  djw_weight  real_freq[ALPHABET_SIZE];
+  uint8_t    *gbest = NULL;
+  uint8_t    *gbest_mtf = NULL;
+
+  input_bytes = djw_count_freqs (real_freq, input);
+  input_bits  = input_bytes * 8;
+
+  XD3_ASSERT (input_bytes > 0);
+
+  if ((ret = xd3_encode_howmany_groups (stream, cfg, input_bytes,
+					& groups, & sector_size)))
+    {
+      return ret;
+    }
+
+  if (0) /* skipped on entry; the body runs only via "goto regroup" */
+    {
+    regroup:
+      /* Sometimes we dynamically decide there are too many groups.  Arrive
+       * here. */
+      output->next = initial_offset;
+      xd3_bit_state_encode_init (& bstate);
+    }
+
+  /* Encode: # of groups (3 bits) */
+  if ((ret = xd3_encode_bits (stream, & output, & bstate,
+			      DJW_GROUP_BITS, groups-1))) { goto failure; }
+
+  if (groups == 1)
+    {
+      /* Single Huffman group. */
+      usize_t    code[ALPHABET_SIZE]; /* Codes */
+      uint8_t    clen[ALPHABET_SIZE];
+      uint8_t    prefix_mtfsym[ALPHABET_SIZE];
+      djw_prefix prefix;
+
+      output_bits =
+	djw_build_prefix (real_freq, clen, ALPHABET_SIZE, DJW_MAX_CODELEN);
+      djw_build_codes (code, clen, ALPHABET_SIZE, DJW_MAX_CODELEN);
+
+      if (output_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient)
+	{
+	  goto nosecond;
+	}
+
+      /* Encode: prefix */
+      prefix.mtfsym = prefix_mtfsym;
+      prefix.symbol = clen;
+      prefix.scount = ALPHABET_SIZE;
+
+      if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix)))
+	{
+	  goto failure;
+	}
+
+      if (output_bits + (8 * output->next) + EFFICIENCY_BITS >=
+	  input_bits && ! cfg->inefficient)
+	{
+	  goto nosecond;
+	}
+
+      /* Encode: data */
+      for (in = input; in; in = in->next_page)
+	{
+	  const uint8_t *p     = in->base;
+	  const uint8_t *p_max = p + in->next;
+
+	  do
+	    {
+	      usize_t sym  = *p++;
+	      usize_t bits = clen[sym];
+
+	      IF_DEBUG (output_bits -= bits);
+
+	      if ((ret = xd3_encode_bits (stream, & output,
+					  & bstate, bits, code[sym])))
+		{
+		  goto failure;
+		}
+	    }
+	  while (p < p_max);
+	}
+
+      XD3_ASSERT (output_bits == 0);
+    }
+  else
+    {
+      /* DJW Huffman */
+      djw_weight evolve_freq[DJW_MAX_GROUPS][ALPHABET_SIZE];
+      uint8_t evolve_clen[DJW_MAX_GROUPS][ALPHABET_SIZE];
+      djw_weight left = input_bytes;
+      usize_t gp;
+      usize_t niter = 0;
+      usize_t select_bits;
+      usize_t sym1 = 0, sym2 = 0, s;
+      usize_t gcost[DJW_MAX_GROUPS];
+      usize_t gbest_code[DJW_MAX_GROUPS+2];
+      uint8_t gbest_clen[DJW_MAX_GROUPS+2];
+      usize_t  gbest_max = 1 + (input_bytes - 1) / sector_size;
+      usize_t best_bits = 0;
+      usize_t  gbest_no;
+      usize_t  gpcnt;
+      const uint8_t *p;
+      IF_DEBUG2 (usize_t gcount[DJW_MAX_GROUPS]);
+
+      /* Encode: sector size (5 bits) */
+      if ((ret = xd3_encode_bits (stream, & output, & bstate,
+				  DJW_SECTORSZ_BITS,
+				  (sector_size/DJW_SECTORSZ_MULT)-1)))
+	{
+	  goto failure;
+	}
+
+      /* Dynamic allocation. */
+      if (gbest == NULL)
+	{
+	  if ((gbest = (uint8_t*) xd3_alloc (stream, gbest_max, 1)) == NULL)
+	    {
+	      ret = ENOMEM;
+	      goto failure;
+	    }
+	}
+
+      if (gbest_mtf == NULL)
+	{
+	  if ((gbest_mtf = (uint8_t*) xd3_alloc (stream, gbest_max, 1)) == NULL)
+	    {
+	      ret = ENOMEM;
+	      goto failure;
+	    }
+	}
+
+      /* OPT: Some of the inner loops can be optimized, as shown in bzip2 */
+
+      /* Generate initial code length tables. */
+      for (gp = 0; gp < groups; gp += 1)
+	{
+	  djw_weight sum  = 0;
+	  djw_weight goal = left / (groups - gp);
+
+	  IF_DEBUG2 (usize_t nz = 0);
+
+	  /* Due to the single-code granularity of this distribution, it may
+	   * be that we can't generate a distribution for each group.  In that
+	   * case subtract one group and try again.  If (inefficient), we're
+	   * testing group behavior, so don't mess things up. */
+	  if (goal == 0 && !cfg->inefficient)
+	    {
+	      IF_DEBUG2 (DP(RINT "too many groups (%"W"u), dropping one\n",
+			    groups));
+	      groups -= 1;
+	      goto regroup;
+	    }
+
+	  /* Sum == goal is possible when (cfg->inefficient)... */
+	  while (sum < goal)
+	    {
+	      XD3_ASSERT (sym2 < ALPHABET_SIZE);
+	      IF_DEBUG2 (nz += real_freq[sym2] != 0);
+	      sum += real_freq[sym2++];
+	    }
+
+	  IF_DEBUG2(DP(RINT "group %"W"u has symbols %"W"u..%"W"u (%"W"u non-zero) "
+		       "(%u/%"W"u = %.3f)\n",
+		       gp, sym1, sym2, nz, sum,
+		       input_bytes, sum / (double)input_bytes););
+
+	  for (s = 0; s < ALPHABET_SIZE; s += 1)
+	    {
+	      evolve_clen[gp][s] = (s >= sym1 && s <= sym2) ? 1 : 16;
+	    }
+
+	  left -= sum;
+	  sym1  = sym2+1;
+	}
+
+    repeat:
+
+      niter += 1;
+      gbest_no = 0;
+      memset (evolve_freq, 0, sizeof (evolve_freq[0]) * groups);
+      IF_DEBUG2 (memset (gcount, 0, sizeof (gcount[0]) * groups));
+
+      /* For each input page (loop is irregular to allow non-pow2-size group
+       * size). */
+      in = input;
+      p  = in->base;
+
+      /* For each group-size sector. */
+      do
+	{
+	  const uint8_t *p0  = p;
+	  xd3_output    *in0 = in;
+	  usize_t best   = 0;
+	  usize_t winner = 0;
+
+	  /* Select best group for each sector, update evolve_freq. */
+	  memset (gcost, 0, sizeof (gcost[0]) * groups);
+
+	  /* For each byte in sector. */
+	  for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
+	    {
+	      /* For each group. */
+	      for (gp = 0; gp < groups; gp += 1)
+		{
+		  gcost[gp] += evolve_clen[gp][*p];
+		}
+
+	      /* Check end-of-input-page.  GP_PAGE's "break" leaves the enclosing for. */
+#             define GP_PAGE()                \
+	      if ((usize_t)(++p - in->base) == in->next) \
+		{                             \
+		  in = in->next_page;         \
+		  if (in == NULL) { break; }  \
+		  p  = in->base;              \
+		}
+
+	      GP_PAGE ();
+	    }
+
+	  /* Find min cost group for this sector */
+	  best = USIZE_T_MAX;
+	  for (gp = 0; gp < groups; gp += 1)
+	    {
+	      if (gcost[gp] < best) 
+		{ 
+		  best = gcost[gp]; 
+		  winner = gp; 
+		}
+	    }
+
+	  XD3_ASSERT(gbest_no < gbest_max);
+	  gbest[gbest_no++] = winner;
+	  IF_DEBUG2 (gcount[winner] += 1);
+
+	  p  = p0;
+	  in = in0;
+
+	  /* Update group frequencies. */
+	  for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
+	    {
+	      evolve_freq[winner][*p] += 1;
+
+	      GP_PAGE ();
+	    }
+	}
+      while (in != NULL);
+
+      XD3_ASSERT (gbest_no == gbest_max);
+
+      /* Recompute code lengths. */
+      output_bits = 0;
+      for (gp = 0; gp < groups; gp += 1)
+	{
+	  int i;
+	  uint8_t evolve_zero[ALPHABET_SIZE];
+	  int any_zeros = 0;
+
+	  memset (evolve_zero, 0, sizeof (evolve_zero));
+
+	  /* Cannot allow a zero clen when the real frequency is non-zero.
+	   * Note: this means we are going to encode a fairly long code for
+	   * these unused entries.  An improvement would be to implement a
+	   * NOTUSED code for when these are actually zero, but this requires
+	   * another data structure (evolve_zero) since we don't know when
+	   * evolve_freq[i] == 0...  Briefly tested, looked worse. */
+	  for (i = 0; i < ALPHABET_SIZE; i += 1)
+	    {
+	      if (evolve_freq[gp][i] == 0 && real_freq[i] != 0)
+		{
+		  evolve_freq[gp][i] = 1;
+		  evolve_zero[i] = 1;
+		  any_zeros = 1;
+		}
+	    }
+
+	  output_bits += djw_build_prefix (evolve_freq[gp], evolve_clen[gp],
+					   ALPHABET_SIZE, DJW_MAX_CODELEN);
+
+	  /* The above faking of frequencies does not matter for the last
+	   * iteration, but we don't know when that is yet.  However, it also
+	   * breaks the output_bits computation.  Necessary for accuracy, and
+	   * for the (output_bits==0) assert after all bits are output. */
+	  if (any_zeros)
+	    {
+	      IF_DEBUG2 (usize_t save_total = output_bits);
+
+	      for (i = 0; i < ALPHABET_SIZE; i += 1)
+		{
+		  if (evolve_zero[i]) { output_bits -= evolve_clen[gp][i]; }
+		}
+
+	      IF_DEBUG2 (DP(RINT "evolve_zero reduced %"W"u bits in group %"W"u\n",
+			    save_total - output_bits, gp));
+	    }
+	}
+
+      IF_DEBUG2(
+	DP(RINT "pass %"W"u total bits: %"W"u group uses: ", niter, output_bits);
+	for (gp = 0; gp < groups; gp += 1) { DP(RINT "%"W"u ", gcount[gp]); }
+	DP(RINT "\n");
+	);
+
+      /* End iteration.  NOTE(review): if usize_t is unsigned, best_bits - output_bits
+       * wraps when a pass lost bits, so that case repeats as well -- confirm intended. */
+      IF_DEBUG2 (if (niter > 1 && best_bits < output_bits) {
+	DP(RINT "iteration lost %"W"u bits\n", output_bits - best_bits); });
+
+      if (niter == 1 || (niter < DJW_MAX_ITER &&
+			 (best_bits - output_bits) >= DJW_MIN_IMPROVEMENT))
+	{
+	  best_bits = output_bits;
+	  goto repeat;
+	}
+
+      /* Efficiency check. */
+      if (output_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient)
+	{
+	  goto nosecond;
+	}
+
+      IF_DEBUG2 (DP(RINT "djw compression: %"W"u -> %0.3f\n",
+		    input_bytes, output_bits / 8.0));
+
+      /* Encode: prefix */
+      {
+	uint8_t     prefix_symbol[DJW_MAX_GROUPS * ALPHABET_SIZE];
+	uint8_t     prefix_mtfsym[DJW_MAX_GROUPS * ALPHABET_SIZE];
+	uint8_t     prefix_repcnt[DJW_MAX_GROUPS * ALPHABET_SIZE];
+	djw_prefix prefix;
+
+	prefix.symbol = prefix_symbol;
+	prefix.mtfsym = prefix_mtfsym;
+	prefix.repcnt = prefix_repcnt;
+
+	djw_compute_multi_prefix (groups, evolve_clen, & prefix);
+	if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix)))
+	  {
+	    goto failure;
+	  }
+      }
+
+      /* Encode: selector frequencies */
+      {
+	/* DJW_MAX_GROUPS +2 is for RUN_0, RUN_1 symbols. */
+	djw_weight gbest_freq[DJW_MAX_GROUPS+2];
+	djw_prefix gbest_prefix;
+	usize_t i;
+
+	gbest_prefix.scount = gbest_no;
+	gbest_prefix.symbol = gbest;
+	gbest_prefix.mtfsym = gbest_mtf;
+
+	djw_compute_selector_1_2 (& gbest_prefix, groups, gbest_freq);
+
+	select_bits =
+	  djw_build_prefix (gbest_freq, gbest_clen, groups+1, DJW_MAX_GBCLEN);
+	djw_build_codes  (gbest_code, gbest_clen, groups+1, DJW_MAX_GBCLEN);
+
+	for (i = 0; i < groups+1; i += 1)
+	  {
+	    if ((ret = xd3_encode_bits (stream, & output, & bstate,
+					DJW_GBCLEN_BITS, gbest_clen[i])))
+	      {
+		goto failure;
+	      }
+	  }
+
+	for (i = 0; i < gbest_prefix.mcount; i += 1)
+	  {
+	    usize_t gp_mtf      = gbest_mtf[i];
+	    usize_t gp_sel_bits = gbest_clen[gp_mtf];
+	    usize_t gp_sel_code = gbest_code[gp_mtf];
+
+	    XD3_ASSERT (gp_mtf < groups+1);
+
+	    if ((ret = xd3_encode_bits (stream, & output, & bstate,
+					gp_sel_bits, gp_sel_code)))
+	      {
+		goto failure;
+	      }
+
+	    IF_DEBUG (select_bits -= gp_sel_bits);
+	  }
+
+	XD3_ASSERT (select_bits == 0);
+      }
+
+      /* Efficiency check. */
+      if (output_bits + select_bits + (8 * output->next) +
+	  EFFICIENCY_BITS >= input_bits && ! cfg->inefficient)
+	{
+	  goto nosecond;
+	}
+
+      /* Encode: data */
+      {
+	usize_t evolve_code[DJW_MAX_GROUPS][ALPHABET_SIZE];
+	usize_t sector = 0;
+
+	/* Build code tables for each group. */
+	for (gp = 0; gp < groups; gp += 1)
+	  {
+	    djw_build_codes (evolve_code[gp], evolve_clen[gp],
+			     ALPHABET_SIZE, DJW_MAX_CODELEN);
+	  }
+
+	/* Now loop over the input. */
+	in = input;
+	p  = in->base;
+
+	do
+	  {
+	    /* For each sector. */
+	    usize_t   gp_best  = gbest[sector];
+	    usize_t *gp_codes = evolve_code[gp_best];
+	    uint8_t *gp_clens = evolve_clen[gp_best];
+
+	    XD3_ASSERT (sector < gbest_no);
+
+	    sector += 1;
+
+	    /* Encode the sector data. */
+	    for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
+	      {
+		usize_t sym  = *p;
+		usize_t bits = gp_clens[sym];
+		usize_t code = gp_codes[sym];
+
+		IF_DEBUG (output_bits -= bits);
+
+		if ((ret = xd3_encode_bits (stream, & output, & bstate,
+					    bits, code)))
+		  {
+		    goto failure;
+		  }
+
+		GP_PAGE ();
+	      }
+	  }
+	while (in != NULL);
+
+	XD3_ASSERT (select_bits == 0);
+	XD3_ASSERT (output_bits == 0);
+      }
+    }
+
+  ret = xd3_flush_bits (stream, & output, & bstate);
+
+  if (0) /* skipped after the flush; the body runs only via "goto nosecond" */
+    {
+    nosecond:
+      stream->msg = "secondary compression was inefficient";
+      ret = XD3_NOSECOND;
+    }
+
+ failure:
+
+  xd3_free (stream, gbest); /* still NULL if the multi-group branch never ran */
+  xd3_free (stream, gbest_mtf);
+  return ret;
+}
+#endif /* XD3_ENCODER */
+
+/*********************************************************************/
+/*                              DECODE                               */
+/*********************************************************************/
+
+static void
+djw_build_decoder (xd3_stream    *stream,
+		   usize_t        asize,     /* number of entries in clen; must be > 0 */
+		   usize_t        abs_max,   /* largest value any clen entry may hold */
+		   const uint8_t *clen,      /* in: code length per symbol, 0 = unused */
+		   uint8_t       *inorder,   /* out: used symbols in canonical code order */
+		   usize_t       *base,      /* out: indexed by code length */
+		   usize_t       *limit,     /* out: indexed by code length */
+		   usize_t       *min_clenp, /* out: shortest length in use */
+		   usize_t       *max_clenp) /* out: longest length in use, 0 if none */
+{ /* Build the per-length BASE/LIMIT tables and INORDER symbol list used by djw_decode_symbol. */
+  usize_t i, l;
+  const uint8_t *ci;
+  usize_t nr_clen [DJW_TOTAL_CODES];
+  usize_t tmp_base[DJW_TOTAL_CODES];
+  usize_t min_clen;
+  usize_t max_clen;
+
+  /* Assumption: the two temporary arrays are large enough to hold abs_max. */
+  XD3_ASSERT (abs_max <= DJW_MAX_CODELEN);
+
+  /* Like the start of zlib's inftrees.c.  Clear all of nr_clen: see min_clen below. */
+  memset (nr_clen, 0, sizeof (nr_clen));
+
+  /* Count number of each code length */
+  i  = asize;
+  ci = clen;
+  do
+    {
+      /* Caller _must_ check that values are in-range.  Most of the time the
+       * caller decodes a specific number of bits, which imply the max value,
+       * and the other time the caller decodes a huffman value, which must be
+       * in-range.  Therefore, it's an assertion and this function cannot
+       * otherwise fail. */
+      XD3_ASSERT (*ci <= abs_max);
+
+      nr_clen[*ci++]++;
+    }
+  while (--i != 0);
+
+  /* Compute min, max.  With no non-zero length, min_clen = abs_max+1 and max_clen = 0. */
+  for (i = 1; i <= abs_max; i += 1) { if (nr_clen[i]) { break; } }
+  min_clen = i;
+  for (i = abs_max; i != 0; i -= 1) { if (nr_clen[i]) { break; } }
+  max_clen = i;
+
+  /* Fill the BASE, LIMIT table. */
+  tmp_base[min_clen] = 0;
+  base[min_clen]     = 0;
+  limit[min_clen]    = nr_clen[min_clen] - 1; /* reads slot abs_max+1 in the empty case */
+  for (i = min_clen + 1; i <= max_clen; i += 1)
+    {
+      usize_t last_limit = ((limit[i-1] + 1) << 1);
+      tmp_base[i] = tmp_base[i-1] + nr_clen[i-1];
+      limit[i]    = last_limit + nr_clen[i] - 1;
+      base[i]     = last_limit - tmp_base[i];
+    }
+
+  /* Fill the inorder array, canonically ordered codes. */
+  ci = clen;
+  for (i = 0; i < asize; i += 1)
+    {
+      if ((l = *ci++) != 0)
+	{
+	  inorder[tmp_base[l]++] = i;
+	}
+    }
+
+  *min_clenp = min_clen;
+  *max_clenp = max_clen;
+}
+
+static inline int /* 0 with *sym set, or XD3_INVALID_INPUT with stream->msg set */
+djw_decode_symbol (xd3_stream     *stream,
+		   bit_state      *bstate,    /* cur_byte/cur_mask: byte in progress and its next bit */
+		   const uint8_t **input,     /* advanced as whole bytes are consumed */
+		   const uint8_t  *input_end, /* reading stops when *input reaches this */
+		   const uint8_t  *inorder,   /* symbol table indexed by code - base[bits] */
+		   const usize_t  *base,      /* indexed by code length */
+		   const usize_t  *limit,     /* indexed by code length */
+		   usize_t         min_clen,  /* no code is accepted before this many bits */
+		   usize_t         max_clen,  /* reading this many bits without a match is corrupt */
+		   usize_t         *sym,      /* out: decoded symbol */
+		   usize_t          max_sym)  /* largest accepted index into inorder */
+{ /* Read bits one at a time until they form a code accepted by LIMIT, then map it to a symbol. */
+  usize_t code = 0;
+  usize_t bits = 0;
+
+  /* OPT: Supposedly a small lookup table improves speed here... */
+
+  /* Code outline is similar to xd3_decode_bits... */
+  if (bstate->cur_mask == 0x100) { goto next_byte; } /* current byte fully consumed */
+
+  for (;;)
+    {
+      do
+	{
+	  if (bits == max_clen) { goto corrupt; }
+
+	  bits += 1;
+	  code  = (code << 1);
+
+	  if (bstate->cur_byte & bstate->cur_mask) { code |= 1; }
+
+	  bstate->cur_mask <<= 1;
+
+	  if (bits >= min_clen && code <= limit[bits]) { goto done; }
+	}
+      while (bstate->cur_mask != 0x100);
+
+    next_byte:
+
+      if (*input == input_end)
+	{
+	  stream->msg = "secondary decoder end of input";
+	  return XD3_INVALID_INPUT;
+	}
+
+      bstate->cur_byte = *(*input)++;
+      bstate->cur_mask = 1;
+    }
+
+ done:
+
+  if (base[bits] <= code) /* keeps the unsigned subtraction below from wrapping */
+    {
+      usize_t offset = code - base[bits];
+
+      if (offset <= max_sym)
+	{
+	  IF_DEBUG2 (DP(RINT "(j) %"W"u ", code));
+	  *sym = inorder[offset];
+	  return 0;
+	}
+    }
+
+ corrupt:
+  stream->msg = "secondary decoder invalid code";
+  return XD3_INVALID_INPUT;
+}
+
+static int /* 0, or the error returned by xd3_decode_bits */
+djw_decode_clclen (xd3_stream     *stream,
+		   bit_state      *bstate,
+		   const uint8_t **input,
+		   const uint8_t  *input_end,
+		   uint8_t        *cl_inorder, /* out: filled by djw_build_decoder */
+		   usize_t        *cl_base,    /* out: filled by djw_build_decoder */
+		   usize_t        *cl_limit,   /* out: filled by djw_build_decoder */
+		   usize_t        *cl_minlen,  /* out: filled by djw_build_decoder */
+		   usize_t        *cl_maxlen,  /* out: filled by djw_build_decoder */
+		   uint8_t        *cl_mtf)     /* out: reset by djw_init_clen_mtf_1_2 */
+{ /* Read the code lengths of the code-length code and build its decoder tables. */
+  int ret;
+  uint8_t cl_clen[DJW_TOTAL_CODES];
+  usize_t num_codes, value;
+  usize_t i;
+
+  /* How many code lengths follow (the stored count excludes DJW_EXTRA_12OFFSET). */
+  if ((ret = xd3_decode_bits (stream, bstate, input,
+			      input_end, DJW_EXTRA_CODE_BITS, & num_codes)))
+    {
+      return ret;
+    }
+
+  num_codes += DJW_EXTRA_12OFFSET;
+
+  /* Read num_codes.  NOTE(review): assumes num_codes <= DJW_TOTAL_CODES -- confirm. */
+  for (i = 0; i < num_codes; i += 1)
+    {
+      if ((ret = xd3_decode_bits (stream, bstate, input,
+				  input_end, DJW_CLCLEN_BITS, & value)))
+	{
+	  return ret;
+	}
+
+      cl_clen[i] = value;
+    }
+
+  /* Set the rest to zero. */
+  for (; i < DJW_TOTAL_CODES; i += 1) { cl_clen[i] = 0; }
+
+  /* No need to check for in-range clen values, because: */
+  XD3_ASSERT (1 << DJW_CLCLEN_BITS == DJW_MAX_CLCLEN + 1);
+
+  /* Build the code-length decoder. */
+  djw_build_decoder (stream, DJW_TOTAL_CODES, DJW_MAX_CLCLEN,
+		     cl_clen, cl_inorder, cl_base,
+		     cl_limit, cl_minlen, cl_maxlen);
+
+  /* Initialize the MTF state. */
+  djw_init_clen_mtf_1_2 (cl_mtf);
+
+  return 0;
+}
+
+static inline int /* 0, XD3_INVALID_INPUT, or the error from djw_decode_symbol */
+djw_decode_1_2 (xd3_stream     *stream,
+		bit_state      *bstate,
+		const uint8_t **input,
+		const uint8_t  *input_end,
+		const uint8_t  *inorder,
+		const usize_t  *base,
+		const usize_t  *limit,
+		const usize_t  *minlen,
+		const usize_t  *maxlen,
+		uint8_t        *mtfvals,     /* move-to-front list, updated via djw_update_mtf */
+		usize_t         elts,        /* number of entries to store in values */
+		usize_t         skip_offset, /* 0 disables the known-zero shortcut below */
+		uint8_t        *values)      /* out: the elts decoded values */
+{ /* Decode ELTS values that were coded as MTF positions plus run-length repeat codes. */
+  usize_t n = 0, rep = 0, mtf = 0, s = 0;
+  int ret;
+  
+  while (n < elts)
+    {
+      /* Special case inside generic code: CLEN only: If not the first group,
+       * we already know the zero frequencies. */
+      if (skip_offset != 0 && n >= skip_offset && values[n-skip_offset] == 0)
+	{
+	  values[n++] = 0;
+	  continue;
+	}
+
+      /* Repeat last symbol. */
+      if (rep != 0)
+	{
+	  values[n++] = mtfvals[0];
+	  rep -= 1;
+	  continue;
+	}
+
+      /* Symbol following last repeat code. */
+      if (mtf != 0)
+	{
+	  usize_t sym = djw_update_mtf (mtfvals, mtf);
+	  values[n++] = sym;
+	  mtf = 0;
+	  continue;
+	}
+
+      /* Decode next symbol/repeat code. */
+      if ((ret = djw_decode_symbol (stream, bstate, input, input_end,
+				    inorder, base, limit, *minlen, *maxlen,
+				    & mtf, DJW_TOTAL_CODES))) { return ret; }
+
+      if (mtf <= RUN_1)
+	{
+	  /* Repetition. */
+	  rep = ((mtf + 1) << s); /* each consecutive repeat code counts twice as much */
+	  mtf = 0;
+	  s += 1;
+	}
+      else
+	{
+	  /* Remove the RUN_1 MTF offset. */
+	  mtf -= 1;
+	  s = 0;
+	}
+    }
+
+  /* If (rep != 0) there were too many codes received. */
+  if (rep != 0)
+    {
+      stream->msg = "secondary decoder invalid repeat code";
+      return XD3_INVALID_INPUT;
+    }
+  
+  return 0;
+}
+
+static inline int
+djw_decode_prefix (xd3_stream     *stream,
+		   bit_state      *bstate,
+		   const uint8_t **input,
+		   const uint8_t  *input_end,
+		   const uint8_t  *cl_inorder,
+		   const usize_t  *cl_base,
+		   const usize_t  *cl_limit,
+		   const usize_t  *cl_minlen,
+		   const usize_t  *cl_maxlen,
+		   uint8_t        *cl_mtf,
+		   usize_t         groups,
+		   uint8_t        *clen)
+{
+  /* GROUPS tables of ALPHABET_SIZE lengths each; the skip offset is one table. */
+  const usize_t clen_count = ALPHABET_SIZE * groups;
+  return djw_decode_1_2 (stream, bstate, input, input_end, cl_inorder, cl_base, cl_limit,
+			 cl_minlen, cl_maxlen, cl_mtf, clen_count, ALPHABET_SIZE, clen);
+}
+
+static int /* 0 on success, otherwise an error code with the positions still stored */
+xd3_decode_huff (xd3_stream     *stream,
+		 djw_stream    *h,                 /* not referenced in this function */
+		 const uint8_t **input_pos,        /* in: first encoded byte; out: first unread byte */
+		 const uint8_t  *const input_end,
+		 uint8_t       **output_pos,       /* in: where to write; out: past the last byte written */
+		 const uint8_t  *const output_end) /* output_end - *output_pos bytes are to be produced */
+{ /* Decode one Huffman-coded section; "fail:" is the single cleanup and exit path. */
+  const uint8_t *input = *input_pos;
+  uint8_t  *output = *output_pos;
+  bit_state bstate = BIT_STATE_DECODE_INIT;
+  uint8_t  *sel_group = NULL;
+  usize_t    groups, gp;
+  usize_t    output_bytes = (usize_t)(output_end - output);
+  usize_t    sector_size;
+  usize_t    sectors;
+  int ret;
+
+  /* Invalid input: there must be at least one byte to produce. */
+  if (output_bytes == 0)
+    {
+      stream->msg = "secondary decoder invalid input";
+      return XD3_INVALID_INPUT;
+    }
+
+  /* Decode: number of groups */
+  if ((ret = xd3_decode_bits (stream, & bstate, & input,
+			      input_end, DJW_GROUP_BITS, & groups)))
+    {
+      goto fail;
+    }
+
+  groups += 1;
+
+  if (groups > 1)
+    {
+      /* Decode: group size */
+      if ((ret = xd3_decode_bits (stream, & bstate, & input,
+				  input_end, DJW_SECTORSZ_BITS,
+				  & sector_size))) { goto fail; }
+      
+      sector_size = (sector_size + 1) * DJW_SECTORSZ_MULT;
+    }
+  else
+    {
+      /* Default for groups == 1 */
+      sector_size = output_bytes;
+    }
+
+  sectors = 1 + (output_bytes - 1) / sector_size;
+
+  /* TODO: In the case of groups==1, lots of extra stack space gets used here.
+   * Could dynamically allocate this memory, which would help with excess
+   * parameter passing, too.  Passing too many parameters in this file,
+   * simplify it! */
+
+  /* Outer scope: per-group symbol decoder tables. */
+  {
+    uint8_t inorder[DJW_MAX_GROUPS][ALPHABET_SIZE];
+    usize_t base   [DJW_MAX_GROUPS][DJW_TOTAL_CODES];
+    usize_t limit  [DJW_MAX_GROUPS][DJW_TOTAL_CODES];
+    usize_t minlen [DJW_MAX_GROUPS];
+    usize_t maxlen [DJW_MAX_GROUPS];
+
+    /* Nested scope: code length decoder tables. */
+    {
+      uint8_t clen      [DJW_MAX_GROUPS][ALPHABET_SIZE];
+      uint8_t cl_inorder[DJW_TOTAL_CODES];
+      usize_t cl_base   [DJW_MAX_CLCLEN+2];
+      usize_t cl_limit  [DJW_MAX_CLCLEN+2];
+      uint8_t cl_mtf    [DJW_TOTAL_CODES];
+      usize_t cl_minlen;
+      usize_t cl_maxlen;
+
+      /* Compute the code length decoder. */
+      if ((ret = djw_decode_clclen (stream, & bstate, & input, input_end,
+				    cl_inorder, cl_base, cl_limit, & cl_minlen,
+				    & cl_maxlen, cl_mtf))) { goto fail; }
+
+      /* Now decode each group decoder. */
+      if ((ret = djw_decode_prefix (stream, & bstate, & input, input_end,
+				    cl_inorder, cl_base, cl_limit,
+				    & cl_minlen, & cl_maxlen, cl_mtf,
+				    groups, clen[0]))) { goto fail; }
+
+      /* Prepare the actual decoding tables. */
+      for (gp = 0; gp < groups; gp += 1)
+	{
+	  djw_build_decoder (stream, ALPHABET_SIZE, DJW_MAX_CODELEN,
+			     clen[gp], inorder[gp], base[gp], limit[gp],
+			     & minlen[gp], & maxlen[gp]);
+	}
+    }
+
+    /* Decode: selector clens. */
+    {
+      uint8_t sel_inorder[DJW_MAX_GROUPS+2];
+      usize_t sel_base   [DJW_MAX_GBCLEN+2];
+      usize_t sel_limit  [DJW_MAX_GBCLEN+2];
+      uint8_t sel_mtf    [DJW_MAX_GROUPS+2];
+      usize_t sel_minlen;
+      usize_t sel_maxlen;
+
+      /* Setup group selection. */
+      if (groups > 1)
+	{
+	  uint8_t sel_clen[DJW_MAX_GROUPS+1];
+
+	  for (gp = 0; gp < groups+1; gp += 1)
+	    {
+	      usize_t value;
+
+	      if ((ret = xd3_decode_bits (stream, & bstate, & input,
+					  input_end, DJW_GBCLEN_BITS,
+					  & value))) { goto fail; }
+
+	      sel_clen[gp] = value;
+	      sel_mtf[gp]  = gp;
+	    }
+
+	  if ((sel_group = (uint8_t*) xd3_alloc (stream, sectors, 1)) == NULL)
+	    {
+	      ret = ENOMEM;
+	      goto fail;
+	    }
+
+	  djw_build_decoder (stream, groups+1, DJW_MAX_GBCLEN, sel_clen,
+			     sel_inorder, sel_base, sel_limit,
+			     & sel_minlen, & sel_maxlen);
+
+	  if ((ret = djw_decode_1_2 (stream, & bstate, & input, input_end,
+				     sel_inorder, sel_base,
+				     sel_limit, & sel_minlen,
+				     & sel_maxlen, sel_mtf,
+				     sectors, 0, sel_group))) { goto fail; }
+	}
+
+      /* Now decode each sector. */
+      {
+	/* Initialize for (groups==1) case. */
+	uint8_t *gp_inorder = inorder[0]; 
+	usize_t *gp_base    = base[0];
+	usize_t *gp_limit   = limit[0];
+	usize_t  gp_minlen  = minlen[0];
+	usize_t  gp_maxlen  = maxlen[0];
+	usize_t c;
+
+	for (c = 0; c < sectors; c += 1)
+	  {
+	    usize_t n;
+
+	    if (groups >= 2)
+	      {
+		gp = sel_group[c];
+
+		XD3_ASSERT (gp < groups);
+
+		gp_inorder = inorder[gp];
+		gp_base    = base[gp];
+		gp_limit   = limit[gp];
+		gp_minlen  = minlen[gp];
+		gp_maxlen  = maxlen[gp];
+	      }
+
+	    if (output_end < output)
+	      {
+		stream->msg = "secondary decoder invalid input";
+		ret = XD3_INVALID_INPUT; /* leave via fail: so sel_group is freed */
+		goto fail;
+	      }
+	    /* Decode next sector. */
+	    n = xd3_min (sector_size, (usize_t) (output_end - output));
+
+	    do
+	      {
+		usize_t sym;
+
+		if ((ret = djw_decode_symbol (stream, & bstate,
+					      & input, input_end,
+					      gp_inorder, gp_base,
+					      gp_limit, gp_minlen, gp_maxlen,
+					      & sym, ALPHABET_SIZE)))
+		  {
+		    goto fail;
+		  }
+
+		*output++ = sym;
+	      }
+	    while (--n);
+	  }
+      }
+    }
+  }
+
+  IF_REGRESSION (if ((ret = xd3_test_clean_bits (stream, & bstate)))
+		   { goto fail; });
+  XD3_ASSERT (ret == 0);
+
+ fail:
+  xd3_free (stream, sel_group); /* still NULL when groups == 1 or on an early failure */
+
+  (*input_pos) = input;
+  (*output_pos) = output;
+  return ret;
+}
+
+#endif
diff --git a/third-party/xdelta3/xdelta3/xdelta3-fgk.h b/third-party/xdelta3/xdelta3/xdelta3-fgk.h
new file mode 100644
index 0000000000..f880ad9489
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/xdelta3-fgk.h
@@ -0,0 +1,857 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+   For demonstration purposes only.
+ */
+
+#ifndef _XDELTA3_FGK_h_
+#define _XDELTA3_FGK_h_
+
+/* An implementation of the FGK algorithm described by D.E. Knuth in
+ * "Dynamic Huffman Coding" in Journal of Algorithms 6. */
+
+/* A 32bit counter (fgk_weight) is used as the frequency counter for
+ * nodes in the huffman tree.  TODO: Need to test for overflow and/or
+ * reset stats. */
+
+typedef struct _fgk_stream fgk_stream;
+typedef struct _fgk_node   fgk_node;
+typedef struct _fgk_block  fgk_block;
+typedef unsigned int       fgk_bit;
+typedef uint32_t           fgk_weight;
+
+struct _fgk_block { /* one slot of fgk_stream.block_array; holds either pointer below */
+  union {
+    fgk_node  *un_leader;  /* accessed through the block_leader macro */
+    fgk_block *un_freeptr; /* accessed through block_freeptr; free-list link set up in fgk_init */
+  } un;
+};
+
+#define block_leader  un.un_leader
+#define block_freeptr un.un_freeptr
+
+/* The code can also support fixed huffman encoding/decoding. */
+#define IS_ADAPTIVE 1
+
+/* weight is a count of the number of times this element has been seen
+ * in the current encoding/decoding.  parent, right_child, and
+ * left_child are pointers defining the tree structure.  right and
+ * left point to neighbors in an ordered sequence of weights.  The
+ * left child of a node is always guaranteed to have weight not
+ * greater than its sibling.  fgk_blockLeader points to the element
+ * with the same weight as itself which is closest to the next
+ * increasing weight block.  */
+struct _fgk_node
+{
+  fgk_weight  weight;      /* times this element has been seen so far */
+  fgk_node   *parent;      /* tree structure */
+  fgk_node   *left_child;  /* tree structure */
+  fgk_node   *right_child; /* tree structure */
+  fgk_node   *left;        /* neighbor in the weight-ordered sequence */
+  fgk_node   *right;       /* neighbor in the weight-ordered sequence */
+  fgk_block  *my_block;
+};
+
+/* alphabet_size is a count of the number of possible leaves in
+ * the huffman tree.  The number of total nodes counting internal
+ * nodes is ((2 * alphabet_size) - 1).  zero_freq_count is the number
+ * of elements remaining which have zero frequency.  zero_freq_exp and
+ * zero_freq_rem satisfy the equation zero_freq_count =
+ * 2^zero_freq_exp + zero_freq_rem.  root_node is the root of the
+ * tree, which is initialized to a node with zero frequency and
+ * contains the 0th such element.  free_node contains a pointer to the
+ * next available fgk_node space.  alphabet contains all the elements
+ * and is indexed by N.  remaining_zeros points to the head of the
+ * list of zeros.  */
+struct _fgk_stream
+{
+  usize_t alphabet_size;   /* set to ALPHABET_SIZE by fgk_alloc */
+  usize_t zero_freq_count; /* elements that still have zero frequency */
+  usize_t zero_freq_exp;   /* zero_freq_count = 2^zero_freq_exp + zero_freq_rem */
+  usize_t zero_freq_rem;
+  usize_t coded_depth;     /* entries of coded_bits currently in use */
+
+  usize_t total_nodes;     /* (2 * alphabet_size) - 1, set by fgk_alloc */
+  usize_t total_blocks;    /* 2 * total_nodes, set by fgk_alloc */
+
+  fgk_bit *coded_bits;     /* alphabet_size entries; written by fgk_encode_data */
+
+  fgk_block *block_array;  /* total_blocks entries */
+  fgk_block *free_block;   /* head of the free list threaded through block_freeptr */
+
+  fgk_node *decode_ptr;      /* fgk_init starts it at root_node */
+  fgk_node *remaining_zeros; /* head of the list of zeros */
+  fgk_node *alphabet;        /* total_nodes entries, indexed by element */
+  fgk_node *root_node;       /* root of the tree */
+  fgk_node *free_node;       /* next available fgk_node space */
+};
+
+/*********************************************************************/
+/*                             Encoder                               */
+/*********************************************************************/
+
+static fgk_stream*     fgk_alloc           (xd3_stream *stream /*, usize_t alphabet_size */);
+static int             fgk_init            (xd3_stream *stream,
+					    fgk_stream *h, 
+					    int is_encode);
+static usize_t         fgk_encode_data     (fgk_stream *h,
+					    usize_t    n);
+static inline fgk_bit  fgk_get_encoded_bit (fgk_stream *h);
+
+static int             xd3_encode_fgk      (xd3_stream  *stream,
+					    fgk_stream  *sec_stream,
+					    xd3_output  *input,
+					    xd3_output  *output,
+					    xd3_sec_cfg *cfg);
+
+/*********************************************************************/
+/* 			       Decoder                               */
+/*********************************************************************/
+
+static inline int      fgk_decode_bit      (fgk_stream *h,
+					    fgk_bit     b);
+static usize_t         fgk_decode_data     (fgk_stream *h);
+static void            fgk_destroy         (xd3_stream *stream,
+					    fgk_stream *h);
+
+static int             xd3_decode_fgk      (xd3_stream     *stream,
+					    fgk_stream     *sec_stream,
+					    const uint8_t **input,
+					    const uint8_t  *const input_end,
+					    uint8_t       **output,
+					    const uint8_t  *const output_end);
+
+/*********************************************************************/
+/* 			       Private                               */
+/*********************************************************************/
+
+static unsigned int fgk_find_nth_zero        (fgk_stream *h, usize_t n);
+static usize_t      fgk_nth_zero             (fgk_stream *h, usize_t n);
+static void         fgk_update_tree          (fgk_stream *h, usize_t n);
+static fgk_node*    fgk_increase_zero_weight (fgk_stream *h, usize_t n);
+static void         fgk_eliminate_zero       (fgk_stream* h, fgk_node *node);
+static void         fgk_move_right           (fgk_stream *h, fgk_node *node);
+static void         fgk_promote              (fgk_stream *h, fgk_node *node);
+static void         fgk_init_node            (fgk_node *node, usize_t i, usize_t size);
+static fgk_block*   fgk_make_block           (fgk_stream *h, fgk_node *l);
+static void         fgk_free_block           (fgk_stream *h, fgk_block *b);
+static void         fgk_factor_remaining     (fgk_stream *h);
+static inline void  fgk_swap_ptrs            (fgk_node **one, fgk_node **two);
+
+/*********************************************************************/
+/* 			    Basic Routines                           */
+/*********************************************************************/
+
+/* returns an initialized huffman encoder for an alphabet with the
+ * given size.  returns NULL if enough memory cannot be allocated */
+static fgk_stream* fgk_alloc (xd3_stream *stream /*, int alphabet_size0 */)
+{ /* Allocate the stream and its three arrays; only the sizes are filled in (see fgk_init). */
+  usize_t alphabet_size0 = ALPHABET_SIZE; /* fixed; the size parameter above is commented out */
+  fgk_stream *h;
+
+  if ((h = (fgk_stream*) xd3_alloc (stream, 1, sizeof (fgk_stream))) == NULL)
+    {
+      return NULL;
+    }
+
+  h->total_nodes  = (2 * alphabet_size0) - 1; /* leaves plus internal nodes */
+  h->total_blocks = (2 * h->total_nodes);
+  h->alphabet     = (fgk_node*)  xd3_alloc (stream, h->total_nodes,    sizeof (fgk_node));
+  h->block_array  = (fgk_block*) xd3_alloc (stream, h->total_blocks,   sizeof (fgk_block));
+  h->coded_bits   = (fgk_bit*)   xd3_alloc (stream, alphabet_size0, sizeof (fgk_bit));
+
+  if (h->coded_bits  == NULL ||
+      h->alphabet    == NULL ||
+      h->block_array == NULL)
+    {
+      fgk_destroy (stream, h); /* all three pointers were assigned above, some possibly NULL */
+      return NULL;
+    }
+
+  h->alphabet_size   = alphabet_size0;
+
+  return h;
+}
+
+static int fgk_init (xd3_stream *stream, fgk_stream *h, int is_encode)
+{ /* Reset H to its starting state; always returns 0.  is_encode is not referenced here. */
+  usize_t ui;
+  ssize_t si;
+
+  h->root_node       = h->alphabet;
+  h->decode_ptr      = h->root_node;
+  h->free_node       = h->alphabet + h->alphabet_size;
+  h->remaining_zeros = h->alphabet;
+  h->coded_depth     = 0;
+  h->zero_freq_count = h->alphabet_size + 2;
+
+  /* after two calls to factor_remaining, zero_freq_count == alphabet_size */
+  fgk_factor_remaining(h); /* set ZFE and ZFR */
+  fgk_factor_remaining(h); /* set ZFDB according to prev state */
+
+  IF_DEBUG (memset (h->alphabet, 0, sizeof (h->alphabet[0]) * h->total_nodes));
+
+  for (ui = 0; ui < h->total_blocks-1; ui += 1) /* chain every block onto the free list */
+    {
+      h->block_array[ui].block_freeptr = &h->block_array[ui + 1];
+    }
+
+  h->block_array[h->total_blocks - 1].block_freeptr = NULL;
+  h->free_block = h->block_array;
+
+  /* Zero frequency nodes are inserted in the first alphabet_size
+   * positions, with Value, weight, and a pointer to the next zero
+   * frequency node.  */
+  for (si = h->alphabet_size - 1; si >= 0; si -= 1) /* signed index so the countdown can end */
+    {
+      fgk_init_node (h->alphabet + si, (usize_t) si, h->alphabet_size);
+    }
+
+  return 0;
+}
+
+static void fgk_swap_ptrs(fgk_node **one, fgk_node **two)
+{
+  fgk_node *const held = *two; /* keep *two while it is overwritten */
+  *two = *one;
+  *one = held;
+}
+
+/* Encodes alphabet element n: fills h->coded_bits with its code, updates
+ * the tree when adaptive, and returns the number of bits to fetch. */
+static usize_t fgk_encode_data (fgk_stream* h, usize_t n)
+{
+  fgk_node *target_ptr = h->alphabet + n;
+
+  XD3_ASSERT (n < h->alphabet_size);
+
+  h->coded_depth = 0;
+
+  /* First encode the binary representation of the nth remaining
+   * zero frequency element in reverse such that the bits, which will
+   * be emitted from h->coded_depth down to 0, arrive in increasing
+   * order following the tree path.  If there is only one left, it
+   * is not necessary to encode these bits. */
+  if (IS_ADAPTIVE && target_ptr->weight == 0)
+    {
+      usize_t where, shift;
+      usize_t bits;
+
+      where = fgk_find_nth_zero(h, n); /* position of n in the zero list */
+      shift = 1;
+
+      if (h->zero_freq_rem == 0)
+	{
+	  bits = h->zero_freq_exp;
+	}
+      else
+	{
+	  bits = h->zero_freq_exp + 1;
+	}
+
+      while (bits > 0)
+	{
+	  h->coded_bits[h->coded_depth++] = (shift & where) && 1; /* low bit first, stored as 0/1 */
+
+	  bits   -= 1;
+	  shift <<= 1;
+	};
+
+      target_ptr = h->remaining_zeros; /* the tree path coded below is that of the zero-list head */
+    }
+
+  /* The path from root to node is filled into coded_bits in reverse so
+   * that it is encoded in the right order (1 = right child, 0 = left). */
+  while (target_ptr != h->root_node)
+    {
+      h->coded_bits[h->coded_depth++] = (target_ptr->parent->right_child == target_ptr);
+
+      target_ptr = target_ptr->parent;
+    }
+
+  if (IS_ADAPTIVE)
+    {
+      fgk_update_tree(h, n); /* coded_bits is already complete; this only changes the tree */
+    }
+
+  return h->coded_depth;
+}
+
+/* Pops the next bit of the code prepared by fgk_encode_data; call it
+ * once for every bit that fgk_encode_data reported. */
+static inline fgk_bit fgk_get_encoded_bit (fgk_stream *h)
+{
+  XD3_ASSERT (h->coded_depth > 0);
+  h->coded_depth -= 1;
+  return h->coded_bits[h->coded_depth];
+}
+
+/* Adjusts the tree after alphabet[n] has been encoded or decoded.
+ * A symbol whose weight is still zero is first taken off the
+ * zero-weight list.  Then every node on the path from the symbol's
+ * leaf up to, but excluding, the root is moved right, promoted and
+ * has its weight incremented; finally the root weight is incremented.
+ */
+static void fgk_update_tree (fgk_stream *h, usize_t n)
+{
+  fgk_node *walk = h->alphabet + n;
+
+  if (walk->weight == 0)
+    {
+      walk = fgk_increase_zero_weight (h, n);
+    }
+
+  /* Climb towards the root, one parent link per iteration. */
+  for (; walk != h->root_node; walk = walk->parent)
+    {
+      fgk_move_right (h, walk);
+      fgk_promote    (h, walk);
+      /* The weight changes only after both adjustments have run. */
+      walk->weight += 1;
+    }
+
+  h->root_node->weight += 1;
+}
+
+static void fgk_move_right (fgk_stream *h, fgk_node *move_fwd)
+{
+  fgk_node **fwd_par_ptr, **back_par_ptr;
+  fgk_node *move_back, *tmp;
+  /* Swaps move_fwd with the leader of its block, in the list and the tree. */
+  move_back = move_fwd->my_block->block_leader;
+  /* No-op if it already leads, if the leader is its parent, or at weight 0. */
+  if (move_fwd         == move_back ||
+      move_fwd->parent == move_back ||
+      move_fwd->weight == 0)
+    {
+      return;
+    }
+  /* Point the outer neighbours of the pair at their new occupants. */
+  move_back->right->left = move_fwd;
+
+  if (move_fwd->left)
+    {
+      move_fwd->left->right = move_back;
+    }
+  /* Exchange the right links; tmp == move_back means the two are adjacent. */
+  tmp = move_fwd->right;
+  move_fwd->right = move_back->right;
+
+  if (tmp == move_back)
+    {
+      move_back->right = move_fwd;
+    }
+  else
+    {
+      tmp->left = move_back;
+      move_back->right = tmp;
+    }
+  /* Exchange the left links in the same way. */
+  tmp = move_back->left;
+  move_back->left = move_fwd->left;
+
+  if (tmp == move_fwd)
+    {
+      move_fwd->left = move_back;
+    }
+  else
+    {
+      tmp->right = move_fwd;
+      move_fwd->left = tmp;
+    }
+  /* Locate, for each node, the child slot of its parent that holds it... */
+  if (move_fwd->parent->right_child == move_fwd)
+    {
+      fwd_par_ptr = &move_fwd->parent->right_child;
+    }
+  else
+    {
+      fwd_par_ptr = &move_fwd->parent->left_child;
+    }
+
+  if (move_back->parent->right_child == move_back)
+    {
+      back_par_ptr = &move_back->parent->right_child;
+    }
+  else
+    {
+      back_par_ptr = &move_back->parent->left_child;
+    }
+  /* ...then swap both the parent pointers and the contents of those slots. */
+  fgk_swap_ptrs (&move_fwd->parent, &move_back->parent);
+  fgk_swap_ptrs (fwd_par_ptr, back_par_ptr);
+  /* move_fwd is now recorded as the leader of the block. */
+  move_fwd->my_block->block_leader = move_fwd;
+}
+
+/* Shifts node, the leader of its block, out of that block: into its right neighbour's block or a new one. */
+static void fgk_promote (fgk_stream *h, fgk_node *node)
+{
+  fgk_node *my_left, *my_right;
+  fgk_block *cur_block;
+
+  my_right  = node->right;
+  my_left   = node->left;
+  cur_block = node->my_block;
+  /* Nothing is done for a node whose weight is still zero. */
+  if (node->weight == 0)
+    {
+      return;
+    }
+
+  /* if left is right child, parent of remaining zeros case (?), means parent
+   * has same weight as right child. */
+  if (my_left == node->right_child &&
+      node->left_child &&
+      node->left_child->weight == 0)
+    {
+      XD3_ASSERT (node->left_child == h->remaining_zeros);
+      XD3_ASSERT (node->right_child->weight == (node->weight+1)); /* child weight was already incremented */
+      /* If my_right is exactly one heavier, node and its right child both join my_right's block. */
+      if (node->weight == (my_right->weight - 1) && my_right != h->root_node)
+	{
+	  fgk_free_block (h, cur_block);
+	  node->my_block    = my_right->my_block;
+	  my_left->my_block = my_right->my_block;
+	}
+
+      return;
+    }
+
+  if (my_left == h->remaining_zeros)
+    {
+      return;
+    }
+
+  /* A left neighbour in the same block takes over as leader; otherwise the block is released. */
+  if (my_left->my_block == cur_block)
+    {
+      my_left->my_block->block_leader = my_left;
+    }
+  else
+    {
+      fgk_free_block (h, cur_block);
+    }
+
+  /* node->parent != my_right.  Join my_right's block if it is one heavier, else start a new block. */
+  if ((node->weight == (my_right->weight - 1)) && (my_right != h->root_node))
+    {
+      node->my_block = my_right->my_block;
+    }
+  else
+    {
+      node->my_block = fgk_make_block (h, node);
+    }
+}
+
+/* Called the first time element n is seen: removes it from the list of zero weight
+ * elements and adds a new internal node to the tree.  Returns the leaf h->alphabet + n.  */
+static fgk_node* fgk_increase_zero_weight (fgk_stream *h, usize_t n)
+{
+  fgk_node *this_zero, *new_internal, *zero_ptr;
+
+  this_zero = h->alphabet + n;
+
+  if (h->zero_freq_count == 1)
+    {
+      /* this is the last one: no internal node is created, the leaf only gets a block */
+      this_zero->right_child = NULL;
+
+      if (this_zero->right->weight == 1)
+	{
+	  this_zero->my_block = this_zero->right->my_block;
+	}
+      else
+	{
+	  this_zero->my_block = fgk_make_block (h, this_zero);
+	}
+
+      h->remaining_zeros = NULL;
+
+      return this_zero;
+    }
+  /* Otherwise a new internal node takes the tree position of the zero-list head. */
+  zero_ptr = h->remaining_zeros;
+  /* Internal nodes are taken from h->free_node, which fgk_init starts just past the leaves. */
+  new_internal = h->free_node++;
+
+  new_internal->parent      = zero_ptr->parent;
+  new_internal->right       = zero_ptr->right;
+  new_internal->weight      = 0;
+  new_internal->right_child = this_zero;
+  new_internal->left        = this_zero;
+
+  if (h->remaining_zeros == h->root_node)
+    {
+      /* This is the first element to be coded */
+      h->root_node           = new_internal;
+      this_zero->my_block    = fgk_make_block (h, this_zero);
+      new_internal->my_block = fgk_make_block (h, new_internal);
+    }
+  else
+    {
+      new_internal->right->left = new_internal;
+
+      if (zero_ptr->parent->right_child == zero_ptr)
+	{
+	  zero_ptr->parent->right_child = new_internal;
+	}
+      else
+	{
+	  zero_ptr->parent->left_child = new_internal;
+	}
+
+      if (new_internal->right->weight == 1)
+	{
+	  new_internal->my_block = new_internal->right->my_block;
+	}
+      else
+	{
+	  new_internal->my_block = fgk_make_block (h, new_internal);
+	}
+
+      this_zero->my_block = new_internal->my_block;
+    }
+
+  fgk_eliminate_zero (h, this_zero);
+  /* After the splice, the current zero-list head becomes the left child. */
+  new_internal->left_child = h->remaining_zeros;
+
+  this_zero->right       = new_internal;
+  this_zero->left        = h->remaining_zeros;
+  this_zero->parent      = new_internal;
+  this_zero->left_child  = NULL;
+  this_zero->right_child = NULL;
+
+  h->remaining_zeros->parent = new_internal;
+  h->remaining_zeros->right  = this_zero;
+
+  return this_zero;
+}
+
+/* Returns the position of alphabet[n] within the list of remaining
+ * zero-frequency elements, counted from h->remaining_zeros along the
+ * right_child links.  When a zero frequency element is encoded, the
+ * binary representation of this index is what follows it.  The node
+ * is assumed to be on the list; the walk has no other stop. */
+static unsigned int fgk_find_nth_zero (fgk_stream* h, usize_t n)
+{
+  const fgk_node *const wanted = h->alphabet + n;
+  const fgk_node *cursor;
+  unsigned int steps = 0;
+
+  for (cursor = h->remaining_zeros; cursor != wanted; cursor = cursor->right_child)
+    {
+      steps += 1;
+    }
+
+  return steps;
+}
+
+/* Splices node out of the list of zeros (linked via left_child/right_child). */
+static void fgk_eliminate_zero (fgk_stream* h, fgk_node *node)
+{
+  if (h->zero_freq_count == 1)
+    {
+      return;
+    }
+  /* One zero fewer: decrements zero_freq_count and refreshes exp/rem. */
+  fgk_factor_remaining(h);
+
+  if (node->left_child == NULL)
+    {
+      /* no predecessor: the next element becomes the new list head */
+      h->remaining_zeros = h->remaining_zeros->right_child;
+      h->remaining_zeros->left_child = NULL;
+    }
+  else if (node->right_child == NULL)
+    {
+      node->left_child->right_child = NULL; /* no successor: predecessor now ends the list */
+    }
+  else
+    {
+      node->right_child->left_child = node->left_child; /* interior: bridge both neighbours */
+      node->left_child->right_child = node->right_child;
+    }
+}
+
+static void fgk_init_node (fgk_node *node, usize_t i, usize_t size)
+{
+  /* Weight, tree/list linkage and block membership start out empty. */
+  node->my_block = NULL;
+  node->left     = NULL;
+  node->right    = NULL;
+  node->parent   = NULL;
+  node->weight   = 0;
+
+  /* left_child/right_child chain this node to its array neighbours;
+   * index 0 and index size-1 terminate the chain with NULL. */
+  node->left_child  = NULL;
+  node->right_child = NULL;
+
+  /* Every node but the first has a predecessor. */
+  if (i >= 1)
+    {
+      node->left_child = node - 1;
+    }
+
+  /* Every node but the last has a successor. */
+  if (i < size - 1)
+    {
+      node->right_child = node + 1;
+    }
+}
+
+/* Blocks live in one array whose entries are unions: an entry is
+ * either on the free list (linked through block_freeptr, head at
+ * h->free_block) or in use and recording the leader node of a block.
+ * Takes the head of the free list and makes `lead' its leader.  */
+static fgk_block* fgk_make_block (fgk_stream *h, fgk_node* lead)
+{
+  fgk_block *const fresh = h->free_block;
+
+  XD3_ASSERT (fresh != NULL);
+
+  /* The free-list link is read before the leader is stored, since
+   * the description above says the two share storage. */
+  h->free_block = fresh->block_freeptr;
+  fresh->block_leader = lead;
+  return fresh;
+}
+
+static void fgk_free_block (fgk_stream *h, fgk_block *b) /* puts b back at the front of the free list */
+{
+  fgk_block *const old_head = h->free_block;
+  h->free_block = b;
+  b->block_freeptr = old_head;
+}
+
+/* Decrements zero_freq_count, then sets zero_freq_exp and zero_freq_rem
+ * so that zero_freq_count == (1 << zero_freq_exp) + zero_freq_rem. */
+static void fgk_factor_remaining (fgk_stream *h)
+{
+  unsigned int rest;
+  unsigned int pow2;
+
+  h->zero_freq_count -= 1;
+  h->zero_freq_exp = 0;
+
+  /* Count how many times the new count can be halved before it is <= 1. */
+  for (rest = h->zero_freq_count; rest > 1; rest >>= 1)
+    {
+      h->zero_freq_exp += 1;
+    }
+
+  pow2 = 1 << h->zero_freq_exp;
+  h->zero_freq_rem = h->zero_freq_count - pow2;
+}
+
+/* Feeds one received bit to the decoder.  Returns non-zero once a
+ * complete code has been received (fgk_decode_data then yields it).
+ */
+static inline int fgk_decode_bit (fgk_stream* h, fgk_bit b)
+{
+  XD3_ASSERT (b == 1 || b == 0);
+  /* At a zero-weight node the bits are collected in coded_bits instead of walking the tree. */
+  if (IS_ADAPTIVE && h->decode_ptr->weight == 0)
+    {
+      usize_t bitsreq;
+
+      if (h->zero_freq_rem == 0)
+	{
+	  bitsreq = h->zero_freq_exp;
+	}
+      else
+	{
+	  bitsreq = h->zero_freq_exp + 1;
+	}
+
+      h->coded_bits[h->coded_depth] = b;
+      h->coded_depth += 1;
+
+      return h->coded_depth >= bitsreq;
+    }
+  else
+    {
+      if (b) /* 1 selects the right child, 0 the left (same convention as fgk_encode_data) */
+	{
+	  h->decode_ptr = h->decode_ptr->right_child;
+	}
+      else
+	{
+	  h->decode_ptr = h->decode_ptr->left_child;
+	}
+
+      if (h->decode_ptr->left_child == NULL)
+	{
+	  /* If the weight is non-zero, finished. */
+	  if (h->decode_ptr->weight != 0)
+	    {
+	      return 1;
+	    }
+
+	  /* Zero-weight node: finished only when it is the last zero (count == 1); else index bits follow. */
+	  return h->zero_freq_count == 1;
+	}
+      else
+	{
+	  return 0;
+	}
+    }
+}
+
+static usize_t fgk_nth_zero (fgk_stream* h, usize_t n)
+{
+  fgk_node *zero = h->remaining_zeros;
+
+  /* Step n links down the zero list and return that node's alphabet
+   * index.  Reaching the end early (right_child == NULL) means the
+   * encoder's zero count is too high; no error is raised for that
+   * here, the caller is expected to check integrity anyway. */
+  while (n != 0 && zero->right_child != NULL)
+    {
+      zero = zero->right_child;
+      n -= 1;
+    }
+  return (usize_t)(zero - h->alphabet);
+}
+
+/* To be called once fgk_decode_bit has returned 1: returns the decoded
+ * index into the alphabet, updates the tree when adaptive, and resets
+ * the decoder to the root for the next code.
+ */
+static usize_t fgk_decode_data (fgk_stream* h)
+{
+  usize_t elt = (usize_t)(h->decode_ptr - h->alphabet);
+
+  if (IS_ADAPTIVE && h->decode_ptr->weight == 0) {
+    usize_t i = 0;
+    usize_t n = 0;
+    /* Rebuild the zero-list index from the collected bits, first bit most significant. */
+    if (h->coded_depth > 0) 
+      {
+	for (; i < h->coded_depth - 1; i += 1)
+	  {
+	    n |= h->coded_bits[i];
+	    n <<= 1;
+	  }
+      }
+
+    n |= h->coded_bits[i]; /* NOTE(review): with coded_depth == 0 this reads coded_bits[0] left by an earlier code */
+    elt = fgk_nth_zero(h, n);
+  }
+
+  h->coded_depth = 0;
+
+  if (IS_ADAPTIVE)
+    {
+      fgk_update_tree(h, elt);
+    }
+
+  h->decode_ptr = h->root_node;
+
+  return elt;
+}
+
+static void fgk_destroy (xd3_stream *stream, fgk_stream *h)
+{
+  /* A NULL coder is accepted and ignored; otherwise the three arrays
+   * are released first and the coder structure itself last. */
+  if (h == NULL) { return; }
+
+  xd3_free (stream, h->alphabet);
+  xd3_free (stream, h->coded_bits);
+  xd3_free (stream, h->block_array);
+  xd3_free (stream, h);
+}
+
+/*********************************************************************/
+/* 			       Xdelta                                */
+/*********************************************************************/
+
+static int
+xd3_encode_fgk (xd3_stream *stream, fgk_stream *sec_stream, xd3_output *input, xd3_output *output, xd3_sec_cfg *cfg)
+{
+  bit_state   bits_out = BIT_STATE_ENCODE_INIT;
+  xd3_output *page = input;
+  usize_t left;
+
+  /* OPT: quit compression early if it looks bad */
+  for (; page != NULL; page = page->next_page)
+    {
+      const uint8_t *cur = page->base;
+      const uint8_t *const end = cur + page->next;
+
+      for (; cur < end; cur += 1)
+        {
+          /* Code this byte, then move its bits to the output one at a time. */
+          for (left = fgk_encode_data (sec_stream, *cur); left != 0; left -= 1)
+            {
+              int ret = xd3_encode_bit (stream, & output, & bits_out, fgk_get_encoded_bit (sec_stream));
+              if (ret != 0) { return ret; }
+            }
+        }
+    }
+
+  return xd3_flush_bits (stream, & output, & bits_out);
+}
+
+static int
+xd3_decode_fgk (xd3_stream     *stream,
+		fgk_stream     *sec_stream,
+		const uint8_t **input_pos,
+		const uint8_t  *const input_max,
+		uint8_t       **output_pos,
+		const uint8_t  *const output_max)
+{
+  bit_state bstate;
+  uint8_t *output = *output_pos;
+  const uint8_t *input = *input_pos;
+  /* Decodes until output reaches output_max.  NOTE(review): output == output_max on entry is not checked. */
+  for (;;)
+    {
+      if (input == input_max)
+	{
+	  stream->msg = "secondary decoder end of input";
+	  return XD3_INTERNAL;
+	}
+
+      bstate.cur_byte = *input++;
+      /* Bits of each input byte are consumed from the least significant (mask 1) upwards. */
+      for (bstate.cur_mask = 1; bstate.cur_mask != 0x100; bstate.cur_mask <<= 1)
+	{
+	  int done = fgk_decode_bit (sec_stream, (bstate.cur_byte & bstate.cur_mask) ? 1U : 0U);
+
+	  if (! done) { continue; }
+
+	  *output++ = fgk_decode_data (sec_stream);
+
+	  if (output == output_max)
+	    {
+	      /* During regression testing: xd3_test_clean_bits is run on the bits after the last code. */
+	      IF_REGRESSION ({
+		int ret;
+		bstate.cur_mask <<= 1;
+		if ((ret = xd3_test_clean_bits (stream, & bstate))) { return ret; }
+	      });
+
+	      (*output_pos) = output; /* positions are reported back only on success */
+	      (*input_pos) = input;
+	      return 0;
+	    }
+	}
+    }
+}
+
+#endif /* _XDELTA3_FGK_ */
diff --git a/third-party/xdelta3/xdelta3/xdelta3-hash.h b/third-party/xdelta3/xdelta3/xdelta3-hash.h
new file mode 100644
index 0000000000..9238ecd567
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/xdelta3-hash.h
@@ -0,0 +1,159 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+#ifndef _XDELTA3_HASH_H_
+#define _XDELTA3_HASH_H_
+
+#include "xdelta3-internal.h"
+/* Debug builds only: DEBUG1 records the small hash at inp, DEBUG2 asserts that recomputing it matches. */
+#if XD3_DEBUG
+#define SMALL_HASH_DEBUG1(s,inp)                                  \
+  uint32_t debug_state;                                           \
+  uint32_t debug_hval = xd3_checksum_hash (& (s)->small_hash,     \
+              xd3_scksum (&debug_state, (inp), (s)->smatcher.small_look))
+#define SMALL_HASH_DEBUG2(s,inp)                                  \
+  XD3_ASSERT (debug_hval == xd3_checksum_hash (& (s)->small_hash, \
+              xd3_scksum (&debug_state, (inp), (s)->smatcher.small_look)))
+#else
+#define SMALL_HASH_DEBUG1(s,inp)
+#define SMALL_HASH_DEBUG2(s,inp)
+#endif /* XD3_DEBUG */
+/* Copies 4 bytes from src into *dest.  Neither form ends in ';': the caller writes it. */
+#if UNALIGNED_OK
+#define UNALIGNED_READ32(dest,src) (*(dest)) = (*(uint32_t*)(src))
+#else
+#define UNALIGNED_READ32(dest,src) memcpy((dest), (src), 4)
+#endif
+
+/* These are good hash multipliers for 32-bit and 64-bit LCGs: see
+ * "linear congruential generators of different sizes and good lattice
+ * structure" */
+#define xd3_hash_multiplier32 1597334677U
+#define xd3_hash_multiplier64 1181783497276652981ULL
+
+/* Small checksum: loads the 4 bytes at base into *state and returns
+ * that value multiplied by xd3_hash_multiplier32.
+ * TODO: small cksum is hard-coded for 4 bytes (i.e., "look" is unused) */
+static inline uint32_t
+xd3_scksum (uint32_t *state, const uint8_t *base, const usize_t look)
+{
+  UNALIGNED_READ32(state, base);
+  return xd3_hash_multiplier32 * state[0];
+}
+static inline uint32_t
+xd3_small_cksum_update (uint32_t *state, const uint8_t *base, usize_t look)
+{
+  /* Reloads *state from the 4 bytes starting at base+1; "look" is unused. */
+  const uint8_t *const next = base + 1;
+  UNALIGNED_READ32(state, next);
+  return xd3_hash_multiplier32 * state[0];
+}
+
+#if XD3_ENCODER
+inline usize_t
+xd3_checksum_hash (const xd3_hash_cfg *cfg, const usize_t cksum)
+{
+  return (cksum & cfg->mask) ^ (cksum >> cfg->shift); /* masked low bits XOR shifted-down high bits */
+}
+
+#if SIZEOF_USIZE_T == 4
+inline uint32_t
+xd3_large32_cksum (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look)
+{
+  const uint8_t *const end = base + look;
+  const usize_t *pw = cfg->powers;
+  uint32_t sum = 0;
+  for (; base != end; ++base, ++pw) { sum += (*base) * (*pw); } /* sum of base[i] * powers[i] */
+  return sum;
+}
+
+inline uint32_t
+xd3_large32_cksum_update (xd3_hash_cfg *cfg, const uint32_t cksum,
+                          const uint8_t *base, const usize_t look)
+{ /* scale the old sum, add the incoming byte base[look], remove the outgoing byte base[0] */
+  return (xd3_hash_multiplier32 * cksum) + base[look] - (cfg->multiplier * base[0]);
+}
+#endif
+
+#if SIZEOF_USIZE_T == 8
+inline uint64_t
+xd3_large64_cksum (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look)
+{
+  const uint8_t *const end = base + look;
+  const usize_t *pw = cfg->powers;
+  uint64_t sum = 0;
+  for (; base != end; ++base, ++pw) { sum += (*base) * (*pw); } /* sum of base[i] * powers[i] */
+  return sum;
+}
+
+inline uint64_t
+xd3_large64_cksum_update (xd3_hash_cfg *cfg, const uint64_t cksum,
+                          const uint8_t *base, const usize_t look)
+{ /* scale the old sum, add the incoming byte base[look], remove the outgoing byte base[0] */
+  return (xd3_hash_multiplier64 * cksum) + base[look] - (cfg->multiplier * base[0]);
+}
+#endif
+
+static usize_t
+xd3_size_hashtable_bits (usize_t slots)
+{
+  usize_t bits = (SIZEOF_USIZE_T * 8) - 1;
+  usize_t i;
+  /* Result: i-1 for the smallest i >= 3 with slots < 2^i (the compaction=1
+   * setting measured in checksum_test), else the usize_t width minus 1. */
+  for (i = 3; i <= bits; i += 1)
+    {
+      /* (usize_t) 1, not 1U: i can reach the width of unsigned int
+       * when usize_t is 64 bits, and such a shift is undefined. */
+      if (slots < ((usize_t) 1 << i))
+        {
+          bits = i - 1;
+          break;
+        }
+    }
+  return bits;
+}
+
+/* Sizes a hash table for `slots' entries and precomputes the checksum powers
+ * for `look' bytes.  Returns 0, or ENOMEM if the powers array is not allocated. */
+int
+xd3_size_hashtable (xd3_stream *stream, usize_t slots, usize_t look, xd3_hash_cfg *cfg)
+{
+  usize_t bits = xd3_size_hashtable_bits (slots);
+  /* (usize_t) 1 rather than 1U: bits may be 32 or more when usize_t is
+   * 64 bits wide, where shifting an unsigned int that far is undefined. */
+  cfg->size  = ((usize_t) 1 << bits);
+  cfg->mask  = (cfg->size - 1);
+  cfg->shift = (SIZEOF_USIZE_T * 8) - bits;
+  cfg->look  = look;
+
+  cfg->powers = (usize_t*) xd3_alloc0 (stream, look, sizeof (usize_t));
+  if (cfg->powers == NULL)
+    {
+      return ENOMEM;
+    }
+
+  /* powers[i] = multiplier^(look-1-i).  NOTE(review): look == 0 is not handled. */
+  cfg->powers[look-1] = 1;
+  for (int i = look-2; i >= 0; i--)
+    {
+      cfg->powers[i] = cfg->powers[i+1] * xd3_hash_multiplier;
+    }
+  cfg->multiplier = cfg->powers[0] * xd3_hash_multiplier;
+  return 0;
+}
+
+#endif /* XD3_ENCODER */
+#endif /* _XDELTA3_HASH_H_ */
diff --git a/third-party/xdelta3/xdelta3/xdelta3-internal.h b/third-party/xdelta3/xdelta3/xdelta3-internal.h
new file mode 100644
index 0000000000..0c6a1bbcc1
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/xdelta3-internal.h
@@ -0,0 +1,385 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+#ifndef XDELTA3_INTERNAL_H__
+#define XDELTA3_INTERNAL_H__
+
+#include "xdelta3.h"
+
+typedef struct _main_file        main_file;
+typedef struct _main_extcomp     main_extcomp;
+
+void main_buffree (void *ptr);
+void* main_bufalloc (size_t size);
+void main_file_init (main_file *xfile);
+int main_file_close (main_file *xfile);
+void main_file_cleanup (main_file *xfile);
+int main_file_isopen (main_file *xfile);
+int main_file_open (main_file *xfile, const char* name, int mode);
+int main_file_exists (main_file *xfile);
+int main_file_stat (main_file *xfile, xoff_t *size);
+int xd3_whole_append_window (xd3_stream *stream);
+int xd3_main_cmdline (int argc, char **argv);
+int main_file_read (main_file  *ifile,
+		    uint8_t    *buf,
+		    size_t     size,
+		    size_t    *nread,
+		    const char *msg);
+int main_file_write (main_file *ofile, uint8_t *buf, 
+		     usize_t size, const char *msg);
+void* main_malloc (size_t size);
+void main_free (void *ptr);
+
+int test_compare_files (const char* f0, const char* f1);
+usize_t xd3_bytes_on_srcblk (xd3_source *src, xoff_t blkno);
+xoff_t xd3_source_eof(const xd3_source *src);
+
+uint32_t xd3_large_cksum_update (uint32_t cksum,
+				 const uint8_t *base,
+				 usize_t look);
+int xd3_emit_byte (xd3_stream  *stream,
+		   xd3_output **outputp,
+		   uint8_t      code);
+
+int xd3_emit_bytes (xd3_stream     *stream,
+		    xd3_output    **outputp,
+		    const uint8_t  *base,
+		    usize_t          size);
+xd3_output* xd3_alloc_output (xd3_stream *stream,
+			      xd3_output *old_output);
+
+int xd3_encode_init_full (xd3_stream *stream);
+usize_t xd3_pow2_roundup (usize_t x);
+long get_millisecs_now (void);
+int xd3_process_stream (int            is_encode,
+			xd3_stream    *stream,
+			int          (*func) (xd3_stream *),
+			int            close_stream,
+			const uint8_t *input,
+			usize_t        input_size,
+			uint8_t       *output,
+			usize_t       *output_size,
+			usize_t        output_size_max);
+
+#if PYTHON_MODULE || SWIG_MODULE || NOT_MAIN
+int xd3_main_cmdline (int argc, char **argv);
+#endif
+
+#if REGRESSION_TEST
+int xd3_selftest (void);
+#endif
+
+/* Values stored in main_file->mode (the `mode' field of struct _main_file). */
+typedef enum
+{
+  XO_READ  = 0,  /* NOTE(review): presumably "opened for reading" - confirm against main_file_open */
+  XO_WRITE = 1   /* NOTE(review): presumably "opened for writing" - confirm against main_file_open */
+} main_file_modes;
+
+#ifndef XD3_POSIX
+#define XD3_POSIX 0
+#endif
+#ifndef XD3_STDIO
+#define XD3_STDIO 0
+#endif
+#ifndef XD3_WIN32
+#define XD3_WIN32 0
+#endif
+#ifndef NOT_MAIN
+#define NOT_MAIN 0
+#endif
+
+/* If none are set, default to posix. */
+#if (XD3_POSIX + XD3_STDIO + XD3_WIN32) == 0
+#undef XD3_POSIX
+#define XD3_POSIX 1
+#endif
+
+struct _main_file /* per-file state passed to the main_file_* functions */
+{
+#if XD3_WIN32
+  HANDLE              file;
+#elif XD3_STDIO
+  FILE               *file;
+#elif XD3_POSIX
+  int                 file;
+#endif
+  /* The handle type above follows the selected I/O backend (XD3_WIN32 / XD3_STDIO / XD3_POSIX). */
+  int                 mode;          /* XO_READ or XO_WRITE (main_file_modes) */
+  const char         *filename;      /* File name or /dev/stdin,
+                                      * /dev/stdout, /dev/stderr. */
+  char               *filename_copy; /* NOTE(review): original comment duplicated `filename';
+                                      * presumably an owned copy of the name - confirm. */
+  const char         *realname;      /* NOTE(review): original comment duplicated `filename';
+                                      * actual role is not visible here - confirm. */
+  const main_extcomp *compressor;    /* External compression struct. */
+  int                 flags;         /* RD_FIRST, RD_NONEXTERNAL, ... */
+  xoff_t              nread;         /* for input position */
+  xoff_t              nwrite;        /* for output position */
+  uint8_t            *snprintf_buf;  /* internal snprintf() use */
+  int                 size_known;    /* Set by main_set_source (comment read "main_set_souze") */
+  xoff_t              source_position;  /* for avoiding seek in getblk_func */
+  int                 seek_failed;   /* after seek fails once, try FIFO */
+};
+
+#ifndef UINT32_MAX
+#define UINT32_MAX 4294967295U
+#endif
+
+#ifndef UINT64_MAX
+#define UINT64_MAX 18446744073709551615ULL
+#endif
+
+#define UINT32_OFLOW_MASK 0xfe000000U
+#define UINT64_OFLOW_MASK 0xfe00000000000000ULL
+
+/*********************************************************************
+ Integer encoder/decoder functions
+ **********************************************************************/
+
+/* Decoder only: consume n input bytes of `stream' (total_in and next_in advance, avail_in shrinks). */
+#define DECODE_INPUT(n)             \
+  do {                              \
+  stream->total_in += (xoff_t) (n); \
+  stream->avail_in -= (n);          \
+  stream->next_in  += (n);          \
+  } while (0)
+/* Function body: decodes 7 bits per input byte (bit 128 = more follows); PART holds the partial value between calls. */
+#define DECODE_INTEGER_TYPE(PART,OFLOW)                                \
+  while (stream->avail_in != 0)                                        \
+    {                                                                  \
+      usize_t next = stream->next_in[0];                               \
+                                                                       \
+      DECODE_INPUT(1);                                                 \
+                                                                       \
+      if (PART & OFLOW)                                                \
+	{                                                              \
+	  stream->msg = "overflow in decode_integer";                  \
+	  return XD3_INVALID_INPUT;                                    \
+	}                                                              \
+                                                                       \
+      PART = (PART << 7) | (next & 127);                               \
+                                                                       \
+      if ((next & 128) == 0)                                           \
+	{                                                              \
+	  (*val) = PART;                                               \
+	  PART = 0;                                                    \
+	  return 0;                                                    \
+	}                                                              \
+    }                                                                  \
+                                                                       \
+  stream->msg = "further input required";                              \
+  return XD3_INPUT
+/* Function body: reads one 7-bits-per-byte integer from [*inpp, maxp) into *valp and advances *inpp on success. */
+#define READ_INTEGER_TYPE(TYPE, OFLOW)                                 \
+  TYPE val = 0;                                                        \
+  const uint8_t *inp = (*inpp);                                        \
+  usize_t next;                                                        \
+                                                                       \
+  do                                                                   \
+    {                                                                  \
+      if (inp == maxp)						       \
+	{                                                              \
+	  stream->msg = "end-of-input in read_integer";                \
+	  return XD3_INVALID_INPUT;                                    \
+	}                                                              \
+                                                                       \
+      if (val & OFLOW)                                                 \
+	{                                                              \
+	  stream->msg = "overflow in read_intger";                     \
+	  return XD3_INVALID_INPUT;                                    \
+	}                                                              \
+                                                                       \
+      next = (*inp++);                                                 \
+      val  = (val << 7) | (next & 127);                                \
+    }                                                                  \
+  while (next & 128);                                                  \
+                                                                       \
+  (*valp) = val;                                                       \
+  (*inpp) = inp;                                                       \
+                                                                       \
+  return 0
+/* Function body: emits `num' as 7-bit groups, most significant group first, via xd3_emit_bytes. */
+#define EMIT_INTEGER_TYPE()                                            \
+  /* a 64-bit value at 7 bits per byte needs at most 10 bytes */       \
+  uint8_t buf[10];                                                     \
+  usize_t  bufi = 10;                                                  \
+                                                                       \
+  /* Fill buf from the end, 7 bits at a time, with every MSB set. */   \
+  do                                                                   \
+    {                                                                  \
+      buf[--bufi] = (num & 127) | 128;                                 \
+      num >>= 7U;                                                      \
+    }                                                                  \
+  while (num != 0);                                                    \
+                                                                       \
+  /* Turn off MSB of the last byte. */                                 \
+  buf[9] &= 127;                                                       \
+                                                                       \
+  return xd3_emit_bytes (stream, output, buf + bufi, 10 - bufi)
+
+#define IF_SIZEOF32(x) if (num < (1U   << (7 * (x)))) return (x);
+#define IF_SIZEOF64(x) if (num < (1ULL << (7 * (x)))) return (x);
+
+#if USE_UINT32
+static inline uint32_t
+xd3_sizeof_uint32_t (uint32_t num)
+{
+  /* Smallest len in 1..4 with num < 2^(7*len), or 5 if there is none. */
+  uint32_t len = 1;
+
+  while (len < 5 && num >= (1U << (7 * len))) { len += 1; }
+  return len;
+}
+/* Decodes a uint32_t from stream input; the partial value is kept in stream->dec_32part (see DECODE_INTEGER_TYPE). */
+static inline int
+xd3_decode_uint32_t (xd3_stream *stream, uint32_t *val)
+{ DECODE_INTEGER_TYPE (stream->dec_32part, UINT32_OFLOW_MASK); }
+/* Reads a uint32_t from the buffer [*inpp, maxp) into *valp (see READ_INTEGER_TYPE). */
+static inline int
+xd3_read_uint32_t (xd3_stream *stream, const uint8_t **inpp,
+		   const uint8_t *maxp, uint32_t *valp)
+{ READ_INTEGER_TYPE (uint32_t, UINT32_OFLOW_MASK); }
+
+#if XD3_ENCODER
+static inline int
+xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output, uint32_t num)
+{ EMIT_INTEGER_TYPE (); } /* appends the encoded num to *output; returns xd3_emit_bytes' result */
+#endif  /* XD3_ENCODER */
+#endif  /* USE_UINT32 */
+
+#if USE_UINT64
+static inline uint32_t
+xd3_sizeof_uint64_t (uint64_t num)
+{
+  /* The result is the smallest len in 1..9 with num < 2^(7*len),
+   * or 10 when no such len exists (the largest shift used is
+   * therefore 63 bits). */
+  uint32_t len = 1;
+
+  while (len < 10 && num >= (1ULL << (7 * len)))
+    {
+      len += 1;
+    }
+
+  return len;
+}
+/* Decodes a uint64_t from stream input; the partial value is kept in stream->dec_64part (see DECODE_INTEGER_TYPE). */
+static inline int
+xd3_decode_uint64_t (xd3_stream *stream, uint64_t *val)
+{ DECODE_INTEGER_TYPE (stream->dec_64part, UINT64_OFLOW_MASK); }
+/* Reads a uint64_t from the buffer [*inpp, maxp) into *valp (see READ_INTEGER_TYPE). */
+static inline int
+xd3_read_uint64_t (xd3_stream *stream, const uint8_t **inpp,
+		   const uint8_t *maxp, uint64_t *valp)
+{ READ_INTEGER_TYPE (uint64_t, UINT64_OFLOW_MASK); }
+
+#if XD3_ENCODER
+static inline int
+xd3_emit_uint64_t (xd3_stream *stream, xd3_output **output, uint64_t num)
+{ EMIT_INTEGER_TYPE (); } /* appends the encoded num to *output; returns xd3_emit_bytes' result */
+#endif  /* XD3_ENCODER */
+#endif  /* USE_UINT64 */
+
+#if SIZEOF_USIZE_T == 4
+#define USIZE_T_MAX             UINT32_MAX
+#define USIZE_T_MAXBLKSZ        0x80000000U
+#define XD3_MAXSRCWINSZ         (1ULL << 31)
+#define xd3_large_cksum         xd3_large32_cksum
+#define xd3_large_cksum_update  xd3_large32_cksum_update
+#define xd3_hash_multiplier     xd3_hash_multiplier32
+/* With a 4-byte usize_t the *_size helpers forward to the uint32_t coders. */
+static inline uint32_t xd3_sizeof_size (usize_t num)
+{ return xd3_sizeof_uint32_t (num); }
+static inline int xd3_decode_size (xd3_stream *stream, usize_t *valp)
+{ return xd3_decode_uint32_t (stream, (uint32_t*) valp); }
+static inline int xd3_read_size (xd3_stream *stream, const uint8_t **inpp,
+		   const uint8_t *maxp, usize_t *valp)
+{ return xd3_read_uint32_t (stream, inpp, maxp, (uint32_t*) valp); }
+#if XD3_ENCODER
+static inline int xd3_emit_size (xd3_stream *stream, xd3_output **output, usize_t num)
+{ return xd3_emit_uint32_t (stream, output, num); }
+#endif
+
+#elif SIZEOF_USIZE_T == 8
+#define USIZE_T_MAX             UINT64_MAX
+#define USIZE_T_MAXBLKSZ        0x8000000000000000ULL
+#define XD3_MAXSRCWINSZ         (1ULL << 61)
+#define xd3_large_cksum         xd3_large64_cksum
+#define xd3_large_cksum_update  xd3_large64_cksum_update
+#define xd3_hash_multiplier     xd3_hash_multiplier64
+/* With an 8-byte usize_t the *_size helpers forward to the uint64_t coders. */
+static inline uint32_t xd3_sizeof_size (usize_t num)
+{ return xd3_sizeof_uint64_t (num); }
+static inline int xd3_decode_size (xd3_stream *stream, usize_t *valp)
+{ return xd3_decode_uint64_t (stream, (uint64_t*) valp); }
+static inline int xd3_read_size (xd3_stream *stream, const uint8_t **inpp,
+		   const uint8_t *maxp, usize_t *valp)
+{ return xd3_read_uint64_t (stream, inpp, maxp, (uint64_t*) valp); }
+#if XD3_ENCODER
+static inline int xd3_emit_size (xd3_stream *stream, xd3_output **output, usize_t num)
+{ return xd3_emit_uint64_t (stream, output, num); }
+#endif
+
+#endif /* SIZEOF_USIZE_T */
+
+#if SIZEOF_XOFF_T == 4
+#define XOFF_T_MAX        UINT32_MAX
+/* With a 4-byte xoff_t the offset helpers forward to the uint32_t coders. */
+static inline int xd3_decode_offset (xd3_stream *stream, xoff_t *valp)
+{ return xd3_decode_uint32_t (stream, (uint32_t*) valp); }
+#if XD3_ENCODER
+static inline int xd3_emit_offset (xd3_stream *stream, xd3_output **output, xoff_t num)
+{ return xd3_emit_uint32_t (stream, output, num); }
+#endif
+
+#elif SIZEOF_XOFF_T == 8
+#define XOFF_T_MAX        UINT64_MAX
+/* With an 8-byte xoff_t the offset helpers forward to the uint64_t coders. */
+static inline int xd3_decode_offset (xd3_stream *stream, xoff_t *valp)
+{ return xd3_decode_uint64_t (stream, (uint64_t*) valp); }
+#if XD3_ENCODER
+static inline int xd3_emit_offset (xd3_stream *stream, xd3_output **output, xoff_t num)
+{ return xd3_emit_uint64_t (stream, output, num); }
+#endif
+
+#endif
+/* Non-zero when a + b would exceed the maximum of the type (tested as MAX - a < b, so nothing wraps). */
+#define USIZE_T_OVERFLOW(a,b) ((USIZE_T_MAX - (usize_t) (a)) < (usize_t) (b))
+#define XOFF_T_OVERFLOW(a,b) ((XOFF_T_MAX - (xoff_t) (a)) < (xoff_t) (b))
+
+int xd3_size_hashtable (xd3_stream   *stream,
+			usize_t       slots,
+			usize_t       look,
+			xd3_hash_cfg *cfg);
+
+usize_t xd3_checksum_hash (const xd3_hash_cfg *cfg, const usize_t cksum);
+
+#if USE_UINT32
+uint32_t xd3_large32_cksum (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look);
+uint32_t xd3_large32_cksum_update (xd3_hash_cfg *cfg, const uint32_t cksum,
+				   const uint8_t *base, const usize_t look);
+#endif /* USE_UINT32 */
+
+#if USE_UINT64
+uint64_t xd3_large64_cksum (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look);
+uint64_t xd3_large64_cksum_update (xd3_hash_cfg *cfg, const uint64_t cksum,
+				   const uint8_t *base, const usize_t look);
+#endif /* USE_UINT64 */
+
+#define MAX_LRU_SIZE 32U
+#define XD3_MINSRCWINSZ (XD3_ALLOCSIZE * MAX_LRU_SIZE)
+
+#endif // XDELTA3_INTERNAL_H__
diff --git a/third-party/xdelta3/xdelta3/xdelta3-list.h b/third-party/xdelta3/xdelta3/xdelta3-list.h
new file mode 100644
index 0000000000..b6616fe474
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/xdelta3-list.h
@@ -0,0 +1,127 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+#ifndef __XDELTA3_LIST__
+#define __XDELTA3_LIST__
+
+#define XD3_MAKELIST(LTYPE,ETYPE,LNAME)                                 \
+/* Recover the ETYPE that embeds list node l as its LNAME member. */    \
+static inline ETYPE*                                                    \
+LTYPE ## _entry (LTYPE* l)                                              \
+{                                                                       \
+  return (ETYPE*) ((char*) l - (ptrdiff_t) &((ETYPE*) 0)->LNAME);       \
+}                                                                       \
+/* Make l an empty circular list: both links point back at l. */        \
+static inline void                                                      \
+LTYPE ## _init (LTYPE *l)                                               \
+{                                                                       \
+  l->next = l;                                                          \
+  l->prev = l;                                                          \
+}                                                                       \
+/* Link ins between the adjacent nodes prev and next. */                \
+static inline void                                                      \
+LTYPE ## _add (LTYPE *prev, LTYPE *next, LTYPE *ins)                    \
+{                                                                       \
+  next->prev = ins;                                                     \
+  prev->next = ins;                                                     \
+  ins->next  = next;                                                    \
+  ins->prev  = prev;                                                    \
+}                                                                       \
+/* Append element i at the tail, i.e. just before head l. */            \
+static inline void                                                      \
+LTYPE ## _push_back (LTYPE *l, ETYPE *i)                                \
+{                                                                       \
+  LTYPE ## _add (l->prev, l, & i->LNAME);                               \
+}                                                                       \
+/* Unlink the node between prev and next (next is passed first). */     \
+static inline void                                                      \
+LTYPE ## _del (LTYPE *next,                                             \
+	       LTYPE *prev)                                             \
+{                                                                       \
+  next->prev = prev;                                                    \
+  prev->next = next;                                                    \
+}                                                                       \
+/* Unlink f and return the entry that followed it in the list. */       \
+static inline ETYPE*                                                    \
+LTYPE ## _remove (ETYPE *f)                                             \
+{                                                                       \
+  LTYPE *i = f->LNAME.next;                                             \
+  LTYPE ## _del (f->LNAME.next, f->LNAME.prev);                         \
+  return LTYPE ## _entry (i);                                           \
+}                                                                       \
+/* Unlink and return the last element; no check for empty. */           \
+static inline ETYPE*                                                    \
+LTYPE ## _pop_back (LTYPE *l)                                           \
+{                                                                       \
+  LTYPE *i = l->prev;                                                   \
+  LTYPE ## _del (i->next, i->prev);                                     \
+  return LTYPE ## _entry (i);                                           \
+}                                                                       \
+/* Unlink and return the first element; no check for empty. */          \
+static inline ETYPE*                                                    \
+LTYPE ## _pop_front (LTYPE *l)                                          \
+{                                                                       \
+  LTYPE *i = l->next;                                                   \
+  LTYPE ## _del (i->next, i->prev);                                     \
+  return LTYPE ## _entry (i);                                           \
+}                                                                       \
+/* Nonzero when the list holds no elements. */                          \
+static inline int                                                       \
+LTYPE ## _empty (LTYPE *l)                                              \
+{                                                                       \
+  return l == l->next;                                                  \
+}                                                                       \
+/* First element, left in place. */                                     \
+static inline ETYPE*                                                    \
+LTYPE ## _front (LTYPE *f)                                              \
+{                                                                       \
+  return LTYPE ## _entry (f->next);                                     \
+}                                                                       \
+/* Last element, left in place. */                                      \
+static inline ETYPE*                                                    \
+LTYPE ## _back (LTYPE *f)                                               \
+{                                                                       \
+  return LTYPE ## _entry (f->prev);                                     \
+}                                                                       \
+/* Nonzero when the node embedded in i is the head f itself. */         \
+static inline int                                                       \
+LTYPE ## _end (LTYPE *f, ETYPE *i)                                      \
+{                                                                       \
+  return f == & i->LNAME;                                               \
+}                                                                       \
+/* Entry that follows f. */                                             \
+static inline ETYPE*                                                    \
+LTYPE ## _next (ETYPE *f)                                               \
+{                                                                       \
+  return LTYPE ## _entry (f->LNAME.next);                               \
+}                                                                       \
+/* Count the elements with one walk around the list. */                 \
+static inline usize_t                                                   \
+LTYPE ## _length (LTYPE *l)                                             \
+{                                                                       \
+  LTYPE *p;                                                             \
+  usize_t c = 0;                                                        \
+                                                                        \
+  for (p = l->next; p != l; p = p->next)                                \
+    {                                                                   \
+      c += 1;                                                           \
+    }                                                                   \
+                                                                        \
+  return c;                                                             \
+}                                                                       \
+/* Dummy typedef that absorbs the semicolon after the invocation. */    \
+typedef int unused_ ## LTYPE
+
+#endif
diff --git a/third-party/xdelta3/xdelta3/xdelta3-lzma.h b/third-party/xdelta3/xdelta3/xdelta3-lzma.h
new file mode 100644
index 0000000000..a707da8cac
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/xdelta3-lzma.h
@@ -0,0 +1,195 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+/* Note: The use of the _easy_ decoder means we're not calling the
+ * xd3_stream malloc hooks.  TODO(jmacd) Fix if anyone cares. */
+
+#ifndef _XDELTA3_LZMA_H_
+#define _XDELTA3_LZMA_H_
+
+#include <lzma.h>
+
+typedef struct _xd3_lzma_stream xd3_lzma_stream;
+
+struct _xd3_lzma_stream {
+  lzma_stream lzma;
+  lzma_options_lzma options;
+  lzma_filter filters[2];
+};
+
+static xd3_sec_stream* 
+xd3_lzma_alloc (xd3_stream *stream)
+{
+  return (xd3_sec_stream*) xd3_alloc (stream, sizeof (xd3_lzma_stream), 1);
+}
+
+static void
+xd3_lzma_destroy (xd3_stream *stream, xd3_sec_stream *sec_stream)
+{
+  xd3_lzma_stream *ls = (xd3_lzma_stream*) sec_stream;
+  lzma_end (&ls->lzma);
+  xd3_free (stream, ls);
+}
+
+static int
+xd3_lzma_init (xd3_stream *stream, xd3_lzma_stream *sec, int is_encode)
+{
+  int ret;
+  /* Prepares sec->lzma as an encoder (is_encode != 0) or as a decoder. */
+  memset (&sec->lzma, 0, sizeof(sec->lzma));
+
+  if (is_encode)
+    {
+      uint32_t preset = 
+	(stream->flags & XD3_COMPLEVEL_MASK) >> XD3_COMPLEVEL_SHIFT;
+      /* The level bits taken from stream->flags serve as the lzma preset. */
+      if (lzma_lzma_preset(&sec->options, preset)) 
+	{
+	  stream->msg = "invalid lzma preset";
+	  return XD3_INVALID;
+	}
+      /* One-filter chain: LZMA2, terminated by LZMA_VLI_UNKNOWN. */
+      sec->filters[0].id = LZMA_FILTER_LZMA2;
+      sec->filters[0].options = &sec->options;
+      sec->filters[1].id = LZMA_VLI_UNKNOWN;
+
+      ret = lzma_stream_encoder (&sec->lzma, &sec->filters[0], LZMA_CHECK_NONE);
+    }
+  else 
+    {
+      ret = lzma_stream_decoder (&sec->lzma, UINT64_MAX, LZMA_TELL_NO_CHECK);
+    }
+  /* Either branch leaves its liblzma status in ret. */
+  if (ret != LZMA_OK)
+    {
+      stream->msg = "lzma stream init failed";
+      return XD3_INTERNAL;
+    }
+
+  return 0;
+}
+
+static int xd3_decode_lzma (xd3_stream *stream, xd3_lzma_stream *sec,
+		     const uint8_t **input_pos,
+		     const uint8_t  *const input_end,
+		     uint8_t       **output_pos,
+		     const uint8_t  *const output_end)
+{
+  uint8_t *output = *output_pos;
+  const uint8_t *input = *input_pos;
+  size_t avail_in = input_end - input;
+  size_t avail_out = output_end - output;
+  /* Hand liblzma the whole remaining input and output windows. */
+  sec->lzma.avail_in = avail_in;
+  sec->lzma.next_in = input;
+  sec->lzma.avail_out = avail_out;
+  sec->lzma.next_out = output;
+  /* Success is reported only once the output window is completely full. */
+  while (1) 
+    {
+      int lret = lzma_code (&sec->lzma, LZMA_RUN);
+
+      switch (lret)
+	{
+	case LZMA_NO_CHECK: 
+	case LZMA_OK:
+	  if (sec->lzma.avail_out == 0) 
+	    {
+	      (*output_pos) = sec->lzma.next_out;
+	      (*input_pos) = sec->lzma.next_in;
+	      return 0;
+	    }
+	  break;
+	  /* NOTE(review): every other status, LZMA_STREAM_END included, fails. */
+	default:
+	  stream->msg = "lzma decoding error";
+	  return XD3_INTERNAL;
+	}
+    }
+}
+
+#if XD3_ENCODER
+
+static int xd3_encode_lzma (xd3_stream *stream, 
+		     xd3_lzma_stream *sec, 
+		     xd3_output   *input,
+		     xd3_output   *output,
+		     xd3_sec_cfg  *cfg)
+
+{
+  lzma_action action = LZMA_RUN;
+  /* Feeds the chain of input pages to liblzma and appends to output pages. */
+  cfg->inefficient = 1;  /* Can't skip windows */
+  sec->lzma.next_in = NULL;
+  sec->lzma.avail_in = 0;
+  sec->lzma.next_out = (output->base + output->next);
+  sec->lzma.avail_out = (output->avail - output->next);
+
+  while (1)
+    {
+      int lret;
+	  size_t nwrite;
+      if (sec->lzma.avail_in == 0 && input != NULL)
+	{
+	  sec->lzma.avail_in = input->next;
+	  sec->lzma.next_in = input->base;
+	  /* Once the final page is queued, switch to a sync flush. */
+	  if ((input = input->next_page) == NULL)
+	    {
+	      action = LZMA_SYNC_FLUSH;
+	    }
+	}
+
+      lret = lzma_code (&sec->lzma, action);
+      /* Bytes produced by this call: how far avail_out dropped. */
+      nwrite = (output->avail - output->next) - sec->lzma.avail_out;
+
+      if (nwrite != 0) 
+	{
+	  output->next += nwrite;
+
+	  if (output->next == output->avail)
+	    {
+	      if ((output = xd3_alloc_output (stream, output)) == NULL)
+		{
+		  return ENOMEM;
+		}
+	      /* Point liblzma at the page returned by xd3_alloc_output. */
+	      sec->lzma.next_out = output->base;
+	      sec->lzma.avail_out = output->avail;
+	    }
+	}
+
+      switch (lret)
+	{
+	case LZMA_OK:
+	  break;
+	  /* NOTE(review): presumably returned once the sync flush completes. */
+	case LZMA_STREAM_END:
+	  return 0;
+
+	default:
+	  stream->msg = "lzma encoding error";
+	  return XD3_INTERNAL;
+	}
+    }
+  /* Not reached: the loop above is only left through a return. */
+  return 0;
+}
+
+#endif /* XD3_ENCODER */
+
+#endif /* _XDELTA3_LZMA_H_ */
diff --git a/third-party/xdelta3/xdelta3/xdelta3-main.h b/third-party/xdelta3/xdelta3/xdelta3-main.h
new file mode 100644
index 0000000000..7f1e589b07
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/xdelta3-main.h
@@ -0,0 +1,4062 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+/* This is all the extra stuff you need for convenience to users in a
+ * command line application.  It contains these major components:
+ *
+ * 1. VCDIFF tools 2. external compression support (this is
+ * POSIX-specific).  3. a general read/write loop that handles all of
+ * the Xdelta decode/encode/VCDIFF-print functions 4. command-line
+ * interpreter 5. an Xdelta application header which stores default
+ * filename, external compression settings 6. output/error printing
+ * 7. basic file support and OS interface
+ */
+
+/* TODO list: 1. do exact gzip-like filename, stdout handling.  make a
+ * .vcdiff extension, refuse to encode to stdout without -cf, etc.
+ * 2. Allow the user to add a comment string to the app header without
+ * disturbing the default behavior.
+ */
+
+/* On error handling and printing:
+ *
+ * The xdelta library sets stream->msg to indicate what condition
+ * caused an internal failure, but many failures originate here and
+ * are printed here.  The return convention is 0 for success, as
+ * throughout Xdelta code, but special attention is required here for
+ * the operating system calls with different error handling.  See the
+ * main_file_* routines.  All errors in this file have a message
+ * printed at the time of occurrence.  Since some of these calls occur
+ * within calls to the library, the error may end up being printed
+ * again with a more general error message.
+ */
+
+/*********************************************************************/
+
+#include <limits.h>
+
+#ifndef XD3_POSIX
+#define XD3_POSIX 0
+#endif
+#ifndef XD3_STDIO
+#define XD3_STDIO 0
+#endif
+#ifndef XD3_WIN32
+#define XD3_WIN32 0
+#endif
+#ifndef NOT_MAIN
+#define NOT_MAIN 0
+#endif
+
+/* Combines xd3_strerror() and strerror() */
+const char* xd3_mainerror(int err_num);
+
+#include "xdelta3-internal.h"
+
+int
+xsnprintf_func (char *str, size_t n, const char *fmt, ...)
+{
+  va_list a;
+  int ret;
+  va_start (a, fmt);
+  ret = vsnprintf_func (str, n, fmt, a);
+  va_end (a);
+  if (ret < 0)
+    {
+      ret = n;
+    }
+  return ret;
+}
+
+/* Handle externally-compressed inputs. */
+#ifndef EXTERNAL_COMPRESSION
+#define EXTERNAL_COMPRESSION 1
+#endif
+
+#define PRINTHDR_SPECIAL -4378291
+
+/* The number of soft-config variables.  */
+#define XD3_SOFTCFG_VARCNT 7
+
+/* this is used as in XPR(NT XD3_LIB_ERRMSG (stream, ret)) to print an
+ * error message from the library. */
+#define XD3_LIB_ERRMSG(stream, ret) "%s: %s\n", \
+    xd3_errstring (stream), xd3_mainerror (ret)
+
+#if XD3_POSIX
+#include <unistd.h> /* close, read, write... */
+#include <sys/types.h>
+#include <fcntl.h>
+#endif
+
+#ifndef _WIN32
+#include <unistd.h> /* lots */
+#include <sys/time.h> /* gettimeofday() */
+#include <sys/stat.h> /* stat() and fstat() */
+#else
+#if defined(_MSC_VER)
+#define strtoll _strtoi64
+#endif
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifndef WIFEXITED
+#   define WIFEXITED(stat)  (((*((int *) &(stat))) & 0xff) == 0)
+#endif
+#ifndef WEXITSTATUS
+#   define WEXITSTATUS(stat) (((*((int *) &(stat))) >> 8) & 0xff)
+#endif
+#ifndef S_ISREG
+//#   ifdef S_IFREG
+//#       define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+//#   else
+#       define S_ISREG(m) 1
+//#   endif
+#endif /* !S_ISREG */
+
+// For standard input/output handles
+static STARTUPINFO winStartupInfo;
+#endif
+
+/**********************************************************************
+ ENUMS and TYPES
+ *********************************************************************/
+
+/* These flags (mainly pertaining to main_read() operations) are set
+ * in the main_file->flags variable.  All are related to external
+ * decompression support.
+ *
+ * RD_FIRST causes the external decompression check when the input is
+ * first read.
+ *
+ * RD_NONEXTERNAL disables external decompression for reading a
+ * compressed input, in the case of Xdelta inputs.  Note: Xdelta is
+ * supported as an external compression type, which makes is the
+ * reason for this flag.  An example to justify this is: to create a
+ * delta between two files that are VCDIFF-compressed.  Two external
+ * Xdelta decoders are run to supply decompressed source and target
+ * inputs to the Xdelta encoder. */
+typedef enum
+{
+  RD_FIRST       = (1 << 0),
+  RD_NONEXTERNAL = (1 << 1),
+  RD_DECOMPSET   = (1 << 2),
+  RD_MAININPUT   = (1 << 3),
+} xd3_read_flags;
+
+/* Main commands.  For example, CMD_PRINTHDR is the "xdelta printhdr"
+ * command. */
+typedef enum
+{
+  CMD_NONE = 0,
+  CMD_PRINTHDR,
+  CMD_PRINTHDRS,
+  CMD_PRINTDELTA,
+  CMD_RECODE,
+  CMD_MERGE_ARG,
+  CMD_MERGE,
+#if XD3_ENCODER
+  CMD_ENCODE,
+#endif
+  CMD_DECODE,
+  CMD_TEST,
+  CMD_CONFIG,
+} xd3_cmd;
+
+#if XD3_ENCODER
+#define CMD_DEFAULT CMD_ENCODE
+#define IS_ENCODE(cmd) (cmd == CMD_ENCODE)
+#else
+#define CMD_DEFAULT CMD_DECODE
+#define IS_ENCODE(cmd) (0)
+#endif
+
+typedef struct _main_merge       main_merge;
+typedef struct _main_merge_list  main_merge_list;
+
+/* Various strings and magic values used to detect and call external
+ * compression.  See below for examples. */
+struct _main_extcomp
+{
+  const char    *recomp_cmdname;
+  const char    *recomp_options;
+
+  const char    *decomp_cmdname;
+  const char    *decomp_options;
+
+  const char    *ident;
+  const char    *magic;
+  usize_t        magic_size;
+  int            flags;
+};
+
+/* Merge state: */
+
+struct _main_merge_list
+{
+  main_merge_list  *next;
+  main_merge_list  *prev;
+};
+
+struct _main_merge
+{
+  const char *filename;
+
+  main_merge_list  link;
+};
+
+XD3_MAKELIST(main_merge_list,main_merge,link);
+
+/* TODO: really need to put options in a struct so that internal
+ * callers can easily reset state. */
+
+#define DEFAULT_VERBOSE 0
+
+/* Program options: various command line flags and options. */
+static int         option_stdout             = 0;
+static int         option_force              = 0;
+static int         option_verbose            = DEFAULT_VERBOSE;
+static int         option_quiet              = 0;
+static int         option_use_appheader      = 1;
+static uint8_t*    option_appheader          = NULL;
+static int         option_use_secondary      = 1;
+static const char* option_secondary          = NULL;
+static int         option_use_checksum       = 1;
+static const char* option_smatch_config      = NULL;
+static int         option_no_compress        = 0;
+static int         option_no_output          = 0; /* do not write output */
+static const char *option_source_filename    = NULL;
+
+static int         option_level              = XD3_DEFAULT_LEVEL;
+static usize_t     option_iopt_size          = XD3_DEFAULT_IOPT_SIZE;
+static usize_t     option_winsize            = XD3_DEFAULT_WINSIZE;
+
+/* option_srcwinsz is restricted to [16kB, 2GB] when usize_t is 32 bits. */
+static xoff_t      option_srcwinsz           = XD3_DEFAULT_SRCWINSZ;
+static usize_t     option_sprevsz            = XD3_DEFAULT_SPREVSZ;
+
+/* These variables are suppressed to avoid their use w/o support.  main() warns
+ * appropriately when external compression is not enabled. */
+#if EXTERNAL_COMPRESSION
+static int         num_subprocs = 0;
+static int         option_force2             = 0;
+static int         option_decompress_inputs  = 1;
+static int         option_recompress_outputs = 1;
+#endif
+
+/* This is for comparing "printdelta" output without attention to
+ * copy-instruction modes. */
+#if VCDIFF_TOOLS
+static int option_print_cpymode = 1; /* Note: see reset_defaults(). */
+#endif
+
+/* Static variables */
+IF_DEBUG(static int main_mallocs = 0;)
+
+static char*           program_name = NULL;
+static uint8_t*        appheader_used = NULL;
+static uint8_t*        main_bdata = NULL;
+static usize_t         main_bsize = 0;
+
+/* Hacks for VCDIFF tools, recode command. */
+static int allow_fake_source = 0;
+
+/* recode_stream is used by both recode/merge for reading vcdiff inputs */
+static xd3_stream *recode_stream = NULL;
+
+/* merge_stream is used by merge commands for storing the source encoding */
+static xd3_stream *merge_stream = NULL;
+
+/* This array of compressor types is compiled even if EXTERNAL_COMPRESSION is
+ * false just so the program knows the mapping of IDENT->NAME. */
+static main_extcomp extcomp_types[] =
+{
+  { "bzip2",    "-c",   "bzip2",      "-dc",   "B", "BZh",          3, 0 },
+  { "gzip",     "-c",   "gzip",       "-dc",   "G", "\037\213",     2, 0 },
+  { "compress", "-c",   "uncompress", "-c",    "Z", "\037\235",     2, 0 },
+
+  /* Xz is lzma with a 6-byte magic number http://tukaani.org/xz/format.html */
+  { "xz", "-c", "xz", "-dc", "Y", "\xfd\x37\x7a\x58\x5a\x00", 6, 0 },
+};
+
+static int main_input (xd3_cmd cmd, main_file *ifile,
+                       main_file *ofile, main_file *sfile);
+static void main_get_appheader (xd3_stream *stream, main_file *ifile,
+				main_file *output, main_file *sfile);
+
+static int main_getblk_func (xd3_stream *stream,
+			     xd3_source *source,
+			     xoff_t      blkno);
+static int main_file_seek (main_file *xfile, xoff_t pos);
+static int main_read_primary_input (main_file   *file,
+				    uint8_t     *buf,
+				    size_t       size,
+				    size_t      *nread);
+
+static const char* main_format_bcnt (xoff_t r, shortbuf *buf);
+static int main_help (void);
+
+#if XD3_ENCODER
+static int xd3_merge_input_output (xd3_stream *stream,
+				   xd3_whole_state *source);
+#endif
+
+/* The code in xdelta3-blkcache.h is essentially part of this unit, see
+ * comments there. */
+#include "xdelta3-blkcache.h"
+
+static void (*xprintf_message_func)(const char*msg) = NULL;
+
+void
+xprintf (const char *fmt, ...)
+{ /* Formats into a fixed buffer (longer text is cut) and emits it. */
+  char buf[1000];
+  va_list a;
+  int size;
+  va_start (a, fmt);
+  size = vsnprintf_func (buf, sizeof(buf), fmt, a);
+  va_end (a);
+  if (size < 0 || (size_t) size >= sizeof(buf))
+    { /* error, or a C99 vsnprintf reported a length that did not fit */
+      size = sizeof(buf) - 1;
+      buf[size] = 0;
+    }
+  if (xprintf_message_func != NULL) {
+    xprintf_message_func(buf);
+  } else {
+    size_t ignore = fwrite(buf, 1, size, stderr);
+    (void) ignore;
+  }
+}
+
+static int
+main_version (void)
+{
+  /* $Format: "  XPR(NTR \"Xdelta version $Xdelta3Version$, Copyright (C) Joshua MacDonald\\n\");" $ */
+  XPR(NTR "Xdelta version 3.1.1, Copyright (C) Joshua MacDonald\n");
+  XPR(NTR "Xdelta comes with ABSOLUTELY NO WARRANTY.\n");
+  XPR(NTR "Licensed under the Apache License, Version 2.0\n");
+  XPR(NTR "See \"LICENSE\" for details.\n");
+  return EXIT_SUCCESS;
+}
+
+static int
+main_config (void)
+{
+  main_version ();
+
+  XPR(NTR "EXTERNAL_COMPRESSION=%d\n", EXTERNAL_COMPRESSION);
+  XPR(NTR "REGRESSION_TEST=%d\n", REGRESSION_TEST);
+  XPR(NTR "SECONDARY_DJW=%d\n", SECONDARY_DJW);
+  XPR(NTR "SECONDARY_FGK=%d\n", SECONDARY_FGK);
+  XPR(NTR "SECONDARY_LZMA=%d\n", SECONDARY_LZMA);
+  XPR(NTR "UNALIGNED_OK=%d\n", UNALIGNED_OK);
+  XPR(NTR "VCDIFF_TOOLS=%d\n", VCDIFF_TOOLS);
+  XPR(NTR "XD3_ALLOCSIZE=%d\n", XD3_ALLOCSIZE);
+  XPR(NTR "XD3_DEBUG=%d\n", XD3_DEBUG);
+  XPR(NTR "XD3_ENCODER=%d\n", XD3_ENCODER);
+  XPR(NTR "XD3_POSIX=%d\n", XD3_POSIX);
+  XPR(NTR "XD3_STDIO=%d\n", XD3_STDIO);
+  XPR(NTR "XD3_WIN32=%d\n", XD3_WIN32);
+  XPR(NTR "XD3_USE_LARGEFILE64=%d\n", XD3_USE_LARGEFILE64);
+  XPR(NTR "XD3_USE_LARGESIZET=%d\n", XD3_USE_LARGESIZET);
+  XPR(NTR "XD3_DEFAULT_LEVEL=%d\n", XD3_DEFAULT_LEVEL);
+  XPR(NTR "XD3_DEFAULT_IOPT_SIZE=%d\n", XD3_DEFAULT_IOPT_SIZE);
+  XPR(NTR "XD3_DEFAULT_SPREVSZ=%d\n", XD3_DEFAULT_SPREVSZ);
+  XPR(NTR "XD3_DEFAULT_SRCWINSZ=%d\n", XD3_DEFAULT_SRCWINSZ);
+  XPR(NTR "XD3_DEFAULT_WINSIZE=%d\n", XD3_DEFAULT_WINSIZE);
+  XPR(NTR "XD3_HARDMAXWINSIZE=%d\n", XD3_HARDMAXWINSIZE);
+  XPR(NTR "sizeof(void*)=%d\n", (int)sizeof(void*));
+  XPR(NTR "sizeof(int)=%d\n", (int)sizeof(int));
+  XPR(NTR "sizeof(long)=%d\n", (int)sizeof(long));
+  XPR(NTR "sizeof(long long)=%d\n", (int)sizeof(long long));
+  XPR(NTR "sizeof(unsigned long long)=%d\n", (int)sizeof(unsigned long long));
+  XPR(NTR "sizeof(size_t)=%d\n", (int)sizeof(size_t));
+  XPR(NTR "sizeof(uint32_t)=%d\n", (int)sizeof(uint32_t));
+  XPR(NTR "sizeof(uint64_t)=%d\n", (int)sizeof(uint64_t));
+  XPR(NTR "sizeof(usize_t)=%d\n", (int)sizeof(usize_t));
+  XPR(NTR "sizeof(xoff_t)=%d\n", (int)sizeof(xoff_t));
+
+  return EXIT_SUCCESS;
+}
+
+static void
+reset_defaults(void)
+{
+  /* Puts the option_* variables and related statics back to their defaults. */
+  option_stdout = 0;
+  option_force = 0;
+  option_verbose = DEFAULT_VERBOSE;
+  option_quiet = 0;
+  option_appheader = NULL;
+  option_use_secondary = 1;
+  option_secondary = NULL;
+  option_smatch_config = NULL;
+  option_no_compress = 0;
+  option_no_output = 0;
+  option_source_filename = NULL;
+  program_name = NULL;
+  appheader_used = NULL;
+  main_bdata = NULL;
+  main_bsize = 0;
+  allow_fake_source = 0;
+
+  main_lru_reset();
+
+  option_use_appheader = 1;
+  option_use_checksum = 1;
+#if EXTERNAL_COMPRESSION
+  option_force2 = 0;
+  option_decompress_inputs  = 1;
+  option_recompress_outputs = 1;
+  num_subprocs = 0;
+#endif
+#if VCDIFF_TOOLS
+  option_print_cpymode = 1;
+#endif
+  option_level = XD3_DEFAULT_LEVEL;
+  option_iopt_size = XD3_DEFAULT_IOPT_SIZE;
+  option_winsize = XD3_DEFAULT_WINSIZE;
+  option_srcwinsz = XD3_DEFAULT_SRCWINSZ;
+  option_sprevsz = XD3_DEFAULT_SPREVSZ;
+}
+
+static void*
+main_malloc1 (size_t size)
+{
+  void* r = malloc (size);
+  if (r == NULL) { XPR(NT "malloc: %s\n", xd3_mainerror (ENOMEM)); }
+  return r;
+}
+
+void* main_bufalloc (size_t size) {
+#if XD3_WIN32
+  return VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
+#else
+  return main_malloc1(size);
+#endif
+}
+
+void*
+main_malloc (size_t size)
+{
+  void *r = main_malloc1 (size);
+  if (r) { IF_DEBUG (main_mallocs += 1); }
+  return r;
+}
+
+static void*
+main_alloc (void *opaque, size_t items, usize_t size)
+{
+  /* Allocation hook: refuse requests whose byte count overflows size_t. */
+  if (size != 0 && items > ((size_t) -1) / size) { return NULL; }
+  return main_malloc1 (items * size);
+}
+
+static void
+main_free1 (void *opaque, void *ptr)
+{
+  free (ptr);
+}
+
+void
+main_free (void *ptr)
+{
+  if (ptr)
+    {
+      IF_DEBUG (main_mallocs -= 1);
+      main_free1 (NULL, ptr);
+      IF_DEBUG (XD3_ASSERT(main_mallocs >= 0));
+    }
+}
+
+void main_buffree (void *ptr) {
+#if XD3_WIN32
+  VirtualFree(ptr, 0, MEM_RELEASE);
+#else
+  main_free1(NULL, ptr);
+#endif
+}
+
+/* This ensures that (ret = errno) always indicates failure, in case errno was
+ * accidentally not set.  If this prints there's a bug somewhere. */
+static int
+get_errno (void)
+{
+#ifndef _WIN32
+  if (errno == 0)
+    {
+      XPR(NT "you found a bug: expected errno != 0\n");
+      errno = XD3_INTERNAL;
+    }
+  return errno;
+#else
+  DWORD err_num = GetLastError();
+  if (err_num == NO_ERROR)
+    {
+      err_num = XD3_INTERNAL;
+    }
+  return err_num;
+#endif
+}
+
+const char*
+xd3_mainerror(int err_num) {
+#ifndef _WIN32
+	const char* x = xd3_strerror (err_num);
+	if (x != NULL)
+	  {
+	    return x;
+	  }
+	return strerror(err_num);
+#else
+	static char err_buf[256];
+	const char* x = xd3_strerror (err_num);
+	if (x != NULL)
+	  {
+	    return x;
+	  }
+	memset (err_buf, 0, 256);
+	FormatMessage (FORMAT_MESSAGE_FROM_SYSTEM |
+		       FORMAT_MESSAGE_IGNORE_INSERTS,
+		       NULL, err_num,
+		       MAKELANGID (LANG_NEUTRAL, SUBLANG_DEFAULT),
+		       err_buf, 256, NULL);
+	if (err_buf[0] != 0 && err_buf[strlen(err_buf) - 1] == '\n')
+	  {
+	    err_buf[strlen(err_buf) - 1] = 0;
+	  }
+	return err_buf;
+#endif
+}
+
+long
+get_millisecs_now (void)
+{
+#ifndef _WIN32
+  struct timeval tv;
+
+  gettimeofday (& tv, NULL);
+
+  return (tv.tv_sec) * 1000L + (tv.tv_usec) / 1000;
+#else
+  SYSTEMTIME st;
+  FILETIME ft;
+  __int64 *pi = (__int64*)&ft;
+  GetLocalTime(&st);
+  SystemTimeToFileTime(&st, &ft);
+  return (long)((*pi) / 10000);
+#endif
+}
+
+/* Always >= 1 millisec, right? */
+static long
+get_millisecs_since (void)
+{
+  static long last = 0;
+  long now = get_millisecs_now();
+  long diff = now - last;
+  last = now;
+  return diff;
+}
+
+static const char*
+main_format_bcnt (xoff_t r, shortbuf *buf)
+{
+  static const char* fmts[] = { "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB" };
+  usize_t i;
+  /* Formats byte count r into buf; each loop pass tries one unit of fmts. */
+  for (i = 0; i < SIZEOF_ARRAY(fmts) - 1; i += 1)
+    {
+      xoff_t new_r;
+
+      if (r == 0)
+	{
+	  short_sprintf (*buf, "0 %s", fmts[i]);
+	  return buf->buf;
+	}
+
+      if (r >= 1 && r < 10)
+	{
+	  short_sprintf (*buf, "%.2f %s", (double) r, fmts[i]);
+	  return buf->buf;
+	}
+
+      if (r >= 10 && r < 100)
+	{
+	  short_sprintf (*buf, "%.1f %s", (double) r, fmts[i]);
+	  return buf->buf;
+	}
+
+      if (r >= 100 && r < 1000)
+	{
+	  short_sprintf (*buf, "%"Q"u %s", r, fmts[i]);
+	  return buf->buf;
+	}
+      /* 1000 or more: express r in the next unit, which is 1024x larger. */
+      new_r = r / 1024;
+
+      if (new_r < 10)
+	{
+	  short_sprintf (*buf, "%.2f %s", (double) r / 1024.0, fmts[i + 1]);
+	  return buf->buf;
+	}
+
+      if (new_r < 100)
+	{
+	  short_sprintf (*buf, "%.1f %s", (double) r / 1024.0, fmts[i + 1]);
+	  return buf->buf;
+	}
+      /* 100 or more of the next unit: carry the truncated value forward. */
+      r = new_r;
+    }
+  XD3_ASSERT (0);
+  return "";
+}
+
+static char*
+main_format_rate (xoff_t bytes, long millis, shortbuf *buf)
+{
+  /* Bytes per second; intervals under 1 ms count as 1 ms (no divide by 0). */
+  static shortbuf lbuf;
+  xoff_t r = (xoff_t)(1.0 * bytes / ((millis > 0 ? millis : 1) / 1000.0));
+  main_format_bcnt (r, &lbuf);
+  short_sprintf (*buf, "%s/s", lbuf.buf);
+  return buf->buf;
+}
+
+static char*
+main_format_millis (long millis, shortbuf *buf)
+{ /* Renders a duration as "N ms", "N.N sec" or "N sec" into buf. */
+  if (millis < 1000)
+    {
+      short_sprintf (*buf, "%ld ms", millis); /* signed long: %ld, not %lu */
+    }
+  else if (millis < 10000)
+    {
+      short_sprintf (*buf, "%.1f sec", millis / 1000.0);
+    }
+  else
+    {
+      short_sprintf (*buf, "%ld sec", millis / 1000L);
+    }
+  return buf->buf;
+}
+
+/* A safe version of strtol for xoff_t. */
+static int
+main_strtoxoff (const char* s, xoff_t *xo, char which)
+{
+  char *e;
+  xoff_t x;
+  /* Parses s into *xo; which names the option letter for error messages. */
+  XD3_ASSERT(s && *s != 0);
+  /* Base 0: accepts decimal, 0x-prefixed hex and 0-prefixed octal. */
+  {
+#if SIZEOF_XOFF_T == SIZEOF_UNSIGNED_LONG_LONG
+    unsigned long long xx = strtoull (s, &e, 0);
+    unsigned long long bad = ULLONG_MAX;
+#elif SIZEOF_XOFF_T <= SIZEOF_UNSIGNED_LONG
+    unsigned long xx = strtoul (s, &e, 0);
+    unsigned long long bad = ULONG_MAX;
+#else
+    /* Something wrong with SIZEOF_XOFF_T, SIZEOF_UNSIGNED_LONG, etc. */
+    #error Bad configure script
+#endif
+    /* The maximum value results from "-1" and also from overflow. */
+    if (xx == bad)
+      {
+	XPR(NT "-%c: negative integer: %s\n", which, s);
+	return EXIT_FAILURE;
+      }
+
+    x = xx;
+  }
+  /* Anything left after the number makes the argument invalid. */
+  if (*e != 0)
+    {
+      XPR(NT "-%c: invalid integer: %s\n", which, s);
+      return EXIT_FAILURE;
+    }
+
+  (*xo) = x;
+  return 0;
+}
+
+/* Parses ARG with main_strtoxoff() and range-checks the result before
+ * storing it in *XO.  Values below LOW are rejected; values above HIGH
+ * are rejected unless HIGH is 0, which disables the upper bound.
+ * WHICH is the option letter used in messages.  Returns 0 on success,
+ * otherwise the parser's error or EXIT_FAILURE. */
+static int
+main_atoux (const char* arg, xoff_t *xo, xoff_t low,
+	    xoff_t high, char which)
+{
+  xoff_t parsed;
+  int ret = main_strtoxoff (arg, & parsed, which);
+
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  if (parsed < low)
+    {
+      XPR(NT "-%c: minimum value: %"Q"u\n", which, low);
+      return EXIT_FAILURE;
+    }
+
+  if (high != 0 && parsed > high)
+    {
+      XPR(NT "-%c: maximum value: %"Q"u\n", which, high);
+      return EXIT_FAILURE;
+    }
+
+  (*xo) = parsed;
+  return 0;
+}
+
+/* usize_t flavour of main_atoux(): parses and range-checks ARG as an
+ * xoff_t, then stores the value converted to usize_t in *UO.  *UO is
+ * left untouched on failure.  Returns 0 or main_atoux()'s error. */
+static int
+main_atou (const char* arg, usize_t *uo, usize_t low,
+	   usize_t high, char which)
+{
+  xoff_t wide;
+  int ret = main_atoux (arg, &wide, low, high, which);
+
+  if (ret == 0)
+    {
+      *uo = (usize_t)wide;
+    }
+  return ret;
+}
+
+/******************************************************************
+ FILE BASICS
+ ******************************************************************/
+
+/* With all the variation in file system-call semantics, arguments,
+ * return values and error-handling for the POSIX and STDIO file APIs,
+ * the insides of these functions make me sick, which is why these
+ * wrappers exist. */
+
+/* Mode-dependent open parameters.  Each of these expands in terms of a
+ * variable named `xfile' that must be in scope where the macro is
+ * used; they select the read or write variant from xfile->mode. */
+#define XOPEN_OPNAME (xfile->mode == XO_READ ? "read" : "write")
+#define XOPEN_STDIO  (xfile->mode == XO_READ ? "rb" : "wb")
+#define XOPEN_POSIX  (xfile->mode == XO_READ ? \
+		      O_RDONLY : O_WRONLY | O_CREAT | O_TRUNC)
+#define XOPEN_MODE   (xfile->mode == XO_READ ? 0 : 0666)
+
+/* Prints a "file OP failed" message unless option_quiet is set.  The
+ * arguments sit inside the option_quiet test, so they are only
+ * evaluated when the message is printed: do not pass expressions with
+ * side effects the caller relies on.  Uses XOPEN_OPNAME, hence also
+ * needs `xfile' in scope. */
+#define XF_ERROR(op, name, ret) \
+  do { if (!option_quiet) { XPR(NT "file %s failed: %s: %s: %s\n", (op), \
+       XOPEN_OPNAME, (name), xd3_mainerror (ret)); } } while (0)
+
+/* Per-backend helpers.  XFNO(f) is the descriptor passed to fstat()
+ * (always -1 on Win32).  XSTDOUT_XF(f) / XSTDIN_XF(f) point a main_file
+ * at the standard output / input stream and give it a display name. */
+#if XD3_STDIO
+#define XFNO(f) fileno(f->file)
+#define XSTDOUT_XF(f) { (f)->file = stdout; (f)->filename = "/dev/stdout"; }
+#define XSTDIN_XF(f)  { (f)->file = stdin;  (f)->filename = "/dev/stdin"; }
+
+#elif XD3_POSIX
+#define XFNO(f) f->file
+#define XSTDOUT_XF(f) \
+  { (f)->file = STDOUT_FILENO; (f)->filename = "/dev/stdout"; }
+#define XSTDIN_XF(f) \
+  { (f)->file = STDIN_FILENO;  (f)->filename = "/dev/stdin"; }
+
+#elif XD3_WIN32
+#define XFNO(f) -1
+#define XSTDOUT_XF(f) { \
+  (f)->file = GetStdHandle(STD_OUTPUT_HANDLE); \
+  (f)->filename = "(stdout)"; \
+  }
+#define XSTDIN_XF(f) { \
+  (f)->file = GetStdHandle(STD_INPUT_HANDLE); \
+  (f)->filename = "(stdin)"; \
+  }
+#endif
+
+/* Puts XFILE into its initial, not-open state: every field is zeroed
+ * and, on the POSIX and Win32 backends, the file member is set to the
+ * "closed" marker that main_file_isopen() compares against. */
+void
+main_file_init (main_file *xfile)
+{
+  memset (xfile, 0, sizeof (main_file));
+
+#if XD3_POSIX
+  /* POSIX descriptor: -1 means closed. */
+  xfile->file = -1;
+#endif
+#if XD3_WIN32
+  /* Win32 handle: INVALID_HANDLE_VALUE means closed. */
+  xfile->file = INVALID_HANDLE_VALUE;
+#endif
+}
+
+/* Returns non-zero when XFILE currently holds an open stream,
+ * descriptor or handle, i.e. when its file member differs from the
+ * backend's "closed" marker. */
+int
+main_file_isopen (main_file *xfile)
+{
+#if XD3_STDIO
+  return !(xfile->file == NULL);
+#elif XD3_POSIX
+  return !(xfile->file == -1);
+#elif XD3_WIN32
+  return !(xfile->file == INVALID_HANDLE_VALUE);
+#endif
+}
+
+/* Closes XFILE if it is open and marks it closed.
+ *
+ * Returns 0 when the file was not open or closed cleanly, otherwise the
+ * get_errno() value of the failed close.  The error code is captured
+ * right after the failing call: XF_ERROR only evaluates its arguments
+ * when option_quiet is off, so the code must not be fetched inside the
+ * macro invocation (doing so returned the raw -1 in quiet mode). */
+int
+main_file_close (main_file *xfile)
+{
+  int ret = 0;
+
+  if (! main_file_isopen (xfile))
+    {
+      return 0;
+    }
+
+#if XD3_STDIO
+  if (fclose (xfile->file) != 0)
+    {
+      ret = get_errno ();
+    }
+  xfile->file = NULL;
+
+#elif XD3_POSIX
+  if (close (xfile->file) != 0)
+    {
+      ret = get_errno ();
+    }
+  xfile->file = -1;
+
+#elif XD3_WIN32
+  if (!CloseHandle(xfile->file)) {
+    ret = get_errno ();
+  }
+  xfile->file = INVALID_HANDLE_VALUE;
+#endif
+
+  if (ret != 0) { XF_ERROR ("close", xfile->filename, ret); }
+  return ret;
+}
+
+/* Releases everything XFILE owns: closes it if it is still open and
+ * frees the snprintf_buf and filename_copy allocations, resetting both
+ * pointers to NULL. */
+void
+main_file_cleanup (main_file *xfile)
+{
+  XD3_ASSERT (xfile != NULL);
+
+  if (main_file_isopen (xfile))
+    {
+      main_file_close (xfile);
+    }
+
+  /* The two buffers are independent of each other. */
+  if (xfile->filename_copy != NULL)
+    {
+      main_free(xfile->filename_copy);
+      xfile->filename_copy = NULL;
+    }
+
+  if (xfile->snprintf_buf != NULL)
+    {
+      main_free(xfile->snprintf_buf);
+      xfile->snprintf_buf = NULL;
+    }
+}
+
+/* Opens NAME for reading or writing (MODE is XO_READ or a write mode)
+ * and records the result in XFILE, which must not already be open.
+ *
+ * An empty NAME is rejected with XD3_INVALID.  On failure the
+ * get_errno() value is reported through XF_ERROR and returned; on
+ * success xfile->realname is set to NAME, xfile->nread is reset and 0
+ * is returned.  xfile->mode is set in every case. */
+int
+main_file_open (main_file *xfile, const char* name, int mode)
+{
+  int ret = 0;
+
+  xfile->mode = mode;
+
+  XD3_ASSERT (name != NULL);
+  XD3_ASSERT (! main_file_isopen (xfile));
+  if (*name == '\0')
+    {
+      XPR(NT "invalid file name: empty string\n");
+      return XD3_INVALID;
+    }
+
+  IF_DEBUG1(DP(RINT "[main] open source %s\n", name));
+
+#if XD3_STDIO
+  xfile->file = fopen (name, XOPEN_STDIO);
+  if (xfile->file == NULL)
+    {
+      ret = get_errno ();
+    }
+
+#elif XD3_POSIX
+  /* TODO: Should retry this call if interrupted, similar to read/write */
+  {
+    const int fd = open (name, XOPEN_POSIX, XOPEN_MODE);
+
+    if (fd < 0)
+      {
+	ret = get_errno ();
+      }
+    else
+      {
+	xfile->file = fd;
+      }
+  }
+
+#elif XD3_WIN32
+  {
+    /* Reads open an existing file; writes create one, overwriting an
+     * existing file only when option_force is set. */
+    const DWORD access = (mode == XO_READ) ? GENERIC_READ : GENERIC_WRITE;
+    const DWORD disposition = (mode == XO_READ) ?
+      OPEN_EXISTING :
+      (option_force ? CREATE_ALWAYS : CREATE_NEW);
+
+    xfile->file = CreateFile(name, access, FILE_SHARE_READ, NULL,
+			     disposition, FILE_ATTRIBUTE_NORMAL, NULL);
+    if (xfile->file == INVALID_HANDLE_VALUE)
+      {
+	ret = get_errno ();
+      }
+  }
+#endif
+
+  if (ret != 0)
+    {
+      XF_ERROR ("open", name, ret);
+      return ret;
+    }
+
+  xfile->realname = name;
+  xfile->nread = 0;
+  return 0;
+}
+
+/* Stores the size of the open file XFILE in *SIZE.
+ *
+ * Returns 0 on success.  On Win32 a handle that is not a disk file
+ * yields -1 and a failed size query yields get_errno(); on the other
+ * backends a failed fstat() yields get_errno() and a file that is not
+ * a regular file yields ESPIPE.  *SIZE is only written on success. */
+int
+main_file_stat (main_file *xfile, xoff_t *size)
+{
+  int ret = 0;
+#if XD3_WIN32
+  if (GetFileType(xfile->file) != FILE_TYPE_DISK)
+    {
+      return -1;
+    }
+# if (_WIN32_WINNT >= 0x0500)
+  {
+    LARGE_INTEGER li;
+    if (GetFileSizeEx(xfile->file, &li) == 0)
+      {
+	return get_errno ();
+      }
+    *size = li.QuadPart;
+  }
+# else
+  {
+    /* Older API: only the low 32 bits of the size are requested. */
+    DWORD filesize = GetFileSize(xfile->file, NULL);
+    if (filesize == INVALID_FILE_SIZE)
+      {
+	return get_errno ();
+      }
+    *size = filesize;
+  }
+# endif
+#else
+  struct stat sbuf;
+  if (fstat (XFNO (xfile), & sbuf) < 0)
+    {
+      ret = get_errno ();
+      return ret;
+    }
+
+  if (! S_ISREG (sbuf.st_mode))
+    {
+      return ESPIPE;
+    }
+  (*size) = sbuf.st_size;
+#endif
+  return ret;
+}
+
+/* Returns 1 when xfile->filename names an existing regular file and 0
+ * otherwise (including when stat() fails). */
+int
+main_file_exists (main_file *xfile)
+{
+  struct stat info;
+
+  if (stat (xfile->filename, & info) != 0)
+    {
+      return 0;
+    }
+  return S_ISREG (info.st_mode) ? 1 : 0;
+}
+
+#if (XD3_POSIX || EXTERNAL_COMPRESSION)
+/* POSIX-generic transfer loop.  FUNC is read() or write() cast to
+ * xd3_posix_func; it is called repeatedly, at most 1<<30 bytes at a
+ * time, until SIZE bytes have been processed.  EAGAIN and EINTR are
+ * retried, any other error is returned as the get_errno() value.
+ * When NREAD is non-NULL (the read case) a zero-byte result ends the
+ * loop early and the number of bytes processed is stored in *NREAD;
+ * for writes NULL is passed.  Returns 0 on success. */
+typedef int (xd3_posix_func) (int fd, uint8_t *buf, usize_t size);
+
+static int
+xd3_posix_io (int fd, uint8_t *buf, size_t size,
+	      xd3_posix_func *func, size_t *nread)
+{
+  size_t done = 0;
+
+  while (done < size)
+    {
+      const size_t chunk = xd3_min(size - done, 1U << 30);
+      const ssize_t got = (*func) (fd, buf + done, chunk);
+
+      if (got < 0)
+	{
+	  const int err = get_errno ();
+
+	  if (err == EAGAIN || err == EINTR)
+	    {
+	      /* Transient condition: try the same chunk again. */
+	      continue;
+	    }
+	  return err;
+	}
+
+      /* Zero bytes on a read means end of input. */
+      if (got == 0 && nread != NULL) { break; }
+
+      done += got;
+    }
+
+  if (nread != NULL) { (*nread) = done; }
+  return 0;
+}
+#endif
+
+#if XD3_WIN32
+/* Win32 transfer loop: calls ReadFile() (IS_READ non-zero) or
+ * WriteFile() until SIZE bytes have been processed.
+ *
+ * Each request is limited to 1<<30 bytes, like xd3_posix_io(), so the
+ * size_t remainder is never narrowed into a DWORD that wraps (a
+ * wrapped value of 0 would look like end of file, or never make
+ * progress on a write).
+ *
+ * When NREAD is non-NULL a zero-byte transfer ends the loop and the
+ * byte count is stored in *NREAD.  For reads, ERROR_HANDLE_EOF and
+ * ERROR_BROKEN_PIPE are treated as end of input rather than as errors.
+ * Every other failure, and every write failure, is returned as the
+ * get_errno() value.  Returns 0 on success. */
+static int
+xd3_win32_io (HANDLE file, uint8_t *buf, size_t size,
+	      int is_read, size_t *nread)
+{
+  int ret = 0;
+  size_t nproc = 0;
+
+  while (nproc < size)
+    {
+      DWORD nproc2 = 0;
+      DWORD nremain = (DWORD) xd3_min(size - nproc, 1U << 30);
+
+      if ((is_read ?
+	   ReadFile (file, buf + nproc, nremain, &nproc2, NULL) :
+	   WriteFile (file, buf + nproc, nremain, &nproc2, NULL)) == 0)
+	{
+	  ret = get_errno();
+	  /* A failed write can never complete by retrying, so report
+	   * it instead of looping on the same error. */
+	  if (!is_read ||
+	      (ret != ERROR_HANDLE_EOF && ret != ERROR_BROKEN_PIPE))
+	    {
+	      return ret;
+	    }
+	  /* By falling through here, we'll break this loop in the
+	   * read case in case of eof or broken pipe. */
+	}
+
+      nproc += nproc2;
+
+      if (nread != NULL && nproc2 == 0) { break; }
+    }
+  if (nread != NULL) { (*nread) = nproc; }
+  return 0;
+}
+#endif
+
+/* Reads up to SIZE bytes from IFILE into BUF and stores the number of
+ * bytes obtained in *NREAD.  POSIX is unbuffered, while STDIO is
+ * buffered, so this should always be called on blocks.
+ *
+ * On failure the error is printed prefixed by MSG and returned; on
+ * success ifile->nread is advanced by *NREAD and 0 is returned. */
+int
+main_file_read (main_file  *ifile,
+		uint8_t    *buf,
+		size_t      size,
+		size_t     *nread,
+		const char *msg)
+{
+  int ret = 0;
+  IF_DEBUG1(DP(RINT "[main] read %s up to %"Z"u\n", ifile->filename, size));
+
+#if XD3_STDIO
+  size_t got = fread (buf, 1, size, ifile->file);
+
+  /* A short count is only an error when the stream says so;
+   * otherwise it is simply end of file. */
+  if (got < size && ferror (ifile->file))
+    {
+      ret = get_errno ();
+    }
+  else
+    {
+      *nread = got;
+    }
+
+#elif XD3_POSIX
+  ret = xd3_posix_io (ifile->file, buf, size, (xd3_posix_func*) &read, nread);
+#elif XD3_WIN32
+  ret = xd3_win32_io (ifile->file, buf, size, 1 /* is_read */, nread);
+#endif
+
+  if (ret != 0)
+    {
+      XPR(NT "%s: %s: %s\n", msg, ifile->filename, xd3_mainerror (ret));
+      return ret;
+    }
+
+  if (option_verbose > 4)
+    {
+      XPR(NT "read %s: %"Z"u bytes\n", ifile->filename, (*nread));
+    }
+  ifile->nread += (*nread);
+  return 0;
+}
+
+int
+main_file_write (main_file *ofile, uint8_t *buf, usize_t size, const char *msg)
+{
+  int ret = 0;
+
+  IF_DEBUG1(DP(RINT "[main] write %"W"u\n bytes", size));
+  
+#if XD3_STDIO
+  usize_t result;
+
+  result = fwrite (buf, 1, size, ofile->file);
+
+  if (result != size) { ret = get_errno (); }
+
+#elif XD3_POSIX
+  ret = xd3_posix_io (ofile->file, buf, size, (xd3_posix_func*) &write, NULL);
+
+#elif XD3_WIN32
+  ret = xd3_win32_io (ofile->file, buf, size, 0, NULL);
+
+#endif
+
+  if (ret)
+    {
+      XPR(NT "%s: %s: %s\n", msg, ofile->filename, xd3_mainerror (ret));
+    }
+  else
+    {
+      if (option_verbose > 5) { XPR(NT "write %s: %"W"u bytes\n",
+				    ofile->filename, size); }
+      ofile->nwrite += size;
+    }
+
+  return ret;
+}
+
+/* Moves the file position of XFILE to the absolute offset POS, counted
+ * from the start of the file.  Returns 0 on success or the get_errno()
+ * value when the backend's seek call fails. */
+static int
+main_file_seek (main_file *xfile, xoff_t pos)
+{
+  int ret = 0;
+
+#if XD3_STDIO
+  /* NOTE(review): fseek() takes a long offset -- confirm that the
+   * xoff_t values passed here always fit. */
+  if (fseek (xfile->file, pos, SEEK_SET) != 0) { ret = get_errno (); }
+
+#elif XD3_POSIX
+  /* Success means lseek() reports exactly the requested offset. */
+  if ((xoff_t) lseek (xfile->file, pos, SEEK_SET) != pos)
+    { ret = get_errno (); }
+
+#elif XD3_WIN32
+# if (_WIN32_WINNT >= 0x0500)
+  LARGE_INTEGER move, out;
+  move.QuadPart = pos;
+  if (SetFilePointerEx(xfile->file, move, &out, FILE_BEGIN) == 0)
+    {
+      ret = get_errno ();
+    }
+# else
+  /* Older API: POS is converted to LONG and no high-order part is
+   * supplied. */
+  if (SetFilePointer(xfile->file, (LONG)pos, NULL, FILE_BEGIN) ==
+      INVALID_SET_FILE_POINTER)
+    {
+      ret = get_errno ();
+    }
+# endif
+#endif
+
+  return ret;
+}
+
+/* Writes the stream's pending output buffer (next_out / avail_out) to
+ * OFILE for the encode, decode and recode commands.  (The VCDIFF tools
+ * use main_print_func() instead.)  Nothing is written when
+ * option_no_output is set or the buffer is empty.  Returns 0 or the
+ * error from main_file_write(). */
+static int
+main_write_output (xd3_stream* stream, main_file *ofile)
+{
+  IF_DEBUG1(DP(RINT "[main] write(%s) %"W"u\n bytes", ofile->filename, stream->avail_out));
+
+  if (option_no_output || !(stream->avail_out > 0))
+    {
+      return 0;
+    }
+
+  return main_file_write (ofile, stream->next_out,
+			  stream->avail_out, "write failed");
+}
+
+/* Translates the secondary-compression options (option_use_secondary,
+ * option_secondary) into CONFIG->flags and, for djw, the per-section
+ * ngroups settings.
+ *
+ * Returns 0 on success (including when secondary compression is not in
+ * use) and XD3_INVALID for an unknown or not-compiled-in compressor
+ * name or an invalid djw level.  An invalid level is rejected whether
+ * or not option_quiet is set; main_atou() has already printed the
+ * reason. */
+static int
+main_set_secondary_flags (xd3_config *config)
+{
+  int ret;
+  if (!option_use_secondary)
+    {
+      return 0;
+    }
+  if (option_secondary == NULL)
+    {
+      /* Set a default secondary compressor if LZMA is built in, otherwise
+       * default to no secondary compressor. */
+      if (SECONDARY_LZMA)
+	{
+	  config->flags |= XD3_SEC_LZMA;
+	}
+    }
+  else
+    {
+      if (strcmp (option_secondary, "lzma") == 0 && SECONDARY_LZMA)
+	{
+	  config->flags |= XD3_SEC_LZMA;
+	}
+      else if (strcmp (option_secondary, "fgk") == 0 && SECONDARY_FGK)
+	{
+	  config->flags |= XD3_SEC_FGK;
+	}
+      else if (strncmp (option_secondary, "djw", 3) == 0 && SECONDARY_DJW)
+	{
+	  usize_t level = XD3_DEFAULT_SECONDARY_LEVEL;
+
+	  config->flags |= XD3_SEC_DJW;
+
+	  /* An optional level 0-9 may follow "djw". */
+	  if (strlen (option_secondary) > 3 &&
+	      (ret = main_atou (option_secondary + 3,
+				&level,
+				0, 9, 'S')) != 0)
+	    {
+	      return XD3_INVALID;
+	    }
+
+	  /* XD3_SEC_NOXXXX flags disable secondary compression on
+	   * a per-section basis.  For djw, ngroups=1 indicates
+	   * minimum work, ngroups=0 uses default settings, which
+	   * is > 1 groups by default. */
+	  if (level < 1) { config->flags |= XD3_SEC_NODATA; }
+	  if (level < 7) { config->sec_data.ngroups = 1; }
+	  else { config->sec_data.ngroups = 0; }
+
+	  if (level < 3) { config->flags |= XD3_SEC_NOINST; }
+	  if (level < 8) { config->sec_inst.ngroups = 1; }
+	  else { config->sec_inst.ngroups = 0; }
+
+	  if (level < 5) { config->flags |= XD3_SEC_NOADDR; }
+	  if (level < 9) { config->sec_addr.ngroups = 1; }
+	  else { config->sec_addr.ngroups = 0; }
+	}
+      else if (*option_secondary == 0 ||
+	       strcmp (option_secondary, "none") == 0)
+	{
+	  /* Explicitly no secondary compressor. */
+	}
+      else
+	{
+	  if (!option_quiet)
+	    {
+	      XPR(NT "unrecognized or not compiled secondary compressor: %s\n",
+		  option_secondary);
+	    }
+	  return XD3_INVALID;
+	}
+    }
+
+  if (option_verbose)
+    {
+      /* Test each flag with '&': OR-ing the flag in is always
+       * non-zero and would report "lzma" for every configuration. */
+      XPR(NT "secondary compression: %s\n",
+	  (config->flags & XD3_SEC_LZMA) ? "lzma" :
+	  ((config->flags & XD3_SEC_FGK) ? "fgk" :
+	   ((config->flags & XD3_SEC_DJW) ? "djw" :
+	    "none")));
+    }
+
+  return 0;
+}
+
+/******************************************************************
+ VCDIFF TOOLS
+ *****************************************************************/
+
+#include "xdelta3-merge.h"
+
+#if VCDIFF_TOOLS
+
+/* The following macros let VCDIFF print using main_file_write(),
+ * for example:
+ *
+ *   VC(UT "trying to be portable: %d\n", x)VE;
+ *
+ * Together they expand to one do { ... } while (0) statement that
+ * formats into xfile->snprintf_buf with xsnprintf_func() and writes the
+ * result with main_file_write().  The enclosing function must have an
+ * int `ret' and a main_file pointer `xfile' in scope, and it RETURNS
+ * ret from that function when the formatted text does not fit in
+ * SNPRINTF_BUFSIZE bytes or the write fails. */
+#define SNPRINTF_BUFSIZE 1024
+#define VC do { if (((ret = xsnprintf_func
+#define UT (char*)xfile->snprintf_buf, SNPRINTF_BUFSIZE,
+#define VE ) >= SNPRINTF_BUFSIZE			       \
+  && (ret = main_print_overflow(ret)) != 0)		       \
+  || (ret = main_file_write(xfile, xfile->snprintf_buf,        \
+			    (usize_t)ret, "print")) != 0)      \
+  { return ret; } } while (0)
+
+/* Called by the VC...VE macros when a formatted line needs X bytes,
+ * which does not fit in the print buffer.  Prints a diagnostic and
+ * returns XD3_INTERNAL. */
+static int
+main_print_overflow (int x)
+{
+  const int status = XD3_INTERNAL;
+
+  XPR(NT "internal print buffer overflow: %d bytes\n", x);
+  return status;
+}
+
+/* This function prints a single VCDIFF window.
+ *
+ * It decodes the window's instruction section one code at a time with
+ * xd3_decode_instruction() and writes one table row per code to XFILE
+ * through the VC...VE macros (which return from this function on a
+ * print failure).  After the last instruction the decoded sizes and
+ * section positions are cross-checked.  Returns 0 on success, the
+ * decoder's error, or XD3_INTERNAL on an inconsistency. */
+static int
+main_print_window (xd3_stream* stream, main_file *xfile)
+{
+  int ret;
+  usize_t size = 0;  /* target bytes produced by the instructions so far */
+
+  VC(UT "  Offset Code Type1 Size1  @Addr1 + Type2 Size2 @Addr2\n")VE;
+
+  while (stream->inst_sect.buf < stream->inst_sect.buf_max)
+    {
+      usize_t code = stream->inst_sect.buf[0];
+      const uint8_t *addr_before = stream->addr_sect.buf;
+      const uint8_t *inst_before = stream->inst_sect.buf;
+      usize_t addr_bytes;
+      usize_t inst_bytes;
+      usize_t size_before = size;
+
+      if ((ret = xd3_decode_instruction (stream)))
+	{
+	  XPR(NT "instruction decode error at %"Q"u: %s\n",
+	      stream->dec_winstart + size, stream->msg);
+	  return ret;
+	}
+
+      /* How many encoded bytes this instruction consumed. */
+      addr_bytes = (usize_t)(stream->addr_sect.buf - addr_before);
+      inst_bytes = (usize_t)(stream->inst_sect.buf - inst_before);
+
+      VC(UT "  %06"Q"u %03"W"u  %s %6"W"u",
+	 stream->dec_winstart + size,
+	 option_print_cpymode ? code : 0,
+	 xd3_rtype_to_string ((xd3_rtype) stream->dec_current1.type,
+			      option_print_cpymode),
+	 stream->dec_current1.size)VE;
+
+      if (stream->dec_current1.type != XD3_NOOP)
+	{
+	  if (stream->dec_current1.type >= XD3_CPY)
+	    {
+	      /* Addresses at or past dec_cpylen are printed as T@
+	       * relative to it, the others as S@ offset by
+	       * dec_cpyoff. */
+	      if (stream->dec_current1.addr >= stream->dec_cpylen)
+		{
+		  VC(UT " T@%-6"W"u",
+		     stream->dec_current1.addr - stream->dec_cpylen)VE;
+		}
+	      else
+		{
+		  VC(UT " S@%-6"Q"u",
+		     stream->dec_cpyoff + stream->dec_current1.addr)VE;
+		}
+	    }
+	  else
+	    {
+	      VC(UT "        ")VE;
+	    }
+
+	  size += stream->dec_current1.size;
+	}
+
+      if (stream->dec_current2.type != XD3_NOOP)
+	{
+	  VC(UT "  %s %6"W"u",
+	     xd3_rtype_to_string ((xd3_rtype) stream->dec_current2.type,
+				  option_print_cpymode),
+	     stream->dec_current2.size)VE;
+
+	  if (stream->dec_current2.type >= XD3_CPY)
+	    {
+	      if (stream->dec_current2.addr >= stream->dec_cpylen)
+		{
+		  VC(UT " T@%-6"W"u",
+		     stream->dec_current2.addr - stream->dec_cpylen)VE;
+		}
+	      else
+		{
+		  VC(UT " S@%-6"Q"u",
+		     stream->dec_cpyoff + stream->dec_current2.addr)VE;
+		}
+	    }
+
+	  size += stream->dec_current2.size;
+	}
+
+      VC(UT "\n")VE;
+
+      /* In verbose mode, point out copies whose encoding is at least
+       * as long as the data they produce. */
+      if (option_verbose &&
+	  addr_bytes + inst_bytes >= (size - size_before) &&
+	  (stream->dec_current1.type >= XD3_CPY ||
+	   stream->dec_current2.type >= XD3_CPY))
+	{
+	  VC(UT "  %06"Q"u (inefficiency) %"W"u encoded as %"W"u bytes\n",
+	     stream->dec_winstart + size_before,
+	     size - size_before,
+	     addr_bytes + inst_bytes)VE;
+	}
+    }
+
+  if (stream->dec_tgtlen != size && (stream->flags & XD3_SKIP_WINDOW) == 0)
+    {
+      XPR(NT "target window size inconsistency\n");
+      return XD3_INTERNAL;
+    }
+
+  if (stream->dec_position != stream->dec_maxpos)
+    {
+      XPR(NT "target window position inconsistency\n");
+      return XD3_INTERNAL;
+    }
+
+  if (stream->addr_sect.buf != stream->addr_sect.buf_max)
+    {
+      XPR(NT "address section inconsistency\n");
+      return XD3_INTERNAL;
+    }
+
+  return 0;
+}
+
+/* Prints to XFILE the parts of FILE that are set: its filename and the
+ * command name of its external compressor.  TYPE is a label placed in
+ * the output line.  Returns 0, or the error the VC...VE macros return
+ * when printing fails. */
+static int
+main_print_vcdiff_file (main_file *xfile, main_file *file, const char *type)
+{
+  int ret;  /* assigned inside the VC...VE macros */
+
+  if (file->filename != NULL)
+    {
+      VC(UT "XDELTA filename (%s):     %s\n",
+	 type, file->filename)VE;
+    }
+
+  if (file->compressor != NULL)
+    {
+      VC(UT "XDELTA ext comp (%s):     %s\n",
+	 type, file->compressor->recomp_cmdname)VE;
+    }
+
+  return 0;
+}
+
+/* This function prints a VCDIFF input, mainly for debugging purposes.
+ *
+ * For the first window (dec_winstart == 0) it prints the file header,
+ * including the application header and the file names recovered from
+ * it; for every window it prints the window header fields and then,
+ * unless only the header was requested or the window is skipped, the
+ * instruction table via main_print_window().  All output goes to XFILE
+ * through the VC...VE macros, which return from this function on a
+ * print failure.  Returns 0, PRINTHDR_SPECIAL after a header-only
+ * print, ENOMEM, or a print/decode error. */
+static int
+main_print_func (xd3_stream* stream, main_file *xfile)
+{
+  int ret;
+
+  if (option_no_output)
+    {
+      return 0;
+    }
+
+  /* The print buffer used by VC...VE is allocated on first use. */
+  if (xfile->snprintf_buf == NULL)
+    {
+      if ((xfile->snprintf_buf =
+	   (uint8_t*)main_malloc(SNPRINTF_BUFSIZE)) == NULL)
+	{
+	  return ENOMEM;
+	}
+    }
+
+  if (stream->dec_winstart == 0)
+    {
+      VC(UT "VCDIFF version:               0\n")VE;
+      VC(UT "VCDIFF header size:           %"W"u\n",
+	 stream->dec_hdrsize)VE;
+      VC(UT "VCDIFF header indicator:      ")VE;
+      if ((stream->dec_hdr_ind & VCD_SECONDARY) != 0)
+	VC(UT "VCD_SECONDARY ")VE;
+      if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0)
+	VC(UT "VCD_CODETABLE ")VE;
+      if ((stream->dec_hdr_ind & VCD_APPHEADER) != 0)
+	VC(UT "VCD_APPHEADER ")VE;
+      if (stream->dec_hdr_ind == 0)
+	VC(UT "none")VE;
+      VC(UT "\n")VE;
+
+      IF_SEC(VC(UT "VCDIFF secondary compressor:  %s\n",
+		stream->sec_type ? stream->sec_type->name : "none")VE);
+      IF_NSEC(VC(UT "VCDIFF secondary compressor: unsupported\n")VE);
+
+      if (stream->dec_hdr_ind & VCD_APPHEADER)
+	{
+	  uint8_t *apphead;
+	  usize_t appheadsz;
+	  ret = xd3_get_appheader (stream, & apphead, & appheadsz);
+
+	  if (ret == 0 && appheadsz > 0)
+	    {
+	      int sq = option_quiet;
+	      main_file i, o, s;
+	      XD3_ASSERT (apphead != NULL);
+	      VC(UT "VCDIFF application header:    ")VE;
+	      if ((ret = main_file_write (xfile, apphead,
+					  appheadsz, "print")) != 0)
+		{ return ret; }
+	      VC(UT "\n")VE;
+
+	      main_file_init (& i);
+	      main_file_init (& o);
+	      main_file_init (& s);
+	      /* Parse the header quietly, then restore option_quiet. */
+	      option_quiet = 1;
+	      main_get_appheader (stream, &i, & o, & s);
+	      option_quiet = sq;
+
+	      /* Print both descriptions, then release the temporary
+	       * files on every path before reporting a failure. */
+	      if ((ret = main_print_vcdiff_file (xfile, & o, "output")) == 0)
+		{
+		  ret = main_print_vcdiff_file (xfile, & s, "source");
+		}
+	      main_file_cleanup (& i);
+	      main_file_cleanup (& o);
+	      main_file_cleanup (& s);
+	      if (ret != 0)
+		{ return ret; }
+	    }
+	}
+    }
+  else
+    {
+      VC(UT "\n")VE;
+    }
+
+  VC(UT "VCDIFF window number:         %"Q"u\n", stream->current_window)VE;
+  VC(UT "VCDIFF window indicator:      ")VE;
+  if ((stream->dec_win_ind & VCD_SOURCE) != 0) VC(UT "VCD_SOURCE ")VE;
+  if ((stream->dec_win_ind & VCD_TARGET) != 0) VC(UT "VCD_TARGET ")VE;
+  if ((stream->dec_win_ind & VCD_ADLER32) != 0) VC(UT "VCD_ADLER32 ")VE;
+  if (stream->dec_win_ind == 0) VC(UT "none")VE;
+  VC(UT "\n")VE;
+
+  if ((stream->dec_win_ind & VCD_ADLER32) != 0)
+    {
+      VC(UT "VCDIFF adler32 checksum:      %08X\n",
+	 stream->dec_adler32)VE;
+    }
+
+  if (stream->dec_del_ind != 0)
+    {
+      VC(UT "VCDIFF delta indicator:       ")VE;
+      if ((stream->dec_del_ind & VCD_DATACOMP) != 0) VC(UT "VCD_DATACOMP ")VE;
+      if ((stream->dec_del_ind & VCD_INSTCOMP) != 0) VC(UT "VCD_INSTCOMP ")VE;
+      if ((stream->dec_del_ind & VCD_ADDRCOMP) != 0) VC(UT "VCD_ADDRCOMP ")VE;
+      if (stream->dec_del_ind == 0) VC(UT "none")VE;
+      VC(UT "\n")VE;
+    }
+
+  if (stream->dec_winstart != 0)
+    {
+      VC(UT "VCDIFF window at offset:      %"Q"u\n", stream->dec_winstart)VE;
+    }
+
+  if (SRCORTGT (stream->dec_win_ind))
+    {
+      VC(UT "VCDIFF copy window length:    %"W"u\n",
+	 stream->dec_cpylen)VE;
+      VC(UT "VCDIFF copy window offset:    %"Q"u\n",
+	 stream->dec_cpyoff)VE;
+    }
+
+  VC(UT "VCDIFF delta encoding length: %"W"u\n",
+     (usize_t)stream->dec_enclen)VE;
+  VC(UT "VCDIFF target window length:  %"W"u\n",
+     (usize_t)stream->dec_tgtlen)VE;
+
+  VC(UT "VCDIFF data section length:   %"W"u\n",
+     (usize_t)stream->data_sect.size)VE;
+  VC(UT "VCDIFF inst section length:   %"W"u\n",
+     (usize_t)stream->inst_sect.size)VE;
+  VC(UT "VCDIFF addr section length:   %"W"u\n",
+     (usize_t)stream->addr_sect.size)VE;
+
+  ret = 0;
+  if ((stream->flags & XD3_JUST_HDR) != 0)
+    {
+      /* Print a header -- finished! */
+      ret = PRINTHDR_SPECIAL;
+    }
+  else if ((stream->flags & XD3_SKIP_WINDOW) == 0)
+    {
+      ret = main_print_window (stream, xfile);
+    }
+
+  return ret;
+}
+
+/* Copies one decoded section (INPUT) into the encoder output page
+ * OUTPUT, which must be a single page (no next_page).  The page's
+ * storage is obtained with xd3_decode_allocate() on the global
+ * recode_stream; STREAM is only used to format the error message.
+ * Returns 0 or the allocation error. */
+static int
+main_recode_copy (xd3_stream* stream,
+		  xd3_output* output,
+		  xd3_desect* input)
+{
+  int ret;
+  const uint8_t *section_base;
+
+  XD3_ASSERT(output != NULL);
+  XD3_ASSERT(output->next_page == NULL);
+
+  ret = xd3_decode_allocate (recode_stream, input->size,
+			     &output->base, &output->avail);
+  if (ret != 0)
+    {
+      XPR(NT XD3_LIB_ERRMSG (stream, ret));
+      return ret;
+    }
+
+  /* Note: decoder advances buf, so get base of buffer with
+   * buf_max - size */
+  section_base = input->buf_max - input->size;
+
+  memcpy (output->base, section_base, input->size);
+  output->next = input->size;
+  return 0;
+}
+
+/* Re-encode one window.
+ *
+ * STREAM has just finished decoding a window (dec_state is
+ * DEC_FINISH).  Its data, instruction and address sections are copied
+ * into the global recode_stream, which is then driven through
+ * xd3_encode_input() until it asks for more input; every output
+ * buffer it produces is written to OFILE.  Returns 0 when the window
+ * has been recoded, otherwise an error code. */
+static int
+main_recode_func (xd3_stream* stream, main_file *ofile)
+{
+  int ret;
+  xd3_source decode_source;
+
+  XD3_ASSERT(stream->dec_state == DEC_FINISH);
+  XD3_ASSERT(recode_stream->enc_state == ENC_INIT ||
+	     recode_stream->enc_state == ENC_INPUT);
+
+  // Copy partial decoder output to partial encoder inputs
+  if ((ret = main_recode_copy (recode_stream,
+			       DATA_HEAD(recode_stream),
+			       &stream->data_sect)) ||
+      (ret = main_recode_copy (recode_stream,
+			       INST_HEAD(recode_stream),
+			       &stream->inst_sect)) ||
+      (ret = main_recode_copy (recode_stream,
+			       ADDR_HEAD(recode_stream),
+			       &stream->addr_sect)))
+    {
+      return ret;
+    }
+
+  // This jumps to xd3_emit_hdr()
+  recode_stream->enc_state = ENC_FLUSH;
+  recode_stream->avail_in = stream->dec_tgtlen;
+
+  if (SRCORTGT (stream->dec_win_ind))
+    {
+      /* Only srclen and srcbase of the local source are filled in.
+       * NOTE(review): recode_stream->src keeps pointing at this local
+       * after the function returns -- confirm nothing reads it later. */
+      recode_stream->src = & decode_source;
+      decode_source.srclen = stream->dec_cpylen;
+      decode_source.srcbase = stream->dec_cpyoff;
+    }
+
+  /* Carry the decoded window's checksum over to the recoded window. */
+  if (option_use_checksum &&
+      (stream->dec_win_ind & VCD_ADLER32) != 0)
+    {
+      recode_stream->flags |= XD3_ADLER32_RECODE;
+      recode_stream->recode_adler32 = stream->dec_adler32;
+    }
+
+  /* Application header: an explicit option_appheader wins; otherwise
+   * the header found in the decoded input is reused, if there is one. */
+  if (option_use_appheader != 0 &&
+      option_appheader != NULL)
+    {
+      xd3_set_appheader (recode_stream, option_appheader,
+			 (usize_t) strlen ((char*) option_appheader));
+    }
+  else if (option_use_appheader != 0 &&
+	   option_appheader == NULL)
+    {
+      if (stream->dec_appheader != NULL)
+	{
+	  xd3_set_appheader (recode_stream,
+			     stream->dec_appheader, stream->dec_appheadsz);
+	}
+    }
+
+  // Output loop
+  for (;;)
+    {
+      switch((ret = xd3_encode_input (recode_stream)))
+	{
+	case XD3_INPUT: {
+	  /* finished recoding one window */
+	  stream->total_out = recode_stream->total_out;
+	  return 0;
+	}
+	case XD3_OUTPUT: {
+	  /* main_file_write below */
+	  break;
+	}
+	case XD3_GOTHEADER:
+	case XD3_WINSTART:
+	case XD3_WINFINISH: {
+	  /* ignore */
+	  continue;
+	}
+	/* A source-block request or a zero result is treated as an
+	 * internal error here. */
+	case XD3_GETSRCBLK:
+	case 0: {
+	    return XD3_INTERNAL;
+	  }
+	default:
+	  return ret;
+	}
+
+      if ((ret = main_write_output (recode_stream, ofile)))
+	{
+	  return ret;
+	}
+
+      xd3_consume_output (recode_stream);
+    }
+}
+#endif /* VCDIFF_TOOLS */
+
+/*******************************************************************
+ VCDIFF merging
+ ******************************************************************/
+
+#if VCDIFF_TOOLS
+/* Allocates and configures the global recode_stream, which must be
+ * NULL on entry.  Modifies static state.
+ *
+ * Returns 0 on success.  On any failure the allocation is released,
+ * recode_stream is reset to NULL and the error is returned (ENOMEM
+ * when the allocation itself fails). */
+static int
+main_init_recode_stream (void)
+{
+  int ret;
+  int stream_flags = XD3_ADLER32_NOVER | XD3_SKIP_EMIT;
+  int recode_flags;
+  xd3_config recode_config;
+
+  XD3_ASSERT (recode_stream == NULL);
+
+  if ((recode_stream = (xd3_stream*) main_malloc(sizeof(xd3_stream))) == NULL)
+    {
+      return ENOMEM;
+    }
+
+  recode_flags = (stream_flags & XD3_SEC_TYPE);
+
+  /* NOTE(review): alloc/freef are assigned before xd3_init_config();
+   * if that call resets the whole config these two assignments are
+   * lost -- confirm the intended order. */
+  recode_config.alloc = main_alloc;
+  recode_config.freef = main_free1;
+
+  xd3_init_config(&recode_config, recode_flags);
+
+  if ((ret = main_set_secondary_flags (&recode_config)))
+    {
+      /* The stream is still raw, unconfigured memory at this point:
+       * it must not be handed to XD3_LIB_ERRMSG or xd3_free_stream().
+       * Just release the allocation. */
+      main_free (recode_stream);
+      recode_stream = NULL;
+      return ret;
+    }
+
+  if ((ret = xd3_config_stream (recode_stream, &recode_config)) ||
+      (ret = xd3_encode_init_partial (recode_stream)) ||
+      (ret = xd3_whole_state_init (recode_stream)))
+    {
+      XPR(NT XD3_LIB_ERRMSG (recode_stream, ret));
+      xd3_free_stream (recode_stream);
+      /* Also free the stream structure itself, not only its contents. */
+      main_free (recode_stream);
+      recode_stream = NULL;
+      return ret;
+    }
+
+  return 0;
+}
+
+/* This processes the sequence of -m arguments.  The final input
+ * is processed as part of the ordinary main_input() loop.
+ *
+ * Each file in MERGES is opened and run through main_input() with
+ * CMD_MERGE_ARG.  The first one becomes the running merge state held
+ * in a local stream; every later one is merged into it.  When the list
+ * is exhausted the accumulated state is moved into the newly allocated
+ * global merge_stream.  Returns 0 on success (or when MERGES is empty),
+ * otherwise the first error encountered. */
+static int
+main_merge_arguments (main_merge_list* merges)
+{
+  int ret = 0;
+  int count = 0;
+  main_merge *merge = NULL;
+  xd3_stream merge_input;
+
+  if (main_merge_list_empty (merges))
+    {
+      return 0;
+    }
+
+  if ((ret = xd3_config_stream (& merge_input, NULL)) ||
+      (ret = xd3_whole_state_init (& merge_input)))
+    {
+      XPR(NT XD3_LIB_ERRMSG (& merge_input, ret));
+      return ret;
+    }
+
+  merge = main_merge_list_front (merges);
+  while (!main_merge_list_end (merges, merge))
+    {
+      main_file mfile;
+      main_file_init (& mfile);
+      mfile.filename = merge->filename;
+      mfile.flags = RD_NONEXTERNAL;
+
+      if ((ret = main_file_open (& mfile, merge->filename, XO_READ)))
+	{
+	  goto error;
+	}
+
+      ret = main_input (CMD_MERGE_ARG, & mfile, NULL, NULL);
+
+      if (ret == 0)
+	{
+	  if (count++ == 0)
+	    {
+	      /* The first merge source is the next merge input. */
+	      xd3_swap_whole_state (& recode_stream->whole_target,
+				    & merge_input.whole_target);
+	    }
+	  else
+	    {
+	      /* Merge the recode_stream with merge_input. */
+	      ret = xd3_merge_input_output (recode_stream,
+					    & merge_input.whole_target);
+
+	      /* Save the next merge source in merge_input. */
+	      xd3_swap_whole_state (& recode_stream->whole_target,
+				    & merge_input.whole_target);
+	    }
+	}
+
+      main_file_cleanup (& mfile);
+
+      /* Per-file state is torn down before the next argument (or the
+       * error exit), whatever main_input() returned. */
+      if (recode_stream != NULL)
+	{
+	  xd3_free_stream (recode_stream);
+	  main_free (recode_stream);
+	  recode_stream = NULL;
+	}
+
+      if (main_bdata != NULL)
+	{
+	  main_buffree (main_bdata);
+	  main_bdata = NULL;
+	  main_bsize = 0;
+	}
+
+      if (ret != 0)
+	{
+	  goto error;
+	}
+
+      merge = main_merge_list_next (merge);
+    }
+
+  XD3_ASSERT (merge_stream == NULL);
+
+  if ((merge_stream = (xd3_stream*) main_malloc (sizeof(xd3_stream))) == NULL)
+    {
+      ret = ENOMEM;
+      goto error;
+    }
+
+  if ((ret = xd3_config_stream (merge_stream, NULL)) ||
+      (ret = xd3_whole_state_init (merge_stream)))
+    {
+      /* Report the stream that actually failed. */
+      XPR(NT XD3_LIB_ERRMSG (merge_stream, ret));
+      goto error;
+    }
+
+  /* Hand the accumulated merge state over to merge_stream. */
+  xd3_swap_whole_state (& merge_stream->whole_target,
+			& merge_input.whole_target);
+  ret = 0;
+ error:
+  xd3_free_stream (& merge_input);
+  return ret;
+}
+
+/* Per-window callback for the final merge input: appends the window
+ * that STREAM just decoded to its in-memory whole-file state.  Nothing
+ * is written here (NO_WRITE is unused); the entire delta is buffered
+ * in memory.  Returns the result of xd3_whole_append_window(). */
+static int
+main_merge_func (xd3_stream* stream, main_file *no_write)
+{
+  return xd3_whole_append_window (stream);
+}
+
+
+/* This is called after all windows have been read, as a final step in
+ * main_input().  This is only called for the final merge step. */
+static int
+main_merge_output (xd3_stream *stream, main_file *ofile)
+{
+  int ret;
+  usize_t inst_pos = 0;
+  xoff_t output_pos = 0;
+  xd3_source recode_source;
+  usize_t window_num = 0;
+  int at_least_once = 0;
+
+  /* merge_stream is set if there were arguments.  this stream's input
+   * needs to be applied to the merge_stream source. */
+  if ((merge_stream != NULL) &&
+      (ret = xd3_merge_input_output (stream,
+				     & merge_stream->whole_target)))
+    {
+      XPR(NT XD3_LIB_ERRMSG (stream, ret));
+      return ret;
+    }
+
+  if (option_use_appheader != 0 &&
+      option_appheader != NULL)
+    {
+      xd3_set_appheader (recode_stream, option_appheader,
+			 (usize_t) strlen ((char*) option_appheader));
+    }
+
+  /* Enter the ENC_INPUT state and bypass the next_in == NULL test
+   * and (leftover) input buffering logic. */
+  XD3_ASSERT(recode_stream->enc_state == ENC_INIT);
+  recode_stream->enc_state = ENC_INPUT;
+  recode_stream->next_in = main_bdata;
+  recode_stream->flags |= XD3_FLUSH;
+
+  /* This encodes the entire target. */
+  while (inst_pos < stream->whole_target.instlen || !at_least_once)
+    {
+      xoff_t window_start = output_pos;
+      int window_srcset = 0;
+      xoff_t window_srcmin = 0;
+      xoff_t window_srcmax = 0;
+      usize_t window_pos = 0;
+      usize_t window_size;
+
+      /* at_least_once ensures that we encode at least one window,
+       * which handles the 0-byte case. */
+      at_least_once = 1;
+
+      XD3_ASSERT (recode_stream->enc_state == ENC_INPUT);
+
+      if ((ret = xd3_encode_input (recode_stream)) != XD3_WINSTART)
+	{
+	  XPR(NT "invalid merge state: %s\n", xd3_mainerror (ret));
+	  return XD3_INVALID;
+	}
+
+      /* Window sizes must match from the input to the output, so that
+       * target copies are in-range (and so that checksums carry
+       * over). */
+      XD3_ASSERT (window_num < stream->whole_target.wininfolen);
+      window_size = stream->whole_target.wininfo[window_num].length;
+
+      /* Output position should also match. */
+      if (output_pos != stream->whole_target.wininfo[window_num].offset)
+	{
+	  XPR(NT "internal merge error: offset mismatch\n");
+	  return XD3_INVALID;
+	}
+
+      if (option_use_checksum &&
+	  (stream->dec_win_ind & VCD_ADLER32) != 0)
+	{
+	  recode_stream->flags |= XD3_ADLER32_RECODE;
+	  recode_stream->recode_adler32 =
+	    stream->whole_target.wininfo[window_num].adler32;
+	}
+
+      window_num++;
+
+      if (main_bsize < window_size)
+	{
+	  main_buffree (main_bdata);
+	  main_bdata = NULL;
+	  main_bsize = 0;
+	  if ((main_bdata = (uint8_t*)
+	       main_bufalloc (window_size)) == NULL)
+	    {
+	      return ENOMEM;
+	    }
+	  main_bsize = window_size;
+	}
+
+      /* This encodes a single target window. */
+      while (window_pos < window_size &&
+	     inst_pos < stream->whole_target.instlen)
+	{
+	  xd3_winst *inst = &stream->whole_target.inst[inst_pos];
+	  usize_t take = xd3_min(inst->size, window_size - window_pos);
+	  xoff_t addr;
+
+	  switch (inst->type)
+	    {
+	    case XD3_RUN:
+	      if ((ret = xd3_emit_run (recode_stream, window_pos, take,
+				       &stream->whole_target.adds[inst->addr])))
+		{
+		  return ret;
+		}
+	      break;
+
+	    case XD3_ADD:
+	      /* Adds are implicit, put them into the input buffer. */
+	      memcpy (main_bdata + window_pos,
+		      stream->whole_target.adds + inst->addr, take);
+	      break;
+
+	    default: /* XD3_COPY + copy mode */
+	      if (inst->mode != 0)
+		{
+		  if (window_srcset) {
+		    window_srcmin = xd3_min (window_srcmin, inst->addr);
+		    window_srcmax = xd3_max (window_srcmax, inst->addr + take);
+		  } else {
+		    window_srcset = 1;
+		    window_srcmin = inst->addr;
+		    window_srcmax = inst->addr + take;
+		  }
+		  addr = inst->addr;
+		}
+	      else
+		{
+		  XD3_ASSERT (inst->addr >= window_start);
+		  addr = inst->addr - window_start;
+		}
+	      IF_DEBUG2 ({
+		  XPR(NTR "[merge copy] winpos %"W"u take %"W"u "
+		      "addr %"Q"u mode %u\n",
+		      window_pos, take, addr, inst->mode);
+		});
+	      if ((ret = xd3_found_match (recode_stream, window_pos, take,
+					  addr, inst->mode != 0)))
+		{
+		  return ret;
+		}
+	      break;
+	    }
+
+	  window_pos += take;
+	  output_pos += take;
+
+	  if (take == inst->size)
+	    {
+	      inst_pos += 1;
+	    }
+	  else
+	    {
+	      /* Modify the instruction for the next pass. */
+	      if (inst->type != XD3_RUN)
+		{
+		  inst->addr += take;
+		}
+	      inst->size -= take;
+	    }
+	}
+
+      xd3_avail_input (recode_stream, main_bdata, window_pos);
+
+      recode_stream->enc_state = ENC_INSTR;
+
+      if (window_srcset) {
+	recode_stream->srcwin_decided = 1;
+	recode_stream->src = &recode_source;
+	recode_source.srclen = (usize_t)(window_srcmax - window_srcmin);
+	recode_source.srcbase = window_srcmin;
+	recode_stream->taroff = recode_source.srclen;
+
+	XD3_ASSERT (recode_source.srclen != 0);
+      } else {
+	recode_stream->srcwin_decided = 0;
+	recode_stream->src = NULL;
+	recode_stream->taroff = 0;
+      }
+
+      for (;;)
+	{
+	  switch ((ret = xd3_encode_input (recode_stream)))
+	    {
+	    case XD3_INPUT: {
+	      goto done_window;
+	    }
+	    case XD3_OUTPUT: {
+	      /* main_file_write below */
+	      break;
+	    }
+	    case XD3_GOTHEADER:
+	    case XD3_WINSTART:
+	    case XD3_WINFINISH: {
+	      /* ignore */
+	      continue;
+	    }
+	    case XD3_GETSRCBLK:
+	    case 0: {
+	      return XD3_INTERNAL;
+	    }
+	    default:
+	      return ret;
+	    }
+
+	  if ((ret = main_write_output(recode_stream, ofile)))
+	    {
+	      return ret;
+	    }
+
+	  xd3_consume_output (recode_stream);
+	}
+    done_window:
+      (void) 0;
+    }
+
+  return 0;
+}
+#endif
+
+/*******************************************************************
+ Input decompression, output recompression
+ ******************************************************************/
+
+#if EXTERNAL_COMPRESSION
+/* This is tricky POSIX-specific code with lots of fork(), pipe(),
+ * dup(), waitpid(), and exec() business.  Most of this code
+ * originated in PRCS1, which did automatic package-file
+ * decompression.  It works with both XD3_POSIX and XD3_STDIO file
+ * disciplines.
+ *
+ * To automatically detect compressed inputs requires a child process
+ * to reconstruct the input stream, which was advanced in order to
+ * detect compression, because it may not be seekable.  In other
+ * words, the main program reads part of the input stream, and if it
+ * detects a compressed input it then forks a pipe copier process,
+ * which copies the first-read block out of the main-program's memory,
+ * then streams the remaining compressed input into the
+ * input-decompression pipe.
+ */
+
+#include <signal.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+
+/* Remember which pipe FD is which. */
+#define PIPE_READ_FD  0
+#define PIPE_WRITE_FD 1
+#define MAX_SUBPROCS  4  /* max(source + copier + output,
+			        source + copier + input + copier). */
+static pid_t ext_subprocs[MAX_SUBPROCS];
+
+/* Writes REMAIN bytes from EXIST_BUF to the raw descriptor OUTFD by
+ * handing write() to xd3_posix_io().  The pipe copier subprocess
+ * uses this because it holds a plain fd rather than a main_file.
+ * Nothing is printed on failure, which lets main_pipe_copier()
+ * decide whether trailing garbage should be ignored.  The value
+ * returned by xd3_posix_io() is passed back unchanged. */
+static int
+main_pipe_write (int outfd, uint8_t *exist_buf, usize_t remain)
+{
+  return xd3_posix_io (outfd, exist_buf, remain,
+                       (xd3_posix_func*) &write, NULL);
+}
+
+/* A simple error-reporting waitpid interface. */
+static int
+main_waitpid_check(pid_t pid)
+{
+  int status;
+  int ret = 0;
+
+  if (waitpid (pid, & status, 0) < 0)
+    {
+      ret = get_errno ();
+      XPR(NT "external compression [pid %d] wait: %s\n",
+	  pid, xd3_mainerror (ret));
+    }
+  else if (! WIFEXITED (status))
+    {
+      // SIGPIPE will be delivered to the child process whenever it
+      // writes data after this process closes the pipe, 
+      // happens if xdelta does not require access to the entire 
+      // source file.  Considered normal.
+      if (! WIFSIGNALED (status) || WTERMSIG (status) != SIGPIPE) 
+	{
+	  ret = ECHILD;
+	  XPR(NT "external compression [pid %d] signal %d\n", pid, 
+	      WIFSIGNALED (status) ? WTERMSIG (status) : WSTOPSIG (status));
+	}
+      else if (option_verbose)
+	{
+	  XPR(NT "external compression sigpipe\n");
+	}
+    }
+  else if (WEXITSTATUS (status) != 0)
+    {
+      ret = ECHILD;
+      if (option_verbose > 1)
+	{
+	  /* Presumably, the error was printed by the subprocess. */
+	  XPR(NT "external compression [pid %d] exit %d\n",
+	      pid, WEXITSTATUS (status));
+	}
+    }
+
+  return ret;
+}
+
+/* Waits for each recorded child process in turn and reports the
+ * first abnormal result.
+ *
+ * Returns 0 when every recorded child finished cleanly, otherwise
+ * the first non-zero value from main_waitpid_check().  Children
+ * recorded after the failing one are left in ext_subprocs[] so that
+ * main_external_compression_cleanup() can still signal them. */
+static int
+main_external_compression_finish (void)
+{
+  int i;
+
+  for (i = 0; i < num_subprocs; i += 1)
+    {
+      pid_t pid = ext_subprocs[i];
+      int ret;
+
+      if (! pid) { continue; }
+
+      /* Forget the pid before looking at the result.  Once waitpid()
+       * has been called on it the child has normally been reaped
+       * (even when it exited with an error), and leaving the slot
+       * set would make main_external_compression_cleanup() send
+       * SIGTERM to a pid that no longer belongs to this process. */
+      ext_subprocs[i] = 0;
+
+      if ((ret = main_waitpid_check (pid)))
+        {
+          return ret;
+        }
+    }
+
+  return 0;
+}
+
+/* Sends SIGTERM to every child process still recorded in
+ * ext_subprocs[] and clears its slot.  The children are not waited
+ * for here. */
+static void
+main_external_compression_cleanup (void)
+{
+  int slot = 0;
+
+  while (slot < num_subprocs)
+    {
+      if (ext_subprocs[slot] != 0)
+        {
+          kill (ext_subprocs[slot], SIGTERM);
+          ext_subprocs[slot] = 0;
+        }
+
+      slot += 1;
+    }
+}
+
+/* This runs as a forked process of main_input_decompress_setup() to
+ * copy input to the decompression process.  First, the NREAD bytes
+ * already available in PIPE_BUF are written to OUTFD, then the buffer
+ * is reused to continue reading PIPE_BUFSIZE bytes at a time from
+ * IFILE until a short read is seen.
+ *
+ * A write that fails with EPIPE is not fatal: the chunk is counted as
+ * skipped and reading continues until the input is exhausted.  The
+ * skipped total is reported when option_verbose is set.
+ *
+ * Returns 0 on success, otherwise the error from main_pipe_write()
+ * or main_file_read(). */
+static int
+main_pipe_copier (uint8_t     *pipe_buf,
+		  usize_t      pipe_bufsize,
+		  size_t       nread,
+		  main_file   *ifile,
+		  int          outfd)
+{
+  int ret;
+  xoff_t skipped = 0;
+
+  /* Prevent SIGPIPE signals, allow EPIPE return values instead.  This
+   * is safe to comment-out, except that the -F flag will not work
+   * properly (the parent would need to treat WTERMSIG(status) ==
+   * SIGPIPE).  Every field that sigaction() reads is initialized;
+   * passing a partially filled structure is undefined. */
+  struct sigaction sa;
+  sa.sa_handler = SIG_IGN;
+  sa.sa_flags = 0;
+  sigemptyset (& sa.sa_mask);
+  sigaction (SIGPIPE, &sa, NULL);
+
+  for (;;)
+    {
+      /* force_drain is set when EPIPE causes us to skip data.  This
+       * is reset each time through the loop, so the break condition
+       * below works. */
+      int force_drain = 0;
+      if (nread > 0 && (ret = main_pipe_write (outfd, pipe_buf, nread)))
+	{
+	  if (ret == EPIPE)
+	    {
+	      /* This causes the loop to continue reading until nread
+	       * == 0. */
+	      skipped += nread;
+	      force_drain = 1;
+	    }
+	  else
+	    {
+	      XPR(NT "pipe write failed: %s\n", xd3_mainerror (ret));
+	      return ret;
+	    }
+	}
+
+      if (nread < pipe_bufsize && !force_drain)
+	{
+	  break;
+	}
+
+      /* Any non-zero result is a failure, as at the other call sites
+       * of main_file_read(); testing only for negative values would
+       * let a failed read go unnoticed. */
+      if ((ret = main_file_read (ifile, pipe_buf, pipe_bufsize,
+				 & nread, "pipe read failed")) != 0)
+	{
+	  return ret;
+	}
+    }
+
+  if (option_verbose && skipped != 0)
+    {
+      XPR(NT "skipping %"Q"u bytes in %s\n",
+	  skipped, ifile->filename);
+    }
+  return 0;
+}
+
+/* This function is called after we have read some amount of data from
+ * the input file and detected a compressed input.  Here we start a
+ * decompression subprocess by forking twice.  The first process runs
+ * the decompression command, the second process copies data to the
+ * input of the first.
+ *
+ * DECOMP names the command and options to execute.  IFILE is the
+ * compressed input; on success it is closed and re-pointed at the
+ * read end of the decompressor's output pipe, and its compressor
+ * field is set to DECOMP.  PIPE_BUF / PIPE_BUFSIZE / PIPE_AVAIL are
+ * handed to main_pipe_copier() as the buffer, its size, and the
+ * number of bytes already read into it.  INPUT_BUF / INPUT_BUFSIZE /
+ * NREAD are handed to main_file_read() for the first read of
+ * decompressed data.
+ *
+ * Returns the result of that main_file_read() on success, otherwise
+ * the get_errno() value recorded at the failing step. */
+static int
+main_input_decompress_setup (const main_extcomp   *decomp,
+			     main_file            *ifile,
+			     uint8_t              *input_buf,
+			     usize_t               input_bufsize,
+			     uint8_t              *pipe_buf,
+			     usize_t               pipe_bufsize,
+			     usize_t               pipe_avail,
+			     size_t               *nread)
+{
+  /* The two pipes: outpipefd carries the decompressor's stdout back
+   * to the parent, inpipefd carries data from the copier to the
+   * decompressor's stdin. */
+  int outpipefd[2], inpipefd[2];
+  int input_fd = -1;  /* The resulting input_fd (output of decompression). */
+  pid_t decomp_id, copier_id;  /* The two subprocs. */
+  int ret;
+
+  /* Mark every descriptor as unopened before any of them is created. */
+  outpipefd[0] = outpipefd[1] = -1;
+  inpipefd[0]  = inpipefd[1]  = -1;
+
+  if (pipe (outpipefd) || pipe (inpipefd))
+    {
+      XPR(NT "pipe failed: %s\n", xd3_mainerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+  if ((decomp_id = fork ()) < 0)
+    {
+      XPR(NT "fork failed: %s\n", xd3_mainerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+  /* The first child runs the decompression process: */
+  if (decomp_id == 0)
+    {
+      if (option_verbose > 2)
+	{
+	  XPR(NT "external decompression pid %d\n", getpid ());
+	}
+
+      /* Setup pipes: write to the outpipe, read from the inpipe.
+       * Each step returns non-zero on failure, so the chain stops at
+       * the first failing call and the message below is printed. */
+      if (dup2 (outpipefd[PIPE_WRITE_FD], STDOUT_FILENO) < 0 ||
+	  dup2 (inpipefd[PIPE_READ_FD], STDIN_FILENO) < 0 ||
+	  close (outpipefd[PIPE_READ_FD]) ||
+	  close (outpipefd[PIPE_WRITE_FD]) ||
+	  close (inpipefd[PIPE_READ_FD]) ||
+	  close (inpipefd[PIPE_WRITE_FD]) ||
+	  execlp (decomp->decomp_cmdname, decomp->decomp_cmdname,
+		  decomp->decomp_options,
+		  option_force2 ? "-f" : NULL,
+		  NULL))
+	{
+	  XPR(NT "child process %s failed to execute: %s\n",
+	      decomp->decomp_cmdname, xd3_mainerror (get_errno ()));
+	}
+
+      /* Only reached when one of the calls above failed. */
+      _exit (127);
+    }
+
+  /* Record the child so it can be waited for or killed later. */
+  XD3_ASSERT(num_subprocs < MAX_SUBPROCS);
+  ext_subprocs[num_subprocs++] = decomp_id;
+
+  if ((copier_id = fork ()) < 0)
+    {
+      XPR(NT "fork failed: %s\n", xd3_mainerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+  /* The second child runs the copier process: */
+  if (copier_id == 0)
+    {
+      int exitval = 0;
+
+      if (option_verbose > 2)
+	{
+	  XPR(NT "child pipe-copier pid %d\n", getpid ());
+	}
+
+      /* The copier only needs the write end of inpipefd; it closes
+       * the other three descriptors, copies, then closes that too. */
+      if (close (inpipefd[PIPE_READ_FD]) ||
+	  close (outpipefd[PIPE_READ_FD]) ||
+	  close (outpipefd[PIPE_WRITE_FD]) ||
+	  main_pipe_copier (pipe_buf, pipe_bufsize, pipe_avail,
+			    ifile, inpipefd[PIPE_WRITE_FD]) ||
+	  close (inpipefd[PIPE_WRITE_FD]))
+	{
+	  XPR(NT "child copier process failed: %s\n",
+	      xd3_mainerror (get_errno ()));
+	  exitval = 1;
+	}
+
+      _exit (exitval);
+    }
+
+  XD3_ASSERT(num_subprocs < MAX_SUBPROCS);
+  ext_subprocs[num_subprocs++] = copier_id;
+
+  /* The parent closes both pipes after duplicating the read end of
+   * the decompressor's output pipe. */
+  input_fd = dup (outpipefd[PIPE_READ_FD]);
+
+  if (input_fd < 0 ||
+      main_file_close (ifile) ||
+      close (outpipefd[PIPE_READ_FD]) ||
+      close (outpipefd[PIPE_WRITE_FD]) ||
+      close (inpipefd[PIPE_READ_FD]) ||
+      close (inpipefd[PIPE_WRITE_FD]))
+    {
+      XPR(NT "dup/close failed: %s\n", xd3_mainerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+#if XD3_STDIO
+  /* Note: fdopen() acquires the fd, closes it when finished. */
+  if ((ifile->file = fdopen (input_fd, "r")) == NULL)
+    {
+      XPR(NT "fdopen failed: %s\n", xd3_mainerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+#elif XD3_POSIX
+  ifile->file = input_fd;
+#endif
+
+  ifile->compressor = decomp;
+
+  /* Now the input file is decompressed. */
+  return main_file_read (ifile, input_buf, input_bufsize,
+			 nread, "input decompression failed");
+
+ pipe_cleanup:
+  /* Best-effort cleanup: some of these descriptors may still be -1 or
+   * may already have been closed above; the results are ignored. */
+  close (input_fd);
+  close (outpipefd[PIPE_READ_FD]);
+  close (outpipefd[PIPE_WRITE_FD]);
+  close (inpipefd[PIPE_READ_FD]);
+  close (inpipefd[PIPE_WRITE_FD]);
+  return ret;
+}
+
+
+/* This routine is called when the first buffer of input data is read
+ * by the main program (unless input decompression is disabled by
+ * command-line option).  If it recognizes the magic number of a known
+ * input type it invokes decompression.
+ *
+ * Skips decompression if the decompression type or the file type is
+ * RD_NONEXTERNAL.
+ *
+ * Behaves exactly like main_file_read, otherwise: up to INPUT_SIZE
+ * bytes are placed in INPUT_BUF, the count is stored in *NREAD, and
+ * a non-zero value is returned if any read fails.
+ *
+ * This function uses a separate buffer to read the first small block
+ * of input.  If a compressed input is detected, the separate buffer
+ * is passed to the pipe copier.  This avoids using the same size
+ * buffer in both cases. */
+static int
+main_secondary_decompress_check (main_file  *file,
+				 uint8_t    *input_buf,
+				 size_t      input_size,
+				 size_t     *nread)
+{
+  int ret;
+  usize_t i;
+  usize_t try_read = xd3_min (input_size, XD3_ALLOCSIZE);
+  size_t  check_nread = 0;
+  uint8_t check_buf[XD3_ALLOCSIZE];  /* TODO: heap allocate */
+  const main_extcomp *decompressor = NULL;
+
+  if ((ret = main_file_read (file, check_buf,
+			     try_read,
+			     & check_nread, "input read failed")))
+    {
+      return ret;
+    }
+
+  if (file->flags & RD_DECOMPSET)
+    {
+      /* This allows the application header to override the magic
+       * number, for whatever reason. */
+      decompressor = file->compressor;
+    }
+  else
+    {
+      for (i = 0; i < SIZEOF_ARRAY (extcomp_types); i += 1)
+	{
+	  const main_extcomp *decomp = & extcomp_types[i];
+
+	  if (check_nread > decomp->magic_size)
+	    {
+	      /* The following expr checks if we are trying to read a
+	       * VCDIFF input, in which case do not treat it as
+	       * "secondary" decompression. */
+	      int skip_this_type = (decomp->flags & RD_NONEXTERNAL) &&
+  	                           (file->flags & RD_NONEXTERNAL);
+
+	      if (skip_this_type)
+		{
+		  continue;
+		}
+
+	      if (memcmp (check_buf, decomp->magic, decomp->magic_size) == 0)
+		{
+		  decompressor = decomp;
+		  break;
+		}
+	    }
+	}
+    }
+
+  if (decompressor != NULL)
+    {
+      if (! option_quiet)
+	{
+	  XPR(NT "externally compressed input: %s %s%s < %s\n",
+	      decompressor->decomp_cmdname,
+	      decompressor->decomp_options,
+	      (option_force2 ? " -f" : ""),
+	      file->filename);
+	  if (file->flags & RD_MAININPUT)
+	    {
+	      XPR(NT
+  "WARNING: the encoder is automatically decompressing the input file;\n");
+	      XPR(NT
+  "WARNING: the decoder will automatically recompress the output file;\n");
+	      XPR(NT
+  "WARNING: this may result in different compressed data and checksums\n");
+	      XPR(NT
+  "WARNING: despite being identical data; if this is an issue, use -D\n");
+	      XPR(NT
+  "WARNING: to avoid decompression and/or use -R to avoid recompression\n");
+	      XPR(NT
+  "WARNING: and/or manually decompress the input file; if you know the\n");
+	      XPR(NT
+  "WARNING: compression settings that will produce identical output\n");
+	      XPR(NT
+  "WARNING: you may set those flags using the environment (e.g., GZIP=-9)\n");
+	    }
+	}
+
+      /* The size of the decompressed stream is not known in advance. */
+      file->size_known = 0;
+      return main_input_decompress_setup (decompressor, file,
+					  input_buf, input_size,
+					  check_buf, XD3_ALLOCSIZE,
+					  check_nread, nread);
+    }
+
+  /* Now read the rest of the input block. */
+  (*nread) = 0;
+
+  /* A short first read means the input is already exhausted, so the
+   * second read is only attempted after a full one.  Its failure is
+   * reported to the caller instead of being discarded. */
+  if (check_nread == try_read)
+    {
+      if ((ret = main_file_read (file,
+				 input_buf + try_read,
+				 input_size - try_read,
+				 nread,
+				 "input read failed")))
+	{
+	  return ret;
+	}
+    }
+
+  /* Place the probe block in front of whatever the second read got. */
+  memcpy (input_buf, check_buf, check_nread);
+
+  (*nread) += check_nread;
+
+  return 0;
+}
+
+/* Initiate re-compression of the output stream.  This is easier than
+ * input decompression because we know beforehand that the stream will
+ * be compressed, whereas the input has already been read when we
+ * decide it should be decompressed.  Thus, it only requires one
+ * subprocess and one pipe.
+ *
+ * The command and options come from OFILE->compressor.  The child
+ * reads the pipe on its stdin and writes to OFILE's descriptor on its
+ * stdout.  On success OFILE is closed and re-pointed at the write end
+ * of the pipe, so later writes to OFILE feed the compressor.
+ *
+ * Returns 0 on success, otherwise the get_errno() value recorded at
+ * the failing step. */
+static int
+main_recompress_output (main_file *ofile)
+{
+  pid_t recomp_id;  /* One subproc. */
+  int   pipefd[2];  /* One pipe. */
+  int   output_fd = -1;
+  int   ret;
+  const main_extcomp *recomp = ofile->compressor;
+
+  /* Mark both ends as unopened before the pipe is created. */
+  pipefd[0] = pipefd[1] = -1;
+
+  if (pipe (pipefd))
+    {
+      XPR(NT "pipe failed: %s\n", xd3_mainerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+  if ((recomp_id = fork ()) < 0)
+    {
+      XPR(NT "fork failed: %s\n", xd3_mainerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+  /* The child runs the recompression process: */
+  if (recomp_id == 0)
+    {
+      if (option_verbose > 2)
+	{
+	  XPR(NT "external recompression pid %d\n", getpid ());
+	}
+
+      /* Setup pipes: write to the output file, read from the pipe.
+       * Each step returns non-zero on failure, so the chain stops at
+       * the first failing call and the message below is printed. */
+      if (dup2 (XFNO (ofile), STDOUT_FILENO) < 0 ||
+	  dup2 (pipefd[PIPE_READ_FD], STDIN_FILENO) < 0 ||
+	  close (pipefd[PIPE_READ_FD]) ||
+	  close (pipefd[PIPE_WRITE_FD]) ||
+	  execlp (recomp->recomp_cmdname, recomp->recomp_cmdname,
+		  recomp->recomp_options,
+		  option_force2 ? "-f" : NULL,
+		  NULL))
+	{
+	  XPR(NT "child process %s failed to execute: %s\n",
+	      recomp->recomp_cmdname, xd3_mainerror (get_errno ()));
+	}
+
+      /* Only reached when one of the calls above failed. */
+      _exit (127);
+    }
+
+  /* Record the child so it can be waited for or killed later. */
+  XD3_ASSERT(num_subprocs < MAX_SUBPROCS);
+  ext_subprocs[num_subprocs++] = recomp_id;
+
+  /* The parent closes both pipes after duplicating the output-fd for
+   * writing to the compression pipe. */
+  output_fd = dup (pipefd[PIPE_WRITE_FD]);
+
+  if (output_fd < 0 ||
+      main_file_close (ofile) ||
+      close (pipefd[PIPE_READ_FD]) ||
+      close (pipefd[PIPE_WRITE_FD]))
+    {
+      XPR(NT "close failed: %s\n", xd3_mainerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+#if XD3_STDIO
+  /* Note: fdopen() acquires the fd, closes it when finished. */
+  if ((ofile->file = fdopen (output_fd, "w")) == NULL)
+    {
+      XPR(NT "fdopen failed: %s\n", xd3_mainerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+#elif XD3_POSIX
+  ofile->file = output_fd;
+#endif
+
+  /* Now the output file will be compressed. */
+  return 0;
+
+ pipe_cleanup:
+  /* Best-effort cleanup: some of these descriptors may still be -1 or
+   * may already have been closed above; the results are ignored. */
+  close (output_fd);
+  close (pipefd[PIPE_READ_FD]);
+  close (pipefd[PIPE_WRITE_FD]);
+  return ret;
+}
+#endif /* EXTERNAL_COMPRESSION */
+
+/* Looks up the extcomp_types[] entry whose ident string equals
+ * IDENT, the identifier carried in the application header.  Returns
+ * the first matching entry, or NULL when none matches. */
+static const main_extcomp*
+main_ident_compressor (const char *ident)
+{
+  const main_extcomp *entry = extcomp_types;
+  const main_extcomp *const limit =
+    extcomp_types + SIZEOF_ARRAY (extcomp_types);
+
+  while (entry != limit)
+    {
+      if (! strcmp (entry->ident, ident))
+        {
+          return entry;
+        }
+
+      entry += 1;
+    }
+
+  return NULL;
+}
+
+/* Resolves IDENT to a usable main_extcomp record.  Returns NULL,
+ * after printing a warning unless option_quiet is set, when IDENT is
+ * not recognized or when EXTERNAL_COMPRESSION is compiled out. */
+static const main_extcomp*
+main_get_compressor (const char *ident)
+{
+  const main_extcomp *found = main_ident_compressor (ident);
+
+  if (found != NULL && EXTERNAL_COMPRESSION)
+    {
+      return found;
+    }
+
+  if (! option_quiet)
+    {
+      if (found == NULL)
+        {
+          XPR(NT "warning: cannot recompress output: "
+                   "unrecognized external compression ID: %s\n", ident);
+        }
+      else
+        {
+          XPR(NT "warning: external support not compiled: "
+                   "original input was compressed: %s\n",
+              found->recomp_cmdname);
+        }
+    }
+
+  return NULL;
+}
+
+/*********************************************************************
+ APPLICATION HEADER
+ *******************************************************************/
+
+#if XD3_ENCODER
+/* Maps a file name to the text recorded for it in the application
+ * header: "" for NULL, "-" for the three /dev standard streams, and
+ * otherwise the part of X after its last '/' (all of X when it has
+ * no '/').  The result is a literal or points into X. */
+static const char*
+main_apphead_string (const char* x)
+{
+  static const char *const std_streams[] =
+    { "/dev/stdin", "/dev/stdout", "/dev/stderr" };
+  const char *base;
+  size_t k;
+
+  if (x == NULL)
+    {
+      return "";
+    }
+
+  for (k = 0; k < sizeof (std_streams) / sizeof (std_streams[0]); k += 1)
+    {
+      if (strcmp (x, std_streams[k]) == 0)
+        {
+          return "-";
+        }
+    }
+
+  // TODO: this is not portable
+  base = strrchr (x, '/');
+  return (base != NULL) ? base + 1 : x;
+}
+
+/* Installs the application header on STREAM, at most once.
+ *
+ * Does nothing when the header is disabled by option_use_appheader
+ * or has already been set.  Uses option_appheader verbatim when it
+ * is given; otherwise formats "name/compressor" for INPUT, followed
+ * by "/name/compressor" for SFILE when SFILE has a filename.
+ *
+ * Returns 0 on success or ENOMEM if the header cannot be allocated. */
+static int
+main_set_appheader (xd3_stream *stream, main_file *input, main_file *sfile)
+{
+  if (! option_use_appheader || appheader_used)
+    {
+      return 0;
+    }
+
+  if (! option_appheader)
+    {
+      const int   have_source = (sfile->filename != NULL);
+      const char *in_name  = main_apphead_string (input->filename);
+      const char *in_comp  = "";
+      const char *src_name = "";
+      const char *src_comp = "";
+      usize_t     bytes;
+
+      if (input->compressor != NULL)
+        {
+          in_comp = input->compressor->ident;
+        }
+
+      /* One separator plus the terminating NUL. */
+      bytes = (usize_t) strlen (in_name) + (usize_t) strlen (in_comp) + 2;
+
+      if (have_source)
+        {
+          src_name = main_apphead_string (sfile->filename);
+
+          if (sfile->compressor != NULL)
+            {
+              src_comp = sfile->compressor->ident;
+            }
+
+          /* Two more separators for the source fields. */
+          bytes += (usize_t) strlen (src_name) + (usize_t) strlen (src_comp) + 2;
+        }
+
+      appheader_used = (uint8_t*) main_malloc (bytes);
+
+      if (appheader_used == NULL)
+        {
+          return ENOMEM;
+        }
+
+      if (have_source)
+        {
+          snprintf_func ((char*)appheader_used, bytes, "%s/%s/%s/%s",
+                         in_name, in_comp, src_name, src_comp);
+        }
+      else
+        {
+          snprintf_func ((char*)appheader_used, bytes, "%s/%s",
+                         in_name, in_comp);
+        }
+    }
+  else
+    {
+      /* The user supplied the header text. */
+      appheader_used = option_appheader;
+    }
+
+  xd3_set_appheader (stream, appheader_used,
+                     (usize_t) strlen ((char*)appheader_used));
+
+  return 0;
+}
+#endif
+
+/* Applies one "name/compressor" pair from the parsed application
+ * header to FILE.
+ *
+ * PARSED[0] is the filename field and PARSED[1] the compressor
+ * identifier; both must be non-NULL.  OUTPUT is non-zero when FILE
+ * is the output file, TYPE is the word used in the informational
+ * message, and OTHER is the file whose directory is borrowed for the
+ * default name. */
+static void
+main_get_appheader_params (main_file *file, char **parsed,
+			   int output, const char *type,
+			   main_file *other)
+{
+  /* Set the filename if it was not specified.  If output, option_stdout (-c)
+   * overrides. */
+  if (file->filename == NULL &&
+      ! (output && option_stdout) &&
+      strcmp (parsed[0], "-") != 0)
+    {
+      file->filename = parsed[0];
+
+      if (other->filename != NULL) {
+	/* Take directory from the other file, if it has one. */
+	/* TODO: This results in nonsense names like /dev/foo.tar.gz
+	 * and probably the filename-default logic interferes with
+	 * multi-file operation and the standard file extension?
+	 * Possibly the name header is bad, should be off by default.
+	 * Possibly we just want to remember external/compression
+	 * settings. */
+	const char *last_slash = strrchr(other->filename, '/');
+
+	if (last_slash != NULL) {
+	  usize_t dlen = (usize_t) (last_slash - other->filename);
+
+	  /* Room for the directory, one '/', the name and a NUL. */
+	  XD3_ASSERT(file->filename_copy == NULL);
+	  file->filename_copy =
+	    (char*) main_malloc(dlen + 2 + (usize_t) strlen(file->filename));
+
+	  /* main_malloc() can return NULL.  This function has no way
+	   * to report an error, so on failure the bare name from the
+	   * header is kept instead of writing through a null pointer. */
+	  if (file->filename_copy != NULL) {
+	    memcpy(file->filename_copy, other->filename, dlen);
+	    file->filename_copy[dlen] = '/';
+	    strcpy(file->filename_copy + dlen + 1, parsed[0]);
+
+	    file->filename = file->filename_copy;
+	  }
+	}
+      }
+
+      if (! option_quiet)
+	{
+	  XPR(NT "using default %s filename: %s\n", type, file->filename);
+	}
+    }
+
+  /* Set the compressor, initiate de/recompression later. */
+  if (file->compressor == NULL && *parsed[1] != 0)
+    {
+      file->flags |= RD_DECOMPSET;
+      file->compressor = main_get_compressor (parsed[1]);
+    }
+}
+
+/* Reads the application header from STREAM, if one is available,
+ * and applies its fields to OUTPUT and SFILE.
+ *
+ * The header is split in place at '/' into at most four fields.  Two
+ * fields describe the output; four describe the output followed by
+ * the source.  Any other count is ignored.  IFILE supplies the
+ * directory used for default names.  Once a header has been
+ * retrieved, option_use_appheader is cleared so this runs only
+ * once. */
+static void
+main_get_appheader (xd3_stream *stream, main_file *ifile,
+		    main_file *output, main_file *sfile)
+{
+  uint8_t *hdr_bytes;
+  usize_t hdr_len;
+
+  /* The user may disable the application header. */
+  if (! option_use_appheader)
+    {
+      return;
+    }
+
+  /* Ignore failure, it only means we haven't received a header yet. */
+  if (xd3_get_appheader (stream, & hdr_bytes, & hdr_len) != 0)
+    {
+      return;
+    }
+
+  if (hdr_len > 0)
+    {
+      char *fields[4] = { NULL, NULL, NULL, NULL };
+      const int last_slot = 3;
+      char *cursor = (char*) hdr_bytes;
+      char *sep;
+      int   count = 0;
+
+      /* Cut at each '/' until only the last slot remains; that slot
+       * receives whatever text is left, slashes included. */
+      while ((sep = strchr (cursor, '/')) != NULL && count < last_slot)
+        {
+          *sep = 0;
+          fields[count] = cursor;
+          count += 1;
+          cursor = sep + 1;
+        }
+
+      fields[count] = cursor;
+      count += 1;
+
+      switch (count)
+        {
+        case 4:
+          /* Output parameters first, then the source parameters. */
+          main_get_appheader_params (output, fields, 1, "output", ifile);
+          main_get_appheader_params (sfile, fields + 2, 0, "source", ifile);
+          break;
+
+        case 2:
+          main_get_appheader_params (output, fields, 1, "output", ifile);
+          break;
+
+        default:
+          break;
+        }
+    }
+
+  option_use_appheader = 0;
+}
+
+/*********************************************************************
+ Main I/O routines
+ **********************************************************************/
+
+/* Reads up to SIZE bytes of FILE into BUF and stores the count in
+ * *NREAD.  This is main_file_read() with one addition: when
+ * EXTERNAL_COMPRESSION is compiled in, option_decompress_inputs is
+ * set, and FILE still carries RD_FIRST, the flag is cleared and the
+ * read goes through main_secondary_decompress_check(), which looks
+ * for the magic number of a compressed input (source or target). */
+static int
+main_read_primary_input (main_file   *file,
+			 uint8_t     *buf,
+			 size_t       size,
+			 size_t      *nread)
+{
+#if EXTERNAL_COMPRESSION
+  if ((file->flags & RD_FIRST) != 0 && option_decompress_inputs)
+    {
+      file->flags &= ~RD_FIRST;
+
+      return main_secondary_decompress_check (file, buf, size, nread);
+    }
+#endif
+
+  return main_file_read (file, buf, size, nread, "input read failed");
+}
+
+/* Opens the main output file and, when configured, starts output
+ * recompression.
+ *
+ * With no filename, OFILE is set up for standard output through
+ * XSTDOUT_XF.  With a filename, an existing file is refused unless
+ * option_force is set.  Does nothing when option_no_output is set.
+ * This function is expected to print any error messages itself.
+ *
+ * Returns 0 on success, EEXIST when the output exists and may not be
+ * overwritten, or the error from main_file_open() or
+ * main_recompress_output(). */
+static int
+main_open_output (xd3_stream *stream, main_file *ofile)
+{
+  int status;
+
+  if (option_no_output)
+    {
+      return 0;
+    }
+
+  if (ofile->filename != NULL)
+    {
+      /* Stat the file to check for overwrite. */
+      if (option_force == 0 && main_file_exists (ofile))
+        {
+          if (!option_quiet)
+            {
+              XPR(NT "to overwrite output file specify -f: %s\n",
+                  ofile->filename);
+            }
+          return EEXIST;
+        }
+
+      status = main_file_open (ofile, ofile->filename, XO_WRITE);
+
+      if (status != 0)
+        {
+          return status;
+        }
+
+      if (option_verbose > 1)
+        {
+          XPR(NT "output %s\n", ofile->filename);
+        }
+    }
+  else
+    {
+      XSTDOUT_XF (ofile);
+
+      if (option_verbose > 1)
+        {
+          XPR(NT "using standard output: %s\n", ofile->filename);
+        }
+    }
+
+#if EXTERNAL_COMPRESSION
+  /* Do output recompression. */
+  if (option_recompress_outputs == 1 && ofile->compressor != NULL)
+    {
+      if (! option_quiet)
+        {
+          XPR(NT "externally compressed output: %s %s%s > %s\n",
+              ofile->compressor->recomp_cmdname,
+              ofile->compressor->recomp_options,
+              (option_force2 ? " -f" : ""),
+              ofile->filename);
+        }
+
+      status = main_recompress_output (ofile);
+
+      if (status != 0)
+        {
+          return status;
+        }
+    }
+#endif
+
+  return 0;
+}
+
+/* Chooses the input window size for IFILE: option_winsize, reduced
+ * to the file size when main_file_stat() can report one, and never
+ * less than XD3_ALLOCSIZE.  The choice is printed when
+ * option_verbose is greater than 1. */
+static usize_t
+main_get_winsize (main_file *ifile) {
+  static shortbuf iszbuf;
+  usize_t win = option_winsize;
+  xoff_t known_size = 0;
+
+  if (main_file_stat (ifile, &known_size) == 0)
+    {
+      win = (usize_t) xd3_min (known_size, (xoff_t) win);
+    }
+
+  if (win < XD3_ALLOCSIZE)
+    {
+      win = XD3_ALLOCSIZE;
+    }
+
+  if (option_verbose > 1)
+    {
+      XPR(NT "input %s window size %s\n",
+          ifile->filename,
+          main_format_bcnt (win, &iszbuf));
+    }
+
+  return win;
+}
+
+/*********************************************************************
+ Main routines
+ ********************************************************************/
+
+/* This is a generic input function.  It calls the xd3_encode_input or
+ * xd3_decode_input functions and makes calls to the various input
+ * handling routines above, which coordinate external decompression.
+ */
+static int
+main_input (xd3_cmd     cmd,
+	    main_file   *ifile,
+	    main_file   *ofile,
+	    main_file   *sfile)
+{
+  int        ret;
+  xd3_stream stream;
+  size_t     nread = 0;
+  usize_t    winsize;
+  int        stream_flags = 0;
+  xd3_config config;
+  xd3_source source;
+  xoff_t     last_total_in = 0;
+  xoff_t     last_total_out = 0;
+  long       start_time;
+  int        stdout_only = 0;
+  int (*input_func) (xd3_stream*);
+  int (*output_func) (xd3_stream*, main_file *);
+
+  memset (& stream, 0, sizeof (stream));
+  memset (& source, 0, sizeof (source));
+  memset (& config, 0, sizeof (config));
+
+  config.alloc = main_alloc;
+  config.freef = main_free1;
+
+  config.iopt_size = option_iopt_size;
+  config.sprevsz = option_sprevsz;
+
+  do_src_fifo = 0;
+
+  start_time = get_millisecs_now ();
+
+  if (option_use_checksum) { stream_flags |= XD3_ADLER32; }
+
+  /* main_input setup. */
+  switch ((int) cmd)
+    {
+#if VCDIFF_TOOLS
+           if (1) { case CMD_PRINTHDR:   stream_flags |= XD3_JUST_HDR; }
+      else if (1) { case CMD_PRINTHDRS:  stream_flags |= XD3_SKIP_WINDOW; }
+      else        { case CMD_PRINTDELTA: stream_flags |= XD3_SKIP_EMIT; }
+      ifile->flags |= RD_NONEXTERNAL;
+      input_func    = xd3_decode_input;
+      output_func   = main_print_func;
+      stream_flags |= XD3_ADLER32_NOVER;
+      stdout_only   = 1;
+      break;
+
+    case CMD_RECODE:
+    case CMD_MERGE:
+    case CMD_MERGE_ARG:
+      /* No source will be read */
+      stream_flags |= XD3_ADLER32_NOVER | XD3_SKIP_EMIT;
+      ifile->flags |= RD_NONEXTERNAL;
+      input_func = xd3_decode_input;
+
+      if ((ret = main_init_recode_stream ()))
+        {
+	  return EXIT_FAILURE;
+        }
+
+      if (cmd == CMD_RECODE) { output_func = main_recode_func; }
+      else                   { output_func = main_merge_func; }
+      break;
+#endif /* VCDIFF_TOOLS */
+
+#if XD3_ENCODER
+    case CMD_ENCODE:
+      do_src_fifo = 1;
+      input_func  = xd3_encode_input;
+      output_func = main_write_output;
+
+      if (option_no_compress)      { stream_flags |= XD3_NOCOMPRESS; }
+      if (option_smatch_config)
+	{
+	  const char *s = option_smatch_config;
+	  char *e;
+	  long values[XD3_SOFTCFG_VARCNT];
+	  int got;
+
+	  config.smatch_cfg = XD3_SMATCH_SOFT;
+
+	  for (got = 0; got < XD3_SOFTCFG_VARCNT; got += 1, s = e + 1)
+	    {
+	      values[got] = strtol (s, &e, 10);
+
+	      if ((values[got] < 0) ||
+		  (e == s) ||
+		  (got < XD3_SOFTCFG_VARCNT-1 && *e == 0) ||
+		  (got == XD3_SOFTCFG_VARCNT-1 && *e != 0))
+		{
+		  XPR(NT "invalid string match specifier (-C) %d: %s\n",
+		      got, s);
+		  return EXIT_FAILURE;
+		}
+	    }
+
+	  config.smatcher_soft.large_look    = values[0];
+	  config.smatcher_soft.large_step    = values[1];
+	  config.smatcher_soft.small_look    = values[2];
+	  config.smatcher_soft.small_chain   = values[3];
+	  config.smatcher_soft.small_lchain  = values[4];
+	  config.smatcher_soft.max_lazy      = values[5];
+	  config.smatcher_soft.long_enough   = values[6];
+	}
+      else
+	{
+	  if (option_verbose > 2)
+	    {
+	      XPR(NT "compression level: %d\n", option_level);
+	    }
+	  if (option_level == 0)
+	    {
+	      stream_flags |= XD3_NOCOMPRESS;
+	      config.smatch_cfg = XD3_SMATCH_FASTEST;
+	    }
+	  else if (option_level == 1)
+	    { config.smatch_cfg = XD3_SMATCH_FASTEST; }
+	  else if (option_level == 2)
+	    { config.smatch_cfg = XD3_SMATCH_FASTER; }
+	  else if (option_level <= 5)
+	    { config.smatch_cfg = XD3_SMATCH_FAST; }
+	  else if (option_level == 6)
+	    { config.smatch_cfg = XD3_SMATCH_DEFAULT; }
+	  else
+	    { config.smatch_cfg = XD3_SMATCH_SLOW; }
+	}
+      break;
+#endif
+    case CMD_DECODE:
+      if (option_use_checksum == 0) { stream_flags |= XD3_ADLER32_NOVER; }
+      ifile->flags |= RD_NONEXTERNAL;
+      input_func    = xd3_decode_input;
+      output_func   = main_write_output;
+      break;
+    default:
+      XPR(NT "internal error\n");
+      return EXIT_FAILURE;
+    }
+
+  main_bsize = winsize = main_get_winsize (ifile);
+
+  if ((main_bdata = (uint8_t*) main_bufalloc (winsize)) == NULL)
+    {
+      return EXIT_FAILURE;
+    }
+
+  config.winsize = winsize;
+  config.getblk = main_getblk_func;
+  config.flags = stream_flags;
+
+  if ((ret = main_set_secondary_flags (&config)) ||
+      (ret = xd3_config_stream (& stream, & config)))
+    {
+      XPR(NT XD3_LIB_ERRMSG (& stream, ret));
+      return EXIT_FAILURE;
+    }
+
+#if VCDIFF_TOOLS
+  if ((cmd == CMD_MERGE || cmd == CMD_MERGE_ARG) &&
+      (ret = xd3_whole_state_init (& stream)))
+    {
+      XPR(NT XD3_LIB_ERRMSG (& stream, ret));
+      return EXIT_FAILURE;
+    }
+#endif
+
+  if (cmd != CMD_DECODE)
+    {
+      /* When not decoding, set source now.  The decoder delays this
+       * step until XD3_GOTHEADER. */
+      if (sfile && sfile->filename != NULL)
+	{
+	  if ((ret = main_set_source (& stream, cmd, sfile, & source)))
+	    {
+	      return EXIT_FAILURE;
+	    }
+
+	  XD3_ASSERT(stream.src != NULL);
+	}
+    }
+
+  if (cmd == CMD_PRINTHDR ||
+      cmd == CMD_PRINTHDRS ||
+      cmd == CMD_PRINTDELTA ||
+      cmd == CMD_RECODE)
+    {
+      if (sfile->filename == NULL)
+	{
+	  allow_fake_source = 1;
+	  sfile->filename = "<placeholder>";
+	  main_set_source (& stream, cmd, sfile, & source);
+	}
+    }
+
+  /* This times each window. */
+  get_millisecs_since ();
+
+  /* Main input loop. */
+  do
+    {
+      xoff_t input_offset;
+      xoff_t input_remain;
+      usize_t try_read;
+
+      input_offset = ifile->nread;
+
+      input_remain = XOFF_T_MAX - input_offset;
+
+      try_read = (usize_t) xd3_min ((xoff_t) config.winsize, input_remain);
+
+      if ((ret = main_read_primary_input (ifile, main_bdata,
+					  try_read, & nread)))
+	{
+	  return EXIT_FAILURE;
+	}
+
+      /* If we've reached EOF tell the stream to flush. */
+      if (nread < try_read)
+	{
+	  stream.flags |= XD3_FLUSH;
+	}
+
+#if XD3_ENCODER
+      /* After the first main_read_primary_input completes, we know
+       * all the information needed to encode the application
+       * header. */
+      if (cmd == CMD_ENCODE &&
+	  (ret = main_set_appheader (& stream, ifile, sfile)))
+	{
+	  return EXIT_FAILURE;
+	}
+#endif
+      xd3_avail_input (& stream, main_bdata, nread);
+
+      /* If we read zero bytes after encoding at least one window... */
+      if (nread == 0 && stream.current_window > 0) {
+	break;
+      }
+
+    again:
+      ret = input_func (& stream);
+
+      switch (ret)
+	{
+	case XD3_INPUT:
+	  continue;
+
+	case XD3_GOTHEADER:
+	  {
+	    XD3_ASSERT (stream.current_window == 0);
+
+	    /* Need to process the appheader as soon as possible.  It may
+	     * contain a suggested default filename/decompression routine for
+	     * the ofile, and it may contain default/decompression routine for
+	     * the sources. */
+	    if (cmd == CMD_DECODE)
+	      {
+		/* May need to set the sfile->filename if none was given. */
+		main_get_appheader (& stream, ifile, ofile, sfile);
+
+		/* Now open the source file. */
+		  if ((sfile->filename != NULL) &&
+		      (ret = main_set_source (& stream, cmd, sfile, & source)))
+		  {
+		    return EXIT_FAILURE;
+		  }
+	      }
+	  }
+	/* FALLTHROUGH */
+	case XD3_WINSTART:
+	  {
+	    /* e.g., set or unset XD3_SKIP_WINDOW. */
+	    goto again;
+	  }
+
+	case XD3_OUTPUT:
+	  {
+	    /* Defer opening the output file until the stream produces its
+	     * first output for both encoder and decoder, this way we
+	     * delay long enough for the decoder to receive the
+	     * application header.  (Or longer if there are skipped
+	     * windows, but I can't think of any reason not to delay
+	     * open.) */
+	    if (ofile != NULL &&
+		! main_file_isopen (ofile) &&
+		(ret = main_open_output (& stream, ofile)) != 0)
+	      {
+		return EXIT_FAILURE;
+	      }
+
+	    if ((ret = output_func (& stream, ofile)) &&
+		(ret != PRINTHDR_SPECIAL))
+	      {
+		return EXIT_FAILURE;
+	      }
+
+	    if (ret == PRINTHDR_SPECIAL)
+	      {
+		xd3_abort_stream (& stream);
+		ret = EXIT_SUCCESS;
+		goto done;
+	      }
+
+	    ret = 0;
+
+	    xd3_consume_output (& stream);
+	    goto again;
+	  }
+
+	case XD3_WINFINISH:
+	  {
+	    if (IS_ENCODE (cmd) || cmd == CMD_DECODE || cmd == CMD_RECODE)
+	      {
+		if (! option_quiet && IS_ENCODE (cmd) &&
+		    main_file_isopen (sfile))
+		  {
+		    /* Warn when no source copies are found */
+		    if (option_verbose && ! xd3_encoder_used_source (& stream))
+		      {
+			XPR(NT "warning: input window %"Q"u..%"Q"u has "
+			    "no source copies\n",
+			    stream.current_window * winsize,
+			    (stream.current_window+1) * winsize);
+			XD3_ASSERT (stream.src != NULL);
+		      }
+
+		    /* Limited i-buffer size affects source copies
+		     * when the sourcewin is decided early. */
+		    if (option_verbose > 1 &&
+			stream.srcwin_decided_early &&
+			stream.i_slots_used > stream.iopt_size)
+		      {
+			XPR(NT "warning: input position %"Q"u overflowed "
+			    "instruction buffer, needed %"W"u (vs. %"W"u), "
+			    "consider changing -I\n",
+			    stream.current_window * winsize,
+			    stream.i_slots_used, stream.iopt_size);
+		      }
+		  }
+
+		if (option_verbose)
+		  {
+		    shortbuf rrateavg, wrateavg, tm;
+		    shortbuf rdb, wdb;
+		    shortbuf trdb, twdb;
+		    shortbuf srcpos;
+		    long millis = get_millisecs_since ();
+		    usize_t this_read = (usize_t)(stream.total_in -
+						  last_total_in);
+		    usize_t this_write = (usize_t)(stream.total_out -
+						   last_total_out);
+		    last_total_in = stream.total_in;
+		    last_total_out = stream.total_out;
+
+		    if (option_verbose > 1)
+		      {
+			XPR(NT "%"Q"u: in %s (%s): out %s (%s): "
+			    "total in %s: out %s: %s: srcpos %s\n",
+			    stream.current_window,
+			    main_format_bcnt (this_read, &rdb),
+			    main_format_rate (this_read, millis, &rrateavg),
+			    main_format_bcnt (this_write, &wdb),
+			    main_format_rate (this_write, millis, &wrateavg),
+			    main_format_bcnt (stream.total_in, &trdb),
+			    main_format_bcnt (stream.total_out, &twdb),
+			    main_format_millis (millis, &tm),
+			    main_format_bcnt (stream.srcwin_cksum_pos, &srcpos));
+		      }
+		    else
+		      {
+			XPR(NT "%"Q"u: in %s: out %s: total in %s: "
+			    "out %s: %s\n",
+ 			    stream.current_window,
+			    main_format_bcnt (this_read, &rdb),
+			    main_format_bcnt (this_write, &wdb),
+			    main_format_bcnt (stream.total_in, &trdb),
+			    main_format_bcnt (stream.total_out, &twdb),
+			    main_format_millis (millis, &tm));
+		      }
+		  }
+	      }
+	    goto again;
+	  }
+
+	default:
+	  /* input_func() error */
+	  XPR(NT XD3_LIB_ERRMSG (& stream, ret));
+	  if (! option_quiet && ret == XD3_INVALID_INPUT &&
+	      sfile != NULL && sfile->filename != NULL)
+	    {
+	      XPR(NT "normally this indicates that the source file is incorrect\n");
+	      XPR(NT "please verify the source file with sha1sum or equivalent\n");
+	    }
+	  return EXIT_FAILURE;
+	}
+    }
+  while (nread == config.winsize);
+done:
+  /* Close the inputs. (ifile must be open, sfile may be open) */
+  main_file_close (ifile);
+  if (sfile != NULL)
+    {
+      main_file_close (sfile);
+    }
+
+#if VCDIFF_TOOLS
+  if (cmd == CMD_MERGE &&
+      (ret = main_merge_output (& stream, ofile)))
+    {
+      return EXIT_FAILURE;
+    }
+
+  if (cmd == CMD_MERGE_ARG)
+    {
+      xd3_swap_whole_state (& stream.whole_target,
+			    & recode_stream->whole_target);
+    }
+#endif /* VCDIFF_TOOLS */
+
+  /* If output file is not open yet because of delayed-open, it means
+   * we never encountered a window in the delta, but it could have had
+   * a VCDIFF header?  TODO: solve this elsewhere.  For now, it prints
+   * "nothing to output" below, but the check doesn't happen in case
+   * of option_no_output.  */
+  if (! option_no_output && ofile != NULL)
+    {
+      if (!stdout_only && ! main_file_isopen (ofile))
+	{
+	  XPR(NT "nothing to output: %s\n", ifile->filename);
+	  return EXIT_FAILURE;
+	}
+
+      /* Have to close the output before calling
+       * main_external_compression_finish, or else it hangs. */
+      if (main_file_close (ofile) != 0)
+	{
+	  return EXIT_FAILURE;
+	}
+    }
+
+#if EXTERNAL_COMPRESSION
+  if ((ret = main_external_compression_finish ()))
+    {
+      XPR(NT "external compression commands failed\n");
+      return EXIT_FAILURE;
+    }
+#endif
+
+  if ((ret = xd3_close_stream (& stream)))
+    {
+      XPR(NT XD3_LIB_ERRMSG (& stream, ret));
+      return EXIT_FAILURE;
+    }
+
+#if XD3_ENCODER
+  if (option_verbose > 1 && cmd == CMD_ENCODE)
+    {
+      XPR(NT "scanner configuration: %s\n", stream.smatcher.name);
+      XPR(NT "target hash table size: %"W"u\n", stream.small_hash.size);
+      if (sfile != NULL && sfile->filename != NULL)
+	{
+	  XPR(NT "source hash table size: %"W"u\n", stream.large_hash.size);
+	}
+    }
+
+  if (option_verbose > 2 && cmd == CMD_ENCODE)
+    {
+      XPR(NT "source copies: %"Q"u (%"Q"u bytes)\n",
+	  stream.n_scpy, stream.l_scpy);
+      XPR(NT "target copies: %"Q"u (%"Q"u bytes)\n",
+	  stream.n_tcpy, stream.l_tcpy);
+      XPR(NT "adds: %"Q"u (%"Q"u bytes)\n", stream.n_add, stream.l_add);
+      XPR(NT "runs: %"Q"u (%"Q"u bytes)\n", stream.n_run, stream.l_run);
+    }
+#endif
+
+  xd3_free_stream (& stream);
+
+  if (option_verbose)
+    {
+      shortbuf tm;
+      long end_time = get_millisecs_now ();
+      xoff_t nwrite = ofile != NULL ? ofile->nwrite : 0;
+
+      XPR(NT "finished in %s; input %"Q"u output %"Q"u bytes (%0.2f%%)\n",
+	  main_format_millis (end_time - start_time, &tm),
+	  ifile->nread, nwrite, 100.0 * nwrite / ifile->nread);
+    }
+
+  return EXIT_SUCCESS;
+}
+
+/* Releases what main() left allocated and resets the single-use globals. */
+static void
+main_cleanup (void)
+{
+  /* Nothing is freed here when appheader_used aliases option_appheader. */
+  if (appheader_used != option_appheader &&
+      appheader_used != NULL)
+    {
+      main_free (appheader_used);
+      appheader_used = NULL;
+    }
+
+  /* Input buffer first, then main_lru_cleanup. */
+  main_buffree (main_bdata);
+  main_bsize = 0;
+  main_bdata = NULL;
+  main_lru_cleanup();
+
+  /* Each auxiliary stream is torn down before its storage is returned. */
+  if (recode_stream)
+    {
+      xd3_free_stream (recode_stream);
+      main_free (recode_stream);
+      recode_stream = NULL;
+    }
+  if (merge_stream)
+    {
+      xd3_free_stream (merge_stream);
+      main_free (merge_stream);
+      merge_stream = NULL;
+    }
+  XD3_ASSERT (main_mallocs == 0);
+}
+
+/* Builds the effective argument list: argv[0], then the space-separated
+ * words of the XDELTA environment variable, then argv[1..].  If XDELTA is
+ * unset, or an allocation fails, argc/argv pass through unchanged and
+ * *argv_free / *env_free are NULL; otherwise the caller main_free()s both. */
+static void
+setup_environment (int argc,
+		   char **argv,
+		   int *argc_out,
+		   char ***argv_out,
+		   char ***argv_free,
+		   char **env_free)
+{
+  int n, i, i0;
+  char *p, *v = getenv("XDELTA");
+
+  (*argc_out) = argc;
+  (*argv_out) = argv;
+  (*argv_free) = NULL;
+  (*env_free) = NULL;
+  if (v == NULL) { return; }
+
+  /* Work on a private copy: the tokenizer below writes NULs into it. */
+  if ((p = (char*) main_malloc((usize_t) strlen(v) + 1)) == NULL) { return; }
+  strcpy(p, v);
+
+  /* Space needed for extra args, at least # of spaces */
+  n = argc + 1;
+  for (v = p; *v != 0; v++) {
+    if (*v == ' ') { n++; }
+  }
+
+  (*argv_free) = (char**) main_malloc(sizeof(char*) * (n + 1));
+  if ((*argv_free) == NULL) { main_free (p); return; }
+  (*env_free) = p;
+  (*argv_out) = (*argv_free);
+  (*argv_out)[0] = argv[0];
+  (*argv_out)[n] = NULL;
+
+  /* Leading spaces must not become an empty first argument. */
+  while (*p == ' ') { p++; }
+  i = 1;
+  while (*p != 0) {
+    (*argv_out)[i++] = p;
+    while (*p != ' ' && *p != 0) { p++; }
+    while (*p == ' ') { *p++ = 0; }
+  }
+
+  for (i0 = 1; i0 < argc; i0++) {
+    (*argv_out)[i++] = argv[i0];
+  }
+
+  /* Counting spaces is an upper bound, argv stays NULL terminated. */
+  (*argc_out) = i;
+  while (i <= n) { (*argv_out)[i++] = NULL; }
+}
+
+#if PYTHON_MODULE || SWIG_MODULE || NOT_MAIN
+int xd3_main_cmdline (int argc, char **argv)
+#else
+int main (int argc, char **argv)
+#endif
+{
+  static const char *flags =
+    "0123456789cdefhnqvDFJNORVs:m:B:C:E:I:L:O:M:P:W:A::S::";
+  xd3_cmd cmd;
+  main_file ifile;
+  main_file ofile;
+  main_file sfile;
+  main_merge_list merge_order;
+  main_merge *merge;
+  int my_optind;
+  const char *my_optarg;
+  const char *my_optstr;
+  const char *sfilename;
+  int env_argc;
+  char **env_argv;
+  char **free_argv;  /* malloc() in setup_environment() */
+  char *free_value;  /* malloc() in setup_environment() */
+  int ret;
+  /* Flow: merge XDELTA env args, parse options, run one command, free all. */
+#ifdef _WIN32
+  GetStartupInfo(&winStartupInfo);
+  setvbuf(stderr, NULL, _IONBF, 0);  /* Do not buffer stderr */
+#endif
+
+  main_file_init (& ifile);
+  main_file_init (& ofile);
+  main_file_init (& sfile);
+  main_merge_list_init (& merge_order);
+
+  reset_defaults();
+
+  free_argv = NULL;
+  free_value = NULL;
+  setup_environment(argc, argv, &env_argc, &env_argv,
+		    &free_argv, &free_value);
+  cmd = CMD_NONE;
+  sfilename = NULL;
+  my_optind = 1;
+  argv = env_argv;
+  argc = env_argc;
+  program_name = env_argv[0];
+
+ takearg:
+  my_optarg = NULL;
+  my_optstr = argv[my_optind];
+
+  /* This doesn't use getopt() because it makes trouble for -P & python which
+   * reenter main() and thus care about freeing all memory.  I never had much
+   * trust for getopt anyway, it's too opaque.  This implements a fairly
+   * standard non-long-option getopt with support for named operations (e.g.,
+   * "xdelta3 [encode|decode|printhdr...] < in > out"). */
+  if (my_optstr)
+    {
+      if (*my_optstr == '-')    { my_optstr += 1; }
+      else if (cmd == CMD_NONE) { goto nonflag; }
+      else                      { my_optstr = NULL; }
+    }
+  while (my_optstr)
+    {
+      const char *s;
+      my_optarg = NULL;
+      if ((ret = *my_optstr++) == 0) { my_optind += 1; goto takearg; }
+
+      /* Option handling: first check for one ':' following the option in
+       * flags, then check for two.  The syntax allows:
+       *
+       * 1. -Afoo                   defines optarg="foo"
+       * 2. -A foo                  defines optarg="foo"
+       * 3. -A ""                   defines optarg="" (allows empty-string)
+       * 4. -A [EOA or -moreargs]   error (mandatory case)
+       * 5. -A [EOA -moreargs]      defines optarg=NULL (optional case)
+       * 6. -A=foo                  defines optarg="foo"
+       * 7. -A=                     defines optarg="" (mandatory case)
+       * 8. -A=                     defines optarg=NULL (optional case)
+       *
+       * See tests in test_command_line_arguments().
+       */
+      s = strchr (flags, ret);
+      if (s && s[1] && s[1] == ':')
+	{
+	  int option = s[2] && s[2] == ':';
+
+	  /* Case 1, set optarg to the remaining characters. */
+	  my_optarg = my_optstr;
+	  my_optstr = "";
+
+	  /* Case 2-5 */
+	  if (*my_optarg == 0)
+	    {
+	      /* Condition 4-5 */
+	      int have_arg = (my_optind < (argc - 1) &&
+			      *argv[my_optind+1] != '-');
+
+	      if (! have_arg)
+		{
+		  if (! option)
+		  {
+		    /* Case 4 */
+		    XPR(NT "-%c: requires an argument\n", ret);
+		    ret = EXIT_FAILURE;
+		    goto cleanup;
+		  }
+		  /* Case 5. */
+		  my_optarg = NULL;
+		}
+	      else
+		{
+		  /* Case 2-3. */
+		  my_optarg = argv[++my_optind];
+		}
+	    }
+	  /* Case 6-8. */
+	  else if (*my_optarg == '=')
+	    {
+	      /* Remove the = in all cases. */
+	      my_optarg += 1;
+
+	      if (option && *my_optarg == 0)
+		{
+		  /* Case 8. */
+		  my_optarg = NULL;
+		}
+	    }
+	}
+
+      switch (ret)
+	{
+	/* case: if no '-' was found, maybe check for a command name. */
+	nonflag:
+	       if (strcmp (my_optstr, "decode") == 0) { cmd = CMD_DECODE; }
+	  else if (strcmp (my_optstr, "encode") == 0)
+	    {
+#if XD3_ENCODER
+	      cmd = CMD_ENCODE;
+#else
+	      XPR(NT "encoder support not compiled\n");
+	      goto cleanup;  /* fail through the common cleanup path */
+#endif
+	    }
+	  else if (strcmp (my_optstr, "config") == 0) { cmd = CMD_CONFIG; }
+#if REGRESSION_TEST
+	  else if (strcmp (my_optstr, "test") == 0) { cmd = CMD_TEST; }
+#endif
+#if VCDIFF_TOOLS
+	  else if (strcmp (my_optstr, "printhdr") == 0) { cmd = CMD_PRINTHDR; }
+	  else if (strcmp (my_optstr, "printhdrs") == 0)
+	    { cmd = CMD_PRINTHDRS; }
+	  else if (strcmp (my_optstr, "printdelta") == 0)
+	    { cmd = CMD_PRINTDELTA; }
+	  else if (strcmp (my_optstr, "recode") == 0) { cmd = CMD_RECODE; }
+	  else if (strcmp (my_optstr, "merge") == 0) { cmd = CMD_MERGE; }
+#endif
+
+	  /* If no option was found and still no command, let the default
+	   * command be encode.  The remaining args are treated as
+	   * filenames. */
+	  if (cmd == CMD_NONE)
+	    {
+	      cmd = CMD_DEFAULT;
+	      my_optstr = NULL;
+	      break;
+	    }
+	  else
+	    {
+	      /* But if we find a command name, continue the getopt loop. */
+	      my_optind += 1;
+	      goto takearg;
+	    }
+
+	  /* gzip-like options */
+	case '0': case '1': case '2': case '3': case '4':
+	case '5': case '6': case '7': case '8': case '9':
+	  option_level = ret - '0';
+	  break;
+	case 'f': option_force = 1; break;
+	case 'F':
+#if EXTERNAL_COMPRESSION
+	  option_force2 = 1;
+#else
+	  XPR(NT "warning: -F option ignored, "
+	      "external compression support was not compiled\n");
+#endif
+	  break;  /* both configurations: must not fall through into -v */
+	case 'v': option_verbose += 1; option_quiet = 0; break;
+	case 'q': option_quiet = 1; option_verbose = 0; break;
+	case 'c': option_stdout = 1; break;
+	case 'd':
+	  if (cmd == CMD_NONE) { cmd = CMD_DECODE; }
+	  else { ret = main_help (); goto exit; }
+	  break;
+	case 'e':
+#if XD3_ENCODER
+	  if (cmd == CMD_NONE) { cmd = CMD_ENCODE; }
+	  else { ret = main_help (); goto exit; }
+	  break;
+#else
+	  XPR(NT "encoder support not compiled\n");
+	  goto cleanup;  /* fail through the common cleanup path */
+#endif
+
+	case 'n': option_use_checksum = 0; break;
+	case 'N': option_no_compress = 1; break;
+	case 'C': option_smatch_config = my_optarg; break;
+	case 'J': option_no_output = 1; break;
+	case 'S': if (my_optarg == NULL)
+	    {
+	      option_use_secondary = 0;
+	      option_secondary = NULL;
+	    }
+	  else
+	    {
+	      option_use_secondary = 1;
+	      option_secondary = my_optarg;
+	    }
+	  break;
+	case 'A': if (my_optarg == NULL) { option_use_appheader = 0; }
+	          else { option_appheader = (uint8_t*) my_optarg; } break;
+	case 'B': {
+	  xoff_t bsize;
+	  if ((ret = main_atoux (my_optarg, & bsize,
+				 XD3_MINSRCWINSZ, XD3_MAXSRCWINSZ, 'B')))
+	    {
+	      goto exit;
+	    }
+	  option_srcwinsz = bsize;
+	  break;
+	}
+	case 'I':
+	  if ((ret = main_atou (my_optarg, & option_iopt_size, 0,
+				0, 'I')))
+	    {
+	      goto exit;
+	    }
+	  break;
+	case 'P':
+	  if ((ret = main_atou (my_optarg, & option_sprevsz, 0,
+				0, 'P')))
+	    {
+	      goto exit;
+	    }
+	  break;
+	case 'W':
+	  if ((ret = main_atou (my_optarg, & option_winsize, XD3_ALLOCSIZE,
+				XD3_HARDMAXWINSIZE, 'W')))
+	  {
+	    goto exit;
+	  }
+	  break;
+	case 'D':
+#if EXTERNAL_COMPRESSION == 0
+	  if (option_verbose > 0)
+	    {
+	      XPR(NT "warning: -D option ignored, "
+		  "external compression support was not compiled\n");
+	    }
+#else
+	  option_decompress_inputs  = 0;
+#endif
+	  break;
+	case 'R':
+#if EXTERNAL_COMPRESSION == 0
+	  if (option_verbose > 0)
+	    {
+	      XPR(NT "warning: -R option ignored, "
+		  "external compression support was not compiled\n");
+	    }
+#else
+	  option_recompress_outputs = 0;
+#endif
+	  break;
+	case 's':
+	  if (sfilename != NULL)
+	    {
+	      XPR(NT "specify only one source file\n");
+	      goto cleanup;
+	    }
+
+	  sfilename = my_optarg;
+	  break;
+	case 'm':
+	  if ((merge = (main_merge*)
+	       main_malloc (sizeof (main_merge))) == NULL)
+	    {
+	      goto cleanup;
+	    }
+	  main_merge_list_push_back (& merge_order, merge);
+	  merge->filename = my_optarg;
+	  break;
+	case 'V':
+	  ret = main_version (); goto exit;
+	default:
+	  ret = main_help (); goto exit;
+	}
+    }
+
+  option_source_filename = sfilename;
+
+  /* In case there were no arguments, set the default command. */
+  if (cmd == CMD_NONE) { cmd = CMD_DEFAULT; }
+
+  argc -= my_optind;
+  argv += my_optind;
+
+  /* There may be up to two more arguments. */
+  if (argc > 2)
+    {
+      XPR(NT "too many filenames: %s ...\n", argv[2]);
+      goto cleanup;
+    }
+
+  ifile.flags    = RD_FIRST | RD_MAININPUT;
+  sfile.flags    = RD_FIRST;
+  sfile.filename = option_source_filename;
+
+  /* The infile takes the next argument, if there is one.  But if not, infile
+   * is set to stdin. */
+  if (argc > 0)
+    {
+      ifile.filename = argv[0];
+
+      if ((ret = main_file_open (& ifile, ifile.filename, XO_READ)))
+	{
+	  goto cleanup;
+	}
+    }
+  else
+    {
+      XSTDIN_XF (& ifile);
+    }
+
+  /* The ofile takes the following argument, if there is one.  But if not, it
+   * is left NULL until the application header is processed.  It will be set
+   * in main_open_output. */
+  if (argc > 1)
+    {
+      /* Check for conflicting arguments. */
+      if (option_stdout && ! option_quiet)
+	{
+	  XPR(NT "warning: -c option overrides output filename: %s\n",
+	      argv[1]);
+	}
+
+      if (! option_stdout) { ofile.filename = argv[1]; }
+    }
+
+#if VCDIFF_TOOLS
+  if (cmd == CMD_MERGE &&
+      (ret = main_merge_arguments (&merge_order)))
+    {
+      goto cleanup;
+    }
+#endif /* VCDIFF_TOOLS */
+
+  switch (cmd)
+    {
+    case CMD_PRINTHDR:
+    case CMD_PRINTHDRS:
+    case CMD_PRINTDELTA:
+#if XD3_ENCODER
+    case CMD_ENCODE:
+    case CMD_RECODE:
+    case CMD_MERGE:
+#endif
+    case CMD_DECODE:
+      ret = main_input (cmd, & ifile, & ofile, & sfile);
+      break;
+
+#if REGRESSION_TEST
+    case CMD_TEST:
+      main_config ();
+      ret = xd3_selftest ();
+      break;
+#endif
+
+    case CMD_CONFIG:
+      ret = main_config ();
+      break;
+
+    default:
+      ret = main_help ();
+      break;
+    }
+
+  if (0)
+    {
+    cleanup:
+      ret = EXIT_FAILURE;
+    exit:
+      (void)0;
+    }
+
+#if EXTERNAL_COMPRESSION
+  main_external_compression_cleanup ();
+#endif
+
+  main_file_cleanup (& ifile);
+  main_file_cleanup (& ofile);
+  main_file_cleanup (& sfile);
+
+  while (! main_merge_list_empty (& merge_order))
+    {
+      merge = main_merge_list_pop_front (& merge_order);
+      main_free (merge);
+    }
+
+  main_free (free_argv);
+  main_free (free_value);
+
+  main_cleanup ();
+
+  fflush (stdout);
+  fflush (stderr);
+  return ret;
+}
+
+static int
+main_help (void)
+{
+  main_version();
+  /* Usage text follows the version banner; the result is EXIT_FAILURE. */
+  /* Note: update wiki when command-line features change */
+  XPR(NTR "usage: xdelta3 [command/options] [input [output]]\n");
+  XPR(NTR "make patch:\n");
+  XPR(NTR "\n");
+  XPR(NTR "  xdelta3.exe -e -s old_file new_file delta_file\n");
+  XPR(NTR "\n");
+  XPR(NTR "apply patch:\n");
+  XPR(NTR "\n");
+  XPR(NTR "  xdelta3.exe -d -s old_file delta_file decoded_new_file\n");
+  XPR(NTR "\n");
+  XPR(NTR "special command names:\n");
+  XPR(NTR "    config      prints xdelta3 configuration\n");
+  XPR(NTR "    decode      decompress the input\n");
+  XPR(NTR "    encode      compress the input%s\n",
+     XD3_ENCODER ? "" : " [Not compiled]");
+#if REGRESSION_TEST
+  XPR(NTR "    test        run the builtin tests\n");
+#endif
+#if VCDIFF_TOOLS
+  XPR(NTR "special commands for VCDIFF inputs:\n");
+  XPR(NTR "    printdelta  print information about the entire delta\n");
+  XPR(NTR "    printhdr    print information about the first window\n");
+  XPR(NTR "    printhdrs   print information about all windows\n");
+  XPR(NTR "    recode      encode with new application/secondary settings\n");
+  XPR(NTR "    merge       merge VCDIFF inputs (see below)\n");
+#endif
+  XPR(NTR "merge patches:\n");
+  XPR(NTR "\n");
+  XPR(NTR "  xdelta3 merge -m 1.vcdiff -m 2.vcdiff 3.vcdiff merged.vcdiff\n");
+  XPR(NTR "\n");
+  XPR(NTR "standard options:\n");
+  XPR(NTR "   -0 .. -9     compression level\n");
+  XPR(NTR "   -c           use stdout\n");
+  XPR(NTR "   -d           decompress\n");
+  XPR(NTR "   -e           compress%s\n",
+     XD3_ENCODER ? "" : " [Not compiled]");
+  XPR(NTR "   -f           force (overwrite, ignore trailing garbage)\n");
+#if EXTERNAL_COMPRESSION
+  XPR(NTR "   -F           force the external-compression subprocess\n");
+#endif
+  XPR(NTR "   -h           show help\n");
+  XPR(NTR "   -q           be quiet\n");
+  XPR(NTR "   -v           be verbose (max 2)\n");
+  XPR(NTR "   -V           show version\n");
+
+  XPR(NTR "memory options:\n");
+  XPR(NTR "   -B bytes     source window size\n");
+  XPR(NTR "   -W bytes     input window size\n");
+  XPR(NTR "   -P size      compression duplicates window\n");
+  XPR(NTR "   -I size      instruction buffer size (0 = unlimited)\n");
+
+  XPR(NTR "compression options:\n");
+  XPR(NTR "   -s source    source file to copy from (if any)\n");
+  XPR(NTR "   -S [lzma|djw|fgk] enable/disable secondary compression\n");
+  XPR(NTR "   -N           disable small string-matching compression\n");
+  XPR(NTR "   -D           disable external decompression (encode/decode)\n");
+  XPR(NTR "   -R           disable external recompression (decode)\n");
+  XPR(NTR "   -n           disable checksum (encode/decode)\n");
+  XPR(NTR "   -C           soft config (encode, undocumented)\n");
+  XPR(NTR "   -A [apphead] disable/provide application header (encode)\n");
+  XPR(NTR "   -J           disable output (check/compute only)\n");
+  XPR(NTR "   -m           arguments for \"merge\"\n");
+
+  XPR(NTR "the XDELTA environment variable may contain extra args:\n");
+  XPR(NTR "   XDELTA=\"-s source-x.y.tar.gz\" \\\n");
+  XPR(NTR "   tar --use-compress-program=xdelta3 \\\n");
+  XPR(NTR "       -cf target-x.z.tar.gz.vcdiff target-x.y\n");
+  return EXIT_FAILURE;
+}
diff --git a/third-party/xdelta3/xdelta3/xdelta3-merge.h b/third-party/xdelta3/xdelta3/xdelta3-merge.h
new file mode 100644
index 0000000000..a093843caa
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/xdelta3-merge.h
@@ -0,0 +1,583 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+#ifndef _XDELTA3_MERGE_H_
+#define _XDELTA3_MERGE_H_
+
+int xd3_merge_inputs (xd3_stream *stream, 
+		      xd3_whole_state *source,
+		      xd3_whole_state *input);
+
+/* Sets up the empty stream->whole_target: records XD3_ALLOCSIZE as the size
+ * of each of its three buffers and gets them from xd3_alloc.  0 or ENOMEM. */
+static int
+xd3_whole_state_init (xd3_stream *stream)
+{
+  xd3_whole_state *whole = & stream->whole_target;
+
+  XD3_ASSERT (whole->adds == NULL);
+  XD3_ASSERT (whole->inst == NULL);
+  XD3_ASSERT (whole->wininfo == NULL);
+  XD3_ASSERT (whole->length == 0);
+
+  whole->adds_alloc = XD3_ALLOCSIZE;
+  whole->inst_alloc = XD3_ALLOCSIZE;
+  whole->wininfo_alloc = XD3_ALLOCSIZE;
+  whole->adds = (uint8_t*) xd3_alloc (stream, whole->adds_alloc, 1);
+  if (whole->adds == NULL) { return ENOMEM; }
+  whole->inst = (xd3_winst*) xd3_alloc (stream, whole->inst_alloc, 1);
+  if (whole->inst == NULL) { return ENOMEM; }
+  whole->wininfo = (xd3_wininfo*) xd3_alloc (stream, whole->wininfo_alloc, 1);
+  if (whole->wininfo == NULL) { return ENOMEM; }
+  return 0;
+}
+
+/* Exchanges the contents of *a and *b; both must be fully initialized. */
+static void
+xd3_swap_whole_state (xd3_whole_state *a, xd3_whole_state *b)
+{
+  xd3_whole_state tmp;
+  XD3_ASSERT (a->inst != NULL && a->adds != NULL);
+  XD3_ASSERT (b->inst != NULL && b->adds != NULL);
+  XD3_ASSERT (a->wininfo != NULL && b->wininfo != NULL);
+  memcpy (&tmp, a, sizeof (xd3_whole_state));
+  memcpy (a, b, sizeof (xd3_whole_state));
+  memcpy (b, &tmp, sizeof (xd3_whole_state));
+}
+
+/* Makes sure *alloc_ptr has room for (current_units + new_units) elements
+ * of unit_size bytes each.  If *alloc_size already covers that, nothing
+ * changes.  Otherwise a new buffer is obtained from xd3_alloc, sized as
+ * xd3_round_blksize (2 * needed bytes, XD3_ALLOCSIZE); the bytes of the
+ * first current_units elements are copied into it, the old buffer (if
+ * any) is released and *alloc_size / *alloc_ptr describe the new one.
+ * Returns 0 on success or ENOMEM when the allocation fails. */
+static int
+xd3_realloc_buffer (xd3_stream *stream,
+                    usize_t current_units,
+                    usize_t unit_size,
+                    usize_t new_units,
+                    usize_t *alloc_size,
+                    void **alloc_ptr)
+{
+  const usize_t want_bytes = (current_units + new_units) * unit_size;
+  const usize_t live_bytes = current_units * unit_size;
+  usize_t grown_size;
+  uint8_t *grown;
+
+  /* Common case: the existing allocation is large enough. */
+  if (want_bytes <= *alloc_size) { return 0; }
+
+  grown_size = xd3_round_blksize (want_bytes * 2, XD3_ALLOCSIZE);
+  grown = (uint8_t*) xd3_alloc (stream, grown_size, 1);
+  if (grown == NULL)
+    {
+      return ENOMEM;
+    }
+
+  /* Carry the live contents over before dropping the old buffer. */
+  if (live_bytes != 0)
+    {
+      memcpy (grown, *alloc_ptr, live_bytes);
+    }
+
+  if (*alloc_ptr != NULL) { xd3_free (stream, *alloc_ptr); }
+
+  *alloc_ptr = grown;
+  *alloc_size = grown_size;
+
+  return 0;
+}
+
+/* Reserves the next xd3_winst slot in stream->whole_target.inst, growing
+ * the array if necessary, and stores its address in *winstp.  Returns 0
+ * or the error from xd3_realloc_buffer. */
+static int
+xd3_whole_alloc_winst (xd3_stream *stream,
+		       xd3_winst **winstp)
+{
+  xd3_whole_state *whole = & stream->whole_target;
+  int ret = xd3_realloc_buffer (stream,
+				whole->instlen,
+				sizeof (xd3_winst),
+				1,
+				& whole->inst_alloc,
+				(void**) & whole->inst);
+
+  if (ret != 0) { return ret; }
+  /* Hand out the slot at the old instlen, then count it as used. */
+  *winstp = & whole->inst[whole->instlen];
+  whole->instlen += 1;
+  return 0;
+}
+
+/* Grows stream->whole_target.adds (unit size 1) so that `count' more
+ * units fit after the current addslen.  Returns xd3_realloc_buffer's
+ * result. */
+static int
+xd3_whole_alloc_adds (xd3_stream *stream,
+		      usize_t count)
+{
+  xd3_whole_state *whole = & stream->whole_target;
+  return xd3_realloc_buffer (stream, whole->addslen, 1, count,
+			     & whole->adds_alloc, (void**) & whole->adds);
+}
+
+/* Reserves the next xd3_wininfo slot in stream->whole_target.wininfo,
+ * growing the array if necessary, and stores its address in *wininfop.
+ * Returns 0 or the error from xd3_realloc_buffer. */
+static int
+xd3_whole_alloc_wininfo (xd3_stream *stream,
+			 xd3_wininfo **wininfop)
+{
+  xd3_whole_state *whole = & stream->whole_target;
+  int ret = xd3_realloc_buffer (stream,
+				whole->wininfolen,
+				sizeof (xd3_wininfo),
+				1,
+				& whole->wininfo_alloc,
+				(void**) & whole->wininfo);
+
+  if (ret != 0) { return ret; }
+  *wininfop = & whole->wininfo[whole->wininfolen];
+  whole->wininfolen += 1;
+  return 0;
+}
+
+/* Appends one decoded half-instruction to stream->whole_target.  The new
+ * xd3_winst takes its type and size from *inst and its position from the
+ * running whole_target.length.  XD3_RUN copies one byte and XD3_ADD copies
+ * inst->size bytes from stream->data_sect.buf into whole_target.adds; for
+ * any other type only mode/addr are computed from the dec_* fields.
+ * Returns 0 or an allocation error. */
+static int
+xd3_whole_append_inst (xd3_stream *stream,
+                       xd3_hinst *inst)
+{
+  xd3_whole_state *whole = & stream->whole_target;
+  xd3_winst *winst;
+  int ret;
+
+  ret = xd3_whole_alloc_winst (stream, &winst);
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  winst->type = inst->type;
+  winst->mode = 0;
+  winst->size = inst->size;
+  winst->position = whole->length;
+  whole->length += inst->size;
+
+  if (inst->type == XD3_RUN)
+    {
+      /* Only a single data byte is stored for a run. */
+      if ((ret = xd3_whole_alloc_adds (stream, 1))) { return ret; }
+
+      winst->addr = whole->addslen;
+      whole->adds[whole->addslen++] = *stream->data_sect.buf++;
+    }
+  else if (inst->type == XD3_ADD)
+    {
+      if ((ret = xd3_whole_alloc_adds (stream, inst->size))) { return ret; }
+
+      winst->addr = whole->addslen;
+      memcpy (whole->adds + whole->addslen, stream->data_sect.buf, inst->size);
+      stream->data_sect.buf += inst->size;
+      whole->addslen += inst->size;
+    }
+  else if (inst->addr < stream->dec_cpylen)
+    {
+      /* addr below dec_cpylen: offset by dec_cpyoff, mode from SRCORTGT. */
+      winst->mode = SRCORTGT (stream->dec_win_ind);
+      winst->addr = stream->dec_cpyoff + inst->addr;
+    }
+  else
+    {
+      /* Otherwise the address is rebased relative to dec_winstart. */
+      winst->addr = (stream->dec_winstart +
+		     inst->addr -
+		     stream->dec_cpylen);
+    }
+
+  return 0;
+}
+
+/* Records the current decoder window in stream->whole_target: one
+ * xd3_wininfo whose length, offset and adler32 come from the dec_tgtlen,
+ * dec_winstart and dec_adler32 fields, followed by every non-NOOP
+ * half-instruction decoded while inst_sect.buf is below buf_max.
+ * Returns 0, or the first error from allocation or decoding. */
+int
+xd3_whole_append_window (xd3_stream *stream)
+{
+  xd3_wininfo *wininfo;
+  int ret;
+
+  if ((ret = xd3_whole_alloc_wininfo (stream, &wininfo))) { return ret; }
+
+  wininfo->adler32 = stream->dec_adler32;
+  wininfo->offset = stream->dec_winstart;
+  wininfo->length = stream->dec_tgtlen;
+
+  while (stream->inst_sect.buf < stream->inst_sect.buf_max)
+    {
+      ret = xd3_decode_instruction (stream);
+
+      if (ret == 0 && stream->dec_current1.type != XD3_NOOP)
+	{
+	  ret = xd3_whole_append_inst (stream, & stream->dec_current1);
+	}
+
+      if (ret == 0 && stream->dec_current2.type != XD3_NOOP)
+	{
+	  ret = xd3_whole_append_inst (stream, & stream->dec_current2);
+	}
+
+      if (ret != 0) { return ret; }
+    }
+
+  return 0;
+}
+
+/* xd3_merge_input_output applies *source to *stream and returns the result
+ * in stream->whole_target.  Returns 0 or the first non-zero error code. */
+static int xd3_merge_input_output (xd3_stream *stream,
+				   xd3_whole_state *source)
+{
+  int ret;
+  xd3_stream tmp_stream;
+  memset (& tmp_stream, 0, sizeof (tmp_stream));
+  if ((ret = xd3_config_stream (& tmp_stream, NULL)) ||
+      (ret = xd3_whole_state_init (& tmp_stream)) ||
+      (ret = xd3_merge_inputs (& tmp_stream, source,
+			       & stream->whole_target)))
+    {
+      /* Report while tmp_stream still holds its message; the free below
+       * runs on this path too, so nothing tmp_stream owns is leaked. */
+      XPR(NT XD3_LIB_ERRMSG (&tmp_stream, ret));
+    }
+  else
+    {
+      /* The output is in tmp_stream.whole_target; swap it into stream. */
+      xd3_swap_whole_state (& stream->whole_target, & tmp_stream.whole_target);
+    }
+  xd3_free_stream (& tmp_stream); /* total allocation counts are preserved */
+  return ret;
+}
+
+static int
+xd3_merge_run (xd3_stream *stream,
+	       xd3_whole_state *target,
+	       xd3_winst *iinst)
+{
+  int ret;
+  xd3_winst *oinst;
+  /* Re-emit run IINST in STREAM; needs one instruction and one adds byte. */
+  if ((ret = xd3_whole_alloc_winst (stream, &oinst)) ||
+      (ret = xd3_whole_alloc_adds (stream, 1)))
+    {
+      return ret;
+    }
+
+  oinst->type = iinst->type;
+  oinst->mode = iinst->mode;
+  oinst->size = iinst->size;
+  oinst->addr = stream->whole_target.addslen;
+  /* Output is built in order, so IINST must start at the current length. */
+  XD3_ASSERT (stream->whole_target.length == iinst->position);
+  oinst->position = stream->whole_target.length;
+  stream->whole_target.length += iinst->size;
+  /* The run's single byte is read from TARGET's adds at IINST's address. */
+  stream->whole_target.adds[stream->whole_target.addslen++] = 
+    target->adds[iinst->addr];
+
+  return 0;
+}
+
+static int
+xd3_merge_add (xd3_stream *stream,
+	       xd3_whole_state *target,
+	       xd3_winst *iinst)
+{
+  int ret;
+  xd3_winst *oinst;
+  /* Re-emit add IINST in STREAM; needs one instruction and SIZE adds bytes. */
+  if ((ret = xd3_whole_alloc_winst (stream, &oinst)) ||
+      (ret = xd3_whole_alloc_adds (stream, iinst->size)))
+    {
+      return ret;
+    }
+
+  oinst->type = iinst->type;
+  oinst->mode = iinst->mode;
+  oinst->size = iinst->size;
+  oinst->addr = stream->whole_target.addslen;
+  /* Output is built in order, so IINST must start at the current length. */
+  XD3_ASSERT (stream->whole_target.length == iinst->position);
+  oinst->position = stream->whole_target.length;
+  stream->whole_target.length += iinst->size;
+  /* Copy the added bytes out of TARGET's adds buffer. */
+  memcpy(stream->whole_target.adds + stream->whole_target.addslen,
+	 target->adds + iinst->addr,
+	 iinst->size);
+
+  stream->whole_target.addslen += iinst->size;
+
+  return 0;
+}
+
+static int
+xd3_merge_target_copy (xd3_stream *stream,
+		       xd3_winst *iinst)
+{
+  int ret;
+  xd3_winst *oinst;
+  /* Copy IINST verbatim into a newly allocated instruction of STREAM. */
+  if ((ret = xd3_whole_alloc_winst (stream, &oinst)))
+    {
+      return ret;
+    }
+
+  XD3_ASSERT (stream->whole_target.length == iinst->position);
+  /* whole_target.length is not advanced here; xd3_merge_inputs does that. */
+  memcpy (oinst, iinst, sizeof (*oinst));
+  return 0;
+}
+
+static int
+xd3_merge_find_position (xd3_stream *stream,
+			 xd3_whole_state *source,
+			 xoff_t address,
+			 usize_t *inst_num)
+{
+  usize_t low;
+  usize_t high;
+  /* Finds the SOURCE instruction whose [position, position+size) holds ADDRESS. */
+  if (address >= source->length)
+    {
+      stream->msg = "Invalid copy offset in merge";
+      return XD3_INVALID_INPUT;
+    }
+
+  low = 0;
+  high = source->instlen;
+  /* Binary search over the half-open index range [low, high). */
+  while (low != high)
+    {
+      xoff_t mid_lpos;
+      xoff_t mid_hpos;
+      usize_t mid = low + (high - low) / 2;
+      mid_lpos = source->inst[mid].position;
+
+      if (address < mid_lpos)
+	{
+	  high = mid;
+	  continue;
+	}
+      /* ADDRESS is at or past this instruction's start; now check its end. */
+      mid_hpos = mid_lpos + source->inst[mid].size;
+
+      if (address >= mid_hpos)
+	{
+	  low = mid + 1;
+	  continue;
+	}
+
+      *inst_num = mid;
+      return 0;
+    }
+  /* Reached only if no instruction covers ADDRESS although it is < length. */
+  stream->msg = "Internal error in merge";
+  return XD3_INTERNAL;
+}
+
+static int
+xd3_merge_source_copy (xd3_stream *stream,
+		       xd3_whole_state *source,
+		       const xd3_winst *iinst_orig)
+{
+  int ret;
+  xd3_winst iinst;
+  usize_t sinst_num;
+  /* Re-express a copy of SOURCE's output as the instructions that made it. */
+  memcpy (& iinst, iinst_orig, sizeof (iinst));
+
+  XD3_ASSERT (iinst.mode == VCD_SOURCE);
+  /* Locate the source instruction that covers the first copied byte. */
+  if ((ret = xd3_merge_find_position (stream, source, 
+				      iinst.addr, &sinst_num)))
+    {
+      return ret;
+    }
+
+  while (iinst.size > 0)
+    {
+      xd3_winst *sinst;
+      xd3_winst *minst;
+      usize_t sinst_offset;
+      usize_t sinst_left;
+      usize_t this_take;
+      /* A copy that runs past the last source instruction is bad input. */
+      if (sinst_num >= source->instlen)
+	{ stream->msg = "Invalid copy length in merge"; return XD3_INVALID_INPUT; }
+      sinst = &source->inst[sinst_num];
+
+      XD3_ASSERT (iinst.addr >= sinst->position);
+
+      sinst_offset = (usize_t)(iinst.addr - sinst->position);
+
+      XD3_ASSERT (sinst->size > sinst_offset);
+
+      sinst_left = sinst->size - sinst_offset;
+      this_take = xd3_min (iinst.size, sinst_left);
+
+      XD3_ASSERT (this_take > 0);
+
+      if ((ret = xd3_whole_alloc_winst (stream, &minst)))
+	{
+	  return ret;
+	}
+
+      minst->size = this_take;
+      minst->type = sinst->type;
+      minst->position = iinst.position;
+      minst->mode = 0;
+      /* Fill in the data or address according to the source instruction. */
+      switch (sinst->type)
+	{
+	case XD3_RUN:
+	  if ((ret = xd3_whole_alloc_adds (stream, 1)))
+	    {
+	      return ret;
+	    }
+
+	  minst->addr = stream->whole_target.addslen;
+	  stream->whole_target.adds[stream->whole_target.addslen++] = 
+	    source->adds[sinst->addr];
+	  break;
+	case XD3_ADD:
+	  if ((ret = xd3_whole_alloc_adds (stream, this_take)))
+	    {
+	      return ret;
+	    }
+
+	  minst->addr = stream->whole_target.addslen;
+	  memcpy(stream->whole_target.adds + stream->whole_target.addslen,
+		 source->adds + sinst->addr + sinst_offset,
+		 this_take);
+	  stream->whole_target.addslen += this_take;
+	  break;
+	default:
+	  if (sinst->mode != 0)
+	    {
+	      minst->mode = sinst->mode;
+	      minst->addr = sinst->addr + sinst_offset;
+	    }
+	  else
+	    {
+	      // Note: A better implementation will construct the
+	      // mapping of output ranges, starting from the input
+	      // range, applying deltas in forward order, using an
+	      // interval tree.  This code uses recursion to construct
+	      // each copied range, recursively (using binary search
+	      // in xd3_merge_find_position).
+	      //
+	      // TODO: This code can cause stack overflow. Fix as
+	      // described above.
+	      xd3_winst tinst;
+	      tinst.type = XD3_CPY;
+	      tinst.mode = iinst.mode;
+	      tinst.addr = sinst->addr + sinst_offset;
+	      tinst.size = this_take;
+	      tinst.position = iinst.position;
+
+	      // The instruction allocated in this frame will not be used.
+	      stream->whole_target.instlen -= 1;
+
+	      if ((ret = xd3_merge_source_copy (stream, source, &tinst)))
+		{ 
+		  return ret;
+		}
+	    }
+	  break;
+	}
+      /* Advance past the bytes just handled and move to the next instruction. */
+      iinst.position += this_take;
+      iinst.addr += this_take;
+      iinst.size -= this_take;
+      sinst_num += 1;
+    }
+
+  return 0;
+}
+
+/* xd3_merge_inputs() applies *input to *source, returns its result in
+ * stream. */
+int xd3_merge_inputs (xd3_stream *stream, 
+		      xd3_whole_state *source,
+		      xd3_whole_state *input)
+{
+  int ret = 0;
+  usize_t i;
+  size_t input_i;
+  /* Copy every window-info record of INPUT into STREAM. */
+  for (i = 0; i < input->wininfolen; ++i) {
+    xd3_wininfo *copyinfo;
+
+    if ((ret = xd3_whole_alloc_wininfo (stream, &copyinfo))) { return ret; }
+
+    *copyinfo = input->wininfo[i];
+  }
+
+  /* iterate over each instruction; the loop stops at the first error. */
+  for (input_i = 0; ret == 0 && input_i < input->instlen; ++input_i)
+    {
+      xd3_winst *iinst = &input->inst[input_i];
+
+      switch (iinst->type)
+	{
+	case XD3_RUN:
+	  ret = xd3_merge_run (stream, input, iinst);
+	  break;
+	case XD3_ADD:
+	  ret = xd3_merge_add (stream, input, iinst);
+	  break;
+	default:
+	  if (iinst->mode == 0)
+	    {
+	      ret = xd3_merge_target_copy (stream, iinst);
+	    }
+	  else if (iinst->mode == VCD_TARGET)
+	    {
+	      ret = XD3_INVALID_INPUT; /* note: stream->msg is not set here */
+	    }
+	  else
+	    {
+	      ret = xd3_merge_source_copy (stream, source, iinst);
+	    }
+
+	  /* The whole_target.length is not updated in the xd3_merge*copy
+	   * routine because of recursion in xd3_merge_source_copy. */
+	  stream->whole_target.length += iinst->size;
+	  break;
+	}
+    }
+  
+  return ret;
+}
+
+#endif
diff --git a/third-party/xdelta3/xdelta3/xdelta3-second.h b/third-party/xdelta3/xdelta3/xdelta3-second.h
new file mode 100644
index 0000000000..8dc5b47738
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/xdelta3-second.h
@@ -0,0 +1,321 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+#ifndef _XDELTA3_SECOND_H_
+#define _XDELTA3_SECOND_H_
+
+static inline void xd3_bit_state_encode_init (bit_state *bits)
+{
+  bits->cur_byte = 0; /* no bits accumulated yet */
+  bits->cur_mask = 1; /* the next bit is stored at mask 0x01 */
+}
+
+static inline int xd3_decode_bits (xd3_stream     *stream,
+				   bit_state      *bits,
+				   const uint8_t **input,
+				   const uint8_t  *input_max,
+				   usize_t         nbits,
+				   usize_t        *valuep)
+{
+  usize_t value = 0; /* filled from its most-significant requested bit down */
+  usize_t vmask = (usize_t) 1 << nbits; /* shift in usize_t, not in int */
+  /* Reads NBITS (> 0) bits from the stream into *VALUEP; 0 or XD3_INTERNAL. */
+  if (bits->cur_mask == 0x100) { goto next_byte; }
+
+  for (;;)
+    {
+      do
+	{
+	  vmask >>= 1;
+
+	  if (bits->cur_byte & bits->cur_mask)
+	    {
+	      value |= vmask;
+	    }
+
+	  bits->cur_mask <<= 1;
+
+	  if (vmask == 1) { goto done; }
+	}
+      while (bits->cur_mask != 0x100);
+
+    next_byte:
+      /* cur_byte is used up (mask reached 0x100): fetch the next input byte. */
+      if (*input == input_max)
+	{
+	  stream->msg = "secondary decoder end of input";
+	  return XD3_INTERNAL;
+	}
+
+      bits->cur_byte = *(*input)++;
+      bits->cur_mask = 1;
+    }
+
+ done:
+
+  IF_DEBUG2 (DP(RINT "(d) %"W"u ", value));
+
+  (*valuep) = value;
+  return 0;
+}
+
+#if REGRESSION_TEST
+/* There may be extra bits at the end of secondary decompression, this function
+ * checks for non-zero bits.  This is overly strict, but helps pass the
+ * single-bit-error regression test. */
+static int
+xd3_test_clean_bits (xd3_stream *stream, bit_state *bits)
+{
+  for (; bits->cur_mask != 0x100; bits->cur_mask <<= 1)
+    {
+      if (bits->cur_byte & bits->cur_mask)
+	{
+	  stream->msg = "secondary decoder garbage";
+	  return XD3_INTERNAL;
+	}
+    }
+
+  return 0;
+}
+#endif
+
+static int
+xd3_get_secondary (xd3_stream *stream, xd3_sec_stream **sec_streamp, 
+		   int is_encode)
+{
+  if (*sec_streamp == NULL) /* allocate and initialize only on first use */
+    {
+      int ret;
+
+      if ((*sec_streamp = stream->sec_type->alloc (stream)) == NULL)
+	{
+	  stream->msg = "error initializing secondary stream";
+	  return XD3_INVALID;
+	}
+      /* NOTE(review): if init fails *sec_streamp stays set, so a retry skips init. */
+      if ((ret = stream->sec_type->init (stream, *sec_streamp, is_encode)) != 0)
+	{
+	  return ret;
+	}
+    }
+
+  return 0;
+}
+
+static int
+xd3_decode_secondary (xd3_stream      *stream,
+		      xd3_desect      *sect,
+		      xd3_sec_stream **sec_streamp)
+{
+  usize_t dec_size;
+  uint8_t *out_used;
+  int ret;
+  /* Decompresses section SECT in place: on success SECT points at the output. */
+  if ((ret = xd3_get_secondary (stream, sec_streamp, 0)) != 0)
+    {
+      return ret;
+    }
+
+  /* Decode the size, allocate the buffer. */
+  if ((ret = xd3_read_size (stream, & sect->buf,
+			    sect->buf_max, & dec_size)) ||
+      (ret = xd3_decode_allocate (stream, dec_size,
+				  & sect->copied2, & sect->alloc2)))
+    {
+      return ret;
+    }
+
+  if (dec_size == 0)
+    {
+      stream->msg = "secondary decoder invalid output size";
+      return XD3_INVALID_INPUT;
+    }
+
+  out_used = sect->copied2;
+  /* The decoder advances sect->buf (input) and out_used (output). */
+  if ((ret = stream->sec_type->decode (stream, *sec_streamp,
+				       & sect->buf, sect->buf_max,
+				       & out_used, out_used + dec_size)))
+    {
+      return ret;
+    }
+  /* All input must be consumed and exactly dec_size bytes produced. */
+  if (sect->buf != sect->buf_max)
+    {
+      stream->msg = "secondary decoder finished with unused input";
+      return XD3_INTERNAL;
+    }
+
+  if (out_used != sect->copied2 + dec_size)
+    {
+      stream->msg = "secondary decoder short output";
+      return XD3_INTERNAL;
+    }
+  /* Re-point the section at the decompressed bytes. */
+  sect->buf = sect->copied2;
+  sect->buf_max = sect->copied2 + dec_size;
+  sect->size = dec_size;
+
+  return 0;
+}
+
+#if XD3_ENCODER
+static inline int xd3_encode_bit (xd3_stream      *stream,
+				  xd3_output     **output,
+				  bit_state       *bits,
+				  usize_t          bit)
+{
+  int ret;
+  /* Any non-zero BIT counts as 1; it is stored at the current mask position. */
+  if (bit)
+    {
+      bits->cur_byte |= bits->cur_mask;
+    }
+
+  /* OPT: Might help to buffer more than 8 bits at once. */
+  if (bits->cur_mask == 0x80) /* eighth bit stored: emit the byte and reset */
+    {
+      if ((ret = xd3_emit_byte (stream, output, bits->cur_byte)) != 0)
+	{
+	  return ret;
+	}
+
+      bits->cur_mask = 1;
+      bits->cur_byte = 0;
+    }
+  else
+    {
+      bits->cur_mask <<= 1;
+    }
+
+  return 0;
+}
+
+static inline int xd3_flush_bits (xd3_stream      *stream,
+				  xd3_output     **output,
+				  bit_state       *bits)
+{
+  if (bits->cur_mask == 1) { return 0; } /* no partially filled byte */
+  return xd3_emit_byte (stream, output, bits->cur_byte);
+}
+
+static inline int xd3_encode_bits (xd3_stream      *stream,
+				   xd3_output     **output,
+				   bit_state       *bits,
+				   usize_t           nbits,
+				   usize_t           value)
+{
+  int ret;
+  usize_t mask = (usize_t) 1 << nbits; /* shift in usize_t, not in int */
+
+  XD3_ASSERT (nbits > 0);
+  XD3_ASSERT (nbits < sizeof (usize_t) * 8);
+  XD3_ASSERT (value < mask);
+  /* Emit the low NBITS bits of VALUE, most-significant bit first. */
+  do
+    {
+      mask >>= 1;
+
+      if ((ret = xd3_encode_bit (stream, output, bits, value & mask)))
+	{
+	  return ret;
+	}
+    }
+  while (mask != 1);
+
+  IF_DEBUG2 (DP(RINT "(e) %"W"u ", value));
+
+  return 0;
+}
+
+static int
+xd3_encode_secondary (xd3_stream      *stream,
+		      xd3_output     **head,
+		      xd3_output     **tail,
+		      xd3_sec_stream **sec_streamp,
+		      xd3_sec_cfg     *cfg,
+		      int             *did_it)
+{
+  xd3_output     *tmp_head;
+  xd3_output     *tmp_tail;
+  /* Tries to replace the output list *HEAD with its secondary-compressed form. */
+  usize_t comp_size;
+  usize_t orig_size;
+  /* *DID_IT is set to 1 only when the replacement happens; else untouched. */
+  int ret;
+
+  orig_size = xd3_sizeof_output (*head);
+  /* Inputs smaller than SECONDARY_MIN_INPUT are left as they are. */
+  if (orig_size < SECONDARY_MIN_INPUT) { return 0; }
+
+  if ((ret = xd3_get_secondary (stream, sec_streamp, 1)) != 0)
+    {
+      return ret;
+    }
+  /* NOTE(review): tmp_head is not NULL-checked before use - confirm. */
+  tmp_head = xd3_alloc_output (stream, NULL);
+
+  /* Encode the size, encode the data.  Encoding the size makes it
+   * simpler, but is a little gross.  Should not need the entire
+   * section in contiguous memory, but it is much easier this way. */
+  if ((ret = xd3_emit_size (stream, & tmp_head, orig_size)) ||
+      (ret = stream->sec_type->encode (stream, *sec_streamp, *head,
+				       tmp_head, cfg)))
+    {
+      goto getout;
+    }
+
+  /* If the secondary compressor determines it's no good, it returns
+   * XD3_NOSECOND. */
+
+  /* Setup tmp_tail, comp_size */
+  tmp_tail  = tmp_head;
+  comp_size = tmp_head->next;
+
+  while (tmp_tail->next_page != NULL)
+    {
+      tmp_tail = tmp_tail->next_page;
+      comp_size += tmp_tail->next;
+    }
+
+  XD3_ASSERT (comp_size == xd3_sizeof_output (tmp_head));
+  XD3_ASSERT (tmp_tail != NULL);
+  /* Keep the compressed form only if it saves enough, or cfg forces it. */
+  if (comp_size < (orig_size - SECONDARY_MIN_SAVINGS) || cfg->inefficient)
+    {
+      if (comp_size < orig_size)
+	{
+	  IF_DEBUG1(DP(RINT "[encode_secondary] saved %"W"u bytes: %"W"u -> %"W"u (%0.2f%%)\n",
+		       orig_size - comp_size, orig_size, comp_size,
+		       100.0 * (double) comp_size / (double) orig_size));
+	}
+
+      xd3_free_output (stream, *head);
+
+      *head = tmp_head;
+      *tail = tmp_tail;
+      *did_it = 1;
+    }
+  else
+    {
+    getout:
+      if (ret == XD3_NOSECOND) { ret = 0; } /* declined, not an error */
+      xd3_free_output (stream, tmp_head);
+    }
+
+  return ret;
+}
+#endif /* XD3_ENCODER */
+#endif /* _XDELTA3_SECOND_H_ */
diff --git a/third-party/xdelta3/xdelta3/xdelta3-test.h b/third-party/xdelta3/xdelta3/xdelta3-test.h
new file mode 100644
index 0000000000..5d6cf45dc8
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/xdelta3-test.h
@@ -0,0 +1,3022 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+static const uint32_t TEST_SEED1 = 5489UL;
+#define MT_LEN 624 /* number of 32-bit words of generator state */
+#define MT_IA 397 /* distance to the word mixed in by mt_random's refill */
+static const uint32_t UPPER_MASK = 0x80000000; /* top bit of a state word */
+static const uint32_t LOWER_MASK = 0x7FFFFFFF; /* low 31 bits of a state word */
+static const uint32_t MATRIX_A = 0x9908B0DF; /* XORed in when y is odd */
+
+#ifndef SHELL_TESTS
+#define SHELL_TESTS 1 /* on unless the includer already defined it */
+#endif
+
+typedef struct mtrand mtrand;
+
+struct mtrand {
+  int mt_index_; /* next word to hand out; >= MT_LEN means refill first */
+  uint32_t mt_buffer_[MT_LEN];
+};
+
+int test_compare_files (const char* tgt, const char *rec);
+void mt_init(mtrand *mt, uint32_t seed);
+uint32_t mt_random (mtrand *mt);
+int test_setup (void);
+
+/* The Mersenne Twister code used herein is due to Michael Brundage.  Thanks!
+ * http://www.qbrundage.com/michaelb/pubs/essays/random_number_generation.html
+ */
+void mt_init(mtrand *mt, uint32_t seed) {
+  int i;
+  mt->mt_buffer_[0] = seed;
+  mt->mt_index_ = MT_LEN; /* buffer counts as used up: first mt_random refills */
+  for (i = 1; i < MT_LEN; i++) {
+    /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
+    /* In the previous versions, MSBs of the seed affect   */
+    /* only MSBs of the array mt[].                        */
+    /* 2002/01/09 modified by Makoto Matsumoto             */
+    mt->mt_buffer_[i] =
+	(1812433253UL * (mt->mt_buffer_[i-1] ^
+			 (mt->mt_buffer_[i-1] >> 30)) + i);
+  }
+}
+
+uint32_t mt_random (mtrand *mt) {
+  uint32_t y;
+  unsigned long mag01[2];
+  mag01[0] = 0;
+  mag01[1] = MATRIX_A; /* mag01[y & 1]: MATRIX_A is applied only when y is odd */
+  /* Regenerate all MT_LEN state words once every one has been handed out. */
+  if (mt->mt_index_ >= MT_LEN) {
+    int kk;
+
+    for (kk = 0; kk < MT_LEN - MT_IA; kk++) {
+      y = (mt->mt_buffer_[kk] & UPPER_MASK) |
+	(mt->mt_buffer_[kk + 1] & LOWER_MASK);
+      mt->mt_buffer_[kk] = mt->mt_buffer_[kk + MT_IA] ^
+	(y >> 1) ^ mag01[y & 0x1UL];
+    }
+    for (;kk < MT_LEN - 1; kk++) {
+      y = (mt->mt_buffer_[kk] & UPPER_MASK) |
+	(mt->mt_buffer_[kk + 1] & LOWER_MASK);
+      mt->mt_buffer_[kk] = mt->mt_buffer_[kk + (MT_IA - MT_LEN)] ^
+	(y >> 1) ^ mag01[y & 0x1UL];
+    }
+    y = (mt->mt_buffer_[MT_LEN - 1] & UPPER_MASK) |
+      (mt->mt_buffer_[0] & LOWER_MASK);
+    mt->mt_buffer_[MT_LEN - 1] = mt->mt_buffer_[MT_IA - 1] ^
+      (y >> 1) ^ mag01[y & 0x1UL];
+    mt->mt_index_ = 0;
+  }
+  /* Take the next state word and scramble it before returning it. */
+  y = mt->mt_buffer_[mt->mt_index_++];
+
+  y ^= (y >> 11);
+  y ^= (y << 7) & 0x9d2c5680UL;
+  y ^= (y << 15) & 0xefc60000UL;
+  y ^= (y >> 18);
+
+  return y;
+}
+
+static mtrand static_mtrand; /* generator state shared by the test helpers */
+
+#include <math.h>
+/* Exponentially distributed draw with mean MEAN, capped at MAX_VALUE. */
+static uint32_t
+mt_exp_rand (uint32_t mean, uint32_t max_value)
+{
+  uint32_t draw = mt_random (&static_mtrand);
+  /* A zero draw would make log() infinite; treat it as the smallest draw. */
+  double erand  = log (1.0 / ((draw ? draw : 1) / (double)UINT32_MAX));
+  double scaled = (double) mean * erand + 0.5;
+  /* Clamp before narrowing so the double can never overflow uint32_t. */
+  return (scaled >= (double) max_value) ? max_value : (uint32_t) scaled;
+}
+
+#if SHELL_TESTS
+#include <sys/wait.h>
+#endif
+/* True when stream->msg equals X; needs a variable named stream in scope. */
+#define MSG_IS(x) (stream->msg != NULL && strcmp ((x), stream->msg) == 0)
+
+static const usize_t TWO_MEGS_AND_DELTA = (3 << 20); /* 3 MiB, despite the name */
+static const usize_t ADDR_CACHE_ROUNDS = 10000;
+/* Parameters of the random inputs built by test_make_inputs(). */
+static const usize_t TEST_FILE_MEAN   = 16384; /* sizes: MEAN/2 .. 3*MEAN/2-1 */
+static const double TEST_ADD_MEAN     = 128; /* mean chunk length */
+static const double TEST_ADD_MAX      = 512; /* cap on chunk length */
+static const double TEST_ADD_RATIO    = 0.1; /* add budget / source size */
+static const double TEST_EPSILON      = 0.25;
+
+#define TESTBUFSIZE (1024 * 16)
+
+#define TESTFILESIZE (1024)
+/* File-name buffers, filled in by test_setup(). */
+static char   TEST_TARGET_FILE[TESTFILESIZE];
+static char   TEST_SOURCE_FILE[TESTFILESIZE];
+static char   TEST_DELTA_FILE[TESTFILESIZE];
+static char   TEST_RECON_FILE[TESTFILESIZE];
+static char   TEST_RECON2_FILE[TESTFILESIZE];
+static char   TEST_COPY_FILE[TESTFILESIZE];
+static char   TEST_NOPERM_FILE[TESTFILESIZE];
+/* Reports file, line and the failed expression through XPR, then abort()s. */
+#define CHECK(cond)						\
+  if (!(cond)) {						\
+    XPR(NT __FILE__":%d: check failure: " #cond, __LINE__);	\
+    abort(); }
+
+#if SHELL_TESTS
+/* Use a fixed soft config so that test values are fixed.  See also
+ * test_compress_text(). */
+static const char* test_softcfg_str = "-C9,3,4,8,2,36,70";
+#endif
+
+/***********************************************************************
+ TEST HELPERS
+ ***********************************************************************/
+
+static void DOT (void) { XPR(NTR "."); } /* progress marker: one "." via XPR */
+static int do_cmd (xd3_stream *stream, const char *buf)
+{ /* Runs BUF via system(); returns 0 or the raw non-zero status it gave. */
+  int ret;
+  if ((ret = system (buf)) != 0)
+    {
+      if (WIFEXITED (ret)) /* exited normally, but with a non-zero code */
+	{
+	  stream->msg = "command exited non-zero";
+	  IF_DEBUG1 (XPR(NT "command was: %s\n", buf));
+	}
+      else
+	{
+	  stream->msg = "abnormal command termination";
+	}
+      return ret;
+    }
+  return 0;
+}
+
+static int do_fail (xd3_stream *stream, const char *buf)
+{ /* Runs BUF via system() and requires a normal exit with status exactly 1. */
+  int ret;
+  ret = system (buf);
+  if (! WIFEXITED (ret) || WEXITSTATUS (ret) != 1)
+    {
+      stream->msg = "command should have not succeeded";
+      XPR(NT "command was %s\n", buf);
+      return XD3_INTERNAL;
+    }
+  return 0;
+}
+
+/* Test that the exponential distribution actually produces its mean. */
+static int
+test_random_numbers (xd3_stream *stream, int ignore)
+{
+  usize_t i;
+  usize_t sum = 0;
+  usize_t mean = 50;
+  usize_t n_rounds = 1000000;
+  double average, error;
+  double allowed_error = 0.1;
+  /* Fixed seed, so the sequence and the outcome are the same on every run. */
+  mt_init (& static_mtrand, 0x9f73f7fe);
+
+  for (i = 0; i < n_rounds; i += 1)
+    {
+      sum += mt_exp_rand (mean, UINT32_MAX);
+    }
+
+  average = (double) sum / (double) n_rounds;
+  error   = average - (double) mean;
+  /* Pass when the sample average is within allowed_error of the mean. */
+  if (error < allowed_error && error > -allowed_error)
+    {
+      return 0;
+    }
+
+  /*XPR(NT "error is %f\n", error);*/
+  stream->msg = "random distribution looks broken";
+  return XD3_INTERNAL;
+}
+
+static int
+test_printf_xoff (xd3_stream *stream, int ignore)
+{ /* Checks that the "Q" format prefix prints XOFF_T_MAX in full. */
+  char buf[64];
+  xoff_t x = XOFF_T_MAX;
+  snprintf_func (buf, sizeof(buf), "%"Q"u", x);
+  const char *expect = XD3_USE_LARGEFILE64 ? /* 2^64-1 or 2^32-1 in decimal */
+    "18446744073709551615" : "4294967295";
+  if (strcmp (buf, expect) == 0) {
+    return 0;
+  }
+  return XD3_INTERNAL;
+}
+
+static void
+test_unlink (char* file)
+{
+  /* Best-effort: skip NULL/empty names, tolerate ENOENT, report via errno. */
+  if (file != NULL && *file != 0 &&
+      unlink (file) != 0 && errno != ENOENT)
+    {
+      XPR(NT "unlink %s failed: %s\n", file, strerror(errno));
+    }
+}
+
+static void
+test_cleanup (void)
+{ /* Removes every temporary file named by the TEST_*_FILE buffers. */
+#if 1
+  test_unlink (TEST_TARGET_FILE);
+  test_unlink (TEST_SOURCE_FILE);
+  test_unlink (TEST_DELTA_FILE);
+  test_unlink (TEST_RECON_FILE);
+  test_unlink (TEST_RECON2_FILE);
+  test_unlink (TEST_COPY_FILE);
+  test_unlink (TEST_NOPERM_FILE);
+#endif
+}
+
+int test_setup (void)
+{
+  static int x = 0;
+  pid_t pid = getpid();
+  x++;
+  /* Remove the files of the previous setup before their names change. */
+  test_cleanup();
+  /* New names embed the pid and the per-call counter X. */
+  snprintf_func (TEST_TARGET_FILE, TESTFILESIZE,
+		 "/tmp/xdtest.%d.target.%d", pid, x);
+  snprintf_func (TEST_SOURCE_FILE, TESTFILESIZE,
+		 "/tmp/xdtest.%d.source.%d", pid, x);
+  snprintf_func (TEST_DELTA_FILE, TESTFILESIZE,
+		 "/tmp/xdtest.%d.delta.%d", pid, x);
+  snprintf_func (TEST_RECON_FILE, TESTFILESIZE,
+		 "/tmp/xdtest.%d.recon.%d", pid, x);
+  snprintf_func (TEST_RECON2_FILE, TESTFILESIZE,
+		 "/tmp/xdtest.%d.recon2.%d", pid, x);
+  snprintf_func (TEST_COPY_FILE, TESTFILESIZE,
+		 "/tmp/xdtest.%d.copy.%d", pid, x);
+  snprintf_func (TEST_NOPERM_FILE, TESTFILESIZE,
+		 "/tmp/xdtest.%d.noperm.%d", pid, x);
+  /* Make sure nothing stale exists under the new names either. */
+  test_cleanup();
+  return 0;
+}
+
+/* Writes random data to TEST_TARGET_FILE and, if SS_OUT is not NULL, to
+ * TEST_SOURCE_FILE; sizes go to TS_OUT / SS_OUT.  Returns 0 or an error. */
+static int
+test_make_inputs (xd3_stream *stream, xoff_t *ss_out, xoff_t *ts_out)
+{
+  usize_t ts = (mt_random (&static_mtrand) % TEST_FILE_MEAN) +
+    TEST_FILE_MEAN / 2;
+  usize_t ss = (mt_random (&static_mtrand) % TEST_FILE_MEAN) +
+    TEST_FILE_MEAN / 2;
+  uint8_t *buf = (uint8_t*) malloc (ts + ss), *sbuf = buf, *tbuf = buf + ss;
+  usize_t sadd = 0, sadd_max = (usize_t)(ss * TEST_ADD_RATIO);
+  FILE  *tf = NULL, *sf = NULL;
+  usize_t i, j;
+  int ret;
+
+  if (buf == NULL) { return ENOMEM; }
+
+  if ((tf = fopen (TEST_TARGET_FILE, "w")) == NULL ||
+      (ss_out != NULL && (sf = fopen (TEST_SOURCE_FILE, "w")) == NULL))
+    {
+      stream->msg = "open failed";
+      ret = get_errno ();
+      goto failure;
+    }
+
+  if (ss_out != NULL)
+    {
+      for (i = 0; i < ss; )
+	{
+	  sbuf[i++] = (uint8_t) mt_random (&static_mtrand);
+	}
+    }
+
+  /* Then modify the data to produce copies, everything not copied is
+   * an add.  The following logic produces the TEST_ADD_RATIO.  The
+   * variable SADD contains the number of adds so far, which should
+   * not exceed SADD_MAX. */
+  /* XPR(NT "ss = %u ts = %u\n", ss, ts); */
+  for (i = 0; i < ts; )
+    {
+      usize_t left = ts - i;
+      usize_t next = mt_exp_rand ((uint32_t) TEST_ADD_MEAN,
+				  (uint32_t) TEST_ADD_MAX);
+      usize_t add_left = sadd_max - sadd;
+      double add_prob = (left == 0) ? 0 : (add_left / (double) left);
+      int do_copy;
+
+      next = xd3_min (left, next);
+      do_copy = (next > add_left ||
+		 (mt_random (&static_mtrand) /
+		  (double)USIZE_T_MAX) >= add_prob);
+
+      if (ss_out == NULL)
+	{
+	  do_copy &= (i > 0);
+	}
+      else
+	{
+	  do_copy &= (ss - next) > 0;
+	}
+
+      if (do_copy)
+	{
+	  /* Copy */
+	  size_t offset = mt_random (&static_mtrand) % ((ss_out == NULL) ?
+							i :
+							(ss - next));
+	  /* XPR(NT "[%u] copy %u at %u ", i, next, offset); */
+	  for (j = 0; j < next; j += 1)
+	    {
+	      char c = ((ss_out == NULL) ? tbuf : sbuf)[offset + j];
+	      /* XPR(NT "%x%x", (c >> 4) & 0xf, c & 0xf); */
+	      tbuf[i++] = c;
+	    }
+	  /* XPR(NT "\n"); */
+	}
+      else
+	{
+	  /* Add */
+	  /* XPR(NT "[%u] add %u ", i, next); */
+	  for (j = 0; j < next; j += 1)
+	    {
+	      char c = (char) mt_random (&static_mtrand);
+	      /* XPR(NT "%x%x", (c >> 4) & 0xf, c & 0xf); */
+	      tbuf[i++] = c;
+	    }
+	  /* XPR(NT "\n"); */
+	  sadd += next;
+	}
+    }
+  /* XPR(NT "sadd = %u max = %u\n", sadd, sadd_max); */
+  if ((fwrite (tbuf, 1, ts, tf) != ts) ||
+      (ss_out != NULL && (fwrite (sbuf, 1, ss, sf) != ss)))
+    {
+      stream->msg = "write failed";
+      ret = get_errno ();
+      goto failure;
+    }
+  /* A handle is cleared once closed so the cleanup below never closes twice. */
+  ret = fclose (tf);
+  tf = NULL;
+  if (ret == 0 && sf != NULL) { ret = fclose (sf); sf = NULL; }
+  if (ret != 0)
+    {
+      stream->msg = "close failed";
+      ret = get_errno ();
+      goto failure;
+    }
+  if (ts_out) { (*ts_out) = ts; }
+  if (ss_out) { (*ss_out) = ss; }
+ failure:
+  if (tf != NULL) { fclose (tf); } /* only still open on error paths */
+  if (sf != NULL) { fclose (sf); }
+  free (buf);
+  return ret;
+}
+
+/* Compares TGT and REC byte for byte: 0 if equal, errno if an open fails,
+ * XD3_INTERNAL on the first difference.  Closes both files on every path. */
+int
+test_compare_files (const char* tgt, const char *rec)
+{
+  FILE *orig, *recons;
+  static uint8_t obuf[TESTBUFSIZE], rbuf[TESTBUFSIZE];
+  xoff_t offset = 0;
+  size_t i;
+  size_t oc, rc;
+  int ret = 0;
+
+  if ((orig = fopen (tgt, "r")) == NULL)
+    {
+      XPR(NT "open %s failed\n", tgt);
+      return get_errno ();
+    }
+
+  if ((recons = fopen (rec, "r")) == NULL)
+    {
+      XPR(NT "open %s failed\n", rec);
+      ret = get_errno ();
+      fclose (orig);
+      return ret;
+    }
+
+  /* Compare block by block; any mismatch sets RET and ends the loop. */
+  while (ret == 0)
+    {
+      oc = fread (obuf, 1, TESTBUFSIZE, orig);
+      rc = fread (rbuf, 1, TESTBUFSIZE, recons);
+
+      if (oc != rc)
+	{
+	  ret = XD3_INTERNAL;
+	  break;
+	}
+      if (oc == 0)
+	{
+	  break;
+	}
+
+      for (i = 0; i < oc; i += 1)
+	{
+	  if (obuf[i] != rbuf[i])
+	    {
+	      XPR(NT "byte %u (read %u @ %"Q"u) %d != %d\n",
+		  (int)i, (int)oc, offset, obuf[i], rbuf[i]);
+	      ret = XD3_INTERNAL;
+	      break;
+	    }
+	}
+      offset += oc;
+    }
+
+  fclose (orig);
+  fclose (recons);
+  return ret;
+}
+
+static int
+test_copy_to (const char *from, const char *to)
+{
+  char buf[TESTBUFSIZE];
+  int ret;
+  /* Copies via the shell's cp; the paths are inserted without quoting. */
+  snprintf_func (buf, TESTBUFSIZE, "cp -f %s %s", from, to);
+
+  if ((ret = system (buf)) != 0)
+    {
+      return XD3_INTERNAL;
+    }
+
+  return 0;
+}
+
+static int
+test_save_copy (const char *origname)
+{ /* Copies ORIGNAME to TEST_COPY_FILE. */
+  return test_copy_to(origname, TEST_COPY_FILE);
+}
+
+static int
+test_file_size (const char* file, xoff_t *size)
+{
+  struct stat sbuf;
+  int ret;
+  (*size) = 0;
+  /* Stores FILE's size in *SIZE (0 on failure); FILE must be a regular file. */
+  if (stat (file, & sbuf) < 0)
+    {
+      ret = get_errno ();
+      XPR(NT "stat failed: %s: %s\n", file, strerror (ret));
+      return ret;
+    }
+
+  if (! S_ISREG (sbuf.st_mode))
+    {
+      /* XD3_INTERNAL is not an errno value, so it has no strerror() text. */
+      XPR(NT "not a regular file: %s\n", file);
+      return XD3_INTERNAL;
+    }
+
+  (*size) = sbuf.st_size;
+  return 0;
+}
+
+/***********************************************************************
+ READ OFFSET
+ ***********************************************************************/
+
+/* Common test for read_integer errors: encodes a 64-bit value and
+ * then attempts to read it as a 32-bit value.  If TRUNTO is non-zero the
+ * input is cut by TRUNTO bytes, then by one more per retry; otherwise the
+ * read should overflow.  Expects XD3_INVALID_INPUT and MSG every time. */
+static int
+test_read_integer_error (xd3_stream *stream, usize_t trunto, const char *msg)
+{
+  uint64_t eval = 1ULL << 34;
+  uint32_t rval;
+  xd3_output *buf = NULL;
+  const uint8_t *max;
+  const uint8_t *inp;
+  int ret;
+
+  buf = xd3_alloc_output (stream, buf);
+
+  if ((ret = xd3_emit_uint64_t (stream, & buf, eval)))
+    {
+      goto fail;
+    }
+
+ again:
+
+  inp = buf->base;
+  max = buf->base + buf->next - trunto;
+
+  if ((ret = xd3_read_uint32_t (stream, & inp, max, & rval)) !=
+      XD3_INVALID_INPUT ||
+      !MSG_IS (msg))
+    {
+      ret = XD3_INTERNAL;
+    }
+  else if (trunto && trunto < buf->next)
+    {
+      trunto += 1;
+      goto again;
+    }
+  else
+    {
+      ret = 0;
+    }
+
+ fail:
+  xd3_free_output (stream, buf);
+  return ret;
+}
+
+/* Overflow case of the above routine; the text must equal stream->msg exactly. */
+static int
+test_decode_integer_overflow (xd3_stream *stream, int unused)
+{
+  return test_read_integer_error (stream, 0, "overflow in read_intger");
+}
+
+/* End-of-input case of the above routine, starting one byte short. */
+static int
+test_decode_integer_end_of_input (xd3_stream *stream, int unused)
+{
+  return test_read_integer_error (stream, 1, "end-of-input in read_integer");
+}
+
+/* Test that emit_integer/decode_integer/sizeof_integer/read_integer
+ * work on correct inputs.  Tests (ONE << i) - ONE, (ONE << i) and
+ * (ONE << i) + ONE for i = 0, 7, 14, ..., then MAX-ONE and MAX. */
+#define TEST_ENCODE_DECODE_INTEGER(TYPE,ONE,MAX) \
+  xd3_output *rbuf = NULL; \
+  xd3_output *dbuf = NULL; \
+  TYPE values[64]; \
+  usize_t nvalues = 0; \
+  usize_t i; \
+  int ret = 0; \
+ \
+  for (i = 0; i < (sizeof (TYPE) * 8); i += 7) \
+    { \
+      values[nvalues++] = (ONE << i) - ONE; \
+      values[nvalues++] = (ONE << i); \
+      values[nvalues++] = (ONE << i) + ONE; \
+    } \
+ \
+  values[nvalues++] = MAX-ONE; \
+  values[nvalues++] = MAX; \
+ \
+  rbuf = xd3_alloc_output (stream, rbuf); \
+  dbuf = xd3_alloc_output (stream, dbuf); \
+ \
+  for (i = 0; i < nvalues; i += 1) \
+    { \
+      const uint8_t *max; \
+      const uint8_t *inp; \
+      TYPE val;			\
+ \
+      DOT (); \
+      rbuf->next = 0; \
+ \
+      if ((ret = xd3_emit_ ## TYPE (stream, & rbuf, values[i])) || \
+	  (ret = xd3_emit_ ## TYPE (stream, & dbuf, values[i]))) \
+	{ \
+	  goto fail; \
+	} \
+ \
+      inp = rbuf->base; \
+      max = rbuf->base + rbuf->next; \
+ \
+      if (rbuf->next != xd3_sizeof_ ## TYPE (values[i])) \
+	{ \
+	  ret = XD3_INTERNAL; \
+	  goto fail; \
+	} \
+ \
+      if ((ret = xd3_read_ ## TYPE (stream, & inp, max, & val))) \
+	{ \
+	  goto fail; \
+	} \
+ \
+      if (val != values[i]) \
+	{ \
+	  ret = XD3_INTERNAL; \
+	  goto fail; \
+	} \
+ \
+      DOT (); \
+    } \
+ \
+  stream->next_in  = dbuf->base; \
+  stream->avail_in = dbuf->next; \
+ \
+  for (i = 0; i < nvalues; i += 1) \
+    { \
+      TYPE val; \
+ \
+      if ((ret = xd3_decode_ ## TYPE (stream, & val))) \
+        { \
+          goto fail; \
+        } \
+ \
+      if (val != values[i]) \
+        { \
+          ret = XD3_INTERNAL; \
+          goto fail; \
+        } \
+    } \
+ \
+  if (stream->avail_in != 0) \
+    { \
+      ret = XD3_INTERNAL; \
+      goto fail; \
+    } \
+ \
+ fail: \
+  xd3_free_output (stream, rbuf); \
+  xd3_free_output (stream, dbuf); \
+ \
+  return ret
+
+/* Round-trip test of the uint32_t integer encoding.  The whole body,
+ * including the return statement, comes from
+ * TEST_ENCODE_DECODE_INTEGER.  The second argument is unused. */
+static int
+test_encode_decode_uint32_t (xd3_stream *stream, int unused)
+{
+  TEST_ENCODE_DECODE_INTEGER(uint32_t,1U,UINT32_MAX);
+}
+
+/* Round-trip test of the uint64_t integer encoding.  The whole body,
+ * including the return statement, comes from
+ * TEST_ENCODE_DECODE_INTEGER.  The second argument is unused. */
+static int
+test_encode_decode_uint64_t (xd3_stream *stream, int unused)
+{
+  TEST_ENCODE_DECODE_INTEGER(uint64_t,1ULL,UINT64_MAX);
+}
+
+/* Sanity check of USIZE_T_OVERFLOW: pairs whose sum still fits must
+ * not be flagged, pairs whose sum is one past USIZE_T_MAX (or more)
+ * must be.  Returns 0 on success, XD3_INTERNAL with stream->msg set
+ * otherwise.  The second argument is unused. */
+static int
+test_usize_t_overflow (xd3_stream *stream, int unused)
+{
+  /* Sums that are representable: none of these may report overflow. */
+  const int fits_ok =
+    ! USIZE_T_OVERFLOW (USIZE_T_MAX, 0) &&
+    ! USIZE_T_OVERFLOW (0, USIZE_T_MAX) &&
+    ! USIZE_T_OVERFLOW (USIZE_T_MAX / 2, USIZE_T_MAX / 2) &&
+    ! USIZE_T_OVERFLOW (USIZE_T_MAX / 2, USIZE_T_MAX / 2 + 1);
+
+  /* Sums that are not representable: all of these must report it. */
+  const int wraps_ok =
+    USIZE_T_OVERFLOW (USIZE_T_MAX, 1) &&
+    USIZE_T_OVERFLOW (1, USIZE_T_MAX) &&
+    USIZE_T_OVERFLOW (USIZE_T_MAX / 2 + 1, USIZE_T_MAX / 2 + 1);
+
+  if (fits_ok && wraps_ok)
+    {
+      return 0;
+    }
+
+  stream->msg = "incorrect overflow computation";
+  return XD3_INTERNAL;
+}
+
+/* Check xd3_forward_match on two 256-byte buffers: identical buffers
+ * must match for the full requested length, and a single differing
+ * byte must end the match at its index.  Failures abort via CHECK.
+ * Both arguments are unused. */
+static int
+test_forward_match (xd3_stream *stream, int unused)
+{
+  uint8_t buf1[256];
+  uint8_t buf2[256];
+  usize_t i;
+
+  memset(buf1, 0, sizeof (buf1));
+  memset(buf2, 0, sizeof (buf2));
+
+  /* Equal contents: the match length equals the length asked for. */
+  i = 0;
+  while (i < 256)
+    {
+      CHECK(xd3_forward_match(buf1, buf2, i) == i);
+      i += 1;
+    }
+
+  /* Plant one mismatch at index i, test, then restore the byte. */
+  i = 0;
+  while (i < 255)
+    {
+      buf2[i] = 1;
+      CHECK(xd3_forward_match(buf1, buf2, 256) == i);
+      buf2[i] = 0;
+      i += 1;
+    }
+
+  return 0;
+}
+
+/***********************************************************************
+ Address cache
+ ***********************************************************************/
+
+/* Exercise the address cache.  ADDR_CACHE_ROUNDS-1 pseudo-random copy
+ * addresses are encoded, the resulting address section is copied into
+ * one contiguous buffer, and the addresses are decoded again.  The
+ * test requires that every address round-trips, that every encoded
+ * byte is consumed, and that each of the 2 + s_same + s_near address
+ * modes was chosen at least once.
+ *
+ * Returns 0 on success, ENOMEM when a work buffer cannot be allocated,
+ * XD3_INTERNAL (with stream->msg set) on a mismatch, or the code of the
+ * failing encode/decode call.  The work buffers are released on every
+ * path.  The second argument is unused. */
+static int
+test_address_cache (xd3_stream *stream, int unused)
+{
+  int ret;
+  usize_t i;
+  usize_t offset;
+  usize_t *addrs = NULL;
+  uint8_t *big_buf = NULL;
+  uint8_t *buf_max;
+  const uint8_t *buf;
+  xd3_output *outp;
+  uint8_t *modes = NULL;
+  int mode_counts[16];
+
+  stream->acache.s_near = stream->code_table_desc->near_modes;
+  stream->acache.s_same = stream->code_table_desc->same_modes;
+
+  if ((ret = xd3_encode_init_partial (stream))) { return ret; }
+
+  addrs = (usize_t*) xd3_alloc (stream, sizeof (usize_t), ADDR_CACHE_ROUNDS);
+  modes = (uint8_t*) xd3_alloc (stream, sizeof (uint8_t), ADDR_CACHE_ROUNDS);
+
+  /* Both arrays are written to immediately below, so bail out before
+   * touching them if either allocation failed. */
+  if (addrs == NULL || modes == NULL)
+    {
+      ret = ENOMEM;
+      goto done;
+    }
+
+  memset (mode_counts, 0, sizeof (mode_counts));
+  memset (modes, 0, ADDR_CACHE_ROUNDS);
+
+  addrs[0] = 0;
+
+  /* Fixed seed: the address sequence is the same on every run. */
+  mt_init (& static_mtrand, 0x9f73f7fc);
+
+  /* First pass: encode addresses */
+  xd3_init_cache (& stream->acache);
+
+  for (offset = 1; offset < ADDR_CACHE_ROUNDS; offset += 1)
+    {
+      double p;
+      usize_t addr;
+      usize_t prev_i;
+      usize_t nearby;
+
+      p         = (mt_random (&static_mtrand) / (double)UINT32_MAX);
+      prev_i    = mt_random (&static_mtrand) % offset;
+      nearby    = (mt_random (&static_mtrand) % 256) % offset;
+      nearby    = xd3_max (1U, nearby);
+
+      /* Mix three kinds of address: a repeat of a recently used one, one
+       * a short distance above an earlier one, and an arbitrary earlier
+       * position. */
+      if (p < 0.1)      { addr = addrs[offset-nearby]; }
+      else if (p < 0.4) { addr = xd3_min (addrs[prev_i] + nearby, offset-1); }
+      else              { addr = prev_i; }
+
+      if ((ret = xd3_encode_address (stream, addr, offset, & modes[offset]))) { goto done; }
+
+      addrs[offset] = addr;
+      mode_counts[modes[offset]] += 1;
+    }
+
+  /* Copy addresses into a contiguous buffer. */
+  big_buf = (uint8_t*) xd3_alloc (stream, xd3_sizeof_output (ADDR_HEAD (stream)), 1);
+
+  if (big_buf == NULL)
+    {
+      ret = ENOMEM;
+      goto done;
+    }
+
+  for (offset = 0, outp = ADDR_HEAD (stream); outp != NULL; offset += outp->next, outp = outp->next_page)
+    {
+      memcpy (big_buf + offset, outp->base, outp->next);
+    }
+
+  buf_max = big_buf + offset;
+  buf     = big_buf;
+
+  /* Second pass: decode addresses */
+  xd3_init_cache (& stream->acache);
+
+  for (offset = 1; offset < ADDR_CACHE_ROUNDS; offset += 1)
+    {
+      usize_t addr;
+
+      if ((ret = xd3_decode_address (stream, offset, modes[offset],
+				     & buf, buf_max, & addr)))
+	{
+	  goto done;
+	}
+
+      if (addr != addrs[offset])
+	{
+	  stream->msg = "incorrect decoded address";
+	  ret = XD3_INTERNAL;
+	  goto done;
+	}
+    }
+
+  /* Check that every byte, mode was used. */
+  if (buf != buf_max)
+    {
+      stream->msg = "address bytes not used";
+      ret = XD3_INTERNAL;
+      goto done;
+    }
+
+  for (i = 0; i < (2 + stream->acache.s_same + stream->acache.s_near); i += 1)
+    {
+      if (mode_counts[i] == 0)
+	{
+	  stream->msg = "address mode not used";
+	  ret = XD3_INTERNAL;
+	  goto done;
+	}
+    }
+
+  ret = 0;
+
+ done:
+  /* Single exit: the pointers start out NULL, so buffers that were
+   * never allocated are passed to xd3_free as NULL, as test_secondary
+   * also does. */
+  xd3_free (stream, modes);
+  xd3_free (stream, addrs);
+  xd3_free (stream, big_buf);
+
+  return ret;
+}
+
+/***********************************************************************
+ Encode and decode with single bit error
+ ***********************************************************************/
+
+/* Input text for the encode/decode tests below.
+ * It compresses from 256 to around 185 bytes.
+ * Avoids matching addresses that are a single-bit difference.
+ * Avoids matching address 0. */
+static const uint8_t test_text[] =
+"this is a story\n"
+"abouttttttttttt\n"
+"- his is a stor\n"
+"- about nothing "
+" all. boutique -"
+"his story is a -"
+"about           "
+"what happens all"
+" the time what -"
+"am I ttttttt the"
+" person said, so"
+" what, per son -"
+" gory story is -"
+" about nothing -"
+"tttttt to test -"
+"his sto nothing";
+
+/* Application header attached to the encoding of test_text; its
+ * strlen() (not sizeof) is what gets stored and compared. */
+static const uint8_t test_apphead[] = "header test";
+
+/* Encode test_text into `encoded' and store the encoded length in
+ * *encoded_size, using a fixed soft string-matcher configuration and
+ * test_apphead as the application header.  The stream is freed and
+ * re-configured with XD3_FLUSH added to its flags for the encode, then
+ * freed and re-configured with its original flags before returning.
+ * Returns 0, or the first non-zero code from xd3_encode_stream or
+ * xd3_close_stream. */
+static int
+test_compress_text (xd3_stream  *stream,
+		    uint8_t     *encoded,
+		    usize_t     *encoded_size)
+{
+  xd3_config cfg;
+  const int saved_flags = stream->flags;
+  int ret;
+
+  xd3_free_stream (stream);
+  xd3_init_config (& cfg, saved_flags | XD3_FLUSH);
+
+  /* The matcher parameters are pinned so that the "expected
+   * non-failure" counts in test_decompress_single_bit_error stay fixed
+   * as well.  See test_softcfg_str. */
+  cfg.smatch_cfg = XD3_SMATCH_SOFT;
+  cfg.smatcher_soft.name         = "test";
+  cfg.smatcher_soft.large_look   = 64; /* no source, not used */
+  cfg.smatcher_soft.large_step   = 64; /* no source, not used */
+  cfg.smatcher_soft.small_look   = 4;
+  cfg.smatcher_soft.small_chain  = 128;
+  cfg.smatcher_soft.small_lchain = 16;
+  cfg.smatcher_soft.max_lazy     = 8;
+  cfg.smatcher_soft.long_enough  = 128;
+
+  xd3_config_stream (stream, & cfg);
+
+  *encoded_size = 0;
+
+  xd3_set_appheader (stream, test_apphead,
+		     (usize_t) strlen ((char*) test_apphead));
+
+  /* The output buffer is assumed to hold 4*sizeof (test_text) bytes. */
+  ret = xd3_encode_stream (stream, test_text, sizeof (test_text),
+			   encoded, encoded_size, 4*sizeof (test_text));
+
+  if (ret == 0)
+    {
+      ret = xd3_close_stream (stream);
+    }
+
+  /* Success or failure, hand the stream back with its original flags. */
+  xd3_free_stream (stream);
+  xd3_init_config (& cfg, saved_flags);
+  xd3_config_stream (stream, & cfg);
+  return ret;
+}
+
+/* Decode the enc_size bytes at `enc', handing the decoder at most
+ * test_desize bytes per xd3_avail_input call, and check that the
+ * output equals test_text and the application header equals
+ * test_apphead.
+ *
+ * Returns 0 on success.  Otherwise returns the non-zero code that
+ * stopped decoding, XD3_INTERNAL for a wrong output size or
+ * appheader, or EIO when the final text comparison fails.  On every
+ * path past the CHECKs the stream is freed and re-configured with the
+ * flags it had on entry, and stream->msg is carried across that
+ * reset. */
+static int
+test_decompress_text (xd3_stream *stream, uint8_t *enc, usize_t enc_size, usize_t test_desize)
+{
+  xd3_config cfg;
+  char decoded[sizeof (test_text)];
+  uint8_t *apphead;
+  usize_t apphead_size;
+  usize_t decoded_size;
+  const char *msg;
+  int  ret;
+  usize_t pos = 0;
+  int flags = stream->flags;
+  usize_t take;
+
+ input:
+  /* Test decoding test_desize input bytes at a time */
+  take = xd3_min (enc_size - pos, test_desize);
+  CHECK(take > 0);
+
+  xd3_avail_input (stream, enc + pos, take);
+ again:
+  ret = xd3_decode_input (stream);
+
+  /* Count the chunk as consumed exactly once: take is zeroed so that
+   * jumping back to `again' does not advance pos a second time. */
+  pos += take;
+  take = 0;
+
+  switch (ret)
+    {
+    case XD3_OUTPUT:
+      break;
+    case XD3_WINSTART:
+    case XD3_GOTHEADER:
+      goto again;
+    case XD3_INPUT:
+      if (pos < enc_size) { goto input; }
+      /* else fallthrough */
+    case XD3_WINFINISH:
+    default:
+      /* Anything but XD3_OUTPUT here is a failure; ret is non-zero. */
+      goto fail;
+    }
+
+  CHECK(ret == XD3_OUTPUT);
+  CHECK(pos == enc_size);
+
+  /* The size check also guarantees the memcpy below fits `decoded'. */
+  if (stream->avail_out != sizeof (test_text))
+    {
+      stream->msg = "incorrect output size";
+      ret = XD3_INTERNAL;
+      goto fail;
+    }
+
+  decoded_size = stream->avail_out;
+  memcpy (decoded, stream->next_out, stream->avail_out);
+
+  xd3_consume_output (stream);
+
+  if ((ret = xd3_get_appheader (stream, & apphead, & apphead_size))) { goto fail; }
+
+  if (apphead_size != strlen ((char*) test_apphead) ||
+      memcmp (apphead, test_apphead, strlen ((char*) test_apphead)) != 0)
+    {
+      stream->msg = "incorrect appheader";
+      ret = XD3_INTERNAL;
+      goto fail;
+    }
+
+  /* After the output is consumed the decoder must report the end of
+   * the window, and the stream must then close cleanly. */
+  if ((ret = xd3_decode_input (stream)) != XD3_WINFINISH ||
+      (ret = xd3_close_stream (stream)) != 0)
+    {
+      goto fail;
+    }
+
+  if (decoded_size != sizeof (test_text) ||
+      memcmp (decoded, test_text, sizeof (test_text)) != 0)
+    {
+      stream->msg = "incorrect output text";
+      ret = EIO;
+    }
+
+ fail:
+  /* Reached on success too.  Save msg before the stream is reset and
+   * put it back afterwards. */
+  msg = stream->msg;
+  xd3_free_stream (stream);
+  xd3_init_config (& cfg, flags);
+  xd3_config_stream (stream, & cfg);
+  stream->msg = msg;
+
+  return ret;
+}
+
+/* Encode test_text, then flip every bit of the encoding in turn and
+ * try to decode the damaged copy.  A damaged input that still decodes
+ * successfully counts as a "non-failure".  The test fails if a decode
+ * returns EIO while XD3_ADLER32 is set in stream->flags, or if the
+ * number of non-failures exceeds expected_non_failures.  The
+ * undamaged encoding is decoded before and after the bit-flip loop
+ * and must succeed both times.  Returns 0 on success, otherwise
+ * XD3_INTERNAL or the code from the failing encode/decode. */
+static int
+test_decompress_single_bit_error (xd3_stream *stream, int expected_non_failures)
+{
+  int ret;
+  usize_t i;
+  uint8_t encoded[4*sizeof (test_text)]; /* make room for alt code table */
+  usize_t  encoded_size;
+  int non_failures = 0;
+  int cksum = (stream->flags & XD3_ADLER32) != 0;
+
+//#define DEBUG_TEST_FAILURES
+#ifndef DEBUG_TEST_FAILURES
+#define TEST_FAILURES()
+#else
+  /* For checking non-failure cases by hand, enable this macro and run
+   * xdelta printdelta with print_cpymode disabled.  Every non-failure
+   * should change a copy address mode, which doesn't cause a failure
+   * because the address cache starts out with all zeros.
+
+    ./xdelta3 test
+    for i in test_text.xz.*; do ./xdelta3 printdelta $i > $i.out;
+    diff $i.out test_text.xz.0.out; done
+
+   */
+  system ("rm -rf test_text.*");
+  {
+    char buf[TESTBUFSIZE];
+    FILE *f;
+    snprintf_func (buf, TESTBUFSIZE, "test_text");
+    f = fopen (buf, "w");
+    fwrite (test_text,1,sizeof (test_text),f);
+    fclose (f);
+  }
+#define TEST_FAILURES()                                         \
+  do {                                                          \
+    char buf[TESTBUFSIZE];      				\
+    FILE *f;                                                    \
+    snprintf_func (buf, TESTBUFSIZE, "test_text.xz.%d", non_failures);	\
+    f = fopen (buf, "w");                                       \
+    fwrite (encoded,1,encoded_size,f);                          \
+    fclose (f);                                                 \
+  } while (0)
+#endif
+
+  stream->sec_data.inefficient = 1;
+  stream->sec_inst.inefficient = 1;
+  stream->sec_addr.inefficient = 1;
+
+  /* Encode text, test correct input */
+  if ((ret = test_compress_text (stream, encoded, & encoded_size)))
+    {
+      /*stream->msg = "without error: encode failure";*/
+      return ret;
+    }
+
+  /* Undamaged input, fed a quarter of sizeof (test_text) at a time. */
+  if ((ret = test_decompress_text (stream, encoded, encoded_size,
+				   sizeof (test_text) / 4)))
+    {
+      /*stream->msg = "without error: decode failure";*/
+      return ret;
+    }
+
+  TEST_FAILURES();
+
+  /* i indexes bits: byte i/8, bit i%8. */
+  for (i = 0; i < encoded_size*8; i += 1)
+    {
+      /* Single bit error. */
+      encoded[i/8] ^= 1 << (i%8);
+
+      if ((ret = test_decompress_text (stream, encoded,
+				       encoded_size, sizeof (test_text))) == 0)
+	{
+	  non_failures += 1;
+#ifdef DEBUG_TEST_FAILURES
+	  XPR(NT "%u[%u] non-failure %u\n", i/8, i%8, non_failures);
+#endif
+	  TEST_FAILURES();
+	}
+      else
+	{
+	  /*XPR(NT "%u[%u] failure: %s\n", i/8, i%8, stream->msg);*/
+	}
+
+      /* decompress_text returns EIO when the final memcmp() fails, but that
+       * should never happen with checksumming on. */
+      if (cksum && ret == EIO)
+	{
+	  /*XPR(NT "%u[%u] cksum mismatch\n", i/8, i%8);*/
+	  stream->msg = "checksum mismatch";
+	  return XD3_INTERNAL;
+	}
+
+      /* Undo single bit error. */
+      encoded[i/8] ^= 1 << (i%8);
+    }
+
+  /* Test correct input again */
+  if ((ret = test_decompress_text (stream, encoded, encoded_size, 1)))
+    {
+      /*stream->msg = "without error: decode failure";*/
+      return ret;
+    }
+
+  /* Check expected non-failures */
+  if (non_failures > expected_non_failures)
+    {
+      XPR(NT "non-failures %u > expected %u",
+	 non_failures, expected_non_failures);
+      stream->msg = "incorrect";
+      return XD3_INTERNAL;
+    }
+
+  DOT ();
+
+  return 0;
+}
+
+/***********************************************************************
+ Secondary compression tests
+ ***********************************************************************/
+
+#if SECONDARY_ANY
+/* A distribution generator: appends a sequence of test bytes to
+ * `data' and returns 0, or the non-zero code of a failed emit. */
+typedef int (*sec_dist_func) (xd3_stream *stream, xd3_output *data);
+
+/* Forward declarations so that the table below can precede the
+ * definitions. */
+static int sec_dist_func1 (xd3_stream *stream, xd3_output *data);
+static int sec_dist_func2 (xd3_stream *stream, xd3_output *data);
+static int sec_dist_func3 (xd3_stream *stream, xd3_output *data);
+static int sec_dist_func4 (xd3_stream *stream, xd3_output *data);
+static int sec_dist_func5 (xd3_stream *stream, xd3_output *data);
+static int sec_dist_func6 (xd3_stream *stream, xd3_output *data);
+static int sec_dist_func7 (xd3_stream *stream, xd3_output *data);
+static int sec_dist_func8 (xd3_stream *stream, xd3_output *data);
+static int sec_dist_func9 (xd3_stream *stream, xd3_output *data);
+static int sec_dist_func10 (xd3_stream *stream, xd3_output *data);
+static int sec_dist_func11 (xd3_stream *stream, xd3_output *data);
+
+/* Every generator, in the order test_secondary runs them. */
+static sec_dist_func sec_dists[] =
+{
+  sec_dist_func1,
+  sec_dist_func2,
+  sec_dist_func3,
+  sec_dist_func4,
+  sec_dist_func5,
+  sec_dist_func6,
+  sec_dist_func7,
+  sec_dist_func8,
+  sec_dist_func9,
+  sec_dist_func10,
+  sec_dist_func11,
+};
+
+/* Test distribution: 100 bytes of the same character (13).
+ * Returns 0, or the code of the first failed emit. */
+static int
+sec_dist_func1 (xd3_stream *stream, xd3_output *data)
+{
+  int remaining;
+
+  for (remaining = 100; remaining > 0; remaining -= 1)
+    {
+      int ret = xd3_emit_byte (stream, & data, 13);
+
+      if (ret != 0)
+	{
+	  return ret;
+	}
+    }
+
+  return 0;
+}
+
+/* Test distribution: uniform covering half the alphabet.  Emits
+ * ALPHABET_SIZE bytes, so each symbol of the lower half appears twice.
+ * Returns 0, or the code of the first failed emit. */
+static int
+sec_dist_func2 (xd3_stream *stream, xd3_output *data)
+{
+  int n = 0;
+
+  while (n < ALPHABET_SIZE)
+    {
+      int ret = xd3_emit_byte (stream, & data, n%(ALPHABET_SIZE/2));
+
+      if (ret != 0)
+	{
+	  return ret;
+	}
+
+      n += 1;
+    }
+
+  return 0;
+}
+
+/* Test distribution: uniform covering the entire alphabet, one byte
+ * per symbol.  Returns 0, or the code of the first failed emit. */
+static int
+sec_dist_func3 (xd3_stream *stream, xd3_output *data)
+{
+  int n = 0;
+
+  while (n < ALPHABET_SIZE)
+    {
+      int ret = xd3_emit_byte (stream, & data, n%ALPHABET_SIZE);
+
+      if (ret != 0)
+	{
+	  return ret;
+	}
+
+      n += 1;
+    }
+
+  return 0;
+}
+
+/* Test distribution: an exponential distribution covering half the
+ * alphabet; ALPHABET_SIZE*20 bytes drawn with mt_exp_rand.
+ * Returns 0, or the code of the first failed emit. */
+static int
+sec_dist_func4 (xd3_stream *stream, xd3_output *data)
+{
+  int remaining;
+
+  for (remaining = ALPHABET_SIZE*20; remaining > 0; remaining -= 1)
+    {
+      int x = mt_exp_rand (10, ALPHABET_SIZE/2);
+      int ret = xd3_emit_byte (stream, & data, x);
+
+      if (ret != 0)
+	{
+	  return ret;
+	}
+    }
+
+  return 0;
+}
+
+/* Test distribution: an exponential distribution covering the entire
+ * alphabet; ALPHABET_SIZE*20 bytes drawn with mt_exp_rand.
+ * Returns 0, or the code of the first failed emit. */
+static int
+sec_dist_func5 (xd3_stream *stream, xd3_output *data)
+{
+  int remaining;
+
+  for (remaining = ALPHABET_SIZE*20; remaining > 0; remaining -= 1)
+    {
+      int x = mt_exp_rand (10, ALPHABET_SIZE-1);
+      int ret = xd3_emit_byte (stream, & data, x);
+
+      if (ret != 0)
+	{
+	  return ret;
+	}
+    }
+
+  return 0;
+}
+
+/* Test distribution: a uniform random distribution covering half the
+ * alphabet; ALPHABET_SIZE*20 bytes drawn from static_mtrand.
+ * Returns 0, or the code of the first failed emit. */
+static int
+sec_dist_func6 (xd3_stream *stream, xd3_output *data)
+{
+  int remaining;
+
+  for (remaining = ALPHABET_SIZE*20; remaining > 0; remaining -= 1)
+    {
+      int x = mt_random (&static_mtrand) % (ALPHABET_SIZE/2);
+      int ret = xd3_emit_byte (stream, & data, x);
+
+      if (ret != 0)
+	{
+	  return ret;
+	}
+    }
+
+  return 0;
+}
+
+/* Test distribution: a uniform random distribution covering the entire
+ * alphabet; ALPHABET_SIZE*200 bytes drawn from static_mtrand.
+ * Returns 0, or the code of the first failed emit. */
+static int
+sec_dist_func7 (xd3_stream *stream, xd3_output *data)
+{
+  int remaining;
+
+  for (remaining = ALPHABET_SIZE*200; remaining > 0; remaining -= 1)
+    {
+      int x = mt_random (&static_mtrand) % ALPHABET_SIZE;
+      int ret = xd3_emit_byte (stream, & data, x);
+
+      if (ret != 0)
+	{
+	  return ret;
+	}
+    }
+
+  return 0;
+}
+
+/* Test distribution: a small number of frequent characters, difficult
+ * to divide into many groups.  The four bytes 0, 64, 128, 255 are
+ * emitted in that order, ALPHABET_SIZE*5 times over.
+ * Returns 0, or the code of the first failed emit. */
+static int
+sec_dist_func8 (xd3_stream *stream, xd3_output *data)
+{
+  static const uint8_t cycle[4] = { 0, 64, 128, 255 };
+  int round;
+
+  for (round = 0; round < ALPHABET_SIZE*5; round += 1)
+    {
+      int k;
+
+      for (k = 0; k < 4; k += 1)
+	{
+	  int ret = xd3_emit_byte (stream, & data, cycle[k]);
+
+	  if (ret != 0)
+	    {
+	      return ret;
+	    }
+	}
+    }
+
+  return 0;
+}
+
+/* Test distribution: one that causes many FGK block promotions (it
+ * once found a bug).  Emits ALPHABET_SIZE*200 bytes in two phases:
+ * first a ramp in which symbol N is emitted N+1 times, then repeated
+ * runs of ALPHABET_SIZE copies of one symbol, cycling through the
+ * alphabet.  Returns 0, or the code of the first failed emit. */
+static int
+sec_dist_func9 (xd3_stream *stream, xd3_output *data)
+{
+  int emitted;
+  int ret;
+
+  int ramp   = 0;   /* symbol currently being ramped */
+  int rcount = 0;   /* copies of `ramp' emitted so far */
+  int prom   = 0;   /* symbol currently being promoted */
+  int pcount = 0;   /* copies of `prom' emitted in this run */
+
+  /* 200 was long enough to trigger it--only when stricter checking
+   * that counted all blocks was turned on, but it seems I deleted
+   * this code. (missing fgk_free_block on line 398). */
+  for (emitted = 0; emitted < ALPHABET_SIZE*200; emitted += 1)
+    {
+      /* Move on to the next ramp symbol once the current one has had
+       * its ramp+1 copies. */
+      if (ramp < ALPHABET_SIZE && rcount > ramp)
+	{
+	  ramp   += 1;
+	  rcount  = 0;
+	}
+
+      if (ramp < ALPHABET_SIZE)
+	{
+	  /* Initially Nth symbol has (N+1) frequency */
+	  rcount += 1;
+	  ret = xd3_emit_byte (stream, & data, ramp);
+	}
+      else
+	{
+	  /* Thereafter, promote least freq to max freq */
+	  if (pcount == ALPHABET_SIZE)
+	    {
+	      pcount = 0;
+	      prom   = (prom + 1) % ALPHABET_SIZE;
+	    }
+
+	  pcount += 1;
+	  ret = xd3_emit_byte (stream, & data, prom);
+	}
+
+      if (ret != 0)
+	{
+	  return ret;
+	}
+    }
+
+  return 0;
+}
+
+/* Test distribution: symbol i is emitted i*i + 1 times.  This creates
+ * a 21-bit code length, fixed in 3.0r.
+ * Returns 0, or the code of the first failed emit. */
+static int
+sec_dist_func10 (xd3_stream *stream, xd3_output *data)
+{
+  int sym;
+
+  for (sym = 0; sym < ALPHABET_SIZE; sym += 1)
+    {
+      const int copies = (sym*sym) + 1;
+      int done;
+
+      for (done = 0; done < copies; done += 1)
+	{
+	  int ret = xd3_emit_byte (stream, & data, sym);
+
+	  if (ret != 0)
+	    {
+	      return ret;
+	    }
+	}
+    }
+
+  return 0;
+}
+
+/* Test distribution: fibonacci.  For each of 33 symbols the number of
+ * copies emitted is the sum of the previous two counts, starting from
+ * 0 and 1.  Returns 0, or the code of the first failed emit. */
+static int
+sec_dist_func11 (xd3_stream *stream, xd3_output *data)
+{
+  int older = 0;
+  int newer = 1;
+  int sym;
+
+  for (sym = 0; sym < 33; ++sym)
+    {
+      const int copies = older + newer;
+      int done;
+
+      for (done = 0; done < copies; ++done)
+	{
+	  int ret = xd3_emit_byte (stream, & data, sym);
+
+	  if (ret != 0)
+	    {
+	      return ret;
+	    }
+	}
+
+      older = newer;
+      newer = copies;
+    }
+
+  return 0;
+}
+
+/* Decode compress_size bytes from dec_input with the secondary
+ * compressor `sec' into dec_output and compare with dec_correct.
+ * The decode must consume exactly compress_size input bytes and
+ * produce exactly input_size output bytes.
+ *
+ * Returns 0 on success, ENOMEM if the decoder stream cannot be
+ * allocated, XD3_INTERNAL (with stream->msg set) on a mismatch, or
+ * the code returned by sec->init / sec->decode.  The decoder stream is
+ * destroyed on every path after its allocation. */
+static int
+test_secondary_decode (xd3_stream         *stream,
+		       const xd3_sec_type *sec,
+		       usize_t              input_size,
+		       usize_t              compress_size,
+		       const uint8_t      *dec_input,
+		       const uint8_t      *dec_correct,
+		       uint8_t            *dec_output)
+{
+  int ret;
+  xd3_sec_stream *dec_stream = sec->alloc (stream);
+
+  if (dec_stream == NULL)
+    {
+      return ENOMEM;
+    }
+
+  ret = sec->init (stream, dec_stream, 0);
+
+  if (ret == 0)
+    {
+      /* Cursors that the decoder advances, and their limits. */
+      const uint8_t *in_cur  = dec_input;
+      const uint8_t *in_end  = dec_input + compress_size;
+      uint8_t       *out_cur = dec_output;
+      uint8_t       *out_end = dec_output + input_size;
+
+      ret = sec->decode (stream, dec_stream,
+			 & in_cur, in_end,
+			 & out_cur, out_end);
+
+      if (ret == 0)
+	{
+	  if (in_cur != in_end)
+	    {
+	      stream->msg = "unused input";
+	      ret = XD3_INTERNAL;
+	    }
+	  else if (out_cur != out_end)
+	    {
+	      stream->msg = "unfinished output";
+	      ret = XD3_INTERNAL;
+	    }
+	  else if (memcmp (dec_output, dec_correct, input_size) != 0)
+	    {
+	      stream->msg = "incorrect output";
+	      ret = XD3_INTERNAL;
+	    }
+	}
+    }
+
+  sec->destroy (stream, dec_stream);
+  return ret;
+}
+
+/* Run every generator in sec_dists through the secondary compressor
+ * `sec', once for each group count from 1 to `groups'.  For each
+ * combination the generated data is encoded, copied into contiguous
+ * arrays, decoded and compared; then each bit of the first (at most)
+ * ten compressed bytes is flipped in turn and the decode is repeated.
+ * The results of those bit-flip decodes are discarded.
+ *
+ * Returns 0 on success, ENOMEM on allocation failure, or the first
+ * non-zero code from generation, init, encode or the clean decode.
+ * All per-case resources are released before moving on or returning. */
+static int
+test_secondary (xd3_stream *stream, const xd3_sec_type *sec, usize_t groups)
+{
+  usize_t test_i;
+  int ret;
+  xd3_output *in_head, *out_head, *p;
+  usize_t p_off, input_size, compress_size;
+  uint8_t *dec_input = NULL, *dec_output = NULL, *dec_correct = NULL;
+  xd3_sec_stream *enc_stream;
+  xd3_sec_cfg cfg;
+
+  memset (& cfg, 0, sizeof (cfg));
+
+  cfg.inefficient = 1;
+
+  for (cfg.ngroups = 1; cfg.ngroups <= groups; cfg.ngroups += 1)
+    {
+      XPR(NTR "\n...");
+      for (test_i = 0; test_i < SIZEOF_ARRAY (sec_dists); test_i += 1)
+	{
+	  /* Re-seed so that the random generators produce the same data
+	   * for every case. */
+	  mt_init (& static_mtrand, 0x9f73f7fc);
+
+	  in_head  = xd3_alloc_output (stream, NULL);
+	  out_head = xd3_alloc_output (stream, NULL);
+	  enc_stream = sec->alloc (stream);
+	  /* Reset so that the shared cleanup below never frees a buffer
+	   * left over from the previous case. */
+	  dec_input = NULL;
+	  dec_output = NULL;
+	  dec_correct = NULL;
+
+	  if (in_head == NULL || out_head == NULL || enc_stream == NULL)
+	    {
+	      goto nomem;
+	    }
+
+	  if ((ret = sec_dists[test_i] (stream, in_head))) { goto fail; }
+
+	  if ((ret = sec->init (stream, enc_stream, 1)) != 0) { goto fail; }
+
+	  /* Encode data */
+	  if ((ret = sec->encode (stream, enc_stream,
+				  in_head, out_head, & cfg)))
+	    {
+	      XPR(NT "test %"W"u: encode: %s", test_i, stream->msg);
+	      goto fail;
+	    }
+
+	  /* Calculate sizes, allocate contiguous arrays for decoding */
+	  input_size    = xd3_sizeof_output (in_head);
+	  compress_size = xd3_sizeof_output (out_head);
+
+	  /* Report the compressed size in bits per input byte. */
+	  XPR(NTR "%.3f", 8.0 * (double) compress_size / (double) input_size);
+
+	  if ((dec_input   = (uint8_t*) xd3_alloc (stream, compress_size, 1)) == NULL ||
+	      (dec_output  = (uint8_t*) xd3_alloc (stream, input_size, 1)) == NULL ||
+	      (dec_correct = (uint8_t*) xd3_alloc (stream, input_size, 1)) == NULL)
+	    {
+	      goto nomem;
+	    }
+
+	  /* Fill the compressed data array */
+	  for (p_off = 0, p = out_head; p != NULL;
+	       p_off += p->next, p = p->next_page)
+	    {
+	      memcpy (dec_input + p_off, p->base, p->next);
+	    }
+
+	  CHECK(p_off == compress_size);
+
+	  /* Fill the input data array */
+	  for (p_off = 0, p = in_head; p != NULL;
+	       p_off += p->next, p = p->next_page)
+	    {
+	      memcpy (dec_correct + p_off, p->base, p->next);
+	    }
+
+	  CHECK(p_off == input_size);
+
+	  if ((ret = test_secondary_decode (stream, sec, input_size,
+					    compress_size, dec_input,
+					    dec_correct, dec_output)))
+	    {
+	      XPR(NT "test %"W"u: decode: %s", test_i, stream->msg);
+	      goto fail;
+	    }
+
+	  /* Single-bit error test, only cover the first 10 bytes.
+	   * Some non-failures are expected in the Huffman case:
+	   * Changing the clclen array, for example, may not harm the
+	   * decoding.  Really looking for faults here. */
+	  {
+	    int i;
+	    int bytes = xd3_min (compress_size, 10U);
+	    for (i = 0; i < bytes * 8; i += 1)
+	      {
+		dec_input[i/8] ^= 1 << (i%8);
+
+		if ((ret = test_secondary_decode (stream, sec, input_size,
+						  compress_size, dec_input,
+						  dec_correct, dec_output))
+		    == 0)
+		  {
+		    /*XPR(NT "test %u: decode single-bit [%u/%u]
+		      error non-failure", test_i, i/8, i%8);*/
+		  }
+
+		/* Restore the bit before moving to the next one. */
+		dec_input[i/8] ^= 1 << (i%8);
+
+		if ((i % (2*bytes)) == (2*bytes)-1)
+		  {
+		    DOT ();
+		  }
+	      }
+	    /* Decode errors on damaged input are not test failures. */
+	    ret = 0;
+	  }
+
+	  /* The nomem label lives inside a never-taken branch, so it is
+	   * reachable only by goto and then falls into the cleanup. */
+	  if (0) { nomem: ret = ENOMEM; }
+
+	fail:
+	  /* Shared cleanup, reached on success as well. */
+	  sec->destroy (stream, enc_stream);
+	  xd3_free_output (stream, in_head);
+	  xd3_free_output (stream, out_head);
+	  xd3_free (stream, dec_input);
+	  xd3_free (stream, dec_output);
+	  xd3_free (stream, dec_correct);
+
+	  if (ret != 0) { return ret; }
+	}
+    }
+
+  return 0;
+}
+
+/* Per-compressor entry points.  Each one runs test_secondary with the
+ * matching xd3_sec_type, passing `gp' as the largest group count.
+ * NOTE(review): the IF_FGK / IF_DJW / IF_LZMA wrappers presumably emit
+ * their argument only when that compressor is compiled in -- confirm
+ * against their definitions. */
+IF_FGK (static int test_secondary_fgk  (xd3_stream *stream, usize_t gp)
+	{ return test_secondary (stream, & fgk_sec_type, gp); })
+IF_DJW (static int test_secondary_huff (xd3_stream *stream, usize_t gp)
+	{ return test_secondary (stream, & djw_sec_type, gp); })
+IF_LZMA (static int test_secondary_lzma (xd3_stream *stream, usize_t gp)
+	{ return test_secondary (stream, & lzma_sec_type, gp); })
+
+#endif  /* SECONDARY_ANY */
+
+/***********************************************************************
+ TEST INSTRUCTION TABLE
+ ***********************************************************************/
+
+/* Test that xd3_choose_instruction() does the right thing for its code
+ * table: for each of the 256 table entries, build the instruction (or
+ * pair of instructions) the entry describes and check that the chooser
+ * picks that entry's index.  Returns 0 on success or XD3_INTERNAL with
+ * stream->msg set.  The second argument is unused. */
+static int
+test_choose_instruction (xd3_stream *stream, int ignore)
+{
+  int i = 0;
+
+  stream->code_table = (*stream->code_table_func) ();
+
+  while (i < 256)
+    {
+      const xd3_dinst *d = stream->code_table + i;
+      xd3_rinst prev, inst;
+
+      CHECK(d->type1 > 0);
+
+      memset (& prev, 0, sizeof (prev));
+      memset (& inst, 0, sizeof (inst));
+
+      if (d->type2 != 0)
+	{
+	  /* Double entry: the pair must be fused into code2 of the first
+	   * instruction. */
+	  prev.type = d->type1;
+	  prev.size = d->size1;
+	  inst.type = d->type2;
+	  inst.size = d->size2;
+
+	  XD3_CHOOSE_INSTRUCTION (stream, & prev, & inst);
+
+	  if (prev.code2 != i)
+	    {
+	      stream->msg = "wrong double instruction";
+	      return XD3_INTERNAL;
+	    }
+	}
+      else
+	{
+	  /* Single entry.  A table size of 0 is replaced with
+	   * TESTBUFSIZE before the chooser is called. */
+	  inst.type = d->type1;
+	  inst.size = d->size1;
+
+	  if (inst.size == 0)
+	    {
+	      inst.size = TESTBUFSIZE;
+	    }
+
+	  XD3_CHOOSE_INSTRUCTION (stream, NULL, & inst);
+
+	  if (inst.code2 != 0 || inst.code1 != i)
+	    {
+	      stream->msg = "wrong single instruction";
+	      return XD3_INTERNAL;
+	    }
+	}
+
+      i += 1;
+    }
+
+  return 0;
+}
+
+/* Check that the incrementally updated large checksum agrees with the
+ * checksum computed from scratch at every position of a 128-byte
+ * random buffer, for checksum widths 4, 7, ..., 31.
+ *
+ * Returns 0 on success, the code from xd3_size_hashtable if sizing
+ * fails, or XD3_INTERNAL with stream->msg set on a mismatch.  The
+ * h1.powers table is released on the mismatch path as well as on
+ * success.  The second argument is unused. */
+static int
+test_checksum_step (xd3_stream *stream, int ignore)
+{
+  const int bufsize = 128;
+  uint8_t buf[128];
+  for (int i = 0; i < bufsize; i++)
+    {
+      buf[i] = mt_random (&static_mtrand) & 0xff;
+    }
+
+  for (usize_t cksize = 4; cksize <= 32; cksize += 3)
+    {
+      xd3_hash_cfg h1;
+      usize_t x;
+      int ret;
+
+      if ((ret = xd3_size_hashtable (stream, XD3_ALLOCSIZE, cksize, &h1)) != 0)
+	{
+	  return ret;
+	}
+
+      /* x is the rolling value; y is recomputed from scratch at each
+       * position and the two must agree before x is advanced. */
+      x = xd3_large_cksum (&h1, buf, cksize);
+      for (usize_t pos = 0; pos <= (bufsize - cksize); pos++)
+	{
+	  usize_t y = xd3_large_cksum (&h1, buf + pos, cksize);
+	  if (x != y)
+	    {
+	      /* Release the table before bailing out, as the normal
+	       * end of the iteration does. */
+	      xd3_free (stream, h1.powers);
+	      stream->msg = "checksum != incremental checksum";
+	      return XD3_INTERNAL;
+	    }
+	  /* NOTE(review): if the update helper reads buf[pos + cksize],
+	   * the final iteration reads one byte past buf -- confirm
+	   * against xd3_large_cksum_update. */
+	  x = xd3_large_cksum_update (&h1, x, buf + pos, cksize);
+	}
+
+      xd3_free (stream, h1.powers);
+    }
+
+  return 0;
+}
+
+/***********************************************************************
+ 64BIT STREAMING
+ ***********************************************************************/
+
+/* Encode and decode `megs' windows of 1 megabyte each through a fresh
+ * encoder/decoder pair.  Every window is the caller-supplied encbuf
+ * with its first usize_t overwritten by the window index, so that
+ * consecutive windows differ.  encbuf, decbuf and delbuf are each used
+ * as 1 << 20 byte buffers (input, decoded output, and delta).
+ *
+ * Returns 0 on success, XD3_INTERNAL with in_stream->msg set when a
+ * window does not round-trip, or the code of the failing xdelta call
+ * (the failing stream's msg is copied to in_stream for process
+ * errors).  Both local streams are freed before returning. */
+static int
+test_streaming (xd3_stream *in_stream, uint8_t *encbuf, uint8_t *decbuf, uint8_t *delbuf, usize_t megs)
+{
+  xd3_stream estream, dstream;
+  int ret;
+  usize_t i, delsize, decsize;
+  xd3_config cfg;
+  xd3_init_config (& cfg, in_stream->flags);
+  cfg.flags |= XD3_COMPLEVEL_6;
+
+  /* Configure the two streams separately.  If the encoder cannot be
+   * configured, dstream is still an uninitialized local and must not
+   * be handed to xd3_free_stream. */
+  if ((ret = xd3_config_stream (& estream, & cfg)))
+    {
+      xd3_free_stream (& estream);
+      return ret;
+    }
+
+  if ((ret = xd3_config_stream (& dstream, & cfg)))
+    {
+      goto fail;
+    }
+
+  for (i = 0; i < megs; i += 1)
+    {
+      /* Stamp the window with its sequence number. */
+      ((usize_t*) encbuf)[0] = i;
+
+      if ((i % 200) == 199) { DOT (); }
+
+      if ((ret = xd3_process_stream (1, & estream, xd3_encode_input, 0,
+				     encbuf, 1 << 20,
+				     delbuf, & delsize, 1 << 20)))
+	{
+	  in_stream->msg = estream.msg;
+	  goto fail;
+	}
+
+      if ((ret = xd3_process_stream (0, & dstream, xd3_decode_input, 0,
+				     delbuf, delsize,
+				     decbuf, & decsize, 1 << 20)))
+	{
+	  in_stream->msg = dstream.msg;
+	  goto fail;
+	}
+
+      if (decsize != 1 << 20 ||
+	  memcmp (encbuf, decbuf, 1 << 20) != 0)
+	{
+	  in_stream->msg = "wrong result";
+	  ret = XD3_INTERNAL;
+	  goto fail;
+	}
+    }
+
+  if ((ret = xd3_close_stream (& estream)) ||
+      (ret = xd3_close_stream (& dstream)))
+    {
+      goto fail;
+    }
+
+ fail:
+  /* Reached on success too; both streams were configured by now. */
+  xd3_free_stream (& estream);
+  xd3_free_stream (& dstream);
+  return ret;
+}
+
+/* Run tests of data streaming of over and around 4GB of data.
+ * A TWO_MEGS_AND_DELTA byte buffer is split into three regions at
+ * offsets 0, 1MB and 2MB, which test_streaming uses as input, decoded
+ * output and delta.  When SIZEOF_XOFF_T is 4, streaming one window
+ * more than 1 << 12 must fail with the decoder's file offset overflow
+ * error; streaming exactly 1 << 12 windows must always succeed.
+ * Returns 0 on success, ENOMEM if the buffer cannot be allocated,
+ * XD3_INTERNAL if the expected overflow did not occur, or the code
+ * from test_streaming.  The second argument is unused. */
+static int
+test_compressed_stream_overflow (xd3_stream *stream, int ignore)
+{
+  int ret;
+  int base;
+  uint8_t *buf = (uint8_t*) malloc (TWO_MEGS_AND_DELTA);
+
+  if (buf == NULL)
+    {
+      return ENOMEM;
+    }
+
+  /* Fill the first two megabytes with 256-byte ramps, each shifted by
+   * a random amount below 10. */
+  memset (buf, 0, TWO_MEGS_AND_DELTA);
+  for (base = 0; base < (2 << 20); base += 256)
+    {
+      int k;
+      int off = mt_random(& static_mtrand) % 10;
+      for (k = 0; k < 256; k++)
+	{
+	  buf[base + k] = k + off;
+	}
+    }
+
+  /* Test overflow of a 32-bit file offset. */
+  if (SIZEOF_XOFF_T == 4)
+    {
+      ret = test_streaming (stream, buf, buf + (1 << 20), buf + (2 << 20), (1 << 12) + 1);
+
+      if (! (ret == XD3_INVALID_INPUT && MSG_IS ("decoder file offset overflow")))
+	{
+          XPR(NT XD3_LIB_ERRMSG (stream, ret));
+	  stream->msg = "expected overflow condition";
+	  free (buf);
+	  return XD3_INTERNAL;
+	}
+
+      /* The expected overflow was seen; ret is reassigned below. */
+    }
+
+  /* Test transfer of exactly 32bits worth of data. */
+  ret = test_streaming (stream,
+			buf,
+			buf + (1 << 20),
+			buf + (2 << 20),
+			1 << 12);
+
+  free (buf);
+  return ret;
+}
+
+/***********************************************************************
+ COMMAND LINE
+ ***********************************************************************/
+
+#if SHELL_TESTS
+
+/* For each pair of command templates in the array below, test that
+ * encoding and decoding commands work.  Also check for the expected
+ * size delta, which should be approximately TEST_ADD_RATIO times the
+ * file size created by test_make_inputs.  Due to differences in the
+ * application header, it is suppressed (-A) so that all delta files
+ * are the same.
+ *
+ * Each template pair is an encode format (program, soft-config
+ * string, target file, delta file) followed by a decode format
+ * (program, delta file, reconstructed file).  Returns 0 on success,
+ * XD3_INTERNAL with stream->msg set when a command fails or the delta
+ * size is outside the accepted range, or the code from a failing
+ * helper.  On failure the function returns without calling
+ * test_cleanup.  The second argument is unused. */
+static int
+test_command_line_arguments (xd3_stream *stream, int ignore)
+{
+  int i, ret;
+
+  static const char* cmdpairs[] =
+  {
+    /* standard input, output */
+    "%s %s -A < %s > %s", "%s -d < %s > %s",
+    "%s %s -A -e < %s > %s", "%s -d < %s > %s",
+    "%s %s -A= encode < %s > %s", "%s decode < %s > %s",
+    "%s %s -A -q encode < %s > %s", "%s -qdq < %s > %s",
+
+    /* file input, standard output */
+    "%s %s -A= %s > %s", "%s -d %s > %s",
+    "%s %s -A -e %s > %s", "%s -d %s > %s",
+    "%s %s encode -A= %s > %s", "%s decode %s > %s",
+
+    /* file input, output */
+    "%s %s -A= %s %s", "%s -d %s %s",
+    "%s %s -A -e %s %s", "%s -d %s %s",
+    "%s %s -A= encode %s %s", "%s decode %s %s",
+
+    /* option placement */
+    "%s %s -A -f %s %s", "%s -f -d %s %s",
+    "%s %s -e -A= %s %s", "%s -d -f %s %s",
+    "%s %s -f encode -A= %s %s", "%s -f decode -f %s %s",
+  };
+
+  char ecmd[TESTBUFSIZE], dcmd[TESTBUFSIZE];
+  int pairs = SIZEOF_ARRAY (cmdpairs) / 2;
+  xoff_t tsize;
+  xoff_t dsize;
+  double ratio;
+
+  mt_init (& static_mtrand, 0x9f73f7fc);
+
+  for (i = 0; i < pairs; i += 1)
+    {
+      test_setup ();
+      if ((ret = test_make_inputs (stream, NULL, & tsize))) { return ret; }
+
+      snprintf_func (ecmd, TESTBUFSIZE, cmdpairs[2*i], program_name,
+	       test_softcfg_str, TEST_TARGET_FILE, TEST_DELTA_FILE);
+      snprintf_func (dcmd, TESTBUFSIZE, cmdpairs[2*i+1], program_name,
+	       TEST_DELTA_FILE, TEST_RECON_FILE);
+
+      /* Encode and decode. */
+      if ((ret = system (ecmd)) != 0)
+	{
+	  XPR(NT "encode command: %s\n", ecmd);
+	  stream->msg = "encode cmd failed";
+	  return XD3_INTERNAL;
+	}
+
+      if ((ret = system (dcmd)) != 0)
+	{
+	  XPR(NT "decode command: %s\n", dcmd);
+	  stream->msg = "decode cmd failed";
+	  return XD3_INTERNAL;
+	}
+
+      /* Compare the target file. */
+      if ((ret = test_compare_files (TEST_TARGET_FILE, TEST_RECON_FILE)))
+	{
+	  return ret;
+	}
+
+      if ((ret = test_file_size (TEST_DELTA_FILE, & dsize)))
+	{
+	  return ret;
+	}
+
+      ratio = (double) dsize / (double) tsize;
+
+      /* Check that it is not too small, not too large. */
+      if (ratio >= TEST_ADD_RATIO + TEST_EPSILON)
+	{
+	  XPR(NT "test encode with size ratio %.4f, "
+	     "expected < %.4f (%"Q"u, %"Q"u)\n",
+	    ratio, TEST_ADD_RATIO + TEST_EPSILON, dsize, tsize);
+	  stream->msg = "strange encoding";
+	  return XD3_INTERNAL;
+	}
+
+      if (ratio <= TEST_ADD_RATIO * (1.0 - 2 * TEST_EPSILON))
+	{
+	  /* Report the bound that was actually tested above. */
+	  XPR(NT "test encode with size ratio %.4f, "
+	     "expected > %.4f\n",
+	    ratio, TEST_ADD_RATIO * (1.0 - 2 * TEST_EPSILON));
+	  stream->msg = "strange encoding";
+	  return XD3_INTERNAL;
+	}
+
+      /* Also check that test_compare_files works.  The delta and original should
+       * not be identical. */
+      if ((ret = test_compare_files (TEST_DELTA_FILE,
+				TEST_TARGET_FILE)) == 0)
+	{
+	  stream->msg = "broken test_compare_files";
+	  return XD3_INTERNAL;
+	}
+
+      test_cleanup ();
+      DOT ();
+    }
+
+  return 0;
+}
+
+/* Dump the header of the VCDIFF file INPUT with "printhdr" into
+ * TEST_RECON2_FILE, then grep that dump for a line containing LINE_START
+ * followed by MATCHES.  When YES_OR_NO is non-zero the grep is run through
+ * do_cmd; when it is zero the grep is run through do_fail instead.
+ * Returns 0 if the expectation holds, otherwise a non-zero error code. */
+static int
+check_vcdiff_header (xd3_stream *stream,
+		     const char *input,
+		     const char *line_start,
+		     const char *matches,
+		     int yes_or_no)
+{
+  char print_cmd[TESTBUFSIZE];
+  char grep_cmd[TESTBUFSIZE];
+  int status;
+
+  /* Step 1: write the human-readable header dump. */
+  snprintf_func (print_cmd, TESTBUFSIZE, "%s printhdr -f %s %s",
+		 program_name, input, TEST_RECON2_FILE);
+  status = system (print_cmd);
+  if (status != 0)
+    {
+      XPR(NT "printhdr command: %s\n", print_cmd);
+      stream->msg = "printhdr cmd failed";
+      return XD3_INTERNAL;
+    }
+
+  /* Step 2: search the dump; grep's own output is discarded. */
+  snprintf_func (grep_cmd, TESTBUFSIZE, "grep \"%s.*%s.*\" %s > /dev/null",
+		 line_start, matches, TEST_RECON2_FILE);
+  status = yes_or_no
+    ? do_cmd (stream, grep_cmd)
+    : do_fail (stream, grep_cmd);
+  if (status != 0)
+    {
+      /* Echo the grep command so the failing expectation is visible. */
+      XPR(NT "%s\n", grep_cmd);
+      return status;
+    }
+
+  return 0;
+}
+
+static int
+test_recode_command2 (xd3_stream *stream, int has_source,
+		      int variant, int change)
+{
+  int has_adler32 = (variant & 0x1) != 0;
+  int has_apphead = (variant & 0x2) != 0;
+  int has_secondary = (variant & 0x4) != 0;
+  /* Encode per VARIANT, recode with the CHANGE bits flipped, verify, decode. */
+  int change_adler32 = (change & 0x1) != 0;
+  int change_apphead = (change & 0x2) != 0;
+  int change_secondary = (change & 0x4) != 0;
+  /* A set CHANGE bit toggles the matching property for the recode step. */
+  int recoded_adler32 = change_adler32 ? !has_adler32 : has_adler32;
+  int recoded_apphead = change_apphead ? !has_apphead : has_apphead;
+  int recoded_secondary = change_secondary ? !has_secondary : has_secondary;
+
+  char ecmd[TESTBUFSIZE], recmd[TESTBUFSIZE], dcmd[TESTBUFSIZE];
+  xoff_t tsize, ssize;
+  int ret;
+
+  test_setup ();
+
+  if ((ret = test_make_inputs (stream, has_source ? & ssize : NULL, & tsize)))
+    {
+      return ret;
+    }
+
+  /* First encode, passing each option in its "on" or "off" form. */
+  snprintf_func (ecmd, TESTBUFSIZE, "%s %s -f %s %s %s %s %s %s %s",
+	    program_name, test_softcfg_str,
+	    has_adler32 ? "" : "-n ",
+	    has_apphead ? "-A=encode_apphead " : "-A= ",
+	    has_secondary ? "-S djw " : "-S none ",
+	    has_source ? "-s " : "",
+	    has_source ? TEST_SOURCE_FILE : "",
+	    TEST_TARGET_FILE,
+	    TEST_DELTA_FILE);
+
+  if ((ret = system (ecmd)) != 0)
+    {
+      XPR(NT "encode command: %s\n", ecmd);
+      stream->msg = "encode cmd failed";
+      return XD3_INTERNAL;
+    }
+
+  /* Now recode; -A is only passed when the app header is being changed. */
+  snprintf_func (recmd, TESTBUFSIZE,
+	    "%s recode %s -f %s %s %s %s %s", program_name, test_softcfg_str,
+	    recoded_adler32 ? "" : "-n ",
+	    !change_apphead ? "" :
+	        (recoded_apphead ? "-A=recode_apphead " : "-A= "),
+	    recoded_secondary ? "-S djw " : "-S= ",
+	    TEST_DELTA_FILE,
+	    TEST_COPY_FILE);
+
+  if ((ret = system (recmd)) != 0)
+    {
+      XPR(NT "recode command: %s\n", recmd);
+      stream->msg = "recode cmd failed";
+      return XD3_INTERNAL;
+    }
+
+  /* Check recode changes by inspecting the recoded file's header. */
+
+  if ((ret = check_vcdiff_header (stream,
+				  TEST_COPY_FILE,
+				  "VCDIFF window indicator",
+				  "VCD_SOURCE",
+				  has_source))) { return ret; }
+
+  if ((ret = check_vcdiff_header (stream,
+				  TEST_COPY_FILE,
+				  "VCDIFF header indicator",
+				  "VCD_SECONDARY",
+				  recoded_secondary))) { return ret; }
+
+  if ((ret = check_vcdiff_header (stream,
+				  TEST_COPY_FILE,
+				  "VCDIFF window indicator",
+				  "VCD_ADLER32",
+				  /* Recode can't generate an adler32
+				   * checksum, it can only preserve it or
+				   * remove it. */
+				  has_adler32 && recoded_adler32)))
+    {
+      return ret;
+    }
+
+  if (!change_apphead)
+    {
+      if ((ret = check_vcdiff_header (stream,
+				      TEST_COPY_FILE,
+				      "VCDIFF header indicator",
+				      "VCD_APPHEADER",
+				      has_apphead)))
+	{
+	  return ret;
+	}
+      if ((ret = check_vcdiff_header (stream,
+				      TEST_COPY_FILE,
+				      "VCDIFF application header",
+				      "encode_apphead",
+				      has_apphead)))
+	{
+	  return ret;
+	}
+    }
+  else
+    {
+      if ((ret = check_vcdiff_header (stream,
+				      TEST_COPY_FILE,
+				      "VCDIFF header indicator",
+				      "VCD_APPHEADER",
+				      recoded_apphead)))
+	{
+	  return ret;
+	}
+      if (recoded_apphead &&
+	  (ret = check_vcdiff_header (stream,
+				      TEST_COPY_FILE,
+				      "VCDIFF application header",
+				      "recode_apphead",
+				      1)))
+	{
+	  return ret;
+	}
+    }
+
+  /* Now decode the recoded delta (not the originally encoded one). */
+  snprintf_func (dcmd, TESTBUFSIZE, "%s -fd %s %s %s %s ", program_name,
+	    has_source ? "-s " : "",
+	    has_source ? TEST_SOURCE_FILE : "",
+	    TEST_COPY_FILE,
+	    TEST_RECON_FILE);
+
+  if ((ret = system (dcmd)) != 0)
+    {
+      XPR(NT "decode command: %s\n", dcmd);
+      stream->msg = "decode cmd failed";
+      return XD3_INTERNAL;
+    }
+
+  /* Now compare the decoded output against the original target. */
+  if ((ret = test_compare_files (TEST_TARGET_FILE, TEST_RECON_FILE)))
+    {
+      return ret;
+    }
+  test_cleanup ();
+
+  return 0;
+}
+
+static int
+test_recode_command (xd3_stream *stream, int ignore)
+{
+  /* Exhaustively exercise test_recode_command2.  Each case is described
+   * by seven bits:
+   *   bit 6     - encode with a source file (recode never changes this)
+   *   bits 3..5 - initial variant: adler32 / app header / secondary
+   *   bits 0..2 - which of those three settings the recode step flips
+   * Walking the counter upward visits the cases in the order source,
+   * then variant, then change, with the change bits varying fastest.
+   */
+  const int ncases = 2 * 8 * 8;
+  int combo;
+
+  for (combo = 0; combo < ncases; combo++)
+    {
+      int has_source = combo >> 6;
+      int variant = (combo >> 3) & 0x7;
+      int change = combo & 0x7;
+      int ret = test_recode_command2 (stream, has_source, variant, change);
+
+      if (ret != 0)
+	{
+	  return ret;
+	}
+      /* One progress dot per (source, variant) pair. */
+      if (change == 7)
+	{
+	  DOT ();
+	}
+    }
+
+  return 0;
+}
+
+#if SECONDARY_LZMA
+/* Checks that a plain "-e" encode selects LZMA secondary compression. */
+static int test_secondary_lzma_default (xd3_stream *stream, int ignore)
+{
+  char ecmd[TESTBUFSIZE];
+  int ret;
+
+  test_setup ();
+
+  if ((ret = test_make_inputs (stream, NULL, NULL)))
+    {
+      return ret;
+    }
+
+  /* Encode with no -S option at all, so the built-in default applies. */
+  snprintf_func (ecmd, TESTBUFSIZE, "%s -e %s %s",
+		 program_name, TEST_TARGET_FILE, TEST_DELTA_FILE);
+
+  if ((ret = system (ecmd)) != 0)
+    {
+      /* Report the failure the same way the other shell tests do. */
+      XPR(NT "encode command: %s\n", ecmd);
+      stream->msg = "encode cmd failed";
+      return XD3_INTERNAL;
+    }
+
+  /* The printhdr dump must name lzma as the secondary compressor. */
+  if ((ret = check_vcdiff_header (stream, TEST_DELTA_FILE,
+				  "VCDIFF secondary compressor", "lzma", 1)))
+    {
+      return ret;
+    }
+
+  test_cleanup ();
+  return 0;
+}
+
+#endif  /* SECONDARY_LZMA */
+#endif  /* SHELL_TESTS */
+
+/***********************************************************************
+ EXTERNAL I/O DECOMPRESSION/RECOMPRESSION
+ ***********************************************************************/
+
+#if EXTERNAL_COMPRESSION
+/* Runs a single stage of test_externally_compressed_io.  The target file
+ * is fed through the external compressor, an Xdelta encode, an Xdelta
+ * decode and (when DO_EXT_RECOMP is set) the external decompressor; the
+ * output must be byte-identical to the target.  BUF receives the command
+ * line and must hold TESTBUFSIZE bytes; MSG is reported if the pipe fails. */
+static int
+test_compressed_pipe (xd3_stream *stream, main_extcomp *ext, char* buf,
+		      const char* comp_options, const char* decomp_options,
+		      int do_ext_recomp, const char* msg)
+{
+  char ext_decomp_stage[TESTBUFSIZE];
+
+  /* Optional trailing "| decompressor" stage; empty when not wanted. */
+  ext_decomp_stage[0] = 0;
+  if (do_ext_recomp)
+    {
+      snprintf_func (ext_decomp_stage, TESTBUFSIZE, " | %s %s",
+		     ext->decomp_cmdname, ext->decomp_options);
+    }
+
+  /* external compress | xdelta3 encode | xdelta3 decode [| decompress] */
+  snprintf_func (buf, TESTBUFSIZE, "%s %s < %s | %s %s | %s %s%s > %s",
+		 ext->recomp_cmdname, ext->recomp_options,
+		 TEST_TARGET_FILE,
+		 program_name, comp_options,
+		 program_name, decomp_options,
+		 ext_decomp_stage,
+		 TEST_RECON_FILE);
+
+  if (system (buf) != 0)
+    {
+      stream->msg = msg;
+      return XD3_INTERNAL;
+    }
+
+  /* Any mismatch is reported as XD3_INTERNAL. */
+  if (test_compare_files (TEST_TARGET_FILE, TEST_RECON_FILE) != 0)
+    {
+      return XD3_INTERNAL;
+    }
+
+  DOT ();
+  return 0;
+}
+
+/* We want to test that a pipe such as:
+ *
+ * --> | gzip -cf | xdelta3 -cf | xdelta3 -dcf | gzip -dcf | -->
+ *
+ * is transparent, i.e., does not modify the stream of data.  However,
+ * we also want to verify that at the center the data is properly
+ * compressed, i.e., that we do not just have a re-compressed gzip
+ * format, that we have a VCDIFF format.  We do this in two steps.
+ * First test the above pipe, then test with suppressed output
+ * recompression (-R).  The result should be the original input:
+ *
+ * --> | gzip -cf | xdelta3 -cf | xdelta3 -Rdcf | -->
+ *
+ * Finally we want to test that -D also disables input decompression:
+ *
+ * --> | gzip -cf | xdelta3 -Dcf | xdelta3 -Rdcf | gzip -dcf | -->
+ */
+static int
+test_externally_compressed_io (xd3_stream *stream, int ignore)
+{
+  usize_t i;
+  int ret;
+  char buf[TESTBUFSIZE];
+
+  mt_init (& static_mtrand, 0x9f73f7fc);
+
+  if ((ret = test_make_inputs (stream, NULL, NULL))) { return ret; }
+
+  for (i = 0; i < SIZEOF_ARRAY (extcomp_types); i += 1)
+    {
+      main_extcomp *ext = & extcomp_types[i];
+
+      /* Test for the existence of the external command first, if not skip. */
+      snprintf_func (buf, TESTBUFSIZE, "%s %s < /dev/null > /dev/null", ext->recomp_cmdname, ext->recomp_options);
+
+      if ((ret = system (buf)) != 0)
+	{
+	  XPR(NT "%s=0", ext->recomp_cmdname);
+	  continue;
+	}
+      /* The three pipes from the header comment; the first failure ends the test. */
+      if ((ret = test_compressed_pipe (stream, ext, buf, "-cfq", "-dcfq", 1,
+				       "compression failed: identity pipe")) ||
+	  (ret = test_compressed_pipe (stream, ext, buf, "-cfq", "-Rdcfq", 0,
+				       "compression failed: without recompression")) ||
+	  (ret = test_compressed_pipe (stream, ext, buf, "-Dcfq", "-Rdcfq", 1,
+				       "compression failed: without decompression")))
+	{
+	  return ret;
+	}
+    }
+
+  return 0;
+}
+
+/* This tests the proper functioning of external decompression for
+ * source files.  The source and target files are identical and
+ * compressed by gzip.  Decoding such a delta with recompression
+ * disabled (-R) should produce the original, uncompressed
+ * source/target file.  Then it checks with output recompression
+ * enabled--in this case the output should be a compressed copy of the
+ * original source/target file.  Then it checks that encoding with
+ * decompression disabled works--the compressed files are identical
+ * and decoding them should always produce a compressed output,
+ * regardless of -R since the encoded delta file had decompression
+ * disabled.
+ */
+static int
+test_source_decompression (xd3_stream *stream, int ignore)
+{
+  int ret;
+  char buf[TESTBUFSIZE];
+  const main_extcomp *ext;
+  xoff_t dsize;
+
+  mt_init (& static_mtrand, 0x9f73f7fc);
+
+  test_setup ();
+  if ((ret = test_make_inputs (stream, NULL, NULL))) { return ret; }
+
+  /* Use gzip; the test is skipped (reported as success) if it is unknown. */
+  if ((ext = main_get_compressor ("G")) == NULL)
+    {
+      XPR(NT "skipped");
+      return 0;
+    }
+
+  /* Save an uncompressed copy. */
+  if ((ret = test_save_copy (TEST_TARGET_FILE))) { return ret; }
+
+  /* Compress the source (level -1). */
+  snprintf_func (buf, TESTBUFSIZE, "%s -1 %s < %s > %s", ext->recomp_cmdname,
+	   ext->recomp_options, TEST_COPY_FILE, TEST_SOURCE_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+  /* Compress the target (level -9, i.e. a different level than the source). */
+  snprintf_func (buf, TESTBUFSIZE, "%s -9 %s < %s > %s", ext->recomp_cmdname,
+	   ext->recomp_options, TEST_COPY_FILE, TEST_TARGET_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+
+  /* Now the two identical files are compressed.  Delta-encode the target,
+   * with decompression. */
+  snprintf_func (buf, TESTBUFSIZE, "%s -e -vfq -s%s %s %s", program_name, TEST_SOURCE_FILE,
+	   TEST_TARGET_FILE, TEST_DELTA_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+
+  /* Check that the compressed file is small (b/c inputs are
+   * identical). */
+  if ((ret = test_file_size (TEST_DELTA_FILE, & dsize))) { return ret; }
+  /* Deltas for identical files should be very small. */
+  if (dsize > 200)
+    {
+      XPR(NT "external compression did not happen\n");
+      stream->msg = "external compression did not happen";
+      return XD3_INTERNAL;
+    }
+
+  /* Decode the delta file with recompression disabled, should get an
+   * uncompressed file out. */
+  snprintf_func (buf, TESTBUFSIZE, "%s -v -dq -R -s%s %s %s", program_name,
+	   TEST_SOURCE_FILE, TEST_DELTA_FILE, TEST_RECON_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+  if ((ret = test_compare_files (TEST_COPY_FILE,
+			    TEST_RECON_FILE))) { return ret; }
+
+  /* Decode the delta file with recompression, should get a compressed file
+   * out.  But we can't compare compressed files directly, so decompress it. */
+  snprintf_func (buf, TESTBUFSIZE, "%s -v -dqf -s%s %s %s", program_name,
+	   TEST_SOURCE_FILE, TEST_DELTA_FILE, TEST_RECON_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+  snprintf_func (buf, TESTBUFSIZE, "%s %s < %s > %s", ext->decomp_cmdname, ext->decomp_options,
+	   TEST_RECON_FILE, TEST_RECON2_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+  if ((ret = test_compare_files (TEST_COPY_FILE,
+			    TEST_RECON2_FILE))) { return ret; }
+
+  /* Encode with decompression disabled */
+  snprintf_func (buf, TESTBUFSIZE, "%s -e -D -vfq -s%s %s %s", program_name,
+	   TEST_SOURCE_FILE, TEST_TARGET_FILE, TEST_DELTA_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+
+  /* Decode the delta file with decompression disabled, should get the
+   * identical compressed file out. */
+  snprintf_func (buf, TESTBUFSIZE, "%s -d -D -vfq -s%s %s %s", program_name,
+	   TEST_SOURCE_FILE, TEST_DELTA_FILE, TEST_RECON_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+  if ((ret = test_compare_files (TEST_TARGET_FILE,
+			    TEST_RECON_FILE))) { return ret; }
+
+  test_cleanup();
+  return 0;
+}
+#endif
+
+/***********************************************************************
+ FORCE, STDOUT
+ ***********************************************************************/
+
+/* Verifies the overwrite protection: writing a delta onto a file that
+ * already exists must be refused unless -f is given.  Only the encoder
+ * is exercised here (the same code handles it for decoding). */
+static int
+test_force_behavior (xd3_stream *stream, int ignore)
+{
+  char cmd[TESTBUFSIZE];
+  int status;
+
+  test_setup ();
+  /* Start from an empty target file. */
+  snprintf_func (cmd, TESTBUFSIZE, "cp /dev/null %s", TEST_TARGET_FILE);
+  if ((status = do_cmd (stream, cmd)) != 0) { return status; }
+
+  /* First encode: creates the delta file. */
+  snprintf_func (cmd, TESTBUFSIZE, "%s -e %s %s",
+		 program_name, TEST_TARGET_FILE, TEST_DELTA_FILE);
+  if ((status = do_cmd (stream, cmd)) != 0) { return status; }
+
+  /* Second encode without -f: expected to fail (checked via do_fail). */
+  snprintf_func (cmd, TESTBUFSIZE, "%s -q -e %s %s ",
+		 program_name, TEST_TARGET_FILE, TEST_DELTA_FILE);
+  if ((status = do_fail (stream, cmd)) != 0) { return status; }
+
+  /* Third encode with -f: overwriting is now allowed. */
+  snprintf_func (cmd, TESTBUFSIZE, "%s -f -e %s %s",
+		 program_name, TEST_TARGET_FILE, TEST_DELTA_FILE);
+  if ((status = do_cmd (stream, cmd)) != 0) { return status; }
+  test_cleanup ();
+  return 0;
+}
+
+/* This checks the proper operation of the -c flag.  When specified
+ * the default output becomes stdout, otherwise the input must be
+ * provided (encode) or it may be defaulted (decode w/ app header). */
+static int
+test_stdout_behavior (xd3_stream *stream, int ignore)
+{
+  int ret;
+  char buf[TESTBUFSIZE];
+  /* Begin with an empty target file (copied from /dev/null). */
+  test_setup();
+  snprintf_func (buf, TESTBUFSIZE, "cp /dev/null %s", TEST_TARGET_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+
+  /* Without -c, encode writes to the named delta file. */
+  snprintf_func (buf, TESTBUFSIZE, "%s -e %s %s", program_name,
+	   TEST_TARGET_FILE, TEST_DELTA_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+
+  /* With -c, encode writes to stdout (redirected into the delta file). */
+  snprintf_func (buf, TESTBUFSIZE, "%s -e -c %s > %s", program_name,
+	   TEST_TARGET_FILE, TEST_DELTA_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+
+  /* Without -c, decode writes to target file name, but it fails because the
+   * file exists. */
+  snprintf_func (buf, TESTBUFSIZE, "%s -q -d %s ", program_name, TEST_DELTA_FILE);
+  if ((ret = do_fail (stream, buf))) { return ret; }
+
+  /* With -c, decode writes to stdout (discarded here). */
+  snprintf_func (buf, TESTBUFSIZE, "%s -d -c %s > /dev/null", program_name, TEST_DELTA_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+  test_cleanup();
+
+  return 0;
+}
+
+/* This tests that the no-output flag (-J) works. */
+static int
+test_no_output (xd3_stream *stream, int ignore)
+{
+  int ret;
+  char buf[TESTBUFSIZE];
+
+  test_setup ();
+  /* Create an output file with all permission bits cleared. */
+  snprintf_func (buf, TESTBUFSIZE, "touch %s && chmod 0000 %s",
+	   TEST_NOPERM_FILE, TEST_NOPERM_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+
+  if ((ret = test_make_inputs (stream, NULL, NULL))) { return ret; }
+
+  /* Encode to the no-permission file: must fail without -J, succeed with -J. */
+  snprintf_func (buf, TESTBUFSIZE, "%s -q -f -e %s %s", program_name,
+	   TEST_TARGET_FILE, TEST_NOPERM_FILE);
+  if ((ret = do_fail (stream, buf))) { return ret; }
+  snprintf_func (buf, TESTBUFSIZE, "%s -J -e %s %s", program_name,
+	   TEST_TARGET_FILE, TEST_NOPERM_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+
+  /* Now really write the delta to test decode no-output */
+  snprintf_func (buf, TESTBUFSIZE, "%s -e %s %s", program_name,
+	   TEST_TARGET_FILE, TEST_DELTA_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+  /* Decode to the no-permission file: same expectations as for encode. */
+  snprintf_func (buf, TESTBUFSIZE, "%s -q -f -d %s %s", program_name,
+	   TEST_DELTA_FILE, TEST_NOPERM_FILE);
+  if ((ret = do_fail (stream, buf))) { return ret; }
+  snprintf_func (buf, TESTBUFSIZE, "%s -J -d %s %s", program_name,
+	   TEST_DELTA_FILE, TEST_NOPERM_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+  test_cleanup ();
+  return 0;
+}
+
+/* This tests that the default appheader works */
+static int
+test_appheader (xd3_stream *stream, int ignore)
+{
+  int i;
+  int ret;
+  char buf[TESTBUFSIZE];
+  char bogus[TESTBUFSIZE];
+  xoff_t ssize, tsize;
+  test_setup ();
+
+  if ((ret = test_make_inputs (stream, &ssize, &tsize))) { return ret; }
+  // Encode without -A, so the delta carries the default appheader.
+  snprintf_func (buf, TESTBUFSIZE, "%s -q -f -e -s %s %s %s", program_name,
+		 TEST_SOURCE_FILE, TEST_TARGET_FILE, TEST_DELTA_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+  // Copy the program itself to a fixed path so it can be run after "cd /tmp".
+  if ((ret = test_copy_to (program_name, TEST_RECON2_FILE))) { return ret; }
+
+  snprintf_func (buf, TESTBUFSIZE, "chmod 0700 %s", TEST_RECON2_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+  // Keep a copy of the target, then overwrite the target with the source.
+  if ((ret = test_save_copy (TEST_TARGET_FILE))) { return ret; }
+  if ((ret = test_copy_to (TEST_SOURCE_FILE, TEST_TARGET_FILE))) { return ret; }
+
+  if ((ret = test_compare_files (TEST_TARGET_FILE, TEST_COPY_FILE)) == 0)
+    {
+      return XD3_INVALID;  // The files compared equal but must differ here.
+    }
+
+  // Test that the target file is restored.
+  snprintf_func (buf, TESTBUFSIZE, "(cd /tmp && %s -q -f -d %s)",
+		 TEST_RECON2_FILE,
+		 TEST_DELTA_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+
+  if ((ret = test_compare_files (TEST_TARGET_FILE, TEST_COPY_FILE)) != 0)
+    {
+      return ret;
+    }
+
+  // Test a malicious string w/ entries > 4 in the appheader by having
+  // the encoder write it ("G/" repeated, TESTBUFSIZE/2 - 1 characters):
+  for (i = 0; i < TESTBUFSIZE / 4; ++i)
+    {
+      bogus[2*i] = 'G';
+      bogus[2*i+1] = '/';
+    }
+  bogus[TESTBUFSIZE/2-1] = 0;
+
+  snprintf_func (buf, TESTBUFSIZE, 
+		 "%s -q -f -A=%s -e -s %s %s %s", program_name, bogus,
+		 TEST_SOURCE_FILE, TEST_TARGET_FILE, TEST_DELTA_FILE);
+  if ((ret = do_cmd (stream, buf))) { return ret; }
+  // Then read it:
+  snprintf_func (buf, TESTBUFSIZE, "(cd /tmp && %s -q -f -d %s)",
+		 TEST_RECON2_FILE,
+		 TEST_DELTA_FILE);
+  if ((ret = do_cmd (stream, buf)) == 0) 
+    { 
+      return XD3_INVALID;  // Decoding the bogus appheader must not succeed.
+    }
+  if (!WIFEXITED(ret))
+    {
+      return XD3_INVALID;  // Must have crashed!
+    }
+
+  test_cleanup ();
+  return 0;
+}
+
+/***********************************************************************
+ Source identical optimization
+ ***********************************************************************/
+
+/* Computing a delta should be fastest when the two inputs are
+ * identical, this checks it.  The library is called to compute a
+ * delta between a 10000 byte file, 1000 byte winsize, 512 byte source
+ * blocksize.  The same buffer is used for both source and target. */
+static int
+test_identical_behavior (xd3_stream *stream, int ignore)
+{
+#define IDB_TGTSZ 10000  /* Not a power of two b/c of hard-coded expectations below. */
+#define IDB_BLKSZ 512
+#define IDB_WINSZ 1000
+#define IDB_DELSZ 1000
+#define IDB_WINCNT (IDB_TGTSZ / IDB_WINSZ)
+
+  int ret, i;
+  uint8_t buf[IDB_TGTSZ];
+  uint8_t del[IDB_DELSZ];
+  uint8_t rec[IDB_TGTSZ];
+  xd3_source source;
+  int nextencwin = 0;
+  int winstarts = 0, winfinishes = 0;
+  usize_t delpos = 0, recsize;
+  xd3_config config;
+  memset(&source, 0, sizeof(source));
+
+  for (i = 0; i < IDB_TGTSZ; i += 1)
+    {
+      buf[i] = (uint8_t) mt_random (&static_mtrand);
+    }
+
+  stream->winsize = IDB_WINSZ;
+
+  source.blksize  = IDB_BLKSZ;
+  source.name     = "";
+  source.curblk   = NULL;
+  source.curblkno = 0;
+
+  if ((ret = xd3_set_source (stream, & source))) { goto fail; }
+
+  /* Compute a delta between identical source and targets. */
+  for (;;)
+    {
+      ret = xd3_encode_input (stream);
+
+      if (ret == XD3_INPUT)
+	{
+	  xd3_avail_input (stream, buf + (IDB_WINSZ * nextencwin), IDB_WINSZ);
+	  nextencwin += 1;
+	  continue;
+	}
+
+      if (ret == XD3_GETSRCBLK)
+	{
+	  source.curblkno = source.getblkno;
+	  source.onblk    = IDB_BLKSZ;
+	  source.curblk   = buf + source.getblkno * IDB_BLKSZ;
+	  continue;
+	}
+
+      if (ret == XD3_WINSTART)
+	{
+	  winstarts++;
+	  continue;
+	}
+      if (ret == XD3_WINFINISH)
+	{
+	  winfinishes++;
+	  if (winfinishes == IDB_WINCNT)
+	    {
+	      break;
+	    }
+	  continue;
+	}
+
+      if (ret != XD3_OUTPUT) { goto fail; }
+
+      CHECK(delpos + stream->avail_out <= IDB_DELSZ);
+
+      memcpy (del + delpos, stream->next_out, stream->avail_out);
+
+      delpos += stream->avail_out;
+
+      xd3_consume_output (stream);
+    }
+
+  CHECK(winfinishes == IDB_WINCNT);
+  CHECK(winstarts == IDB_WINCNT);
+  CHECK(nextencwin == IDB_WINCNT);
+
+  /* Reset: for decoding, present the whole buffer as one source block. */
+  memset(&source, 0, sizeof(source));
+  source.blksize  = IDB_TGTSZ;
+  source.onblk    = IDB_TGTSZ;
+  source.curblk   = buf;
+  source.curblkno = 0;
+
+  if ((ret = xd3_close_stream (stream))) { goto fail; }
+  xd3_free_stream (stream);
+  xd3_init_config (& config, 0);
+  if ((ret = xd3_config_stream (stream, & config))) { goto fail; }
+  if ((ret = xd3_set_source_and_size (stream, & source, IDB_TGTSZ))) { goto fail; }
+
+  /* Decode. */
+  if ((ret = xd3_decode_stream (stream, del, delpos, rec, & recsize, IDB_TGTSZ))) { goto fail; }
+
+  /* Check result size and data.  ret is 0 here, so set an error explicitly. */
+  if (recsize != IDB_TGTSZ) { stream->msg = "wrong size reconstruction"; ret = XD3_INTERNAL; goto fail; }
+  if (memcmp (rec, buf, IDB_TGTSZ) != 0) { stream->msg = "wrong data reconstruction"; ret = XD3_INTERNAL; goto fail; }
+
+  /* Check that there was one copy per window. */
+  IF_DEBUG (if (stream->n_scpy != IDB_WINCNT ||
+		stream->n_add != 0 ||
+		stream->n_run != 0) { stream->msg = "wrong copy count"; ret = XD3_INTERNAL; goto fail; });
+
+  /* Check that no checksums were computed because the initial match
+     was presumed. */
+  IF_DEBUG (if (stream->large_ckcnt != 0) { stream->msg = "wrong checksum behavior"; ret = XD3_INTERNAL; goto fail; });
+
+  ret = 0;
+ fail:
+  return ret;
+}
+
+/***********************************************************************
+ String matching test
+ ***********************************************************************/
+
+/* Check particular matching behaviors by calling
+ * xd3_string_match_soft directly with specific arguments. */
+typedef struct _string_match_test string_match_test;
+
+typedef enum
+{
+  SM_NONE    = 0,         /* no flags */
+  SM_LAZY    = (1 << 1),  /* run the case with max_lazy = 10 instead of 0 */
+} string_match_flags;
+
+struct _string_match_test
+{
+  const char *input;   /* text handed to the matcher as input */
+  int         flags;   /* string_match_flags bits */
+  const char *result;  /* expected instruction list, compared with strcmp */
+};
+
+static const string_match_test match_tests[] =  /* results: "R<pos>/<size>" run, "C<pos>/<size>@<addr>" copy */
+{
+  /* nothing */
+  { "1234567890", SM_NONE, "" },
+
+  /* basic run, copy */
+  { "11111111112323232323", SM_NONE, "R0/10 C12/8@10" },
+
+  /* no run smaller than MIN_RUN=8 */
+  { "1111111",  SM_NONE, "C1/6@0" },
+  { "11111111", SM_NONE, "R0/8" },
+
+  /* simple promotion: the third copy address depends on promotion */
+  { "ABCDEF_ABCDEF^ABCDEF", SM_NONE,    "C7/6@0 C14/6@7" },
+  /* { "ABCDEF_ABCDEF^ABCDEF", SM_PROMOTE, "C7/6@0 C14/6@0" }, forgotten */
+
+  /* simple lazy: there is a better copy starting with "23 X" than "123 " */
+  { "123 23 XYZ 123 XYZ", SM_NONE, "C11/4@0" },
+  { "123 23 XYZ 123 XYZ", SM_LAZY, "C11/4@0 C12/6@4" },
+
+  /* trylazy: no lazy matches unless there are at least two characters beyond
+   * the first match */
+  { "2123_121212",   SM_LAZY, "C7/4@5" },
+  { "2123_1212123",  SM_LAZY, "C7/4@5" },
+  { "2123_1212123_", SM_LAZY, "C7/4@5 C8/5@0" },
+
+  /* trylazy: no lazy matches if the copy is >= MAXLAZY=10 */
+  { "2123_121212123_",   SM_LAZY, "C7/6@5 C10/5@0" },
+  { "2123_12121212123_", SM_LAZY, "C7/8@5 C12/5@0" },
+  { "2123_1212121212123_", SM_LAZY, "C7/10@5" },
+
+  /* lazy run: check a run overlapped by a longer copy */
+  { "11111112 111111112 1", SM_LAZY, "C1/6@0 R9/8 C10/10@0" },
+
+  /* lazy match: match_length,run_l >= min_match tests, shouldn't get any
+   * copies within the run, no run within the copy */
+  { "^________^________  ", SM_LAZY, "R1/8 C9/9@0" },
+
+  /* chain depth: it only goes back 10. this checks that the 10th match hits
+   * and the 11th misses. */
+  { "1234 1234_1234-1234=1234+1234[1234]1234{1234}1234<1234 ", SM_NONE,
+    "C5/4@0 C10/4@5 C15/4@10 C20/4@15 C25/4@20 C30/4@25 C35/4@30 C40/4@35 C45/4@40 C50/5@0" },
+  { "1234 1234_1234-1234=1234+1234[1234]1234{1234}1234<1234>1234 ", SM_NONE,
+    "C5/4@0 C10/4@5 C15/4@10 C20/4@15 C25/4@20 C30/4@25 C35/4@30 C40/4@35 C45/4@40 C50/4@45 C55/4@50" },
+
+  /* ssmatch test */
+  { "ABCDE___ABCDE*** BCDE***", SM_NONE, "C8/5@0 C17/4@1" },
+  /*{ "ABCDE___ABCDE*** BCDE***", SM_SSMATCH, "C8/5@0 C17/7@9" }, forgotten */
+};
+
+static int
+test_string_matching (xd3_stream *stream, int ignore)
+{
+  usize_t i;
+  int ret;
+  xd3_config config;
+  char rbuf[TESTBUFSIZE];
+  /* Run every match_tests entry and compare the rendered instruction list. */
+  for (i = 0; i < SIZEOF_ARRAY (match_tests); i += 1)
+    {
+      const string_match_test *test = & match_tests[i];
+      char *rptr = rbuf;
+      usize_t len = (usize_t) strlen (test->input);
+
+      xd3_free_stream (stream);
+      xd3_init_config (& config, 0);
+      /* Reconfigure the stream for the soft matcher with fixed parameters. */
+      config.smatch_cfg   = XD3_SMATCH_SOFT;
+      config.smatcher_soft.large_look   = 4;
+      config.smatcher_soft.large_step   = 4;
+      config.smatcher_soft.small_look   = 4;
+      config.smatcher_soft.small_chain  = 10;
+      config.smatcher_soft.small_lchain = 10;
+      config.smatcher_soft.max_lazy     = (test->flags & SM_LAZY) ? 10 : 0;
+      config.smatcher_soft.long_enough  = 10;
+
+      if ((ret = xd3_config_stream (stream, & config))) { return ret; }
+      if ((ret = xd3_encode_init_full (stream))) { return ret; }
+
+      xd3_avail_input (stream, (uint8_t*)test->input, len);
+
+      if ((ret = stream->smatcher.string_match (stream))) { return ret; }
+      /* Render each queued instruction as "R<pos>/<size>" or "C<pos>/<size>@<addr>". */
+      *rptr = 0;
+      while (! xd3_rlist_empty (& stream->iopt_used))
+	{
+	  xd3_rinst *inst = xd3_rlist_pop_front (& stream->iopt_used);
+
+	  switch (inst->type)
+	    {
+	    case XD3_RUN: *rptr++ = 'R'; break;
+	    case XD3_CPY: *rptr++ = 'C'; break;
+	    default: CHECK(0);
+	    }
+
+	  snprintf_func (rptr, rbuf+TESTBUFSIZE-rptr, "%"W"u/%"W"u",
+			 inst->pos, inst->size);
+	  rptr += strlen (rptr);
+
+	  if (inst->type == XD3_CPY)
+	    {
+	      *rptr++ = '@';
+	      snprintf_func (rptr, rbuf+TESTBUFSIZE-rptr, "%"Q"u", inst->addr);
+	      rptr += strlen (rptr);
+	    }
+
+	  *rptr++ = ' ';
+
+	  xd3_rlist_push_back (& stream->iopt_free, inst);
+	}
+      /* Replace the trailing separator space with the terminator. */
+      if (rptr != rbuf)
+	{
+	  rptr -= 1; *rptr = 0;
+	}
+
+      if (strcmp (rbuf, test->result) != 0)
+	{
+	  XPR(NT "test %"W"u: expected %s: got %s", i, test->result, rbuf);
+	  stream->msg = "wrong result";
+	  return XD3_INTERNAL;
+	}
+    }
+
+  return 0;
+}
+
+/*
+ * This is a test for many overlapping instructions. It must be a lazy
+ * matcher.
+ */
+static int
+test_iopt_flush_instructions (xd3_stream *stream, int ignore)
+{
+  int ret, i;
+  usize_t tpos = 0;
+  usize_t delta_size, recon_size;
+  xd3_config config;
+  uint8_t target[TESTBUFSIZE];
+  uint8_t delta[TESTBUFSIZE];
+  uint8_t recon[TESTBUFSIZE];
+
+  xd3_free_stream (stream);
+  xd3_init_config (& config, 0);
+  /* Soft matcher with lazy matching enabled (max_lazy = 8). */
+  config.smatch_cfg    = XD3_SMATCH_SOFT;
+  config.smatcher_soft.large_look    = 16;
+  config.smatcher_soft.large_step    = 16;
+  config.smatcher_soft.small_look    = 4;
+  config.smatcher_soft.small_chain   = 128;
+  config.smatcher_soft.small_lchain  = 16;
+  config.smatcher_soft.max_lazy      = 8;
+  config.smatcher_soft.long_enough   = 128;
+
+  if ((ret = xd3_config_stream (stream, & config))) { return ret; }
+  /* Target: 249 records "i, i+1, i+2, i+3, 0", then the bytes 1..252. */
+  for (i = 1; i < 250; i++)
+    {
+      target[tpos++] = i;
+      target[tpos++] = i+1;
+      target[tpos++] = i+2;
+      target[tpos++] = i+3;
+      target[tpos++] = 0;
+    }
+  for (i = 1; i < 253; i++)
+    {
+      target[tpos++] = i;
+    }
+
+  if ((ret = xd3_encode_stream (stream, target, tpos,
+				    delta, & delta_size, sizeof (delta))))
+    {
+      return ret;
+    }
+  /* Decode with a freshly configured stream; output must equal the target. */
+  xd3_free_stream(stream);
+  if ((ret = xd3_config_stream (stream, & config))) { return ret; }
+
+  if ((ret = xd3_decode_stream (stream, delta, delta_size,
+				recon, & recon_size, sizeof (recon))))
+    {
+      return ret;
+    }
+
+  CHECK(tpos == recon_size);
+  CHECK(memcmp(target, recon, recon_size) == 0);
+
+  return 0;
+}
+
+/*
+ * This tests the 32/64bit ambiguity for source-window matching.
+ */
+#if !XD3_USE_LARGESIZET
+static int
+test_source_cksum_offset (xd3_stream *stream, int ignore)
+ {
+  xd3_source source;
+  /* NOTE(review): "source" is never initialized here -- confirm intended. */
+  // Inputs are:
+  struct {
+    xoff_t   cpos;   // stream->srcwin_cksum_pos;
+    xoff_t   ipos;   // stream->total_in;
+    xoff_t   size;   // stream->src->size;
+
+    usize_t  input;  // input  32-bit offset
+    xoff_t   output; // output 64-bit offset
+    /* NOTE(review): "size" is never read by the loop below. */
+  } cksum_test[] = {
+    // If cpos is <= 2^32
+    { 1, 1, 1, 1, 1 },
+
+#if XD3_USE_LARGEFILE64
+//    cpos            ipos            size            input         output
+//    0x____xxxxxULL, 0x____xxxxxULL, 0x____xxxxxULL, 0x___xxxxxUL, 0x____xxxxxULL
+    { 0x100100000ULL, 0x100000000ULL, 0x100200000ULL, 0x00000000UL, 0x100000000ULL },
+    { 0x100100000ULL, 0x100000000ULL, 0x100200000ULL, 0xF0000000UL, 0x0F0000000ULL },
+
+    { 0x100200000ULL, 0x100100000ULL, 0x100200000ULL, 0x00300000UL, 0x000300000ULL },
+
+    { 25771983104ULL, 25770000000ULL, 26414808769ULL, 2139216707UL, 23614053187ULL },
+
+#endif
+
+    { 0, 0, 0, 0, 0 },
+  }, *test_ptr;
+  /* The all-zero row ends the table: the loop stops at the first cpos == 0. */
+  stream->src = &source;
+
+  for (test_ptr = cksum_test; test_ptr->cpos; test_ptr++) {
+    xoff_t r;
+    stream->srcwin_cksum_pos = test_ptr->cpos;
+    stream->total_in = test_ptr->ipos;
+
+    r = xd3_source_cksum_offset(stream, test_ptr->input);
+    CHECK(r == test_ptr->output);
+  }
+  return 0;
+}
+#endif /* !XD3_USE_LARGESIZET */
+
+/* Round-trips test_text (with 16 bytes zeroed) through the in-memory
+ * encoder and decoder, using the unmodified test_text as the source. */
+static int
+test_in_memory (xd3_stream *stream, int ignore)
+{
+  uint8_t ibuf[sizeof(test_text)];
+  uint8_t dbuf[sizeof(test_text)];
+  uint8_t obuf[sizeof(test_text)];
+  usize_t size = sizeof(test_text);
+  usize_t dsize = 0, osize = 0;
+  int eflags = SECONDARY_DJW ? XD3_SEC_DJW : 0;
+  int r1, r2;
+
+  memcpy(ibuf, test_text, size);
+  memset(ibuf + 128, 0, 16);  /* needs sizeof(test_text) >= 144 */
+
+  r1 = xd3_encode_memory(ibuf, size, test_text, size,
+			 dbuf, &dsize, size, eflags);
+
+  /* Only decode a delta that was actually produced: after a failed
+   * encode, dsize would not describe valid contents of dbuf. */
+  r2 = (r1 == 0) ? xd3_decode_memory(dbuf, dsize, test_text, size,
+				     obuf, &osize, size, 0) : r1;
+
+  if (r1 != 0 || r2 != 0 || dsize >= (size/2) || dsize < 1 ||
+      osize != size) {
+    stream->msg = "encode/decode size error";
+    return XD3_INTERNAL;
+  }
+
+  if (memcmp(obuf, ibuf, size) != 0) {
+    stream->msg = "encode/decode data error";
+    return XD3_INTERNAL;
+  }
+  return 0;
+}
+
+/***********************************************************************
+ TEST MAIN
+ ***********************************************************************/
+
+int xd3_selftest (void)
+{
+#define DO_TEST(fn,flags,arg)  /* run test_<fn> on a fresh stream */    \
+  do {                                                                \
+    xd3_stream stream;                                                \
+    xd3_config config;                                                \
+    xd3_init_config (& config, flags);                                \
+    XPR(NT "testing " #fn "%s...",                          \
+             flags ? (" (" #flags ")") : "");                         \
+    if ((ret = xd3_config_stream (& stream, & config)) == 0 &&        \
+        (ret = test_ ## fn (& stream, arg)) == 0) {                   \
+      XPR(NTR " success\n");                                          \
+    } else {                                                          \
+      XPR(NTR " failed: %s: %s\n", xd3_errstring (& stream),          \
+               xd3_mainerror (ret)); }                                \
+    xd3_free_stream (& stream);                                       \
+    if (ret != 0) { goto failure; }                                   \
+  } while (0)
+  /* ret is the status of the most recent test; the first failure stops the run. */
+  int ret;
+  DO_TEST (random_numbers, 0, 0);
+  DO_TEST (printf_xoff, 0, 0);
+
+  DO_TEST (decode_integer_end_of_input, 0, 0);
+  DO_TEST (decode_integer_overflow, 0, 0);
+  DO_TEST (encode_decode_uint32_t, 0, 0);
+  DO_TEST (encode_decode_uint64_t, 0, 0);
+  DO_TEST (usize_t_overflow, 0, 0);
+  DO_TEST (checksum_step, 0, 0);
+  DO_TEST (forward_match, 0, 0);
+  DO_TEST (address_cache, 0, 0);
+
+  DO_TEST (string_matching, 0, 0);
+  DO_TEST (choose_instruction, 0, 0);
+  DO_TEST (identical_behavior, 0, 0);
+  DO_TEST (in_memory, 0, 0);
+
+  DO_TEST (iopt_flush_instructions, 0, 0);
+#if !XD3_USE_LARGESIZET
+  DO_TEST (source_cksum_offset, 0, 0);
+#endif
+
+  DO_TEST (decompress_single_bit_error, 0, 3);
+  DO_TEST (decompress_single_bit_error, XD3_ADLER32, 3);
+
+  IF_LZMA (DO_TEST (decompress_single_bit_error, XD3_SEC_LZMA, 54));
+  IF_FGK (DO_TEST (decompress_single_bit_error, XD3_SEC_FGK, 3));
+  IF_DJW (DO_TEST (decompress_single_bit_error, XD3_SEC_DJW, 8));
+
+#if SHELL_TESTS
+  DO_TEST (force_behavior, 0, 0);
+  DO_TEST (stdout_behavior, 0, 0);
+  DO_TEST (no_output, 0, 0);
+  DO_TEST (appheader, 0, 0);
+  DO_TEST (command_line_arguments, 0, 0);
+
+#if EXTERNAL_COMPRESSION
+  DO_TEST (source_decompression, 0, 0);
+  DO_TEST (externally_compressed_io, 0, 0);
+#endif
+
+  DO_TEST (recode_command, 0, 0);
+  IF_LZMA (DO_TEST (secondary_lzma_default, 0, 0));
+#endif
+
+  IF_LZMA (DO_TEST (secondary_lzma, 0, 1));
+  IF_DJW (DO_TEST (secondary_huff, 0, DJW_MAX_GROUPS));
+  IF_FGK (DO_TEST (secondary_fgk, 0, 1));
+
+  DO_TEST (compressed_stream_overflow, 0, 0);
+  IF_LZMA (DO_TEST (compressed_stream_overflow, XD3_SEC_LZMA, 0));
+
+failure:
+  test_cleanup ();
+  return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+#undef DO_TEST
+}
diff --git a/third-party/xdelta3/xdelta3/xdelta3.1 b/third-party/xdelta3/xdelta3/xdelta3.1
new file mode 100644
index 0000000000..693171e3dd
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/xdelta3.1
@@ -0,0 +1,153 @@
+.TH XDELTA3 "1" "August 2009" "Xdelta3"
+.SH NAME
+xdelta3 \- VCDIFF (RFC 3284) binary diff tool
+.SH SYNOPSIS
+.B xdelta3 
+.RI [ command ]
+.RI [ options ] 
+.RI [ input 
+.RI [ output ]]
+.SH DESCRIPTION
+.B xdelta3
+is a binary diff tool that uses the VCDIFF (RFC 3284) format and compression.
+.SH COMMANDS
+.TP
+.BI config
+prints xdelta3 configuration
+.TP
+.BI decode
+decompress the input, also set by -d
+.TP
+.BI encode
+compress the input, also set by -e (default)
+.TP
+.BI test
+run the builtin tests
+.TP
+.BI printdelta
+print information about the entire delta
+.TP
+.BI printhdr
+print information about the first window
+.TP
+.BI printhdrs
+print information about all windows
+.TP
+.BI recode
+encode with new application/secondary settings
+
+.SH OPTIONS
+standard options:
+.TP
+.BI "\-0 .. \-9"
+compression level
+.TP
+.BI "\-c"
+use stdout
+.TP
+.BI "\-d"
+decompress
+.TP
+.BI \-e
+compress
+.TP
+.BI \-f
+force overwrite
+.TP
+.BI \-h
+show help
+.TP
+.BI \-q
+be quiet
+.TP
+.BI \-v
+be verbose (max 2)
+.TP
+.BI \-V
+show version
+
+.TP
+memory options:
+.TP
+.BI \-B 
+.RI bytes
+source window size
+.TP
+.BI \-W 
+.RI bytes
+input window size
+.TP
+.BI \-P 
+.RI size
+compression duplicates window
+.TP
+.BI \-I 
+.RI size
+instruction buffer size (0 = unlimited)
+
+.TP
+compression options:
+.TP
+.BI \-s
+.RI source
+source file to copy from (if any)
+.TP
+.BI "\-S " [djw|fgk]
+enable/disable secondary compression
+.TP
+.BI \-N
+disable small string-matching compression
+.TP
+.BI \-D
+disable external decompression (encode/decode)
+.TP
+.BI \-R
+disable external recompression (decode)
+.TP
+.BI \-n
+disable checksum (encode/decode)
+.TP
+.BI \-C 
+soft config (encode, undocumented)
+.TP
+.BI "\-A " [apphead]
+disable/provide application header (encode)
+.TP
+.BI \-J
+disable output (check/compute only)
+.TP
+.BI \-T
+use alternate code table (test)
+
+.SH NOTES
+The 
+.B XDELTA
+environment variable may contain extra args:
+
+.RS
+XDELTA="-s source-x.y.tar.gz" \\
+.br
+tar --use-compress-program=xdelta3 -cf \\
+.br
+target-x.z.tar.gz.vcdiff target-x.y/
+
+.SH EXAMPLES
+
+Compress the differences between SOURCE and TARGET, yielding OUT, 
+using "djw" secondary compression:
+
+xdelta3 -S djw -s SOURCE TARGET OUT
+
+Do the same, using standard input and output:
+
+xdelta3 -S djw -s SOURCE < TARGET > OUT
+
+To decompress OUT, using SOURCE, yielding TARGET:
+
+xdelta3 -d -s SOURCE OUT TARGET
+
+.SH AUTHOR
+xdelta3 was written by Josh MacDonald <josh.macdonald@gmail.com>.
+.PP
+This manual page was written by Leo 'costela' Antunes <costela@debian.org>
+for the Debian project (but may be used by others).
diff --git a/third-party/xdelta3/xdelta3/xdelta3.c b/third-party/xdelta3/xdelta3/xdelta3.c
new file mode 100644
index 0000000000..ef518cdbf7
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/xdelta3.c
@@ -0,0 +1,4819 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+   -------------------------------------------------------------------
+
+			       Xdelta 3
+
+   The goal of this library is to implement both the (stand-alone)
+   data-compression and delta-compression aspects of VCDIFF encoding, and
+   to support a programming interface that works like Zlib
+   (http://www.gzip.org/zlib.html). See RFC3284: The VCDIFF Generic
+   Differencing and Compression Data Format.
+
+   VCDIFF is a unified encoding that combines data-compression and
+   delta-encoding ("differencing").
+
+   VCDIFF has a detailed byte-code instruction set with many features.
+   The instruction format supports an immediate size operand for small
+   COPYs and ADDs (e.g., under 18 bytes).  There are also instruction
+   "modes", which are used to compress COPY addresses by using two
+   address caches.  An instruction mode refers to slots in the NEAR
+   and SAME caches for recent addresses.  NEAR remembers the
+   previous 4 (by default) COPY addresses, and SAME catches
+   frequent re-uses of the same address using a 3-way (by default)
+   256-entry associative cache of [ADDR mod 256], the encoded byte.
+   A hit in the NEAR/SAME cache requires 0/1 ADDR bytes.
+
+   VCDIFF has a default instruction table, but alternate
+   instruction tables may themselves be delta-compressed and
+   included in the encoding header.  This allows even more freedom.
+   There are 9 instruction modes in the default code table, 4 near, 3
+   same, VCD_SELF (absolute encoding) and VCD_HERE (relative to the
+   current position).
+
+   ----------------------------------------------------------------------
+
+  			      Algorithms
+
+   Aside from the details of encoding and decoding, there are a bunch
+   of algorithms needed.
+
+   1. STRING-MATCH.  A two-level fingerprinting approach is used.  A
+   single loop computes the two checksums -- small and large -- at
+   successive offsets in the TARGET file.  The large checksum is more
+   accurate and is used to discover SOURCE matches, which are
+   potentially very long.  The small checksum is used to discover
+   copies within the TARGET.  Small matching, which is more expensive,
+   usually dominates the large STRING-MATCH costs in this code - the
+   more exhaustive the search, the better the results.  Either of the
+   two string-matching mechanisms may be disabled.
+
+   2. INSTRUCTION SELECTION.  The IOPT buffer here represents a queue
+   used to store overlapping copy instructions.  There are two possible
+   optimizations that go beyond a greedy search.  Both of these fall
+   into the category of "non-greedy matching" optimizations.
+
+   The first optimization stems from backward SOURCE-COPY matching.
+   When a new SOURCE-COPY instruction covers a previous instruction in
+   the target completely, it is erased from the queue.  Randal Burns
+   originally analyzed these algorithms and did a lot of related work
+   (\cite the 1.5-pass algorithm).
+
+   The second optimization comes by the encoding of common very-small
+   COPY and ADD instructions, for which there are special DOUBLE-code
+   instructions, which code two instructions in a single byte.
+
+   The cost of bad instruction-selection overhead is relatively high
+   for data-compression, relative to delta-compression, so this second
+   optimization is fairly important.  With "lazy" matching (the name
+   used in Zlib for a similar optimization), the string-match
+   algorithm searches after a match for potential overlapping copy
+   instructions.  In Xdelta and by default, VCDIFF, the minimum match
+   size is 4 bytes, whereas Zlib searches with a 3-byte minimum.  This
+   feature, combined with double instructions, provides a nice
+   challenge.  Search in this file for "black magic", a heuristic.
+
+   3. STREAM ALIGNMENT.  Stream alignment is needed to compress large
+   inputs in constant space.  See xd3_srcwin_move_point().
+
+   4. WINDOW SELECTION.  When the IOPT buffer flushes, in the first call
+   to xd3_iopt_finish_encoding containing any kind of copy instruction,
+   the parameters of the source window must be decided: the offset into
+   the source and the length of the window.  Since the IOPT buffer is
+   finite, the program may be forced to fix these values before knowing
+   the best offset/length.
+
+   5. SECONDARY COMPRESSION.  VCDIFF supports a secondary encoding to
+   be applied to the individual sections of the data format, which are
+   ADDRess, INSTruction, and DATA.  Several secondary compressor
+   variations are implemented here, although none is standardized yet.
+
+   One is an adaptive huffman algorithm -- the FGK algorithm (Faller,
+   Gallager, and Knuth, 1985).  This compressor is extremely slow.
+
+   The other is a simple static Huffman routine, which is the base
+   case of a semi-adaptive scheme published by D.J. Wheeler and first
+   widely used in bzip2 (by Julian Seward).  This is a very
+   interesting algorithm, originally published in nearly cryptic form
+   by D.J. Wheeler. !!!NOTE!!! Because these are not standardized,
+   secondary compression remains off by default.
+   ftp://ftp.cl.cam.ac.uk/users/djw3/bred3.{c,ps}
+   --------------------------------------------------------------------
+
+			    Other Features
+
+   1. USER CONVENIENCE
+
+   For user convenience, it is essential to recognize Gzip-compressed
+   files and automatically Gzip-decompress them prior to
+   delta-compression (or else no delta-compression will be achieved
+   unless the user manually decompresses the inputs).  The compressed
+   representation competes with Xdelta, and this must be hidden from the
+   command-line user interface.  The Xdelta-1.x encoding was simple, not
+   compressed itself, so Xdelta-1.x uses Zlib internally to compress the
+   representation.
+
+   This implementation supports external compression, which implements
+   the necessary fork() and pipe() mechanics.  There is a tricky step
+   involved to support automatic detection of a compressed input in a
+   non-seekable input.  First you read a bit of the input to detect
+   magic headers.  When a compressed format is recognized, exec() the
+   external compression program and create a second child process to
+   copy the original input stream. [Footnote: There is a difficulty
+   related to using Gzip externally. It is not possible to decompress
+   and recompress a Gzip file transparently.  If FILE.GZ had a
+   cryptographic signature, then, after: (1) Gzip-decompression, (2)
+   Xdelta-encoding, (3) Gzip-compression the signature could be
+   broken.  The only way to solve this problem is to guess at Gzip's
+   compression level or control it by other means.  I recommend that
+   specific implementations of any compression scheme store
+   information needed to exactly re-compress the input, that way
+   external compression is transparent - however, this won't happen
+   here until it has stabilized.]
+
+   2. APPLICATION-HEADER
+
+   This feature was introduced in RFC3284.  It allows any application
+   to include a header within the VCDIFF file format.  This allows
+   general inter-application data exchange with support for
+   application-specific extensions to communicate metadata.
+
+   3. VCDIFF CHECKSUM
+
+   An optional checksum value is included with each window, which can
+   be used to validate the final result.  This verifies the correct source
+   file was used for decompression as well as the obvious advantage:
+   checking the implementation (and underlying) correctness.
+
+   4. LIGHT WEIGHT
+
+   The code makes efforts to avoid copying data more than necessary.
+   The code delays many initialization tasks until the first use, it
+   optimizes for identical (perfectly matching) inputs.  It does not
+   compute any checksums until the first lookup misses.  Memory usage
+   is reduced.  String-matching is templatized (by slightly gross use
+   of CPP) to hard-code alternative compile-time defaults.  The code
+   has few outside dependencies.
+   ----------------------------------------------------------------------
+
+		The default rfc3284 instruction table:
+		    (see RFC for the explanation)
+
+           TYPE      SIZE     MODE    TYPE     SIZE     MODE     INDEX
+   --------------------------------------------------------------------
+       1.  Run         0        0     Noop       0        0        0
+       2.  Add    0, [1,17]     0     Noop       0        0      [1,18]
+       3.  Copy   0, [4,18]     0     Noop       0        0     [19,34]
+       4.  Copy   0, [4,18]     1     Noop       0        0     [35,50]
+       5.  Copy   0, [4,18]     2     Noop       0        0     [51,66]
+       6.  Copy   0, [4,18]     3     Noop       0        0     [67,82]
+       7.  Copy   0, [4,18]     4     Noop       0        0     [83,98]
+       8.  Copy   0, [4,18]     5     Noop       0        0     [99,114]
+       9.  Copy   0, [4,18]     6     Noop       0        0    [115,130]
+      10.  Copy   0, [4,18]     7     Noop       0        0    [131,146]
+      11.  Copy   0, [4,18]     8     Noop       0        0    [147,162]
+      12.  Add       [1,4]      0     Copy     [4,6]      0    [163,174]
+      13.  Add       [1,4]      0     Copy     [4,6]      1    [175,186]
+      14.  Add       [1,4]      0     Copy     [4,6]      2    [187,198]
+      15.  Add       [1,4]      0     Copy     [4,6]      3    [199,210]
+      16.  Add       [1,4]      0     Copy     [4,6]      4    [211,222]
+      17.  Add       [1,4]      0     Copy     [4,6]      5    [223,234]
+      18.  Add       [1,4]      0     Copy       4        6    [235,238]
+      19.  Add       [1,4]      0     Copy       4        7    [239,242]
+      20.  Add       [1,4]      0     Copy       4        8    [243,246]
+      21.  Copy        4      [0,8]   Add        1        0    [247,255]
+   --------------------------------------------------------------------
+
+		     Reading the source: Overview
+
+   This file includes itself in several passes to macro-expand certain
+   sections with variable forms.  Just read ahead, there's only a
+   little confusion.  I know this sounds ugly, but hard-coding some of
+   the string-matching parameters results in a 10-15% increase in
+   string-match performance.  The only time this hurts is when you have
+   unbalanced #if/endifs.
+
+   A single compilation unit tames the Makefile.  In short, this is to
+   allow the above-described hack without an exploding Makefile.  The
+   single compilation unit includes the core library features,
+   configurable string-match templates, optional main() command-line
+   tool, misc optional features, and a regression test.  Features are
+   controlled with CPP #defines, see Makefile.am.
+
+   The initial __XDELTA3_C_HEADER_PASS__ starts first, the _INLINE_ and
+   _TEMPLATE_ sections follow.  Easy stuff first, hard stuff last.
+
+   Optional features include:
+
+     xdelta3-main.h     The command-line interface, external compression
+                        support, POSIX-specific, info & VCDIFF-debug tools.
+     xdelta3-second.h   The common secondary compression routines.
+     xdelta3-decoder.h  All decoding routines.
+     xdelta3-djw.h      The semi-adaptive huffman secondary encoder.
+     xdelta3-fgk.h      The adaptive huffman secondary encoder.
+     xdelta3-test.h     The unit test covers major algorithms,
+                        encoding and decoding.  There are single-bit
+                        error decoding tests.  There are 32/64-bit file size
+                        boundary tests.  There are command-line tests.
+                        There are compression tests.  There are external
+                        compression tests.  There are string-matching tests.
+			There should be more tests...
+
+   Additional headers include:
+
+     xdelta3.h          The public header file.
+     xdelta3-cfgs.h     The default settings for default, built-in
+                        encoders.  These are hard-coded at
+                        compile-time.  There is also a single
+                        soft-coded string matcher for experimenting
+                        with arbitrary values.
+     xdelta3-list.h     A cyclic list template
+
+   Misc little debug utilities:
+
+     badcopy.c          Randomly modifies an input file based on two
+                        parameters: (1) the probability that a byte in
+                        the file is replaced with a pseudo-random value,
+                        and (2) the mean change size.  Changes are
+                        generated using an exponential distribution
+                        which approximates the expected error_prob
+			distribution.
+   --------------------------------------------------------------------
+
+   This file itself is unusually large.  I hope to defend this layout
+   with lots of comments.  Everything in this file is related to
+   encoding and decoding.  I like it all together - the template stuff
+   is just a hack. */
+
+#ifndef __XDELTA3_C_HEADER_PASS__
+#define __XDELTA3_C_HEADER_PASS__
+
+#include "xdelta3.h"
+#include "xdelta3-internal.h"
+
+/***********************************************************************
+ STATIC CONFIGURATION
+ ***********************************************************************/
+
+#ifndef XD3_MAIN                  /* the main application */
+#define XD3_MAIN 0
+#endif
+
+#ifndef VCDIFF_TOOLS
+#define VCDIFF_TOOLS XD3_MAIN
+#endif
+
+#ifndef SECONDARY_FGK    /* one from the algorithm preservation department: */
+#define SECONDARY_FGK 0  /* adaptive Huffman routines */
+#endif
+
+#ifndef SECONDARY_DJW    /* semi-adaptive/static Huffman for the eventual */
+#define SECONDARY_DJW 0  /* standardization, off by default until such time. */
+#endif
+
+#ifndef SECONDARY_LZMA
+#ifdef HAVE_LZMA_H
+#define SECONDARY_LZMA 1
+#else
+#define SECONDARY_LZMA 0
+#endif
+#endif
+
+#if XD3_ENCODER
+#define IF_ENCODER(x) x
+#else
+#define IF_ENCODER(x)
+#endif
+
+/***********************************************************************/
+
+  /* header indicator bits */
+#define VCD_SECONDARY (1U << 0)  /* uses secondary compressor */
+#define VCD_CODETABLE (1U << 1)  /* supplies code table data */
+#define VCD_APPHEADER (1U << 2)  /* supplies application data */
+#define VCD_INVHDR    (~0x7U)
+
+  /* window indicator bits */
+#define VCD_SOURCE   (1U << 0)  /* copy window in source file */
+#define VCD_TARGET   (1U << 1)  /* copy window in target file */
+#define VCD_ADLER32  (1U << 2)  /* has adler32 checksum */
+#define VCD_INVWIN   (~0x7U)
+
+#define VCD_SRCORTGT (VCD_SOURCE | VCD_TARGET)
+
+  /* delta indicator bits */
+#define VCD_DATACOMP (1U << 0)
+#define VCD_INSTCOMP (1U << 1)
+#define VCD_ADDRCOMP (1U << 2)
+#define VCD_INVDEL   (~0x7U)
+
+typedef enum {  /* values placed in the id slot of the xd3_sec_type tables */
+  VCD_DJW_ID    = 1,  /* used by djw_sec_type ("Static Huffman") */
+  VCD_LZMA_ID   = 2,  /* used by lzma_sec_type ("lzma") */
+  VCD_FGK_ID    = 16  /* Note: these are not standard IANA-allocated IDs! */
+} xd3_secondary_ids;
+
+typedef enum {  /* type of the flags member of struct _xd3_sec_type */
+  SEC_NOFLAGS     = 0,  /* no flag bits set */
+
+  /* Note: SEC_COUNT_FREQS Not implemented (to eliminate 1st Huffman pass) */
+  SEC_COUNT_FREQS = (1 << 0)
+} xd3_secondary_flags;
+
+typedef enum {
+  DATA_SECTION, /* These tell the secondary compressor which
+                 * section it is being given. */
+  INST_SECTION, /* The header section is not compressed, therefore not
+                 * listed here. */
+  ADDR_SECTION
+} xd3_section_type;
+
+typedef unsigned int xd3_rtype;
+
+/***********************************************************************/
+
+#include "xdelta3-list.h"
+
+#if XD3_ENCODER
+XD3_MAKELIST(xd3_rlist, xd3_rinst, link);
+#endif
+
+/***********************************************************************/
+
+#define SECONDARY_MIN_SAVINGS 2  /* Secondary compression has to save
+				    at least this many bytes. */
+#define SECONDARY_MIN_INPUT   10 /* Secondary compression needs at
+				    least this many bytes. */
+
+#define VCDIFF_MAGIC1  0xd6  /* 1st file byte */
+#define VCDIFF_MAGIC2  0xc3  /* 2nd file byte */
+#define VCDIFF_MAGIC3  0xc4  /* 3rd file byte */
+#define VCDIFF_VERSION 0x00  /* 4th file byte */
+
+#define VCD_SELF       0     /* 1st address mode */
+#define VCD_HERE       1     /* 2nd address mode */
+
+#define SECONDARY_ANY (SECONDARY_DJW || SECONDARY_FGK || SECONDARY_LZMA)
+
+#define ALPHABET_SIZE      256  /* Used in test code--size of the secondary
+				 * compressor alphabet. */
+
+#define HASH_CKOFFSET      1U   /* Table entries distinguish "no-entry" from
+				 * offset 0 using this offset. */
+
+#define MAX_MATCH_SPLIT   18U   /* VCDIFF code table: 18 is the default limit
+				 * for direct-coded ADD sizes */
+
+#define LEAST_MATCH_INCR  0   /* The least number of bytes an overlapping
+			       * match must beat the preceding match by.  This
+			       * is a bias for the lazy match optimization.  A
+			       * non-zero value means that an adjacent match
+			       * has to be better by more than the step
+			       * between them.  0. */
+
+#define MIN_MATCH         4U  /* VCDIFF code table: MIN_MATCH=4 */
+#define MIN_RUN           8U  /* The shortest run, if it is shorter than this
+			       * an immediate add/copy will be just as good.
+			       * ADD1/COPY6 = 1I+1D+1A bytes, RUN18 =
+			       * 1I+1D+1A. */
+
+#define MAX_MODES         9  /* Maximum number of modes used for
+			      * compression--does not limit decompression. */
+
+#define ENC_SECTS         4  /* Number of separate output sections. */
+
+#define HDR_TAIL(s)  ((s)->enc_tails[0])
+#define DATA_TAIL(s) ((s)->enc_tails[1])
+#define INST_TAIL(s) ((s)->enc_tails[2])
+#define ADDR_TAIL(s) ((s)->enc_tails[3])
+
+#define HDR_HEAD(s)  ((s)->enc_heads[0])
+#define DATA_HEAD(s) ((s)->enc_heads[1])
+#define INST_HEAD(s) ((s)->enc_heads[2])
+#define ADDR_HEAD(s) ((s)->enc_heads[3])
+
+/* Template instances. */
+#if XD3_BUILD_SLOW
+#define IF_BUILD_SLOW(x) x
+#else
+#define IF_BUILD_SLOW(x)
+#endif
+#if XD3_BUILD_FAST
+#define IF_BUILD_FAST(x) x
+#else
+#define IF_BUILD_FAST(x)
+#endif
+#if XD3_BUILD_FASTER
+#define IF_BUILD_FASTER(x) x
+#else
+#define IF_BUILD_FASTER(x)
+#endif
+#if XD3_BUILD_FASTEST
+#define IF_BUILD_FASTEST(x) x
+#else
+#define IF_BUILD_FASTEST(x)
+#endif
+#if XD3_BUILD_SOFT
+#define IF_BUILD_SOFT(x) x
+#else
+#define IF_BUILD_SOFT(x)
+#endif
+#if XD3_BUILD_DEFAULT
+#define IF_BUILD_DEFAULT(x) x
+#else
+#define IF_BUILD_DEFAULT(x)
+#endif
+
+/* Update the run-length state */
+#define NEXTRUN(c) do { if ((c) == run_c) { run_l += 1; } \
+  else { run_c = (c); run_l = 1; } } while (0)
+
+/* This CPP-conditional stuff can be cleaned up... */
+#if REGRESSION_TEST
+#define IF_REGRESSION(x) x
+#else
+#define IF_REGRESSION(x)
+#endif
+
+/***********************************************************************/
+
+#if XD3_ENCODER
+static void*       xd3_alloc0 (xd3_stream *stream,
+			       usize_t      elts,
+			       usize_t      size);
+
+
+static int         xd3_alloc_iopt (xd3_stream *stream, usize_t elts);
+
+static void        xd3_free_output (xd3_stream *stream,
+				    xd3_output *output);
+
+static int         xd3_emit_double (xd3_stream *stream, xd3_rinst *first,
+				    xd3_rinst *second, uint8_t code);
+static int         xd3_emit_single (xd3_stream *stream, xd3_rinst *single,
+				    uint8_t code);
+
+static usize_t      xd3_sizeof_output (xd3_output *output);
+static void        xd3_encode_reset (xd3_stream *stream);
+
+static int         xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos);
+static int         xd3_source_extend_match (xd3_stream *stream);
+static int         xd3_srcwin_setup (xd3_stream *stream);
+static usize_t     xd3_iopt_last_matched (xd3_stream *stream);
+static int         xd3_emit_uint32_t (xd3_stream *stream, xd3_output **output,
+				      uint32_t num);
+
+static usize_t xd3_smatch (xd3_stream *stream,
+			   usize_t base,
+			   usize_t scksum,
+			   usize_t *match_offset);
+static int xd3_string_match_init (xd3_stream *stream);
+static uint32_t xd3_scksum (uint32_t *state, const uint8_t *seg,
+			    const usize_t ln);
+static usize_t xd3_comprun (const uint8_t *seg, usize_t slook, uint8_t *run_cp);
+static int xd3_srcwin_move_point (xd3_stream *stream,
+				  usize_t *next_move_point);
+
+static int xd3_emit_run (xd3_stream *stream, usize_t pos,
+			 usize_t size, uint8_t *run_c);
+static xoff_t xd3_source_cksum_offset(xd3_stream *stream, usize_t low);
+static void xd3_scksum_insert (xd3_stream *stream,
+			       usize_t inx,
+			       usize_t scksum,
+			       usize_t pos);
+
+
+#if XD3_DEBUG
+static void xd3_verify_run_state (xd3_stream    *stream,
+				  const uint8_t *inp,
+				  usize_t        x_run_l,
+				  uint8_t       *x_run_c);
+static void xd3_verify_large_state (xd3_stream *stream,
+				    const uint8_t *inp,
+				    usize_t x_cksum);
+static void xd3_verify_small_state (xd3_stream    *stream,
+				    const uint8_t *inp,
+				    uint32_t       x_cksum);
+
+#endif /* XD3_DEBUG */
+#endif /* XD3_ENCODER */
+
+static int         xd3_decode_allocate (xd3_stream *stream, usize_t size,
+					uint8_t **copied1, usize_t *alloc1);
+
+static void*       xd3_alloc (xd3_stream *stream, usize_t elts, usize_t size);
+static void        xd3_free  (xd3_stream *stream, void *ptr);
+
+const char* xd3_strerror (int ret)  /* map an XD3_* code to its name */
+{
+  switch (ret)
+    {  /* each case returns the constant's own spelling as a string */
+    case XD3_INPUT: return "XD3_INPUT";
+    case XD3_OUTPUT: return "XD3_OUTPUT";
+    case XD3_GETSRCBLK: return "XD3_GETSRCBLK";
+    case XD3_GOTHEADER: return "XD3_GOTHEADER";
+    case XD3_WINSTART: return "XD3_WINSTART";
+    case XD3_WINFINISH: return "XD3_WINFINISH";
+    case XD3_TOOFARBACK: return "XD3_TOOFARBACK";
+    case XD3_INTERNAL: return "XD3_INTERNAL";
+    case XD3_INVALID: return "XD3_INVALID";
+    case XD3_INVALID_INPUT: return "XD3_INVALID_INPUT";
+    case XD3_NOSECOND: return "XD3_NOSECOND";
+    case XD3_UNIMPLEMENTED: return "XD3_UNIMPLEMENTED";
+    }
+  return NULL;  /* ret matched none of the cases; callers must handle NULL */
+}
+
+/***********************************************************************/
+
+#define xd3_sec_data(s) ((s)->sec_stream_d)
+#define xd3_sec_inst(s) ((s)->sec_stream_i)
+#define xd3_sec_addr(s) ((s)->sec_stream_a)
+
+struct _xd3_sec_type  /* one secondary compressor: id, name, flags, hooks */
+{
+  uint8_t       id;  /* e.g. VCD_DJW_ID, VCD_LZMA_ID or VCD_FGK_ID */
+  const char *name;  /* descriptive string, e.g. "lzma" */
+  xd3_secondary_flags flags;  /* SEC_NOFLAGS or SEC_COUNT_FREQS */
+
+  /* xd3_sec_stream is opaque to the generic code */
+  xd3_sec_stream* (*alloc)   (xd3_stream     *stream);
+  void            (*destroy) (xd3_stream     *stream,
+			      xd3_sec_stream *sec);
+  int             (*init)    (xd3_stream     *stream,
+			      xd3_sec_stream *sec_stream,
+			      int             is_encode);
+  int             (*decode)  (xd3_stream     *stream,
+			      xd3_sec_stream *sec_stream,
+			      const uint8_t **input,
+			      const uint8_t  *input_end,
+			      uint8_t       **output,
+			      const uint8_t  *output_end);
+#if XD3_ENCODER  /* the encode hook exists only when XD3_ENCODER is set */
+  int             (*encode)  (xd3_stream     *stream,
+			      xd3_sec_stream *sec_stream,
+			      xd3_output     *input,
+			      xd3_output     *output,
+			      xd3_sec_cfg    *cfg);
+#endif
+};
+
+#define BIT_STATE_ENCODE_INIT { 0, 1 }
+#define BIT_STATE_DECODE_INIT { 0, 0x100 }
+
+typedef struct _bit_state bit_state;
+struct _bit_state  /* initialised with one of the BIT_STATE_*_INIT macros */
+{
+  uint8_t cur_byte;  /* 0 in both the encode and decode initialisers */
+  usize_t cur_mask;  /* 1 in the encode initialiser, 0x100 in the decode one */
+};
+
+#if SECONDARY_ANY == 0
+#define IF_SEC(x)
+#define IF_NSEC(x) x
+#else /* yuck */
+#define IF_SEC(x) x
+#define IF_NSEC(x)
+static int
+xd3_decode_secondary (xd3_stream      *stream,
+		      xd3_desect      *sect,
+		      xd3_sec_stream **sec_streamp);
+#if XD3_ENCODER
+static int
+xd3_encode_secondary (xd3_stream      *stream,
+		      xd3_output     **head,
+		      xd3_output     **tail,
+		      xd3_sec_stream **sec_streamp,
+		      xd3_sec_cfg     *cfg,
+		      int             *did_it);
+#endif
+#endif /* SECONDARY_ANY */
+
+#if SECONDARY_FGK
+extern const xd3_sec_type fgk_sec_type;
+#define IF_FGK(x) x
+#define FGK_CASE(s) \
+  s->sec_type = & fgk_sec_type; \
+  break;
+#else
+#define IF_FGK(x)
+#define FGK_CASE(s) \
+  s->msg = "unavailable secondary compressor: FGK Adaptive Huffman"; \
+  return XD3_INTERNAL;
+#endif
+
+#if SECONDARY_DJW
+extern const xd3_sec_type djw_sec_type;
+#define IF_DJW(x) x
+#define DJW_CASE(s) \
+  s->sec_type = & djw_sec_type; \
+  break;
+#else
+#define IF_DJW(x)
+#define DJW_CASE(s) \
+  s->msg = "unavailable secondary compressor: DJW Static Huffman"; \
+  return XD3_INTERNAL;
+#endif
+
+#if SECONDARY_LZMA
+extern const xd3_sec_type lzma_sec_type;
+#define IF_LZMA(x) x
+#define LZMA_CASE(s) \
+  s->sec_type = & lzma_sec_type; \
+  break;
+#else
+#define IF_LZMA(x)
+#define LZMA_CASE(s) \
+  s->msg = "unavailable secondary compressor: LZMA"; \
+  return XD3_INTERNAL;
+#endif
+
+/***********************************************************************/
+
+#include "xdelta3-hash.h"
+
+/* Process template passes - this includes xdelta3.c several times. */
+#define __XDELTA3_C_TEMPLATE_PASS__
+#include "xdelta3-cfgs.h"
+#undef __XDELTA3_C_TEMPLATE_PASS__
+
+/* Process the inline pass. */
+#define __XDELTA3_C_INLINE_PASS__
+#include "xdelta3.c"
+#undef __XDELTA3_C_INLINE_PASS__
+
+/* Secondary compression */
+#if SECONDARY_ANY
+#include "xdelta3-second.h"
+#endif
+
+#if SECONDARY_FGK
+#include "xdelta3-fgk.h"
+const xd3_sec_type fgk_sec_type =  /* positional init of struct _xd3_sec_type */
+{
+  VCD_FGK_ID,  /* id */
+  "FGK Adaptive Huffman",  /* name */
+  SEC_NOFLAGS,  /* flags */
+  (xd3_sec_stream* (*)(xd3_stream*)) fgk_alloc,  /* alloc */
+  (void (*)(xd3_stream*, xd3_sec_stream*)) fgk_destroy,  /* destroy */
+  (int (*)(xd3_stream*, xd3_sec_stream*, int)) fgk_init,  /* init */
+  (int (*)(xd3_stream*, xd3_sec_stream*, const uint8_t**, const uint8_t*,
+	   uint8_t**, const uint8_t*)) xd3_decode_fgk,  /* decode */
+  IF_ENCODER((int (*)(xd3_stream*, xd3_sec_stream*, xd3_output*,
+		      xd3_output*, xd3_sec_cfg*))   xd3_encode_fgk)  /* encode */
+};
+#endif
+
+#if SECONDARY_DJW
+#include "xdelta3-djw.h"
+const xd3_sec_type djw_sec_type =  /* positional init of struct _xd3_sec_type */
+{
+  VCD_DJW_ID,  /* id */
+  "Static Huffman",  /* name */
+  SEC_COUNT_FREQS,  /* flags */
+  (xd3_sec_stream* (*)(xd3_stream*)) djw_alloc,  /* alloc */
+  (void (*)(xd3_stream*, xd3_sec_stream*)) djw_destroy,  /* destroy */
+  (int (*)(xd3_stream*, xd3_sec_stream*, int)) djw_init,  /* init */
+  (int (*)(xd3_stream*, xd3_sec_stream*, const uint8_t**, const uint8_t*,
+	   uint8_t**, const uint8_t*)) xd3_decode_huff,  /* decode */
+  IF_ENCODER((int (*)(xd3_stream*, xd3_sec_stream*, xd3_output*,
+		      xd3_output*, xd3_sec_cfg*))   xd3_encode_huff)  /* encode */
+};
+#endif
+
+#if SECONDARY_LZMA
+#include "xdelta3-lzma.h"
+const xd3_sec_type lzma_sec_type =  /* positional init of struct _xd3_sec_type */
+{
+  VCD_LZMA_ID,  /* id */
+  "lzma",  /* name */
+  SEC_NOFLAGS,  /* flags */
+  (xd3_sec_stream* (*)(xd3_stream*)) xd3_lzma_alloc,  /* alloc */
+  (void (*)(xd3_stream*, xd3_sec_stream*)) xd3_lzma_destroy,  /* destroy */
+  (int (*)(xd3_stream*, xd3_sec_stream*, int)) xd3_lzma_init,  /* init */
+  (int (*)(xd3_stream*, xd3_sec_stream*, const uint8_t**, const uint8_t*,
+	   uint8_t**, const uint8_t*)) xd3_decode_lzma,  /* decode */
+  IF_ENCODER((int (*)(xd3_stream*, xd3_sec_stream*, xd3_output*,
+		      xd3_output*, xd3_sec_cfg*))   xd3_encode_lzma)  /* encode */
+};
+#endif
+
+#if XD3_MAIN || PYTHON_MODULE || SWIG_MODULE || NOT_MAIN
+#include "xdelta3-main.h"
+#endif
+
+#if REGRESSION_TEST
+#include "xdelta3-test.h"
+#endif
+
+#endif /* __XDELTA3_C_HEADER_PASS__ */
+#ifdef __XDELTA3_C_INLINE_PASS__
+
+/****************************************************************
+ Instruction tables
+ *****************************************************************/
+
+/* The following code implements a parametrized description of the
+ * code table given above for a few reasons.  It is not necessary for
+ * implementing the standard, to support compression with variable
+ * tables, so an implementation is only required to know the default
+ * code table to begin decompression.  (If the encoder uses an
+ * alternate table, the table is included in compressed form inside
+ * the VCDIFF file.)
+ *
+ * Before adding variable-table support there were two functions which
+ * were hard-coded to the default table above.
+ * xd3_compute_default_table() would create the default table by
+ * filling a 256-elt array of xd3_dinst values.  The corresponding
+ * function, xd3_choose_instruction(), would choose an instruction
+ * based on the hard-coded parameters of the default code table.
+ *
+ * Notes: The parametrized code table description here only generates
+ * tables of a certain regularity similar to the default table by
+ * allowing to vary the distribution of single- and
+ * double-instructions and change the number of near and same copy
+ * modes.  More exotic tables are only possible by extending this
+ * code.
+ *
+ * For performance reasons, both the parametrized and non-parametrized
+ * versions of xd3_choose_instruction remain.  The parametrized
+ * version is only needed for testing multi-table decoding support.
+ * If ever multi-table encoding is required, this can be optimized by
+ * compiling static functions for each table.
+ */
+
+/* XD3_CHOOSE_INSTRUCTION forwards to xd3_choose_instruction (prev,
+ * inst); the STREAM argument is accepted but not used.  It used to
+ * pass the table description when GENERIC_ENCODE_TABLES were in use;
+ * that generic-code-table specific code was removed 10/2014. */
+#define XD3_CHOOSE_INSTRUCTION(stream,prev,inst) \
+  xd3_choose_instruction (prev, inst)
+
+/* This structure maintains information needed by
+ * xd3_choose_instruction to compute the code for a double instruction
+ * by first indexing an array of code_table_sizes by copy mode, then
+ * using (offset + (multiplier * X)) */
+struct _xd3_code_table_sizes {
+  uint8_t cpy_max;  /* presumably the largest copy size covered for this mode */
+  uint8_t offset;   /* the "offset" term of the formula above */
+  uint8_t mult;     /* the "multiplier" term of the formula above */
+};
+
+/* Complete description of a code table (expanded by xd3_build_code_table). */
+struct _xd3_code_table_desc
+{
+  /* Assumes a single RUN instruction */
+  /* Assumes that MIN_MATCH is 4 */
+
+  uint8_t add_sizes;            /* Number of immediate-size single
+				   adds (default 17) */
+  uint8_t near_modes;           /* Number of near copy modes (default 4) */
+  uint8_t same_modes;           /* Number of same copy modes (default 3) */
+  uint8_t cpy_sizes;            /* Number of immediate-size single
+				   copies (default 15) */
+
+  uint8_t addcopy_add_max;      /* Maximum add size for an add-copy
+				   double instruction, all modes
+				   (default 4) */
+  uint8_t addcopy_near_cpy_max; /* Maximum cpy size for an add-copy
+				   double instruction, up through
+				   VCD_NEAR modes (default 6) */
+  uint8_t addcopy_same_cpy_max; /* Maximum cpy size for an add-copy
+				   double instruction, VCD_SAME modes
+				   (default 4) */
+
+  uint8_t copyadd_add_max;      /* Maximum add size for a copy-add
+				   double instruction, all modes
+				   (default 1) */
+  uint8_t copyadd_near_cpy_max; /* Maximum cpy size for a copy-add
+				   double instruction, up through
+				   VCD_NEAR modes (default 4) */
+  uint8_t copyadd_same_cpy_max; /* Maximum cpy size for a copy-add
+				   double instruction, VCD_SAME modes
+				   (default 4) */
+
+  xd3_code_table_sizes addcopy_max_sizes[MAX_MODES];  /* add-copy parameters, indexed by copy mode */
+  xd3_code_table_sizes copyadd_max_sizes[MAX_MODES];  /* copy-add parameters, indexed by copy mode */
+};
+
+/* Description of the default rfc3284 code table (see xd3_rfc3284_code_table). */
+static const xd3_code_table_desc __rfc3284_code_table_desc = {
+  17, /* add sizes */
+  4,  /* near modes */
+  3,  /* same modes */
+  15, /* copy sizes */
+
+  4,  /* add-copy max add */
+  6,  /* add-copy max cpy, near */
+  4,  /* add-copy max cpy, same */
+
+  1,  /* copy-add max add */
+  4,  /* copy-add max cpy, near */
+  4,  /* copy-add max cpy, same */
+
+  /* addcopy: {cpy_max, offset, mult}, one entry per copy mode */
+  { {6,163,3},{6,175,3},{6,187,3},{6,199,3},{6,211,3},{6,223,3},
+    {4,235,1},{4,239,1},{4,243,1} },
+  /* copyadd: {cpy_max, offset, mult}, one entry per copy mode */
+  { {4,247,1},{4,248,1},{4,249,1},{4,250,1},{4,251,1},{4,252,1},
+    {4,253,1},{4,254,1},{4,255,1} },
+};
+
+/* Fills TBL (256 entries, asserted below) from the description DESC. */
+static void
+xd3_build_code_table (const xd3_code_table_desc *desc, xd3_dinst *tbl)
+{
+  uint8_t size1, size2;
+  uint8_t mode;
+  usize_t cpy_modes = 2U + desc->near_modes + desc->same_modes;  /* 2U: the VCD_SELF and VCD_HERE modes */
+  xd3_dinst *d = tbl;
+
+  (d++)->type1 = XD3_RUN;  /* entry 0: the single RUN instruction */
+  (d++)->type1 = XD3_ADD;  /* entry 1: ADD; size1 is not written here */
+  /* Single ADDs with an immediate size of 1..add_sizes. */
+  for (size1 = 1; size1 <= desc->add_sizes; size1 += 1, d += 1)
+    {
+      d->type1 = XD3_ADD;
+      d->size1 = size1;
+    }
+  /* Per copy mode: one CPY whose size1 is not written, then cpy_sizes immediate sizes. */
+  for (mode = 0; mode < cpy_modes; mode += 1)
+    {
+      (d++)->type1 = XD3_CPY + mode;
+
+      for (size1 = MIN_MATCH; size1 < MIN_MATCH + desc->cpy_sizes;
+	   size1 += 1, d += 1)
+	{
+	  d->type1 = XD3_CPY + mode;
+	  d->size1 = size1;
+	}
+    }
+  /* Double instructions: an ADD followed by a CPY. */
+  for (mode = 0; mode < cpy_modes; mode += 1)
+    {
+      for (size1 = 1; size1 <= desc->addcopy_add_max; size1 += 1)
+	{
+	  usize_t max = (mode < 2U + desc->near_modes) ?
+	    desc->addcopy_near_cpy_max :
+	    desc->addcopy_same_cpy_max;
+
+	  for (size2 = MIN_MATCH; size2 <= max; size2 += 1, d += 1)
+	    {
+	      d->type1 = XD3_ADD;
+	      d->size1 = size1;
+	      d->type2 = XD3_CPY + mode;
+	      d->size2 = size2;
+	    }
+	}
+    }
+  /* Double instructions: a CPY followed by an ADD. */
+  for (mode = 0; mode < cpy_modes; mode += 1)
+    {
+      usize_t max = (mode < 2U + desc->near_modes) ?
+	desc->copyadd_near_cpy_max :
+	desc->copyadd_same_cpy_max;
+
+      for (size1 = MIN_MATCH; size1 <= max; size1 += 1)
+	{
+	  for (size2 = 1; size2 <= desc->copyadd_add_max; size2 += 1, d += 1)
+	    {
+	      d->type1 = XD3_CPY + mode;
+	      d->size1 = size1;
+	      d->type2 = XD3_ADD;
+	      d->size2 = size2;
+	    }
+	}
+    }
+
+  XD3_ASSERT (d - tbl == 256);
+}
+
+/* Returns the static default code table, building it on the first call. */
+static const xd3_dinst*
+xd3_rfc3284_code_table (void)
+{
+  static xd3_dinst __rfc3284_code_table[256];
+  /* Entry 0 is set to XD3_RUN by the builder, so it doubles as the "built" flag. */
+  if (__rfc3284_code_table[0].type1 != XD3_RUN)
+    {
+      xd3_build_code_table (& __rfc3284_code_table_desc, __rfc3284_code_table);
+    }
+  /* NOTE(review): no locking is visible here -- confirm first use cannot race. */
+  return __rfc3284_code_table;
+}
+
+#if XD3_ENCODER
+/* Hard-coded for the default table: sets INST->code1 and, when PREV and
+   INST fit a double instruction, PREV->code2.  PREV may be NULL. */
+static void
+xd3_choose_instruction (xd3_rinst *prev, xd3_rinst *inst)
+{
+  switch (inst->type)
+    {
+    case XD3_RUN:
+      inst->code1 = 0;
+      break;
+
+    case XD3_ADD:
+      inst->code1 = 1;
+
+      if (inst->size <= 17)
+	{
+	  inst->code1 += inst->size;
+	  /* CPY of size 4 followed by ADD of size 1: copy-add codes from 247. */
+	  if ( (inst->size == 1) &&
+	       (prev != NULL) &&
+	       (prev->size == 4) &&
+	       (prev->type >= XD3_CPY) )
+	    {
+	      prev->code2 = 247 + (prev->type - XD3_CPY);
+	    }
+	}
+
+      break;
+
+    default:
+      {
+	uint8_t mode = inst->type - XD3_CPY;
+
+	XD3_ASSERT (inst->type >= XD3_CPY && inst->type < 12);
+
+	inst->code1 = 19 + 16 * mode;  /* first CPY code of this mode (16 codes per mode) */
+
+	if (inst->size <= 18 && inst->size >= 4)
+	  {
+	    inst->code1 += inst->size - 3;
+	    /* ADD of size <= 4 followed by this CPY may form an add-copy double. */
+	    if ( (prev != NULL) &&
+		 (prev->type == XD3_ADD) &&
+		 (prev->size <= 4) )
+	      {
+		if ( (inst->size <= 6) &&
+		     (mode       <= 5) )
+		  {
+		    prev->code2 = (uint8_t)(163 + (mode * 12) +
+					    (3 * (prev->size - 1)) +
+					    (inst->size - 4));
+		    XD3_ASSERT (prev->code2 <= 234);
+		  }
+		else if ( (inst->size == 4) &&
+			  (mode       >= 6) )
+		  {
+		    prev->code2 = 235 + ((mode - 6) * 4) + (prev->size - 1);
+
+		    XD3_ASSERT (prev->code2 <= 246);
+		  }
+	      }
+	  }
+
+	XD3_ASSERT (inst->code1 <= 162);
+      }
+      break;
+    }
+}
+#endif /* XD3_ENCODER */
+
+/***********************************************************************/
+
+static inline void
+xd3_swap_uint8p (uint8_t** p1, uint8_t** p2)
+{
+  uint8_t *held = *p2;  /* Exchange the two pointers through a temporary. */
+  *p2 = *p1;
+  *p1 = held;
+}
+
+static inline void
+xd3_swap_usize_t (usize_t* p1, usize_t* p2)
+{
+  usize_t held = *p2;  /* Exchange the two values through a temporary. */
+  *p2 = *p1;
+  *p1 = held;
+}
+
+/* Tests whether VALUE is a power of two by comparing it against each
+ * single-bit value in turn (linear in the width of xoff_t, not
+ * constant time).  Returns 0 on a match with the base-2 log stored
+ * through LOGOF; returns XD3_INTERNAL otherwise, in which case the
+ * stored count equals the bit width of xoff_t.  LOGOF may be NULL. */
+static int
+xd3_check_pow2 (xoff_t value, usize_t *logof)
+{
+  usize_t discard;
+  usize_t *out = (logof != NULL) ? logof : &discard;
+  xoff_t bit;
+
+  *out = 0;
+
+  for (bit = 1; bit != 0; bit <<= 1)
+    {
+      if (bit == value) { return 0; }
+      *out += 1;
+    }
+
+  return XD3_INTERNAL;
+}
+
+usize_t
+xd3_pow2_roundup (usize_t x)
+{
+  /* Smallest power of two >= X (1 for X == 0).  Stops at the top bit
+   * instead of shifting I out to 0 and then looping forever. */
+  usize_t i = 1;
+  while (x > i && (usize_t) (i << 1U) != 0) { i <<= 1U; }
+  return i;
+}
+
+static xoff_t
+xd3_xoff_roundup (xoff_t x)
+{
+  /* Smallest power of two >= X (1 for X == 0).  Stops at the top bit
+   * instead of shifting I out to 0 and then looping forever. */
+  xoff_t i = 1;
+  while (x > i && (xoff_t) (i << 1U) != 0) { i <<= 1U; }
+  return i;
+}
+
+static usize_t
+xd3_round_blksize (usize_t sz, usize_t blksz)
+{
+  usize_t mod = sz & (blksz-1);  /* remainder of sz / blksz, given blksz is a power of two */
+  /* Returns SZ if it is a multiple of BLKSZ, else SZ rounded up (see the clamp below). */
+  XD3_ASSERT (xd3_check_pow2 (blksz, NULL) == 0);
+
+  if (mod == 0)
+    {
+      return sz;
+    }
+  /* An unaligned SZ above the limit is clamped to USIZE_T_MAXBLKSZ rather than rounded up. */
+  if (sz > USIZE_T_MAXBLKSZ)
+    {
+      return USIZE_T_MAXBLKSZ;
+    }
+
+  return sz + (blksz - mod);
+}
+
+/***********************************************************************
+ Adler32 stream function: code copied from Zlib, defined in RFC1950
+ ***********************************************************************/
+
+#define A32_BASE 65521L /* Largest prime smaller than 2^16 */
+#define A32_NMAX 5552   /* NMAX is the largest n such that 255n(n+1)/2
+			   + (n+1)(BASE-1) <= 2^32-1 */
+
+#define A32_DO1(buf,i)  {s1 += buf[i]; s2 += s1;}
+#define A32_DO2(buf,i)  A32_DO1(buf,i); A32_DO1(buf,i+1);
+#define A32_DO4(buf,i)  A32_DO2(buf,i); A32_DO2(buf,i+2);
+#define A32_DO8(buf,i)  A32_DO4(buf,i); A32_DO4(buf,i+4);
+#define A32_DO16(buf)   A32_DO8(buf,0); A32_DO8(buf,8);
+/* Folds LEN bytes at BUF into the running checksum ADLER and returns the new value. */
+static uint32_t adler32 (uint32_t adler, const uint8_t *buf, usize_t len)
+{
+    uint32_t s1 = adler & 0xffffU;          /* low 16 bits: running byte sum */
+    uint32_t s2 = (adler >> 16) & 0xffffU;  /* high 16 bits: running sum of s1 */
+    int k;
+
+    while (len > 0)
+      {
+        k    = (len < A32_NMAX) ? len : A32_NMAX;  /* at most A32_NMAX bytes between reductions */
+        len -= k;
+
+	while (k >= 16)
+	  {
+	    A32_DO16(buf);
+	    buf += 16;
+            k -= 16;
+	  }
+	/* Remaining 1..15 bytes of this chunk, one at a time. */
+	if (k != 0)
+	  {
+	    do
+	      {
+		s1 += *buf++;
+		s2 += s1;
+	      }
+	    while (--k);
+	  }
+
+        s1 %= A32_BASE;
+        s2 %= A32_BASE;
+    }
+
+    return (s2 << 16) | s1;
+}
+
+/***********************************************************************
+ Run-length function
+ ***********************************************************************/
+
+#if XD3_ENCODER
+static usize_t
+xd3_comprun (const uint8_t *seg, usize_t slook, uint8_t *run_cp)
+{
+  usize_t i;
+  usize_t run_l = 0;
+  uint8_t run_c = 0;
+  /* Feeds SLOOK bytes of SEG to NEXTRUN; stores run_c through RUN_CP and returns run_l. */
+  for (i = 0; i < slook; i += 1)
+    {
+      NEXTRUN(seg[i]);  /* macro defined elsewhere; presumably updates run_c / run_l -- confirm */
+    }
+
+  (*run_cp) = run_c;
+
+  return run_l;
+}
+#endif
+
+/***********************************************************************
+ Basic encoder/decoder functions
+ ***********************************************************************/
+
+#if XD3_ENCODER
+inline int  /* 0 on success, ENOMEM when a new page cannot be allocated */
+xd3_emit_byte (xd3_stream  *stream,
+	       xd3_output **outputp,
+	       uint8_t      code)
+{
+  xd3_output *output = (*outputp);
+  /* Current page is full: chain a new page and make it the caller's tail. */
+  if (output->next == output->avail)
+    {
+      xd3_output *aoutput;
+
+      if ((aoutput = xd3_alloc_output (stream, output)) == NULL)
+	{
+	  return ENOMEM;
+	}
+
+      output = (*outputp) = aoutput;
+    }
+
+  output->base[output->next++] = code;  /* append CODE at the page's write offset */
+
+  return 0;
+}
+
+inline int  /* 0 on success, ENOMEM when a new page cannot be allocated */
+xd3_emit_bytes (xd3_stream     *stream,
+		xd3_output    **outputp,
+		const uint8_t  *base,
+		usize_t         size)
+{
+  xd3_output *output = (*outputp);
+  /* Copies SIZE bytes from BASE, spilling onto new pages as each one fills. */
+  do
+    {
+      usize_t take;
+
+      if (output->next == output->avail)
+	{
+	  xd3_output *aoutput;
+
+	  if ((aoutput = xd3_alloc_output (stream, output)) == NULL)
+	    {
+	      return ENOMEM;
+	    }
+
+	  output = (*outputp) = aoutput;
+	}
+
+      take = xd3_min (output->avail - output->next, size);  /* room left on this page, capped by what remains */
+
+      memcpy (output->base + output->next, base, (size_t) take);
+
+      output->next += take;
+      size -= take;
+      base += take;
+    }
+  while (size > 0);  /* the body runs once even when SIZE is 0 */
+
+  return 0;
+}
+#endif /* XD3_ENCODER */
+
+/***********************************************************************
+ Address cache stuff
+ ***********************************************************************/
+
+static int
+xd3_alloc_cache (xd3_stream *stream)
+{
+  /* (Re)allocate the near/same address-cache arrays; returns 0 or
+   * ENOMEM.  Old arrays are released first (xd3_free ignores NULL)
+   * and the pointers are cleared, so an array that is not
+   * reallocated below -- size 0, or skipped after an earlier failure
+   * -- cannot dangle and be freed again by xd3_free_stream. */
+  xd3_free (stream, stream->acache.near_array);
+  stream->acache.near_array = NULL;
+  xd3_free (stream, stream->acache.same_array);
+  stream->acache.same_array = NULL;
+
+  if (((stream->acache.s_near > 0) &&
+       (stream->acache.near_array = (usize_t*)
+	xd3_alloc (stream, stream->acache.s_near,
+		   (usize_t) sizeof (usize_t)))
+       == NULL) ||
+      ((stream->acache.s_same > 0) &&
+       (stream->acache.same_array = (usize_t*)
+	xd3_alloc (stream, stream->acache.s_same * 256,
+		   (usize_t) sizeof (usize_t)))
+       == NULL))
+    {
+      return ENOMEM;
+    }
+
+  return 0;
+}
+
+void
+xd3_init_cache (xd3_addr_cache* acache)
+{
+  if (acache->s_near > 0)  /* zero the near array and restart at slot 0 */
+    {
+      memset (acache->near_array, 0, acache->s_near * sizeof (usize_t));
+      acache->next_slot = 0;
+    }
+
+  if (acache->s_same > 0)  /* zero all s_same * 256 entries of the same array */
+    {
+      memset (acache->same_array, 0, acache->s_same * 256 * sizeof (usize_t));
+    }
+}
+
+static void
+xd3_update_cache (xd3_addr_cache* acache, usize_t addr)
+{
+  if (acache->s_near > 0)  /* near array: store at next_slot, then advance modulo s_near */
+    {
+      acache->near_array[acache->next_slot] = addr;
+      acache->next_slot = (acache->next_slot + 1) % acache->s_near;
+    }
+
+  if (acache->s_same > 0)  /* same array: slot chosen by ADDR modulo (s_same * 256) */
+    {
+      acache->same_array[addr % (acache->s_same*256)] = addr;
+    }
+}
+
+#if XD3_ENCODER
+/* OPT: this gets called a lot, can it be optimized? */
+static int
+xd3_encode_address (xd3_stream *stream,
+		    usize_t addr,
+		    usize_t here,
+		    uint8_t* mode)
+{
+  usize_t d, bestd;
+  usize_t i, bestm;
+  int ret;
+  xd3_addr_cache* acache = & stream->acache;
+  /* SMALLEST_INT jumps straight to the "good" label once the value is below 128. */
+#define SMALLEST_INT(x) do { if (((x) & ~127U) == 0) { goto good; } } while (0)
+
+  /* Attempt to find the address mode that yields the smallest integer value
+   * for "d", the encoded address value, thereby minimizing the encoded size
+   * of the address. */
+  bestd = addr;
+  bestm = VCD_SELF;
+
+  XD3_ASSERT (addr < here);
+
+  SMALLEST_INT (bestd);
+
+  if ((d = here-addr) < bestd)
+    {
+      bestd = d;
+      bestm = VCD_HERE;
+
+      SMALLEST_INT (bestd);
+    }
+
+  for (i = 0; i < acache->s_near; i += 1)
+    {
+      /* Note: If we used signed computation here, we could compute d
+       * and then check (d >= 0 && d < bestd). */
+      if (addr >= acache->near_array[i])
+	{
+	  d = addr - acache->near_array[i];
+
+	  if (d < bestd)
+	    {
+	      bestd = d;
+	      bestm = i+2; /* 2 counts the VCD_SELF, VCD_HERE modes */
+
+	      SMALLEST_INT (bestd);
+	    }
+	}
+    }
+  /* A hit in the same array is emitted as a single byte. */
+  if (acache->s_same > 0 &&
+      acache->same_array[d = addr%(acache->s_same*256)] == addr)
+    {
+      bestd = d%256;
+      /* 2 + s_near offsets past the VCD_NEAR modes */
+      bestm = acache->s_near + 2 + d/256;
+
+      if ((ret = xd3_emit_byte (stream, & ADDR_TAIL (stream), bestd)))
+	{
+	  return ret;
+	}
+    }
+  else
+    {
+    good:
+      /* Reached by falling through, or by SMALLEST_INT's goto, which skips the same-array test. */
+      if ((ret = xd3_emit_size (stream, & ADDR_TAIL (stream), bestd)))
+	{
+	  return ret;
+	}
+    }
+
+  xd3_update_cache (acache, addr);
+
+  (*mode) += bestm;  /* the chosen mode is added to the caller's value */
+
+  return 0;
+}
+#endif
+
+static int
+xd3_decode_address (xd3_stream *stream, usize_t here,
+		    usize_t mode, const uint8_t **inpp,
+		    const uint8_t *max, usize_t *valp)
+{
+  int ret;
+  usize_t same_start = 2 + stream->acache.s_near;  /* modes below this are SELF, HERE and the near modes */
+  /* Decodes one address for MODE from *INPP into *VALP, then records it in the cache. */
+  if (mode < same_start)
+    {
+      if ((ret = xd3_read_size (stream, inpp, max, valp))) { return ret; }
+
+      switch (mode)
+	{
+	case VCD_SELF:
+	  break;
+	case VCD_HERE:
+	  (*valp) = here - (*valp);
+	  break;
+	default:
+	  (*valp) += stream->acache.near_array[mode - 2];
+	  break;
+	}
+    }
+  else
+    {
+      if (*inpp == max)
+	{
+	  stream->msg = "address underflow";
+	  return XD3_INVALID_INPUT;
+	}
+
+      mode -= same_start;
+      /* NOTE(review): MODE is not range-checked against s_same here -- presumably validated by the caller. */
+      (*valp) = stream->acache.same_array[mode*256 + (**inpp)];
+
+      (*inpp) += 1;
+    }
+
+  xd3_update_cache (& stream->acache, *valp);
+
+  return 0;
+}
+
+/***********************************************************************
+ Alloc/free
+***********************************************************************/
+
+static void*
+__xd3_alloc_func (void* opaque, size_t items, usize_t size)
+{ /* Default allocator (OPAQUE unused); NULL if ITEMS * SIZE overflows. */
+  return (size != 0 && items > ((size_t) -1) / size) ? NULL : malloc (items * (size_t) size);
+}
+
+static void
+__xd3_free_func (void* opaque, void* address)
+{
+  free (address);  /* default deallocator; OPAQUE is not used */
+}
+
+static void*
+xd3_alloc (xd3_stream *stream,
+	   usize_t      elts,
+	   usize_t      size)
+{
+  void *a = stream->alloc (stream->opaque, elts, size);
+  /* Allocates through the stream's alloc callback; on failure sets stream->msg and returns NULL. */
+  if (a != NULL)
+    {
+      IF_DEBUG (stream->alloc_cnt += 1);
+      IF_DEBUG2 (DP(RINT "[stream %p malloc] size %"W"u ptr %p\n",
+		    (void*)stream, elts * size, a));
+    }
+  else
+    {
+      stream->msg = "out of memory";
+    }
+
+  return a;
+}
+
+static void
+xd3_free (xd3_stream *stream,
+	  void       *ptr)
+{
+  if (ptr != NULL)  /* a NULL PTR is ignored */
+    {
+      IF_DEBUG (stream->free_cnt += 1);
+      XD3_ASSERT (stream->free_cnt <= stream->alloc_cnt);
+      IF_DEBUG2 (DP(RINT "[stream %p free] %p\n",
+		    (void*)stream, ptr));
+      stream->free (stream->opaque, ptr);  /* release through the stream's free callback */
+    }
+}
+
+#if XD3_ENCODER
+static void*
+xd3_alloc0 (xd3_stream *stream,
+	    usize_t      elts,
+	    usize_t      size)
+{
+  /* Zero-filled xd3_alloc(); returns NULL (stream->msg set) on failure. */
+  void *a = xd3_alloc (stream, elts, size);
+
+  if (a != NULL)
+    { /* Widen before multiplying: a usize_t product may wrap. */
+      memset (a, 0, (size_t) elts * (size_t) size);
+    }
+  return a;
+}
+
+xd3_output*
+xd3_alloc_output (xd3_stream *stream,
+		  xd3_output *old_output)
+{
+  xd3_output *output;
+  uint8_t    *base;
+  /* Returns an empty page, linked after OLD_OUTPUT when that is non-NULL; NULL on allocation failure. */
+  if (stream->enc_free != NULL)
+    {
+      output = stream->enc_free;  /* reuse a page from the stream's free list */
+      stream->enc_free = output->next_page;
+    }
+  else
+    {
+      if ((output = (xd3_output*) xd3_alloc (stream, 1,
+					     (usize_t) sizeof (xd3_output)))
+	  == NULL)
+	{
+	  return NULL;
+	}
+
+      if ((base = (uint8_t*) xd3_alloc (stream, XD3_ALLOCSIZE,
+					sizeof (uint8_t))) == NULL)
+	{
+	  xd3_free (stream, output);  /* do not leak the header when the buffer fails */
+	  return NULL;
+	}
+
+      output->base  = base;
+      output->avail = XD3_ALLOCSIZE;
+    }
+
+  output->next = 0;
+
+  if (old_output)
+    {
+      old_output->next_page = output;
+    }
+
+  output->next_page = NULL;
+
+  return output;
+}
+
+static usize_t
+xd3_sizeof_output (xd3_output *output)
+{
+  usize_t total = 0;  /* bytes used, summed over the page chain */
+  xd3_output *page;
+
+  for (page = output; page != NULL; page = page->next_page)
+    {
+      total += page->next;
+    }
+  return total;
+}
+
+static void
+xd3_freelist_output (xd3_stream *stream,
+		     xd3_output *output)
+{
+  /* Push every page of the chain onto stream->enc_free for reuse by
+   * xd3_alloc_output, resetting each page's write offset to 0. */
+  xd3_output *page, *rest;
+
+  for (page = output; page != NULL; page = rest)
+    {
+      rest = page->next_page;
+      page->next = 0;
+      page->next_page = stream->enc_free;
+      stream->enc_free = page;
+    }
+}
+
+static void
+xd3_free_output (xd3_stream *stream,
+		 xd3_output *output)
+{
+  /* Release every page in the chain starting at OUTPUT: the data
+   * buffer first, then the page header itself.  A NULL chain is a
+   * no-op. */
+  xd3_output *page = output;
+
+  while (page != NULL)
+    {
+      /* Read the link before the page is freed. */
+      xd3_output *following = page->next_page;
+
+      xd3_free (stream, page->base);
+      xd3_free (stream, page);
+
+      page = following;
+    }
+}
+#endif /* XD3_ENCODER */
+
+void
+xd3_free_stream (xd3_stream *stream)
+{
+  xd3_iopt_buflist *blist = stream->iopt_alloc;
+  /* Frees everything the stream owns, then zeroes the stream structure itself. */
+  while (blist != NULL)
+    {
+      xd3_iopt_buflist *tmp = blist;
+      blist = blist->next;
+      xd3_free (stream, tmp->buffer);
+      xd3_free (stream, tmp);
+    }
+
+#if XD3_ENCODER
+  xd3_free (stream, stream->large_table);
+  xd3_free (stream, stream->small_table);
+  xd3_free (stream, stream->large_hash.powers);
+  xd3_free (stream, stream->small_hash.powers);
+  xd3_free (stream, stream->small_prev);
+
+  {
+    int i;
+    for (i = 0; i < ENC_SECTS; i += 1)
+      {
+	xd3_free_output (stream, stream->enc_heads[i]);
+      }
+    xd3_free_output (stream, stream->enc_free);
+  }
+#endif
+
+  xd3_free (stream, stream->acache.near_array);
+  xd3_free (stream, stream->acache.same_array);
+
+  xd3_free (stream, stream->inst_sect.copied1);
+  xd3_free (stream, stream->addr_sect.copied1);
+  xd3_free (stream, stream->data_sect.copied1);
+  /* dec_lastwin may be the same pointer as dec_buffer; free it separately only when distinct. */
+  if (stream->dec_lastwin != stream->dec_buffer)
+    {
+      xd3_free (stream, (uint8_t*) stream->dec_lastwin);
+    }
+  xd3_free (stream, stream->dec_buffer);
+
+  xd3_free (stream, stream->buf_in);
+  xd3_free (stream, stream->dec_appheader);
+  xd3_free (stream, stream->dec_codetbl);
+  xd3_free (stream, stream->code_table_alloc);
+
+#if SECONDARY_ANY
+  xd3_free (stream, stream->inst_sect.copied2);
+  xd3_free (stream, stream->addr_sect.copied2);
+  xd3_free (stream, stream->data_sect.copied2);
+
+  if (stream->sec_type != NULL)
+    {
+      stream->sec_type->destroy (stream, stream->sec_stream_d);
+      stream->sec_type->destroy (stream, stream->sec_stream_i);
+      stream->sec_type->destroy (stream, stream->sec_stream_a);
+    }
+#endif
+
+  xd3_free (stream, stream->whole_target.adds);
+  xd3_free (stream, stream->whole_target.inst);
+  xd3_free (stream, stream->whole_target.wininfo);
+
+  XD3_ASSERT (stream->alloc_cnt == stream->free_cnt);
+
+  memset (stream, 0, sizeof (xd3_stream));
+}
+
+#if (XD3_DEBUG > 1 || VCDIFF_TOOLS)
+static const char*
+xd3_rtype_to_string (xd3_rtype type, int print_mode)
+{
+  switch (type)  /* fixed five-character labels for the non-copy types */
+    {
+    case XD3_NOOP:
+      return "NOOP ";
+    case XD3_RUN:
+      return "RUN  ";
+    case XD3_ADD:
+      return "ADD  ";
+    default: break;
+    }
+  if (! print_mode)  /* copy type, mode not wanted: generic label */
+    {
+      return "CPY  ";
+    }
+  switch (type)  /* copy type: label carries the mode number, 0..9 */
+    {
+    case XD3_CPY + 0: return "CPY_0";
+    case XD3_CPY + 1: return "CPY_1";
+    case XD3_CPY + 2: return "CPY_2";
+    case XD3_CPY + 3: return "CPY_3";
+    case XD3_CPY + 4: return "CPY_4";
+    case XD3_CPY + 5: return "CPY_5";
+    case XD3_CPY + 6: return "CPY_6";
+    case XD3_CPY + 7: return "CPY_7";
+    case XD3_CPY + 8: return "CPY_8";
+    case XD3_CPY + 9: return "CPY_9";
+    default:          return "CPY>9";
+    }
+}
+#endif
+
+/****************************************************************
+ Stream configuration
+ ******************************************************************/
+
+int
+xd3_config_stream(xd3_stream *stream,
+		  xd3_config *config)
+{
+  int ret;
+  xd3_config defcfg;
+  xd3_smatcher *smatcher = &stream->smatcher;
+  /* Zeroes STREAM and fills it from CONFIG (NULL means an all-zero config).  Returns 0 or an XD3_ error code. */
+  if (config == NULL)
+    {
+      config = & defcfg;
+      memset (config, 0, sizeof (*config));
+    }
+
+  /* Initial setup: no error checks yet */
+  memset (stream, 0, sizeof (*stream));
+
+  stream->winsize = config->winsize ? config->winsize : XD3_DEFAULT_WINSIZE;
+  stream->sprevsz = config->sprevsz ? config->sprevsz : XD3_DEFAULT_SPREVSZ;
+  /* An iopt_size of 0 selects a default size and marks the buffer unlimited. */
+  if (config->iopt_size == 0)
+    {
+      stream->iopt_size = XD3_ALLOCSIZE / sizeof(xd3_rinst);
+      stream->iopt_unlimited = 1;
+    }
+  else
+    {
+      stream->iopt_size = config->iopt_size;
+    }
+
+  stream->getblk    = config->getblk;
+  stream->alloc     = config->alloc ? config->alloc : __xd3_alloc_func;
+  stream->free      = config->freef ? config->freef : __xd3_free_func;
+  stream->opaque    = config->opaque;
+  stream->flags     = config->flags;
+
+  /* Secondary setup. */
+  stream->sec_data  = config->sec_data;
+  stream->sec_inst  = config->sec_inst;
+  stream->sec_addr  = config->sec_addr;
+
+  stream->sec_data.data_type = DATA_SECTION;
+  stream->sec_inst.data_type = INST_SECTION;
+  stream->sec_addr.data_type = ADDR_SECTION;
+
+  /* Check static sizes (the same message is used when XD3_ALLOCSIZE is not a power of two). */
+  if (sizeof (usize_t) != SIZEOF_USIZE_T ||
+      sizeof (xoff_t) != SIZEOF_XOFF_T ||
+      (ret = xd3_check_pow2(XD3_ALLOCSIZE, NULL)))
+    {
+      stream->msg = "incorrect compilation: wrong integer sizes";
+      return XD3_INTERNAL;
+    }
+
+  /* Check/set secondary compressor. */
+  switch (stream->flags & XD3_SEC_TYPE)
+    {
+    case 0:
+      if (stream->flags & XD3_SEC_NOALL)
+	{
+	  stream->msg = "XD3_SEC flags require a secondary compressor type";
+	  return XD3_INTERNAL;
+	}
+      break;
+    case XD3_SEC_FGK:
+      FGK_CASE (stream);
+    case XD3_SEC_DJW:
+      DJW_CASE (stream);
+    case XD3_SEC_LZMA:
+      LZMA_CASE (stream);
+    default:
+      stream->msg = "too many secondary compressor types set";
+      return XD3_INTERNAL;
+    }
+
+  stream->code_table_desc = & __rfc3284_code_table_desc;
+  stream->code_table_func = xd3_rfc3284_code_table;
+
+  /* Check sprevsz.  NOTE(review): *smatcher was zeroed by the memset above and is only filled in below, so the first branch looks unreachable -- confirm. */
+  if (smatcher->small_chain == 1 &&
+      smatcher->small_lchain == 1)
+    {
+      stream->sprevsz = 0;
+    }
+  else
+    {
+      if ((ret = xd3_check_pow2 (stream->sprevsz, NULL)))
+	{
+	  stream->msg = "sprevsz is required to be a power of two";
+	  return XD3_INTERNAL;
+	}
+
+      stream->sprevmask = stream->sprevsz - 1;
+    }
+
+  /* Default scanner settings. */
+#if XD3_ENCODER
+  switch (config->smatch_cfg)
+    {
+      IF_BUILD_SOFT(case XD3_SMATCH_SOFT:
+      {
+	*smatcher = config->smatcher_soft;
+	smatcher->string_match = __smatcher_soft.string_match;
+	smatcher->name = __smatcher_soft.name;
+	if (smatcher->large_look  < MIN_MATCH ||
+	    smatcher->large_step  < 1         ||
+	    smatcher->small_look  < MIN_MATCH)
+	  {
+	    stream->msg = "invalid soft string-match config";
+	    return XD3_INVALID;
+	  }
+	break;
+      })
+
+      IF_BUILD_DEFAULT(case XD3_SMATCH_DEFAULT:
+		    *smatcher = __smatcher_default;
+		    break;)
+      IF_BUILD_SLOW(case XD3_SMATCH_SLOW:
+		    *smatcher = __smatcher_slow;
+		    break;)
+      IF_BUILD_FASTEST(case XD3_SMATCH_FASTEST:
+		    *smatcher = __smatcher_fastest;
+		    break;)
+      IF_BUILD_FASTER(case XD3_SMATCH_FASTER:
+		    *smatcher = __smatcher_faster;
+		    break;)
+      IF_BUILD_FAST(case XD3_SMATCH_FAST:
+		    *smatcher = __smatcher_fast;
+		    break;)
+    default:
+      stream->msg = "invalid string match config type";
+      return XD3_INTERNAL;
+    }
+  /* With the default matcher, a nonzero compression level in the flags selects the matcher instead. */
+  if (config->smatch_cfg == XD3_SMATCH_DEFAULT &&
+      (stream->flags & XD3_COMPLEVEL_MASK) != 0)
+    {
+      int level = (stream->flags & XD3_COMPLEVEL_MASK) >> XD3_COMPLEVEL_SHIFT;
+
+      switch (level)
+	{ /* NOTE(review): presumably IF_BUILD_x(...) expands to nothing when that matcher is not built, so a case falls through to the next -- confirm. */
+	case 1:
+	  IF_BUILD_FASTEST(*smatcher = __smatcher_fastest;
+			   break;)
+	case 2:
+	  IF_BUILD_FASTER(*smatcher = __smatcher_faster;
+			   break;)
+	case 3: case 4: case 5:
+	  IF_BUILD_FAST(*smatcher = __smatcher_fast;
+			break;)
+	case 6:
+	  IF_BUILD_DEFAULT(*smatcher = __smatcher_default;
+			   break;)
+	default:
+	  IF_BUILD_SLOW(*smatcher = __smatcher_slow;
+			break;)
+	  IF_BUILD_DEFAULT(*smatcher = __smatcher_default;
+			   break;)
+	  IF_BUILD_FAST(*smatcher = __smatcher_fast;
+			break;)
+	  IF_BUILD_FASTER(*smatcher = __smatcher_faster;
+			break;)
+	  IF_BUILD_FASTEST(*smatcher = __smatcher_fastest;
+			   break;)
+	}
+    }
+#endif
+
+  return 0;
+}
+
+/***********************************************************
+ Getblk interface
+ ***********************************************************/
+
+inline
+xoff_t xd3_source_eof(const xd3_source *src)
+{
+  /* Offset where block max_blkno starts, plus the bytes on that block. */
+  return (src->max_blkno << src->shiftby) + (xoff_t)src->onlastblk;
+}
+
+inline
+usize_t xd3_bytes_on_srcblk (xd3_source *src, xoff_t blkno)
+{
+  /* Every block counts as blksize bytes except block max_blkno. */
+  if (blkno != src->max_blkno) { return src->blksize; }
+
+  return src->onlastblk;
+}
+
+/* This function interfaces with the client getblk function, checks
+ * its results, updates max_blkno, onlastblk, eof_known. */
+static int
+xd3_getblk (xd3_stream *stream, xoff_t blkno)
+{
+  int ret;
+  xd3_source *source = stream->src;
+  /* Fetch only when no block is loaded or a different block is wanted. */
+  if (source->curblk == NULL || blkno != source->curblkno)
+    {
+      source->getblkno = blkno;
+
+      if (stream->getblk == NULL)
+	{
+	  IF_DEBUG2 (DP(RINT "[getblk] XD3_GETSRCBLK %"Q"u\n", blkno));
+	  stream->msg = "getblk source input";
+	  return XD3_GETSRCBLK;  /* no callback: the wanted block number was left in getblkno */
+	}
+
+      ret = stream->getblk (stream, source, blkno);
+      if (ret != 0)
+	{
+	  IF_DEBUG2 (DP(RINT "[getblk] app error blkno %"Q"u: %s\n",
+			blkno, xd3_strerror (ret)));
+	  return ret;
+	}
+
+      IF_DEBUG2 (DP(RINT "[getblk] read source block %"Q"u onblk "
+		    "%"W"u blksize %"W"u max_blkno %"Q"u\n", blkno, source->onblk,
+		    source->blksize, source->max_blkno));
+    }
+  /* A block past the highest seen so far raises max_blkno; a short one marks EOF as known. */
+  if (blkno > source->max_blkno)
+    {
+      source->max_blkno = blkno;
+
+      if (source->onblk == source->blksize)
+	{
+	  IF_DEBUG1 (DP(RINT "[getblk] full source blkno %"Q"u: "
+			"source length unknown %"Q"u\n",
+			blkno,
+			xd3_source_eof (source)));
+	}
+      else if (!source->eof_known)
+	{
+	  IF_DEBUG1 (DP(RINT "[getblk] eof block has %"W"u bytes; "
+			"source length known %"Q"u\n",
+			xd3_bytes_on_srcblk (source, blkno),
+			xd3_source_eof (source)));
+	  source->eof_known = 1;
+	}
+    }
+
+  XD3_ASSERT (source->curblk != NULL);
+
+  if (blkno == source->max_blkno)
+    {
+      /* In case the application sets the source as 1 block w/ a
+       * preset buffer. */
+      source->onlastblk = source->onblk;
+    }
+  return 0;
+}
+
+/***********************************************************
+ Stream open/close
+ ***************************************************************/
+
+int
+xd3_set_source (xd3_stream *stream,
+		xd3_source *src)
+{
+  /* Attach SRC to STREAM, normalizing its sizes.  Always returns 0. */
+  usize_t shiftby;
+  stream->src = src;
+  src->srclen  = 0;
+  src->srcbase = 0;
+
+  /* Enforce power-of-two blocksize so that source-block number
+   * calculations are cheap. */
+  if (xd3_check_pow2 (src->blksize, &shiftby) != 0)
+    {
+      src->blksize = xd3_pow2_roundup(src->blksize);
+      xd3_check_pow2 (src->blksize, &shiftby);
+      IF_DEBUG1 (DP(RINT "raising src_blksz to %"W"u\n", src->blksize));
+    }
+
+  src->shiftby = shiftby;
+  src->maskby = (1ULL << shiftby) - 1ULL;
+  /* max_winsize is likewise rounded up to a power of two, at least XD3_ALLOCSIZE. */
+  if (xd3_check_pow2 (src->max_winsize, NULL) != 0)
+    {
+      src->max_winsize = xd3_xoff_roundup(src->max_winsize);
+      IF_DEBUG1 (DP(RINT "raising src_maxsize to %"Q"u\n", src->max_winsize));
+    }
+  src->max_winsize = xd3_max (src->max_winsize, XD3_ALLOCSIZE);
+  return 0;
+}
+
+int
+xd3_set_source_and_size (xd3_stream *stream,
+			 xd3_source *user_source,
+			 xoff_t source_size) {
+  int ret = xd3_set_source (stream, user_source);  /* then record the known total size below */
+  if (ret == 0)
+    {
+      stream->src->eof_known = 1;
+      IF_DEBUG2 (DP(RINT "[set source] size known %"Q"u\n",
+		    source_size));
+      xd3_blksize_div(source_size,  /* fills max_blkno and onlastblk from SOURCE_SIZE */
+		      stream->src,
+		      &stream->src->max_blkno,
+		      &stream->src->onlastblk);
+
+      IF_DEBUG1 (DP(RINT "[set source] size known %"Q"u max_blkno %"Q"u\n",
+		    source_size, stream->src->max_blkno));
+    }
+  return ret;
+}
+
+void
+xd3_abort_stream (xd3_stream *stream)
+{
+  stream->dec_state = DEC_ABORTED;  /* both states are marked; xd3_close_stream accepts either */
+  stream->enc_state = ENC_ABORTED;
+}
+
+int
+xd3_close_stream (xd3_stream *stream)
+{
+  if (stream->enc_state != 0 && stream->enc_state != ENC_ABORTED)  /* an encode is in progress */
+    {
+      if (stream->buf_leftover != NULL)
+	{
+	  stream->msg = "encoding is incomplete";
+	  return XD3_INTERNAL;
+	}
+
+      if (stream->enc_state == ENC_POSTWIN)
+	{
+#if XD3_ENCODER
+	  xd3_encode_reset (stream);
+#endif
+	  stream->current_window += 1;
+	  stream->enc_state = ENC_INPUT;
+	}
+
+      /* If encoding, should be ready for more input but not actually
+	 have any. */
+      if (stream->enc_state != ENC_INPUT || stream->avail_in != 0)
+	{
+	  stream->msg = "encoding is incomplete";
+	  return XD3_INTERNAL;
+	}
+    }
+  else
+    {
+      switch (stream->dec_state)  /* decoding, an aborted stream, or an encoder never started */
+	{
+	case DEC_VCHEAD:
+	case DEC_WININD:
+	  /* TODO: Address the zero-byte ambiguity.  Does the encoder
+	   * emit a window or not?  If so, then catch an error here.
+	   * If not, need another routine to say
+	   * decode_at_least_one_if_empty. */
+	case DEC_ABORTED:
+	  break;
+	default:
+	  /* If decoding, should be ready for the next window. */
+	  stream->msg = "eof in decode";
+	  return XD3_INVALID_INPUT;
+	}
+    }
+
+  return 0;
+}
+
+/**************************************************************
+ Application header
+ ****************************************************************/
+
+int
+xd3_get_appheader (xd3_stream  *stream,
+		   uint8_t    **data,
+		   usize_t      *size)
+{
+  if (stream->dec_state < DEC_WININD)  /* decoder has not reached DEC_WININD yet */
+    {
+      stream->msg = "application header not available";
+      return XD3_INTERNAL;
+    }
+  /* Hands back the stream's own buffer pointer and its size; nothing is copied. */
+  (*data) = stream->dec_appheader;
+  (*size) = stream->dec_appheadsz;
+  return 0;
+}
+
+/**********************************************************
+ Decoder stuff
+ *************************************************/
+
+#include "xdelta3-decode.h"
+
+/****************************************************************
+ Encoder stuff
+ *****************************************************************/
+
+#if XD3_ENCODER
+void
+xd3_set_appheader (xd3_stream    *stream,
+		   const uint8_t *data,
+		   usize_t         size)
+{
+  stream->enc_appheader = data;  /* only the pointer is stored; DATA is not copied here */
+  stream->enc_appheadsz = size;
+}
+
+#if XD3_DEBUG
+static int
+xd3_iopt_check (xd3_stream *stream)
+{
+  usize_t ul = xd3_rlist_length (& stream->iopt_used);
+  usize_t fl = xd3_rlist_length (& stream->iopt_free);
+  /* Nonzero when used + free (+1 if stream->iout is set) equals iopt_size. */
+  return (ul + fl + (stream->iout ? 1 : 0)) == stream->iopt_size;
+}
+#endif
+
+static xd3_rinst*
+xd3_iopt_free (xd3_stream *stream, xd3_rinst *i)
+{
+  xd3_rinst *n = xd3_rlist_remove (i);  /* unlink I; N is whatever xd3_rlist_remove returns (presumably the next entry) */
+  xd3_rlist_push_back (& stream->iopt_free, i);  /* put I on the free list */
+  return n;
+}
+
+static void
+xd3_iopt_free_nonadd (xd3_stream *stream, xd3_rinst *i)
+{
+  if (i->type != XD3_ADD)  /* XD3_ADD instructions are left alone here */
+    {
+      xd3_rlist_push_back (& stream->iopt_free, i);  /* NOTE(review): I is not unlinked first -- presumably already off any list; confirm */
+    }
+}
+
+/* When an instruction is ready to flush from the iopt buffer, this
+ * function is called to produce an encoding.  It writes the
+ * instruction plus size, address, and data to the various encoding
+ * sections.
+ *
+ * INST must begin exactly at stream->unencoded_offset (asserted
+ * below).  The instruction code itself is not written right away:
+ * INST is parked in stream->iout so that the following call can
+ * decide whether the pair goes out as one double instruction or as
+ * two singles.  Returns 0 on success, otherwise the first non-zero
+ * code reported by a helper. */
+static int
+xd3_iopt_finish_encoding (xd3_stream *stream, xd3_rinst *inst)
+{
+  int ret;
+
+  /* Check for input overflow. */
+  XD3_ASSERT (inst->pos + inst->size <= stream->avail_in);
+
+  switch (inst->type)
+    {
+    case XD3_CPY:
+      {
+	/* the address may have an offset if there is a source window. */
+	usize_t addr;
+	xd3_source *src = stream->src;
+
+	if (src != NULL)
+	  {
+	    /* If there is a source copy, the source must have its
+	     * source window decided before we can encode.  This can
+	     * be bad -- we have to make this decision even if no
+	     * source matches have been found. */
+	    if (stream->srcwin_decided == 0)
+	      {
+		if ((ret = xd3_srcwin_setup (stream))) { return ret; }
+	      }
+	    else
+	      {
+		/* Flag the decision as "early" when the source EOF is not
+		 * yet known, or the checksum position has not reached it. */
+		stream->srcwin_decided_early = (!stream->src->eof_known ||
+						(stream->srcwin_cksum_pos <
+						 xd3_source_eof (stream->src)));
+	      }
+
+	    /* xtra field indicates the copy is from the source */
+	    if (inst->xtra)
+	      {
+		XD3_ASSERT (inst->addr >= src->srcbase);
+		XD3_ASSERT (inst->addr + inst->size <=
+			    src->srcbase + src->srclen);
+		/* Source copy: address is relative to the source window. */
+		addr = inst->addr - src->srcbase;
+		stream->n_scpy += 1;
+		stream->l_scpy += inst->size;
+	      }
+	    else
+	      {
+		/* with source window: target copy address is offset
+		 * by taroff. */
+		addr = stream->taroff + inst->addr;
+		stream->n_tcpy += 1;
+		stream->l_tcpy += inst->size;
+	      }
+	  }
+	else
+	  {
+	    /* No source at all: a target copy with an unadjusted address. */
+	    addr = inst->addr;
+	    stream->n_tcpy += 1;
+	    stream->l_tcpy += inst->size;
+	  }
+
+	/* Note: used to assert inst->size >= MIN_MATCH, but not true
+	 * for merge operations & identical match heuristics. */
+	/* the "here" position is always offset by taroff */
+	if ((ret = xd3_encode_address (stream, addr, inst->pos + stream->taroff,
+				       & inst->type)))
+	  {
+	    return ret;
+	  }
+
+	IF_DEBUG2 ({
+	  static int cnt;
+	  DP(RINT "[iopt copy:%d] pos %"Q"u-%"Q"u addr %"Q"u-%"Q"u size %"W"u\n",
+		   cnt++,
+		   stream->total_in + inst->pos,
+		   stream->total_in + inst->pos + inst->size,
+		   inst->addr, inst->addr + inst->size, inst->size);
+	});
+	break;
+      }
+    case XD3_RUN:
+      {
+	/* A run stores only its repeated byte (kept in xtra) in the
+	 * data section. */
+	if ((ret = xd3_emit_byte (stream, & DATA_TAIL (stream), inst->xtra))) { return ret; }
+
+	stream->n_run += 1;
+	stream->l_run += inst->size;
+
+	IF_DEBUG2 ({
+	  static int cnt;
+	  DP(RINT "[iopt run:%d] pos %"Q"u size %"W"u\n", cnt++, stream->total_in + inst->pos, inst->size);
+	});
+	break;
+      }
+    case XD3_ADD:
+      {
+	/* An add copies its literal bytes from the input window into
+	 * the data section. */
+	if ((ret = xd3_emit_bytes (stream, & DATA_TAIL (stream),
+				   stream->next_in + inst->pos, inst->size))) { return ret; }
+
+	stream->n_add += 1;
+	stream->l_add += inst->size;
+
+	IF_DEBUG2 ({
+	  static int cnt;
+	  DP(RINT "[iopt add:%d] pos %"Q"u size %"W"u\n", cnt++, stream->total_in + inst->pos, inst->size);
+	});
+
+	break;
+      }
+    }
+
+  /* This is the only place stream->unencoded_offset is incremented. */
+  XD3_ASSERT (stream->unencoded_offset == inst->pos);
+  stream->unencoded_offset += inst->size;
+
+  inst->code2 = 0;
+
+  /* NOTE(review): XD3_CHOOSE_INSTRUCTION is defined outside this
+   * chunk; presumably it selects inst->code1 and, when the pending
+   * instruction in stream->iout can be paired with INST, sets
+   * stream->iout->code2 -- confirm against the macro. */
+  XD3_CHOOSE_INSTRUCTION (stream, stream->iout, inst);
+
+  if (stream->iout != NULL)
+    {
+      if (stream->iout->code2 != 0)
+	{
+	  /* The pending instruction and INST go out together as one
+	   * double instruction; nothing remains pending afterwards. */
+	  if ((ret = xd3_emit_double (stream, stream->iout, inst, 
+				      stream->iout->code2))) { return ret; }
+
+	  xd3_iopt_free_nonadd (stream, stream->iout);
+	  xd3_iopt_free_nonadd (stream, inst);
+	  stream->iout = NULL;
+	  return 0;
+	}
+      else
+	{
+	  /* No pairing: flush the pending instruction on its own. */
+	  if ((ret = xd3_emit_single (stream, stream->iout, stream->iout->code1))) { return ret; }
+
+	  xd3_iopt_free_nonadd (stream, stream->iout);
+	}
+    }
+
+  /* INST becomes the pending instruction for the next call. */
+  stream->iout = inst;
+
+  return 0;
+}
+
+/* Synthesizes an ADD covering the not-yet-encoded bytes in front of
+ * POS, when there are any, and passes it to
+ * xd3_iopt_finish_encoding.  IADD is storage supplied by the caller
+ * and must stay alive until the following call to
+ * xd3_iopt_finish_encoding, because that function may keep a pointer
+ * to it in stream->iout. */
+static int
+xd3_iopt_add (xd3_stream *stream, usize_t pos, xd3_rinst *iadd)
+{
+  const usize_t start = stream->unencoded_offset;
+
+  if (pos <= start)
+    {
+      /* Nothing is left unencoded before POS. */
+      return 0;
+    }
+
+  iadd->type = XD3_ADD;
+  iadd->pos  = start;
+  iadd->size = pos - start;
+
+  return xd3_iopt_finish_encoding (stream, iadd);
+}
+
+/* Finishes the encoding of INST.  If there are unmatched bytes
+ * between the current unencoded offset and INST, an ADD for that
+ * region is produced first (in storage local to this function). */
+static int
+xd3_iopt_add_encoding (xd3_stream *stream, xd3_rinst *inst)
+{
+  xd3_rinst gap_add;
+  int ret = xd3_iopt_add (stream, inst->pos, & gap_add);
+
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  return xd3_iopt_finish_encoding (stream, inst);
+}
+
+/* Closes out a window: produces a last ADD for whatever input up to
+ * stream->avail_in is still unencoded, then emits the instruction
+ * left pending in stream->iout, if any, as a single instruction. */
+static int
+xd3_iopt_add_finalize (xd3_stream *stream)
+{
+  xd3_rinst tail_add;
+  int ret;
+
+  ret = xd3_iopt_add (stream, stream->avail_in, & tail_add);
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  if (! stream->iout)
+    {
+      return 0;
+    }
+
+  ret = xd3_emit_single (stream, stream->iout, stream->iout->code1);
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  xd3_iopt_free_nonadd (stream, stream->iout);
+  stream->iout = NULL;
+
+  return 0;
+}
+
+/* Compact the instruction buffer by choosing the best non-overlapping
+ * instructions when lazy string-matching.  There are no ADDs in the
+ * iopt buffer because those are synthesized in xd3_iopt_add_encoding
+ * and during xd3_iopt_add_finalize.
+ *
+ * The function runs in two phases: first it walks the used list and
+ * resolves overlaps between neighbouring instructions (dropping or
+ * shortening them), then it pops instructions off the front and
+ * encodes them.  With FORCE non-zero the whole list is encoded;
+ * otherwise encoding stops after more than half of iopt_size
+ * instructions, or once fewer than three remain.  Returns 0 or the
+ * first non-zero code from xd3_iopt_add_encoding. */
+static int
+xd3_iopt_flush_instructions (xd3_stream *stream, int force)
+{
+  xd3_rinst *r1 = xd3_rlist_front (& stream->iopt_used);
+  xd3_rinst *r2;
+  xd3_rinst *r3;
+  usize_t r1end;
+  usize_t r2end;
+  usize_t r2off;
+  usize_t r2moff;
+  usize_t gap;
+  usize_t flushed;
+  int ret;
+
+  XD3_ASSERT (xd3_iopt_check (stream));
+
+  /* Note: once tried to skip this step if it's possible to assert
+   * there are no overlapping instructions.  Doesn't work because
+   * xd3_iopt_erase leaves overlapping instructions. */
+  while (! xd3_rlist_end (& stream->iopt_used, r1) &&
+	 ! xd3_rlist_end (& stream->iopt_used, r2 = xd3_rlist_next (r1)))
+    {
+      r1end = r1->pos + r1->size;
+
+      /* If the instructions do not overlap, continue. */
+      if (r1end <= r2->pos)
+	{
+	  r1 = r2;
+	  continue;
+	}
+
+      r2end = r2->pos + r2->size;
+
+      /* The min_match adjustments prevent this. */
+      XD3_ASSERT (r2end > (r1end + LEAST_MATCH_INCR));
+
+      /* If r3 is available... */
+      if (! xd3_rlist_end (& stream->iopt_used, r3 = xd3_rlist_next (r2)))
+	{
+	  /* If r3 starts before r1 finishes or just about, r2 is irrelevant */
+	  if (r3->pos <= r1end + 1)
+	    {
+	      xd3_iopt_free (stream, r2);
+	      continue;
+	    }
+	}
+      else if (! force)
+	{
+	  /* Unless force, end the loop when r3 is not available. */
+	  break;
+	}
+
+      /* r2off:  how far r2 starts after r1.
+       * r2moff: how far r2 extends beyond the end of r1.
+       * gap:    total span covered by r1 and r2 together. */
+      r2off  = r2->pos - r1->pos;
+      r2moff = r2end - r1end;
+      gap    = r2end - r1->pos;
+
+      /* If the two matches overlap almost entirely, choose the better match
+       * and discard the other.  The else branch can still create inefficient
+       * copies, e.g., a 4-byte copy that takes 4 bytes to encode, which
+       * xd3_smatch() wouldn't allow by its crude efficiency check.  However,
+       * in this case there are adjacent copies which mean the add would cost
+       * one extra byte.  Allow the inefficiency here. */
+      if (gap < 2*MIN_MATCH || r2moff <= 2 || r2off <= 2)
+	{
+	  /* Only one match should be used, choose the longer one. */
+	  if (r1->size < r2->size)
+	    {
+	      xd3_iopt_free (stream, r1);
+	      r1 = r2;
+	    }
+	  else
+	    {
+	      /* We are guaranteed that r1 does not overlap now, so advance past r2 */
+	      r1 = xd3_iopt_free (stream, r2);
+	    }
+	  continue;
+	}
+      else
+	{
+	  /* Shorten one of the instructions -- could be optimized
+	   * based on the address cache. */
+	  usize_t average;
+	  usize_t newsize;
+	  usize_t adjust1;
+
+	  XD3_ASSERT (r1end > r2->pos && r2end > r1->pos);
+
+	  /* Try to balance the length of both instructions, but avoid
+	   * making both longer than MAX_MATCH_SPLIT . */
+	  average = gap / 2;
+	  newsize = xd3_min (MAX_MATCH_SPLIT, gap - average);
+
+	  /* Should be possible to simplify this code. */
+	  if (newsize > r1->size)
+	    {
+	      /* shorten r2 */
+	      adjust1 = r1end - r2->pos;
+	    }
+	  else if (newsize > r2->size)
+	    {
+	      /* shorten r1 */
+	      adjust1 = r1end - r2->pos;
+
+	      XD3_ASSERT (r1->size > adjust1);
+
+	      r1->size -= adjust1;
+
+	      /* don't shorten r2 */
+	      adjust1 = 0;
+	    }
+	  else
+	    {
+	      /* shorten r1 */
+	      adjust1 = r1->size - newsize;
+
+	      /* Do not cut r1 back so far that a hole opens in front
+	       * of r2. */
+	      if (r2->pos > r1end - adjust1)
+		{
+		  adjust1 -= r2->pos - (r1end - adjust1);
+		}
+
+	      XD3_ASSERT (r1->size > adjust1);
+
+	      r1->size -= adjust1;
+
+	      /* shorten r2 */
+	      XD3_ASSERT (r1->pos + r1->size >= r2->pos);
+
+	      adjust1 = r1->pos + r1->size - r2->pos;
+	    }
+
+	  /* Fallthrough above if-else, shorten r2 */
+	  XD3_ASSERT (r2->size > adjust1);
+
+	  /* Trim the front of r2: position and copy address advance
+	   * together with the size reduction. */
+	  r2->size -= adjust1;
+	  r2->pos  += adjust1;
+	  r2->addr += adjust1;
+
+	  XD3_ASSERT (r1->size >= MIN_MATCH);
+	  XD3_ASSERT (r2->size >= MIN_MATCH);
+
+	  r1 = r2;
+	}
+    }
+
+  XD3_ASSERT (xd3_iopt_check (stream));
+
+  /* If forcing, pick instructions until the list is empty, otherwise
+   * this empties 50% of the queue. */
+  for (flushed = 0; ! xd3_rlist_empty (& stream->iopt_used); )
+    {
+      xd3_rinst *renc = xd3_rlist_pop_front (& stream->iopt_used);
+      if ((ret = xd3_iopt_add_encoding (stream, renc)))
+	{
+	  return ret;
+	}
+
+      if (! force)
+	{
+	  if (++flushed > stream->iopt_size / 2)
+	    {
+	      break;
+	    }
+
+	  /* If fewer than three instructions remain, break, because
+	   * they were not optimized.  This means there were
+	   * more than 50% eliminated by the loop above. */
+ 	  r1 = xd3_rlist_front (& stream->iopt_used);
+ 	  if (xd3_rlist_end(& stream->iopt_used, r1) ||
+ 	      xd3_rlist_end(& stream->iopt_used, r2 = xd3_rlist_next (r1)) ||
+ 	      xd3_rlist_end(& stream->iopt_used, r3 = xd3_rlist_next (r2)))
+ 	    {
+ 	      break;
+ 	    }
+	}
+    }
+
+  XD3_ASSERT (xd3_iopt_check (stream));
+
+  XD3_ASSERT (!force || xd3_rlist_length (& stream->iopt_used) == 0);
+
+  return 0;
+}
+
+/* Hands out one instruction slot through IPTR, moving it from the
+ * free list to the back of the used list.  When the free list is
+ * empty, either another block of slots is allocated (unlimited mode)
+ * or part of the buffer is flushed to make room.  Returns 0 on
+ * success or the error from the allocation/flush. */
+static int
+xd3_iopt_get_slot (xd3_stream *stream, xd3_rinst** iptr)
+{
+  xd3_rinst *slot;
+  int ret;
+
+  if (xd3_rlist_empty (& stream->iopt_free))
+    {
+      if (! stream->iopt_unlimited)
+	{
+	  /* Fixed-size buffer: a non-forced flush frees some slots. */
+	  ret = xd3_iopt_flush_instructions (stream, 0);
+
+	  if (ret != 0)
+	    {
+	      return ret;
+	    }
+
+	  XD3_ASSERT (! xd3_rlist_empty (& stream->iopt_free));
+	}
+      else
+	{
+	  /* Unlimited buffer: grow by one allocation unit of slots. */
+	  const usize_t elts = XD3_ALLOCSIZE / sizeof(xd3_rinst);
+
+	  ret = xd3_alloc_iopt (stream, elts);
+
+	  if (ret != 0)
+	    {
+	      return ret;
+	    }
+
+	  stream->iopt_size += elts;
+	}
+    }
+
+  slot = xd3_rlist_pop_back (& stream->iopt_free);
+  xd3_rlist_push_back (& stream->iopt_used, slot);
+
+  *iptr = slot;
+  stream->i_slots_used += 1;
+
+  return 0;
+}
+
+/* A copy is about to be emitted that extends backwards to POS and
+ * covers SIZE bytes, so it may completely cover instructions already
+ * at the tail of the buffer.  Every trailing instruction that starts
+ * at or after POS is moved back to the free list; the scan stops at
+ * the first instruction that starts before POS.  Nothing is
+ * returned. */
+static void
+xd3_iopt_erase (xd3_stream *stream, usize_t pos, usize_t size)
+{
+  while (! xd3_rlist_empty (& stream->iopt_used))
+    {
+      xd3_rinst *r = xd3_rlist_back (& stream->iopt_used);
+
+      /* Verify that greedy is working.  The previous instruction
+       * should end before the new one begins. */
+      XD3_ASSERT ((stream->flags & XD3_BEGREEDY) == 0 || (r->pos + r->size <= pos));
+      /* Verify that min_match is working.  The previous instruction
+       * should end before the new one ends. */
+      XD3_ASSERT ((stream->flags & XD3_BEGREEDY) != 0 || (r->pos + r->size < pos + size));
+
+      /* An instruction starting before the new one is not covered,
+       * and neither is anything in front of it: done. */
+      if (r->pos < pos)
+	{
+	  break;
+	}
+
+      /* Covered by the new instruction: recycle the slot and look at
+       * the new tail. */
+      xd3_rlist_remove (r);
+      xd3_rlist_push_back (& stream->iopt_free, r);
+      stream->i_slots_used -= 1;
+    }
+}
+
+/* Returns the input position just past the last instruction in the
+ * used list (its pos + size), or 0 when the list is empty. */
+static usize_t
+xd3_iopt_last_matched (xd3_stream *stream)
+{
+  if (! xd3_rlist_empty (& stream->iopt_used))
+    {
+      xd3_rinst *last = xd3_rlist_back (& stream->iopt_used);
+
+      return last->pos + last->size;
+    }
+
+  return 0;
+}
+
+/*********************************************************
+ Emit routines
+ ***********************************************************/
+
+/* Writes a single-instruction CODE to the instruction section.  When
+ * the code table entry has size1 == 0 the size is not implied by the
+ * code, so the instruction's size is written right after it. */
+static int
+xd3_emit_single (xd3_stream *stream, xd3_rinst *single, uint8_t code)
+{
+  const int needs_size = (stream->code_table[code].size1 == 0);
+  int ret;
+
+  IF_DEBUG2 (DP(RINT "[emit1] %"W"u %s (%"W"u) code %u\n",
+		single->pos,
+		xd3_rtype_to_string ((xd3_rtype) single->type, 0),
+		single->size,
+		code));
+
+  ret = xd3_emit_byte (stream, & INST_TAIL (stream), code);
+
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  if (! needs_size)
+    {
+      return 0;
+    }
+
+  return xd3_emit_size (stream, & INST_TAIL (stream), single->size);
+}
+
+/* Writes a double-instruction CODE to the instruction section.
+ * FIRST and SECOND are only consulted for debug output. */
+static int
+xd3_emit_double (xd3_stream *stream, xd3_rinst *first,
+                 xd3_rinst *second, uint8_t code)
+{
+  int ret;
+
+  /* Double instructions always carry fixed sizes, so the code byte
+   * alone is the complete output -- no size fields follow. */
+  XD3_ASSERT (stream->code_table[code].size1 != 0 &&
+	      stream->code_table[code].size2 != 0);
+
+  ret = xd3_emit_byte (stream, & INST_TAIL (stream), code);
+
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  IF_DEBUG2 (DP(RINT "[emit2]: %"W"u %s (%"W"u) %s (%"W"u) code %u\n",
+		first->pos,
+		xd3_rtype_to_string ((xd3_rtype) first->type, 0),
+		first->size,
+		xd3_rtype_to_string ((xd3_rtype) second->type, 0),
+		second->size,
+		code));
+
+  return 0;
+}
+
+/* Queues a candidate RUN instruction in the iopt buffer.  POS is
+ * relative to the target window; the repeated byte is read from
+ * *RUN_C and kept in the instruction's xtra field. */
+static int
+xd3_emit_run (xd3_stream *stream, usize_t pos, usize_t size, uint8_t *run_c)
+{
+  xd3_rinst *slot;
+  int ret = xd3_iopt_get_slot (stream, & slot);
+
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  slot->pos  = pos;
+  slot->size = size;
+  slot->type = XD3_RUN;
+  slot->xtra = *run_c;
+
+  return 0;
+}
+
+/* Queues a candidate COPY instruction in the iopt buffer.  POS is
+ * relative to the target window.  IS_SOURCE is kept in the xtra
+ * field, which xd3_iopt_finish_encoding later reads to tell source
+ * copies from target copies. */
+int
+xd3_found_match (xd3_stream *stream, usize_t pos,
+		 usize_t size, xoff_t addr, int is_source)
+{
+  xd3_rinst *slot;
+  int ret = xd3_iopt_get_slot (stream, & slot);
+
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  slot->pos  = pos;
+  slot->size = size;
+  slot->addr = addr;
+  slot->type = XD3_CPY;
+  slot->xtra = is_source;
+
+  return 0;
+}
+
+/* Writes the header for the current window into the HDR section.
+ *
+ * For the first window only (current_window == 0) the file header
+ * comes first: three magic bytes, the version byte, the header
+ * indicator, then the optional secondary-compressor ID and the
+ * optional application header (size followed by bytes).
+ *
+ * For every window: the window indicator, the source segment length
+ * and offset when the encoder used the source, the delta encoding
+ * length, the target length, the delta indicator, the lengths of the
+ * data/inst/addr sections, and finally an optional 4-byte Adler-32
+ * value written most-significant byte first.
+ *
+ * When secondary compression is configured, the three sections are
+ * passed through xd3_encode_secondary before their lengths are
+ * measured.  Returns 0 on success or the first non-zero code from a
+ * helper. */
+static int
+xd3_emit_hdr (xd3_stream *stream)
+{
+  int  ret;
+  int  use_secondary = stream->sec_type != NULL;
+  int  use_adler32   = stream->flags & (XD3_ADLER32 | XD3_ADLER32_RECODE);
+  int  vcd_source    = xd3_encoder_used_source (stream);
+  uint8_t win_ind = 0;
+  uint8_t del_ind = 0;
+  usize_t enc_len;
+  usize_t tgt_len;
+  usize_t data_len;
+  usize_t inst_len;
+  usize_t addr_len;
+
+  /* File header: emitted once, ahead of the first window. */
+  if (stream->current_window == 0)
+    {
+      uint8_t hdr_ind = 0;
+      int use_appheader  = stream->enc_appheader != NULL;
+
+      if (use_secondary)  { hdr_ind |= VCD_SECONDARY; }
+      if (use_appheader)  { hdr_ind |= VCD_APPHEADER; }
+
+      if ((ret = xd3_emit_byte (stream, & HDR_TAIL (stream),
+				VCDIFF_MAGIC1)) != 0 ||
+	  (ret = xd3_emit_byte (stream, & HDR_TAIL (stream),
+				VCDIFF_MAGIC2)) != 0 ||
+	  (ret = xd3_emit_byte (stream, & HDR_TAIL (stream),
+				VCDIFF_MAGIC3)) != 0 ||
+	  (ret = xd3_emit_byte (stream, & HDR_TAIL (stream),
+				VCDIFF_VERSION)) != 0 ||
+	  (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), hdr_ind)) != 0)
+	{
+	  return ret;
+	}
+
+      /* Secondary compressor ID */
+#if SECONDARY_ANY
+      if (use_secondary &&
+	  (ret = xd3_emit_byte (stream, & HDR_TAIL (stream),
+				stream->sec_type->id)))
+	{
+	  return ret;
+	}
+#endif
+
+      /* Application header */
+      if (use_appheader)
+	{
+	  if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream),
+				    stream->enc_appheadsz)) ||
+	      (ret = xd3_emit_bytes (stream, & HDR_TAIL (stream),
+				     stream->enc_appheader,
+				     stream->enc_appheadsz)))
+	    {
+	      return ret;
+	    }
+	}
+    }
+
+  /* try to compress this window */
+#if SECONDARY_ANY
+  if (use_secondary)
+    {
+      int data_sec = 0;
+      int inst_sec = 0;
+      int addr_sec = 0;
+
+      /* Expands to a condition that is true only when the section is
+       * not excluded by its XD3_SEC_NO<UPPER> flag and
+       * xd3_encode_secondary returned non-zero (stored in ret).  The
+       * <LOWER>_sec local is passed by address to that call. */
+#     define ENCODE_SECONDARY_SECTION(UPPER,LOWER) \
+             ((stream->flags & XD3_SEC_NO ## UPPER) == 0 && \
+              (ret = xd3_encode_secondary (stream, \
+					   & UPPER ## _HEAD (stream), \
+					   & UPPER ## _TAIL (stream), \
+					& xd3_sec_ ## LOWER (stream), \
+				        & stream->sec_ ## LOWER, \
+					   & LOWER ## _sec)))
+
+      if (ENCODE_SECONDARY_SECTION (DATA, data) ||
+	  ENCODE_SECONDARY_SECTION (INST, inst) ||
+	  ENCODE_SECONDARY_SECTION (ADDR, addr))
+	{
+	  return ret;
+	}
+
+      /* Record in the delta indicator which sections ended up
+       * flagged by the secondary encoder. */
+      del_ind |= (data_sec ? VCD_DATACOMP : 0);
+      del_ind |= (inst_sec ? VCD_INSTCOMP : 0);
+      del_ind |= (addr_sec ? VCD_ADDRCOMP : 0);
+    }
+#endif
+
+  /* if (vcd_target) { win_ind |= VCD_TARGET; } */
+  if (vcd_source)  { win_ind |= VCD_SOURCE; }
+  if (use_adler32) { win_ind |= VCD_ADLER32; }
+
+  /* window indicator */
+  if ((ret = xd3_emit_byte (stream, & HDR_TAIL (stream), win_ind)))
+    {
+      return ret;
+    }
+
+  /* source window */
+  if (vcd_source)
+    {
+      /* or (vcd_target) { ... } */
+      if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream),
+				stream->src->srclen)) ||
+	  (ret = xd3_emit_offset (stream, & HDR_TAIL (stream),
+				  stream->src->srcbase))) { return ret; }
+    }
+
+  /* Section lengths are measured here, i.e. after any secondary
+   * compression above. */
+  tgt_len  = stream->avail_in;
+  data_len = xd3_sizeof_output (DATA_HEAD (stream));
+  inst_len = xd3_sizeof_output (INST_HEAD (stream));
+  addr_len = xd3_sizeof_output (ADDR_HEAD (stream));
+
+  /* The enc_len field is a redundancy for future extensions.  It
+   * counts everything that follows it in the window: one byte for
+   * the delta indicator, the four encoded size fields, the three
+   * sections, and the optional 4-byte checksum. */
+  enc_len = (1 + (xd3_sizeof_size (tgt_len) +
+		  xd3_sizeof_size (data_len) +
+		  xd3_sizeof_size (inst_len) +
+		  xd3_sizeof_size (addr_len)) +
+	     data_len +
+	     inst_len +
+	     addr_len +
+	     (use_adler32 ? 4 : 0));
+
+  if ((ret = xd3_emit_size (stream, & HDR_TAIL (stream), enc_len)) ||
+      (ret = xd3_emit_size (stream, & HDR_TAIL (stream), tgt_len)) ||
+      (ret = xd3_emit_byte (stream, & HDR_TAIL (stream), del_ind)) ||
+      (ret = xd3_emit_size (stream, & HDR_TAIL (stream), data_len)) ||
+      (ret = xd3_emit_size (stream, & HDR_TAIL (stream), inst_len)) ||
+      (ret = xd3_emit_size (stream, & HDR_TAIL (stream), addr_len)))
+    {
+      return ret;
+    }
+
+  if (use_adler32)
+    {
+      uint8_t  send[4];
+      uint32_t a32;
+
+      /* With XD3_ADLER32 the checksum is computed over the input
+       * window; otherwise (recode) a previously stored value is
+       * reused. */
+      if (stream->flags & XD3_ADLER32)
+	{
+	  a32 = adler32 (1L, stream->next_in, stream->avail_in);
+	}
+      else
+	{
+	  a32 = stream->recode_adler32;
+	}
+
+      /* Four bytes. */
+      send[0] = (uint8_t) (a32 >> 24);
+      send[1] = (uint8_t) (a32 >> 16);
+      send[2] = (uint8_t) (a32 >> 8);
+      send[3] = (uint8_t) (a32 & 0x000000FFU);
+
+      if ((ret = xd3_emit_bytes (stream, & HDR_TAIL (stream), send, 4)))
+	{
+	  return ret;
+	}
+    }
+
+  return 0;
+}
+
+/****************************************************************
+ Encode routines
+ ****************************************************************/
+
+/* Accumulates input in stream->buf_in until a full window (winsize
+ * bytes) is available or XD3_FLUSH is set.
+ *
+ * Any input left over from the previous call is copied in first,
+ * followed by as much of the caller's current input as fits.  Input
+ * that does not fit is remembered in buf_leftover/buf_leftavail.
+ * Returns XD3_INPUT when more input is wanted, ENOMEM when the
+ * buffer cannot be allocated, and 0 once next_in/avail_in have been
+ * redirected to the buffered window. */
+static int
+xd3_encode_buffer_leftover (xd3_stream *stream)
+{
+  usize_t space;
+  usize_t take;
+
+  /* The window buffer is created on first use. */
+  if (stream->buf_in == NULL)
+    {
+      stream->buf_in = (uint8_t*) xd3_alloc (stream, stream->winsize, 1);
+
+      if (stream->buf_in == NULL)
+	{
+	  return ENOMEM;
+	}
+    }
+
+  IF_DEBUG2 (DP(RINT "[leftover] flush?=%s\n", (stream->flags & XD3_FLUSH) ? "yes" : "no"));
+
+  /* Input carried over from the previous call goes in first. */
+  if (stream->buf_leftover != NULL)
+    {
+      XD3_ASSERT (stream->buf_avail == 0);
+      XD3_ASSERT (stream->buf_leftavail < stream->winsize);
+
+      IF_DEBUG2 (DP(RINT "[leftover] previous %"W"u avail %"W"u\n",
+		    stream->buf_leftavail, stream->avail_in));
+
+      memcpy (stream->buf_in, stream->buf_leftover, stream->buf_leftavail);
+
+      stream->buf_avail    = stream->buf_leftavail;
+      stream->buf_leftover = NULL;
+    }
+
+  /* Append as much of the current input as the buffer can hold. */
+  space = stream->winsize - stream->buf_avail;
+  take  = xd3_min (space, stream->avail_in);
+
+  memcpy (stream->buf_in + stream->buf_avail, stream->next_in, take);
+
+  stream->buf_avail += take;
+
+  if (take < stream->avail_in)
+    {
+      /* The buffer filled up: remember the unconsumed remainder. */
+      stream->buf_leftover  = stream->next_in  + take;
+      stream->buf_leftavail = stream->avail_in - take;
+    }
+  else
+    {
+      const int is_flush = (stream->flags & XD3_FLUSH) != 0;
+
+      if (stream->buf_avail < stream->winsize && ! is_flush)
+	{
+	  /* Still room and no flush requested: ask for more input. */
+	  IF_DEBUG2 (DP(RINT "[leftover] emptied %"W"u\n", take));
+	  return XD3_INPUT;
+	}
+    }
+
+  /* Hand the buffered window to the encoder. */
+  IF_DEBUG2 (DP(RINT "[leftover] take %"W"u remaining %"W"u\n", take, stream->buf_leftavail));
+  stream->next_in   = stream->buf_in;
+  stream->avail_in  = stream->buf_avail;
+  stream->buf_avail = 0;
+
+  return 0;
+}
+
+/* Allocates one block of ELTS xd3_rinst elements, records the block
+ * at the head of stream->iopt_alloc, and pushes every element onto
+ * the iopt free list.
+ *
+ * Returns 0 on success or ENOMEM when either allocation fails.  On
+ * failure nothing is linked into the stream, and the list node that
+ * was already allocated is released instead of being leaked. */
+static int
+xd3_alloc_iopt (xd3_stream *stream, usize_t elts)
+{
+  usize_t i;
+  xd3_iopt_buflist* last =
+    (xd3_iopt_buflist*) xd3_alloc (stream, sizeof (xd3_iopt_buflist), 1);
+
+  if (last == NULL)
+    {
+      return ENOMEM;
+    }
+
+  last->buffer = (xd3_rinst*) xd3_alloc (stream, sizeof (xd3_rinst), elts);
+
+  if (last->buffer == NULL)
+    {
+      /* The node is not yet reachable from stream->iopt_alloc, so
+       * no later cleanup could find it: free it here. */
+      xd3_free (stream, last);
+      return ENOMEM;
+    }
+
+  last->next = stream->iopt_alloc;
+  stream->iopt_alloc = last;
+
+  for (i = 0; i < elts; i += 1)
+    {
+      xd3_rlist_push_back (& stream->iopt_free, & last->buffer[i]);
+    }
+
+  return 0;
+}
+
+/* Allocates the memory the encoder needs up front.
+ *
+ * With FULL_INIT non-zero the hash tables are sized as well: the
+ * large table when a source is attached, the small table unless
+ * XD3_NOCOMPRESS is set.  In both modes one output page per encoding
+ * section, the iopt buffer and the address cache are set up.
+ * Returns 0 on success, ENOMEM for a failed allocation, or the error
+ * reported by xd3_size_hashtable / xd3_alloc_cache. */
+static int
+xd3_encode_init (xd3_stream *stream, int full_init)
+{
+  int ret;
+  int sect;
+
+  if (full_init)
+    {
+      const int have_source   = (stream->src != NULL);
+      const int want_compress = ! (stream->flags & XD3_NOCOMPRESS);
+
+      /* Only the sizes are decided here.  The checksum tables are
+       * allocated later, by xd3_string_match_init on the first call
+       * to string_match, so identical or short inputs never pay for
+       * a table. */
+      if (have_source)
+	{
+	  /* TODO Need to check for overflow here. */
+	  usize_t large_slots = stream->src->max_winsize /
+	                        stream->smatcher.large_step;
+
+	  ret = xd3_size_hashtable (stream,
+				    large_slots,
+				    stream->smatcher.large_look,
+				    & stream->large_hash);
+	  if (ret != 0)
+	    {
+	      return ret;
+	    }
+	}
+
+      if (want_compress)
+	{
+	  /* TODO: Still under development.  This used to be capped
+	   * with min (sprevsz), which is plausible, but larger tables
+	   * were observed to perform well too. @@@ */
+	  usize_t small_slots = stream->winsize;
+
+	  ret = xd3_size_hashtable (stream,
+				    small_slots,
+				    stream->smatcher.small_look,
+				    & stream->small_hash);
+	  if (ret != 0)
+	    {
+	      return ret;
+	    }
+	}
+    }
+
+  /* One initial output page per section; head and tail both start
+   * on that page. */
+  for (sect = 0; sect < ENC_SECTS; sect += 1)
+    {
+      xd3_output *page = xd3_alloc_output (stream, NULL);
+
+      stream->enc_tails[sect] = page;
+      stream->enc_heads[sect] = page;
+
+      if (page == NULL)
+	{
+	  return ENOMEM;
+	}
+    }
+
+  /* Instruction buffer: both lists start empty, then the free list
+   * receives iopt_size slots.  Any failure is reported as ENOMEM. */
+  xd3_rlist_init (& stream->iopt_used);
+  xd3_rlist_init (& stream->iopt_free);
+
+  if (xd3_alloc_iopt (stream, stream->iopt_size) != 0)
+    {
+      return ENOMEM;
+    }
+
+  XD3_ASSERT (xd3_rlist_length (& stream->iopt_free) == stream->iopt_size);
+  XD3_ASSERT (xd3_rlist_length (& stream->iopt_used) == 0);
+
+  /* Address cache geometry and code table come from the stream's
+   * code table description. */
+  stream->acache.s_near = stream->code_table_desc->near_modes;
+  stream->acache.s_same = stream->code_table_desc->same_modes;
+  stream->code_table    = stream->code_table_func ();
+
+  return xd3_alloc_cache (stream);
+}
+
+/* Encoder setup including hash-table sizing (full_init = 1). */
+int
+xd3_encode_init_full (xd3_stream *stream)
+{
+  const int full_init = 1;
+
+  return xd3_encode_init (stream, full_init);
+}
+
+/* Encoder setup without hash-table sizing (full_init = 0). */
+int
+xd3_encode_init_partial (xd3_stream *stream)
+{
+  const int full_init = 0;
+
+  return xd3_encode_init (stream, full_init);
+}
+
+/* Prepares the stream for the next window once the current one has
+ * been output.  The output pages, which were chained into a single
+ * list while flushing, are split up again: the first ENC_SECTS pages
+ * each become the empty head/tail of one section and the rest are
+ * returned through xd3_freelist_output.  Per-window counters and
+ * source-window state are cleared as well. */
+static void
+xd3_encode_reset (xd3_stream *stream)
+{
+  int i;
+  xd3_output *olist;
+
+  stream->i_slots_used = 0;
+  stream->small_reset  = 1;
+  stream->avail_in     = 0;
+
+  if (stream->src != NULL)
+    {
+      stream->src->srclen    = 0;
+      stream->src->srcbase   = 0;
+      stream->taroff         = 0;
+      stream->match_maxaddr  = 0;
+      stream->match_minaddr  = 0;
+      stream->srcwin_decided_early = 0;
+      stream->srcwin_decided = 0;
+    }
+
+  /* Walk the chained pages, starting from the first section's head. */
+  olist = stream->enc_heads[0];
+
+  for (i = 0; i < ENC_SECTS; i += 1)
+    {
+      xd3_output *page;
+
+      XD3_ASSERT (olist != NULL);
+
+      /* Detach the current page and make it this section's only,
+       * empty page. */
+      page  = olist;
+      olist = page->next_page;
+
+      page->next      = 0;
+      page->next_page = NULL;
+
+      stream->enc_heads[i] = page;
+      stream->enc_tails[i] = page;
+    }
+
+  /* Whatever pages remain are no longer needed for this window. */
+  xd3_freelist_output (stream, olist);
+}
+
+/* The main encoding routine.
+ *
+ * This is a resumable state machine keyed on stream->enc_state: each
+ * call picks up at the state left by the previous return, and most
+ * case labels deliberately fall through into the next state.
+ *
+ * Return values produced directly in this function:
+ *   XD3_INPUT      more input is wanted;
+ *   XD3_WINSTART   a window is about to be searched;
+ *   XD3_OUTPUT     next_out/avail_out describe one output buffer;
+ *   XD3_WINFINISH  all output for the window has been handed out;
+ *   XD3_INTERNAL   misuse (stream->msg is set).
+ * Any other non-zero value is passed through from a helper. */
+int
+xd3_encode_input (xd3_stream *stream)
+{
+  int ret, i;
+
+  /* A stream that has been used for decoding may not encode. */
+  if (stream->dec_state != 0)
+    {
+      stream->msg = "encoder/decoder transition";
+      return XD3_INTERNAL;
+    }
+
+  switch (stream->enc_state)
+    {
+    case ENC_INIT:
+      /* Only reached on first time through: memory setup. */
+      if ((ret = xd3_encode_init_full (stream))) { return ret; }
+
+      stream->enc_state = ENC_INPUT;
+
+      /* fallthrough */
+    case ENC_INPUT:
+
+      /* If there is no input yet, just return.  This checks for
+       * next_in == NULL, not avail_in == 0 since zero bytes is a
+       * valid input.  There is an assertion in xd3_avail_input() that
+       * next_in != NULL for this reason.  By returning right away we
+       * avoid creating an input buffer before the caller has supplied
+       * its first data.  It is possible for xd3_avail_input to be
+       * called both before and after the first call to
+       * xd3_encode_input(). */
+      if (stream->next_in == NULL)
+	{
+	  return XD3_INPUT;
+	}
+
+    enc_flush:
+      /* See if we should buffer the input: either if there is already
+       * a leftover buffer, or if the input is short of winsize
+       * without flush.  The label at this point is reached by a goto
+       * below, when there is leftover input after postout. */
+      if ((stream->buf_leftover != NULL) ||
+	  (stream->buf_avail != 0) ||
+	  (stream->avail_in < stream->winsize && ! (stream->flags & XD3_FLUSH)))
+	{
+	  if ((ret = xd3_encode_buffer_leftover (stream))) { return ret; }
+	}
+
+      /* Initialize the address cache before each window. */
+      xd3_init_cache (& stream->acache);
+
+      stream->input_position    = 0;
+      stream->min_match = MIN_MATCH;
+      stream->unencoded_offset = 0;
+
+      stream->enc_state = ENC_SEARCH;
+
+      IF_DEBUG2 (DP(RINT "[WINSTART:%"Q"u] input bytes %"W"u offset %"Q"u\n",
+		    stream->current_window, stream->avail_in,
+		    stream->total_in));
+      return XD3_WINSTART;
+
+    case ENC_SEARCH:
+      IF_DEBUG2 (DP(RINT "[SEARCH] match_state %d avail_in %"W"u %s\n",
+		    stream->match_state, stream->avail_in,
+		    stream->src ? "source" : "no source"));
+
+      /* Reentrant matching. */
+      if (stream->src != NULL)
+	{
+	  switch (stream->match_state)
+	    {
+	    case MATCH_TARGET:
+	      /* Try matching forward at the start of the target.
+	       * This is entered the first time through, to check for
+	       * a perfect match, and whenever there is a source match
+	       * that extends to the end of the previous window.  The
+	       * match_srcpos field is initially zero and later set
+	       * during xd3_source_extend_match. */
+
+	      if (stream->avail_in > 0)
+		{
+		  /* This call can't fail because the source window is
+		   * unrestricted. */
+		  ret = xd3_source_match_setup (stream, stream->match_srcpos);
+		  XD3_ASSERT (ret == 0);
+		  stream->match_state = MATCH_FORWARD;
+		}
+	      else
+		{
+		  stream->match_state = MATCH_SEARCHING;
+		  stream->match_fwd = 0;
+		}
+	      XD3_ASSERT (stream->match_fwd == 0);
+
+	      /* fallthrough */
+	    case MATCH_FORWARD:
+	    case MATCH_BACKWARD:
+	      if (stream->avail_in != 0)
+		{
+		  if ((ret = xd3_source_extend_match (stream)) != 0)
+		    {
+		      return ret;
+		    }
+
+		  /* The search has to make forward progress here
+		   * or else it can get stuck in a match-backward
+		   * (getsrcblk) then match-forward (getsrcblk),
+		   * find insufficient match length, then repeat
+		   * exactly the same search.
+		   */
+		  stream->input_position += stream->match_fwd;
+		}
+
+	      /* fallthrough */
+	    case MATCH_SEARCHING:
+	      /* Continue string matching.  (It's possible that the
+	       * initial match continued through the entire input, in
+	       * which case we're still in MATCH_FORWARD and should
+	       * remain so for the next input window.) */
+	      break;
+	    }
+	}
+
+      /* String matching... */
+      if (stream->avail_in != 0 &&
+	  (ret = stream->smatcher.string_match (stream)))
+	{
+	  return ret;
+	}
+
+      stream->enc_state = ENC_INSTR;
+
+      /* fallthrough */
+    case ENC_INSTR:
+      /* Note: Jump here to encode VCDIFF deltas w/o using this
+       * string-matching code.  Merging code enters here. */
+
+      /* Flush the instruction buffer, then possibly add one more
+       * instruction, then emit the header. */
+      if ((ret = xd3_iopt_flush_instructions (stream, 1)) ||
+          (ret = xd3_iopt_add_finalize (stream)))
+	{
+	  return ret;
+	}
+
+      stream->enc_state = ENC_FLUSH;
+
+      /* fallthrough */
+    case ENC_FLUSH:
+      /* Note: main_recode_func() bypasses string-matching by setting
+       * ENC_FLUSH. */
+      if ((ret = xd3_emit_hdr (stream)))
+	{
+	  return ret;
+	}
+
+      /* Begin output. */
+      stream->enc_current = HDR_HEAD (stream);
+
+      /* Chain all the outputs together.  After doing this, it looks
+       * as if there is only one section.  The other enc_heads are set
+       * to NULL to avoid freeing them more than once. */
+       for (i = 1; i < ENC_SECTS; i += 1)
+	{
+	  stream->enc_tails[i-1]->next_page = stream->enc_heads[i];
+	  stream->enc_heads[i] = NULL;
+	}
+
+    enc_output:
+
+      /* Expose the current page to the caller; the state is already
+       * ENC_POSTOUT so the next call resumes after this page. */
+      stream->enc_state  = ENC_POSTOUT;
+      stream->next_out   = stream->enc_current->base;
+      stream->avail_out  = stream->enc_current->next;
+      stream->total_out += stream->avail_out;
+
+      /* If there is any output in this buffer, return it, otherwise
+       * fall through to handle the next buffer or finish the window
+       * after all buffers have been output. */
+      if (stream->avail_out > 0)
+	{
+	  /* This is the only place xd3_encode returns XD3_OUTPUT */
+	  return XD3_OUTPUT;
+	}
+
+      /* fallthrough */
+    case ENC_POSTOUT:
+
+      /* The caller must have consumed the previous output buffer
+       * (avail_out back to zero) before calling again. */
+      if (stream->avail_out != 0)
+	{
+	  stream->msg = "missed call to consume output";
+	  return XD3_INTERNAL;
+	}
+
+      /* Continue outputting one buffer at a time, until the next is NULL. */
+      if ((stream->enc_current = stream->enc_current->next_page) != NULL)
+	{
+	  goto enc_output;
+	}
+
+      stream->total_in += stream->avail_in;
+      stream->enc_state = ENC_POSTWIN;
+
+      IF_DEBUG2 (DP(RINT "[WINFINISH:%"Q"u] in=%"Q"u\n",
+		    stream->current_window,
+		    stream->total_in));
+      return XD3_WINFINISH;
+
+    case ENC_POSTWIN:
+
+      /* Split the output pages back into sections and clear the
+       * per-window state. */
+      xd3_encode_reset (stream);
+
+      stream->current_window += 1;
+      stream->enc_state = ENC_INPUT;
+
+      /* If there is leftover input to flush, repeat. */
+      if (stream->buf_leftover != NULL)
+	{
+	  goto enc_flush;
+	}
+
+      /* Ready for more input. */
+      return XD3_INPUT;
+
+    default:
+      stream->msg = "invalid state";
+      return XD3_INTERNAL;
+    }
+}
+#endif /* XD3_ENCODER */
+
+/*****************************************************************
+ Client convenience functions
+ ******************************************************************/
+/* Feeds INPUT to FUNC in slices of at most stream->winsize bytes and appends every output chunk to OUTPUT. */
+int
+xd3_process_stream (int            is_encode, /* not read by this function */
+		    xd3_stream    *stream,
+		    int          (*func) (xd3_stream *),
+		    int            close_stream, /* non-zero: finish with xd3_close_stream */
+		    const uint8_t *input,
+		    usize_t        input_size,
+		    uint8_t       *output,
+		    usize_t       *output_size, /* out: number of bytes written to OUTPUT */
+		    usize_t        output_size_max) /* capacity of OUTPUT in bytes */
+{
+  usize_t ipos = 0; /* bytes of INPUT already handed to the stream */
+  usize_t n = xd3_min (stream->winsize, input_size);
+  /* On success returns 0, or the result of xd3_close_stream when CLOSE_STREAM is non-zero. */
+  (*output_size) = 0;
+  /* Errors: ENOSPC if OUTPUT is too small, XD3_INTERNAL on a 0 or XD3_GETSRCBLK return, else FUNC's code. */
+  stream->flags |= XD3_FLUSH;
+  /* Prime the stream with the first slice. */
+  xd3_avail_input (stream, input + ipos, n);
+  ipos += n;
+
+  for (;;)
+    {
+      int ret;
+      switch ((ret = func (stream)))
+	{
+	case XD3_OUTPUT: { /* memcpy below */ break; }
+	case XD3_INPUT: {
+	  n = xd3_min(stream->winsize, input_size - ipos);
+	  if (n == 0) 
+	    {
+	      goto done;
+	    }
+	  xd3_avail_input (stream, input + ipos, n);
+	  ipos += n;
+	  continue;
+	}
+	case XD3_GOTHEADER: { /* ignore */ continue; }
+	case XD3_WINSTART: { /* ignore */ continue; }
+	case XD3_WINFINISH: { /* ignore */ continue; }
+	case XD3_GETSRCBLK:
+	  {
+	    /* When the getblk function is NULL, it is necessary to
+	     * provide the complete source as a single block using
+	     * xd3_set_source_and_size, otherwise this error.  The
+	     * library should never ask for another source block. */
+	    stream->msg = "library requested source block";
+	    return XD3_INTERNAL;
+	  }
+	case 0:
+	  {
+	    /* xd3_encode_input/xd3_decode_input never return 0 */
+	    stream->msg = "invalid return: 0";
+	    return XD3_INTERNAL;
+	  }
+	default:
+	  return ret;
+	}
+      /* *output_size never exceeds output_size_max, so test the remaining room; the sum could wrap usize_t. */
+      if (stream->avail_out > output_size_max - *output_size)
+	{
+	  stream->msg = "insufficient output space";
+	  return ENOSPC;
+	}
+
+      memcpy (output + *output_size, stream->next_out, stream->avail_out);
+
+      *output_size += stream->avail_out;
+
+      xd3_consume_output (stream);
+    }
+ done:
+  return (close_stream == 0) ? 0 : xd3_close_stream (stream);
+}
+/* One-shot helper: configures a temporary stream, optionally attaches SOURCE as a single block, runs FUNC over INPUT and frees the stream. */
+static int
+xd3_process_memory (int            is_encode,
+		    int          (*func) (xd3_stream *),
+		    const uint8_t *input,
+		    usize_t        input_size,
+		    const uint8_t *source, /* may be NULL: no source is attached */
+		    usize_t        source_size,
+		    uint8_t       *output,
+		    usize_t       *output_size, /* out: bytes written to OUTPUT */
+		    usize_t        output_size_max,
+		    int            flags) {
+  xd3_stream stream;
+  xd3_config config;
+  xd3_source src;
+  int ret; /* 0 on success; XD3_INTERNAL for NULL buffers; otherwise the failing callee's code */
+
+  memset (& stream, 0, sizeof (stream));
+  memset (& config, 0, sizeof (config));
+  /* xd3_process_stream stores through OUTPUT_SIZE unconditionally, so a NULL there is rejected as well. */
+  if (input == NULL || output == NULL || output_size == NULL) {
+    stream.msg = "invalid input/output buffer";
+    ret = XD3_INTERNAL;
+    goto exit;
+  }
+
+  config.flags = flags;
+  /* Encoding only: derive the window size (and sprevsz) from the input length. */
+  if (is_encode)
+    {
+      config.winsize = xd3_min(input_size, (usize_t) XD3_DEFAULT_WINSIZE);
+      config.sprevsz = xd3_pow2_roundup (config.winsize);
+    }
+
+  if ((ret = xd3_config_stream (&stream, &config)) != 0)
+    {
+      goto exit;
+    }
+  /* The whole source is described as one block (blksize == onblk == source_size). */
+  if (source != NULL)
+    {
+      memset (& src, 0, sizeof (src));
+
+      src.blksize = source_size;
+      src.onblk = source_size;
+      src.curblk = source;
+      src.curblkno = 0;
+      src.max_winsize = source_size;
+
+      if ((ret = xd3_set_source_and_size (&stream, &src, source_size)) != 0)
+	{
+	  goto exit;
+	}
+    }
+  /* close_stream is passed as 1, so the stream is closed when processing completes. */
+  if ((ret = xd3_process_stream (is_encode,
+				 & stream,
+				 func, 1,
+				 input, input_size,
+				 output,
+				 output_size,
+				 output_size_max)) != 0)
+    {
+      goto exit;
+    }
+  /* Every path, including the early failures above, reaches the cleanup below. */
+ exit:
+  if (ret != 0)
+    {
+      IF_DEBUG2 (DP(RINT "process_memory: %d: %s\n", ret, stream.msg));
+    }
+  xd3_free_stream(&stream);
+  return ret;
+}
+/* Decodes INPUT through an already-configured STREAM into OUTPUT; the stream is closed on completion. */
+int
+xd3_decode_stream (xd3_stream    *stream,
+		   const uint8_t *input,
+		   usize_t        input_size,
+		   uint8_t       *output,
+		   usize_t       *output_size,
+		   usize_t        output_size_max)
+{
+  const int is_encode = 0, close_stream = 1;
+  return xd3_process_stream (is_encode, stream, & xd3_decode_input, close_stream,
+			     input, input_size, output, output_size, output_size_max);
+}
+/* One-shot in-memory decode of INPUT (optionally against SOURCE) into OUTPUT via xd3_process_memory. */
+int
+xd3_decode_memory (const uint8_t *input,
+		   usize_t        input_size,
+		   const uint8_t *source,
+		   usize_t        source_size,
+		   uint8_t       *output,
+		   usize_t       *output_size,
+		   usize_t        output_size_max,
+		   int            flags) {
+  const int is_encode = 0;
+  return xd3_process_memory (is_encode, & xd3_decode_input,
+			     input, input_size, source, source_size,
+			     output, output_size, output_size_max,
+			     flags);
+}
+
+/* (Encoder builds only.) Encodes INPUT through an already-configured STREAM into OUTPUT; the stream is closed on completion. */
+#if XD3_ENCODER
+int
+xd3_encode_stream (xd3_stream    *stream,
+		   const uint8_t *input,
+		   usize_t         input_size,
+		   uint8_t       *output,
+		   usize_t        *output_size,
+		   usize_t         output_size_max)
+{
+  const int is_encode = 1, close_stream = 1;
+  return xd3_process_stream (is_encode, stream, & xd3_encode_input, close_stream,
+			     input, input_size, output, output_size, output_size_max);
+}
+/* One-shot in-memory encode of INPUT (optionally against SOURCE) into OUTPUT via xd3_process_memory. */
+int
+xd3_encode_memory (const uint8_t *input,
+		   usize_t        input_size,
+		   const uint8_t *source,
+		   usize_t        source_size,
+		   uint8_t       *output,
+		   usize_t        *output_size,
+		   usize_t        output_size_max,
+		   int            flags) {
+  const int is_encode = 1;
+  return xd3_process_memory (is_encode, & xd3_encode_input,
+			     input, input_size, source, source_size,
+			     output, output_size, output_size_max,
+			     flags);
+}
+#endif
+
+
+/*************************************************************
+ String matching helpers
+ *************************************************************/
+
+#if XD3_ENCODER
+/* Do the initial xd3_string_match() checksum table setup.  Allocations are
+ * delayed until first use to avoid allocation sometimes (e.g., perfect
+ * matches, zero-length inputs).  Returns 0 on success or ENOMEM. */
+static int
+xd3_string_match_init (xd3_stream *stream)
+{
+  const int DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS); /* small table wanted unless XD3_NOCOMPRESS */
+  const int DO_LARGE = (stream->src != NULL); /* large table wanted only when a source is set */
+  /* The large table is allocated once, the first time it is found to be NULL. */
+  if (DO_LARGE && stream->large_table == NULL)
+    {
+      if ((stream->large_table =
+	   (usize_t*) xd3_alloc0 (stream, stream->large_hash.size, sizeof (usize_t))) == NULL)
+	{
+	  return ENOMEM;
+	}
+    }
+
+  if (DO_SMALL)
+    {
+      /* Subsequent calls can return immediately after checking reset. */
+      if (stream->small_table != NULL)
+	{
+	  /* The target hash table is reinitialized once per window. */
+	  /* TODO: This would not have to be reinitialized if absolute
+	   * offsets were being stored. */
+	  if (stream->small_reset)
+	    {
+	      stream->small_reset = 0;
+	      memset (stream->small_table, 0,
+		      sizeof (usize_t) * stream->small_hash.size);
+	    }
+
+	  return 0;
+	}
+      /* First use: allocate the small table. */
+      if ((stream->small_table =
+	   (usize_t*) xd3_alloc0 (stream,
+				  stream->small_hash.size,
+				  sizeof (usize_t))) == NULL)
+	{
+	  return ENOMEM;
+	}
+
+      /* A previous-position table is needed only when either chain length exceeds 1. */
+      if (stream->smatcher.small_lchain > 1 ||
+	  stream->smatcher.small_chain > 1)
+	{
+	  if ((stream->small_prev =
+	       (xd3_slist*) xd3_alloc (stream,
+				       stream->sprevsz,
+				       sizeof (xd3_slist))) == NULL)
+	    {
+	      return ENOMEM;
+	    }
+	}
+    }
+
+  return 0;
+}
+
+#if XD3_USE_LARGEFILE64 && !XD3_USE_LARGESIZET
+/* Resolves the 32/64bit ambiguity: file positions are 64bit but the
+ * source hash table holds 32bit offsets.  LOW is widened using the upper
+ * word of stream->srcwin_cksum_pos. */
+static xoff_t
+xd3_source_cksum_offset(xd3_stream *stream, usize_t low)
+{
+  const xoff_t  next_pos = stream->srcwin_cksum_pos;
+  const usize_t next_low = (usize_t) next_pos;
+  xoff_t        upper    = next_pos >> 32;
+  if (upper != 0)
+    {
+      /* Strictly greater-than (not >=) because srcwin_cksum_pos is the
+       * next position to index; in that case use the preceding upper word. */
+      if (low > next_low)
+	{
+	  upper -= 1;
+	}
+      return (upper << 32) | low;
+    }
+  return low;
+}
+#else
+/* In this configuration the stored offset is returned unchanged. */
+static xoff_t
+xd3_source_cksum_offset(xd3_stream *stream, usize_t low)
+{
+  return low;
+}
+#endif
+
+/* This function sets up the stream->src fields srcbase, srclen (and
+ * stream->taroff).  The call is delayed until these values are needed to encode
+ * a copy address; the decision is made here.  Returns 0 or XD3_INTERNAL. */
+static int
+xd3_srcwin_setup (xd3_stream *stream)
+{
+  xd3_source *src = stream->src;
+  xoff_t length, x; /* length: span of matched source addresses; x: USIZE_T_MAX widened for the comparison */
+
+  /* Check the undecided state. */
+  XD3_ASSERT (src->srclen == 0 && src->srcbase == 0);
+
+  /* Avoid repeating this call. */
+  stream->srcwin_decided = 1;
+
+  /* If the stream is flushing, then the iopt buffer was able to
+   * contain the complete encoding.  If no copies were issued no
+   * source window is actually needed.  This prevents the VCDIFF
+   * header from including source base/len.  xd3_emit_hdr checks for
+   * srclen == 0. */
+  if (stream->enc_state == ENC_INSTR && stream->match_maxaddr == 0)
+    {
+      goto done;
+    }
+
+  /* Check for overflow, srclen is usize_t - this can't happen unless
+   * XD3_DEFAULT_SRCBACK and related parameters are extreme - should
+   * use smaller windows. */
+  length = stream->match_maxaddr - stream->match_minaddr;
+
+  x = USIZE_T_MAX;
+  if (length > x)
+    {
+      stream->msg = "source window length overflow (not 64bit)";
+      return XD3_INTERNAL;
+    }
+
+  /* If ENC_INSTR, then we know the exact source window to use because
+   * no more copies can be issued. */
+  if (stream->enc_state == ENC_INSTR)
+    {
+      src->srcbase = stream->match_minaddr;
+      src->srclen  = (usize_t) length;
+      XD3_ASSERT (src->srclen);
+      goto done;
+    }
+
+  /* Otherwise, we have to make a guess.  More copies may still be
+   * issued, but we have to decide the source window base and length
+   * now: the larger of the observed range and 1.25 times avail_in.
+   * TODO: This may not work well in practice; more testing is needed. */
+  src->srcbase = stream->match_minaddr;
+  src->srclen  = xd3_max ((usize_t) length,
+			  stream->avail_in + (stream->avail_in >> 2));
+
+  if (src->eof_known)
+    {
+      /* Note: if the source size is known, we must reduce srclen or
+       * code that expects to pass a single block w/ getblk == NULL
+       * will not function, as the code will return GETSRCBLK asking
+       * for the second block. */
+      src->srclen = xd3_min (src->srclen, xd3_source_eof(src) - src->srcbase);
+    }
+  IF_DEBUG1 (DP(RINT "[srcwin_setup_constrained] base %"Q"u len %"W"u\n",
+		src->srcbase, src->srclen));
+
+  XD3_ASSERT (src->srclen);
+ done:
+  /* Set the taroff.  This convenience variable is used even when
+     stream->src == NULL. */
+  stream->taroff = src->srclen;
+  return 0;
+}
+
+/* Sets the bounding region for a newly discovered source match, prior
+ * to calling xd3_source_extend_match().  This sets the match_maxfwd,
+ * match_maxback variables.  Note: srcpos is an absolute position
+ * (xoff_t) but the match_maxfwd, match_maxback variables are usize_t.
+ * Returns 0 if the setup succeeds (match_state becomes MATCH_BACKWARD), or 1 if srcpos is rejected:
+ * it repeats match_last_srcpos, is beyond src->max_winsize, or lies outside a decided srcbase/srclen window. */
+static int
+xd3_source_match_setup (xd3_stream *stream, xoff_t srcpos)
+{
+  xd3_source *const src = stream->src;
+  usize_t greedy_or_not; /* target position that backward matching may not go below */
+
+  stream->match_maxback = 0;
+  stream->match_maxfwd  = 0;
+  stream->match_back    = 0;
+  stream->match_fwd     = 0;
+
+  /* This avoids a non-blocking endless loop caused by scanning
+   * backwards across a block boundary, only to find not enough
+   * matching bytes to beat the current min_match due to a better lazy
+   * target match: the re-entry to xd3_string_match() repeats the same
+   * long match because the input position hasn't changed.  TODO: if
+   * ever duplicates are added to the source hash table, this logic
+   * won't suffice to avoid loops.  See testing/regtest.cc's
+   * TestNonBlockingProgress test! */
+  if (srcpos != 0 && srcpos == stream->match_last_srcpos)
+    {
+      IF_DEBUG2(DP(RINT "[match_setup] looping failure\n"));
+      goto bad;
+    }
+
+  /* Implement src->max_winsize, which prevents the encoder from seeking
+   * back further than the LRU cache maintaining FIFO discipline, (to
+   * avoid seeking). */
+  if (srcpos < stream->srcwin_cksum_pos &&
+      stream->srcwin_cksum_pos - srcpos > src->max_winsize)
+    {
+      IF_DEBUG2(DP(RINT "[match_setup] rejected due to src->max_winsize "
+		   "distance eof=%"Q"u srcpos=%"Q"u max_winsz=%"Q"u\n",
+		   xd3_source_eof (src),
+		   srcpos, src->max_winsize));
+      goto bad;
+    }
+
+  /* There are cases where the above test does not reject a match that
+   * will experience XD3_TOOFARBACK at the first xd3_getblk call
+   * because the input may have advanced up to one block beyond the
+   * actual EOF. */
+  IF_DEBUG2(DP(RINT "[match_setup] %"Q"u srcpos %"Q"u, "
+	       "src->max_winsize %"Q"u\n",
+	       stream->total_in + stream->input_position,
+	       srcpos, src->max_winsize));
+
+  /* Going backwards, the 1.5-pass algorithm allows some
+   * already-matched input to be covered by a longer source match.
+   * The greedy algorithm does not allow this.
+   * TODO: Measure this. */
+  if (stream->flags & XD3_BEGREEDY)
+    {
+      /* The greedy algorithm allows backward matching to the last
+       * matched position. */
+      greedy_or_not = xd3_iopt_last_matched (stream);
+    }
+  else
+    {
+      /* The 1.5-pass algorithm allows backward matching to go back as
+       * far as the unencoded offset, which is updated as instructions
+       * pass out of the iopt buffer.  If this (default) is chosen, it
+       * means xd3_iopt_erase may be called to eliminate instructions
+       * when a covering source match is found. */
+      greedy_or_not = stream->unencoded_offset;
+    }
+
+  /* Backward target match limit. */
+  XD3_ASSERT (stream->input_position >= greedy_or_not);
+  stream->match_maxback = stream->input_position - greedy_or_not;
+
+  /* Forward target match limit. */
+  XD3_ASSERT (stream->avail_in > stream->input_position);
+  stream->match_maxfwd = stream->avail_in - stream->input_position;
+
+  /* Now we take the source position into account.  It depends whether
+   * the srclen/srcbase have been decided yet. */
+  if (stream->srcwin_decided == 0)
+    {
+      /* Unrestricted case: the match can cover the entire source,
+       * 0--src->size.  We compare the usize_t
+       * match_maxfwd/match_maxback against the xoff_t
+       * src->size/srcpos values and take the min. */
+      /* TODO #if XD3_USE_LARGESIZET ? */
+      if (srcpos < stream->match_maxback)
+	{
+	  stream->match_maxback = (usize_t) srcpos;
+	}
+
+      if (src->eof_known)
+	{
+	  xoff_t srcavail = xd3_source_eof (src) - srcpos;
+
+	  if (srcavail < stream->match_maxfwd)
+	    {
+	      stream->match_maxfwd = (usize_t) srcavail;
+	    }
+	}
+
+      IF_DEBUG2(DP(RINT
+		   "[match_setup] srcpos %"Q"u (tgtpos %"Q"u) "
+		   "unrestricted maxback %"W"u maxfwd %"W"u\n",
+		   srcpos,
+		   stream->total_in + stream->input_position,
+		   stream->match_maxback,
+		   stream->match_maxfwd));
+      goto good;
+    }
+
+  /* Decided some source window. */
+  XD3_ASSERT (src->srclen > 0);
+
+  /* Restricted case: fail if the srcpos lies outside the source window (srcpos == srcbase + srclen passes and yields match_maxfwd == 0) */
+  if ((srcpos < src->srcbase) ||
+      (srcpos > (src->srcbase + src->srclen)))
+    {
+      IF_DEBUG1(DP(RINT "[match_setup] restricted source window failure\n"));
+      goto bad;
+    }
+  else
+    {
+      usize_t srcavail; /* first the room behind srcpos inside the window, then the room ahead of it */
+
+      srcavail = (usize_t) (srcpos - src->srcbase);
+      if (srcavail < stream->match_maxback)
+	{
+	  stream->match_maxback = srcavail;
+	}
+
+      srcavail = src->srcbase + src->srclen - srcpos;
+      if (srcavail < stream->match_maxfwd)
+	{
+	  stream->match_maxfwd = srcavail;
+	}
+
+      IF_DEBUG2(DP(RINT
+		   "[match_setup] srcpos %"Q"u (tgtpos %"Q"u) "
+		   "restricted maxback %"W"u maxfwd %"W"u\n",
+		   srcpos,
+		   stream->total_in + stream->input_position,
+		   stream->match_maxback,
+		   stream->match_maxfwd));
+      goto good;
+    }
+
+ good:
+  stream->match_state  = MATCH_BACKWARD;
+  stream->match_srcpos = srcpos;
+  stream->match_last_srcpos = srcpos;
+  return 0;
+
+ bad:
+  stream->match_state  = MATCH_SEARCHING;
+  stream->match_last_srcpos = srcpos;
+  return 1;
+}
+/* Returns the length of the common prefix of S1C and S2C, examining at most N bytes. */
+static inline usize_t
+xd3_forward_match(const uint8_t *s1c, const uint8_t *s2c, usize_t n)
+{
+  usize_t i = 0; /* int index inside the UNALIGNED_OK block, byte index afterwards */
+#if UNALIGNED_OK
+  usize_t nint = n / sizeof(int);
+
+  if (nint >> 3) /* only when at least 8 whole ints fit in N */
+    {
+      usize_t j = 0;
+      const int *s1 = (const int*)s1c;
+      const int *s2 = (const int*)s2c;
+      usize_t nint_8 = nint - 8;
+
+      while (i <= nint_8 &&
+	     s1[i++] == s2[j++] &&
+	     s1[i++] == s2[j++] &&
+	     s1[i++] == s2[j++] &&
+	     s1[i++] == s2[j++] &&
+	     s1[i++] == s2[j++] &&
+	     s1[i++] == s2[j++] &&
+	     s1[i++] == s2[j++] &&
+	     s1[i++] == s2[j++]) { }
+      /* i is one past the last int compared: step back one int, convert to bytes, and let the byte loop finish. */
+      i = (i - 1) * sizeof(int);
+    }
+#endif
+  /* Byte-wise comparison from position i up to N. */
+  while (i < n && s1c[i] == s2c[i])
+    {
+      i++;
+    }
+  return i;
+}
+
+/* This function expands the source match backward and forward.  It is
+ * reentrant, since xd3_getblk may return XD3_GETSRCBLK, so most
+ * variables are kept in xd3_stream.  There are two callers of this
+ * function, the string_matching routine when a checksum match is
+ * discovered, and xd3_encode_input whenever a continuing (or initial)
+ * match is suspected.  The two callers do different things with the
+ * input_position, thus this function leaves that variable untouched.
+ * If a match is taken the resulting stream->match_fwd is left
+ * non-zero.  Returns 0, or a non-zero code from xd3_getblk or xd3_found_match. */
+static int
+xd3_source_extend_match (xd3_stream *stream)
+{
+  int ret;
+  xd3_source *const src = stream->src;
+  xoff_t matchoff;  /* matchoff is the current right/left-boundary of
+		       the source match being tested. */
+  usize_t streamoff; /* streamoff is the current right/left-boundary
+			of the input match being tested. */
+  xoff_t tryblk;    /* tryblk, tryoff are the block, offset position
+		       of matchoff */
+  usize_t tryoff;
+  usize_t tryrem;    /* tryrem is the number of matchable bytes */
+  usize_t matched;   /* bytes matched by one xd3_forward_match call */
+
+  IF_DEBUG2(DP(RINT "[extend match] srcpos %"Q"u\n",
+	       stream->match_srcpos));
+
+  XD3_ASSERT (src != NULL);
+
+  /* Does it make sense to compute backward match AFTER forward match? */
+  if (stream->match_state == MATCH_BACKWARD)
+    {
+      /* Note: this code is practically duplicated below, substituting
+       * match_fwd/match_back and direction. */
+      matchoff  = stream->match_srcpos - stream->match_back;
+      streamoff = stream->input_position - stream->match_back;
+      xd3_blksize_div (matchoff, src, &tryblk, &tryoff);
+
+      /* this loops backward over source blocks */
+      while (stream->match_back < stream->match_maxback)
+	{
+	  /* see if we're backing across a source block boundary */
+	  if (tryoff == 0)
+	    {
+	      tryoff  = src->blksize;
+	      tryblk -= 1;
+	    }
+
+	  if ((ret = xd3_getblk (stream, tryblk)))
+	    {
+	      if (ret == XD3_TOOFARBACK)
+		{
+		  IF_DEBUG2(DP(RINT "[maxback] %"Q"u TOOFARBACK: %"W"u INP %"Q"u CKSUM %"Q"u\n",
+			       tryblk, stream->match_back,
+			       stream->total_in + stream->input_position,
+			       stream->srcwin_cksum_pos));
+
+		  /* the starting position is too far back. */
+		  if (stream->match_back == 0)
+		    {
+		      XD3_ASSERT(stream->match_fwd == 0);
+		      goto donefwd;
+		    }
+
+		  /* search went too far back, continue forward. */
+		  goto doneback;
+		}
+
+	      /* could be a XD3_GETSRCBLK failure. */
+	      return ret;
+	    }
+
+	  tryrem = xd3_min (tryoff, stream->match_maxback - stream->match_back);
+
+	  IF_DEBUG2(DP(RINT "[maxback] maxback %"W"u trysrc %"Q"u/%"W"u tgt %"W"u tryrem %"W"u\n",
+		       stream->match_maxback, tryblk, tryoff, streamoff, tryrem));
+
+	  /* TODO: This code can be optimized similar to xd3_forward_match() */
+	  for (; tryrem != 0; tryrem -= 1, stream->match_back += 1)
+	    {
+	      if (src->curblk[tryoff-1] != stream->next_in[streamoff-1])
+		{
+		  goto doneback;
+		}
+
+	      tryoff    -= 1;
+	      streamoff -= 1;
+	    }
+	}
+
+    doneback:
+      stream->match_state = MATCH_FORWARD;
+    }
+
+  XD3_ASSERT (stream->match_state == MATCH_FORWARD);
+
+  matchoff  = stream->match_srcpos + stream->match_fwd;
+  streamoff = stream->input_position + stream->match_fwd;
+  xd3_blksize_div (matchoff, src, & tryblk, & tryoff);
+
+  /* Note: practically the same code as backwards case above: same comments */
+  while (stream->match_fwd < stream->match_maxfwd)
+    {
+      if (tryoff == src->blksize)
+	{
+	  tryoff  = 0;
+	  tryblk += 1;
+	}
+
+      if ((ret = xd3_getblk (stream, tryblk)))
+	{
+	  if (ret == XD3_TOOFARBACK)
+	    {
+	      IF_DEBUG2(DP(RINT "[maxfwd] %"Q"u TOOFARBACK: %"W"u INP %"Q"u CKSUM %"Q"u\n",
+			   tryblk, stream->match_fwd,
+			   stream->total_in + stream->input_position,
+			   stream->srcwin_cksum_pos));
+	      goto donefwd;
+	    }
+
+	  /* could be a XD3_GETSRCBLK failure. */
+	  return ret;
+	}
+
+      tryrem = xd3_min(stream->match_maxfwd - stream->match_fwd,
+		   src->onblk - tryoff);
+
+      if (tryrem == 0)
+	{
+	  /* Generally, this means we have a power-of-two size source
+	   * and we just found the end-of-file, in this case it's an
+	   * empty block. */
+	  XD3_ASSERT (src->onblk < src->blksize);
+	  break;
+	}
+
+      matched = xd3_forward_match(src->curblk + tryoff,
+				  stream->next_in + streamoff,
+				  tryrem);
+      tryoff += matched;
+      streamoff += matched;
+      stream->match_fwd += matched;
+
+      if (tryrem != matched)
+	{
+	  break;
+	}
+    }
+
+ donefwd:
+  stream->match_state = MATCH_SEARCHING;
+
+  IF_DEBUG2(DP(RINT "[extend match] input %"Q"u srcpos %"Q"u len %"W"u\n",
+	       stream->input_position + stream->total_in,
+	       stream->match_srcpos,
+	       stream->match_fwd));
+
+  /* If the match ends short of the last instruction end, we probably
+   * don't want it.  There is the possibility that a copy ends short
+   * of the last copy but also goes further back, in which case we
+   * might want it.  This code does not implement such: if so we would
+   * need more complicated xd3_iopt_erase logic. */
+  if (stream->match_fwd < stream->min_match)
+    {
+      stream->match_fwd = 0;
+    }
+  else
+    {
+      usize_t total  = stream->match_fwd + stream->match_back; /* same value as match_length below */
+
+      /* Correct the variables to remove match_back from the equation. */
+      usize_t target_position = stream->input_position - stream->match_back;
+      usize_t match_length   = stream->match_back      + stream->match_fwd;
+      xoff_t match_position  = stream->match_srcpos    - stream->match_back;
+      xoff_t match_end       = stream->match_srcpos    + stream->match_fwd;
+
+      /* At this point we may have to erase any iopt-buffer
+       * instructions that are fully covered by a backward-extending
+       * copy. */
+      if (stream->match_back > 0)
+	{
+	  xd3_iopt_erase (stream, target_position, total);
+	}
+
+      stream->match_back = 0;
+
+      /* Update ranges.  The first source match occurs with both
+	 values set to 0. */
+      if (stream->match_maxaddr == 0 ||
+	  match_position < stream->match_minaddr)
+	{
+	  stream->match_minaddr = match_position;
+	}
+
+      if (match_end > stream->match_maxaddr)
+	{
+	  /* Note: per-window */
+	  stream->match_maxaddr = match_end;
+	}
+
+      if (match_end > stream->maxsrcaddr)
+	{
+	  /* Note: across windows */
+	  stream->maxsrcaddr = match_end;
+	}
+
+      IF_DEBUG2 ({
+	static int x = 0;
+	DP(RINT "[source match:%d] length %"W"u <inp %"Q"u %"Q"u>  <src %"Q"u %"Q"u> (%s)\n",
+	   x++,
+	   match_length,
+	   stream->total_in + target_position,
+	   stream->total_in + target_position + match_length,
+	   match_position,
+	   match_position + match_length,
+	   (stream->total_in + target_position == match_position) ? "same" : "diff");
+      });
+
+      if ((ret = xd3_found_match (stream,
+				  /* decoder position */ target_position,
+				  /* length */ match_length,
+				  /* address */ match_position,
+				  /* is_source */ 1)))
+	{
+	  return ret;
+	}
+
+      /* If the match ends with the available input: */
+      if (target_position + match_length == stream->avail_in)
+	{
+	  /* Setup continuing match for the next window. */
+	  stream->match_state  = MATCH_TARGET;
+	  stream->match_srcpos = match_end;
+	}
+    }
+
+  return 0;
+}
+
+/* Records POS as the newest occupant of small_table bucket INX.  Stored
+ * values carry a HASH_CKOFFSET (1) bias so that 0 can mean "empty". */
+static void
+xd3_scksum_insert (xd3_stream *stream,
+		   usize_t inx,
+		   usize_t scksum,
+		   usize_t pos)
+{
+  usize_t *const bucket = & stream->small_table[inx];
+
+  /* When duplicate chains are kept, remember the bucket's previous
+   * occupant (still biased by HASH_CKOFFSET) in the slot for POS. */
+  if (stream->small_prev != NULL)
+    {
+      stream->small_prev[pos & stream->sprevmask].last_pos = *bucket;
+    }
+
+  /* SCKSUM is not consulted here. */
+
+  *bucket = pos + HASH_CKOFFSET;
+}
+
+#if XD3_DEBUG
+/* Debug check: the first CMP_LEN bytes of REF0 and INP0 agree, and the next byte differs unless the match reached INP_MAX. */
+static int
+xd3_check_smatch (const uint8_t *ref0, const uint8_t *inp0,
+		  const uint8_t *inp_max, usize_t cmp_len)
+{
+  usize_t k = 0;
+  while (k < cmp_len)
+    {
+      XD3_ASSERT (ref0[k] == inp0[k]);
+      k += 1;
+    }
+  /* A match that stopped before the end of input must have stopped on a differing byte. */
+  if (inp0 + cmp_len < inp_max)
+    {
+      XD3_ASSERT (inp0[cmp_len] != ref0[cmp_len]);
+    }
+  return 1;
+}
+#endif /* XD3_DEBUG */
+
+/* When the hash table indicates a possible small string match, it
+ * calls this routine to find the best match.  The first matching
+ * position is taken from the small_table, HASH_CKOFFSET is subtracted
+ * to get the actual position.  After checking that match, if previous
+ * linked lists are in use (because stream->smatcher.small_chain > 1),
+ * previous matches are tested searching for the longest match.  If
+ * (stream->min_match > MIN_MATCH) then a lazy match is in effect.
+ * Returns the match length (0 when rejected); the position is stored in *match_offset. */
+static usize_t
+xd3_smatch (xd3_stream *stream,
+	    usize_t base,
+	    usize_t scksum,
+	    usize_t *match_offset)
+{
+  usize_t cmp_len;
+  usize_t match_length = 0; /* longest match found so far */
+  usize_t chain = (stream->min_match == MIN_MATCH ?
+                   stream->smatcher.small_chain :
+                   stream->smatcher.small_lchain);
+  const uint8_t *inp_max = stream->next_in + stream->avail_in;
+  const uint8_t *inp;
+  const uint8_t *ref;
+
+  SMALL_HASH_DEBUG1 (stream, stream->next_in + stream->input_position);
+
+  XD3_ASSERT (stream->min_match + stream->input_position <= stream->avail_in);
+
+  base -= HASH_CKOFFSET;
+
+ again:
+
+  IF_DEBUG2 (DP(RINT "smatch at base=%"W"u inp=%"W"u cksum=%"W"u\n", base,
+                stream->input_position, scksum));
+
+  /* For small matches, we can always go to the end-of-input because
+   * the matching position must be less than the input position. */
+  XD3_ASSERT (base < stream->input_position);
+
+  ref = stream->next_in + base;
+  inp = stream->next_in + stream->input_position;
+
+  SMALL_HASH_DEBUG2 (stream, ref);
+
+  /* Expand potential match forward. */
+  while (inp < inp_max && *inp == *ref)
+    {
+      ++inp;
+      ++ref;
+    }
+
+  cmp_len = (usize_t)(inp - (stream->next_in + stream->input_position));
+
+  /* Verify correctness */
+  XD3_ASSERT (xd3_check_smatch (stream->next_in + base,
+				stream->next_in + stream->input_position,
+				inp_max, cmp_len));
+
+  /* Update longest match */
+  if (cmp_len > match_length)
+    {
+      ( match_length) = cmp_len;
+      (*match_offset) = base;
+
+      /* Stop if we match the entire input or have a long_enough match. */
+      if (inp == inp_max || cmp_len >= stream->smatcher.long_enough)
+	{
+	  goto done;
+	}
+    }
+
+  /* If we have not reached the chain limit, see if there is another
+     previous position. */
+  while (--chain != 0)
+    {
+      /* Calculate the previous offset. */
+      usize_t prev_pos = stream->small_prev[base & stream->sprevmask].last_pos;
+      usize_t diff_pos;
+
+       if (prev_pos == 0)
+ 	{
+ 	  break;
+ 	}
+      /* Remove the HASH_CKOFFSET bias to recover the actual position. */
+      prev_pos -= HASH_CKOFFSET;
+      /* A previous position ahead of the current base ends the walk. */
+      if (prev_pos > base)
+        {
+          break;
+        }
+
+      base = prev_pos;
+
+      XD3_ASSERT (stream->input_position > base);
+      diff_pos = stream->input_position - base;
+
+      /* Stop searching if we go beyond sprevsz, since those entries
+       * are for unrelated checksum entries. */
+      if (diff_pos & ~stream->sprevmask)
+        {
+          break;
+        }
+
+      goto again;
+    }
+
+ done:
+  /* Crude efficiency test: if the match is very short and very far back, it's
+   * unlikely to help, but the exact calculation requires knowing the state of
+   * the address cache and adjacent instructions, which we can't do here.
+   * Rather than encode a probably inefficient copy here and check it later
+   * (which complicates the code a lot), do this:
+   */
+  if (match_length == 4 && stream->input_position - (*match_offset) >= 1<<14)
+    {
+      /* It probably takes >2 bytes to encode an address >= 2^14 from here */
+      return 0;
+    }
+  if (match_length == 5 && stream->input_position - (*match_offset) >= 1<<21)
+    {
+      /* It probably takes >3 bytes to encode an address >= 2^21 from here */
+      return 0;
+    }
+
+  /* It's unlikely that a window is large enough for the (match_length == 6 &&
+   * address >= 2^28) check */
+  return match_length;
+}
+
+#if XD3_DEBUG
+/* Debug check: recompute xd3_scksum over small_look bytes at INP and assert it equals X_CKSUM. */
+static void
+xd3_verify_small_state (xd3_stream    *stream,
+			const uint8_t *inp,
+			uint32_t       x_cksum)
+{
+  uint32_t scratch;
+  uint32_t recomputed = xd3_scksum (&scratch, inp, stream->smatcher.small_look);
+  XD3_ASSERT (recomputed == x_cksum);
+}
+/* Debug check: recompute xd3_large_cksum over large_look bytes at INP and assert it equals X_CKSUM. */
+static void
+xd3_verify_large_state (xd3_stream    *stream,
+			const uint8_t *inp,
+			usize_t        x_cksum)
+{
+  const usize_t recomputed = xd3_large_cksum (&stream->large_hash, inp, stream->smatcher.large_look);
+  XD3_ASSERT (recomputed == x_cksum);
+}
+/* Debug check: recompute xd3_comprun over small_look bytes at INP and assert it is consistent with X_RUN_L and *X_RUN_C. */
+static void
+xd3_verify_run_state (xd3_stream    *stream,
+		      const uint8_t *inp,
+		      usize_t        x_run_l,
+		      uint8_t       *x_run_c)
+{
+  const usize_t look = stream->smatcher.small_look;
+  uint8_t byte;
+  const usize_t len = xd3_comprun (inp, look, &byte);
+  XD3_ASSERT (len == 0 || byte == *x_run_c);
+  XD3_ASSERT (x_run_l > look || len == x_run_l);
+}
+#endif /* XD3_DEBUG */
+
+/* Computes more source checksums to advance the window; returns 0 or
+ * a non-zero xd3_getblk result.  Called at every entrance to the
+ * string-match loop and each time stream->input_position reaches
+ * *next_move_point (USIZE_T_MAX once a known source EOF is reached).
+ * NB: this is one of the most expensive functions in this code and
+ * also the most critical for good compression. */
+static int
+xd3_srcwin_move_point (xd3_stream *stream, usize_t *next_move_point)
+{
+  /* the source file is indexed until this point */
+  xoff_t target_cksum_pos;
+  /* the absolute target file input position */
+  xoff_t absolute_input_pos;
+
+  if (stream->src->eof_known)
+    {
+      xoff_t source_size = xd3_source_eof (stream->src);
+      XD3_ASSERT(stream->srcwin_cksum_pos <= source_size);
+
+      if (stream->srcwin_cksum_pos == source_size)
+	{
+	  *next_move_point = USIZE_T_MAX;
+	  return 0;
+	}
+    }
+
+  absolute_input_pos = stream->total_in + stream->input_position;
+
+  /* Immediately read the entire window.
+   *
+   * Note: this reverses a long held policy, at this point in the
+   * code, of advancing relatively slowly as the input is read, which
+   * results in better compression for very-similar inputs, but worse
+   * compression where data is deleted near the beginning of the file.
+   *
+   * The new policy is simpler, somewhat slower and can benefit, or
+   * slightly worsen, compression performance. */
+  if (absolute_input_pos < stream->src->max_winsize / 2)
+    {
+      target_cksum_pos = stream->src->max_winsize;
+    }
+  else
+    {
+      /* TODO: The addition of 2 blocks here is arbitrary.  Do a
+       * better job of stream alignment based on observed source copy
+       * addresses, and when both input sizes are known, the
+       * difference in size. */
+      target_cksum_pos = absolute_input_pos +
+	stream->src->max_winsize / 2 +
+	stream->src->blksize * 2;
+      target_cksum_pos &= ~stream->src->maskby;
+    }
+
+  /* A long match may have extended past srcwin_cksum_pos.  Don't
+   * start checksumming already-matched source data. */
+  if (stream->maxsrcaddr > stream->srcwin_cksum_pos)
+    {
+      stream->srcwin_cksum_pos = stream->maxsrcaddr;
+    }
+
+  if (target_cksum_pos < stream->srcwin_cksum_pos)
+    {
+      target_cksum_pos = stream->srcwin_cksum_pos;
+    }
+
+  while (stream->srcwin_cksum_pos < target_cksum_pos &&
+	 (!stream->src->eof_known ||
+	  stream->srcwin_cksum_pos < xd3_source_eof (stream->src)))
+    {
+      xoff_t  blkno;
+      xoff_t  blkbaseoffset;
+      usize_t blkrem;
+      ssize_t oldpos;  /* Signed on purpose: the do/while below  */
+      ssize_t blkpos;  /* steps blkpos down until blkpos < oldpos,
+			  which can take it below zero. */
+      int ret;
+      xd3_blksize_div (stream->srcwin_cksum_pos,
+		       stream->src, &blkno, &blkrem);
+      oldpos = blkrem;
+
+      if ((ret = xd3_getblk (stream, blkno)))
+	{
+	  /* TOOFARBACK should never occur here, since we read forward. */
+	  if (ret == XD3_TOOFARBACK)
+	    {
+ 	      ret = XD3_INTERNAL;
+	    }
+
+	  IF_DEBUG1 (DP(RINT
+			"[srcwin_move_point] async getblk return for %"Q"u: %s\n",
+			blkno, xd3_strerror (ret)));
+	  return ret;
+	}
+
+      IF_DEBUG1 (DP(RINT
+		    "[srcwin_move_point] block %"Q"u T=%"Q"u S=%"Q"u L=%"Q"u EOF=%"Q"u %s\n",
+		    blkno,
+		    stream->total_in + stream->input_position,
+		    stream->srcwin_cksum_pos,
+		    target_cksum_pos,
+		    xd3_source_eof (stream->src),
+		    stream->src->eof_known ? "known" : "unknown"));
+
+      blkpos = xd3_bytes_on_srcblk (stream->src, blkno);
+
+      if (blkpos < (ssize_t) stream->smatcher.large_look)
+	{
+	  stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize;
+	  IF_DEBUG2 (DP(RINT "[srcwin_move_point] continue (end-of-block): %"Z"d\n", blkpos));
+	  continue;
+	}
+
+      /* This inserts checksums for the entire block, in reverse,
+       * starting from the end of the block.  This logic does not test
+       * stream->srcwin_cksum_pos because it always advances it to the
+       * start of the next block.
+       *
+       * oldpos is the srcwin_cksum_pos within this block.  blkpos is
+       * the number of bytes available.  Each iteration inspects
+       * large_look bytes then steps back large_step bytes.  The
+       * if-stmt above ensures at least one large_look of data. */
+      blkpos -= stream->smatcher.large_look;
+      blkbaseoffset = stream->src->blksize * blkno;
+
+      do
+	{
+	  /* TODO: This would be significantly faster if the compiler
+	   * knew stream->smatcher.large_look (which the template for
+	   * xd3_string_match_* allows). */
+	  usize_t cksum = xd3_large_cksum (&stream->large_hash, 
+					   stream->src->curblk + blkpos,
+					   stream->smatcher.large_look);
+	  usize_t hval = xd3_checksum_hash (& stream->large_hash, cksum);
+
+	  stream->large_table[hval] =
+	    (usize_t) (blkbaseoffset +
+		       (xoff_t)(blkpos + HASH_CKOFFSET));
+
+	  IF_DEBUG (stream->large_ckcnt += 1);
+
+	  blkpos -= stream->smatcher.large_step;
+	}
+      while (blkpos >= oldpos);
+
+      stream->srcwin_cksum_pos = (blkno + 1) * stream->src->blksize;
+    }
+
+  IF_DEBUG1 (DP(RINT
+		"[srcwin_move_point] exited loop T=%"Q"u "
+		"S=%"Q"u EOF=%"Q"u %s\n",
+		stream->total_in + stream->input_position,
+		stream->srcwin_cksum_pos,
+		xd3_source_eof (stream->src),
+		stream->src->eof_known ? "known" : "unknown"));
+
+  if (stream->src->eof_known)
+    {
+      xoff_t source_size = xd3_source_eof (stream->src);
+      if (stream->srcwin_cksum_pos >= source_size)
+	{
+	  /* This invariant is needed for xd3_source_cksum_offset() */
+	  stream->srcwin_cksum_pos = source_size;
+	  *next_move_point = USIZE_T_MAX;
+	  IF_DEBUG1 (DP(RINT
+			"[srcwin_move_point] finished with source input\n"));
+	  return 0;
+	}
+    }
+
+  /* How long until this function should be called again. */
+  XD3_ASSERT(stream->srcwin_cksum_pos >= target_cksum_pos);
+
+  *next_move_point = stream->input_position +
+    stream->src->blksize -
+    ((stream->srcwin_cksum_pos - target_cksum_pos) & stream->src->maskby);
+  
+  IF_DEBUG2 (DP(RINT
+		"[srcwin_move_point] finished T=%"Q"u "
+		"S=%"Q"u L=%"Q"u EOF=%"Q"u %s again in %"W"u\n",
+		stream->total_in + stream->input_position,
+		stream->srcwin_cksum_pos,
+		target_cksum_pos,
+		xd3_source_eof (stream->src),
+		stream->src->eof_known ? "known" : "unknown",
+		*next_move_point - stream->input_position));
+
+  return 0;
+}
+
+#endif /* XD3_ENCODER */
+
+/********************************************************************
+ TEMPLATE pass
+ *********************************************************************/
+
+#endif /* __XDELTA3_C_INLINE_PASS__ */
+#ifdef __XDELTA3_C_TEMPLATE_PASS__
+
+#if XD3_ENCODER
+
+/********************************************************************
+ Templates
+ *******************************************************************/
+
+/* Template macros: XD3_TEMPLATE pastes the expanded TEMPLATE onto x; XD3_STRINGIFY expands x, then quotes it. */
+#define XD3_TEMPLATE(x)      XD3_TEMPLATE2(x,TEMPLATE)
+#define XD3_TEMPLATE2(x,n)   XD3_TEMPLATE3(x,n)
+#define XD3_TEMPLATE3(x,n)   x ## n
+#define XD3_STRINGIFY(x)     XD3_STRINGIFY2(x)
+#define XD3_STRINGIFY2(x)    #x
+
+static int XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream);
+
+static const xd3_smatcher XD3_TEMPLATE(__smatcher_) =
+{
+  XD3_STRINGIFY(TEMPLATE),
+  XD3_TEMPLATE(xd3_string_match_),
+#if SOFTCFG == 1
+  0, 0, 0, 0, 0, 0, 0
+#else
+  LLOOK, LSTEP, SLOOK, SCHAIN, SLCHAIN, MAXLAZY, LONGENOUGH
+#endif
+};
+
+static int
+XD3_TEMPLATE(xd3_string_match_) (xd3_stream *stream)
+{
+  const int      DO_SMALL = ! (stream->flags & XD3_NOCOMPRESS);
+  const int      DO_LARGE = (stream->src != NULL);
+  const int      DO_RUN   = (1); /* run matching is always enabled */
+
+  const uint8_t *inp;
+  uint32_t       scksum = 0;
+  uint32_t       scksum_state = 0;
+  usize_t        lcksum = 0;
+  usize_t        sinx;
+  usize_t        linx;
+  uint8_t        run_c;
+  usize_t        run_l;
+  int            ret;
+  usize_t        match_length;
+  usize_t        match_offset = 0;
+  usize_t        next_move_point = 0;
+
+  IF_DEBUG2(DP(RINT "[string_match] initial entry %"W"u\n", stream->input_position));
+
+  /* If there will be no compression due to settings or short input,
+   * skip it entirely. */
+  if (! (DO_SMALL || DO_LARGE || DO_RUN) ||
+      stream->input_position + SLOOK > stream->avail_in) { goto loopnomore; }
+
+  if ((ret = xd3_string_match_init (stream))) { return ret; }
+
+  /* The restartloop label is reached when the incremental loop state
+   * needs to be reset. */
+ restartloop:
+
+  IF_DEBUG2(DP(RINT "[string_match] restartloop %"W"u\n", stream->input_position));
+
+  /* If there is not enough input remaining for any kind of match,
+     skip it. */
+  if (stream->input_position + SLOOK > stream->avail_in) { goto loopnomore; }
+
+  /* Now reset the incremental loop state: */
+
+  /* The min_match variable is updated to avoid matching the same lazy
+   * match over and over again.  For example, if you find a (small)
+   * match of length 9 at one position, you will likely find a match
+   * of length 8 at the next position. */
+  if (xd3_iopt_last_matched (stream) > stream->input_position)
+    {
+      stream->min_match = xd3_max (MIN_MATCH,
+				   1 + xd3_iopt_last_matched(stream) -
+				   stream->input_position);
+    }
+  else
+    {
+      stream->min_match = MIN_MATCH;
+    }
+
+  /* The current input byte. */
+  inp = stream->next_in + stream->input_position;
+
+  /* Small match state. */
+  if (DO_SMALL)
+    {
+      scksum = xd3_scksum (&scksum_state, inp, SLOOK);
+    }
+
+  /* Run state. */
+  if (DO_RUN)
+    {
+      run_l = xd3_comprun (inp, SLOOK, & run_c);
+    }
+
+  /* Large match state.  We continue the loop even after not enough
+   * bytes for LLOOK remain, so always check stream->input_position in
+   * DO_LARGE code. */
+  if (DO_LARGE && (stream->input_position + LLOOK <= stream->avail_in))
+    {
+      /* Source window: next_move_point is the point that
+       * stream->input_position must reach before computing more
+       * source checksum.  Note: this is called unconditionally
+       * the first time after reentry, subsequent calls will be
+       * avoided if next_move_point is > input_position */
+      if ((ret = xd3_srcwin_move_point (stream, & next_move_point)))
+	{
+	  return ret;
+	}
+
+      lcksum = xd3_large_cksum (&stream->large_hash, inp, LLOOK);
+    }
+
+  /* TRYLAZYLEN: True if a certain length match should be followed by
+   * lazy search.  This checks that LEN is shorter than MAXLAZY and
+   * that there is enough leftover data to consider lazy matching.
+   * "Enough" is set to 2 since the next match will start at the next
+   * offset, it must match two extra characters. */
+#define TRYLAZYLEN(LEN,POS,MAX) ((MAXLAZY) > 0 && (LEN) < (MAXLAZY) \
+				 && (POS) + (LEN) <= (MAX) - 2)
+
+  /* HANDLELAZY: This statement is called each time an instruction is
+   * emitted (three cases).  If the instruction is large enough, the
+   * loop is restarted, otherwise lazy matching may ensue. */
+#define HANDLELAZY(mlen) \
+  if (TRYLAZYLEN ((mlen), (stream->input_position), (stream->avail_in))) \
+    { stream->min_match = (mlen) + LEAST_MATCH_INCR; goto updateone; } \
+  else \
+    { stream->input_position += (mlen); goto restartloop; }
+
+  /* Now loop over one input byte at a time until a match is found... */
+  for (;; inp += 1, stream->input_position += 1)
+    {
+      /* Now we try three kinds of string match in order of expense:
+       * run, large match, small match. */
+
+      /* Expand the start of a RUN.  The test for (run_l == SLOOK)
+       * avoids repeating this check when we pass through a run area
+       * performing lazy matching.  The run is only expanded once when
+       * the min_match is first reached.  If lazy matching is
+       * performed, the run_l variable will remain inconsistent until
+       * the first non-running input character is reached, at which
+       * time the run_l may then again grow to SLOOK. */
+      if (DO_RUN && run_l == SLOOK)
+	{
+	  usize_t max_len = stream->avail_in - stream->input_position;
+
+	  IF_DEBUG (xd3_verify_run_state (stream, inp, run_l, &run_c));
+
+	  while (run_l < max_len && inp[run_l] == run_c) { run_l += 1; }
+
+	  /* Output a RUN instruction. */
+	  if (run_l >= stream->min_match && run_l >= MIN_RUN)
+	    {
+	      if ((ret = xd3_emit_run (stream, stream->input_position,
+				       run_l, &run_c))) { return ret; }
+
+	      HANDLELAZY (run_l);
+	    }
+	}
+
+      /* If there is enough input remaining. */
+      if (DO_LARGE && (stream->input_position + LLOOK <= stream->avail_in))
+	{
+	  if ((stream->input_position >= next_move_point) &&
+	      (ret = xd3_srcwin_move_point (stream, & next_move_point)))
+	    {
+	      return ret;
+	    }
+
+	  linx = xd3_checksum_hash (& stream->large_hash, lcksum);
+
+	  IF_DEBUG (xd3_verify_large_state (stream, inp, lcksum));
+
+	  if (stream->large_table[linx] != 0)
+	    {
+	      /* the match_setup will fail if the source window has
+	       * been decided and the match lies outside it.
+	       * OPT: Consider forcing a window at this point to
+	       * permit a new source window. */
+	      xoff_t adj_offset =
+		xd3_source_cksum_offset(stream,
+					stream->large_table[linx] -
+					HASH_CKOFFSET);
+	      if (xd3_source_match_setup (stream, adj_offset) == 0)
+		{
+		  if ((ret = xd3_source_extend_match (stream)))
+		    {
+		      return ret;
+		    }
+
+		  /* Update stream position.  match_fwd is zero if no
+		   * match. */
+		  if (stream->match_fwd > 0)
+		    {
+		      HANDLELAZY (stream->match_fwd);
+		    }
+		}
+	    }
+	}
+
+      /* Small matches. */
+      if (DO_SMALL)
+	{
+	  sinx = xd3_checksum_hash (& stream->small_hash, scksum);
+
+	  /* Verify incremental state in debugging mode. */
+	  IF_DEBUG (xd3_verify_small_state (stream, inp, scksum));
+
+	  /* Search for the longest match */
+	  if (stream->small_table[sinx] != 0)
+	    {
+	      match_length = xd3_smatch (stream,
+					 stream->small_table[sinx],
+					 scksum,
+					 & match_offset);
+	    }
+	  else
+	    {
+	      match_length = 0;
+	    }
+
+	  /* Insert a hash for this string. */
+	  xd3_scksum_insert (stream, sinx, scksum, stream->input_position);
+
+	  /* Maybe output a COPY instruction */
+	  if (match_length >= stream->min_match)
+	    {
+	      IF_DEBUG2 ({
+		static int x = 0;
+		DP(RINT "[target match:%d] <inp %"W"u %"W"u>  <cpy %"W"u %"W"u> "
+		   "(-%"W"d) [ %"W"u bytes ]\n",
+		   x++,
+		   stream->input_position,
+		   stream->input_position + match_length,
+		   match_offset,
+		   match_offset + match_length,
+		   stream->input_position - match_offset,
+		   match_length);
+	      });
+
+	      if ((ret = xd3_found_match (stream,
+					  /* decoder position */
+					  stream->input_position,
+					  /* length */ match_length,
+					  /* address */ (xoff_t) match_offset,
+					  /* is_source */ 0)))
+		{
+		  return ret;
+		}
+
+	      /* Copy instruction. */
+	      HANDLELAZY (match_length);
+	    }
+	}
+
+      /* The logic above prevents excess work during lazy matching by
+       * increasing min_match to avoid smaller matches.  Each time we
+       * advance stream->input_position by one, the minimum match
+       * shortens as well.  */
+      if (stream->min_match > MIN_MATCH)
+	{
+	  stream->min_match -= 1;
+	}
+
+    updateone:
+
+      /* See if there are no more incremental cksums to compute. */
+      if (stream->input_position + SLOOK == stream->avail_in)
+	{
+	  goto loopnomore;
+	}
+
+      /* Compute next RUN, CKSUM */
+      if (DO_RUN)
+	{
+	  NEXTRUN (inp[SLOOK]);
+	}
+
+      if (DO_SMALL)
+	{
+	  scksum = xd3_small_cksum_update (&scksum_state, inp, SLOOK);
+	}
+
+      if (DO_LARGE && (stream->input_position + LLOOK < stream->avail_in))
+	{
+	  lcksum = xd3_large_cksum_update (&stream->large_hash, lcksum, inp, LLOOK);
+	}
+    }
+
+ loopnomore:
+  return 0;
+}
+
+#endif /* XD3_ENCODER */
+#endif /* __XDELTA3_C_TEMPLATE_PASS__ */
diff --git a/third-party/xdelta3/xdelta3/xdelta3.h b/third-party/xdelta3/xdelta3/xdelta3.h
new file mode 100644
index 0000000000..b9b6fe0261
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/xdelta3.h
@@ -0,0 +1,1476 @@
+/* xdelta3 - delta compression tools and library
+   Copyright 2016 Joshua MacDonald
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+/* To learn more about Xdelta, start by reading xdelta3.c.  If you are
+ * ready to use the API, continue reading here.  There are two
+ * interfaces -- xd3_encode_input and xd3_decode_input -- plus a dozen
+ * or so related calls.  This interface is styled after Zlib. */
+
+#ifndef _XDELTA3_H_
+#define _XDELTA3_H_
+
+#define _POSIX_SOURCE 200112L
+#define _ISOC99_SOURCE
+#define _C99_SOURCE
+
+#if HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+/****************************************************************/
+
+/* Default configured value of stream->winsize.  If the program
+ * supplies xd3_encode_input() with data smaller than winsize the
+ * stream will automatically buffer the input, otherwise the input
+ * buffer is used directly.
+ */
+#ifndef XD3_DEFAULT_WINSIZE
+#define XD3_DEFAULT_WINSIZE (1U << 23)
+#endif
+
+/* Default total size of the source window used in xdelta3-main.h */
+#ifndef XD3_DEFAULT_SRCWINSZ
+#define XD3_DEFAULT_SRCWINSZ (1U << 26)
+#endif
+
+/* When Xdelta requests a memory allocation for certain buffers, it
+ * rounds up to units of at least this size.  The code assumes (and
+ * asserts) that this is a power-of-two. */
+#ifndef XD3_ALLOCSIZE
+#define XD3_ALLOCSIZE (1U<<14)
+#endif
+
+/* The XD3_HARDMAXWINSIZE parameter is a safety mechanism to protect
+ * decoders against malicious files.  The decoder will never decode a
+ * window larger than this.  If the file specifies VCD_TARGET the
+ * decoder may require two buffers of this size.
+ *
+ * 8-16MB is reasonable; note the default below is 64MB (1U<<26). */
+#ifndef XD3_HARDMAXWINSIZE
+#define XD3_HARDMAXWINSIZE (1U<<26)
+#endif
+/* The IOPT_SIZE value sets the size of a buffer used to batch
+ * overlapping copy instructions before they are optimized by picking
+ * the best non-overlapping ranges.  The larger this buffer, the
+ * longer a forced xd3_srcwin_setup() decision is held off.  Setting
+ * this value to 0 causes an unlimited buffer to be used. */
+#ifndef XD3_DEFAULT_IOPT_SIZE
+#define XD3_DEFAULT_IOPT_SIZE    (1U<<15)
+#endif
+
+/* The maximum distance backward to search for small matches */
+#ifndef XD3_DEFAULT_SPREVSZ
+#define XD3_DEFAULT_SPREVSZ (1U<<18)
+#endif
+
+/* The default compression level */
+#ifndef XD3_DEFAULT_LEVEL
+#define XD3_DEFAULT_LEVEL 3
+#endif
+
+#ifndef XD3_DEFAULT_SECONDARY_LEVEL
+#define XD3_DEFAULT_SECONDARY_LEVEL 6
+#endif
+
+#ifndef XD3_USE_LARGEFILE64
+#define XD3_USE_LARGEFILE64 1
+#endif
+
+/* The source window size is limited to 2GB unless
+ * XD3_USE_LARGESIZET is defined to 1. */
+#ifndef XD3_USE_LARGESIZET
+#define XD3_USE_LARGESIZET 1
+#endif
+
+/* Sizes and addresses within VCDIFF windows are represented as usize_t
+ *
+ * For source-file offsets and total file sizes, total input and
+ * output counts, the xoff_t type is used.  The decoder and encoder
+ * generally check for overflow of the xoff_t size (this is tested at
+ * the 32bit boundary [xdelta3-test.h]).
+ */
+#ifndef _WIN32
+#define __STDC_FORMAT_MACROS
+#include <inttypes.h>
+#include <stdint.h>
+#else /* WIN32 case */
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+
+#ifndef WINVER
+#if XD3_USE_LARGEFILE64
+/* 64 bit file offsets: uses GetFileSizeEx and SetFilePointerEx. */
+#define WINVER		0x0500
+#define _WIN32_WINNT	0x0500
+#else /* xoff_t is 32bit */
+/* 32 bit file offsets: uses GetFileSize and SetFilePointer. */
+#define WINVER		0x0400
+#define _WIN32_WINNT	0x0400
+#endif /* if XD3_USE_LARGEFILE64 */
+#endif /* ifndef WINVER */
+
+#include <windows.h>
+
+/* _MSC_VER is defined by Microsoft tools, not by Mingw32 */
+#ifdef _MSC_VER
+typedef signed int     ssize_t;
+typedef int pid_t;
+#if _MSC_VER < 1600
+typedef unsigned char  uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned long  uint32_t;
+typedef ULONGLONG      uint64_t;
+#else /* _MSC_VER >= 1600 */
+/* For MSVC10 and above */
+#include <stdint.h>
+#define inline __inline
+#endif /* _MSC_VER < 1600 */
+#else /* _MSC_VER not defined  */
+/* Mingw32 */
+#include <stdint.h>
+#endif /* _MSC_VER defined */
+
+#endif /* _WIN32 defined */
+
+/* Settings based on the size of xoff_t (32 vs 64 file offsets) */
+#if XD3_USE_LARGEFILE64
+/* xoff_t is a 64-bit type */
+#define __USE_FILE_OFFSET64 1 /* GLIBC: for 64bit fileops. */
+
+#ifndef _LARGEFILE_SOURCE
+#define _LARGEFILE_SOURCE
+#endif
+
+#ifndef _FILE_OFFSET_BITS
+#define _FILE_OFFSET_BITS 64
+#endif
+
+static_assert(SIZEOF_SIZE_T == sizeof(size_t), "SIZEOF_SIZE_T not correctly set");
+static_assert(SIZEOF_UNSIGNED_LONG_LONG == sizeof(unsigned long long), "SIZEOF_UNSIGNED_LONG_LONG not correctly set");
+
+/* Set a xoff_t typedef and the "Q" printf insert. */
+#if defined(_WIN32)
+typedef uint64_t xoff_t;
+#define Q "I64"
+#elif SIZEOF_UNSIGNED_LONG == 8
+typedef unsigned long xoff_t;
+#define Q "l"
+#elif SIZEOF_SIZE_T == 8
+typedef size_t xoff_t;
+#define Q "z"
+#elif SIZEOF_UNSIGNED_LONG_LONG == 8
+typedef unsigned long long xoff_t;
+#define Q "ll"
+#endif /* typedef and #define Q */
+
+#define SIZEOF_XOFF_T 8
+
+#else /* XD3_USE_LARGEFILE64 == 0 */
+
+#if SIZEOF_UNSIGNED_INT == 4
+typedef unsigned int xoff_t;
+#elif SIZEOF_UNSIGNED_LONG == 4
+typedef unsigned long xoff_t;
+#else
+typedef uint32_t xoff_t;
+#endif /* xoff_t is 32 bits */
+
+#define SIZEOF_XOFF_T 4
+#define Q
+#endif /* 64 vs 32 bit xoff_t */
+
+/* Settings based on the size of usize_t (32 and 64 bit window size) */
+#if XD3_USE_LARGESIZET
+
+/* Set a usize_t typedef and the "W" printf insert. */
+#if defined(_WIN32)
+typedef uint64_t usize_t;
+#define W "I64"
+#elif SIZEOF_UNSIGNED_LONG == 8
+typedef unsigned long usize_t;
+#define W "l"
+#elif SIZEOF_SIZE_T == 8
+typedef size_t usize_t;
+#define W "z"
+#elif SIZEOF_UNSIGNED_LONG_LONG == 8
+typedef unsigned long long usize_t;
+#define W "ll"
+#endif /* typedef and #define W */
+
+#define SIZEOF_USIZE_T 8
+
+#else /* XD3_USE_LARGESIZET == 0 */
+
+#if SIZEOF_UNSIGNED_INT == 4
+typedef unsigned int usize_t;
+#elif SIZEOF_UNSIGNED_LONG == 4
+typedef unsigned long usize_t;
+#else
+typedef uint32_t usize_t;
+#endif /* usize_t is 32 bits */
+
+#define SIZEOF_USIZE_T 4
+#define W
+
+#endif /* 64 vs 32 bit usize_t */
+
+/* Settings based on the size of size_t (the system-provided,
+ * usually-but-maybe-not an unsigned type) */
+#if SIZEOF_SIZE_T == 4
+#define Z "z"
+#elif SIZEOF_SIZE_T == 8
+#ifdef _WIN32
+#define Z "I64"
+#else /* !_WIN32 */
+#define Z "z"
+#endif /* Windows or not */
+#else
+#error Bad configure script
+#endif /* size_t printf flags */
+
+#define USE_UINT32 (SIZEOF_USIZE_T == 4 || \
+		    SIZEOF_XOFF_T == 4 || REGRESSION_TEST)
+#define USE_UINT64 (SIZEOF_USIZE_T == 8 || \
+		    SIZEOF_XOFF_T == 8 || REGRESSION_TEST)
+
+#ifndef UNALIGNED_OK
+#ifdef HAVE_ALIGNED_ACCESS_REQUIRED
+#define UNALIGNED_OK 0
+#else
+/* This generally includes all Windows builds. */
+#define UNALIGNED_OK 1
+#endif
+#endif
+
+/**********************************************************************/
+
+/* Whether to build the encoder, otherwise only build the decoder. */
+#ifndef XD3_ENCODER
+#define XD3_ENCODER 1
+#endif
+
+/* The code returned when main() fails, also defined in system
+   includes. */
+#ifndef EXIT_FAILURE
+#define EXIT_FAILURE 1
+#endif
+
+/* REGRESSION TEST enables the "xdelta3 test" command, which runs a
+   series of self-tests. */
+#ifndef REGRESSION_TEST
+#define REGRESSION_TEST 0
+#endif
+
+/* XD3_DEBUG=1 enables assertions and various statistics.  Levels > 1
+ * enable some additional output only useful during development and
+ * debugging. */
+#ifndef XD3_DEBUG
+#define XD3_DEBUG 0
+#endif
+
+#ifndef PYTHON_MODULE
+#define PYTHON_MODULE 0
+#endif
+
+#ifndef SWIG_MODULE
+#define SWIG_MODULE 0
+#endif
+
+#ifndef NOT_MAIN
+#define NOT_MAIN 0
+#endif
+
+/* Several string matching functions are supplied: slow, fast, faster,
+ * fastest, soft-configurable, and default.  To disable any of these,
+ * use the following definitions. */
+#ifndef XD3_BUILD_SLOW
+#define XD3_BUILD_SLOW 1
+#endif
+#ifndef XD3_BUILD_FAST
+#define XD3_BUILD_FAST 1
+#endif
+#ifndef XD3_BUILD_FASTER
+#define XD3_BUILD_FASTER 1
+#endif
+#ifndef XD3_BUILD_FASTEST
+#define XD3_BUILD_FASTEST 1
+#endif
+#ifndef XD3_BUILD_SOFT
+#define XD3_BUILD_SOFT 1
+#endif
+#ifndef XD3_BUILD_DEFAULT
+#define XD3_BUILD_DEFAULT 1
+#endif
+
+#if XD3_DEBUG
+#include <stdio.h>
+#endif
+
+typedef struct _xd3_stream             xd3_stream;
+typedef struct _xd3_source             xd3_source;
+typedef struct _xd3_hash_cfg           xd3_hash_cfg;
+typedef struct _xd3_smatcher           xd3_smatcher;
+typedef struct _xd3_rinst              xd3_rinst;
+typedef struct _xd3_dinst              xd3_dinst;
+typedef struct _xd3_hinst              xd3_hinst;
+typedef struct _xd3_winst              xd3_winst;
+typedef struct _xd3_rpage              xd3_rpage;
+typedef struct _xd3_addr_cache         xd3_addr_cache;
+typedef struct _xd3_output             xd3_output;
+typedef struct _xd3_desect             xd3_desect;
+typedef struct _xd3_iopt_buflist       xd3_iopt_buflist;
+typedef struct _xd3_rlist              xd3_rlist;
+typedef struct _xd3_sec_type           xd3_sec_type;
+typedef struct _xd3_sec_cfg            xd3_sec_cfg;
+typedef struct _xd3_sec_stream         xd3_sec_stream;
+typedef struct _xd3_config             xd3_config;
+typedef struct _xd3_code_table_desc    xd3_code_table_desc;
+typedef struct _xd3_code_table_sizes   xd3_code_table_sizes;
+typedef struct _xd3_slist              xd3_slist;
+typedef struct _xd3_whole_state        xd3_whole_state;
+typedef struct _xd3_wininfo            xd3_wininfo;
+
+/* The stream configuration has three callback functions, all of
+ * which may be supplied with NULL values.  If config->getblk is
+ * provided as NULL, the stream returns XD3_GETSRCBLK. */
+
+typedef void*  (xd3_alloc_func)    (void       *opaque,
+				    size_t      items,
+				    usize_t     size);
+typedef void   (xd3_free_func)     (void       *opaque,
+				    void       *address);
+
+typedef int    (xd3_getblk_func)   (xd3_stream *stream,
+				    xd3_source *source,
+				    xoff_t      blkno);
+
+typedef const xd3_dinst* (xd3_code_table_func) (void);
+
+
+#ifdef _WIN32
+#define vsnprintf_func _vsnprintf
+#define snprintf_func _snprintf
+#else
+#define vsnprintf_func vsnprintf
+#define snprintf_func snprintf
+#endif
+#define short_sprintf(sb,fmt,...) \
+  snprintf_func((sb).buf,sizeof((sb).buf),fmt,__VA_ARGS__)
+
+/* Type used for short snprintf calls. */
+typedef struct {
+  char buf[48];
+} shortbuf;
+
+#ifndef PRINTF_ATTRIBUTE
+#ifdef __GNUC__
+#define PRINTF_ATTRIBUTE(x,y) __attribute__ ((__format__ (__printf__, x, y)))
+#else
+#define PRINTF_ATTRIBUTE(x,y)
+#endif
+#endif
+
+/* Underlying xprintf() */
+int xsnprintf_func (char *str, size_t n, const char *fmt, ...)
+  PRINTF_ATTRIBUTE(3,4);
+
+/* XPR(NT "", ...) (used by main) prefixes an "xdelta3: " to the output. */
+void xprintf(const char *fmt, ...) PRINTF_ATTRIBUTE(1,2);
+#define XPR xprintf
+#define NT "xdelta3: "
+#define NTR ""
+/* DP(RINT ...) */
+#define DP   xprintf
+#define RINT ""
+
+#if XD3_DEBUG
+#define XD3_ASSERT(x)				     \
+  do {						     \
+    if (! (x)) {				     \
+      DP(RINT "%s:%d: XD3 assertion failed: %s\n",   \
+	 __FILE__, __LINE__, #x);		     \
+      abort (); } } while (0)
+#else
+#define XD3_ASSERT(x) (void)0
+#endif  /* XD3_DEBUG */
+
+#define xd3_max(x,y) ((x) < (y) ? (y) : (x))
+#define xd3_min(x,y) ((x) < (y) ? (x) : (y))
+
+/****************************************************************
+ PUBLIC ENUMS
+ ******************************************************************/
+
+/* Status codes returned by the xd3_encode_input() and
+ * xd3_decode_input() state machines, followed by error codes. */
+typedef enum {
+
+  /* An application must be prepared to handle the ordinary return
+   * values listed first from either xd3_encode_input or
+   * xd3_decode_input, except in the case of no-source compression,
+   * in which case XD3_GETSRCBLK is never returned.  More detailed
+   * comments are given with xd3_encode_input/xd3_decode_input, below. */
+  XD3_INPUT     = -17703, /* need input */
+  XD3_OUTPUT    = -17704, /* have output */
+  XD3_GETSRCBLK = -17705, /* need a block of source input (with no
+			   * xd3_getblk function), a chance to do
+			   * non-blocking read. */
+  XD3_GOTHEADER = -17706, /* (decode-only) after the initial VCDIFF &
+			     first window header */
+  XD3_WINSTART  = -17707, /* notification: returned before a window is
+			   * processed, giving a chance to
+			   * XD3_SKIP_WINDOW or not XD3_SKIP_EMIT that
+			   * window. */
+  XD3_WINFINISH  = -17708, /* notification: returned after
+			      encode/decode & output for a window */
+  XD3_TOOFARBACK = -17709, /* (encoder only) may be returned by
+			      getblk() if the block is too old */
+  XD3_INTERNAL   = -17710, /* internal error */
+  XD3_INVALID    = -17711, /* invalid config */
+  XD3_INVALID_INPUT = -17712, /* invalid input/decoder error */
+  XD3_NOSECOND    = -17713, /* when secondary compression finds no
+			       improvement. */
+  XD3_UNIMPLEMENTED = -17714  /* currently VCD_TARGET, VCD_CODETABLE */
+} xd3_rvalues;
+
+/* Bit flags for config->flags; bits 0, 12 and 16-19 are not assigned here. */
+typedef enum
+{
+  XD3_JUST_HDR       = (1 << 1),   /* used by VCDIFF tools, see
+				      xdelta3-main.h. */
+  XD3_SKIP_WINDOW    = (1 << 2),   /* used by VCDIFF tools, see
+				      xdelta3-main.h. */
+  XD3_SKIP_EMIT      = (1 << 3),   /* used by VCDIFF tools, see
+				      xdelta3-main.h. */
+  XD3_FLUSH          = (1 << 4),   /* flush the stream buffer to
+				      prepare for
+				      xd3_stream_close(). */
+
+  XD3_SEC_DJW        = (1 << 5),   /* use DJW static huffman */
+  XD3_SEC_FGK        = (1 << 6),   /* use FGK adaptive huffman */
+  XD3_SEC_LZMA       = (1 << 24),  /* use LZMA secondary */
+
+  XD3_SEC_TYPE       = (XD3_SEC_DJW | XD3_SEC_FGK | XD3_SEC_LZMA),
+
+  XD3_SEC_NODATA     = (1 << 7),   /* disable secondary compression of
+				      the data section. */
+  XD3_SEC_NOINST     = (1 << 8),   /* disable secondary compression of
+				      the inst section. */
+  XD3_SEC_NOADDR     = (1 << 9),   /* disable secondary compression of
+				      the addr section. */
+
+  XD3_SEC_NOALL      = (XD3_SEC_NODATA | XD3_SEC_NOINST | XD3_SEC_NOADDR),
+
+  XD3_ADLER32        = (1 << 10),  /* enable checksum computation in
+				      the encoder. */
+  XD3_ADLER32_NOVER  = (1 << 11),  /* disable checksum verification in
+				      the decoder. */
+
+  XD3_NOCOMPRESS     = (1 << 13),  /* disable ordinary data
+				    * compression feature, only search
+				    * the source, not the target. */
+  XD3_BEGREEDY       = (1 << 14),  /* disable the "1.5-pass
+				    * algorithm", instead use greedy
+				    * matching.  Greedy is off by
+				    * default. */
+  XD3_ADLER32_RECODE = (1 << 15),  /* used by "recode". */
+
+  /* 4 bits to set the compression level the same as the command-line
+   * setting -1 through -9 (-0 corresponds to the XD3_NOCOMPRESS flag,
+   * and is independent of compression level).  This is for
+   * convenience, especially with xd3_encode_memory(). */
+
+  XD3_COMPLEVEL_SHIFT = 20,  /* level field occupies bits 20 - 23 */
+  XD3_COMPLEVEL_MASK = (0xF << XD3_COMPLEVEL_SHIFT),
+  XD3_COMPLEVEL_1 = (1 << XD3_COMPLEVEL_SHIFT),
+  XD3_COMPLEVEL_2 = (2 << XD3_COMPLEVEL_SHIFT),
+  XD3_COMPLEVEL_3 = (3 << XD3_COMPLEVEL_SHIFT),
+  XD3_COMPLEVEL_6 = (6 << XD3_COMPLEVEL_SHIFT),
+  XD3_COMPLEVEL_9 = (9 << XD3_COMPLEVEL_SHIFT)
+
+} xd3_flags;
+
+/* The values of this enumeration are set in xd3_config using the
+ * smatch_cfg variable.  It selects one of: default, slow, fast,
+ * faster, fastest, or soft. */
+typedef enum
+{
+  XD3_SMATCH_DEFAULT = 0, /* Flags may contain XD3_COMPLEVEL bits,
+			     else default. */
+  XD3_SMATCH_SLOW    = 1,
+  XD3_SMATCH_FAST    = 2,
+  XD3_SMATCH_FASTER  = 3,
+  XD3_SMATCH_FASTEST = 4,
+  XD3_SMATCH_SOFT    = 5
+} xd3_smatch_cfg;
+
+/*********************************************************************
+ PRIVATE ENUMS
+**********************************************************************/
+
+/* stream->match_state is part of the xd3_encode_input state machine
+ * for source matching:
+ *
+ *  1. the XD3_GETSRCBLK block-read mechanism means reentrant matching
+ *  2. this state spans encoder windows: a match that reaches
+ *     end-of-window will continue in the next window
+ *  3. the initial target byte and source byte are a presumed match,
+ *     to avoid some computation in case the inputs are identical.
+ */
+typedef enum {
+
+  MATCH_TARGET    = 0, /* in this state, attempt to match the start of
+			* the target with the previously set source
+			* address (initially 0). */
+  MATCH_BACKWARD  = 1, /* currently expanding a match backward in the
+			  source/target. */
+  MATCH_FORWARD   = 2, /* currently expanding a match forward in the
+			  source/target. */
+  MATCH_SEARCHING = 3  /* currently searching for a match. */
+
+} xd3_match_state;
+
+/* The xd3_encode_input state machine steps through these states in
+ * the following order.  The matcher is reentrant and returns
+ * XD3_INPUT whenever it requires more data.  After receiving
+ * XD3_INPUT, if the application reads EOF it should call
+ * xd3_stream_close().
+ */
+typedef enum {
+
+  ENC_INIT      = 0, /* xd3_encode_input has never been called. */
+  ENC_INPUT     = 1, /* waiting for xd3_avail_input () to be called. */
+  ENC_SEARCH    = 2, /* currently searching for matches. */
+  ENC_INSTR     = 3, /* currently formatting output. */
+  ENC_FLUSH     = 4, /* currently emitting output. */
+  ENC_POSTOUT   = 5, /* after an output section. */
+  ENC_POSTWIN   = 6, /* after all output sections. */
+  ENC_ABORTED   = 7  /* abort. */
+} xd3_encode_state;
+
+/* The xd3_decode_input state machine steps through these states in
+ * the following order.  The matcher is reentrant and returns
+ * XD3_INPUT whenever it requires more data.  After receiving
+ * XD3_INPUT, if the application reads EOF it should call
+ * xd3_stream_close().
+ *
+ * 0-8:   the VCDIFF header
+ * 9-18:  the VCDIFF window header
+ * 19-21: the three primary sections: data, inst, addr
+ * 22:    producing output: returns XD3_OUTPUT, possibly XD3_GETSRCBLK,
+ * 23:    return XD3_WINFINISH, set state=9 to decode more input
+ */
+typedef enum {
+
+  DEC_VCHEAD   = 0, /* VCDIFF header */
+  DEC_HDRIND   = 1, /* header indicator */
+
+  DEC_SECONDID = 2, /* secondary compressor ID */
+
+  DEC_TABLEN   = 3, /* code table length */
+  DEC_NEAR     = 4, /* code table near */
+  DEC_SAME     = 5, /* code table same */
+  DEC_TABDAT   = 6, /* code table data */
+
+  DEC_APPLEN   = 7, /* application data length */
+  DEC_APPDAT   = 8, /* application data */
+
+  DEC_WININD   = 9, /* window indicator */
+
+  DEC_CPYLEN   = 10, /* copy window length */
+  DEC_CPYOFF   = 11, /* copy window offset */
+
+  DEC_ENCLEN   = 12, /* length of delta encoding */
+  DEC_TGTLEN   = 13, /* length of target window */
+  DEC_DELIND   = 14, /* delta indicator */
+
+  DEC_DATALEN  = 15, /* length of ADD+RUN data */
+  DEC_INSTLEN  = 16, /* length of instruction data */
+  DEC_ADDRLEN  = 17, /* length of address data */
+
+  DEC_CKSUM    = 18, /* window checksum */
+
+  DEC_DATA     = 19, /* data section */
+  DEC_INST     = 20, /* instruction section */
+  DEC_ADDR     = 21, /* address section */
+
+  DEC_EMIT     = 22, /* producing data */
+
+  DEC_FINISH   = 23, /* window finished */
+
+  DEC_ABORTED  = 24  /* xd3_abort_stream */
+} xd3_decode_state;
+
+/************************************************************
+ internal types
+ ************************************************************/
+
+/* instruction lists used in the IOPT buffer */
+struct _xd3_rlist
+{
+  xd3_rlist  *next;
+  xd3_rlist  *prev;
+};
+
+/* the raw encoding of an instruction used in the IOPT buffer */
+struct _xd3_rinst
+{
+  uint8_t     type;
+  uint8_t     xtra;
+  uint8_t     code1;
+  uint8_t     code2;
+  usize_t      pos;
+  usize_t      size;
+  xoff_t      addr;
+  xd3_rlist   link;
+};
+
+/* the code-table form of a single- or double-instruction */
+struct _xd3_dinst
+{
+  uint8_t     type1;
+  uint8_t     size1;
+  uint8_t     type2;
+  uint8_t     size2;
+};
+
+/* the decoded form of a single (half) instruction. */
+struct _xd3_hinst
+{
+  uint8_t    type;
+  usize_t    size;
+  usize_t    addr;
+};
+
+/* the form of a whole-file instruction */
+struct _xd3_winst
+{
+  uint8_t type;  /* RUN, ADD, COPY */
+  uint8_t mode;  /* 0, VCD_SOURCE, VCD_TARGET */
+  usize_t size;
+  xoff_t  addr;
+  xoff_t  position;  /* absolute position of this inst */
+};
+
+/* used by the encoder to buffer output in sections.  list of blocks. */
+struct _xd3_output
+{
+  uint8_t    *base;
+  usize_t     next;
+  usize_t     avail;
+  xd3_output *next_page;
+};
+
+/* used by the decoder to buffer input in sections. */
+struct _xd3_desect
+{
+  const uint8_t *buf;
+  const uint8_t *buf_max;
+  usize_t        size;
+  usize_t        pos;
+
+  /* used in xdelta3-decode.h */
+  uint8_t       *copied1;
+  usize_t        alloc1;
+
+  /* used in xdelta3-second.h */
+  uint8_t       *copied2;
+  usize_t        alloc2;
+};
+
+/* the VCDIFF address cache, see the RFC */
+struct _xd3_addr_cache
+{
+  usize_t  s_near;
+  usize_t  s_same;
+  usize_t  next_slot;  /* the circular index for near */
+  usize_t *near_array; /* array of size s_near        */
+  usize_t *same_array; /* array of size s_same*256    */
+};
+
+/* the IOPT buffer list is just a list of buffers, which may be allocated
+ * during encode when using an unlimited buffer. */
+struct _xd3_iopt_buflist
+{
+  xd3_rinst *buffer;
+  xd3_iopt_buflist *next;
+};
+
+/* This is the record of a pre-compiled configuration, a subset of
+   xd3_config. */
+struct _xd3_smatcher
+{
+  const char        *name;
+  int             (*string_match) (xd3_stream  *stream);
+  usize_t            large_look;
+  usize_t            large_step;
+  usize_t            small_look;
+  usize_t            small_chain;
+  usize_t            small_lchain;
+  usize_t            max_lazy;
+  usize_t            long_enough;
+};
+
+/* hash table size & power-of-two hash function. */
+struct _xd3_hash_cfg
+{
+  usize_t  size;       // Number of buckets
+  usize_t  shift;
+  usize_t  mask;
+  usize_t  look;       // How wide is this checksum
+  usize_t  multiplier; // K * powers[0]
+  usize_t *powers;     // Array of [0,look) where powers[look-1] == 1
+                       // and powers[N] = powers[N+1]*K (Rabin-Karp)
+};
+
+/* the sprev list */
+struct _xd3_slist
+{
+  usize_t     last_pos;
+};
+
+/* window info (for whole state) */
+struct _xd3_wininfo {
+  xoff_t offset;
+  usize_t length;
+  uint32_t adler32;
+};
+
+/* whole state for, e.g., merge */
+struct _xd3_whole_state {
+  usize_t addslen;
+  uint8_t *adds;
+  usize_t  adds_alloc;
+
+  usize_t instlen;
+  xd3_winst *inst;
+  usize_t  inst_alloc;
+
+  usize_t wininfolen;
+  xd3_wininfo *wininfo;
+  usize_t wininfo_alloc;
+
+  xoff_t length;
+};
+
+/********************************************************************
+ public types
+ *******************************************************************/
+
+/* Settings for the secondary compressor. */
+struct _xd3_sec_cfg
+{
+  int                data_type;     /* Which section. (set automatically) */
+  usize_t            ngroups;       /* Number of DJW Huffman groups. */
+  usize_t            sector_size;   /* Sector size. */
+  int                inefficient;   /* If true, ignore efficiency check [avoid XD3_NOSECOND]. */
+};
+
+/* This is the user-visible stream configuration. */
+struct _xd3_config
+{
+  usize_t             winsize;       /* The encoder window size. */
+  usize_t             sprevsz;       /* How far back small string
+					matching goes */
+  usize_t             iopt_size;     /* entries in the
+					instruction-optimizing
+					buffer */
+
+  xd3_getblk_func   *getblk;        /* The three callbacks. */
+  xd3_alloc_func    *alloc;
+  xd3_free_func     *freef;
+  void              *opaque;        /* Not used. */
+  uint32_t           flags;         /* stream->flags are initialized
+				     * from xd3_config & never
+				     * modified by the library.  Use
+				     * xd3_set_flags to modify flags
+				     * settings mid-stream. */
+
+  xd3_sec_cfg       sec_data;       /* Secondary compressor config: data */
+  xd3_sec_cfg       sec_inst;       /* Secondary compressor config: inst */
+  xd3_sec_cfg       sec_addr;       /* Secondary compressor config: addr */
+
+  xd3_smatch_cfg     smatch_cfg;    /* See enum: use fields below  for
+				       soft config */
+  xd3_smatcher       smatcher_soft;
+};
+
+/* The primary source file object. You create one of these objects and
+ * initialize the first four fields ("you set").  The configured getblk
+ * implementation is responsible for setting the next three fields
+ * ("getblk sets") when called (and/or when XD3_GETSRCBLK is
+ * returned).  This library maintains the remaining fields.
+ */
+struct _xd3_source
+{
+  /* you set */
+  usize_t             blksize;       /* block size */
+  const char         *name;          /* its name, for debug/print
+					purposes */
+  void               *ioh;           /* opaque handle */
+  xoff_t              max_winsize;   /* maximum visible buffer */
+
+  /* getblk sets */
+  xoff_t              curblkno;      /* current block number: client
+					sets after getblk request */
+  usize_t             onblk;         /* number of bytes on current
+					block: client sets,  must be >= 0
+				        and <= blksize */
+  const uint8_t      *curblk;        /* current block array: client
+					sets after getblk request */
+
+  /* xd3 sets */
+  usize_t             srclen;        /* length of this source window */
+  xoff_t              srcbase;       /* offset of this source window
+					in the source itself */
+  usize_t             shiftby;       /* for power-of-two blocksizes */
+  usize_t             maskby;        /* for power-of-two blocksizes */
+  xoff_t              cpyoff_blocks; /* offset of dec_cpyoff in blocks */
+  usize_t             cpyoff_blkoff; /* offset of copy window in
+					blocks, remainder */
+  xoff_t              getblkno;      /* request block number: xd3 sets
+					current getblk request */
+
+  /* See xd3_getblk() */
+  xoff_t              max_blkno;  /* Maximum block, if eof is known,
+				   * otherwise, equals frontier_blkno
+				   * (initially 0). */
+  usize_t             onlastblk;  /* Number of bytes on max_blkno */
+  int                 eof_known;  /* Set to true when the first
+				   * partial block is read. */
+};
+
+/* The primary xd3_stream object, used for encoding and decoding.  You
+ * may access only two fields: avail_out, next_out.  Use the methods
+ * above to operate on xd3_stream. */
+struct _xd3_stream
+{
+  /* input state */
+  const uint8_t    *next_in;          /* next input byte */
+  usize_t           avail_in;         /* number of bytes available at
+					 next_in */
+  xoff_t            total_in;         /* how many bytes in */
+
+  /* output state */
+  uint8_t          *next_out;         /* next output byte */
+  usize_t           avail_out;        /* number of bytes available at
+					 next_out */
+  usize_t           space_out;        /* total out space */
+  xoff_t            current_window;   /* number of windows encoded/decoded */
+  xoff_t            total_out;        /* how many bytes out */
+
+  /* to indicate an error, xd3 sets */
+  const char       *msg;              /* last error message, NULL if
+					 no error */
+
+  /* source configuration */
+  xd3_source       *src;              /* source array */
+
+  /* encoder memory configuration */
+  usize_t           winsize;          /* suggested window size */
+  usize_t           sprevsz;          /* small string, previous window
+					 size (power of 2) */
+  usize_t           sprevmask;        /* small string, previous window
+					 size mask */
+  usize_t           iopt_size;
+  usize_t           iopt_unlimited;
+
+  /* general configuration */
+  xd3_getblk_func  *getblk;           /* set nxtblk, nxtblkno to scanblkno */
+  xd3_alloc_func   *alloc;            /* malloc function */
+  xd3_free_func    *free;             /* free function */
+  void*             opaque;           /* private data object passed to
+					 alloc, free, and getblk */
+  uint32_t          flags;            /* various options */
+
+  /* secondary compressor configuration */
+  xd3_sec_cfg       sec_data;         /* Secondary compressor config: data */
+  xd3_sec_cfg       sec_inst;         /* Secondary compressor config: inst */
+  xd3_sec_cfg       sec_addr;         /* Secondary compressor config: addr */
+
+  xd3_smatcher      smatcher;
+
+  usize_t           *large_table;      /* table of large checksums */
+  xd3_hash_cfg       large_hash;       /* large hash config */
+
+  usize_t           *small_table;      /* table of small checksums */
+  xd3_slist         *small_prev;       /* table of previous offsets,
+					  circular linked list */
+  int                small_reset;      /* true if small table should
+					  be reset */
+
+  xd3_hash_cfg       small_hash;       /* small hash config */
+  xd3_addr_cache     acache;           /* the vcdiff address cache */
+  xd3_encode_state   enc_state;        /* state of the encoder */
+
+  usize_t            taroff;           /* base offset of the target input */
+  usize_t            input_position;   /* current input position */
+  usize_t            min_match;        /* current minimum match
+					  length, avoids redundant
+					  matches */
+  usize_t            unencoded_offset; /* current input, first
+				       * unencoded offset. this value
+				       * is <= the first instruction's
+				       * position in the iopt buffer,
+				       * if there is at least one
+				       * match in the buffer. */
+
+  /* SRCWIN */
+  int                srcwin_decided;    /* boolean: true if srclen and
+					   srcbase have been
+					   decided. */
+  int                srcwin_decided_early;  /* boolean: true if srclen
+					       and srcbase were
+					       decided early. */
+  xoff_t             srcwin_cksum_pos;  /* Source checksum position */
+
+  /* MATCH */
+  xd3_match_state    match_state;      /* encoder match state */
+  xoff_t             match_srcpos;     /* current match source
+					  position relative to
+					  srcbase */
+  xoff_t             match_last_srcpos;  /* previously attempted
+					  * srcpos, to avoid loops. */
+  xoff_t             match_minaddr;    /* smallest matching address to
+				       * set window params (reset each
+				       * window xd3_encode_reset) */
+  xoff_t             match_maxaddr;    /* largest matching address to
+				       * set window params (reset each
+				       * window xd3_encode_reset) */
+  usize_t            match_back;       /* match extends back so far */
+  usize_t            match_maxback;    /* match extends back maximum */
+  usize_t            match_fwd;        /* match extends forward so far */
+  usize_t            match_maxfwd;     /* match extends forward maximum */
+
+  xoff_t             maxsrcaddr;      /* address of the last source
+					 match (across windows) */
+
+  uint8_t          *buf_in;           /* for saving buffered input */
+  usize_t           buf_avail;        /* amount of saved input */
+  const uint8_t    *buf_leftover;     /* leftover content of next_in
+					 (i.e., user's buffer) */
+  usize_t            buf_leftavail;    /* amount of leftover content */
+
+  xd3_output       *enc_current;      /* current output buffer */
+  xd3_output       *enc_free;         /* free output buffers */
+  xd3_output       *enc_heads[4];     /* array of encoded outputs:
+					 head of chain */
+  xd3_output       *enc_tails[4];     /* array of encoded outputs:
+					 tail of chain */
+  uint32_t          recode_adler32;   /* set the adler32 checksum
+				       * during "recode". */
+
+  xd3_rlist         iopt_used;        /* instruction optimizing buffer */
+  xd3_rlist         iopt_free;
+  xd3_rinst        *iout;             /* next single instruction */
+  xd3_iopt_buflist *iopt_alloc;
+
+  const uint8_t    *enc_appheader;    /* application header to encode */
+  usize_t            enc_appheadsz;    /* application header size */
+
+  /* decoder stuff */
+  xd3_decode_state  dec_state;        /* current DEC_XXX value */
+  usize_t           dec_hdr_ind;      /* VCDIFF header indicator */
+  usize_t           dec_win_ind;      /* VCDIFF window indicator */
+  usize_t           dec_del_ind;      /* VCDIFF delta indicator */
+
+  uint8_t           dec_magic[4];     /* First four bytes */
+  usize_t           dec_magicbytes;   /* Magic position. */
+
+  usize_t           dec_secondid;     /* Optional secondary compressor ID. */
+
+  usize_t           dec_codetblsz;    /* Optional code table: length. */
+  uint8_t          *dec_codetbl;      /* Optional code table: storage. */
+  usize_t           dec_codetblbytes; /* Optional code table: position. */
+
+  usize_t           dec_appheadsz;    /* Optional application header:
+					 size. */
+  uint8_t          *dec_appheader;    /* Optional application header:
+					 storage */
+  usize_t           dec_appheadbytes; /* Optional application header:
+					 position. */
+
+  usize_t            dec_cksumbytes;   /* Optional checksum: position. */
+  uint8_t           dec_cksum[4];     /* Optional checksum: storage. */
+  uint32_t          dec_adler32;      /* Optional checksum: value. */
+
+  usize_t            dec_cpylen;       /* length of copy window
+					  (VCD_SOURCE or VCD_TARGET) */
+  xoff_t             dec_cpyoff;       /* offset of copy window
+					  (VCD_SOURCE or VCD_TARGET) */
+  usize_t            dec_enclen;       /* length of delta encoding */
+  usize_t            dec_tgtlen;       /* length of target window */
+
+#if USE_UINT64
+  uint64_t          dec_64part;       /* part of a decoded uint64_t */
+#endif
+#if USE_UINT32
+  uint32_t          dec_32part;       /* part of a decoded uint32_t */
+#endif
+
+  xoff_t            dec_winstart;     /* offset of the start of
+                                         current target window */
+  xoff_t            dec_window_count; /* == current_window + 1 in
+                                         DEC_FINISH */
+  usize_t            dec_winbytes;     /* bytes of the three sections
+                                          so far consumed */
+  usize_t            dec_hdrsize;      /* VCDIFF + app header size */
+
+  const uint8_t    *dec_tgtaddrbase;  /* Base of decoded target
+                                         addresses (addr >=
+                                         dec_cpylen). */
+  const uint8_t    *dec_cpyaddrbase;  /* Base of decoded copy
+                                         addresses (addr <
+                                         dec_cpylen). */
+
+  usize_t            dec_position;     /* current decoder position
+                                          counting the cpylen
+                                          offset */
+  usize_t            dec_maxpos;       /* maximum decoder position
+                                          counting the cpylen
+                                          offset */
+  xd3_hinst         dec_current1;     /* current instruction */
+  xd3_hinst         dec_current2;     /* current instruction */
+
+  uint8_t          *dec_buffer;       /* Decode buffer */
+  uint8_t          *dec_lastwin;      /* In case of VCD_TARGET, the
+                                         last target window. */
+  usize_t            dec_lastlen;      /* length of the last target
+                                          window */
+  xoff_t            dec_laststart;    /* offset of the start of last
+                                         target window */
+  usize_t            dec_lastspace;    /* allocated space of last
+                                          target window, for reuse */
+
+  xd3_desect        inst_sect;        /* staging area for decoding
+                                         window sections */
+  xd3_desect        addr_sect;
+  xd3_desect        data_sect;
+
+  xd3_code_table_func       *code_table_func;
+  const xd3_dinst           *code_table;
+  const xd3_code_table_desc *code_table_desc;
+  xd3_dinst                 *code_table_alloc;
+
+  /* secondary compression */
+  const xd3_sec_type *sec_type;
+  xd3_sec_stream     *sec_stream_d;
+  xd3_sec_stream     *sec_stream_i;
+  xd3_sec_stream     *sec_stream_a;
+
+  /* state for reconstructing whole files (e.g., for merge), this only
+   * supports loading USIZE_T_MAX instructions, adds, etc. */
+  xd3_whole_state     whole_target;
+
+  /* statistics */
+  xoff_t            n_scpy;
+  xoff_t            n_tcpy;
+  xoff_t            n_add;
+  xoff_t            n_run;
+
+  xoff_t            l_scpy;
+  xoff_t            l_tcpy;
+  xoff_t            l_add;
+  xoff_t            l_run;
+
+  usize_t           i_slots_used;
+
+#if XD3_DEBUG
+  usize_t            large_ckcnt;
+
+  /* memory usage */
+  usize_t            alloc_cnt;
+  usize_t            free_cnt;
+#endif
+};
+
+/**************************************************************************
+ PUBLIC FUNCTIONS
+ **************************************************************************/
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+
+/* This function configures an xd3_stream using the provided in-memory
+ * input buffer, source buffer, output buffer, and flags.  The output
+ * array must be large enough or else ENOSPC will be returned.  This
+ * is the simplest in-memory encoding interface. */
+int     xd3_encode_memory (const uint8_t *input,
+			   usize_t        input_size,
+			   const uint8_t *source,
+			   usize_t        source_size,
+			   uint8_t       *output_buffer,
+			   usize_t       *output_size,
+			   usize_t        avail_output,
+			   int            flags);
+
+/* The reverse of xd3_encode_memory. */
+int     xd3_decode_memory (const uint8_t *input,
+			   usize_t        input_size,
+			   const uint8_t *source,
+			   usize_t        source_size,
+			   uint8_t       *output_buf,
+			   usize_t       *output_size,
+			   usize_t        avail_output,
+			   int            flags);
+
+/* This function encodes an in-memory input using a pre-configured
+ * xd3_stream.  This allows the caller to set a variety of options
+ * which are not available in the xd3_encode/decode_memory()
+ * functions.
+ *
+ * The output array must be large enough to hold the output or else
+ * ENOSPC is returned.  The source (if any) should be set using
+ * xd3_set_source_and_size() with a single-block xd3_source.  This
+ * calls the underlying non-blocking interfaces,
+ * xd3_encode/decode_input(), handling the necessary input/output
+ * states.  This method may be considered a reference for any
+ * application using xd3_encode_input() directly.
+ *
+ *   xd3_stream stream;
+ *   xd3_config config;
+ *   xd3_source src;
+ *
+ *   memset (& src, 0, sizeof (src));
+ *   memset (& stream, 0, sizeof (stream));
+ *   memset (& config, 0, sizeof (config));
+ *
+ *   if (source != NULL)
+ *     {
+ *       src.size = source_size;
+ *       src.blksize = source_size;
+ *       src.curblkno = 0;
+ *       src.onblk = source_size;
+ *       src.curblk = source;
+ *       src.max_winsize = source_size;
+ *       xd3_set_source(&stream, &src);
+ *     }
+ *
+ *   config.flags = flags;
+ *   config.winsize = input_size;
+ *
+ *   ... set smatcher, appheader, encoding-table, compression-level, etc.
+ *
+ *   xd3_config_stream(&stream, &config);
+ *   xd3_encode_stream(&stream, ...);
+ *   xd3_free_stream(&stream);
+ */
+int     xd3_encode_stream (xd3_stream    *stream,
+			   const uint8_t *input,
+			   usize_t         input_size,
+			   uint8_t       *output,
+			   usize_t        *output_size,
+			   usize_t         avail_output);
+
+/* The reverse of xd3_encode_stream. */
+int     xd3_decode_stream (xd3_stream    *stream,
+			   const uint8_t *input,
+			   usize_t        input_size,
+			   uint8_t       *output,
+			   usize_t       *output_size,
+			   usize_t        avail_size);
+
+/* This is the non-blocking interface.
+ *
+ * Handling input and output states is the same for encoding or
+ * decoding using the xd3_avail_input() and xd3_consume_output()
+ * routines, inlined below.
+ *
+ * Return values:
+ *
+ *   XD3_INPUT: the process requires more input: call
+ *               xd3_avail_input() then repeat
+ *
+ *   XD3_OUTPUT: the process has more output: read stream->next_out,
+ *               stream->avail_out, then call xd3_consume_output(),
+ *               then repeat
+ *
+ *   XD3_GOTHEADER: (decoder-only) notification returned following the
+ *               VCDIFF header and first window header.  the decoder
+ *               may use the header to configure itself.
+ *
+ *   XD3_WINSTART: a general notification returned once for each
+ *               window except the 0-th window, which is implied by
+ *               XD3_GOTHEADER.  It is recommended to use a
+ *               switch-stmt such as:
+ *
+ *                 ...
+ *               again:
+ *                 switch ((ret = xd3_decode_input (stream))) {
+ *                    case XD3_GOTHEADER: {
+ *                      assert(stream->current_window == 0);
+ *                      stuff;
+ *                    }
+ *                    // fallthrough
+ *                    case XD3_WINSTART: {
+ *                      something(stream->current_window);
+ *                      goto again;
+ *                    }
+ *                    ...
+ *
+ *   XD3_WINFINISH: a general notification, following the complete
+ *               input & output of a window.  at this point,
+ *               stream->total_in and stream->total_out are consistent
+ *               for either encoding or decoding.
+ *
+ *   XD3_GETSRCBLK: If the xd3_getblk() callback is NULL, this value
+ *               is returned to initiate a non-blocking source read.
+ */
+int     xd3_decode_input  (xd3_stream    *stream);
+int     xd3_encode_input  (xd3_stream    *stream);
+
+/* The xd3_config structure is used to initialize a stream - all data
+ * is copied into stream so config may be a temporary variable.  See
+ * the [documentation] or comments on the xd3_config structure. */
+int     xd3_config_stream (xd3_stream    *stream,
+			   xd3_config    *config);
+
+/* Since Xdelta3 doesn't open any files, xd3_close_stream is just an
+ * error check that the stream is in a proper state to be closed: this
+ * means the encoder is flushed and the decoder is at a window
+ * boundary.  The application is responsible for freeing any of the
+ * resources it supplied. */
+int     xd3_close_stream (xd3_stream    *stream);
+
+/* This arranges for closing the stream to succeed.  Does not free the
+ * stream. */
+void    xd3_abort_stream (xd3_stream    *stream);
+
+/* xd3_free_stream frees all memory allocated for the stream.  The
+ * application is responsible for freeing any of the resources it
+ * supplied. */
+void    xd3_free_stream   (xd3_stream    *stream);
+
+/* This function informs the encoder or decoder that source matching
+ * (i.e., delta-compression) is possible.  For encoding, this should
+ * be called before the first xd3_encode_input.  A NULL source is
+ * ignored.  For decoding, this should be called before the first
+ * window is decoded, but the appheader may be read first
+ * (XD3_GOTHEADER).  After decoding the header, call xd3_set_source()
+ * if you have a source file.  Note: if (stream->dec_win_ind & VCD_SOURCE)
+ * is true, it means the first window expects there to be a source file.
+ */
+int     xd3_set_source    (xd3_stream    *stream,
+			   xd3_source    *source);
+
+/* If the source size is known, call this instead of xd3_set_source()
+ * to avoid having stream->getblk called (and/or to avoid XD3_GETSRCBLK).
+ *
+ * Follow these steps:
+  xd3_source source;
+  memset(&source, 0, sizeof(source));
+  source.blksize  = size;
+  source.onblk    = size;
+  source.curblk   = buf;
+  source.curblkno = 0;
+  int ret = xd3_set_source_and_size(&stream, &source, size);
+  ...
+ */
+int     xd3_set_source_and_size (xd3_stream    *stream,
+				 xd3_source    *source,
+				 xoff_t         source_size);
+
+/* This should be called before the first call to xd3_encode_input()
+ * to include application-specific data in the VCDIFF header. */
+void    xd3_set_appheader (xd3_stream    *stream,
+			   const uint8_t *data,
+			   usize_t        size);
+
+/* xd3_get_appheader may be called in the decoder after XD3_GOTHEADER.
+ * For convenience, the decoder always adds a single byte padding to
+ * the end of the application header, which is set to zero in case the
+ * application header is a string. */
+int     xd3_get_appheader (xd3_stream     *stream,
+			   uint8_t       **data,
+			   usize_t        *size);
+
+/* To generate a VCDIFF encoded delta with xd3_encode_init() from
+ * another format, use:
+ *
+ *   xd3_encode_init_partial() -- initialize encoder state (w/o hash tables)
+ *   xd3_init_cache() -- reset VCDIFF address cache
+ *   xd3_found_match() -- to report a copy instruction
+ *
+ * set stream->enc_state to ENC_INSTR and call xd3_encode_input as usual.
+ */
+int xd3_encode_init_partial (xd3_stream *stream);
+void xd3_init_cache (xd3_addr_cache* acache);
+int xd3_found_match (xd3_stream *stream,
+		     usize_t pos, usize_t size,
+		     xoff_t addr, int is_source);
+
+/* Gives an error string for xdelta3-specific errors, returns NULL for
+   system errors */
+const char* xd3_strerror (int ret);
+
+/* For convenience, zero & initialize the xd3_config structure with
+   specified flags. */
+static inline
+void    xd3_init_config (xd3_config *config,
+			 uint32_t    flags)
+{
+  memset (config, 0, sizeof (*config));
+  config->flags = flags;
+}
+
+/* This supplies some input to the stream.
+ *
+ * For encoding, if the input is larger than the configured window
+ * size (xd3_config.winsize), the entire input will be consumed and
+ * encoded anyway.  If you wish to strictly limit the window size,
+ * limit the buffer passed to xd3_avail_input to the window size.
+ *
+ * For encoding, if the input is smaller than the configured window
+ * size (xd3_config.winsize), the library will create a window-sized
+ * buffer and accumulate input until a full-sized window can be
+ * encoded.  XD3_INPUT will be returned.  The input must remain valid
+ * until the next time xd3_encode_input() returns XD3_INPUT.
+ *
+ * For decoding, the input will be consumed entirely before XD3_INPUT
+ * is returned again.
+ */
+static inline
+void    xd3_avail_input  (xd3_stream    *stream,
+			  const uint8_t *idata,
+			  usize_t         isize)
+{
+  /* The encoder uses a non-NULL idata to determine whether
+   * xd3_avail_input has ever been called: if xd3_encode_input is
+   * called first it returns XD3_INPUT right away without allocating
+   * a stream->winsize buffer, avoiding an unwanted allocation.
+   * NOTE(review): the assertion below still accepts a NULL idata
+   * when isize is zero -- confirm which contract is intended. */
+  XD3_ASSERT (idata != NULL || isize == 0);
+
+  stream->next_in  = idata;
+  stream->avail_in = isize;
+}
+
+/* This acknowledges receipt of output data, must be called after any
+ * XD3_OUTPUT return. */
+static inline
+void xd3_consume_output (xd3_stream  *stream)
+{
+  stream->avail_out  = 0;
+}
+
+/* These are set for each XD3_WINFINISH return. */
+static inline
+int xd3_encoder_used_source (xd3_stream *stream) {
+  return stream->src != NULL && stream->src->srclen > 0;
+}
+static inline
+xoff_t xd3_encoder_srcbase (xd3_stream *stream) {
+  return stream->src->srcbase;  /* no NULL check: stream->src must be set */
+}
+static inline
+usize_t xd3_encoder_srclen (xd3_stream *stream) {
+  return stream->src->srclen;  /* no NULL check: stream->src must be set */
+}
+
+/* Checks for legal flag changes. */
+static inline
+void xd3_set_flags (xd3_stream *stream, uint32_t flags)
+{
+  /* The bitwise difference should contain only XD3_FLUSH or
+     XD3_SKIP_WINDOW */
+  XD3_ASSERT(((flags ^ stream->flags) & ~(XD3_FLUSH | XD3_SKIP_WINDOW)) == 0);
+  stream->flags = flags;
+}
+
+/* Gives some extra information about the latest library error, if any
+ * is known. */
+static inline
+const char* xd3_errstring (xd3_stream  *stream)
+{
+  return stream->msg ? stream->msg : "";
+}
+
+
+/* 64-bit divisions are expensive, which is why we require a
+ * power-of-two source->blksize.  To relax this restriction is
+ * relatively easy, see the history for xd3_blksize_div(). */
+static inline
+void xd3_blksize_div (const xoff_t offset,
+		      const xd3_source *source,
+		      xoff_t *blkno,
+		      usize_t *blkoff) {
+  *blkno = offset >> source->shiftby;  /* block number: shift, not divide */
+  *blkoff = offset & source->maskby;   /* offset within that block */
+  XD3_ASSERT (*blkoff < source->blksize);
+}
+
+static inline
+void xd3_blksize_add (xoff_t *blkno,
+		      usize_t *blkoff,
+		      const xd3_source *source,
+		      const usize_t add)
+{
+  /* Sum is not overflow-checked here; xdelta3-decode.h does that. */
+  const usize_t total = *blkoff + add;
+  const usize_t carry = total >> source->shiftby;
+
+  *blkoff = total;
+  if (carry != 0)
+    {
+      /* The sum ran past the current block: move the shifted-out
+       * part into blkno and mask blkoff back into range. */
+      *blkno += carry;
+      *blkoff = total & source->maskby;
+    }
+  XD3_ASSERT (*blkoff < source->blksize);
+}
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#define XD3_NOOP 0U
+#define XD3_ADD 1U
+#define  XD3_RUN 2U
+#define  XD3_CPY 3U /* XD3_CPY rtypes are represented as (XD3_CPY +
+                     * copy-mode value) */
+
+#if XD3_DEBUG
+#define IF_DEBUG(x) x
+#else
+#define IF_DEBUG(x)
+#endif
+#if XD3_DEBUG > 1
+#define IF_DEBUG1(x) x
+#else
+#define IF_DEBUG1(x)
+#endif
+#if XD3_DEBUG > 2
+#define IF_DEBUG2(x) x
+#else
+#define IF_DEBUG2(x)
+#endif
+
+#define SIZEOF_ARRAY(x) (sizeof(x) / sizeof(x[0]))
+
+#endif /* _XDELTA3_H_ */
diff --git a/third-party/xdelta3/xdelta3/xdelta3.i b/third-party/xdelta3/xdelta3/xdelta3.i
new file mode 100644
index 0000000000..2fea01535d
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/xdelta3.i
@@ -0,0 +1,85 @@
+%module xdelta3
+%import cstring.i
+%import argcargv.i
+%{
+#include "xdelta3.h"
+
+int xd3_main_cmdline (int ARGC, char **ARGV);
+%}
+
+%cstring_input_binary(const char *input, unsigned int input_size);
+%cstring_input_binary(const char *source, unsigned int source_size);
+
+%define %max_output_withsize(TYPEMAP, SIZE, MAXSIZE)
+%typemap(in) MAXSIZE (unsigned int alloc_size) {
+  $1 = alloc_size = PyInt_AsLong(obj2);
+}
+%typemap(in,numinputs=0) (TYPEMAP, SIZE) {
+}
+%typemap(check) (TYPEMAP, SIZE) {
+  // alloc_size7 is the alloc_size local above, suffixed with its argument position (7th in xd3_xxcode_memory())
+  $1 = malloc(alloc_size7);
+  $2 = &alloc_size7;
+}
+%typemap(argout,fragment="t_output_helper") (TYPEMAP, SIZE) {
+  if (result == 0) {
+    PyObject *o;
+    // alloc_size7 was passed by address as the size argument, so it now carries the actual size
+    o = PyString_FromStringAndSize($1,alloc_size7);
+    $result = t_output_helper($result,o);
+  } else {
+    $result = t_output_helper($result,Py_None);
+  }
+  free($1);
+}
+%typemap(default) int flags {
+  $1 = 0;
+}
+%enddef
+
+%max_output_withsize(char *output_buf, unsigned int *output_size, unsigned int max_output);
+
+int     xd3_encode_memory (const uint8_t *input,
+			   usize_t        input_size,
+			   const uint8_t *source,
+			   usize_t        source_size,
+			   uint8_t       *output_buffer,
+			   usize_t       *output_size,
+			   usize_t        avail_output,
+			   int            flags);
+
+int     xd3_decode_memory (const uint8_t *input,
+			   usize_t        input_size,
+			   const uint8_t *source,
+			   usize_t        source_size,
+			   uint8_t       *output_buf,
+			   usize_t       *output_size,
+			   usize_t        avail_output,
+			   int            flags);
+
+int     xd3_main_cmdline (int ARGC, char **ARGV);
+
+/* XD3_* flag names declared in this interface; the commented-out names are left out.  Is this the right way? */
+enum {
+  /*XD3_JUST_HDR,*/
+  /*XD3_SKIP_WINDOW,*/
+  /*XD3_SKIP_EMIT,*/
+  /*XD3_FLUSH,*/
+  XD3_SEC_DJW,
+  XD3_SEC_FGK,
+  /*XD3_SEC_TYPE,*/
+  XD3_SEC_NODATA,
+  XD3_SEC_NOINST,
+  XD3_SEC_NOADDR,
+  /*XD3_SEC_OTHER,*/
+  XD3_ADLER32,
+  XD3_ADLER32_NOVER,
+  XD3_NOCOMPRESS,
+  XD3_BEGREEDY,
+  XD3_COMPLEVEL_SHIFT,
+  XD3_COMPLEVEL_MASK,
+  XD3_COMPLEVEL_1,
+  XD3_COMPLEVEL_3,
+  XD3_COMPLEVEL_6,
+  XD3_COMPLEVEL_9,
+};
diff --git a/third-party/xdelta3/xdelta3/xdelta3.vcxproj b/third-party/xdelta3/xdelta3/xdelta3.vcxproj
new file mode 100644
index 0000000000..638496f674
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/xdelta3.vcxproj
@@ -0,0 +1,344 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Itanium">
+      <Configuration>Debug</Configuration>
+      <Platform>Itanium</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Itanium">
+      <Configuration>Release</Configuration>
+      <Platform>Itanium</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="xdelta3-64|Itanium">
+      <Configuration>xdelta3-64</Configuration>
+      <Platform>Itanium</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="xdelta3-64|Win32">
+      <Configuration>xdelta3-64</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="xdelta3-64|x64">
+      <Configuration>xdelta3-64</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="xdelta3.c">
+      <PreprocessorDefinitions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions Condition="'$(Configuration)|$(Platform)'=='Debug|Itanium'">%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">/DXD3_DEBUG=0 /DXD3_USE_LARGEFILE64=1 /DREGRESSION_TEST=1 /DSECONDARY_DJW=1 /DSECONDARY_FGK=1 /DXD3_MAIN=1 /DXD3_WIN32=1 /DEXTERNAL_COMPRESSION=0 /DXD3_STDIO=0 /DXD3_POSIX=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions Condition="'$(Configuration)|$(Platform)'=='Release|Itanium'">/DXD3_DEBUG=0 /DXD3_USE_LARGEFILE64=1 /DREGRESSION_TEST=1 /DSECONDARY_DJW=1 /DSECONDARY_FGK=1 /DXD3_MAIN=1 /DXD3_WIN32=1 /DEXTERNAL_COMPRESSION=0 /DXD3_STDIO=0 /DXD3_POSIX=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">/DXD3_DEBUG=0 /DXD3_USE_LARGEFILE64=1 /DREGRESSION_TEST=1 /DSECONDARY_DJW=1 /DSECONDARY_FGK=1 /DXD3_MAIN=1 /DXD3_WIN32=1 /DEXTERNAL_COMPRESSION=0 /DXD3_STDIO=0 /DXD3_POSIX=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions Condition="'$(Configuration)|$(Platform)'=='xdelta3-64|Win32'">/DXD3_DEBUG=0 /DXD3_USE_LARGEFILE64=1 /DREGRESSION_TEST=1 /DSECONDARY_DJW=1 /DSECONDARY_FGK=1 /DXD3_MAIN=1 /DXD3_WIN32=1 /DEXTERNAL_COMPRESSION=0 /DXD3_STDIO=0 /DXD3_POSIX=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions Condition="'$(Configuration)|$(Platform)'=='xdelta3-64|Itanium'">/DXD3_DEBUG=0 /DXD3_USE_LARGEFILE64=1 /DREGRESSION_TEST=1 /DSECONDARY_DJW=1 /DSECONDARY_FGK=1 /DXD3_MAIN=1 /DXD3_WIN32=1 /DEXTERNAL_COMPRESSION=0 /DXD3_STDIO=0 /DXD3_POSIX=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions Condition="'$(Configuration)|$(Platform)'=='xdelta3-64|x64'">/DXD3_DEBUG=0 /DXD3_USE_LARGEFILE64=1 /DREGRESSION_TEST=1 /DSECONDARY_DJW=1 /DSECONDARY_FGK=1 /DXD3_MAIN=1 /DXD3_WIN32=1 /DEXTERNAL_COMPRESSION=0 /DXD3_STDIO=0 /DXD3_POSIX=0;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="xdelta3-blkcache.h" />
+    <ClInclude Include="xdelta3-cfgs.h" />
+    <ClInclude Include="xdelta3-decode.h" />
+    <ClInclude Include="xdelta3-djw.h" />
+    <ClInclude Include="xdelta3-fgk.h" />
+    <ClInclude Include="xdelta3-hash.h" />
+    <ClInclude Include="xdelta3-internal.h" />
+    <ClInclude Include="xdelta3-list.h" />
+    <ClInclude Include="xdelta3-lzma.h" />
+    <ClInclude Include="xdelta3-main.h" />
+    <ClInclude Include="xdelta3-merge.h" />
+    <ClInclude Include="xdelta3-second.h" />
+    <ClInclude Include="xdelta3-test.h" />
+    <ClInclude Include="xdelta3.h" />
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{8F9D37B5-B78E-4816-BE61-AEF679DBF3BC}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>xdelta3</RootNamespace>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Itanium'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Itanium'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='xdelta3-64|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='xdelta3-64|Itanium'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='xdelta3-64|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v120</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Itanium'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Itanium'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='xdelta3-64|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='xdelta3-64|Itanium'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='xdelta3-64|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Itanium'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Itanium'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>$(WindowsSdkDir)\include;$(VCInstallDir)include;..\xz\include</IncludePath>
+    <LibraryPath>$(LibraryPath);$(VSInstallDir);$(VSInstallDir)lib\amd64;..\xz\bin_x86-64</LibraryPath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='xdelta3-64|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='xdelta3-64|Itanium'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='xdelta3-64|x64'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;XD3_MAIN=1;XD3_DEBUG=0;XD3_USE_LARGEFILE64=1;REGRESSION_TEST=1;SECONDARY_DJW=1;SECONDARY_FGK=1;XD3_WIN32=1;EXTERNAL_COMPRESSION=0;SHELL_TESTS=0;_DEBUG;_CONSOLE;SECONDARY_LZMA=0;LZMA_API_STATIC;SIZEOF_SIZE_T=4;SIZEOF_UNSIGNED_LONG_LONG=8;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <AdditionalIncludeDirectories>../xz/include</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Itanium'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;XD3_MAIN=1;XD3_DEBUG=0;XD3_USE_LARGEFILE64=1;REGRESSION_TEST=1;SECONDARY_DJW=1;SECONDARY_FGK=1;XD3_WIN32=1;EXTERNAL_COMPRESSION=0;SHELL_TESTS=0;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;XD3_MAIN=1;XD3_DEBUG=0;XD3_USE_LARGEFILE64=1;REGRESSION_TEST=1;SECONDARY_DJW=1;SECONDARY_FGK=1;XD3_WIN32=1;EXTERNAL_COMPRESSION=0;SHELL_TESTS=0;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;..\..\..\..\src\xz\bin_x86-64\liblzma_static.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;XD3_MAIN=1;XD3_DEBUG=0;XD3_USE_LARGEFILE64=1;REGRESSION_TEST=1;SECONDARY_DJW=1;SECONDARY_FGK=1;SECONDARY_LZMA=1;XD3_WIN32=1;EXTERNAL_COMPRESSION=0;SHELL_TESTS=0;_DEBUG;_CONSOLE;LZMA_API_STATIC;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <AdditionalIncludeDirectories>..\..\..\..\src\xz\include</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalDependencies>kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;..\..\..\..\src\xz\bin_i486\liblzma_static.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Itanium'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;XD3_MAIN=1;XD3_DEBUG=0;XD3_USE_LARGEFILE64=1;REGRESSION_TEST=1;SECONDARY_DJW=1;SECONDARY_FGK=1;XD3_WIN32=1;EXTERNAL_COMPRESSION=0;SHELL_TESTS=0;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;XD3_MAIN=1;XD3_DEBUG=0;XD3_USE_LARGEFILE64=1;REGRESSION_TEST=1;SECONDARY_DJW=1;SECONDARY_FGK=1;SECONDARY_LZMA=1;XD3_WIN32=1;EXTERNAL_COMPRESSION=0;SHELL_TESTS=0;_DEBUG;_CONSOLE;LZMA_API_STATIC;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <AdditionalIncludeDirectories>..\..\..\..\src\xz\include</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalDependencies>kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies);..\..\..\..\src\xz\bin_x86-64\liblzma_static.lib</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='xdelta3-64|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;XD3_MAIN=1;XD3_DEBUG=0;XD3_USE_LARGEFILE64=1;REGRESSION_TEST=1;SECONDARY_DJW=1;SECONDARY_FGK=1;XD3_WIN32=1;EXTERNAL_COMPRESSION=0;SHELL_TESTS=0;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='xdelta3-64|Itanium'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;XD3_MAIN=1;XD3_DEBUG=0;XD3_USE_LARGEFILE64=1;REGRESSION_TEST=1;SECONDARY_DJW=1;SECONDARY_FGK=1;XD3_WIN32=1;EXTERNAL_COMPRESSION=0;SHELL_TESTS=0;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='xdelta3-64|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;XD3_MAIN=1;XD3_DEBUG=0;XD3_USE_LARGEFILE64=1;REGRESSION_TEST=1;SECONDARY_DJW=1;SECONDARY_FGK=1;XD3_WIN32=1;EXTERNAL_COMPRESSION=0;SHELL_TESTS=0;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/third-party/xdelta3/xdelta3/xdelta3.wxi b/third-party/xdelta3/xdelta3/xdelta3.wxi
new file mode 100644
index 0000000000..2ef842642b
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/xdelta3.wxi
@@ -0,0 +1,7 @@
+<Include>
+            <?define PRODUCT_ID=60131be5-be4d-4975-9108-dd0be735890d ?>
+            <?define PACKAGE_ID=82bf21ca-ee08-4701-ab78-37210dac82ce ?>
+            <?define COMPONENT_ID=85bc3206-05f8-41f8-b500-6ea32e5d6a8f ?>
+            <?define MANUAL_ID=07f387bc-a0c5-4af9-88db-1a84443f1fc5 ?>
+            <?define SOURCE_ID=4e1503a9-3ed1-4e06-b0c0-890462b1a4fd ?>
+</Include>
diff --git a/third-party/xdelta3/xdelta3/xdelta3.wxs b/third-party/xdelta3/xdelta3/xdelta3.wxs
new file mode 100644
index 0000000000..5e2d05c467
--- /dev/null
+++ b/third-party/xdelta3/xdelta3/xdelta3.wxs
@@ -0,0 +1,131 @@
+<?xml version='1.0'?>
+<?include $(sys.SOURCEFILEDIR)\xdelta3.wxi ?>
+
+<Wix xmlns='http://schemas.microsoft.com/wix/2003/01/wi'>
+   <Product Id='$(var.PRODUCT_ID)'
+            Name='Xdelta 3.0u'
+            Language='1033'
+            Codepage='1252'
+            Version='3.0.1.1'
+            Manufacturer='Josh.MacDonald@Gmail.Com'>
+
+            <Package Id='$(var.PACKAGE_ID)'
+                     Keywords='Installer'
+                     Description='Xdelta 3.0u'
+                     Comments='http://xdelta.org'
+                     Manufacturer='Josh.MacDonald@Gmail.Com'
+                     InstallerVersion='300'
+                     Languages='1033'
+                     Compressed='yes' />
+
+            <Media Id='1'
+                   Cabinet='xdelta30t.cab'
+                   EmbedCab='yes' />
+
+            <Directory Id='TARGETDIR' Name='SourceDir'>
+              <Directory Id='ProgramFilesFolder' Name='PFiles'>
+                <Directory Id='Xdelta' 
+                           Name='Xdelta'>
+
+            <Component Id='Main'
+                       Guid='$(var.COMPONENT_ID)'>
+              <File Id='XdeltaEXE'
+                    Name='xdelt30t'
+                    LongName='xdelta30t.exe'
+                    DiskId='1'
+                    Source='G:\jmacd\svn\xdelta3\Release\xdelta3.exe'
+                    Vital='yes'>
+              </File>
+            </Component>
+
+            <Component Id='Readme'
+                       Guid='$(var.MANUAL_ID)'>
+              <File Id='Readme'
+                    Name='readme.txt'
+                    LongName='readme.txt'
+                    DiskId='1'
+                    Source='G:\jmacd\svn\xdelta3\readme.txt'
+                    Vital='yes'>
+                <Shortcut Id="startupmenuReadme"
+                          Directory="ProgramMenuDir"
+                          Name="readme.txt"
+                          LongName="Xdelta3 readme.txt"
+                          />
+              </File>
+            </Component>
+
+            <Component Id='Copyright'
+                       Guid='$(var.MANUAL_ID)'>
+              <File Id='Copyright'
+                    Name='COPYING'
+                    LongName='COPYING'
+                    DiskId='1'
+                    Source='G:\jmacd\svn\xdelta3\COPYING'
+                    Vital='yes'>
+                <Shortcut Id="startupmenuCopyright"
+                          Directory="ProgramMenuDir"
+                          Name="COPYING"
+                          LongName="GNU Public License"
+                          />
+              </File>
+            </Component>
+
+            <Component Id='Source'
+                       Guid='$(var.SOURCE_ID)'>
+              <File Id='Source'
+                    Name='xdelt30t.zip'
+                    LongName='xdelta3.0u.zip'
+                    DiskId='1'
+                    Source='G:\jmacd\svn\xdelta3\xdelta3.0u.zip'
+                    Vital='yes'>
+                <Shortcut Id="startupmenuSource"
+                          Directory="ProgramMenuDir"
+                          Name="xdelt30t.zip"
+                          LongName="xdelta3.0u.zip"
+                          />
+              </File>
+            </Component>
+
+                </Directory>
+              </Directory>
+
+              <Directory Id="ProgramMenuFolder" Name="PMenu" LongName="Programs">
+	        <Directory Id="ProgramMenuDir"
+                           Name="xdelt30t"
+                           LongName="Xdelta 3.0u">
+                </Directory>
+              </Directory>
+
+<!--               <Merge Id='CRT' -->
+<!--                      Language='0' -->
+<!--                      DiskId='1' -->
+<!-- src='C:\Program Files\Common Files\Merge Modules\microsoft_vc80_crt_x86.msm' -->
+<!-- /> -->
+<!--               <Merge Id='CRT Policy' -->
+<!--                      Language='0' -->
+<!--                      DiskId='1' -->
+<!-- src='C:\Program Files\Common Files\Merge Modules\policy_8_0_Microsoft_VC80_CRT_x86.msm' -->
+<!-- /> -->
+            </Directory>
+
+            <Feature Id='Complete'
+                     Level='1'>
+              <ComponentRef Id='Main' />
+              <ComponentRef Id='Readme' />
+              <ComponentRef Id='Copyright' />
+              <ComponentRef Id='Source' />
+            </Feature>
+
+<!--             <Feature Id='CRT_WinSXS' Title='CRT WinSXS' Level='1'> -->
+<!--                         <MergeRef Id='CRT' /> -->
+<!--                         <MergeRef Id='CRT Policy' /> -->
+<!--             </Feature> -->
+
+            <InstallExecuteSequence>
+                        <RemoveRegistryValues/>
+                        <RemoveFiles/>
+                        <InstallFiles/>
+                        <WriteRegistryValues/>
+            </InstallExecuteSequence>
+   </Product>
+</Wix>
diff --git a/vendor.yaml b/vendor.yaml
index b993bb4c90..682b905e8d 100644
--- a/vendor.yaml
+++ b/vendor.yaml
@@ -12,3 +12,5 @@ third-party/CLI11.hpp:
   git: https://github.com/CLIUtils/CLI11/tree/v2.2.0
 third-party/xxhash.hpp:
   git: https://github.com/RedSpah/xxhash_cpp/tree/0.7.3
+third-party/xdelta3:
+  sha: 7508fd2a823443b1f0173ca361620f21d62a7d37