Tidy up Graphics.h a bit

This commit is contained in:
UnknownShadow200 2024-08-16 17:21:29 +10:00
parent 6dcd418acb
commit b3e545f202
14 changed files with 116 additions and 4039 deletions

View File

@ -5,7 +5,20 @@
CC_BEGIN_HEADER
/*
Abstracts a 3D graphics rendering API
SUMMARY:
- Provides a low level abstraction of a 3D graphics rendering API.
- Because of the numerous possible rendering backends, only a small number of
functions are provided so that the available functionality behaves the same
regardless of the rendering backend being used. (as much as reasonably possible)
- Most code using Graphics.h therefore doesn't need to care about the rendering backend being used
IMPLEMENTATION NOTES:
- By default, a reasonable rendering backend is automatically selected in Core.h
- The selected rendering backend can be altered in two ways:
* explicitly defining CC_GFX_BACKEND in the compilation flags (recommended)
* altering DEFAULT_GFX_BACKEND for the platform in Core.h
- Rendering backends are implemented in Graphics_GL1.c, Graphics_D3D9.c etc
Copyright 2014-2023 ClassiCube | Licensed under BSD-3
*/
struct Bitmap;
@ -18,15 +31,6 @@ typedef enum VertexFormat_ {
VERTEX_FORMAT_COLOURED, VERTEX_FORMAT_TEXTURED
} VertexFormat;
typedef enum FogFunc_ {
FOG_LINEAR, FOG_EXP, FOG_EXP2
} FogFunc;
typedef enum MatrixType_ {
MATRIX_PROJ, /* Projection matrix */
MATRIX_VIEW /* Combined model view matrix */
} MatrixType;
#define SIZEOF_VERTEX_COLOURED 16
#define SIZEOF_VERTEX_TEXTURED 24
@ -88,11 +92,13 @@ extern const cc_string Gfx_LowPerfMessage;
#define GFX_MAX_INDICES (65536 / 4 * 6)
#define GFX_MAX_VERTICES 65536
typedef enum GfxBuffers_ {
GFX_BUFFER_COLOR = 1,
GFX_BUFFER_DEPTH = 2
} GfxBuffers;
void Gfx_RecreateTexture(GfxResourceID* tex, struct Bitmap* bmp, cc_uint8 flags, cc_bool mipmaps);
void* Gfx_RecreateAndLockVb(GfxResourceID* vb, VertexFormat fmt, int count);
/*########################################################################################################################*
*---------------------------------------------------------Textures--------------------------------------------------------*
*#########################################################################################################################*/
/* Texture should persist across gfx context loss (if backend supports ManagedTextures) */
#define TEXTURE_FLAG_MANAGED 0x01
/* Texture should allow updating via Gfx_UpdateTexture */
@ -104,9 +110,6 @@ typedef enum GfxBuffers_ {
/* Texture should be rendered using bilinear filtering if possible */
#define TEXTURE_FLAG_BILINEAR 0x10
void Gfx_RecreateTexture(GfxResourceID* tex, struct Bitmap* bmp, cc_uint8 flags, cc_bool mipmaps);
void* Gfx_RecreateAndLockVb(GfxResourceID* vb, VertexFormat fmt, int count);
cc_bool Gfx_CheckTextureSize(int width, int height, cc_uint8 flags);
/* Creates a new texture. (and also generates mipmaps if mipmaps) */
/* See TEXTURE_FLAG values for supported flags */
@ -134,6 +137,46 @@ CC_API void Gfx_EnableMipmaps(void);
/* NOTE: You must have created textures with mipmaps true for this to work */
CC_API void Gfx_DisableMipmaps(void);
/*########################################################################################################################*
*------------------------------------------------------Frame management---------------------------------------------------*
*#########################################################################################################################*/
typedef enum GfxBuffers_ {
GFX_BUFFER_COLOR = 1,
GFX_BUFFER_DEPTH = 2
} GfxBuffers;
/* Clears the given rendering buffer(s) to their default values. */
/* buffers can be either GFX_BUFFER_COLOR or GFX_BUFFER_DEPTH, or both */
CC_API void Gfx_ClearBuffers(GfxBuffers buffers);
/* Sets the default colour that the colour buffer is cleared to */
CC_API void Gfx_ClearColor(PackedCol color);
/* Sets up state for rendering a new frame */
void Gfx_BeginFrame(void);
/* Finishes rendering a frame, and swaps it with the back buffer */
void Gfx_EndFrame(void);
/* Sets whether to synchronise with monitor refresh to avoid tearing */
/* NOTE: VSync setting may be unsupported or just ignored */
void Gfx_SetVSync(cc_bool vsync);
enum Screen3DS { TOP_SCREEN, BOTTOM_SCREEN };
#ifdef CC_BUILD_DUALSCREEN
/* Selects which screen/display to render to */
void Gfx_3DS_SetRenderScreen(enum Screen3DS screen);
#else
/* Selects which screen/display to render to */
static CC_INLINE void Gfx_3DS_SetRenderScreen(enum Screen3DS screen) { }
#endif
/*########################################################################################################################*
*---------------------------------------------------------Fog state-------------------------------------------------------*
*#########################################################################################################################*/
typedef enum FogFunc_ {
FOG_LINEAR, FOG_EXP, FOG_EXP2
} FogFunc;
/* Returns whether fog blending is enabled */
CC_API cc_bool Gfx_GetFog(void);
/* Sets whether fog blending is enabled */
@ -147,6 +190,10 @@ CC_API void Gfx_SetFogEnd(float value);
/* Sets in what way fog is blended */
CC_API void Gfx_SetFogMode(FogFunc func);
/*########################################################################################################################*
*-----------------------------------------------------State management----------------------------------------------------*
*#########################################################################################################################*/
/* Sets whether backface culling is performed */
CC_API void Gfx_SetFaceCulling(cc_bool enabled);
/* Sets whether pixels with an alpha of less than 128 are discarded */
@ -156,11 +203,6 @@ CC_API void Gfx_SetAlphaBlending(cc_bool enabled);
/* Sets whether blending between the alpha components of texture and vertex colour is performed */
CC_API void Gfx_SetAlphaArgBlend(cc_bool enabled);
/* Clears the given rendering buffer(s) to default. */
/* buffers can be either GFX_BUFFER_COLOR or GFX_BUFFER_DEPTH, or both */
CC_API void Gfx_ClearBuffers(GfxBuffers buffers);
/* Sets the colour that the colour buffer is cleared to */
CC_API void Gfx_ClearColor(PackedCol color);
/* Sets whether pixels may be discard based on z/depth */
CC_API void Gfx_SetDepthTest(cc_bool enabled);
/* Sets whether z/depth of pixels is actually written to the depth buffer */
@ -171,11 +213,10 @@ CC_API void Gfx_SetColorWrite(cc_bool r, cc_bool g, cc_bool b, cc_bool a);
/* NOTE: Implicitly calls Gfx_SetColorWrite */
CC_API void Gfx_DepthOnlyRendering(cc_bool depthOnly);
/* Anaglyph 3D rendering support */
void Gfx_Set3DLeft( struct Matrix* proj, struct Matrix* view);
void Gfx_Set3DRight(struct Matrix* proj, struct Matrix* view);
void Gfx_End3D( struct Matrix* proj, struct Matrix* view);
/*########################################################################################################################*
*------------------------------------------------------Index buffers-----------------------------------------------------*
*#########################################################################################################################*/
/* Callback function to initialise/fill out the contents of an index buffer */
typedef void (*Gfx_FillIBFunc)(cc_uint16* indices, int count, void* obj);
/* Creates a new index buffer and fills out its contents */
@ -185,6 +226,10 @@ CC_API void Gfx_BindIb(GfxResourceID ib);
/* Deletes the given index buffer, then sets it to 0 */
CC_API void Gfx_DeleteIb(GfxResourceID* ib);
/*########################################################################################################################*
*------------------------------------------------------Vertex buffers-----------------------------------------------------*
*#########################################################################################################################*/
/* Creates a new vertex buffer */
CC_API GfxResourceID Gfx_CreateVb(VertexFormat fmt, int count);
/* Sets the currently active vertex buffer */
@ -222,6 +267,10 @@ CC_API void Gfx_UnlockDynamicVb(GfxResourceID vb);
/* Updates the data of a dynamic vertex buffer */
CC_API void Gfx_SetDynamicVbData(GfxResourceID vb, void* vertices, int vCount);
/*########################################################################################################################*
*------------------------------------------------------Vertex drawing-----------------------------------------------------*
*#########################################################################################################################*/
/* Sets the format of the rendered vertices */
CC_API void Gfx_SetVertexFormat(VertexFormat fmt);
/* Renders vertices from the currently bound vertex buffer as lines */
@ -234,9 +283,20 @@ CC_API void Gfx_DrawVb_IndexedTris(int verticesCount);
/* Special case Gfx_DrawVb_IndexedTris_Range for map renderer */
void Gfx_DrawIndexedTris_T2fC4b(int verticesCount, int startVertex);
/* Loads the given matrix over the currently active matrix */
/*########################################################################################################################*
*-----------------------------------------------------Vertex transform----------------------------------------------------*
*#########################################################################################################################*/
typedef enum MatrixType_ {
MATRIX_PROJ, /* Projection matrix */
MATRIX_VIEW /* Combined model view matrix */
} MatrixType;
/* Sets the currently active matrix projection or modelview matrix */
CC_API void Gfx_LoadMatrix(MatrixType type, const struct Matrix* matrix);
/* Sets the texture U/V translation (not normally used) */
CC_API void Gfx_EnableTextureOffset(float x, float y);
/* Disables texture U/V translation */
CC_API void Gfx_DisableTextureOffset(void);
/* Loads given modelview and projection matrices, then calculates the combined MVP matrix */
void Gfx_LoadMVP(const struct Matrix* view, const struct Matrix* proj, struct Matrix* mvp);
@ -248,19 +308,25 @@ void Gfx_CalcPerspectiveMatrix(struct Matrix* matrix, float fov, float aspect, f
/* NOTE: Projection matrix calculation is here because it can depend on the graphics backend */
/* (e.g. OpenGL uses a Z clip space range of [-1, 1], whereas Direct3D9 uses [0, 1]) */
/* Sets the region where transformed vertices are drawn in */
/* By default this region has origin 0,0 and size is window width/height */
/* This region should normally be the same as the scissor region */
CC_API void Gfx_SetViewport(int x, int y, int w, int h);
/* Sets the region where pixels can be drawn in (pixels outside this region are discarded) */
/* By default this region has origin 0,0 and size is window width/height */
/* This region should normally be the same as the viewport region */
CC_API void Gfx_SetScissor (int x, int y, int w, int h);
/*########################################################################################################################*
*------------------------------------------------------Misc utilities-----------------------------------------------------*
*#########################################################################################################################*/
/* Outputs a .png screenshot of the backbuffer */
cc_result Gfx_TakeScreenshot(struct Stream* output);
/* Warns in chat if the backend has problems with the user's GPU */
/* Returns whether legacy rendering mode for borders/sky/clouds is needed */
cc_bool Gfx_WarnIfNecessary(void);
cc_bool Gfx_GetUIOptions(struct MenuOptionsScreen* s);
/* Sets up state for rendering a new frame */
void Gfx_BeginFrame(void);
/* Finishes rendering a frame, and swaps it with the back buffer */
void Gfx_EndFrame(void);
/* Sets whether to synchronise with monitor refresh to avoid tearing */
/* NOTE: VSync setting may be unsupported or just ignored */
void Gfx_SetVSync(cc_bool vsync);
/* Gets information about the user's GPU and current backend state */
/* Backend state may include depth buffer bits, free memory, etc */
/* NOTE: Each line is separated by \n */
@ -269,16 +335,11 @@ void Gfx_GetApiInfo(cc_string* info);
/* Updates state when the window's dimensions have changed */
/* NOTE: This may require recreating the context depending on the backend */
void Gfx_OnWindowResize(void);
CC_API void Gfx_SetViewport(int x, int y, int w, int h);
CC_API void Gfx_SetScissor (int x, int y, int w, int h);
enum Screen3DS { TOP_SCREEN, BOTTOM_SCREEN };
#ifdef CC_BUILD_DUALSCREEN
/* Selects which screen on the 3DS to render to */
void Gfx_3DS_SetRenderScreen(enum Screen3DS screen);
#else
static CC_INLINE void Gfx_3DS_SetRenderScreen(enum Screen3DS screen) { }
#endif
/* Anaglyph 3D rendering support */
void Gfx_Set3DLeft( struct Matrix* proj, struct Matrix* view);
void Gfx_Set3DRight(struct Matrix* proj, struct Matrix* view);
void Gfx_End3D( struct Matrix* proj, struct Matrix* view);
/* Raises ContextLost event and updates state for lost contexts */
void Gfx_LoseContext(const char* reason);
@ -287,6 +348,15 @@ void Gfx_RecreateContext(void);
/* Attempts to restore a lost context */
cc_bool Gfx_TryRestoreContext(void);
/* Sets appropriate alpha test/blending for given block draw type */
void Gfx_SetupAlphaState(cc_uint8 draw);
/* Undoes changes to alpha test/blending state by Gfx_SetupAlphaState */
void Gfx_RestoreAlphaState(cc_uint8 draw);
/*########################################################################################################################*
*------------------------------------------------------2D rendering------------------------------------------------------*
*#########################################################################################################################*/
/* Renders a 2D flat coloured rectangle */
void Gfx_Draw2DFlat(int x, int y, int width, int height, PackedCol color);
/* Renders a 2D flat vertical gradient rectangle */
@ -303,11 +373,6 @@ void Gfx_Begin2D(int width, int height);
/* NOTE: This means restoring fog/depth test, restoring matrices, etc */
void Gfx_End2D(void);
/* Sets appropriate alpha test/blending for given block draw type */
void Gfx_SetupAlphaState(cc_uint8 draw);
/* Undoes changes to alpha test/blending state by Gfx_SetupAlphaState */
void Gfx_RestoreAlphaState(cc_uint8 draw);
/* Statically initialises the position and dimensions of this texture */
#define Tex_Rect(x,y, width,height) x,y,width,height
/* Statically initialises the texture coordinate corners of this texture */

View File

@ -299,7 +299,6 @@ extern "C" {
* | aes_ct | AES | 16 | 16, 24 and 32 |
* | aes_ct64 | AES | 16 | 16, 24 and 32 |
* | aes_x86ni | AES | 16 | 16, 24 and 32 |
* | aes_pwr8 | AES | 16 | 16, 24 and 32 |
* | des_ct | DES/3DES | 8 | 8, 16 and 24 |
* | des_tab | DES/3DES | 8 | 8, 16 and 24 |
*
@ -335,10 +334,6 @@ extern "C" {
* `aes_x86ni` exists only on x86 architectures (32-bit and 64-bit). It
* uses the AES-NI opcodes when available.
*
* `aes_pwr8` exists only on PowerPC / POWER architectures (32-bit and
* 64-bit, both little-endian and big-endian). It uses the AES opcodes
* present in POWER8 and later.
*
* `des_tab` is a classic, table-based implementation of DES/3DES. It
* is not constant-time.
*
@ -1860,296 +1855,6 @@ const br_block_ctr_class *br_aes_x86ni_ctr_get_vtable(void);
*/
const br_block_ctrcbc_class *br_aes_x86ni_ctrcbc_get_vtable(void);
/*
* AES implementation using POWER8 opcodes.
*/
/** \brief AES block size (16 bytes). */
#define br_aes_pwr8_BLOCK_SIZE 16
/**
* \brief Context for AES subkeys (`aes_pwr8` implementation, CBC encryption).
*
* First field is a pointer to the vtable; it is set by the initialisation
* function. Other fields are not supposed to be accessed by user code.
*/
typedef struct {
/** \brief Pointer to vtable for this context. */
const br_block_cbcenc_class *vtable;
#ifndef BR_DOXYGEN_IGNORE
union {
unsigned char skni[16 * 15];
} skey;
unsigned num_rounds;
#endif
} br_aes_pwr8_cbcenc_keys;
/**
* \brief Context for AES subkeys (`aes_pwr8` implementation, CBC decryption).
*
* First field is a pointer to the vtable; it is set by the initialisation
* function. Other fields are not supposed to be accessed by user code.
*/
typedef struct {
/** \brief Pointer to vtable for this context. */
const br_block_cbcdec_class *vtable;
#ifndef BR_DOXYGEN_IGNORE
union {
unsigned char skni[16 * 15];
} skey;
unsigned num_rounds;
#endif
} br_aes_pwr8_cbcdec_keys;
/**
* \brief Context for AES subkeys (`aes_pwr8` implementation, CTR encryption
* and decryption).
*
* First field is a pointer to the vtable; it is set by the initialisation
* function. Other fields are not supposed to be accessed by user code.
*/
typedef struct {
/** \brief Pointer to vtable for this context. */
const br_block_ctr_class *vtable;
#ifndef BR_DOXYGEN_IGNORE
union {
unsigned char skni[16 * 15];
} skey;
unsigned num_rounds;
#endif
} br_aes_pwr8_ctr_keys;
/**
* \brief Context for AES subkeys (`aes_pwr8` implementation, CTR encryption
* and decryption + CBC-MAC).
*
* First field is a pointer to the vtable; it is set by the initialisation
* function. Other fields are not supposed to be accessed by user code.
*/
typedef struct {
/** \brief Pointer to vtable for this context. */
const br_block_ctrcbc_class *vtable;
#ifndef BR_DOXYGEN_IGNORE
union {
unsigned char skni[16 * 15];
} skey;
unsigned num_rounds;
#endif
} br_aes_pwr8_ctrcbc_keys;
/**
* \brief Class instance for AES CBC encryption (`aes_pwr8` implementation).
*
* Since this implementation might be omitted from the library, or the
* AES opcode unavailable on the current CPU, a pointer to this class
* instance should be obtained through `br_aes_pwr8_cbcenc_get_vtable()`.
*/
extern const br_block_cbcenc_class br_aes_pwr8_cbcenc_vtable;
/**
* \brief Class instance for AES CBC decryption (`aes_pwr8` implementation).
*
* Since this implementation might be omitted from the library, or the
* AES opcode unavailable on the current CPU, a pointer to this class
* instance should be obtained through `br_aes_pwr8_cbcdec_get_vtable()`.
*/
extern const br_block_cbcdec_class br_aes_pwr8_cbcdec_vtable;
/**
* \brief Class instance for AES CTR encryption and decryption
* (`aes_pwr8` implementation).
*
* Since this implementation might be omitted from the library, or the
* AES opcode unavailable on the current CPU, a pointer to this class
* instance should be obtained through `br_aes_pwr8_ctr_get_vtable()`.
*/
extern const br_block_ctr_class br_aes_pwr8_ctr_vtable;
/**
* \brief Class instance for AES CTR encryption/decryption + CBC-MAC
* (`aes_pwr8` implementation).
*
* Since this implementation might be omitted from the library, or the
* AES opcode unavailable on the current CPU, a pointer to this class
* instance should be obtained through `br_aes_pwr8_ctrcbc_get_vtable()`.
*/
extern const br_block_ctrcbc_class br_aes_pwr8_ctrcbc_vtable;
/**
* \brief Context initialisation (key schedule) for AES CBC encryption
* (`aes_pwr8` implementation).
*
* \param ctx context to initialise.
* \param key secret key.
* \param len secret key length (in bytes).
*/
void br_aes_pwr8_cbcenc_init(br_aes_pwr8_cbcenc_keys *ctx,
const void *key, size_t len);
/**
* \brief Context initialisation (key schedule) for AES CBC decryption
* (`aes_pwr8` implementation).
*
* \param ctx context to initialise.
* \param key secret key.
* \param len secret key length (in bytes).
*/
void br_aes_pwr8_cbcdec_init(br_aes_pwr8_cbcdec_keys *ctx,
const void *key, size_t len);
/**
* \brief Context initialisation (key schedule) for AES CTR encryption
* and decryption (`aes_pwr8` implementation).
*
* \param ctx context to initialise.
* \param key secret key.
* \param len secret key length (in bytes).
*/
void br_aes_pwr8_ctr_init(br_aes_pwr8_ctr_keys *ctx,
const void *key, size_t len);
/**
* \brief Context initialisation (key schedule) for AES CTR + CBC-MAC
* (`aes_pwr8` implementation).
*
* \param ctx context to initialise.
* \param key secret key.
* \param len secret key length (in bytes).
*/
void br_aes_pwr8_ctrcbc_init(br_aes_pwr8_ctrcbc_keys *ctx,
const void *key, size_t len);
/**
* \brief CBC encryption with AES (`aes_pwr8` implementation).
*
* \param ctx context (already initialised).
* \param iv IV (updated).
* \param data data to encrypt (updated).
* \param len data length (in bytes, MUST be multiple of 16).
*/
void br_aes_pwr8_cbcenc_run(const br_aes_pwr8_cbcenc_keys *ctx, void *iv,
void *data, size_t len);
/**
* \brief CBC decryption with AES (`aes_pwr8` implementation).
*
* \param ctx context (already initialised).
* \param iv IV (updated).
* \param data data to decrypt (updated).
* \param len data length (in bytes, MUST be multiple of 16).
*/
void br_aes_pwr8_cbcdec_run(const br_aes_pwr8_cbcdec_keys *ctx, void *iv,
void *data, size_t len);
/**
* \brief CTR encryption and decryption with AES (`aes_pwr8` implementation).
*
* \param ctx context (already initialised).
* \param iv IV (constant, 12 bytes).
* \param cc initial block counter value.
* \param data data to decrypt (updated).
* \param len data length (in bytes).
* \return new block counter value.
*/
uint32_t br_aes_pwr8_ctr_run(const br_aes_pwr8_ctr_keys *ctx,
const void *iv, uint32_t cc, void *data, size_t len);
/**
* \brief CTR encryption + CBC-MAC with AES (`aes_pwr8` implementation).
*
* \param ctx context (already initialised).
* \param ctr counter for CTR (16 bytes, updated).
* \param cbcmac IV for CBC-MAC (updated).
* \param data data to encrypt (updated).
* \param len data length (in bytes, MUST be a multiple of 16).
*/
void br_aes_pwr8_ctrcbc_encrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
void *ctr, void *cbcmac, void *data, size_t len);
/**
* \brief CTR decryption + CBC-MAC with AES (`aes_pwr8` implementation).
*
* \param ctx context (already initialised).
* \param ctr counter for CTR (16 bytes, updated).
* \param cbcmac IV for CBC-MAC (updated).
* \param data data to decrypt (updated).
* \param len data length (in bytes, MUST be a multiple of 16).
*/
void br_aes_pwr8_ctrcbc_decrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
void *ctr, void *cbcmac, void *data, size_t len);
/**
* \brief CTR encryption/decryption with AES (`aes_pwr8` implementation).
*
* \param ctx context (already initialised).
* \param ctr counter for CTR (16 bytes, updated).
* \param data data to MAC (updated).
* \param len data length (in bytes, MUST be a multiple of 16).
*/
void br_aes_pwr8_ctrcbc_ctr(const br_aes_pwr8_ctrcbc_keys *ctx,
void *ctr, void *data, size_t len);
/**
* \brief CBC-MAC with AES (`aes_pwr8` implementation).
*
* \param ctx context (already initialised).
* \param cbcmac IV for CBC-MAC (updated).
* \param data data to MAC (unmodified).
* \param len data length (in bytes, MUST be a multiple of 16).
*/
void br_aes_pwr8_ctrcbc_mac(const br_aes_pwr8_ctrcbc_keys *ctx,
void *cbcmac, const void *data, size_t len);
/**
* \brief Obtain the `aes_pwr8` AES-CBC (encryption) implementation, if
* available.
*
* This function returns a pointer to `br_aes_pwr8_cbcenc_vtable`, if
* that implementation was compiled in the library _and_ the POWER8
* crypto opcodes are available on the currently running CPU. If either
* of these conditions is not met, then this function returns `NULL`.
*
* \return the `aes_pwr8` AES-CBC (encryption) implementation, or `NULL`.
*/
const br_block_cbcenc_class *br_aes_pwr8_cbcenc_get_vtable(void);
/**
* \brief Obtain the `aes_pwr8` AES-CBC (decryption) implementation, if
* available.
*
* This function returns a pointer to `br_aes_pwr8_cbcdec_vtable`, if
* that implementation was compiled in the library _and_ the POWER8
* crypto opcodes are available on the currently running CPU. If either
* of these conditions is not met, then this function returns `NULL`.
*
* \return the `aes_pwr8` AES-CBC (decryption) implementation, or `NULL`.
*/
const br_block_cbcdec_class *br_aes_pwr8_cbcdec_get_vtable(void);
/**
* \brief Obtain the `aes_pwr8` AES-CTR implementation, if available.
*
* This function returns a pointer to `br_aes_pwr8_ctr_vtable`, if that
* implementation was compiled in the library _and_ the POWER8 crypto
* opcodes are available on the currently running CPU. If either of
* these conditions is not met, then this function returns `NULL`.
*
* \return the `aes_pwr8` AES-CTR implementation, or `NULL`.
*/
const br_block_ctr_class *br_aes_pwr8_ctr_get_vtable(void);
/**
* \brief Obtain the `aes_pwr8` AES-CTR + CBC-MAC implementation, if
* available.
*
* This function returns a pointer to `br_aes_pwr8_ctrcbc_vtable`, if
* that implementation was compiled in the library _and_ the POWER8 AES
* opcodes are available on the currently running CPU. If either of
* these conditions is not met, then this function returns `NULL`.
*
 * \return the `aes_pwr8` AES-CTR + CBC-MAC implementation, or `NULL`.
*/
const br_block_ctrcbc_class *br_aes_pwr8_ctrcbc_get_vtable(void);
/**
* \brief Aggregate structure large enough to be used as context for
@ -2162,7 +1867,6 @@ typedef union {
br_aes_ct_cbcenc_keys c_ct;
br_aes_ct64_cbcenc_keys c_ct64;
br_aes_x86ni_cbcenc_keys c_x86ni;
br_aes_pwr8_cbcenc_keys c_pwr8;
} br_aes_gen_cbcenc_keys;
/**
@ -2176,7 +1880,6 @@ typedef union {
br_aes_ct_cbcdec_keys c_ct;
br_aes_ct64_cbcdec_keys c_ct64;
br_aes_x86ni_cbcdec_keys c_x86ni;
br_aes_pwr8_cbcdec_keys c_pwr8;
} br_aes_gen_cbcdec_keys;
/**
@ -2190,7 +1893,6 @@ typedef union {
br_aes_ct_ctr_keys c_ct;
br_aes_ct64_ctr_keys c_ct64;
br_aes_x86ni_ctr_keys c_x86ni;
br_aes_pwr8_ctr_keys c_pwr8;
} br_aes_gen_ctr_keys;
/**
@ -2204,7 +1906,6 @@ typedef union {
br_aes_ct_ctrcbc_keys c_ct;
br_aes_ct64_ctrcbc_keys c_ct64;
br_aes_x86ni_ctrcbc_keys c_x86ni;
br_aes_pwr8_ctrcbc_keys c_pwr8;
} br_aes_gen_ctrcbc_keys;
/*

View File

@ -1313,32 +1313,6 @@ void br_ghash_pclmul(void *y, const void *h, const void *data, size_t len);
*/
br_ghash br_ghash_pclmul_get(void);
/**
* \brief GHASH implementation using the POWER8 opcodes.
*
* This implementation is available only on POWER8 platforms (and later).
* To safely obtain a pointer to this function when supported (or 0
* otherwise), use `br_ghash_pwr8_get()`.
*
* \param y the array to update.
* \param h the GHASH key.
* \param data the input data (may be `NULL` if `len` is zero).
* \param len the input data length (in bytes).
*/
void br_ghash_pwr8(void *y, const void *h, const void *data, size_t len);
/**
* \brief Obtain the `pwr8` GHASH implementation, if available.
*
* If the `pwr8` implementation was compiled in the library (depending
* on the compiler abilities) _and_ the local CPU appears to support the
* opcode, then this function will return a pointer to the
* `br_ghash_pwr8()` function. Otherwise, it will return `0`.
*
* \return the `pwr8` GHASH implementation, or `0`.
*/
br_ghash br_ghash_pwr8_get(void);
#ifdef __cplusplus
}
#endif

View File

@ -1,445 +0,0 @@
/*
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define BR_POWER_ASM_MACROS 1
#include "inner.h"
/*
* This code contains the AES key schedule implementation using the
* POWER8 opcodes.
*/
#if BR_POWER8
/*
 * AES-128 key schedule using the POWER8 vector crypto opcodes.
 * Expands the 16-byte key at 'key' into the round subkeys written to
 * 'sk': the key itself first, then one 16-byte subkey per loop
 * iteration (the loop runs 10 times — see li(%[cc], 10) below),
 * i.e. 11 subkeys / 176 bytes total.
 * On little-endian builds (BR_POWER8_LE) each subkey is byte-swapped
 * to big-endian word order via vperm before being stored.
 */
static void
key_schedule_128(unsigned char *sk, const unsigned char *key)
{
long cc;
/* fmod = AES field modulus 0x11B, splatted across 4 words; used to
   reduce Rcon when it overflows a byte */
static const uint32_t fmod[] = { 0x11B, 0x11B, 0x11B, 0x11B };
#if BR_POWER8_LE
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
cc = 0;
/*
 * We use the VSX instructions for loading and storing the
 * key/subkeys, since they support unaligned accesses. The rest
 * of the computation is VMX only. VMX register 0 is VSX
 * register 32.
 */
asm volatile (
/*
 * v0 = all-zero word
 * v1 = constant -8 / +8, copied into four words
 * v2 = current subkey
 * v3 = Rcon (x4 words)
 * v6 = constant 8, copied into four words
 * v7 = constant 0x11B, copied into four words
 * v8 = constant for byteswapping words
 */
vspltisw(0, 0)
#if BR_POWER8_LE
vspltisw(1, -8)
#else
vspltisw(1, 8)
#endif
lxvw4x(34, 0, %[key])
vspltisw(3, 1)
vspltisw(6, 8)
lxvw4x(39, 0, %[fmod])
#if BR_POWER8_LE
lxvw4x(40, 0, %[idx2be])
#endif
/*
 * First subkey is a copy of the key itself.
 */
#if BR_POWER8_LE
vperm(4, 2, 2, 8)
stxvw4x(36, 0, %[sk])
#else
stxvw4x(34, 0, %[sk])
#endif
/*
 * Loop must run 10 times.
 */
li(%[cc], 10)
mtctr(%[cc])
label(loop)
/* Increment subkey address */
addi(%[sk], %[sk], 16)
/* Compute SubWord(RotWord(temp)) xor Rcon (into v4, splat) */
vrlw(4, 2, 1)
vsbox(4, 4)
#if BR_POWER8_LE
vxor(4, 4, 3)
#else
vsldoi(5, 3, 0, 3)
vxor(4, 4, 5)
#endif
vspltw(4, 4, 3)
/* XOR words for next subkey */
vsldoi(5, 0, 2, 12)
vxor(2, 2, 5)
vsldoi(5, 0, 2, 12)
vxor(2, 2, 5)
vsldoi(5, 0, 2, 12)
vxor(2, 2, 5)
vxor(2, 2, 4)
/* Store next subkey */
#if BR_POWER8_LE
vperm(4, 2, 2, 8)
stxvw4x(36, 0, %[sk])
#else
stxvw4x(34, 0, %[sk])
#endif
/* Update Rcon */
vadduwm(3, 3, 3)
vsrw(4, 3, 6)
vsubuwm(4, 0, 4)
vand(4, 4, 7)
vxor(3, 3, 4)
bdnz(loop)
: [sk] "+b" (sk), [cc] "+b" (cc)
: [key] "b" (key), [fmod] "b" (fmod)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
/* NOTE(review): on BR_POWER8_LE, lxvw4x(40, ...) loads VSX reg 40
   (= VMX v8, the byteswap constant), but v8 is not listed in the
   clobbers below — verify against the other schedules, which do
   list v8 and higher. */
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "ctr", "memory"
);
}
/*
 * AES-192 key schedule using the POWER8 vector crypto opcodes.
 * Expands the 24-byte key at 'key' into the round subkeys written to
 * 'sk'. The loop below runs 8 times, producing 6 new 32-bit subkey
 * words per iteration with a 64-bit overlap between stores; a final
 * trailing store brings the total to 52 subkey words (see the
 * in-asm comments).
 * On little-endian builds (BR_POWER8_LE) words are byte-swapped to
 * big-endian order via vperm before each store.
 */
static void
key_schedule_192(unsigned char *sk, const unsigned char *key)
{
long cc;
#if BR_POWER8_LE
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
cc = 0;
/*
 * We use the VSX instructions for loading and storing the
 * key/subkeys, since they support unaligned accesses. The rest
 * of the computation is VMX only. VMX register 0 is VSX
 * register 32.
 */
asm volatile (
/*
 * v0 = all-zero word
 * v1 = constant -8 / +8, copied into four words
 * v2, v3 = current subkey
 * v5 = Rcon (x4 words) (already shifted on big-endian)
 * v6 = constant 8, copied into four words
 * v8 = constant for byteswapping words
 *
 * The left two words of v3 are ignored.
 */
vspltisw(0, 0)
#if BR_POWER8_LE
vspltisw(1, -8)
#else
vspltisw(1, 8)
#endif
/* Load the 24-byte key as two (overlapping-use) 16-byte loads,
   the second at offset 8 */
li(%[cc], 8)
lxvw4x(34, 0, %[key])
lxvw4x(35, %[cc], %[key])
vsldoi(3, 3, 0, 8)
vspltisw(5, 1)
#if !BR_POWER8_LE
vsldoi(5, 5, 0, 3)
#endif
vspltisw(6, 8)
#if BR_POWER8_LE
lxvw4x(40, 0, %[idx2be])
#endif
/*
 * Loop must run 8 times. Each iteration produces 256
 * bits of subkeys, with a 64-bit overlap.
 */
li(%[cc], 8)
mtctr(%[cc])
/* %[cc] is reused as the constant byte offset 16 for the
   second store of each iteration */
li(%[cc], 16)
label(loop)
/*
 * Last 6 words in v2:v3l. Compute next 6 words into
 * v3r:v4.
 */
vrlw(10, 3, 1)
vsbox(10, 10)
vxor(10, 10, 5)
vspltw(10, 10, 1)
vsldoi(11, 0, 10, 8)
vsldoi(12, 0, 2, 12)
vxor(12, 2, 12)
vsldoi(13, 0, 12, 12)
vxor(12, 12, 13)
vsldoi(13, 0, 12, 12)
vxor(12, 12, 13)
vspltw(13, 12, 3)
vxor(13, 13, 3)
vsldoi(14, 0, 3, 12)
vxor(13, 13, 14)
vsldoi(4, 12, 13, 8)
vsldoi(14, 0, 3, 8)
vsldoi(3, 14, 12, 8)
vxor(3, 3, 11)
vxor(4, 4, 10)
/*
 * Update Rcon. Since for a 192-bit key, we use only 8
 * such constants, we will not hit the field modulus,
 * so a simple shift (addition) works well.
 */
vadduwm(5, 5, 5)
/*
 * Write out the two left 128-bit words
 */
#if BR_POWER8_LE
vperm(10, 2, 2, 8)
vperm(11, 3, 3, 8)
stxvw4x(42, 0, %[sk])
stxvw4x(43, %[cc], %[sk])
#else
stxvw4x(34, 0, %[sk])
stxvw4x(35, %[cc], %[sk])
#endif
/* Advance by 24 bytes (6 words) — stores overlap by 8 bytes */
addi(%[sk], %[sk], 24)
/*
 * Shift words for next iteration.
 */
vsldoi(2, 3, 4, 8)
vsldoi(3, 4, 0, 8)
bdnz(loop)
/*
 * The loop wrote the first 50 subkey words, but we need
 * to produce 52, so we must do one last write.
 */
#if BR_POWER8_LE
vperm(10, 2, 2, 8)
stxvw4x(42, 0, %[sk])
#else
stxvw4x(34, 0, %[sk])
#endif
: [sk] "+b" (sk), [cc] "+b" (cc)
: [key] "b" (key)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "ctr", "memory"
);
}
/*
 * Expand a 256-bit AES key into the subkey schedule (15 128-bit
 * subkeys for 14 rounds), written to sk. The loop runs 7 times,
 * emitting two subkeys per iteration; one extra store completes the
 * 15th. On little-endian, subkey words are permuted back to big-endian
 * word order (via idx2be) before being stored.
 *
 * sk   destination buffer for the expanded subkeys
 * key  32-byte AES key (may be unaligned; loaded with VSX lxvw4x)
 */
static void
key_schedule_256(unsigned char *sk, const unsigned char *key)
{
/* Byte offset passed as an asm operand for indexed loads/stores. */
long cc;
#if BR_POWER8_LE
/* Permutation pattern to byteswap each 32-bit word (LE -> BE). */
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
cc = 0;
/*
 * We use the VSX instructions for loading and storing the
 * key/subkeys, since they support unaligned accesses. The rest
 * of the computation is VMX only. VMX register 0 is VSX
 * register 32.
 */
asm volatile (
/*
 * v0 = all-zero word
 * v1 = constant -8 / +8, copied into four words
 * v2, v3 = current subkey
 * v6 = Rcon (x4 words) (already shifted on big-endian)
 * v7 = constant 8, copied into four words
 * v8 = constant for byteswapping words
 *
 * The left two words of v3 are ignored.
 */
vspltisw(0, 0)
#if BR_POWER8_LE
vspltisw(1, -8)
#else
vspltisw(1, 8)
#endif
li(%[cc], 16)
lxvw4x(34, 0, %[key])
lxvw4x(35, %[cc], %[key])
vspltisw(6, 1)
#if !BR_POWER8_LE
vsldoi(6, 6, 0, 3)
#endif
vspltisw(7, 8)
#if BR_POWER8_LE
lxvw4x(40, 0, %[idx2be])
#endif
/*
 * Loop must run 7 times. Each iteration produces two
 * subkeys.
 */
li(%[cc], 7)
mtctr(%[cc])
li(%[cc], 16)
label(loop)
/*
 * Current words are in v2:v3. Compute next word in v4.
 */
vrlw(10, 3, 1)
vsbox(10, 10)
vxor(10, 10, 6)
vspltw(10, 10, 3)
vsldoi(4, 0, 2, 12)
vxor(4, 2, 4)
vsldoi(5, 0, 4, 12)
vxor(4, 4, 5)
vsldoi(5, 0, 4, 12)
vxor(4, 4, 5)
vxor(4, 4, 10)
/*
 * Then other word in v5.
 */
vsbox(10, 4)
vspltw(10, 10, 3)
vsldoi(5, 0, 3, 12)
vxor(5, 3, 5)
vsldoi(11, 0, 5, 12)
vxor(5, 5, 11)
vsldoi(11, 0, 5, 12)
vxor(5, 5, 11)
vxor(5, 5, 10)
/*
 * Update Rcon. Since for a 256-bit key, we use only 7
 * such constants, we will not hit the field modulus,
 * so a simple shift (addition) works well.
 */
vadduwm(6, 6, 6)
/*
 * Write out the two left 128-bit words
 */
#if BR_POWER8_LE
vperm(10, 2, 2, 8)
vperm(11, 3, 3, 8)
stxvw4x(42, 0, %[sk])
stxvw4x(43, %[cc], %[sk])
#else
stxvw4x(34, 0, %[sk])
stxvw4x(35, %[cc], %[sk])
#endif
addi(%[sk], %[sk], 32)
/*
 * Replace v2:v3 with v4:v5.
 */
vxor(2, 0, 4)
vxor(3, 0, 5)
bdnz(loop)
/*
 * The loop wrote the first 14 subkeys, but we need 15,
 * so we must do an extra write.
 */
#if BR_POWER8_LE
vperm(10, 2, 2, 8)
stxvw4x(42, 0, %[sk])
#else
stxvw4x(34, 0, %[sk])
#endif
: [sk] "+b" (sk), [cc] "+b" (cc)
: [key] "b" (key)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "ctr", "memory"
);
}
/* see inner.h */
/*
 * This translation unit is only compiled when BR_POWER8 is enabled
 * (see the enclosing #if), so the POWER8 AES opcodes used here are
 * assumed to be available and this always reports support.
 */
int
br_aes_pwr8_supported(void)
{
return 1;
}
/* see inner.h */
/*
 * Expand the raw AES key (16, 24 or 32 bytes) into the subkey
 * schedule at sk, and return the corresponding number of rounds
 * (10, 12 or 14). Any length other than 16 or 24 is treated as 32.
 */
unsigned
br_aes_pwr8_keysched(unsigned char *sk, const void *key, size_t len)
{
	if (len == 16) {
		key_schedule_128(sk, key);
		return 10;
	}
	if (len == 24) {
		key_schedule_192(sk, key);
		return 12;
	}
	key_schedule_256(sk, key);
	return 14;
}
#endif

View File

@ -1,670 +0,0 @@
/*
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define BR_POWER_ASM_MACROS 1
#include "inner.h"
#if BR_POWER8
/* see bearssl_block.h */
/*
 * Initialize a CBC decryption context: expand the key into the
 * context's subkey storage and install the class vtable.
 */
void
br_aes_pwr8_cbcdec_init(br_aes_pwr8_cbcdec_keys *ctx,
	const void *key, size_t len)
{
	unsigned rounds;

	rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
	ctx->num_rounds = rounds;
	ctx->vtable = &br_aes_pwr8_cbcdec_vtable;
}
/*
 * Decrypt num_blocks 16-byte blocks in CBC mode, in place, using an
 * AES-128 (10 rounds) expanded key.
 *
 * sk          expanded subkeys (11 * 16 bytes, loaded into v0..v10)
 * iv          16-byte IV for the first block (read only; the caller
 *             tracks IV chaining across calls)
 * buf         ciphertext, overwritten with plaintext
 * num_blocks  block count; the asm runs num_blocks >> 2 iterations of
 *             a 4-block loop, so callers must pass a positive
 *             multiple of 4 (see br_aes_pwr8_cbcdec_run)
 */
static void
cbcdec_128(const unsigned char *sk,
const unsigned char *iv, unsigned char *buf, size_t num_blocks)
{
/* Byte offsets 0/16/32/48 used as asm operands for indexed accesses. */
long cc0, cc1, cc2, cc3;
#if BR_POWER8_LE
/* Permutation pattern to byteswap each 32-bit word (LE -> BE). */
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
cc0 = 0;
cc1 = 16;
cc2 = 32;
cc3 = 48;
asm volatile (
/*
 * Load subkeys into v0..v10
 */
lxvw4x(32, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(33, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(34, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(35, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(36, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(37, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(38, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(39, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(40, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(41, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(42, %[cc0], %[sk])
li(%[cc0], 0)
#if BR_POWER8_LE
/*
 * v15 = constant for byteswapping words
 */
lxvw4x(47, 0, %[idx2be])
#endif
/*
 * Load IV into v24.
 */
lxvw4x(56, 0, %[iv])
#if BR_POWER8_LE
vperm(24, 24, 24, 15)
#endif
mtctr(%[num_blocks])
label(loop)
/*
 * Load next ciphertext words in v16..v19. Also save them
 * in v20..v23.
 */
lxvw4x(48, %[cc0], %[buf])
lxvw4x(49, %[cc1], %[buf])
lxvw4x(50, %[cc2], %[buf])
lxvw4x(51, %[cc3], %[buf])
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
vand(20, 16, 16)
vand(21, 17, 17)
vand(22, 18, 18)
vand(23, 19, 19)
/*
 * Decrypt the blocks.
 */
vxor(16, 16, 10)
vxor(17, 17, 10)
vxor(18, 18, 10)
vxor(19, 19, 10)
vncipher(16, 16, 9)
vncipher(17, 17, 9)
vncipher(18, 18, 9)
vncipher(19, 19, 9)
vncipher(16, 16, 8)
vncipher(17, 17, 8)
vncipher(18, 18, 8)
vncipher(19, 19, 8)
vncipher(16, 16, 7)
vncipher(17, 17, 7)
vncipher(18, 18, 7)
vncipher(19, 19, 7)
vncipher(16, 16, 6)
vncipher(17, 17, 6)
vncipher(18, 18, 6)
vncipher(19, 19, 6)
vncipher(16, 16, 5)
vncipher(17, 17, 5)
vncipher(18, 18, 5)
vncipher(19, 19, 5)
vncipher(16, 16, 4)
vncipher(17, 17, 4)
vncipher(18, 18, 4)
vncipher(19, 19, 4)
vncipher(16, 16, 3)
vncipher(17, 17, 3)
vncipher(18, 18, 3)
vncipher(19, 19, 3)
vncipher(16, 16, 2)
vncipher(17, 17, 2)
vncipher(18, 18, 2)
vncipher(19, 19, 2)
vncipher(16, 16, 1)
vncipher(17, 17, 1)
vncipher(18, 18, 1)
vncipher(19, 19, 1)
vncipherlast(16, 16, 0)
vncipherlast(17, 17, 0)
vncipherlast(18, 18, 0)
vncipherlast(19, 19, 0)
/*
 * XOR decrypted blocks with IV / previous block.
 */
vxor(16, 16, 24)
vxor(17, 17, 20)
vxor(18, 18, 21)
vxor(19, 19, 22)
/*
 * Store back result (with byteswap)
 */
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
stxvw4x(48, %[cc0], %[buf])
stxvw4x(49, %[cc1], %[buf])
stxvw4x(50, %[cc2], %[buf])
stxvw4x(51, %[cc3], %[buf])
/*
 * Fourth encrypted block is IV for next run.
 */
vand(24, 23, 23)
addi(%[buf], %[buf], 64)
bdnz(loop)
: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
[buf] "+b" (buf)
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
"ctr", "memory"
);
}
/*
 * Decrypt num_blocks 16-byte blocks in CBC mode, in place, using an
 * AES-192 (12 rounds) expanded key. Same structure as cbcdec_128,
 * with two extra rounds and subkeys in v0..v12.
 *
 * sk          expanded subkeys (13 * 16 bytes)
 * iv          16-byte IV for the first block (read only)
 * buf         ciphertext, overwritten with plaintext
 * num_blocks  block count; must be a positive multiple of 4
 *             (the asm processes 4 blocks per iteration)
 */
static void
cbcdec_192(const unsigned char *sk,
const unsigned char *iv, unsigned char *buf, size_t num_blocks)
{
/* Byte offsets 0/16/32/48 used as asm operands for indexed accesses. */
long cc0, cc1, cc2, cc3;
#if BR_POWER8_LE
/* Permutation pattern to byteswap each 32-bit word (LE -> BE). */
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
cc0 = 0;
cc1 = 16;
cc2 = 32;
cc3 = 48;
asm volatile (
/*
 * Load subkeys into v0..v12
 */
lxvw4x(32, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(33, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(34, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(35, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(36, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(37, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(38, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(39, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(40, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(41, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(42, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(43, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(44, %[cc0], %[sk])
li(%[cc0], 0)
#if BR_POWER8_LE
/*
 * v15 = constant for byteswapping words
 */
lxvw4x(47, 0, %[idx2be])
#endif
/*
 * Load IV into v24.
 */
lxvw4x(56, 0, %[iv])
#if BR_POWER8_LE
vperm(24, 24, 24, 15)
#endif
mtctr(%[num_blocks])
label(loop)
/*
 * Load next ciphertext words in v16..v19. Also save them
 * in v20..v23.
 */
lxvw4x(48, %[cc0], %[buf])
lxvw4x(49, %[cc1], %[buf])
lxvw4x(50, %[cc2], %[buf])
lxvw4x(51, %[cc3], %[buf])
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
vand(20, 16, 16)
vand(21, 17, 17)
vand(22, 18, 18)
vand(23, 19, 19)
/*
 * Decrypt the blocks.
 */
vxor(16, 16, 12)
vxor(17, 17, 12)
vxor(18, 18, 12)
vxor(19, 19, 12)
vncipher(16, 16, 11)
vncipher(17, 17, 11)
vncipher(18, 18, 11)
vncipher(19, 19, 11)
vncipher(16, 16, 10)
vncipher(17, 17, 10)
vncipher(18, 18, 10)
vncipher(19, 19, 10)
vncipher(16, 16, 9)
vncipher(17, 17, 9)
vncipher(18, 18, 9)
vncipher(19, 19, 9)
vncipher(16, 16, 8)
vncipher(17, 17, 8)
vncipher(18, 18, 8)
vncipher(19, 19, 8)
vncipher(16, 16, 7)
vncipher(17, 17, 7)
vncipher(18, 18, 7)
vncipher(19, 19, 7)
vncipher(16, 16, 6)
vncipher(17, 17, 6)
vncipher(18, 18, 6)
vncipher(19, 19, 6)
vncipher(16, 16, 5)
vncipher(17, 17, 5)
vncipher(18, 18, 5)
vncipher(19, 19, 5)
vncipher(16, 16, 4)
vncipher(17, 17, 4)
vncipher(18, 18, 4)
vncipher(19, 19, 4)
vncipher(16, 16, 3)
vncipher(17, 17, 3)
vncipher(18, 18, 3)
vncipher(19, 19, 3)
vncipher(16, 16, 2)
vncipher(17, 17, 2)
vncipher(18, 18, 2)
vncipher(19, 19, 2)
vncipher(16, 16, 1)
vncipher(17, 17, 1)
vncipher(18, 18, 1)
vncipher(19, 19, 1)
vncipherlast(16, 16, 0)
vncipherlast(17, 17, 0)
vncipherlast(18, 18, 0)
vncipherlast(19, 19, 0)
/*
 * XOR decrypted blocks with IV / previous block.
 */
vxor(16, 16, 24)
vxor(17, 17, 20)
vxor(18, 18, 21)
vxor(19, 19, 22)
/*
 * Store back result (with byteswap)
 */
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
stxvw4x(48, %[cc0], %[buf])
stxvw4x(49, %[cc1], %[buf])
stxvw4x(50, %[cc2], %[buf])
stxvw4x(51, %[cc3], %[buf])
/*
 * Fourth encrypted block is IV for next run.
 */
vand(24, 23, 23)
addi(%[buf], %[buf], 64)
bdnz(loop)
: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
[buf] "+b" (buf)
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
"ctr", "memory"
);
}
/*
 * Decrypt num_blocks 16-byte blocks in CBC mode, in place, using an
 * AES-256 (14 rounds) expanded key. Same structure as cbcdec_128,
 * with four extra rounds and subkeys in v0..v14.
 *
 * sk          expanded subkeys (15 * 16 bytes)
 * iv          16-byte IV for the first block (read only)
 * buf         ciphertext, overwritten with plaintext
 * num_blocks  block count; must be a positive multiple of 4
 *             (the asm processes 4 blocks per iteration)
 */
static void
cbcdec_256(const unsigned char *sk,
const unsigned char *iv, unsigned char *buf, size_t num_blocks)
{
/* Byte offsets 0/16/32/48 used as asm operands for indexed accesses. */
long cc0, cc1, cc2, cc3;
#if BR_POWER8_LE
/* Permutation pattern to byteswap each 32-bit word (LE -> BE). */
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
cc0 = 0;
cc1 = 16;
cc2 = 32;
cc3 = 48;
asm volatile (
/*
 * Load subkeys into v0..v14
 */
lxvw4x(32, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(33, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(34, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(35, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(36, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(37, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(38, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(39, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(40, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(41, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(42, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(43, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(44, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(45, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(46, %[cc0], %[sk])
li(%[cc0], 0)
#if BR_POWER8_LE
/*
 * v15 = constant for byteswapping words
 */
lxvw4x(47, 0, %[idx2be])
#endif
/*
 * Load IV into v24.
 */
lxvw4x(56, 0, %[iv])
#if BR_POWER8_LE
vperm(24, 24, 24, 15)
#endif
mtctr(%[num_blocks])
label(loop)
/*
 * Load next ciphertext words in v16..v19. Also save them
 * in v20..v23.
 */
lxvw4x(48, %[cc0], %[buf])
lxvw4x(49, %[cc1], %[buf])
lxvw4x(50, %[cc2], %[buf])
lxvw4x(51, %[cc3], %[buf])
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
vand(20, 16, 16)
vand(21, 17, 17)
vand(22, 18, 18)
vand(23, 19, 19)
/*
 * Decrypt the blocks.
 */
vxor(16, 16, 14)
vxor(17, 17, 14)
vxor(18, 18, 14)
vxor(19, 19, 14)
vncipher(16, 16, 13)
vncipher(17, 17, 13)
vncipher(18, 18, 13)
vncipher(19, 19, 13)
vncipher(16, 16, 12)
vncipher(17, 17, 12)
vncipher(18, 18, 12)
vncipher(19, 19, 12)
vncipher(16, 16, 11)
vncipher(17, 17, 11)
vncipher(18, 18, 11)
vncipher(19, 19, 11)
vncipher(16, 16, 10)
vncipher(17, 17, 10)
vncipher(18, 18, 10)
vncipher(19, 19, 10)
vncipher(16, 16, 9)
vncipher(17, 17, 9)
vncipher(18, 18, 9)
vncipher(19, 19, 9)
vncipher(16, 16, 8)
vncipher(17, 17, 8)
vncipher(18, 18, 8)
vncipher(19, 19, 8)
vncipher(16, 16, 7)
vncipher(17, 17, 7)
vncipher(18, 18, 7)
vncipher(19, 19, 7)
vncipher(16, 16, 6)
vncipher(17, 17, 6)
vncipher(18, 18, 6)
vncipher(19, 19, 6)
vncipher(16, 16, 5)
vncipher(17, 17, 5)
vncipher(18, 18, 5)
vncipher(19, 19, 5)
vncipher(16, 16, 4)
vncipher(17, 17, 4)
vncipher(18, 18, 4)
vncipher(19, 19, 4)
vncipher(16, 16, 3)
vncipher(17, 17, 3)
vncipher(18, 18, 3)
vncipher(19, 19, 3)
vncipher(16, 16, 2)
vncipher(17, 17, 2)
vncipher(18, 18, 2)
vncipher(19, 19, 2)
vncipher(16, 16, 1)
vncipher(17, 17, 1)
vncipher(18, 18, 1)
vncipher(19, 19, 1)
vncipherlast(16, 16, 0)
vncipherlast(17, 17, 0)
vncipherlast(18, 18, 0)
vncipherlast(19, 19, 0)
/*
 * XOR decrypted blocks with IV / previous block.
 */
vxor(16, 16, 24)
vxor(17, 17, 20)
vxor(18, 18, 21)
vxor(19, 19, 22)
/*
 * Store back result (with byteswap)
 */
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
stxvw4x(48, %[cc0], %[buf])
stxvw4x(49, %[cc1], %[buf])
stxvw4x(50, %[cc2], %[buf])
stxvw4x(51, %[cc3], %[buf])
/*
 * Fourth encrypted block is IV for next run.
 */
vand(24, 23, 23)
addi(%[buf], %[buf], 64)
bdnz(loop)
: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
[buf] "+b" (buf)
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
"ctr", "memory"
);
}
/* see bearssl_block.h */
/*
 * Invoke the key-size-specific CBC decryption kernel.
 * num_blocks must be a positive multiple of 4.
 */
static void
cbcdec_blocks(const br_aes_pwr8_cbcdec_keys *ctx,
	void *iv, unsigned char *buf, size_t num_blocks)
{
	switch (ctx->num_rounds) {
	case 10:
		cbcdec_128(ctx->skey.skni, iv, buf, num_blocks);
		break;
	case 12:
		cbcdec_192(ctx->skey.skni, iv, buf, num_blocks);
		break;
	default:
		cbcdec_256(ctx->skey.skni, iv, buf, num_blocks);
		break;
	}
}

void
br_aes_pwr8_cbcdec_run(const br_aes_pwr8_cbcdec_keys *ctx,
	void *iv, void *data, size_t len)
{
	unsigned char savediv[16];
	unsigned char *buf;

	if (len == 0) {
		return;
	}
	buf = data;
	/*
	 * The IV for the caller's next invocation is the last
	 * ciphertext block of this one; save it before decrypting.
	 */
	memcpy(savediv, buf + len - 16, 16);
	if (len >= 64) {
		size_t nblk;
		unsigned char last[16];

		/*
		 * Bulk-process as many whole groups of four blocks as
		 * possible; the last ciphertext block of that span
		 * becomes the IV for the remainder.
		 */
		nblk = (len >> 4) & ~(size_t)3;
		memcpy(last, buf + (nblk << 4) - 16, 16);
		cbcdec_blocks(ctx, iv, buf, nblk);
		buf += nblk << 4;
		len &= 63;
		memcpy(iv, last, 16);
	}
	if (len > 0) {
		unsigned char tmp[64];

		/*
		 * Fewer than four blocks remain: pad a stack buffer to
		 * a full 4-block group, decrypt it, and copy back only
		 * the bytes that were actually requested.
		 */
		memcpy(tmp, buf, len);
		memset(tmp + len, 0, (sizeof tmp) - len);
		cbcdec_blocks(ctx, iv, tmp, 4);
		memcpy(buf, tmp, len);
	}
	memcpy(iv, savediv, 16);
}
/* see bearssl_block.h */
/*
 * Class instance for the POWER8 CBC decryption implementation.
 * Fields, per the br_block_cbcdec_class layout in bearssl_block.h:
 * context size, block size in bytes (16), log2 of block size (4),
 * then the init/run entry points cast to the generic signatures.
 */
const br_block_cbcdec_class br_aes_pwr8_cbcdec_vtable = {
sizeof(br_aes_pwr8_cbcdec_keys),
16,
4,
(void (*)(const br_block_cbcdec_class **, const void *, size_t))
&br_aes_pwr8_cbcdec_init,
(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
&br_aes_pwr8_cbcdec_run
};
/* see bearssl_block.h */
/*
 * Return the CBC decryption vtable, or NULL when the POWER8
 * implementation is not usable on this system.
 */
const br_block_cbcdec_class *
br_aes_pwr8_cbcdec_get_vtable(void)
{
	if (!br_aes_pwr8_supported()) {
		return NULL;
	}
	return &br_aes_pwr8_cbcdec_vtable;
}
#else
/* see bearssl_block.h */
/*
 * Fallback when this file is built without BR_POWER8: the POWER8
 * implementation is unavailable, so no vtable is offered.
 */
const br_block_cbcdec_class *
br_aes_pwr8_cbcdec_get_vtable(void)
{
return NULL;
}
#endif

View File

@ -1,417 +0,0 @@
/*
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define BR_POWER_ASM_MACROS 1
#include "inner.h"
#if BR_POWER8
/* see bearssl_block.h */
/*
 * Initialize a CBC encryption context: expand the key into the
 * context's subkey storage and install the class vtable.
 */
void
br_aes_pwr8_cbcenc_init(br_aes_pwr8_cbcenc_keys *ctx,
	const void *key, size_t len)
{
	unsigned rounds;

	rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
	ctx->num_rounds = rounds;
	ctx->vtable = &br_aes_pwr8_cbcenc_vtable;
}
/*
 * Encrypt len bytes in CBC mode, in place, using an AES-128
 * (10 rounds) expanded key. CBC encryption is inherently serial, so
 * the loop processes a single 16-byte block per iteration
 * (len >> 4 iterations); len must be a positive multiple of 16.
 *
 * sk   expanded subkeys (11 * 16 bytes, loaded into v0..v10)
 * iv   16-byte IV (read only; the caller propagates the last
 *      ciphertext block as the next IV, see br_aes_pwr8_cbcenc_run)
 * buf  plaintext, overwritten with ciphertext
 */
static void
cbcenc_128(const unsigned char *sk,
const unsigned char *iv, unsigned char *buf, size_t len)
{
/* Byte offset passed as an asm operand for indexed loads. */
long cc;
#if BR_POWER8_LE
/* Permutation pattern to byteswap each 32-bit word (LE -> BE). */
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
cc = 0;
asm volatile (
/*
 * Load subkeys into v0..v10
 */
lxvw4x(32, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(33, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(34, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(35, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(36, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(37, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(38, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(39, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(40, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(41, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(42, %[cc], %[sk])
#if BR_POWER8_LE
/*
 * v15 = constant for byteswapping words
 */
lxvw4x(47, 0, %[idx2be])
#endif
/*
 * Load IV into v16.
 */
lxvw4x(48, 0, %[iv])
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
#endif
mtctr(%[num_blocks])
label(loop)
/*
 * Load next plaintext word and XOR with current IV.
 */
lxvw4x(49, 0, %[buf])
#if BR_POWER8_LE
vperm(17, 17, 17, 15)
#endif
vxor(16, 16, 17)
/*
 * Encrypt the block.
 */
vxor(16, 16, 0)
vcipher(16, 16, 1)
vcipher(16, 16, 2)
vcipher(16, 16, 3)
vcipher(16, 16, 4)
vcipher(16, 16, 5)
vcipher(16, 16, 6)
vcipher(16, 16, 7)
vcipher(16, 16, 8)
vcipher(16, 16, 9)
vcipherlast(16, 16, 10)
/*
 * Store back result (with byteswap)
 */
#if BR_POWER8_LE
vperm(17, 16, 16, 15)
stxvw4x(49, 0, %[buf])
#else
stxvw4x(48, 0, %[buf])
#endif
addi(%[buf], %[buf], 16)
bdnz(loop)
: [cc] "+b" (cc), [buf] "+b" (buf)
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
"ctr", "memory"
);
}
/*
 * Encrypt len bytes in CBC mode, in place, using an AES-192
 * (12 rounds) expanded key. Same structure as cbcenc_128, with two
 * extra rounds and subkeys in v0..v12; len must be a positive
 * multiple of 16.
 *
 * sk   expanded subkeys (13 * 16 bytes)
 * iv   16-byte IV (read only)
 * buf  plaintext, overwritten with ciphertext
 */
static void
cbcenc_192(const unsigned char *sk,
const unsigned char *iv, unsigned char *buf, size_t len)
{
/* Byte offset passed as an asm operand for indexed loads. */
long cc;
#if BR_POWER8_LE
/* Permutation pattern to byteswap each 32-bit word (LE -> BE). */
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
cc = 0;
asm volatile (
/*
 * Load subkeys into v0..v12
 */
lxvw4x(32, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(33, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(34, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(35, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(36, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(37, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(38, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(39, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(40, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(41, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(42, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(43, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(44, %[cc], %[sk])
#if BR_POWER8_LE
/*
 * v15 = constant for byteswapping words
 */
lxvw4x(47, 0, %[idx2be])
#endif
/*
 * Load IV into v16.
 */
lxvw4x(48, 0, %[iv])
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
#endif
mtctr(%[num_blocks])
label(loop)
/*
 * Load next plaintext word and XOR with current IV.
 */
lxvw4x(49, 0, %[buf])
#if BR_POWER8_LE
vperm(17, 17, 17, 15)
#endif
vxor(16, 16, 17)
/*
 * Encrypt the block.
 */
vxor(16, 16, 0)
vcipher(16, 16, 1)
vcipher(16, 16, 2)
vcipher(16, 16, 3)
vcipher(16, 16, 4)
vcipher(16, 16, 5)
vcipher(16, 16, 6)
vcipher(16, 16, 7)
vcipher(16, 16, 8)
vcipher(16, 16, 9)
vcipher(16, 16, 10)
vcipher(16, 16, 11)
vcipherlast(16, 16, 12)
/*
 * Store back result (with byteswap)
 */
#if BR_POWER8_LE
vperm(17, 16, 16, 15)
stxvw4x(49, 0, %[buf])
#else
stxvw4x(48, 0, %[buf])
#endif
addi(%[buf], %[buf], 16)
bdnz(loop)
: [cc] "+b" (cc), [buf] "+b" (buf)
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
"ctr", "memory"
);
}
/*
 * Encrypt len bytes in CBC mode, in place, using an AES-256
 * (14 rounds) expanded key. Same structure as cbcenc_128, with four
 * extra rounds and subkeys in v0..v14; len must be a positive
 * multiple of 16.
 *
 * sk   expanded subkeys (15 * 16 bytes)
 * iv   16-byte IV (read only)
 * buf  plaintext, overwritten with ciphertext
 */
static void
cbcenc_256(const unsigned char *sk,
const unsigned char *iv, unsigned char *buf, size_t len)
{
/* Byte offset passed as an asm operand for indexed loads. */
long cc;
#if BR_POWER8_LE
/* Permutation pattern to byteswap each 32-bit word (LE -> BE). */
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
cc = 0;
asm volatile (
/*
 * Load subkeys into v0..v14
 */
lxvw4x(32, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(33, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(34, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(35, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(36, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(37, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(38, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(39, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(40, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(41, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(42, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(43, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(44, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(45, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(46, %[cc], %[sk])
#if BR_POWER8_LE
/*
 * v15 = constant for byteswapping words
 */
lxvw4x(47, 0, %[idx2be])
#endif
/*
 * Load IV into v16.
 */
lxvw4x(48, 0, %[iv])
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
#endif
mtctr(%[num_blocks])
label(loop)
/*
 * Load next plaintext word and XOR with current IV.
 */
lxvw4x(49, 0, %[buf])
#if BR_POWER8_LE
vperm(17, 17, 17, 15)
#endif
vxor(16, 16, 17)
/*
 * Encrypt the block.
 */
vxor(16, 16, 0)
vcipher(16, 16, 1)
vcipher(16, 16, 2)
vcipher(16, 16, 3)
vcipher(16, 16, 4)
vcipher(16, 16, 5)
vcipher(16, 16, 6)
vcipher(16, 16, 7)
vcipher(16, 16, 8)
vcipher(16, 16, 9)
vcipher(16, 16, 10)
vcipher(16, 16, 11)
vcipher(16, 16, 12)
vcipher(16, 16, 13)
vcipherlast(16, 16, 14)
/*
 * Store back result (with byteswap)
 */
#if BR_POWER8_LE
vperm(17, 16, 16, 15)
stxvw4x(49, 0, %[buf])
#else
stxvw4x(48, 0, %[buf])
#endif
addi(%[buf], %[buf], 16)
bdnz(loop)
: [cc] "+b" (cc), [buf] "+b" (buf)
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
"ctr", "memory"
);
}
/* see bearssl_block.h */
/*
 * CBC-encrypt len bytes (a multiple of the 16-byte block size) in
 * place, dispatching on the key size recorded in the context, then
 * store the last ciphertext block back into iv for chaining.
 */
void
br_aes_pwr8_cbcenc_run(const br_aes_pwr8_cbcenc_keys *ctx,
	void *iv, void *data, size_t len)
{
	if (len == 0) {
		return;
	}
	if (ctx->num_rounds == 10) {
		cbcenc_128(ctx->skey.skni, iv, data, len);
	} else if (ctx->num_rounds == 12) {
		cbcenc_192(ctx->skey.skni, iv, data, len);
	} else {
		cbcenc_256(ctx->skey.skni, iv, data, len);
	}
	memcpy(iv, (unsigned char *)data + (len - 16), 16);
}
/* see bearssl_block.h */
/*
 * Class instance for the POWER8 CBC encryption implementation.
 * Fields, per the br_block_cbcenc_class layout in bearssl_block.h:
 * context size, block size in bytes (16), log2 of block size (4),
 * then the init/run entry points cast to the generic signatures.
 */
const br_block_cbcenc_class br_aes_pwr8_cbcenc_vtable = {
sizeof(br_aes_pwr8_cbcenc_keys),
16,
4,
(void (*)(const br_block_cbcenc_class **, const void *, size_t))
&br_aes_pwr8_cbcenc_init,
(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
&br_aes_pwr8_cbcenc_run
};
/* see bearssl_block.h */
/*
 * Return the CBC encryption vtable, or NULL when the POWER8
 * implementation is not usable on this system.
 */
const br_block_cbcenc_class *
br_aes_pwr8_cbcenc_get_vtable(void)
{
	if (!br_aes_pwr8_supported()) {
		return NULL;
	}
	return &br_aes_pwr8_cbcenc_vtable;
}
#else
/* see bearssl_block.h */
/*
 * Fallback when this file is built without BR_POWER8: the POWER8
 * implementation is unavailable, so no vtable is offered.
 */
const br_block_cbcenc_class *
br_aes_pwr8_cbcenc_get_vtable(void)
{
return NULL;
}
#endif

View File

@ -1,717 +0,0 @@
/*
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define BR_POWER_ASM_MACROS 1
#include "inner.h"
#if BR_POWER8
/* see bearssl_block.h */
/*
 * Initialize a CTR context: expand the key into the context's subkey
 * storage and install the class vtable.
 */
void
br_aes_pwr8_ctr_init(br_aes_pwr8_ctr_keys *ctx,
	const void *key, size_t len)
{
	unsigned rounds;

	rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
	ctx->num_rounds = rounds;
	ctx->vtable = &br_aes_pwr8_ctr_vtable;
}
/*
 * CTR-mode processing of num_blocks 16-byte blocks, in place, using
 * an AES-128 (10 rounds) expanded key: each data block is XORed with
 * its encrypted counter block.
 *
 * sk          expanded subkeys (11 * 16 bytes, loaded into v0..v10)
 * ivbuf       64 bytes holding four counter blocks (presumably four
 *             consecutive counter values -- confirm against caller);
 *             the last word of each is advanced by 4 per iteration
 *             via ctrinc
 * buf         data, XORed in place with the encrypted counters
 * num_blocks  block count; the asm runs num_blocks >> 2 iterations of
 *             a 4-block loop, so it must be a positive multiple of 4
 */
static void
ctr_128(const unsigned char *sk, const unsigned char *ivbuf,
unsigned char *buf, size_t num_blocks)
{
/* Byte offsets 0/16/32/48 used as asm operands for indexed accesses. */
long cc0, cc1, cc2, cc3;
#if BR_POWER8_LE
/* Permutation pattern to byteswap each 32-bit word (LE -> BE). */
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
/* Per-iteration counter increment: adds 4 to the last 32-bit word. */
static const uint32_t ctrinc[] = {
0, 0, 0, 4
};
cc0 = 0;
cc1 = 16;
cc2 = 32;
cc3 = 48;
asm volatile (
/*
 * Load subkeys into v0..v10
 */
lxvw4x(32, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(33, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(34, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(35, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(36, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(37, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(38, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(39, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(40, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(41, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(42, %[cc0], %[sk])
li(%[cc0], 0)
#if BR_POWER8_LE
/*
 * v15 = constant for byteswapping words
 */
lxvw4x(47, 0, %[idx2be])
#endif
/*
 * v28 = increment for IV counter.
 */
lxvw4x(60, 0, %[ctrinc])
/*
 * Load IV into v16..v19
 */
lxvw4x(48, %[cc0], %[ivbuf])
lxvw4x(49, %[cc1], %[ivbuf])
lxvw4x(50, %[cc2], %[ivbuf])
lxvw4x(51, %[cc3], %[ivbuf])
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
mtctr(%[num_blocks])
label(loop)
/*
 * Compute next IV into v24..v27
 */
vadduwm(24, 16, 28)
vadduwm(25, 17, 28)
vadduwm(26, 18, 28)
vadduwm(27, 19, 28)
/*
 * Load next data blocks. We do this early on but we
 * won't need them until IV encryption is done.
 */
lxvw4x(52, %[cc0], %[buf])
lxvw4x(53, %[cc1], %[buf])
lxvw4x(54, %[cc2], %[buf])
lxvw4x(55, %[cc3], %[buf])
/*
 * Encrypt the current IV.
 */
vxor(16, 16, 0)
vxor(17, 17, 0)
vxor(18, 18, 0)
vxor(19, 19, 0)
vcipher(16, 16, 1)
vcipher(17, 17, 1)
vcipher(18, 18, 1)
vcipher(19, 19, 1)
vcipher(16, 16, 2)
vcipher(17, 17, 2)
vcipher(18, 18, 2)
vcipher(19, 19, 2)
vcipher(16, 16, 3)
vcipher(17, 17, 3)
vcipher(18, 18, 3)
vcipher(19, 19, 3)
vcipher(16, 16, 4)
vcipher(17, 17, 4)
vcipher(18, 18, 4)
vcipher(19, 19, 4)
vcipher(16, 16, 5)
vcipher(17, 17, 5)
vcipher(18, 18, 5)
vcipher(19, 19, 5)
vcipher(16, 16, 6)
vcipher(17, 17, 6)
vcipher(18, 18, 6)
vcipher(19, 19, 6)
vcipher(16, 16, 7)
vcipher(17, 17, 7)
vcipher(18, 18, 7)
vcipher(19, 19, 7)
vcipher(16, 16, 8)
vcipher(17, 17, 8)
vcipher(18, 18, 8)
vcipher(19, 19, 8)
vcipher(16, 16, 9)
vcipher(17, 17, 9)
vcipher(18, 18, 9)
vcipher(19, 19, 9)
vcipherlast(16, 16, 10)
vcipherlast(17, 17, 10)
vcipherlast(18, 18, 10)
vcipherlast(19, 19, 10)
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
/*
 * Load next plaintext word and XOR with encrypted IV.
 */
vxor(16, 20, 16)
vxor(17, 21, 17)
vxor(18, 22, 18)
vxor(19, 23, 19)
stxvw4x(48, %[cc0], %[buf])
stxvw4x(49, %[cc1], %[buf])
stxvw4x(50, %[cc2], %[buf])
stxvw4x(51, %[cc3], %[buf])
addi(%[buf], %[buf], 64)
/*
 * Update IV.
 */
vand(16, 24, 24)
vand(17, 25, 25)
vand(18, 26, 26)
vand(19, 27, 27)
bdnz(loop)
: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
[buf] "+b" (buf)
: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
[ctrinc] "b" (ctrinc)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
"ctr", "memory"
);
}
/*
 * CTR keystream application for AES-192 (12 rounds), four blocks at
 * a time.
 *
 * sk          expanded subkeys (13 x 16 bytes)
 * ivbuf       four consecutive 16-byte counter (IV) blocks
 * buf         data buffer, XORed in place with the keystream
 * num_blocks  number of 16-byte blocks; the asm loop consumes four
 *             blocks per iteration (num_blocks >> 2 is the loop count),
 *             so this must be a non-zero multiple of 4
 */
static void
ctr_192(const unsigned char *sk, const unsigned char *ivbuf,
unsigned char *buf, size_t num_blocks)
{
long cc0, cc1, cc2, cc3;
#if BR_POWER8_LE
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
static const uint32_t ctrinc[] = {
0, 0, 0, 4
};
/* Byte offsets of the four parallel 16-byte lanes. */
cc0 = 0;
cc1 = 16;
cc2 = 32;
cc3 = 48;
asm volatile (
/*
 * Load subkeys into v0..v12
 */
lxvw4x(32, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(33, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(34, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(35, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(36, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(37, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(38, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(39, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(40, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(41, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(42, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(43, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(44, %[cc0], %[sk])
li(%[cc0], 0)
#if BR_POWER8_LE
/*
 * v15 = constant for byteswapping words
 */
lxvw4x(47, 0, %[idx2be])
#endif
/*
 * v28 = increment for IV counter.
 */
lxvw4x(60, 0, %[ctrinc])
/*
 * Load IV into v16..v19
 */
lxvw4x(48, %[cc0], %[ivbuf])
lxvw4x(49, %[cc1], %[ivbuf])
lxvw4x(50, %[cc2], %[ivbuf])
lxvw4x(51, %[cc3], %[ivbuf])
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
mtctr(%[num_blocks])
label(loop)
/*
 * Compute next IV into v24..v27
 */
vadduwm(24, 16, 28)
vadduwm(25, 17, 28)
vadduwm(26, 18, 28)
vadduwm(27, 19, 28)
/*
 * Load next data blocks. We do this early on but we
 * won't need them until IV encryption is done.
 */
lxvw4x(52, %[cc0], %[buf])
lxvw4x(53, %[cc1], %[buf])
lxvw4x(54, %[cc2], %[buf])
lxvw4x(55, %[cc3], %[buf])
/*
 * Encrypt the current IV.
 */
vxor(16, 16, 0)
vxor(17, 17, 0)
vxor(18, 18, 0)
vxor(19, 19, 0)
vcipher(16, 16, 1)
vcipher(17, 17, 1)
vcipher(18, 18, 1)
vcipher(19, 19, 1)
vcipher(16, 16, 2)
vcipher(17, 17, 2)
vcipher(18, 18, 2)
vcipher(19, 19, 2)
vcipher(16, 16, 3)
vcipher(17, 17, 3)
vcipher(18, 18, 3)
vcipher(19, 19, 3)
vcipher(16, 16, 4)
vcipher(17, 17, 4)
vcipher(18, 18, 4)
vcipher(19, 19, 4)
vcipher(16, 16, 5)
vcipher(17, 17, 5)
vcipher(18, 18, 5)
vcipher(19, 19, 5)
vcipher(16, 16, 6)
vcipher(17, 17, 6)
vcipher(18, 18, 6)
vcipher(19, 19, 6)
vcipher(16, 16, 7)
vcipher(17, 17, 7)
vcipher(18, 18, 7)
vcipher(19, 19, 7)
vcipher(16, 16, 8)
vcipher(17, 17, 8)
vcipher(18, 18, 8)
vcipher(19, 19, 8)
vcipher(16, 16, 9)
vcipher(17, 17, 9)
vcipher(18, 18, 9)
vcipher(19, 19, 9)
vcipher(16, 16, 10)
vcipher(17, 17, 10)
vcipher(18, 18, 10)
vcipher(19, 19, 10)
vcipher(16, 16, 11)
vcipher(17, 17, 11)
vcipher(18, 18, 11)
vcipher(19, 19, 11)
vcipherlast(16, 16, 12)
vcipherlast(17, 17, 12)
vcipherlast(18, 18, 12)
vcipherlast(19, 19, 12)
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
/*
 * XOR the keystream (encrypted IV) into the data blocks
 * that were loaded above, and store the result.
 */
vxor(16, 20, 16)
vxor(17, 21, 17)
vxor(18, 22, 18)
vxor(19, 23, 19)
stxvw4x(48, %[cc0], %[buf])
stxvw4x(49, %[cc1], %[buf])
stxvw4x(50, %[cc2], %[buf])
stxvw4x(51, %[cc3], %[buf])
addi(%[buf], %[buf], 64)
/*
 * Update IV. (A vand of a register with itself is used
 * as a vector register move.)
 */
vand(16, 24, 24)
vand(17, 25, 25)
vand(18, 26, 26)
vand(19, 27, 27)
bdnz(loop)
: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
[buf] "+b" (buf)
: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
[ctrinc] "b" (ctrinc)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
"ctr", "memory"
);
}
/*
 * CTR keystream application for AES-256 (14 rounds), four blocks at
 * a time.
 *
 * sk          expanded subkeys (15 x 16 bytes)
 * ivbuf       four consecutive 16-byte counter (IV) blocks
 * buf         data buffer, XORed in place with the keystream
 * num_blocks  number of 16-byte blocks; the asm loop consumes four
 *             blocks per iteration (num_blocks >> 2 is the loop count),
 *             so this must be a non-zero multiple of 4
 */
static void
ctr_256(const unsigned char *sk, const unsigned char *ivbuf,
unsigned char *buf, size_t num_blocks)
{
long cc0, cc1, cc2, cc3;
#if BR_POWER8_LE
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
static const uint32_t ctrinc[] = {
0, 0, 0, 4
};
/* Byte offsets of the four parallel 16-byte lanes. */
cc0 = 0;
cc1 = 16;
cc2 = 32;
cc3 = 48;
asm volatile (
/*
 * Load subkeys into v0..v14
 */
lxvw4x(32, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(33, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(34, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(35, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(36, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(37, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(38, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(39, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(40, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(41, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(42, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(43, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(44, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(45, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(46, %[cc0], %[sk])
li(%[cc0], 0)
#if BR_POWER8_LE
/*
 * v15 = constant for byteswapping words
 */
lxvw4x(47, 0, %[idx2be])
#endif
/*
 * v28 = increment for IV counter.
 */
lxvw4x(60, 0, %[ctrinc])
/*
 * Load IV into v16..v19
 */
lxvw4x(48, %[cc0], %[ivbuf])
lxvw4x(49, %[cc1], %[ivbuf])
lxvw4x(50, %[cc2], %[ivbuf])
lxvw4x(51, %[cc3], %[ivbuf])
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
mtctr(%[num_blocks])
label(loop)
/*
 * Compute next IV into v24..v27
 */
vadduwm(24, 16, 28)
vadduwm(25, 17, 28)
vadduwm(26, 18, 28)
vadduwm(27, 19, 28)
/*
 * Load next data blocks. We do this early on but we
 * won't need them until IV encryption is done.
 */
lxvw4x(52, %[cc0], %[buf])
lxvw4x(53, %[cc1], %[buf])
lxvw4x(54, %[cc2], %[buf])
lxvw4x(55, %[cc3], %[buf])
/*
 * Encrypt the current IV.
 */
vxor(16, 16, 0)
vxor(17, 17, 0)
vxor(18, 18, 0)
vxor(19, 19, 0)
vcipher(16, 16, 1)
vcipher(17, 17, 1)
vcipher(18, 18, 1)
vcipher(19, 19, 1)
vcipher(16, 16, 2)
vcipher(17, 17, 2)
vcipher(18, 18, 2)
vcipher(19, 19, 2)
vcipher(16, 16, 3)
vcipher(17, 17, 3)
vcipher(18, 18, 3)
vcipher(19, 19, 3)
vcipher(16, 16, 4)
vcipher(17, 17, 4)
vcipher(18, 18, 4)
vcipher(19, 19, 4)
vcipher(16, 16, 5)
vcipher(17, 17, 5)
vcipher(18, 18, 5)
vcipher(19, 19, 5)
vcipher(16, 16, 6)
vcipher(17, 17, 6)
vcipher(18, 18, 6)
vcipher(19, 19, 6)
vcipher(16, 16, 7)
vcipher(17, 17, 7)
vcipher(18, 18, 7)
vcipher(19, 19, 7)
vcipher(16, 16, 8)
vcipher(17, 17, 8)
vcipher(18, 18, 8)
vcipher(19, 19, 8)
vcipher(16, 16, 9)
vcipher(17, 17, 9)
vcipher(18, 18, 9)
vcipher(19, 19, 9)
vcipher(16, 16, 10)
vcipher(17, 17, 10)
vcipher(18, 18, 10)
vcipher(19, 19, 10)
vcipher(16, 16, 11)
vcipher(17, 17, 11)
vcipher(18, 18, 11)
vcipher(19, 19, 11)
vcipher(16, 16, 12)
vcipher(17, 17, 12)
vcipher(18, 18, 12)
vcipher(19, 19, 12)
vcipher(16, 16, 13)
vcipher(17, 17, 13)
vcipher(18, 18, 13)
vcipher(19, 19, 13)
vcipherlast(16, 16, 14)
vcipherlast(17, 17, 14)
vcipherlast(18, 18, 14)
vcipherlast(19, 19, 14)
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
/*
 * XOR the keystream (encrypted IV) into the data blocks
 * that were loaded above, and store the result.
 */
vxor(16, 20, 16)
vxor(17, 21, 17)
vxor(18, 22, 18)
vxor(19, 23, 19)
stxvw4x(48, %[cc0], %[buf])
stxvw4x(49, %[cc1], %[buf])
stxvw4x(50, %[cc2], %[buf])
stxvw4x(51, %[cc3], %[buf])
addi(%[buf], %[buf], 64)
/*
 * Update IV. (A vand of a register with itself is used
 * as a vector register move.)
 */
vand(16, 24, 24)
vand(17, 25, 25)
vand(18, 26, 26)
vand(19, 27, 27)
bdnz(loop)
: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
[buf] "+b" (buf)
: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
[ctrinc] "b" (ctrinc)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
"ctr", "memory"
);
}
/* see bearssl_block.h */
uint32_t
br_aes_pwr8_ctr_run(const br_aes_pwr8_ctr_keys *ctx,
	const void *iv, uint32_t cc, void *data, size_t len)
{
	unsigned char *out;
	unsigned char ivbuf[64];
	int i;

	out = data;
	/* Replicate the 12-byte nonce into the four counter slots. */
	for (i = 0; i < 4; i ++) {
		memcpy(ivbuf + (i << 4), iv, 12);
	}
	if (len >= 64) {
		size_t full4;

		/* Counter values cc..cc+3, big-endian, one per slot. */
		for (i = 0; i < 4; i ++) {
			br_enc32be(ivbuf + (i << 4) + 12, cc + (uint32_t)i);
		}
		/* Number of blocks in whole 4-block groups. */
		full4 = (len >> 4) & ~(size_t)3;
		if (ctx->num_rounds == 10) {
			ctr_128(ctx->skey.skni, ivbuf, out, full4);
		} else if (ctx->num_rounds == 12) {
			ctr_192(ctx->skey.skni, ivbuf, out, full4);
		} else {
			ctr_256(ctx->skey.skni, ivbuf, out, full4);
		}
		cc += (uint32_t)full4;
		out += len & ~(size_t)63;
		len &= 63;
	}
	if (len > 0) {
		unsigned char tmp[64];

		/*
		 * Tail of 1..63 bytes: pad into a 64-byte scratch
		 * buffer so the 4-block asm routine can be used.
		 */
		memcpy(tmp, out, len);
		memset(tmp + len, 0, (sizeof tmp) - len);
		for (i = 0; i < 4; i ++) {
			br_enc32be(ivbuf + (i << 4) + 12, cc + (uint32_t)i);
		}
		if (ctx->num_rounds == 10) {
			ctr_128(ctx->skey.skni, ivbuf, tmp, 4);
		} else if (ctx->num_rounds == 12) {
			ctr_192(ctx->skey.skni, ivbuf, tmp, 4);
		} else {
			ctr_256(ctx->skey.skni, ivbuf, tmp, 4);
		}
		memcpy(out, tmp, len);
		cc += (len + 15) >> 4;
	}
	return cc;
}
/* see bearssl_block.h */
const br_block_ctr_class br_aes_pwr8_ctr_vtable = {
sizeof(br_aes_pwr8_ctr_keys), /* context structure size */
16, /* AES block size, in bytes */
4, /* presumably log2(block size) -- see bearssl_block.h */
(void (*)(const br_block_ctr_class **, const void *, size_t))
&br_aes_pwr8_ctr_init,
(uint32_t (*)(const br_block_ctr_class *const *,
const void *, uint32_t, void *, size_t))
&br_aes_pwr8_ctr_run
};
/* see bearssl_block.h */
const br_block_ctr_class *
br_aes_pwr8_ctr_get_vtable(void)
{
	/* The vtable is only usable when the CPU has POWER8 crypto opcodes. */
	if (!br_aes_pwr8_supported()) {
		return NULL;
	}
	return &br_aes_pwr8_ctr_vtable;
}
#else
/* see bearssl_block.h */
const br_block_ctr_class *
br_aes_pwr8_ctr_get_vtable(void)
{
/* POWER8 support was not compiled in: no vtable available. */
return NULL;
}
#endif

View File

@ -1,946 +0,0 @@
/*
* Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define BR_POWER_ASM_MACROS 1
#include "inner.h"
#if BR_POWER8
/* see bearssl_block.h */
const br_block_ctrcbc_class *
br_aes_pwr8_ctrcbc_get_vtable(void)
{
	/* The vtable is only usable when the CPU has POWER8 crypto opcodes. */
	if (!br_aes_pwr8_supported()) {
		return NULL;
	}
	return &br_aes_pwr8_ctrcbc_vtable;
}
/* see bearssl_block.h */
void
br_aes_pwr8_ctrcbc_init(br_aes_pwr8_ctrcbc_keys *ctx,
	const void *key, size_t len)
{
	/*
	 * Run the POWER8 key schedule; it reports the number of AES
	 * rounds, which encodes the key size.
	 */
	ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
	ctx->vtable = &br_aes_pwr8_ctrcbc_vtable;
}
/*
* Register conventions for CTR + CBC-MAC:
*
* AES subkeys are in registers 0 to 10/12/14 (depending on key size)
* Register v15 contains the byteswap index register (little-endian only)
* Register v16 contains the CTR counter value
* Register v17 contains the CBC-MAC current value
* Registers v18 to v27 are scratch
* Counter increment uses v28, v29 and v30
*
* For CTR alone:
*
* AES subkeys are in registers 0 to 10/12/14 (depending on key size)
* Register v15 contains the byteswap index register (little-endian only)
* Registers v16 to v19 contain the CTR counter values (four blocks)
* Registers v20 to v27 are scratch
* Counter increment uses v28, v29 and v30
*/
/*
 * Load the 11 AES-128 subkeys from sk into v0..v10 (VSX registers
 * 32..42). %[cc] is used as a running byte offset; each use site
 * resets it afterwards with li(%[cc], 0).
 */
#define LOAD_SUBKEYS_128 \
lxvw4x(32, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(33, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(34, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(35, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(36, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(37, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(38, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(39, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(40, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(41, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(42, %[cc], %[sk])
/* AES-192: two extra subkeys, into v11..v12. */
#define LOAD_SUBKEYS_192 \
LOAD_SUBKEYS_128 \
addi(%[cc], %[cc], 16) \
lxvw4x(43, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(44, %[cc], %[sk])
/* AES-256: two more subkeys again, into v13..v14. */
#define LOAD_SUBKEYS_256 \
LOAD_SUBKEYS_192 \
addi(%[cc], %[cc], 16) \
lxvw4x(45, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(46, %[cc], %[sk])
/*
 * AES-encrypt the single block held in vector register x, using the
 * subkeys previously loaded with LOAD_SUBKEYS_128 (10 rounds).
 */
#define BLOCK_ENCRYPT_128(x) \
vxor(x, x, 0) \
vcipher(x, x, 1) \
vcipher(x, x, 2) \
vcipher(x, x, 3) \
vcipher(x, x, 4) \
vcipher(x, x, 5) \
vcipher(x, x, 6) \
vcipher(x, x, 7) \
vcipher(x, x, 8) \
vcipher(x, x, 9) \
vcipherlast(x, x, 10)
/* Same, for AES-192 (12 rounds). */
#define BLOCK_ENCRYPT_192(x) \
vxor(x, x, 0) \
vcipher(x, x, 1) \
vcipher(x, x, 2) \
vcipher(x, x, 3) \
vcipher(x, x, 4) \
vcipher(x, x, 5) \
vcipher(x, x, 6) \
vcipher(x, x, 7) \
vcipher(x, x, 8) \
vcipher(x, x, 9) \
vcipher(x, x, 10) \
vcipher(x, x, 11) \
vcipherlast(x, x, 12)
/* Same, for AES-256 (14 rounds). */
#define BLOCK_ENCRYPT_256(x) \
vxor(x, x, 0) \
vcipher(x, x, 1) \
vcipher(x, x, 2) \
vcipher(x, x, 3) \
vcipher(x, x, 4) \
vcipher(x, x, 5) \
vcipher(x, x, 6) \
vcipher(x, x, 7) \
vcipher(x, x, 8) \
vcipher(x, x, 9) \
vcipher(x, x, 10) \
vcipher(x, x, 11) \
vcipher(x, x, 12) \
vcipher(x, x, 13) \
vcipherlast(x, x, 14)
/*
 * AES-encrypt two blocks (registers x and y) with the round
 * instructions interleaved, presumably to exploit instruction-level
 * parallelism between the two independent AES instances.
 */
#define BLOCK_ENCRYPT_X2_128(x, y) \
vxor(x, x, 0) \
vxor(y, y, 0) \
vcipher(x, x, 1) \
vcipher(y, y, 1) \
vcipher(x, x, 2) \
vcipher(y, y, 2) \
vcipher(x, x, 3) \
vcipher(y, y, 3) \
vcipher(x, x, 4) \
vcipher(y, y, 4) \
vcipher(x, x, 5) \
vcipher(y, y, 5) \
vcipher(x, x, 6) \
vcipher(y, y, 6) \
vcipher(x, x, 7) \
vcipher(y, y, 7) \
vcipher(x, x, 8) \
vcipher(y, y, 8) \
vcipher(x, x, 9) \
vcipher(y, y, 9) \
vcipherlast(x, x, 10) \
vcipherlast(y, y, 10)
/* Two-block parallel encryption, AES-192. */
#define BLOCK_ENCRYPT_X2_192(x, y) \
vxor(x, x, 0) \
vxor(y, y, 0) \
vcipher(x, x, 1) \
vcipher(y, y, 1) \
vcipher(x, x, 2) \
vcipher(y, y, 2) \
vcipher(x, x, 3) \
vcipher(y, y, 3) \
vcipher(x, x, 4) \
vcipher(y, y, 4) \
vcipher(x, x, 5) \
vcipher(y, y, 5) \
vcipher(x, x, 6) \
vcipher(y, y, 6) \
vcipher(x, x, 7) \
vcipher(y, y, 7) \
vcipher(x, x, 8) \
vcipher(y, y, 8) \
vcipher(x, x, 9) \
vcipher(y, y, 9) \
vcipher(x, x, 10) \
vcipher(y, y, 10) \
vcipher(x, x, 11) \
vcipher(y, y, 11) \
vcipherlast(x, x, 12) \
vcipherlast(y, y, 12)
/* Two-block parallel encryption, AES-256. */
#define BLOCK_ENCRYPT_X2_256(x, y) \
vxor(x, x, 0) \
vxor(y, y, 0) \
vcipher(x, x, 1) \
vcipher(y, y, 1) \
vcipher(x, x, 2) \
vcipher(y, y, 2) \
vcipher(x, x, 3) \
vcipher(y, y, 3) \
vcipher(x, x, 4) \
vcipher(y, y, 4) \
vcipher(x, x, 5) \
vcipher(y, y, 5) \
vcipher(x, x, 6) \
vcipher(y, y, 6) \
vcipher(x, x, 7) \
vcipher(y, y, 7) \
vcipher(x, x, 8) \
vcipher(y, y, 8) \
vcipher(x, x, 9) \
vcipher(y, y, 9) \
vcipher(x, x, 10) \
vcipher(y, y, 10) \
vcipher(x, x, 11) \
vcipher(y, y, 11) \
vcipher(x, x, 12) \
vcipher(y, y, 12) \
vcipher(x, x, 13) \
vcipher(y, y, 13) \
vcipherlast(x, x, 14) \
vcipherlast(y, y, 14)
/*
 * AES-encrypt four blocks (registers x0..x3) with the round
 * instructions interleaved across the four independent AES instances.
 */
#define BLOCK_ENCRYPT_X4_128(x0, x1, x2, x3) \
vxor(x0, x0, 0) \
vxor(x1, x1, 0) \
vxor(x2, x2, 0) \
vxor(x3, x3, 0) \
vcipher(x0, x0, 1) \
vcipher(x1, x1, 1) \
vcipher(x2, x2, 1) \
vcipher(x3, x3, 1) \
vcipher(x0, x0, 2) \
vcipher(x1, x1, 2) \
vcipher(x2, x2, 2) \
vcipher(x3, x3, 2) \
vcipher(x0, x0, 3) \
vcipher(x1, x1, 3) \
vcipher(x2, x2, 3) \
vcipher(x3, x3, 3) \
vcipher(x0, x0, 4) \
vcipher(x1, x1, 4) \
vcipher(x2, x2, 4) \
vcipher(x3, x3, 4) \
vcipher(x0, x0, 5) \
vcipher(x1, x1, 5) \
vcipher(x2, x2, 5) \
vcipher(x3, x3, 5) \
vcipher(x0, x0, 6) \
vcipher(x1, x1, 6) \
vcipher(x2, x2, 6) \
vcipher(x3, x3, 6) \
vcipher(x0, x0, 7) \
vcipher(x1, x1, 7) \
vcipher(x2, x2, 7) \
vcipher(x3, x3, 7) \
vcipher(x0, x0, 8) \
vcipher(x1, x1, 8) \
vcipher(x2, x2, 8) \
vcipher(x3, x3, 8) \
vcipher(x0, x0, 9) \
vcipher(x1, x1, 9) \
vcipher(x2, x2, 9) \
vcipher(x3, x3, 9) \
vcipherlast(x0, x0, 10) \
vcipherlast(x1, x1, 10) \
vcipherlast(x2, x2, 10) \
vcipherlast(x3, x3, 10)
/* Four-block parallel encryption, AES-192. */
#define BLOCK_ENCRYPT_X4_192(x0, x1, x2, x3) \
vxor(x0, x0, 0) \
vxor(x1, x1, 0) \
vxor(x2, x2, 0) \
vxor(x3, x3, 0) \
vcipher(x0, x0, 1) \
vcipher(x1, x1, 1) \
vcipher(x2, x2, 1) \
vcipher(x3, x3, 1) \
vcipher(x0, x0, 2) \
vcipher(x1, x1, 2) \
vcipher(x2, x2, 2) \
vcipher(x3, x3, 2) \
vcipher(x0, x0, 3) \
vcipher(x1, x1, 3) \
vcipher(x2, x2, 3) \
vcipher(x3, x3, 3) \
vcipher(x0, x0, 4) \
vcipher(x1, x1, 4) \
vcipher(x2, x2, 4) \
vcipher(x3, x3, 4) \
vcipher(x0, x0, 5) \
vcipher(x1, x1, 5) \
vcipher(x2, x2, 5) \
vcipher(x3, x3, 5) \
vcipher(x0, x0, 6) \
vcipher(x1, x1, 6) \
vcipher(x2, x2, 6) \
vcipher(x3, x3, 6) \
vcipher(x0, x0, 7) \
vcipher(x1, x1, 7) \
vcipher(x2, x2, 7) \
vcipher(x3, x3, 7) \
vcipher(x0, x0, 8) \
vcipher(x1, x1, 8) \
vcipher(x2, x2, 8) \
vcipher(x3, x3, 8) \
vcipher(x0, x0, 9) \
vcipher(x1, x1, 9) \
vcipher(x2, x2, 9) \
vcipher(x3, x3, 9) \
vcipher(x0, x0, 10) \
vcipher(x1, x1, 10) \
vcipher(x2, x2, 10) \
vcipher(x3, x3, 10) \
vcipher(x0, x0, 11) \
vcipher(x1, x1, 11) \
vcipher(x2, x2, 11) \
vcipher(x3, x3, 11) \
vcipherlast(x0, x0, 12) \
vcipherlast(x1, x1, 12) \
vcipherlast(x2, x2, 12) \
vcipherlast(x3, x3, 12)
/* Four-block parallel encryption, AES-256. */
#define BLOCK_ENCRYPT_X4_256(x0, x1, x2, x3) \
vxor(x0, x0, 0) \
vxor(x1, x1, 0) \
vxor(x2, x2, 0) \
vxor(x3, x3, 0) \
vcipher(x0, x0, 1) \
vcipher(x1, x1, 1) \
vcipher(x2, x2, 1) \
vcipher(x3, x3, 1) \
vcipher(x0, x0, 2) \
vcipher(x1, x1, 2) \
vcipher(x2, x2, 2) \
vcipher(x3, x3, 2) \
vcipher(x0, x0, 3) \
vcipher(x1, x1, 3) \
vcipher(x2, x2, 3) \
vcipher(x3, x3, 3) \
vcipher(x0, x0, 4) \
vcipher(x1, x1, 4) \
vcipher(x2, x2, 4) \
vcipher(x3, x3, 4) \
vcipher(x0, x0, 5) \
vcipher(x1, x1, 5) \
vcipher(x2, x2, 5) \
vcipher(x3, x3, 5) \
vcipher(x0, x0, 6) \
vcipher(x1, x1, 6) \
vcipher(x2, x2, 6) \
vcipher(x3, x3, 6) \
vcipher(x0, x0, 7) \
vcipher(x1, x1, 7) \
vcipher(x2, x2, 7) \
vcipher(x3, x3, 7) \
vcipher(x0, x0, 8) \
vcipher(x1, x1, 8) \
vcipher(x2, x2, 8) \
vcipher(x3, x3, 8) \
vcipher(x0, x0, 9) \
vcipher(x1, x1, 9) \
vcipher(x2, x2, 9) \
vcipher(x3, x3, 9) \
vcipher(x0, x0, 10) \
vcipher(x1, x1, 10) \
vcipher(x2, x2, 10) \
vcipher(x3, x3, 10) \
vcipher(x0, x0, 11) \
vcipher(x1, x1, 11) \
vcipher(x2, x2, 11) \
vcipher(x3, x3, 11) \
vcipher(x0, x0, 12) \
vcipher(x1, x1, 12) \
vcipher(x2, x2, 12) \
vcipher(x3, x3, 12) \
vcipher(x0, x0, 13) \
vcipher(x1, x1, 13) \
vcipher(x2, x2, 13) \
vcipher(x3, x3, 13) \
vcipherlast(x0, x0, 14) \
vcipherlast(x1, x1, 14) \
vcipherlast(x2, x2, 14) \
vcipherlast(x3, x3, 14)
#if BR_POWER8_LE
/*
 * On little-endian systems, 32-bit words must be byteswapped after
 * loading and before storing; idx2be is the vperm pattern for that
 * swap (kept in v15, i.e. VSX register 47).
 */
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#define BYTESWAP_INIT lxvw4x(47, 0, %[idx2be])
#define BYTESWAP(x) vperm(x, x, x, 15)
#define BYTESWAPX(d, s) vperm(d, s, s, 15)
#define BYTESWAP_REG , [idx2be] "b" (idx2be)
#else
/* Big-endian: no swap needed; BYTESWAPX degrades to a register move. */
#define BYTESWAP_INIT
#define BYTESWAP(x)
#define BYTESWAPX(d, s) vand(d, s, s)
#define BYTESWAP_REG
#endif
/* 128-bit counter increments: by 1 (single block) and by 4 (groups of 4). */
static const uint32_t ctrinc[] = {
0, 0, 0, 1
};
static const uint32_t ctrinc_x4[] = {
0, 0, 0, 4
};
/* Load the counter increment constant into v28 (VSX register 60). */
#define INCR_128_INIT lxvw4x(60, 0, %[ctrinc])
#define INCR_128_X4_INIT lxvw4x(60, 0, %[ctrinc_x4])
/*
 * 128-bit big-endian increment: d = s + v28. vaddcuw produces the
 * per-word carry bits, which vsldoi rotates into the next-higher
 * 32-bit word before re-adding; three such rounds propagate a carry
 * across all four words. v29 and v30 are used as scratch.
 */
#define INCR_128(d, s) \
vaddcuw(29, s, 28) \
vadduwm(d, s, 28) \
vsldoi(30, 29, 29, 4) \
vaddcuw(29, d, 30) \
vadduwm(d, d, 30) \
vsldoi(30, 29, 29, 4) \
vaddcuw(29, d, 30) \
vadduwm(d, d, 30) \
vsldoi(30, 29, 29, 4) \
vadduwm(d, d, 30)
/*
 * MKCTR(size) expands to ctr_<size>(sk, ctrbuf, buf, num_blocks_x4):
 * XOR buf in place with the CTR keystream, processing num_blocks_x4
 * groups of four 16-byte blocks. ctrbuf holds four consecutive
 * 16-byte counter blocks and is updated in place with the next
 * counter values.
 */
#define MKCTR(size) \
static void \
ctr_ ## size(const unsigned char *sk, \
unsigned char *ctrbuf, unsigned char *buf, size_t num_blocks_x4) \
{ \
long cc, cc0, cc1, cc2, cc3; \
 \
cc = 0; \
cc0 = 0; \
cc1 = 16; \
cc2 = 32; \
cc3 = 48; \
asm volatile ( \
 \
/* \
 * Load subkeys into v0..v10 (up to v12/v14 for larger keys) \
 */ \
LOAD_SUBKEYS_ ## size \
li(%[cc], 0) \
 \
BYTESWAP_INIT \
INCR_128_X4_INIT \
 \
/* \
 * Load current CTR counters into v16 to v19. \
 */ \
lxvw4x(48, %[cc0], %[ctrbuf]) \
lxvw4x(49, %[cc1], %[ctrbuf]) \
lxvw4x(50, %[cc2], %[ctrbuf]) \
lxvw4x(51, %[cc3], %[ctrbuf]) \
BYTESWAP(16) \
BYTESWAP(17) \
BYTESWAP(18) \
BYTESWAP(19) \
 \
mtctr(%[num_blocks_x4]) \
 \
label(loop) \
/* \
 * Compute next counter values into v20..v23. \
 */ \
INCR_128(20, 16) \
INCR_128(21, 17) \
INCR_128(22, 18) \
INCR_128(23, 19) \
 \
/* \
 * Encrypt counter values and XOR into next data blocks. \
 */ \
lxvw4x(56, %[cc0], %[buf]) \
lxvw4x(57, %[cc1], %[buf]) \
lxvw4x(58, %[cc2], %[buf]) \
lxvw4x(59, %[cc3], %[buf]) \
BYTESWAP(24) \
BYTESWAP(25) \
BYTESWAP(26) \
BYTESWAP(27) \
BLOCK_ENCRYPT_X4_ ## size(16, 17, 18, 19) \
vxor(16, 16, 24) \
vxor(17, 17, 25) \
vxor(18, 18, 26) \
vxor(19, 19, 27) \
BYTESWAP(16) \
BYTESWAP(17) \
BYTESWAP(18) \
BYTESWAP(19) \
stxvw4x(48, %[cc0], %[buf]) \
stxvw4x(49, %[cc1], %[buf]) \
stxvw4x(50, %[cc2], %[buf]) \
stxvw4x(51, %[cc3], %[buf]) \
 \
/* \
 * Update counters and data pointer. \
 */ \
vand(16, 20, 20) \
vand(17, 21, 21) \
vand(18, 22, 22) \
vand(19, 23, 23) \
addi(%[buf], %[buf], 64) \
 \
bdnz(loop) \
 \
/* \
 * Write back new counter values. \
 */ \
BYTESWAP(16) \
BYTESWAP(17) \
BYTESWAP(18) \
BYTESWAP(19) \
stxvw4x(48, %[cc0], %[ctrbuf]) \
stxvw4x(49, %[cc1], %[ctrbuf]) \
stxvw4x(50, %[cc2], %[ctrbuf]) \
stxvw4x(51, %[cc3], %[ctrbuf]) \
 \
: [cc] "+b" (cc), [buf] "+b" (buf), \
[cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3) \
: [sk] "b" (sk), [ctrbuf] "b" (ctrbuf), \
[num_blocks_x4] "b" (num_blocks_x4), [ctrinc_x4] "b" (ctrinc_x4) \
BYTESWAP_REG \
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
"v30", "ctr", "memory" \
); \
}
MKCTR(128)
MKCTR(192)
MKCTR(256)
/*
 * MKCBCMAC(size) expands to cbcmac_<size>(sk, cbcmac, buf, num_blocks):
 * update the 16-byte CBC-MAC value in cbcmac over num_blocks 16-byte
 * blocks read from buf (buf is not modified).
 */
#define MKCBCMAC(size) \
static void \
cbcmac_ ## size(const unsigned char *sk, \
unsigned char *cbcmac, const unsigned char *buf, size_t num_blocks) \
{ \
long cc; \
 \
cc = 0; \
asm volatile ( \
 \
/* \
 * Load subkeys into v0..v10 (up to v12/v14 for larger keys) \
 */ \
LOAD_SUBKEYS_ ## size \
li(%[cc], 0) \
 \
BYTESWAP_INIT \
 \
/* \
 * Load current CBC-MAC value into v16. \
 */ \
lxvw4x(48, %[cc], %[cbcmac]) \
BYTESWAP(16) \
 \
mtctr(%[num_blocks]) \
 \
label(loop) \
/* \
 * Load next block, XOR into current CBC-MAC value, \
 * and then encrypt it. \
 */ \
lxvw4x(49, %[cc], %[buf]) \
BYTESWAP(17) \
vxor(16, 16, 17) \
BLOCK_ENCRYPT_ ## size(16) \
addi(%[buf], %[buf], 16) \
 \
bdnz(loop) \
 \
/* \
 * Write back new CBC-MAC value. \
 */ \
BYTESWAP(16) \
stxvw4x(48, %[cc], %[cbcmac]) \
 \
: [cc] "+b" (cc), [buf] "+b" (buf) \
: [sk] "b" (sk), [cbcmac] "b" (cbcmac), [num_blocks] "b" (num_blocks) \
BYTESWAP_REG \
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
"v30", "ctr", "memory" \
); \
}
MKCBCMAC(128)
MKCBCMAC(192)
MKCBCMAC(256)
/*
 * MKENCRYPT(size) expands to ctrcbc_<size>_encrypt(): CTR-encrypt
 * num_blocks 16-byte blocks in place while computing the CBC-MAC of
 * the produced ciphertext. The two AES computations attached to each
 * block are pipelined across successive loop iterations (see the
 * comments in the asm below). ctr and cbcmac (16 bytes each) are
 * updated in place.
 */
#define MKENCRYPT(size) \
static void \
ctrcbc_ ## size ## _encrypt(const unsigned char *sk, \
unsigned char *ctr, unsigned char *cbcmac, unsigned char *buf, \
size_t num_blocks) \
{ \
long cc; \
 \
cc = 0; \
asm volatile ( \
 \
/* \
 * Load subkeys into v0..v10 (up to v12/v14 for larger keys) \
 */ \
LOAD_SUBKEYS_ ## size \
li(%[cc], 0) \
 \
BYTESWAP_INIT \
INCR_128_INIT \
 \
/* \
 * Load current CTR counter into v16, and current \
 * CBC-MAC IV into v17. \
 */ \
lxvw4x(48, %[cc], %[ctr]) \
lxvw4x(49, %[cc], %[cbcmac]) \
BYTESWAP(16) \
BYTESWAP(17) \
 \
/* \
 * At each iteration, we do two parallel encryption: \
 * - new counter value for encryption of the next block; \
 * - CBC-MAC over the previous encrypted block. \
 * Thus, each plaintext block implies two AES instances, \
 * over two successive iterations. This requires a single \
 * counter encryption before the loop, and a single \
 * CBC-MAC encryption after the loop. \
 */ \
 \
/* \
 * Encrypt first block (into v20). \
 */ \
lxvw4x(52, %[cc], %[buf]) \
BYTESWAP(20) \
INCR_128(22, 16) \
BLOCK_ENCRYPT_ ## size(16) \
vxor(20, 20, 16) \
BYTESWAPX(21, 20) \
stxvw4x(53, %[cc], %[buf]) \
vand(16, 22, 22) \
addi(%[buf], %[buf], 16) \
 \
/* \
 * Load loop counter; skip the loop if there is only \
 * one block in total (already handled by the boundary \
 * conditions). \
 */ \
mtctr(%[num_blocks]) \
bdz(fastexit) \
 \
label(loop) \
/* \
 * Upon loop entry: \
 * v16 counter value for next block \
 * v17 current CBC-MAC value \
 * v20 encrypted previous block \
 */ \
vxor(17, 17, 20) \
INCR_128(22, 16) \
lxvw4x(52, %[cc], %[buf]) \
BYTESWAP(20) \
BLOCK_ENCRYPT_X2_ ## size(16, 17) \
vxor(20, 20, 16) \
BYTESWAPX(21, 20) \
stxvw4x(53, %[cc], %[buf]) \
addi(%[buf], %[buf], 16) \
vand(16, 22, 22) \
 \
bdnz(loop) \
 \
label(fastexit) \
vxor(17, 17, 20) \
BLOCK_ENCRYPT_ ## size(17) \
BYTESWAP(16) \
BYTESWAP(17) \
stxvw4x(48, %[cc], %[ctr]) \
stxvw4x(49, %[cc], %[cbcmac]) \
 \
: [cc] "+b" (cc), [buf] "+b" (buf) \
: [sk] "b" (sk), [ctr] "b" (ctr), [cbcmac] "b" (cbcmac), \
[num_blocks] "b" (num_blocks), [ctrinc] "b" (ctrinc) \
BYTESWAP_REG \
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
"v30", "ctr", "memory" \
); \
}
MKENCRYPT(128)
MKENCRYPT(192)
MKENCRYPT(256)
/*
 * MKDECRYPT(size) expands to ctrcbc_<size>_decrypt(): update the
 * CBC-MAC with each ciphertext block and CTR-decrypt it in place.
 * Since the MAC is computed over the ciphertext as read, both AES
 * instances for a block run in the same iteration and no pre/post
 * loop work is needed. ctr and cbcmac are updated in place.
 */
#define MKDECRYPT(size) \
static void \
ctrcbc_ ## size ## _decrypt(const unsigned char *sk, \
unsigned char *ctr, unsigned char *cbcmac, unsigned char *buf, \
size_t num_blocks) \
{ \
long cc; \
 \
cc = 0; \
asm volatile ( \
 \
/* \
 * Load subkeys into v0..v10 (up to v12/v14 for larger keys) \
 */ \
LOAD_SUBKEYS_ ## size \
li(%[cc], 0) \
 \
BYTESWAP_INIT \
INCR_128_INIT \
 \
/* \
 * Load current CTR counter into v16, and current \
 * CBC-MAC IV into v17. \
 */ \
lxvw4x(48, %[cc], %[ctr]) \
lxvw4x(49, %[cc], %[cbcmac]) \
BYTESWAP(16) \
BYTESWAP(17) \
 \
/* \
 * At each iteration, we do two parallel encryption: \
 * - new counter value for decryption of the next block; \
 * - CBC-MAC over the next encrypted block. \
 * Each iteration performs the two AES instances related \
 * to the current block; there is thus no need for some \
 * extra pre-loop and post-loop work as in encryption. \
 */ \
 \
mtctr(%[num_blocks]) \
 \
label(loop) \
/* \
 * Upon loop entry: \
 * v16 counter value for next block \
 * v17 current CBC-MAC value \
 */ \
lxvw4x(52, %[cc], %[buf]) \
BYTESWAP(20) \
vxor(17, 17, 20) \
INCR_128(22, 16) \
BLOCK_ENCRYPT_X2_ ## size(16, 17) \
vxor(20, 20, 16) \
BYTESWAPX(21, 20) \
stxvw4x(53, %[cc], %[buf]) \
addi(%[buf], %[buf], 16) \
vand(16, 22, 22) \
 \
bdnz(loop) \
 \
/* \
 * Store back counter and CBC-MAC value. \
 */ \
BYTESWAP(16) \
BYTESWAP(17) \
stxvw4x(48, %[cc], %[ctr]) \
stxvw4x(49, %[cc], %[cbcmac]) \
 \
: [cc] "+b" (cc), [buf] "+b" (buf) \
: [sk] "b" (sk), [ctr] "b" (ctr), [cbcmac] "b" (cbcmac), \
[num_blocks] "b" (num_blocks), [ctrinc] "b" (ctrinc) \
BYTESWAP_REG \
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
"v30", "ctr", "memory" \
); \
}
MKDECRYPT(128)
MKDECRYPT(192)
MKDECRYPT(256)
/* see bearssl_block.h */
void
br_aes_pwr8_ctrcbc_encrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
	void *ctr, void *cbcmac, void *data, size_t len)
{
	size_t num_blocks;

	/* Nothing to do on empty input. */
	if (len == 0) {
		return;
	}
	num_blocks = len >> 4;
	/* Dispatch on the round count recorded at key-schedule time. */
	if (ctx->num_rounds == 10) {
		ctrcbc_128_encrypt(ctx->skey.skni, ctr, cbcmac, data, num_blocks);
	} else if (ctx->num_rounds == 12) {
		ctrcbc_192_encrypt(ctx->skey.skni, ctr, cbcmac, data, num_blocks);
	} else {
		ctrcbc_256_encrypt(ctx->skey.skni, ctr, cbcmac, data, num_blocks);
	}
}
/* see bearssl_block.h */
void
br_aes_pwr8_ctrcbc_decrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
	void *ctr, void *cbcmac, void *data, size_t len)
{
	size_t num_blocks;

	/* Nothing to do on empty input. */
	if (len == 0) {
		return;
	}
	num_blocks = len >> 4;
	/* Dispatch on the round count recorded at key-schedule time. */
	if (ctx->num_rounds == 10) {
		ctrcbc_128_decrypt(ctx->skey.skni, ctr, cbcmac, data, num_blocks);
	} else if (ctx->num_rounds == 12) {
		ctrcbc_192_decrypt(ctx->skey.skni, ctr, cbcmac, data, num_blocks);
	} else {
		ctrcbc_256_decrypt(ctx->skey.skni, ctr, cbcmac, data, num_blocks);
	}
}
/*
 * 128-bit big-endian increment: dst = src + 1, computed on two
 * 64-bit halves (dst and src are 16-byte buffers; they may alias).
 */
static inline void
incr_ctr(void *dst, const void *src)
{
uint64_t hi, lo;
hi = br_dec64be(src);
lo = br_dec64be((const unsigned char *)src + 8);
lo ++;
/*
 * Branchless carry: (lo | -lo) >> 63 is 1 when lo != 0, and 0 when
 * lo == 0 (i.e. when the low half just wrapped), so the XOR with 1
 * adds exactly the carry bit. Keep this branch-free form.
 */
hi += ((lo | -lo) >> 63) ^ (uint64_t)1;
br_enc64be(dst, hi);
br_enc64be((unsigned char *)dst + 8, lo);
}
/* see bearssl_block.h */
void
br_aes_pwr8_ctrcbc_ctr(const br_aes_pwr8_ctrcbc_keys *ctx,
void *ctr, void *data, size_t len)
{
unsigned char ctrbuf[64];
/*
 * Build four consecutive counter blocks so the 4-way parallel asm
 * routines can be used.
 */
memcpy(ctrbuf, ctr, 16);
incr_ctr(ctrbuf + 16, ctrbuf);
incr_ctr(ctrbuf + 32, ctrbuf + 16);
incr_ctr(ctrbuf + 48, ctrbuf + 32);
if (len >= 64) {
/* Process all full 64-byte groups; ctr_*() updates ctrbuf in place. */
switch (ctx->num_rounds) {
case 10:
ctr_128(ctx->skey.skni, ctrbuf, data, len >> 6);
break;
case 12:
ctr_192(ctx->skey.skni, ctrbuf, data, len >> 6);
break;
default:
ctr_256(ctx->skey.skni, ctrbuf, data, len >> 6);
break;
}
data = (unsigned char *)data + (len & ~(size_t)63);
len &= 63;
}
if (len > 0) {
unsigned char tmp[64];
/*
 * Tail of 1..63 bytes: write back into *ctr the counter block
 * matching the last (possibly partial) block consumed by the
 * padded tail processing below.
 * NOTE(review): when len < 16, *ctr is left untouched by these
 * branches -- confirm the partial-block counter convention
 * against bearssl_block.h.
 */
if (len >= 32) {
if (len >= 48) {
memcpy(ctr, ctrbuf + 48, 16);
} else {
memcpy(ctr, ctrbuf + 32, 16);
}
} else {
if (len >= 16) {
memcpy(ctr, ctrbuf + 16, 16);
}
}
/* Pad the tail into a full 64-byte group and process it. */
memcpy(tmp, data, len);
memset(tmp + len, 0, (sizeof tmp) - len);
switch (ctx->num_rounds) {
case 10:
ctr_128(ctx->skey.skni, ctrbuf, tmp, 1);
break;
case 12:
ctr_192(ctx->skey.skni, ctrbuf, tmp, 1);
break;
default:
ctr_256(ctx->skey.skni, ctrbuf, tmp, 1);
break;
}
memcpy(data, tmp, len);
} else {
/* Exact multiple of 64 bytes: next counter is ctrbuf[0]. */
memcpy(ctr, ctrbuf, 16);
}
}
/* see bearssl_block.h */
void
br_aes_pwr8_ctrcbc_mac(const br_aes_pwr8_ctrcbc_keys *ctx,
	void *cbcmac, const void *data, size_t len)
{
	size_t num_blocks;

	/* Nothing to do on empty input. */
	if (len == 0) {
		return;
	}
	num_blocks = len >> 4;
	/* Dispatch on the round count recorded at key-schedule time. */
	if (ctx->num_rounds == 10) {
		cbcmac_128(ctx->skey.skni, cbcmac, data, num_blocks);
	} else if (ctx->num_rounds == 12) {
		cbcmac_192(ctx->skey.skni, cbcmac, data, num_blocks);
	} else {
		cbcmac_256(ctx->skey.skni, cbcmac, data, num_blocks);
	}
}
/* see bearssl_block.h */
const br_block_ctrcbc_class br_aes_pwr8_ctrcbc_vtable = {
sizeof(br_aes_pwr8_ctrcbc_keys), /* context structure size */
16, /* AES block size, in bytes */
4, /* presumably log2(block size) -- see bearssl_block.h */
(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
&br_aes_pwr8_ctrcbc_init,
(void (*)(const br_block_ctrcbc_class *const *,
void *, void *, void *, size_t))
&br_aes_pwr8_ctrcbc_encrypt,
(void (*)(const br_block_ctrcbc_class *const *,
void *, void *, void *, size_t))
&br_aes_pwr8_ctrcbc_decrypt,
(void (*)(const br_block_ctrcbc_class *const *,
void *, void *, size_t))
&br_aes_pwr8_ctrcbc_ctr,
(void (*)(const br_block_ctrcbc_class *const *,
void *, const void *, size_t))
&br_aes_pwr8_ctrcbc_mac
};
#else
/* see bearssl_block.h */
const br_block_ctrcbc_class *
br_aes_pwr8_ctrcbc_get_vtable(void)
{
/* POWER8 support was not compiled in: no vtable available. */
return NULL;
}
#endif

View File

@ -1,411 +0,0 @@
/*
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define BR_POWER_ASM_MACROS 1
#include "inner.h"
/*
* This is the GHASH implementation that leverages the POWER8 opcodes.
*/
#if BR_POWER8
/*
* Some symbolic names for registers.
* HB0 = 16 bytes of value 0
* HB1 = 16 bytes of value 1
* HB2 = 16 bytes of value 2
* HB6 = 16 bytes of value 6
* HB7 = 16 bytes of value 7
* TT0, TT1 and TT2 are temporaries
*
* BSW holds the pattern for byteswapping 32-bit words; this is set only
* on little-endian systems. XBSW is the same register with the +32 offset
* for access with the VSX opcodes.
*/
/* Symbolic vector register numbers (see the naming comment above). */
#define HB0 0
#define HB1 1
#define HB2 2
#define HB6 3
#define HB7 4
#define TT0 5
#define TT1 6
#define TT2 7
#define BSW 8
/* BSW viewed as a VSX register (vector registers are VSX 32..63). */
#define XBSW 40
/*
 * Macro to initialise the constants: HB0 is zeroed, and HB1/HB2/
 * HB6/HB7 are splatted with the byte values 1, 2, 6 and 7.
 */
#define INIT \
vxor(HB0, HB0, HB0) \
vspltisb(HB1, 1) \
vspltisb(HB2, 2) \
vspltisb(HB6, 6) \
vspltisb(HB7, 7) \
INIT_BSW
/*
 * Fix endianness of a value after reading it or before writing it, if
 * necessary (little-endian only; both macros are no-ops on big-endian).
 */
#if BR_POWER8_LE
#define INIT_BSW lxvw4x(XBSW, 0, %[idx2be])
#define FIX_ENDIAN(xx) vperm(xx, xx, xx, BSW)
#else
#define INIT_BSW
#define FIX_ENDIAN(xx)
#endif
/*
 * Left-shift x0:x1 by one bit to the left. This is a corrective action
 * needed because GHASH is defined in full little-endian specification,
 * while the opcodes use full big-endian convention, so the 255-bit product
 * ends up one bit to the right. (The byte-level vsldoi followed by the
 * 7-bit vsr moves x1's top bit into position under x0's low bit.)
 */
#define SL_256(x0, x1) \
vsldoi(TT0, HB0, x1, 1) \
vsl(x0, x0, HB1) \
vsr(TT0, TT0, HB7) \
vsl(x1, x1, HB1) \
vxor(x0, x0, TT0)
/*
 * Reduce x0:x1 in GF(2^128), result in xd (register xd may be the same as
 * x0 or x1, or a different register). x0 and x1 are modified.
 * The 1-, 2- and 7-bit shifts correspond to the terms of the GHASH
 * reduction polynomial (x^128 + x^7 + x^2 + x + 1) -- presumably; see
 * the GHASH specification.
 */
#define REDUCE_F128(xd, x0, x1) \
vxor(x0, x0, x1) \
vsr(TT0, x1, HB1) \
vsr(TT1, x1, HB2) \
vsr(TT2, x1, HB7) \
vxor(x0, x0, TT0) \
vxor(TT1, TT1, TT2) \
vxor(x0, x0, TT1) \
vsldoi(x1, x1, HB0, 15) \
vsl(TT1, x1, HB6) \
vsl(TT2, x1, HB1) \
vxor(x1, TT1, TT2) \
vsr(TT0, x1, HB1) \
vsr(TT1, x1, HB2) \
vsr(TT2, x1, HB7) \
vxor(x0, x0, x1) \
vxor(x0, x0, TT0) \
vxor(TT1, TT1, TT2) \
vxor(xd, x0, TT1)
/* see bearssl_hash.h */
/*
 * GHASH implementation using POWER8 carry-less multiply (vpmsumd).
 * y points to the 16-byte running hash value (updated in place), h is
 * the 16-byte hash key, and data/len is the buffer to absorb. A partial
 * trailing block (len not a multiple of 16) is zero-padded into tmp[].
 *
 * Register naming note: VSX register N (used by lxvw4x/xxpermdi/stxvw4x)
 * aliases vector register N-32 (used by the v* opcodes), e.g. VSX 41
 * is v9 — see the XBSW comment above.
 */
void
br_ghash_pwr8(void *y, const void *h, const void *data, size_t len)
{
	const unsigned char *buf1, *buf2;
	size_t num4, num1;
	unsigned char tmp[64];
	/* Byte offsets of the four blocks within a 64-byte group; kept in
	   variables so the asm can use them as index registers for lxvw4x. */
	long cc0, cc1, cc2, cc3;

#if BR_POWER8_LE
	/* Permutation pattern to byteswap 32-bit words (for FIX_ENDIAN). */
	static const uint32_t idx2be[] = {
		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
	};
#endif

	buf1 = data;

	/*
	 * Assembly code requires data into two chunks; first chunk
	 * must contain a number of blocks which is a multiple of 4.
	 * Since the processing for the first chunk is faster, we want
	 * to make it as big as possible.
	 *
	 * For the remainder, there are two possibilities:
	 * -- if the remainder size is a multiple of 16, then use it
	 *    in place;
	 * -- otherwise, copy it to the tmp[] array and pad it with
	 *    zeros.
	 */
	num4 = len >> 6;
	buf2 = buf1 + (num4 << 6);
	len &= 63;
	num1 = (len + 15) >> 4;
	if ((len & 15) != 0) {
		memcpy(tmp, buf2, len);
		memset(tmp + len, 0, (num1 << 4) - len);
		buf2 = tmp;
	}

	cc0 = 0;
	cc1 = 16;
	cc2 = 32;
	cc3 = 48;
	asm volatile (
		INIT

		/*
		 * Load current h (denoted hereafter h1) in v9.
		 */
		lxvw4x(41, 0, %[h])
		FIX_ENDIAN(9)

		/*
		 * Load current y into v28.
		 */
		lxvw4x(60, 0, %[y])
		FIX_ENDIAN(28)

		/*
		 * Split h1 into three registers:
		 *   v17 = h1_1:h1_0
		 *   v18 = 0:h1_0
		 *   v19 = h1_1:0
		 */
		xxpermdi(49, 41, 41, 2)
		vsldoi(18, HB0, 9, 8)
		vsldoi(19, 9, HB0, 8)

		/*
		 * If num4 is 0, skip directly to the second chunk.
		 */
		cmpldi(%[num4], 0)
		beq(chunk1)

		/*
		 * Compute h2 = h*h in v10.
		 */
		vpmsumd(10, 18, 18)
		vpmsumd(11, 19, 19)
		SL_256(10, 11)
		REDUCE_F128(10, 10, 11)

		/*
		 * Compute h3 = h*h*h in v11.
		 * We first split h2 into:
		 *   v10 = h2_0:h2_1
		 *   v11 = 0:h2_0
		 *   v12 = h2_1:0
		 * Then we do the product with h1, and reduce into v11.
		 */
		vsldoi(11, HB0, 10, 8)
		vsldoi(12, 10, HB0, 8)
		vpmsumd(13, 10, 17)
		vpmsumd(11, 11, 18)
		vpmsumd(12, 12, 19)
		vsldoi(14, HB0, 13, 8)
		vsldoi(15, 13, HB0, 8)
		vxor(11, 11, 14)
		vxor(12, 12, 15)
		SL_256(11, 12)
		REDUCE_F128(11, 11, 12)

		/*
		 * Compute h4 = h*h*h*h in v12. This is done by squaring h2.
		 */
		vsldoi(12, HB0, 10, 8)
		vsldoi(13, 10, HB0, 8)
		vpmsumd(12, 12, 12)
		vpmsumd(13, 13, 13)
		SL_256(12, 13)
		REDUCE_F128(12, 12, 13)

		/*
		 * Repack h1, h2, h3 and h4:
		 *   v13 = h4_0:h3_0
		 *   v14 = h4_1:h3_1
		 *   v15 = h2_0:h1_0
		 *   v16 = h2_1:h1_1
		 */
		xxpermdi(45, 44, 43, 0)
		xxpermdi(46, 44, 43, 3)
		xxpermdi(47, 42, 41, 0)
		xxpermdi(48, 42, 41, 3)

		/*
		 * Loop for each group of four blocks.
		 */
		mtctr(%[num4])
	label(loop4)
		/*
		 * Read the four next blocks.
		 *   v20 = y + a0 = b0
		 *   v21 = a1 = b1
		 *   v22 = a2 = b2
		 *   v23 = a3 = b3
		 */
		lxvw4x(52, %[cc0], %[buf1])
		lxvw4x(53, %[cc1], %[buf1])
		lxvw4x(54, %[cc2], %[buf1])
		lxvw4x(55, %[cc3], %[buf1])
		FIX_ENDIAN(20)
		FIX_ENDIAN(21)
		FIX_ENDIAN(22)
		FIX_ENDIAN(23)
		addi(%[buf1], %[buf1], 64)
		vxor(20, 20, 28)

		/*
		 * Repack the blocks into v9, v10, v11 and v12.
		 *   v9  = b0_0:b1_0
		 *   v10 = b0_1:b1_1
		 *   v11 = b2_0:b3_0
		 *   v12 = b2_1:b3_1
		 */
		xxpermdi(41, 52, 53, 0)
		xxpermdi(42, 52, 53, 3)
		xxpermdi(43, 54, 55, 0)
		xxpermdi(44, 54, 55, 3)

		/*
		 * Compute the products.
		 *   v20 = b0_0*h4_0 + b1_0*h3_0
		 *   v21 = b0_1*h4_0 + b1_1*h3_0
		 *   v22 = b0_0*h4_1 + b1_0*h3_1
		 *   v23 = b0_1*h4_1 + b1_1*h3_1
		 *   v24 = b2_0*h2_0 + b3_0*h1_0
		 *   v25 = b2_1*h2_0 + b3_1*h1_0
		 *   v26 = b2_0*h2_1 + b3_0*h1_1
		 *   v27 = b2_1*h2_1 + b3_1*h1_1
		 */
		vpmsumd(20, 13, 9)
		vpmsumd(21, 13, 10)
		vpmsumd(22, 14, 9)
		vpmsumd(23, 14, 10)
		vpmsumd(24, 15, 11)
		vpmsumd(25, 15, 12)
		vpmsumd(26, 16, 11)
		vpmsumd(27, 16, 12)

		/*
		 * Sum products into a single 256-bit result in v11:v12.
		 */
		vxor(11, 20, 24)
		vxor(12, 23, 27)
		vxor( 9, 21, 22)
		vxor(10, 25, 26)
		vxor(20, 9, 10)
		vsldoi( 9, HB0, 20, 8)
		vsldoi(10, 20, HB0, 8)
		vxor(11, 11, 9)
		vxor(12, 12, 10)

		/*
		 * Fix and reduce in GF(2^128); this is the new y (in v28).
		 */
		SL_256(11, 12)
		REDUCE_F128(28, 11, 12)

		/*
		 * Loop for next group of four blocks.
		 */
		bdnz(loop4)

		/*
		 * Process second chunk, one block at a time.
		 */
	label(chunk1)
		cmpldi(%[num1], 0)
		beq(done)

		mtctr(%[num1])
	label(loop1)
		/*
		 * Load next data block and XOR it into y.
		 */
		lxvw4x(41, 0, %[buf2])
#if BR_POWER8_LE
		FIX_ENDIAN(9)
#endif
		addi(%[buf2], %[buf2], 16)
		vxor(9, 28, 9)

		/*
		 * Split y into doublewords:
		 *   v9  = y_0:y_1
		 *   v10 = 0:y_0
		 *   v11 = y_1:0
		 */
		vsldoi(10, HB0, 9, 8)
		vsldoi(11, 9, HB0, 8)

		/*
		 * Compute products with h:
		 *   v12 = y_0 * h_0
		 *   v13 = y_1 * h_1
		 *   v14 = y_1 * h_0 + y_0 * h_1
		 */
		vpmsumd(14, 9, 17)
		vpmsumd(12, 10, 18)
		vpmsumd(13, 11, 19)

		/*
		 * Propagate v14 into v12:v13 to finalise product.
		 */
		vsldoi(10, HB0, 14, 8)
		vsldoi(11, 14, HB0, 8)
		vxor(12, 12, 10)
		vxor(13, 13, 11)

		/*
		 * Fix result and reduce into v28 (next value for y).
		 */
		SL_256(12, 13)
		REDUCE_F128(28, 12, 13)
		bdnz(loop1)

	label(done)
		/*
		 * Write back the new y.
		 */
		FIX_ENDIAN(28)
		stxvw4x(60, 0, %[y])

		/* buf1/buf2 advance through the data, hence read-write ("+b"). */
	: [buf1] "+b" (buf1), [buf2] "+b" (buf2)
	: [y] "b" (y), [h] "b" (h), [num4] "b" (num4), [num1] "b" (num1),
	  [cc0] "b" (cc0), [cc1] "b" (cc1), [cc2] "b" (cc2), [cc3] "b" (cc3)
#if BR_POWER8_LE
	, [idx2be] "b" (idx2be)
#endif
	/* All vector registers used above (v0-v29), the count register used
	   by mtctr/bdnz, and memory (y is written) are clobbered. */
	: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
	  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
	  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
	  "ctr", "memory"
	);
}
/* see bearssl_hash.h */
br_ghash
br_ghash_pwr8_get(void)
{
	/* POWER8 opcodes were compiled in; hand out the implementation. */
	br_ghash impl;

	impl = &br_ghash_pwr8;
	return impl;
}
#else
/* see bearssl_hash.h */
br_ghash
br_ghash_pwr8_get(void)
{
	/*
	 * Built without POWER8 support: report that no implementation
	 * is available.
	 */
	br_ghash none;

	none = 0;
	return none;
}
#endif

View File

@ -1913,19 +1913,6 @@ unsigned br_aes_x86ni_keysched_enc(unsigned char *skni,
unsigned br_aes_x86ni_keysched_dec(unsigned char *skni,
const void *key, size_t len);
/*
* Test support for AES POWER8 opcodes.
*/
int br_aes_pwr8_supported(void);
/*
* AES key schedule, using POWER8 instructions. This yields the
* subkeys in the encryption direction. Number of rounds is returned.
* Key size MUST be 16, 24 or 32 bytes; otherwise, 0 is returned.
*/
unsigned br_aes_pwr8_keysched(unsigned char *skni,
const void *key, size_t len);
/* ==================================================================== */
/*
* RSA.

View File

@ -229,13 +229,6 @@ static const br_config_option config[] = {
1
#else
0
#endif
},
{ "BR_POWER8",
#if BR_POWER8
1
#else
0
#endif
},
{ "BR_RDRAND",

View File

@ -28,7 +28,7 @@
void
br_ssl_engine_set_default_aes_cbc(br_ssl_engine_context *cc)
{
#if BR_AES_X86NI || BR_POWER8
#if BR_AES_X86NI
const br_block_cbcenc_class *ienc;
const br_block_cbcdec_class *idec;
#endif
@ -44,14 +44,6 @@ br_ssl_engine_set_default_aes_cbc(br_ssl_engine_context *cc)
return;
}
#endif
#if BR_POWER8
ienc = br_aes_pwr8_cbcenc_get_vtable();
idec = br_aes_pwr8_cbcdec_get_vtable();
if (ienc != NULL && idec != NULL) {
br_ssl_engine_set_aes_cbc(cc, ienc, idec);
return;
}
#endif
#if BR_64
br_ssl_engine_set_aes_cbc(cc,
&br_aes_ct64_cbcenc_vtable,

View File

@ -28,7 +28,7 @@
void
br_ssl_engine_set_default_aes_ccm(br_ssl_engine_context *cc)
{
#if BR_AES_X86NI || BR_POWER8
#if BR_AES_X86NI
const br_block_ctrcbc_class *ictrcbc;
#endif
@ -46,17 +46,6 @@ br_ssl_engine_set_default_aes_ccm(br_ssl_engine_context *cc)
br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct_ctrcbc_vtable);
#endif
}
#elif BR_POWER8
ictrcbc = br_aes_pwr8_ctrcbc_get_vtable();
if (ictrcbc != NULL) {
br_ssl_engine_set_aes_ctrcbc(cc, ictrcbc);
} else {
#if BR_64
br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct64_ctrcbc_vtable);
#else
br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct_ctrcbc_vtable);
#endif
}
#else
#if BR_64
br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct64_ctrcbc_vtable);

View File

@ -28,7 +28,7 @@
void
br_ssl_engine_set_default_aes_gcm(br_ssl_engine_context *cc)
{
#if BR_AES_X86NI || BR_POWER8
#if BR_AES_X86NI
const br_block_ctr_class *ictr;
br_ghash ighash;
#endif
@ -47,17 +47,6 @@ br_ssl_engine_set_default_aes_gcm(br_ssl_engine_context *cc)
br_ssl_engine_set_aes_ctr(cc, &br_aes_ct_ctr_vtable);
#endif
}
#elif BR_POWER8
ictr = br_aes_pwr8_ctr_get_vtable();
if (ictr != NULL) {
br_ssl_engine_set_aes_ctr(cc, ictr);
} else {
#if BR_64
br_ssl_engine_set_aes_ctr(cc, &br_aes_ct64_ctr_vtable);
#else
br_ssl_engine_set_aes_ctr(cc, &br_aes_ct_ctr_vtable);
#endif
}
#else
#if BR_64
br_ssl_engine_set_aes_ctr(cc, &br_aes_ct64_ctr_vtable);
@ -72,13 +61,6 @@ br_ssl_engine_set_default_aes_gcm(br_ssl_engine_context *cc)
return;
}
#endif
#if BR_POWER8
ighash = br_ghash_pwr8_get();
if (ighash != 0) {
br_ssl_engine_set_ghash(cc, ighash);
return;
}
#endif
#if BR_LOMUL
br_ssl_engine_set_ghash(cc, &br_ghash_ctmul32);
#elif BR_64