WIP on adding BearSSL SSL backend

2023-09-13 21:52:35 +10:00 · 2023-09-13 21:52:35 +10:00 · 797f75140f
parent 08e93f7562
commit 797f75140f
313 changed files with 77542 additions and 0 deletions
--- a/license.txt
+++ b/license.txt
@ -150,6 +150,33 @@ SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
 ==============================================================================


+BearSSL license
+==============================================================================
+Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+
+Permission is hereby granted, free of charge, to any person obtaining 
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be 
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+==============================================================================
+
+
+
 FreeType license
 ==================
                    The FreeType Project LICENSE
--- a/src/SSL.c
+++ b/src/SSL.c
@ -417,6 +417,174 @@ cc_result SSL_Free(void* ctx_) {
 	Mem_Free(ctx);
 	return 0; 
 }
+#elif defined CC_BUILD_BEARSSL
+#include "bearssl.h"
+#define CERT_ATTRIBUTES
+#include "../misc/RootCerts.h"
+#include "String.h"
+// https://github.com/unkaktus/bearssl/blob/master/samples/client_basic.c#L283
+
+typedef struct SSLContext { /* per-connection state; allocated in SSL_Init, released in SSL_Free */
+	br_ssl_client_context sc; /* TLS client engine state */
+	br_x509_minimal_context xc; /* X.509 validation state, attached to sc by br_ssl_client_init_full */
+	unsigned char iobuf[BR_SSL_BUFSIZE_BIDI]; /* record buffer handed to the engine via br_ssl_engine_set_buffer */
+	br_sslio_context ioc; /* simplified I/O wrapper that drives the engine through sock_read/sock_write */
+} SSLContext;
+
+static cc_bool _verifyCerts; /* value last passed to SSLBackend_Init */
+
+
+/* Remembers whether the caller wants server certificates to be verified */
+void SSLBackend_Init(cc_bool verifyCerts) {
+	// TODO support
+	_verifyCerts = verifyCerts;
+}
+cc_bool SSLBackend_DescribeError(cc_result res, cc_string* dst) { return false; }
+
+/* Low-level read callback registered with br_sslio_init.
+ *   ctx - the socket handle that SSL_Init passed through the void* context argument
+ *   buf - destination for the received bytes
+ *   len - maximum number of bytes to read
+ * Returns the number of bytes read (at least 1), or -1 on failure or end of stream. */
+static int sock_read(void *ctx, unsigned char *buf, size_t len) {
+	cc_uint32 read;
+	/* Recover the handle as cc_socket (the type SSL_Init cast to void*) instead of int; */
+	/* going through size_t avoids casting a pointer straight to a narrower integer */
+	cc_result res = Socket_Read((cc_socket)(size_t)ctx, buf, len, &read);
+	
+	if (res) return -1;
+	/* BearSSL's callback contract is "at least 1 byte, or -1": returning 0 makes the */
+	/* br_sslio loop call this function again straight away, so a peer that closed */
+	/* the connection (0 bytes, no error) has to be reported as a failure */
+	if (!read) return -1;
+	return read;
+}
+/* Low-level write callback registered with br_sslio_init.
+ *   ctx - the socket handle that SSL_Init passed through the void* context argument
+ *   buf - bytes to send
+ *   len - number of bytes available in buf
+ * Returns the number of bytes Socket_Write reported as written, or -1 on failure. */
+static int sock_write(void *ctx, const unsigned char *buf, size_t len) {
+	cc_uint32 wrote;
+	/* Recover the handle as cc_socket (the type SSL_Init cast to void*) instead of int; */
+	/* going through size_t avoids casting a pointer straight to a narrower integer */
+	cc_result res = Socket_Write((cc_socket)(size_t)ctx, buf, len, &wrote);
+	
+	if (res) return -1;
+	return wrote;
+}
+/*
+ * The hardcoded trust anchors. These are the two DN + public key that
+ * correspond to the self-signed certificates cert-root-rsa.pem and
+ * cert-root-ec.pem.
+ * NOTE(review): these names match the BearSSL client_basic.c sample linked
+ * near the top of this backend, so they look like sample/test roots rather
+ * than real CA roots - confirm before relying on them to validate servers.
+ *
+ * C code for hardcoded trust anchors can be generated with the "brssl"
+ * command-line tool (with the "ta" command).
+ */
+static const unsigned char TA0_DN[] = { /* DER-encoded name: C=CA, CN=Root */
+	0x30, 0x1C, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13,
+	0x02, 0x43, 0x41, 0x31, 0x0D, 0x30, 0x0B, 0x06, 0x03, 0x55, 0x04, 0x03,
+	0x13, 0x04, 0x52, 0x6F, 0x6F, 0x74
+};
+static const unsigned char TA0_RSA_N[] = { /* RSA public modulus of anchor 0 (256 bytes) */
+	0xB6, 0xD9, 0x34, 0xD4, 0x50, 0xFD, 0xB3, 0xAF, 0x7A, 0x73, 0xF1, 0xCE,
+	0x38, 0xBF, 0x5D, 0x6F, 0x45, 0xE1, 0xFD, 0x4E, 0xB1, 0x98, 0xC6, 0x60,
+	0x83, 0x26, 0xD2, 0x17, 0xD1, 0xC5, 0xB7, 0x9A, 0xA3, 0xC1, 0xDE, 0x63,
+	0x39, 0x97, 0x9C, 0xF0, 0x5E, 0x5C, 0xC8, 0x1C, 0x17, 0xB9, 0x88, 0x19,
+	0x6D, 0xF0, 0xB6, 0x2E, 0x30, 0x50, 0xA1, 0x54, 0x6E, 0x93, 0xC0, 0xDB,
+	0xCF, 0x30, 0xCB, 0x9F, 0x1E, 0x27, 0x79, 0xF1, 0xC3, 0x99, 0x52, 0x35,
+	0xAA, 0x3D, 0xB6, 0xDF, 0xB0, 0xAD, 0x7C, 0xCB, 0x49, 0xCD, 0xC0, 0xED,
+	0xE7, 0x66, 0x10, 0x2A, 0xE9, 0xCE, 0x28, 0x1F, 0x21, 0x50, 0xFA, 0x77,
+	0x4C, 0x2D, 0xDA, 0xEF, 0x3C, 0x58, 0xEB, 0x4E, 0xBF, 0xCE, 0xE9, 0xFB,
+	0x1A, 0xDA, 0xA3, 0x83, 0xA3, 0xCD, 0xA3, 0xCA, 0x93, 0x80, 0xDC, 0xDA,
+	0xF3, 0x17, 0xCC, 0x7A, 0xAB, 0x33, 0x80, 0x9C, 0xB2, 0xD4, 0x7F, 0x46,
+	0x3F, 0xC5, 0x3C, 0xDC, 0x61, 0x94, 0xB7, 0x27, 0x29, 0x6E, 0x2A, 0xBC,
+	0x5B, 0x09, 0x36, 0xD4, 0xC6, 0x3B, 0x0D, 0xEB, 0xBE, 0xCE, 0xDB, 0x1D,
+	0x1C, 0xBC, 0x10, 0x6A, 0x71, 0x71, 0xB3, 0xF2, 0xCA, 0x28, 0x9A, 0x77,
+	0xF2, 0x8A, 0xEC, 0x42, 0xEF, 0xB1, 0x4A, 0x8E, 0xE2, 0xF2, 0x1A, 0x32,
+	0x2A, 0xCD, 0xC0, 0xA6, 0x46, 0x2C, 0x9A, 0xC2, 0x85, 0x37, 0x91, 0x7F,
+	0x46, 0xA1, 0x93, 0x81, 0xA1, 0x74, 0x66, 0xDF, 0xBA, 0xB3, 0x39, 0x20,
+	0x91, 0x93, 0xFA, 0x1D, 0xA1, 0xA8, 0x85, 0xE7, 0xE4, 0xF9, 0x07, 0xF6,
+	0x10, 0xF6, 0xA8, 0x27, 0x01, 0xB6, 0x7F, 0x12, 0xC3, 0x40, 0xC3, 0xC9,
+	0xE2, 0xB0, 0xAB, 0x49, 0x18, 0x3A, 0x64, 0xB6, 0x59, 0xB7, 0x95, 0xB5,
+	0x96, 0x36, 0xDF, 0x22, 0x69, 0xAA, 0x72, 0x6A, 0x54, 0x4E, 0x27, 0x29,
+	0xA3, 0x0E, 0x97, 0x15
+};
+static const unsigned char TA0_RSA_E[] = { /* RSA public exponent of anchor 0: 0x010001 (65537) */
+	0x01, 0x00, 0x01
+};
+static const unsigned char TA1_DN[] = { /* DER-encoded name: C=CA, CN=Root (same bytes as TA0_DN) */
+	0x30, 0x1C, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13,
+	0x02, 0x43, 0x41, 0x31, 0x0D, 0x30, 0x0B, 0x06, 0x03, 0x55, 0x04, 0x03,
+	0x13, 0x04, 0x52, 0x6F, 0x6F, 0x74
+};
+static const unsigned char TA1_EC_Q[] = { /* EC public point of anchor 1 (65 bytes, leading 0x04); used with BR_EC_secp256r1 in TAs */
+	0x04, 0x71, 0x74, 0xBA, 0xAB, 0xB9, 0x30, 0x2E, 0x81, 0xD5, 0xE5, 0x57,
+	0xF9, 0xF3, 0x20, 0x68, 0x0C, 0x9C, 0xF9, 0x64, 0xDB, 0xB4, 0x20, 0x0D,
+	0x6D, 0xEA, 0x40, 0xD0, 0x4A, 0x6E, 0x42, 0xFD, 0xB6, 0x9A, 0x68, 0x25,
+	0x44, 0xF6, 0xDF, 0x7B, 0xC4, 0xFC, 0xDE, 0xDD, 0x7B, 0xBB, 0xC5, 0xDB,
+	0x7C, 0x76, 0x3F, 0x41, 0x66, 0x40, 0x6E, 0xDB, 0xA7, 0x87, 0xC2, 0xE5,
+	0xD8, 0xC5, 0xF3, 0x7F, 0x8D
+};
+static const br_x509_trust_anchor TAs[2] = { /* trust anchor table passed to br_ssl_client_init_full */
+	{ /* anchor 0: RSA key (TA0_*) */
+		{ (unsigned char *)TA0_DN, sizeof TA0_DN },
+		BR_X509_TA_CA,
+		{
+			BR_KEYTYPE_RSA,
+			{ .rsa = {
+				(unsigned char *)TA0_RSA_N, sizeof TA0_RSA_N,
+				(unsigned char *)TA0_RSA_E, sizeof TA0_RSA_E,
+			} }
+		}
+	},
+	{ /* anchor 1: EC key (TA1_*) */
+		{ (unsigned char *)TA1_DN, sizeof TA1_DN },
+		BR_X509_TA_CA,
+		{
+			BR_KEYTYPE_EC,
+			{ .ec = {
+				BR_EC_secp256r1,
+				(unsigned char *)TA1_EC_Q, sizeof TA1_EC_Q,
+			} }
+		}
+	}
+};
+#define TAs_NUM   2 /* number of entries in TAs */
+
+/* Creates a BearSSL client session on top of an existing socket.
+ *   socket  - socket handle the TLS records are exchanged over
+ *   host_   - server name, UTF-8 encoded and handed to br_ssl_client_reset
+ *   out_ctx - receives the allocated SSLContext; release it with SSL_Free
+ * Returns 0 on success, or ERR_OUT_OF_MEMORY when the context cannot be allocated. */
+cc_result SSL_Init(cc_socket socket, const cc_string* host_, void** out_ctx) {
+	SSLContext* ctx;
+	char host[NATIVE_STR_LEN];
+	String_EncodeUtf8(host, host_);
+	
+	ctx = Mem_TryAlloc(1, sizeof(SSLContext));
+	if (!ctx) return ERR_OUT_OF_MEMORY;
+	*out_ctx = (void*)ctx;
+	
+	br_ssl_client_init_full(&ctx->sc, &ctx->xc, TAs, TAs_NUM);
+	/* _verifyCerts is the flag stored by SSLBackend_Init */
+	/* NOTE(review): this branch only installs signature verifiers, which does not */
+	/* look like it disables certificate validation - confirm the intent */
+	if (!_verifyCerts) {
+		br_x509_minimal_set_rsa(&ctx->xc,   &br_rsa_i31_pkcs1_vrfy);
+		br_x509_minimal_set_ecdsa(&ctx->xc, &br_ec_prime_i31, &br_ecdsa_i31_vrfy_asn1);
+	}
+	/* Last argument 1: iobuf is shared by both directions (it is sized with BR_SSL_BUFSIZE_BIDI) */
+	br_ssl_engine_set_buffer(&ctx->sc.eng, ctx->iobuf, sizeof(ctx->iobuf), 1);
+	/* Last argument 0: start a fresh handshake instead of resuming a session */
+	br_ssl_client_reset(&ctx->sc, host, 0);
+	
+	/* The socket handle travels through the void* callback context and is */
+	/* converted back inside sock_read/sock_write */
+	br_sslio_init(&ctx->ioc, &ctx->sc.eng, 
+			sock_read,  (void*)socket, 
+			sock_write, (void*)socket);
+	
+	return 0;
+}
+
+/* Reads up to count bytes of decrypted application data into data.
+ *   ctx_  - context created by SSL_Init
+ *   read  - receives the number of bytes stored in data
+ * Returns 0 on success; a cleanly closed connection is reported as 0 with *read == 0.
+ * On failure returns the BearSSL engine error code and sets *read to 0. */
+cc_result SSL_Read(void* ctx_, cc_uint8* data, cc_uint32 count, cc_uint32* read) { 
+	SSLContext* ctx = (SSLContext*)ctx_;
+	int res = br_sslio_read(&ctx->ioc, data, count);
+	if (res < 0) {
+		/* br_sslio_read also returns -1 after a clean close, where the engine error */
+		/* is 0: never hand back "success" with an uninitialised byte count */
+		*read = 0;
+		return br_ssl_engine_last_error(&ctx->sc.eng);
+	}
+	
+	/* Push out any records the engine queued while reading */
+	br_sslio_flush(&ctx->ioc);
+	*read = res;
+	return 0;
+}
+
+/* Encrypts and sends all count bytes of data.
+ *   ctx_  - context created by SSL_Init
+ *   wrote - receives count on success, 0 on failure
+ * Returns 0 on success, otherwise a non-zero BearSSL error code
+ * (BR_ERR_IO when the engine reports no specific error, e.g. the connection was closed). */
+cc_result SSL_Write(void* ctx_, const cc_uint8* data, cc_uint32 count, cc_uint32* wrote) {
+	SSLContext* ctx = (SSLContext*)ctx_;
+	int err;
+	// TODO: just br_sslio_write ??
+	*wrote = 0;
+	
+	/* br_sslio_write_all and br_sslio_flush both return 0 on success and -1 on failure, */
+	/* so their result is a status and must not be reported as a byte count */
+	if (br_sslio_write_all(&ctx->ioc, data, count) < 0 || br_sslio_flush(&ctx->ioc) < 0) {
+		err = br_ssl_engine_last_error(&ctx->sc.eng);
+		/* A closed engine can fail the write while reporting error 0; never return success here */
+		return err ? err : BR_ERR_IO;
+	}
+	
+	*wrote = count;
+	return 0;
+}
+
+/* Closes the TLS session (when a context was supplied) and releases its memory; always returns 0 */
+cc_result SSL_Free(void* ctx_) {
+	SSLContext* session = (SSLContext*)ctx_;
+	
+	if (session) {
+		br_sslio_close(&session->ioc);
+	}
+	Mem_Free(session);
+	return 0;
+}
 #elif defined CC_BUILD_3DS
 #include <3ds.h>
 #include "String.h"
--- a/third_party/bearssl/LICENSE.txt
+++ b/third_party/bearssl/LICENSE.txt
@ -0,0 +1,21 @@
+Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+
+Permission is hereby granted, free of charge, to any person obtaining 
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be 
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/third_party/bearssl/README.txt
+++ b/third_party/bearssl/README.txt
@ -0,0 +1,136 @@
+# Documentation
+
+The most up-to-date documentation is supposed to be available on the
+[BearSSL Web site](https://www.bearssl.org/).
+
+# Disclaimer
+
+BearSSL is considered beta-level software. Most planned functionalities
+are implemented; new evolution may still break both source and binary
+compatibility.
+
+Using BearSSL for production purposes would be a relatively bold but not
+utterly crazy move. BearSSL is free, open-source software, provided
+without any guarantee of fitness or reliability. That being said, it
+appears to behave properly, and only minor issues have been found (and
+fixed) so far. You are encouraged to inspect its API and code for
+learning, testing and possibly contributing.
+
+The usage license is spelled out in the `LICENSE.txt` file. This is the
+"MIT license". It can be summarised in the following way:
+
+ - You can use and reuse the library as you wish, and modify it, and
+   integrate it in your own code, and distribute it as is or in any
+   modified form, and so on.
+
+ - The only obligation that the license terms put upon you is that you
+   acknowledge and make it clear that if anything breaks, it is not my
+   fault, and I am not liable for anything, regardless of the type and
+   amount of collateral damage. The license terms say that the copyright
+   notice "shall be included in all copies or substantial portions of
+   the Software": this is how the disclaimer is "made explicit".
+   Basically, I have put it in every source file, so just keep it there.
+
+# Installation
+
+Right now, BearSSL is a simple library, along with a few test and debug
+command-line tools. There is no installer yet. The library _can_ be
+compiled as a shared library on some systems, but since the binary API
+is not fully stabilised, this is not a very good idea to do that right
+now.
+
+To compile the code, just type `make`. This will try to use sane
+"default" values. On a Windows system with Visual Studio, run a console
+with the environment initialised for a specific version of the C compiler,
+and type `nmake`.
+
+To override the default settings, create a custom configuration file in
+the `conf` directory, and invoke `make` (or `nmake`) with an explicit
+`CONF=` parameter. For instance, to use the provided `samd20.mk`
+configuration file (that targets cross-compilation for an Atmel board
+that features a Cortex-M0+ CPU), type:
+
+    make CONF=samd20
+
+The `conf/samd20.mk` file includes the `Unix.mk` file and then overrides
+some of the parameters, including the destination directory. Any custom
+configuration can be made the same way.
+
+Some compile-time options can be set through macros, either on the
+compiler command-line, or in the `src/config.h` file. See the comments
+in that file. Some settings are autodetected but they can still be
+explicitly overridden.
+
+When compilation is done, the library (static and DLL, when appropriate)
+and the command-line tools can be found in the designated build
+directory (by default named `build`). The public headers (to be used
+by applications linked against BearSSL) are in the `inc/` directory.
+
+To run the tests:
+
+  - `testcrypto all` runs the cryptographic tests (test vectors on all
+    implemented cryptographic functions). It can be slow. You can also
+    run a selection of the tests by providing their names (run
+    `testcrypto` without any parameter to see the available names).
+
+  - `testspeed all` runs a number of performance benchmarks, there again
+    on cryptographic functions. It gives a taste of how things go on the
+    current platform. As for `testcrypto`, specific named benchmarks can
+    be executed.
+
+  - `testx509` runs X.509 validation tests. The test certificates are
+    all in `test/x509/`.
+
+The `brssl` command-line tool produced in the build directory is a
+stand-alone binary. It can exercise some of the functionalities of
+BearSSL, in particular running a test SSL client or server. It is not
+meant for production purposes (e.g. the SSL client has a mode where it
+disregards the inability to validate the server's certificate, which is
+inherently unsafe, but convenient for debug).
+
+**Using the library** means writing some application code that invokes
+it, and linking with the static library. The header files are all in the
+`inc` directory; copy them wherever makes sense (e.g. in the
+`/usr/local/include` directory). The library itself (`libbearssl.a`) is
+what you link against.
+
+Alternatively, you may want to copy the source files directly into your
+own application code. This will make integrating ulterior versions of
+BearSSL more difficult. If you still want to go down that road, then
+simply copy all the `*.h` and `*.c` files from the `src` and `inc`
+directories into your application source code. In the BearSSL source
+archive, the source files are segregated into various sub-directories,
+but this is for my convenience only. There is no technical requirement
+for that, and all files can be dumped together in a simple directory.
+
+Dependencies are simple and systematic:
+
+  - Each `*.c` file includes `inner.h`
+  - `inner.h` includes `config.h` and `bearssl.h`
+  - `bearssl.h` includes the other `bearssl_*.h`
+
+# Versioning
+
+I follow this simple version numbering scheme:
+
+ - Version numbers are `x.y` or `x.y.z` where `x`, `y` and `z` are
+   decimal integers (possibly greater than 10). When the `.z` part is
+   missing, it is equivalent to `.0`.
+
+ - Backward compatibility is maintained, at both source and binary levels,
+   for each major version: this means that if some application code was
+   designed for version `x.y`, then it should compile, link and run
+   properly with any version `x.y'` for any `y'` greater than `y`.
+
+   The major version `0` is an exception. You shall not expect that any
+   version that starts with `0.` offers any kind of compatibility,
+   either source or binary, with any other `0.` version. (Of course I
+   will try to maintain some decent level of source compatibility, but I
+   make no promise in that respect. Since the API uses caller-allocated
+   context structures, I already know that binary compatibility _will_
+   be broken.)
+
+ - Sub-versions (the `y` part) are about added functionality. That is,
+   it can be expected that `1.3` will contain some extra functions when
+   compared to `1.2`. The next version level (the `z` part) is for
+   bugfixes that do not add any functionality.
--- a/third_party/bearssl/inc/bearssl.h
+++ b/third_party/bearssl/inc/bearssl.h
@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef BR_BEARSSL_H__
+#define BR_BEARSSL_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+/** \mainpage BearSSL API
+ *
+ * # API Layout
+ *
+ * The functions and structures defined by the BearSSL API are located
+ * in various header files:
+ *
+ * | Header file     | Elements                                          |
+ * | :-------------- | :------------------------------------------------ |
+ * | bearssl_hash.h  | Hash functions                                    |
+ * | bearssl_hmac.h  | HMAC                                              |
+ * | bearssl_kdf.h   | Key Derivation Functions                          |
+ * | bearssl_rand.h  | Pseudorandom byte generators                      |
+ * | bearssl_prf.h   | PRF implementations (for SSL/TLS)                 |
+ * | bearssl_block.h | Symmetric encryption                              |
+ * | bearssl_aead.h  | AEAD algorithms (combined encryption + MAC)       |
+ * | bearssl_rsa.h   | RSA encryption and signatures                     |
+ * | bearssl_ec.h    | Elliptic curves support (including ECDSA)         |
+ * | bearssl_ssl.h   | SSL/TLS engine interface                          |
+ * | bearssl_x509.h  | X.509 certificate decoding and validation         |
+ * | bearssl_pem.h   | Base64/PEM decoding support functions             |
+ *
+ * Applications using BearSSL are supposed to simply include `bearssl.h`
+ * as follows:
+ *
+ *     #include <bearssl.h>
+ *
+ * The `bearssl.h` file itself includes all the other header files. It is
+ * possible to include specific header files, but it has no practical
+ * advantage for the application. The API is separated into separate
+ * header files only for documentation convenience.
+ *
+ *
+ * # Conventions
+ *
+ * ## MUST and SHALL
+ *
+ * In all descriptions, the usual "MUST", "SHALL", "MAY",... terminology
+ * is used. Failure to meet requirements expressed with a "MUST" or
+ * "SHALL" implies undefined behaviour, which means that segmentation
+ * faults, buffer overflows, and other similar adverse events, may occur.
+ *
+ * In general, BearSSL is not very forgiving of programming errors, and
+ * does not include many failsafes or error reporting when the problem
+ * does not arise from external transient conditions, and can be fixed
+ * only in the application code. This is done so in order to make the
+ * total code footprint lighter.
+ *
+ *
+ * ## `NULL` values
+ *
+ * Function parameters with a pointer type shall not be `NULL` unless
+ * explicitly authorised by the documentation. As an exception, when
+ * the pointer aims at a sequence of bytes and is accompanied with
+ * a length parameter, and the length is zero (meaning that there is
+ * no byte at all to retrieve), then the pointer may be `NULL` even if
+ * not explicitly allowed.
+ *
+ *
+ * ## Memory Allocation
+ *
+ * BearSSL does not perform dynamic memory allocation. This implies that
+ * for any functionality that requires a non-transient state, the caller
+ * is responsible for allocating the relevant context structure. Such
+ * allocation can be done in any appropriate area, including static data
+ * segments, the heap, and the stack, provided that proper alignment is
+ * respected. The header files define these context structures
+ * (including size and contents), so the C compiler should handle
+ * alignment automatically.
+ *
+ * Since there is no dynamic resource allocation, there is also nothing to
+ * release. When the calling code is done with a BearSSL feature, it
+ * may simply release the context structures it allocated itself, with
+ * no "close function" to call. If the context structures were allocated
+ * on the stack (as local variables), then even that release operation is
+ * implicit.
+ *
+ *
+ * ## Structure Contents
+ *
+ * Except when explicitly indicated, structure contents are opaque: they
+ * are included in the header files so that calling code may know the
+ * structure sizes and alignment requirements, but callers SHALL NOT
+ * access individual fields directly. For fields that are supposed to
+ * be read from or written to, the API defines accessor functions (the
+ * simplest of these accessor functions are defined as `static inline`
+ * functions, and the C compiler will optimise them away).
+ *
+ *
+ * # API Usage
+ *
+ * BearSSL usage for running a SSL/TLS client or server is described
+ * on the [BearSSL Web site](https://www.bearssl.org/api1.html). The
+ * BearSSL source archive also comes with sample code.
+ */
+
+#include "bearssl_hash.h"
+#include "bearssl_hmac.h"
+#include "bearssl_kdf.h"
+#include "bearssl_rand.h"
+#include "bearssl_prf.h"
+#include "bearssl_block.h"
+#include "bearssl_aead.h"
+#include "bearssl_rsa.h"
+#include "bearssl_ec.h"
+#include "bearssl_ssl.h"
+#include "bearssl_x509.h"
+#include "bearssl_pem.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Type for a configuration option.
+ *
+ * A "configuration option" is a value that is selected when the BearSSL
+ * library itself is compiled. Most options are boolean; their value is
+ * then either 1 (option is enabled) or 0 (option is disabled). Some
+ * options have other integer values. Option names correspond to macro
+ * names. Some of the options can be explicitly set in the internal
+ * `"config.h"` file.
+ */
+typedef struct {
+	/** \brief Configurable option name (`NULL` in the entry that terminates the array returned by br_get_config()). */
+	const char *name;
+	/** \brief Configurable option value. */
+	long value;
+} br_config_option;
+
+/** \brief Get configuration report.
+ *
+ * This function returns compiled configuration options, each as a
+ * 'long' value. Names match internal macro names, in particular those
+ * that can be set in the `"config.h"` inner file. For boolean options,
+ * the numerical value is 1 if enabled, 0 if disabled. For maximum
+ * key sizes, values are expressed in bits.
+ *
+ * The returned array is terminated by an entry whose `name` is `NULL`.
+ *
+ * \return  the configuration report.
+ */
+const br_config_option *br_get_config(void);
+
+/* ======================================================================= */
+
+/** \brief Version feature: support for time callback. */
+#define BR_FEATURE_X509_TIME_CALLBACK   1
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/third_party/bearssl/inc/bearssl_aead.h
+++ b/third_party/bearssl/inc/bearssl_aead.h
--- a/third_party/bearssl/inc/bearssl_block.h
+++ b/third_party/bearssl/inc/bearssl_block.h
--- a/third_party/bearssl/inc/bearssl_ec.h
+++ b/third_party/bearssl/inc/bearssl_ec.h
@ -0,0 +1,967 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef BR_BEARSSL_EC_H__
+#define BR_BEARSSL_EC_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "bearssl_rand.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \file bearssl_ec.h
+ *
+ * # Elliptic Curves
+ *
+ * This file documents the EC implementations provided with BearSSL, and
+ * ECDSA.
+ *
+ * ## Elliptic Curve API
+ *
+ * Only "named curves" are supported. Each EC implementation supports
+ * one or several named curves, identified by symbolic identifiers.
+ * These identifiers are small integers, that correspond to the values
+ * registered by the
+ * [IANA](http://www.iana.org/assignments/tls-parameters/tls-parameters.xhtml#tls-parameters-8).
+ *
+ * Since all currently defined elliptic curve identifiers are in the 0..31
+ * range, it is convenient to encode support of some curves in a 32-bit
+ * word, such that bit x corresponds to curve of identifier x.
+ *
+ * An EC implementation is incarnated by a `br_ec_impl` instance, that
+ * offers the following fields:
+ *
+ *   - `supported_curves`
+ *
+ *      A 32-bit word that documents the identifiers of the curves supported
+ *      by this implementation.
+ *
+ *   - `generator()`
+ *
+ *      Callback method that returns a pointer to the conventional generator
+ *      point for that curve.
+ *
+ *   - `order()`
+ *
+ *      Callback method that returns a pointer to the subgroup order for
+ *      that curve. That value uses unsigned big-endian encoding.
+ *
+ *   - `xoff()`
+ *
+ *      Callback method that returns the offset and length of the X
+ *      coordinate in an encoded point.
+ *
+ *   - `mul()`
+ *
+ *      Multiply a curve point with an integer.
+ *
+ *   - `mulgen()`
+ *
+ *      Multiply the curve generator with an integer. This may be faster
+ *      than the generic `mul()`.
+ *
+ *   - `muladd()`
+ *
+ *      Multiply two curve points by two integers, and return the sum of
+ *      the two products.
+ *
+ * All curve points are represented in uncompressed format. The `mul()`
+ * and `muladd()` methods take care to validate that the provided points
+ * are really part of the relevant curve subgroup.
+ *
+ * For all point multiplication functions, the following holds:
+ *
+ *   - Functions validate that the provided points are valid members
+ *     of the relevant curve subgroup. An error is reported if that is
+ *     not the case.
+ *
+ *   - Processing is constant-time, even if the point operands are not
+ *     valid. This holds for both the source and resulting points, and
+ *     the multipliers (integers). Only the byte length of the provided
+ *     multiplier arrays (not their actual value length in bits) may
+ *     leak through timing-based side channels.
+ *
+ *   - The multipliers (integers) MUST be lower than the subgroup order.
+ *     If this property is not met, then the result is indeterminate,
+ *     but an error value is not necessarily returned.
+ * 
+ *
+ * ## ECDSA
+ *
+ * ECDSA signatures have two standard formats, called "raw" and "asn1".
+ * Internally, such a signature is a pair of modular integers `(r,s)`.
+ * The "raw" format is the concatenation of the unsigned big-endian
+ * encodings of these two integers, possibly left-padded with zeros so
+ * that they have the same encoded length. The "asn1" format is the
+ * DER encoding of an ASN.1 structure that contains the two integer
+ * values:
+ *
+ *     ECDSASignature ::= SEQUENCE {
+ *         r   INTEGER,
+ *         s   INTEGER
+ *     }
+ *
+ * In general, in all of X.509 and SSL/TLS, the "asn1" format is used.
+ * BearSSL offers ECDSA implementations for both formats; conversion
+ * functions between the two formats are also provided. Conversion of a
+ * "raw" format signature into "asn1" may enlarge a signature by no more
+ * than 9 bytes for all supported curves; conversely, conversion of an
+ * "asn1" signature to "raw" may expand the signature but the "raw"
+ * length will never be more than twice the length of the "asn1" length
+ * (and usually it will be shorter).
+ *
+ * Note that for a given signature, the "raw" format is not fully
+ * deterministic, in that it does not enforce a minimal common length.
+ */
+
+/*
+ * Standard curve IDs. These IDs are equal to the numerical
+ * identifiers assigned to these curves for TLS:
+ *    http://www.iana.org/assignments/tls-parameters/tls-parameters.xhtml#tls-parameters-8
+ */
+
+/** \brief Identifier for named curve sect163k1. */
+#define BR_EC_sect163k1           1
+
+/** \brief Identifier for named curve sect163r1. */
+#define BR_EC_sect163r1           2
+
+/** \brief Identifier for named curve sect163r2. */
+#define BR_EC_sect163r2           3
+
+/** \brief Identifier for named curve sect193r1. */
+#define BR_EC_sect193r1           4
+
+/** \brief Identifier for named curve sect193r2. */
+#define BR_EC_sect193r2           5
+
+/** \brief Identifier for named curve sect233k1. */
+#define BR_EC_sect233k1           6
+
+/** \brief Identifier for named curve sect233r1. */
+#define BR_EC_sect233r1           7
+
+/** \brief Identifier for named curve sect239k1. */
+#define BR_EC_sect239k1           8
+
+/** \brief Identifier for named curve sect283k1. */
+#define BR_EC_sect283k1           9
+
+/** \brief Identifier for named curve sect283r1. */
+#define BR_EC_sect283r1          10
+
+/** \brief Identifier for named curve sect409k1. */
+#define BR_EC_sect409k1          11
+
+/** \brief Identifier for named curve sect409r1. */
+#define BR_EC_sect409r1          12
+
+/** \brief Identifier for named curve sect571k1. */
+#define BR_EC_sect571k1          13
+
+/** \brief Identifier for named curve sect571r1. */
+#define BR_EC_sect571r1          14
+
+/** \brief Identifier for named curve secp160k1. */
+#define BR_EC_secp160k1          15
+
+/** \brief Identifier for named curve secp160r1. */
+#define BR_EC_secp160r1          16
+
+/** \brief Identifier for named curve secp160r2. */
+#define BR_EC_secp160r2          17
+
+/** \brief Identifier for named curve secp192k1. */
+#define BR_EC_secp192k1          18
+
+/** \brief Identifier for named curve secp192r1. */
+#define BR_EC_secp192r1          19
+
+/** \brief Identifier for named curve secp224k1. */
+#define BR_EC_secp224k1          20
+
+/** \brief Identifier for named curve secp224r1. */
+#define BR_EC_secp224r1          21
+
+/** \brief Identifier for named curve secp256k1. */
+#define BR_EC_secp256k1          22
+
+/** \brief Identifier for named curve secp256r1 (NIST P-256). */
+#define BR_EC_secp256r1          23
+
+/** \brief Identifier for named curve secp384r1 (NIST P-384). */
+#define BR_EC_secp384r1          24
+
+/** \brief Identifier for named curve secp521r1. */
+#define BR_EC_secp521r1          25
+
+/** \brief Identifier for named curve brainpoolP256r1. */
+#define BR_EC_brainpoolP256r1    26
+
+/** \brief Identifier for named curve brainpoolP384r1. */
+#define BR_EC_brainpoolP384r1    27
+
+/** \brief Identifier for named curve brainpoolP512r1. */
+#define BR_EC_brainpoolP512r1    28
+
+/** \brief Identifier for named curve Curve25519. */
+#define BR_EC_curve25519         29
+
+/** \brief Identifier for named curve Curve448. */
+#define BR_EC_curve448           30
+
+/**
+ * \brief Structure for an EC public key.
+ */
+typedef struct {
+	/** \brief Identifier for the curve used by this key (one of the `BR_EC_*` values). */
+	int curve;
+	/** \brief Public curve point (uncompressed format). */
+	unsigned char *q;
+	/** \brief Length of public curve point (in bytes). */
+	size_t qlen;
+} br_ec_public_key;
+
+/**
+ * \brief Structure for an EC private key.
+ *
+ * The private key is an integer modulo the curve subgroup order. The
+ * encoding below tolerates extra leading zeros. In general, it is
+ * recommended that the private key has the same length as the curve
+ * subgroup order.
+ */
+typedef struct {
+	/** \brief Identifier for the curve used by this key (a `BR_EC_*` value). */
+	int curve;
+	/** \brief Private key (integer, unsigned big-endian encoding). */
+	unsigned char *x;
+	/** \brief Private key length (in bytes). */
+	size_t xlen;
+} br_ec_private_key;
+
+/**
+ * \brief Type for an EC implementation.
+ */
+typedef struct {
+	/**
+	 * \brief Supported curves.
+	 *
+	 * This word is a bitfield: bit `x` is set if the curve of ID `x`
+	 * is supported. E.g. an implementation supporting both NIST P-256
+	 * (secp256r1, ID 23) and NIST P-384 (secp384r1, ID 24) will have
+	 * value `0x01800000` in this field.
+	 */
+	uint32_t supported_curves;
+
+	/**
+	 * \brief Get the conventional generator.
+	 *
+	 * This function returns the conventional generator (encoded
+	 * curve point) for the specified curve. This function MUST NOT
+	 * be called if the curve is not supported.
+	 *
+	 * \param curve   curve identifier.
+	 * \param len     receiver for the encoded generator length (in bytes).
+	 * \return  the encoded generator.
+	 */
+	const unsigned char *(*generator)(int curve, size_t *len);
+
+	/**
+	 * \brief Get the subgroup order.
+	 *
+	 * This function returns the order of the subgroup generated by
+	 * the conventional generator, for the specified curve. Unsigned
+	 * big-endian encoding is used. This function MUST NOT be called
+	 * if the curve is not supported.
+	 *
+	 * \param curve   curve identifier.
+	 * \param len     receiver for the encoded order length (in bytes).
+	 * \return  the encoded order.
+	 */
+	const unsigned char *(*order)(int curve, size_t *len);
+
+	/**
+	 * \brief Get the offset and length for the X coordinate.
+	 *
+	 * This function returns the offset and length (in bytes) of
+	 * the X coordinate in an encoded non-zero point.
+	 *
+	 * \param curve   curve identifier.
+	 * \param len     receiver for the X coordinate length (in bytes).
+	 * \return  the offset for the X coordinate (in bytes).
+	 */
+	size_t (*xoff)(int curve, size_t *len);
+
+	/**
+	 * \brief Multiply a curve point by an integer.
+	 *
+	 * The source point is provided in array `G` (of size `Glen` bytes);
+	 * the multiplication result is written over it. The multiplier
+	 * `x` (of size `xlen` bytes) uses unsigned big-endian encoding.
+	 *
+	 * Rules:
+	 *
+	 *   - The specified curve MUST be supported.
+	 *
+	 *   - The source point must be a valid point on the relevant curve
+	 *     subgroup (and not the "point at infinity" either). If this is
+	 *     not the case, then this function returns an error (0).
+	 *
+	 *   - The multiplier integer MUST be non-zero and less than the
+	 *     curve subgroup order. If this property does not hold, then
+	 *     the result is indeterminate and an error code is not
+	 *     guaranteed.
+	 *
+	 * Returned value is 1 on success, 0 on error. On error, the
+	 * contents of `G` are indeterminate.
+	 *
+	 * \param G       point to multiply.
+	 * \param Glen    length of the encoded point (in bytes).
+	 * \param x       multiplier (unsigned big-endian).
+	 * \param xlen    multiplier length (in bytes).
+	 * \param curve   curve identifier.
+	 * \return  1 on success, 0 on error.
+	 */
+	uint32_t (*mul)(unsigned char *G, size_t Glen,
+		const unsigned char *x, size_t xlen, int curve);
+
+	/**
+	 * \brief Multiply the generator by an integer.
+	 *
+	 * The multiplier MUST be non-zero and less than the curve
+	 * subgroup order. Results are indeterminate if this property
+	 * does not hold.
+	 *
+	 * \param R       output buffer for the point.
+	 * \param x       multiplier (unsigned big-endian).
+	 * \param xlen    multiplier length (in bytes).
+	 * \param curve   curve identifier.
+	 * \return  encoded result point length (in bytes).
+	 */
+	size_t (*mulgen)(unsigned char *R,
+		const unsigned char *x, size_t xlen, int curve);
+
+	/**
+	 * \brief Multiply two points by two integers and add the
+	 * results.
+	 *
+	 * The point `x*A + y*B` is computed and written back in the `A`
+	 * array.
+	 *
+	 * Rules:
+	 *
+	 *   - The specified curve MUST be supported.
+	 *
+	 *   - The source points (`A` and `B`) must be valid points on
+	 *     the relevant curve subgroup (and not the "point at
+	 *     infinity" either). If this is not the case, then this
+	 *     function returns an error (0).
+	 *
+	 *   - If the `B` pointer is `NULL`, then the conventional
+	 *     subgroup generator is used. With some implementations,
+	 *     this may be faster than providing a pointer to the
+	 *     generator.
+	 *
+	 *   - The multiplier integers (`x` and `y`) MUST be non-zero
+	 *     and less than the curve subgroup order. If either integer
+	 *     is zero, then an error is reported, but if one of them is
+	 *     not lower than the subgroup order, then the result is
+	 *     indeterminate and an error code is not guaranteed.
+	 *
+	 *   - If the final result is the point at infinity, then an
+	 *     error is returned.
+	 *
+	 * Returned value is 1 on success, 0 on error. On error, the
+	 * contents of `A` are indeterminate.
+	 *
+	 * \param A       first point to multiply.
+	 * \param B       second point to multiply (`NULL` for the generator).
+	 * \param len     common length of the encoded points (in bytes).
+	 * \param x       multiplier for `A` (unsigned big-endian).
+	 * \param xlen    length of multiplier for `A` (in bytes).
+	 * \param y       multiplier for `B` (unsigned big-endian).
+	 * \param ylen    length of multiplier for `B` (in bytes).
+	 * \param curve   curve identifier.
+	 * \return  1 on success, 0 on error.
+	 */
+	uint32_t (*muladd)(unsigned char *A, const unsigned char *B, size_t len,
+		const unsigned char *x, size_t xlen,
+		const unsigned char *y, size_t ylen, int curve);
+} br_ec_impl;
+
+/**
+ * \brief EC implementation "i31".
+ *
+ * This implementation internally uses generic code for modular integers,
+ * with a representation as sequences of 31-bit words. It supports secp256r1,
+ * secp384r1 and secp521r1 (aka NIST curves P-256, P-384 and P-521).
+ */
+extern const br_ec_impl br_ec_prime_i31;
+
+/**
+ * \brief EC implementation "i15".
+ *
+ * This implementation internally uses generic code for modular integers,
+ * with a representation as sequences of 15-bit words. It supports secp256r1,
+ * secp384r1 and secp521r1 (aka NIST curves P-256, P-384 and P-521).
+ */
+extern const br_ec_impl br_ec_prime_i15;
+
+/**
+ * \brief EC implementation "m15" for P-256.
+ *
+ * This implementation uses specialised code for curve secp256r1 (also
+ * known as NIST P-256), with optional Karatsuba decomposition, and fast
+ * modular reduction thanks to the field modulus special format. Only
+ * 32-bit multiplications are used (with 32-bit results, not 64-bit).
+ */
+extern const br_ec_impl br_ec_p256_m15;
+
+/**
+ * \brief EC implementation "m31" for P-256.
+ *
+ * This implementation uses specialised code for curve secp256r1 (also
+ * known as NIST P-256), relying on multiplications of 31-bit values
+ * (MUL31).
+ */
+extern const br_ec_impl br_ec_p256_m31;
+
+/**
+ * \brief EC implementation "m62" (specialised code) for P-256.
+ *
+ * This implementation uses custom code relying on multiplication of
+ * integers up to 64 bits, with a 128-bit result. This implementation is
+ * defined only on platforms that offer the 64x64->128 multiplication
+ * support; use `br_ec_p256_m62_get()` to dynamically obtain a pointer
+ * to that implementation.
+ */
+extern const br_ec_impl br_ec_p256_m62;
+
+/**
+ * \brief Get the "m62" implementation of P-256, if available.
+ *
+ * \return  the implementation, or 0.
+ */
+const br_ec_impl *br_ec_p256_m62_get(void);
+
+/**
+ * \brief EC implementation "m64" (specialised code) for P-256.
+ *
+ * This implementation uses custom code relying on multiplication of
+ * integers up to 64 bits, with a 128-bit result. This implementation is
+ * defined only on platforms that offer the 64x64->128 multiplication
+ * support; use `br_ec_p256_m64_get()` to dynamically obtain a pointer
+ * to that implementation.
+ */
+extern const br_ec_impl br_ec_p256_m64;
+
+/**
+ * \brief Get the "m64" implementation of P-256, if available.
+ *
+ * \return  the implementation, or 0.
+ */
+const br_ec_impl *br_ec_p256_m64_get(void);
+
+/**
+ * \brief EC implementation "i15" (generic code) for Curve25519.
+ *
+ * This implementation uses the generic code for modular integers (with
+ * 15-bit words) to support Curve25519. Due to the specificities of the
+ * curve definition, the following applies:
+ *
+ *   - `muladd()` is not implemented (the function returns 0 systematically).
+ *   - `order()` returns 2^255-1, since the point multiplication algorithm
+ *     accepts any 32-byte integer as input (it clears the top bit and low
+ *     three bits systematically).
+ */
+extern const br_ec_impl br_ec_c25519_i15;
+
+/**
+ * \brief EC implementation "i31" (generic code) for Curve25519.
+ *
+ * This implementation uses the generic code for modular integers (with
+ * 31-bit words) to support Curve25519. Due to the specificities of the
+ * curve definition, the following applies:
+ *
+ *   - `muladd()` is not implemented (the function returns 0 systematically).
+ *   - `order()` returns 2^255-1, since the point multiplication algorithm
+ *     accepts any 32-byte integer as input (it clears the top bit and low
+ *     three bits systematically).
+ */
+extern const br_ec_impl br_ec_c25519_i31;
+
+/**
+ * \brief EC implementation "m15" (specialised code) for Curve25519.
+ *
+ * This implementation uses custom code relying on multiplication of
+ * integers up to 15 bits. Due to the specificities of the curve
+ * definition, the following applies:
+ *
+ *   - `muladd()` is not implemented (the function returns 0 systematically).
+ *   - `order()` returns 2^255-1, since the point multiplication algorithm
+ *     accepts any 32-byte integer as input (it clears the top bit and low
+ *     three bits systematically).
+ */
+extern const br_ec_impl br_ec_c25519_m15;
+
+/**
+ * \brief EC implementation "m31" (specialised code) for Curve25519.
+ *
+ * This implementation uses custom code relying on multiplication of
+ * integers up to 31 bits. Due to the specificities of the curve
+ * definition, the following applies:
+ *
+ *   - `muladd()` is not implemented (the function returns 0 systematically).
+ *   - `order()` returns 2^255-1, since the point multiplication algorithm
+ *     accepts any 32-byte integer as input (it clears the top bit and low
+ *     three bits systematically).
+ */
+extern const br_ec_impl br_ec_c25519_m31;
+
+/**
+ * \brief EC implementation "m62" (specialised code) for Curve25519.
+ *
+ * This implementation uses custom code relying on multiplication of
+ * integers up to 62 bits, with a 124-bit result. This implementation is
+ * defined only on platforms that offer the 64x64->128 multiplication
+ * support; use `br_ec_c25519_m62_get()` to dynamically obtain a pointer
+ * to that implementation. Due to the specificities of the curve
+ * definition, the following applies:
+ *
+ *   - `muladd()` is not implemented (the function returns 0 systematically).
+ *   - `order()` returns 2^255-1, since the point multiplication algorithm
+ *     accepts any 32-byte integer as input (it clears the top bit and low
+ *     three bits systematically).
+ */
+extern const br_ec_impl br_ec_c25519_m62;
+
+/**
+ * \brief Get the "m62" implementation of Curve25519, if available.
+ *
+ * \return  the implementation, or 0.
+ */
+const br_ec_impl *br_ec_c25519_m62_get(void);
+
+/**
+ * \brief EC implementation "m64" (specialised code) for Curve25519.
+ *
+ * This implementation uses custom code relying on multiplication of
+ * integers up to 64 bits, with a 128-bit result. This implementation is
+ * defined only on platforms that offer the 64x64->128 multiplication
+ * support; use `br_ec_c25519_m64_get()` to dynamically obtain a pointer
+ * to that implementation. Due to the specificities of the curve
+ * definition, the following applies:
+ *
+ *   - `muladd()` is not implemented (the function returns 0 systematically).
+ *   - `order()` returns 2^255-1, since the point multiplication algorithm
+ *     accepts any 32-byte integer as input (it clears the top bit and low
+ *     three bits systematically).
+ */
+extern const br_ec_impl br_ec_c25519_m64;
+
+/**
+ * \brief Get the "m64" implementation of Curve25519, if available.
+ *
+ * \return  the implementation, or 0.
+ */
+const br_ec_impl *br_ec_c25519_m64_get(void);
+
+/**
+ * \brief Aggregate EC implementation "m15".
+ *
+ * This implementation is a wrapper for:
+ *
+ *   - `br_ec_c25519_m15` for Curve25519
+ *   - `br_ec_p256_m15` for NIST P-256
+ *   - `br_ec_prime_i15` for other curves (NIST P-384 and NIST P-521)
+ */
+extern const br_ec_impl br_ec_all_m15;
+
+/**
+ * \brief Aggregate EC implementation "m31".
+ *
+ * This implementation is a wrapper for:
+ *
+ *   - `br_ec_c25519_m31` for Curve25519
+ *   - `br_ec_p256_m31` for NIST P-256
+ *   - `br_ec_prime_i31` for other curves (NIST P-384 and NIST P-521)
+ */
+extern const br_ec_impl br_ec_all_m31;
+
+/**
+ * \brief Get the "default" EC implementation for the current system.
+ *
+ * This returns a pointer to the preferred implementation on the
+ * current system.
+ *
+ * \return  the default EC implementation.
+ */
+const br_ec_impl *br_ec_get_default(void);
+
+/**
+ * \brief Convert a signature from "raw" to "asn1".
+ *
+ * Conversion is done "in place" and the new length is returned.
+ * Conversion may enlarge the signature, but by no more than 9 bytes at
+ * most. On error, 0 is returned (error conditions include an odd raw
+ * signature length, or an oversized integer).
+ *
+ * \param sig       signature to convert.
+ * \param sig_len   signature length (in bytes).
+ * \return  the new signature length, or 0 on error.
+ */
+size_t br_ecdsa_raw_to_asn1(void *sig, size_t sig_len);
+
+/**
+ * \brief Convert a signature from "asn1" to "raw".
+ *
+ * Conversion is done "in place" and the new length is returned.
+ * Conversion may enlarge the signature, but the new signature length
+ * will be less than twice the source length at most. On error, 0 is
+ * returned (error conditions include an invalid ASN.1 structure or an
+ * oversized integer).
+ *
+ * \param sig       signature to convert.
+ * \param sig_len   signature length (in bytes).
+ * \return  the new signature length, or 0 on error.
+ */
+size_t br_ecdsa_asn1_to_raw(void *sig, size_t sig_len);
+
+/**
+ * \brief Type for an ECDSA signer function.
+ *
+ * A pointer to the EC implementation is provided. The hash value is
+ * assumed to have the length inferred from the designated hash function
+ * class.
+ *
+ * Signature is written in the buffer pointed to by `sig`, and the length
+ * (in bytes) is returned. On error, nothing is written in the buffer,
+ * and 0 is returned. This function returns 0 if the specified curve is
+ * not supported by the provided EC implementation.
+ *
+ * The signature format is either "raw" or "asn1", depending on the
+ * implementation; maximum length is predictable from the implemented
+ * curve:
+ *
+ * | curve      | raw | asn1 |
+ * | :--------- | --: | ---: |
+ * | NIST P-256 |  64 |   72 |
+ * | NIST P-384 |  96 |  104 |
+ * | NIST P-521 | 132 |  139 |
+ *
+ * \param impl         EC implementation to use.
+ * \param hf           hash function used to process the data.
+ * \param hash_value   signed data (hashed).
+ * \param sk           EC private key.
+ * \param sig          destination buffer.
+ * \return  the signature length (in bytes), or 0 on error.
+ */
+typedef size_t (*br_ecdsa_sign)(const br_ec_impl *impl,
+	const br_hash_class *hf, const void *hash_value,
+	const br_ec_private_key *sk, void *sig);
+
+/**
+ * \brief Type for an ECDSA signature verification function.
+ *
+ * A pointer to the EC implementation is provided. The hashed value,
+ * computed over the purportedly signed data, is also provided with
+ * its length.
+ *
+ * The signature format is either "raw" or "asn1", depending on the
+ * implementation.
+ *
+ * Returned value is 1 on success (valid signature), 0 on error. This
+ * function returns 0 if the specified curve is not supported by the
+ * provided EC implementation.
+ *
+ * \param impl       EC implementation to use.
+ * \param hash       signed data (hashed).
+ * \param hash_len   hash value length (in bytes).
+ * \param pk         EC public key.
+ * \param sig        signature.
+ * \param sig_len    signature length (in bytes).
+ * \return  1 on success, 0 on error.
+ */
+typedef uint32_t (*br_ecdsa_vrfy)(const br_ec_impl *impl,
+	const void *hash, size_t hash_len,
+	const br_ec_public_key *pk, const void *sig, size_t sig_len);
+
+/**
+ * \brief ECDSA signature generator, "i31" implementation, "asn1" format.
+ *
+ * \see br_ecdsa_sign()
+ *
+ * \param impl         EC implementation to use.
+ * \param hf           hash function used to process the data.
+ * \param hash_value   signed data (hashed).
+ * \param sk           EC private key.
+ * \param sig          destination buffer.
+ * \return  the signature length (in bytes), or 0 on error.
+ */
+size_t br_ecdsa_i31_sign_asn1(const br_ec_impl *impl,
+	const br_hash_class *hf, const void *hash_value,
+	const br_ec_private_key *sk, void *sig);
+
+/**
+ * \brief ECDSA signature generator, "i31" implementation, "raw" format.
+ *
+ * \see br_ecdsa_sign()
+ *
+ * \param impl         EC implementation to use.
+ * \param hf           hash function used to process the data.
+ * \param hash_value   signed data (hashed).
+ * \param sk           EC private key.
+ * \param sig          destination buffer.
+ * \return  the signature length (in bytes), or 0 on error.
+ */
+size_t br_ecdsa_i31_sign_raw(const br_ec_impl *impl,
+	const br_hash_class *hf, const void *hash_value,
+	const br_ec_private_key *sk, void *sig);
+
+/**
+ * \brief ECDSA signature verifier, "i31" implementation, "asn1" format.
+ *
+ * \see br_ecdsa_vrfy()
+ *
+ * \param impl       EC implementation to use.
+ * \param hash       signed data (hashed).
+ * \param hash_len   hash value length (in bytes).
+ * \param pk         EC public key.
+ * \param sig        signature.
+ * \param sig_len    signature length (in bytes).
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_ecdsa_i31_vrfy_asn1(const br_ec_impl *impl,
+	const void *hash, size_t hash_len,
+	const br_ec_public_key *pk, const void *sig, size_t sig_len);
+
+/**
+ * \brief ECDSA signature verifier, "i31" implementation, "raw" format.
+ *
+ * \see br_ecdsa_vrfy()
+ *
+ * \param impl       EC implementation to use.
+ * \param hash       signed data (hashed).
+ * \param hash_len   hash value length (in bytes).
+ * \param pk         EC public key.
+ * \param sig        signature.
+ * \param sig_len    signature length (in bytes).
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_ecdsa_i31_vrfy_raw(const br_ec_impl *impl,
+	const void *hash, size_t hash_len,
+	const br_ec_public_key *pk, const void *sig, size_t sig_len);
+
+/**
+ * \brief ECDSA signature generator, "i15" implementation, "asn1" format.
+ *
+ * \see br_ecdsa_sign()
+ *
+ * \param impl         EC implementation to use.
+ * \param hf           hash function used to process the data.
+ * \param hash_value   signed data (hashed).
+ * \param sk           EC private key.
+ * \param sig          destination buffer.
+ * \return  the signature length (in bytes), or 0 on error.
+ */
+size_t br_ecdsa_i15_sign_asn1(const br_ec_impl *impl,
+	const br_hash_class *hf, const void *hash_value,
+	const br_ec_private_key *sk, void *sig);
+
+/**
+ * \brief ECDSA signature generator, "i15" implementation, "raw" format.
+ *
+ * \see br_ecdsa_sign()
+ *
+ * \param impl         EC implementation to use.
+ * \param hf           hash function used to process the data.
+ * \param hash_value   signed data (hashed).
+ * \param sk           EC private key.
+ * \param sig          destination buffer.
+ * \return  the signature length (in bytes), or 0 on error.
+ */
+size_t br_ecdsa_i15_sign_raw(const br_ec_impl *impl,
+	const br_hash_class *hf, const void *hash_value,
+	const br_ec_private_key *sk, void *sig);
+
+/**
+ * \brief ECDSA signature verifier, "i15" implementation, "asn1" format.
+ *
+ * \see br_ecdsa_vrfy()
+ *
+ * \param impl       EC implementation to use.
+ * \param hash       signed data (hashed).
+ * \param hash_len   hash value length (in bytes).
+ * \param pk         EC public key.
+ * \param sig        signature.
+ * \param sig_len    signature length (in bytes).
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_ecdsa_i15_vrfy_asn1(const br_ec_impl *impl,
+	const void *hash, size_t hash_len,
+	const br_ec_public_key *pk, const void *sig, size_t sig_len);
+
+/**
+ * \brief ECDSA signature verifier, "i15" implementation, "raw" format.
+ *
+ * \see br_ecdsa_vrfy()
+ *
+ * \param impl       EC implementation to use.
+ * \param hash       signed data (hashed).
+ * \param hash_len   hash value length (in bytes).
+ * \param pk         EC public key.
+ * \param sig        signature.
+ * \param sig_len    signature length (in bytes).
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_ecdsa_i15_vrfy_raw(const br_ec_impl *impl,
+	const void *hash, size_t hash_len,
+	const br_ec_public_key *pk, const void *sig, size_t sig_len);
+
+/**
+ * \brief Get "default" ECDSA implementation (signer, asn1 format).
+ *
+ * This returns the preferred implementation of ECDSA signature generation
+ * ("asn1" output format) on the current system.
+ *
+ * \return  the default implementation.
+ */
+br_ecdsa_sign br_ecdsa_sign_asn1_get_default(void);
+
+/**
+ * \brief Get "default" ECDSA implementation (signer, raw format).
+ *
+ * This returns the preferred implementation of ECDSA signature generation
+ * ("raw" output format) on the current system.
+ *
+ * \return  the default implementation.
+ */
+br_ecdsa_sign br_ecdsa_sign_raw_get_default(void);
+
+/**
+ * \brief Get "default" ECDSA implementation (verifier, asn1 format).
+ *
+ * This returns the preferred implementation of ECDSA signature verification
+ * ("asn1" signature format) on the current system.
+ *
+ * \return  the default implementation.
+ */
+br_ecdsa_vrfy br_ecdsa_vrfy_asn1_get_default(void);
+
+/**
+ * \brief Get "default" ECDSA implementation (verifier, raw format).
+ *
+ * This returns the preferred implementation of ECDSA signature verification
+ * ("raw" signature format) on the current system.
+ *
+ * \return  the default implementation.
+ */
+br_ecdsa_vrfy br_ecdsa_vrfy_raw_get_default(void);
+
+/**
+ * \brief Maximum size for EC private key element buffer.
+ *
+ * This is the largest number of bytes that `br_ec_keygen()` may need or
+ * ever return.
+ */
+#define BR_EC_KBUF_PRIV_MAX_SIZE   72
+
+/**
+ * \brief Maximum size for EC public key element buffer.
+ *
+ * This is the largest number of bytes that `br_ec_compute_pub()` may
+ * need or ever return.
+ */
+#define BR_EC_KBUF_PUB_MAX_SIZE    145
+
+/**
+ * \brief Generate a new EC private key.
+ *
+ * If the specified `curve` is not supported by the elliptic curve
+ * implementation (`impl`), then this function returns zero.
+ *
+ * The `sk` structure fields are set to the new private key data. In
+ * particular, `sk.x` is made to point to the provided key buffer (`kbuf`),
+ * in which the actual private key data is written. That buffer is assumed
+ * to be large enough. The `BR_EC_KBUF_PRIV_MAX_SIZE` defines the maximum
+ * size for all supported curves.
+ *
+ * The number of bytes used in `kbuf` is returned. If `kbuf` is `NULL`, then
+ * the private key is not actually generated, and `sk` may also be `NULL`;
+ * the minimum length for `kbuf` is still computed and returned.
+ *
+ * If `sk` is `NULL` but `kbuf` is not `NULL`, then the private key is
+ * still generated and stored in `kbuf`.
+ *
+ * \param rng_ctx   source PRNG context (already initialized).
+ * \param impl      the elliptic curve implementation.
+ * \param sk        the private key structure to fill, or `NULL`.
+ * \param kbuf      the key element buffer, or `NULL`.
+ * \param curve     the curve identifier.
+ * \return  the key data length (in bytes), or zero.
+ */
+size_t br_ec_keygen(const br_prng_class **rng_ctx,
+	const br_ec_impl *impl, br_ec_private_key *sk,
+	void *kbuf, int curve);
+
+/**
+ * \brief Compute EC public key from EC private key.
+ *
+ * This function uses the provided elliptic curve implementation (`impl`)
+ * to compute the public key corresponding to the private key held in `sk`.
+ * The public key point is written into `kbuf`, which is then linked from
+ * the `*pk` structure. The size of the public key point, i.e. the number
+ * of bytes used in `kbuf`, is returned.
+ *
+ * If `kbuf` is `NULL`, then the public key point is NOT computed, and
+ * the public key structure `*pk` is unmodified (`pk` may be `NULL` in
+ * that case). The size of the public key point is still returned.
+ *
+ * If `pk` is `NULL` but `kbuf` is not `NULL`, then the public key
+ * point is computed and stored in `kbuf`, and its size is returned.
+ *
+ * If the curve used by the private key is not supported by the curve
+ * implementation, then this function returns zero.
+ *
+ * The private key MUST be valid. An off-range private key value is not
+ * necessarily detected, and leads to unpredictable results.
+ *
+ * \param impl   the elliptic curve implementation.
+ * \param pk     the public key structure to fill (or `NULL`).
+ * \param kbuf   the public key point buffer (or `NULL`).
+ * \param sk     the source private key.
+ * \return  the public key point length (in bytes), or zero.
+ */
+size_t br_ec_compute_pub(const br_ec_impl *impl, br_ec_public_key *pk,
+	void *kbuf, const br_ec_private_key *sk);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/third_party/bearssl/inc/bearssl_hash.h
+++ b/third_party/bearssl/inc/bearssl_hash.h
--- a/third_party/bearssl/inc/bearssl_hmac.h
+++ b/third_party/bearssl/inc/bearssl_hmac.h
@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef BR_BEARSSL_HMAC_H__
+#define BR_BEARSSL_HMAC_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "bearssl_hash.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \file bearssl_hmac.h
+ *
+ * # HMAC
+ *
+ * HMAC is initialized with a key and an underlying hash function; it
+ * then fills a "key context". That context contains the processed
+ * key.
+ *
+ * With the key context, a HMAC context can be initialized to process
+ * the input bytes and obtain the MAC output. The key context is not
+ * modified during that process, and can be reused.
+ *
+ * IMPORTANT: HMAC shall be used only with functions that have the
+ * following properties:
+ *
+ *   - hash output size does not exceed 64 bytes;
+ *   - hash internal state size does not exceed 64 bytes;
+ *   - internal block length is a power of 2 between 16 and 256 bytes.
+ */
+
+/**
+ * \brief HMAC key context.
+ *
+ * The HMAC key context is initialised with a hash function implementation
+ * and a secret key. Contents are opaque (callers should not access them
+ * directly). The caller is responsible for allocating the context where
+ * appropriate. Context initialisation and usage incurs no dynamic
+ * allocation, so there is no release function.
+ */
+typedef struct {
+#ifndef BR_DOXYGEN_IGNORE
+	/* Hash function vtable; this is what br_hmac_key_get_digest() returns. */
+	const br_hash_class *dig_vtable;
+	/* NOTE(review): presumably the processed key data for the inner (ksi)
+	   and outer (kso) HMAC hash passes -- confirm against the implementation. */
+	unsigned char ksi[64], kso[64];
+#endif
+} br_hmac_key_context;
+
+/**
+ * \brief HMAC key context initialisation.
+ *
+ * Initialise the key context with the provided key, using the hash function
+ * identified by `digest_vtable`. This supports arbitrary key lengths.
+ *
+ * \param kc              HMAC key context to initialise.
+ * \param digest_vtable   pointer to the hash function implementation vtable.
+ * \param key             pointer to the HMAC secret key.
+ * \param key_len         HMAC secret key length (in bytes).
+ */
+void br_hmac_key_init(br_hmac_key_context *kc,
+	const br_hash_class *digest_vtable, const void *key, size_t key_len);
+
+/**
+ * \brief Get the underlying hash function.
+ *
+ * This function returns a pointer to the implementation vtable of the
+ * hash function used for this HMAC key context.
+ *
+ * \param kc   HMAC key context.
+ * \return  the hash function implementation.
+ */
+static inline const br_hash_class *br_hmac_key_get_digest(
+	const br_hmac_key_context *kc)
+{
+	return kc->dig_vtable;
+}
+
+/**
+ * \brief HMAC computation context.
+ *
+ * The HMAC computation context maintains the state for a single HMAC
+ * computation. It is modified as input bytes are injected. The context
+ * is caller-allocated and has no release function since it does not
+ * dynamically allocate external resources. Its contents are opaque.
+ */
+typedef struct {
+#ifndef BR_DOXYGEN_IGNORE
+	/* Hash context; its vtable is what br_hmac_get_digest() returns. */
+	br_hash_compat_context dig;
+	/* NOTE(review): presumably the outer-pass key data copied from the
+	   key context -- confirm against the implementation. */
+	unsigned char kso[64];
+	/* Output length in bytes; this is what br_hmac_size() returns. */
+	size_t out_len;
+#endif
+} br_hmac_context;
+
+/**
+ * \brief HMAC computation initialisation.
+ *
+ * Initialise a HMAC context with a key context. The key context is
+ * unmodified. Relevant data from the key context is immediately copied;
+ * the key context can thus be independently reused, modified or released
+ * without impacting this HMAC computation.
+ *
+ * An explicit output length can be specified; the actual output length
+ * will be the minimum of that value and the natural HMAC output length.
+ * If `out_len` is 0, then the natural HMAC output length is selected. The
+ * "natural output length" is the output length of the underlying hash
+ * function.
+ *
+ * \param ctx       HMAC context to initialise.
+ * \param kc        HMAC key context (already initialised with the key).
+ * \param out_len   HMAC output length (0 to select "natural length").
+ */
+void br_hmac_init(br_hmac_context *ctx,
+	const br_hmac_key_context *kc, size_t out_len);
+
+/**
+ * \brief Get the HMAC output size.
+ *
+ * The HMAC output size is the number of bytes that will actually be
+ * produced with `br_hmac_out()` with the provided context. This function
+ * MUST NOT be called on a non-initialised HMAC computation context.
+ * The returned value is the minimum of the HMAC natural length (output
+ * size of the underlying hash function) and the `out_len` parameter which
+ * was used with the last `br_hmac_init()` call on that context (if the
+ * initialisation `out_len` parameter was 0, then this function will
+ * return the HMAC natural length).
+ *
+ * The context is only read, never modified, so it is accepted through a
+ * pointer-to-const (consistent with `br_hmac_get_digest()` and
+ * `br_hmac_out()`).
+ *
+ * \param ctx   the (already initialised) HMAC computation context.
+ * \return  the HMAC actual output size.
+ */
+static inline size_t
+br_hmac_size(const br_hmac_context *ctx)
+{
+	return ctx->out_len;
+}
+
+/**
+ * \brief Get the underlying hash function.
+ *
+ * This function returns a pointer to the implementation vtable of the
+ * hash function used for this HMAC context.
+ *
+ * \param hc   HMAC context.
+ * \return  the hash function implementation.
+ */
+static inline const br_hash_class *br_hmac_get_digest(
+	const br_hmac_context *hc)
+{
+	return hc->dig.vtable;
+}
+
+/**
+ * \brief Inject some bytes in HMAC.
+ *
+ * The provided `len` bytes are injected as extra input in the HMAC
+ * computation incarnated by the `ctx` HMAC context. It is acceptable
+ * that `len` is zero, in which case `data` is ignored (and may be
+ * `NULL`) and this function does nothing.
+ *
+ * \param ctx    HMAC computation context.
+ * \param data   the extra input bytes (ignored if `len` is zero).
+ * \param len    the extra input length (in bytes).
+ */
+void br_hmac_update(br_hmac_context *ctx, const void *data, size_t len);
+
+/**
+ * \brief Compute the HMAC output.
+ *
+ * The destination buffer MUST be large enough to accommodate the result;
+ * its length is at most the "natural length" of HMAC (i.e. the output
+ * length of the underlying hash function). The context is NOT modified;
+ * further bytes may be processed. Thus, "partial HMAC" values can be
+ * efficiently obtained.
+ *
+ * Returned value is the output length (in bytes).
+ *
+ * \param ctx   HMAC computation context.
+ * \param out   destination buffer for the HMAC output.
+ * \return  the produced value length (in bytes).
+ */
+size_t br_hmac_out(const br_hmac_context *ctx, void *out);
+
+/**
+ * \brief Constant-time HMAC computation.
+ *
+ * This function computes the HMAC output in constant time. Some extra
+ * input bytes are processed, then the output is computed. The extra
+ * input consists of the `len` bytes pointed to by `data`. The `len`
+ * parameter must lie between `min_len` and `max_len` (inclusive);
+ * `max_len` bytes are actually read from `data`. Computing time (and
+ * memory access pattern) will not depend upon the data byte contents or
+ * the value of `len`.
+ *
+ * The output is written in the `out` buffer, that MUST be large enough
+ * to receive it.
+ *
+ * The difference `max_len - min_len` MUST be less than 2<sup>30</sup>
+ * (i.e. about one gigabyte).
+ *
+ * This function computes the output properly only if the underlying
+ * hash function uses MD padding (i.e. MD5, SHA-1, SHA-224, SHA-256,
+ * SHA-384 or SHA-512).
+ *
+ * The provided context is NOT modified.
+ *
+ * \param ctx       the (already initialised) HMAC computation context.
+ * \param data      the extra input bytes.
+ * \param len       the extra input length (in bytes).
+ * \param min_len   minimum extra input length (in bytes).
+ * \param max_len   maximum extra input length (in bytes).
+ * \param out       destination buffer for the HMAC output.
+ * \return  the produced value length (in bytes).
+ */
+size_t br_hmac_outCT(const br_hmac_context *ctx,
+	const void *data, size_t len, size_t min_len, size_t max_len,
+	void *out);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/third_party/bearssl/inc/bearssl_kdf.h
+++ b/third_party/bearssl/inc/bearssl_kdf.h
@ -0,0 +1,284 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef BR_BEARSSL_KDF_H__
+#define BR_BEARSSL_KDF_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "bearssl_hash.h"
+#include "bearssl_hmac.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \file bearssl_kdf.h
+ *
+ * # Key Derivation Functions
+ *
+ * KDFs are functions that take a variable length input, and provide a
+ * variable length output, meant to be used to derive subkeys from a
+ * master key.
+ *
+ * ## HKDF
+ *
+ * HKDF is a KDF defined by [RFC 5869](https://tools.ietf.org/html/rfc5869).
+ * It is based on HMAC, itself using an underlying hash function. Any
+ * hash function can be used, as long as it is compatible with the rules
+ * for the HMAC implementation (i.e. output size is 64 bytes or less, hash
+ * internal state size is 64 bytes or less, and the internal block length is
+ * a power of 2 between 16 and 256 bytes). HKDF has two phases:
+ *
+ *  - HKDF-Extract: the input data is ingested, along with a "salt" value.
+ *
+ *  - HKDF-Expand: the output is produced, from the result of processing
+ *    the input and salt, and using an extra non-secret parameter called
+ *    "info".
+ *
+ * The "salt" and "info" strings are non-secret and can be empty. Their role
+ * is normally to bind the input and output, respectively, to conventional
+ * identifiers that qualify them within the used protocol or application.
+ *
+ * The implementation defined in this file uses the following functions:
+ *
+ *  - `br_hkdf_init()`: initialize an HKDF context, with a hash function,
+ *    and the salt. This starts the HKDF-Extract process.
+ *
+ *  - `br_hkdf_inject()`: inject more input bytes. This function may be
+ *    called repeatedly if the input data is provided by chunks.
+ *
+ *  - `br_hkdf_flip()`: end the HKDF-Extract process, and start the
+ *    HKDF-Expand process.
+ *
+ *  - `br_hkdf_produce()`: get the next bytes of output. This function
+ *    may be called several times to obtain the full output by chunks.
+ *    For correct HKDF processing, the same "info" string must be
+ *    provided for each call.
+ *
+ * Note that the HKDF total output size (the number of bytes that
+ * HKDF-Expand is willing to produce) is limited: if the hash output size
+ * is _n_ bytes, then the maximum output size is _255*n_.
+ *
+ * ## SHAKE
+ *
+ * SHAKE is defined in
+ * [FIPS 202](https://csrc.nist.gov/publications/detail/fips/202/final)
+ * under two versions: SHAKE128 and SHAKE256, offering an alleged
+ * "security level" of 128 and 256 bits, respectively (SHAKE128 is
+ * about 20 to 25% faster than SHAKE256). SHAKE internally relies on
+ * the Keccak family of sponge functions, not on any externally provided
+ * hash function. Contrary to HKDF, SHAKE does not have a concept of
+ * either a "salt" or an "info" string. The API consists in four
+ * functions:
+ *
+ *  - `br_shake_init()`: initialize a SHAKE context for a given
+ *    security level.
+ *
+ *  - `br_shake_inject()`: inject more input bytes. This function may be
+ *    called repeatedly if the input data is provided by chunks.
+ *
+ *  - `br_shake_flip()`: end the data injection process, and start the
+ *    data production process.
+ *
+ *  - `br_shake_produce()`: get the next bytes of output. This function
+ *    may be called several times to obtain the full output by chunks.
+ */
+
+/**
+ * \brief HKDF context.
+ *
+ * The HKDF context is initialized with a hash function implementation
+ * and a salt value. Contents are opaque (callers should not access them
+ * directly). The caller is responsible for allocating the context where
+ * appropriate. Context initialisation and usage incurs no dynamic
+ * allocation, so there is no release function.
+ */
+typedef struct {
+#ifndef BR_DOXYGEN_IGNORE
+	/*
+	 * Internal state, compiled out when BR_DOXYGEN_IGNORE is defined;
+	 * callers must treat these fields as opaque.
+	 */
+
+	/*
+	 * The two members share storage, so only one is live at a time.
+	 * NOTE(review): presumably hmac_ctx serves HKDF-Extract and prk_ctx
+	 * serves HKDF-Expand -- confirm against the implementation.
+	 */
+	union {
+		br_hmac_context hmac_ctx;
+		br_hmac_key_context prk_ctx;
+	} u;
+	/*
+	 * 64 bytes matches the maximum hash output size stated in the
+	 * file-level notes. NOTE(review): presumably holds the current
+	 * output block -- confirm.
+	 */
+	unsigned char buf[64];
+	/* NOTE(review): presumably the current offset into buf -- confirm. */
+	size_t ptr;
+	/* NOTE(review): presumably the hash output length, in bytes -- confirm. */
+	size_t dig_len;
+	/*
+	 * NOTE(review): presumably the HKDF-Expand block counter, which would
+	 * be what limits output to 255 blocks -- confirm.
+	 */
+	unsigned chunk_num;
+#endif
+} br_hkdf_context;
+
+/**
+ * \brief HKDF context initialization.
+ *
+ * The underlying hash function and salt value are provided. Arbitrary
+ * salt lengths can be used.
+ *
+ * HKDF makes a difference between a salt of length zero, and an
+ * absent salt (the latter being equivalent to a salt consisting of
+ * bytes of value zero, of the same length as the hash function output).
+ * If `salt_len` is zero, then this function assumes that the salt is
+ * present but of length zero. To specify an _absent_ salt, use
+ * `BR_HKDF_NO_SALT` as `salt` parameter (`salt_len` is then ignored).
+ *
+ * \param hc              HKDF context to initialise.
+ * \param digest_vtable   pointer to the hash function implementation vtable.
+ * \param salt            HKDF-Extract salt.
+ * \param salt_len        HKDF-Extract salt length (in bytes).
+ */
+void br_hkdf_init(br_hkdf_context *hc, const br_hash_class *digest_vtable,
+	const void *salt, size_t salt_len);
+
+/**
+ * \brief The special "absent salt" value for HKDF.
+ */
+#define BR_HKDF_NO_SALT   (&br_hkdf_no_salt)
+
+#ifndef BR_DOXYGEN_IGNORE
+extern const unsigned char br_hkdf_no_salt;
+#endif
+
+/**
+ * \brief HKDF input injection (HKDF-Extract).
+ *
+ * This function injects some more input bytes ("key material") into
+ * HKDF. This function may be called several times, after `br_hkdf_init()`
+ * but before `br_hkdf_flip()`.
+ *
+ * \param hc        HKDF context.
+ * \param ikm       extra input bytes.
+ * \param ikm_len   number of extra input bytes.
+ */
+void br_hkdf_inject(br_hkdf_context *hc, const void *ikm, size_t ikm_len);
+
+/**
+ * \brief HKDF switch to the HKDF-Expand phase.
+ *
+ * This call terminates the HKDF-Extract process (input injection), and
+ * starts the HKDF-Expand process (output production).
+ *
+ * \param hc   HKDF context.
+ */
+void br_hkdf_flip(br_hkdf_context *hc);
+
+/**
+ * \brief HKDF output production (HKDF-Expand).
+ *
+ * Produce more output bytes from the current state. This function may be
+ * called several times, but only after `br_hkdf_flip()`.
+ *
+ * Returned value is the number of actually produced bytes. The total
+ * output length is limited to 255 times the output length of the
+ * underlying hash function.
+ *
+ * \param hc         HKDF context.
+ * \param info       application specific information string.
+ * \param info_len   application specific information string length (in bytes).
+ * \param out        destination buffer for the HKDF output.
+ * \param out_len    the length of the requested output (in bytes).
+ * \return  the produced output length (in bytes).
+ */
+size_t br_hkdf_produce(br_hkdf_context *hc,
+	const void *info, size_t info_len, void *out, size_t out_len);
+
+/**
+ * \brief SHAKE context.
+ *
+ * The SHAKE context is initialized with a "security level". The internal
+ * notion is called "capacity"; the capacity is twice the security level
+ * (for instance, SHAKE128 has capacity 256).
+ *
+ * The caller is responsible for allocating the context where
+ * appropriate. Context initialisation and usage incurs no dynamic
+ * allocation, so there is no release function.
+ */
+typedef struct {
+#ifndef BR_DOXYGEN_IGNORE
+	/*
+	 * Internal state, compiled out when BR_DOXYGEN_IGNORE is defined;
+	 * callers should not access these fields directly.
+	 */
+
+	/*
+	 * 200 bytes, i.e. the same size as A[] (25 * 8 bytes).
+	 * NOTE(review): presumably a staging buffer for injected input and
+	 * produced output -- confirm.
+	 */
+	unsigned char dbuf[200];
+	/* NOTE(review): presumably the current offset into dbuf -- confirm. */
+	size_t dptr;
+	/*
+	 * NOTE(review): presumably the sponge rate in bytes, derived from the
+	 * security level given to br_shake_init() -- confirm.
+	 */
+	size_t rate;
+	/*
+	 * 25 words of 64 bits (1600 bits). NOTE(review): presumably the
+	 * Keccak state mentioned in the file-level notes -- confirm.
+	 */
+	uint64_t A[25];
+#endif
+} br_shake_context;
+
+/**
+ * \brief SHAKE context initialization.
+ *
+ * The context is initialized for the provided "security level".
+ * Internally, this sets the "capacity" to twice the security level;
+ * thus, for SHAKE128, the `security_level` parameter should be 128,
+ * which corresponds to a 256-bit capacity.
+ *
+ * Allowed security levels are all multiples of 32, from 32 to 768,
+ * inclusive. Larger security levels imply lower performance; levels
+ * beyond 256 bits don't make much sense. Standard levels are 128
+ * and 256 bits (for SHAKE128 and SHAKE256, respectively).
+ *
+ * \param sc               SHAKE context to initialise.
+ * \param security_level   security level (in bits).
+ */
+void br_shake_init(br_shake_context *sc, int security_level);
+
+/**
+ * \brief SHAKE input injection.
+ *
+ * This function injects some more input bytes ("key material") into
+ * SHAKE. This function may be called several times, after `br_shake_init()`
+ * but before `br_shake_flip()`.
+ *
+ * \param sc     SHAKE context.
+ * \param data   extra input bytes.
+ * \param len    number of extra input bytes.
+ */
+void br_shake_inject(br_shake_context *sc, const void *data, size_t len);
+
+/**
+ * \brief SHAKE switch to production phase.
+ *
+ * This call terminates the input injection process, and starts the
+ * output production process.
+ *
+ * \param sc   SHAKE context.
+ */
+void br_shake_flip(br_shake_context *sc);
+
+/**
+ * \brief SHAKE output production.
+ *
+ * Produce more output bytes from the current state. This function may be
+ * called several times, but only after `br_shake_flip()`.
+ *
+ * There is no practical limit to the number of bytes that may be produced.
+ *
+ * \param sc    SHAKE context.
+ * \param out   destination buffer for the SHAKE output.
+ * \param len   the length of the requested output (in bytes).
+ */
+void br_shake_produce(br_shake_context *sc, void *out, size_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/third_party/bearssl/inc/bearssl_pem.h
+++ b/third_party/bearssl/inc/bearssl_pem.h
@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef BR_BEARSSL_PEM_H__
+#define BR_BEARSSL_PEM_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \file bearssl_pem.h
+ *
+ * # PEM Support
+ *
+ * PEM is a traditional encoding layer used to store binary objects (in
+ * particular X.509 certificates, and private keys) in text files. While
+ * the acronym comes from an old, defunct standard ("Privacy Enhanced
+ * Mail"), the format has been reused, with some variations, by many
+ * systems, and is a _de facto_ standard, even though it is not, actually,
+ * specified in all clarity anywhere.
+ *
+ * ## Format Details
+ *
+ * BearSSL contains a generic, streamed PEM decoder, which handles the
+ * following format:
+ *
+ *   - The input source (a sequence of bytes) is assumed to be the
+ *     encoding of a text file in an ASCII-compatible charset. This
+ *     includes ISO-8859-1, Windows-1252, and UTF-8 encodings. Each
+ *     line ends on a newline character (U+000A LINE FEED). The
+ *     U+000D CARRIAGE RETURN characters are ignored, so the code
+ *     accepts both Windows-style and Unix-style line endings.
+ *
+ *   - Each object begins with a banner that occurs at the start of
+ *     a line; the first banner characters are "`-----BEGIN `" (five
+ *     dashes, the word "BEGIN", and a space). The banner matching is
+ *     not case-sensitive.
+ *
+ *   - The _object name_ consists in the characters that follow the
+ *     banner start sequence, up to the end of the line, but without
+ *     trailing dashes (in "normal" PEM, there are five trailing
+ *     dashes, but this implementation is not picky about these dashes).
+ *     The BearSSL decoder normalises the name characters to uppercase
+ *     (for ASCII letters only) and accepts names up to 127 characters.
+ *
+ *   - The object ends with a banner that again occurs at the start of
+ *     a line, and starts with "`-----END `" (again case-insensitive).
+ *
+ *   - Between that start and end banner, only Base64 data shall occur.
+ *     Base64 converts each sequence of three bytes into four
+ *     characters; the four characters are ASCII letters, digits, "`+`"
+ *     or "`/`" signs, and one or two "`=`" signs may occur in the last
+ *     quartet. Whitespace is ignored (whitespace is any ASCII character
+ *     of code 32 or less, so control characters are whitespace) and
+ *     lines may have arbitrary length; the only restriction is that the
+ *     four characters of a quartet must appear on the same line (no
+ *     line break inside a quartet).
+ *
+ *   - A single file may contain more than one PEM object. Bytes that
+ *     occur between objects are ignored.
+ *
+ *
+ * ## PEM Decoder API
+ *
+ * The PEM decoder offers a state-machine API. The caller allocates a
+ * decoder context, then injects source bytes. Source bytes are pushed
+ * with `br_pem_decoder_push()`. The decoder stops accepting bytes when
+ * it reaches an "event", which is either the start of an object, the
+ * end of an object, or a decoding error within an object.
+ *
+ * The `br_pem_decoder_event()` function is used to obtain the current
+ * event; it also clears it, thus allowing the decoder to accept more
+ * bytes. When an object start event is raised, the decoder context
+ * offers the found object name (normalised to ASCII uppercase).
+ *
+ * When an object is reached, the caller must set an appropriate callback
+ * function, which will receive (by chunks) the decoded object data.
+ *
+ * Since the decoder context makes no dynamic allocation, it requires
+ * no explicit deallocation.
+ */
+
+/**
+ * \brief PEM decoder context.
+ *
+ * Contents are opaque (they should not be accessed directly).
+ */
+typedef struct {
+#ifndef BR_DOXYGEN_IGNORE
+	/*
+	 * Internal state, compiled out when BR_DOXYGEN_IGNORE is defined;
+	 * callers should not access these fields directly.
+	 */
+
+	/* CPU for the T0 virtual machine. */
+	struct {
+		uint32_t *dp;
+		uint32_t *rp;
+		const unsigned char *ip;
+	} cpu;
+	/*
+	 * NOTE(review): presumably the data and return stacks that cpu.dp
+	 * and cpu.rp point into -- confirm.
+	 */
+	uint32_t dp_stack[32];
+	uint32_t rp_stack[32];
+	int err;
+
+	/*
+	 * NOTE(review): presumably the input bytes handed to
+	 * br_pem_decoder_push() that are not yet consumed -- confirm.
+	 */
+	const unsigned char *hbuf;
+	size_t hlen;
+
+	/*
+	 * Receiver callback and its opaque argument, as stored by
+	 * br_pem_decoder_setdest().
+	 */
+	void (*dest)(void *dest_ctx, const void *src, size_t len);
+	void *dest_ctx;
+
+	/*
+	 * NOTE(review): presumably the pending event code reported by
+	 * br_pem_decoder_event() -- confirm.
+	 */
+	unsigned char event;
+	/*
+	 * Object name, as returned by br_pem_decoder_name(). 128 bytes is
+	 * consistent with the documented 127-character limit; presumably
+	 * the extra byte is for a terminating zero -- confirm.
+	 */
+	char name[128];
+	/* NOTE(review): presumably a buffer for decoded bytes, with ptr as its fill offset -- confirm. */
+	unsigned char buf[255];
+	size_t ptr;
+#endif
+} br_pem_decoder_context;
+
+/**
+ * \brief Initialise a PEM decoder structure.
+ *
+ * \param ctx   decoder context to initialise.
+ */
+void br_pem_decoder_init(br_pem_decoder_context *ctx);
+
+/**
+ * \brief Push some bytes into the decoder.
+ *
+ * Returned value is the number of bytes actually consumed; this may be
+ * less than the number of provided bytes if an event is raised. When an
+ * event is raised, it must be read (with `br_pem_decoder_event()`);
+ * until the event is read, this function will return 0.
+ *
+ * \param ctx    decoder context.
+ * \param data   new data bytes.
+ * \param len    number of new data bytes.
+ * \return  the number of bytes actually received (may be less than `len`).
+ */
+size_t br_pem_decoder_push(br_pem_decoder_context *ctx,
+	const void *data, size_t len);
+
+/**
+ * \brief Install the callback that receives decoded object data.
+ *
+ * While the decoder is inside an object, the `dest` function is invoked
+ * repeatedly, each time with the opaque `dest_ctx` pointer and the next
+ * chunk of decoded bytes. Passing 0 as `dest` makes the decoder discard
+ * the decoded data. The receiver may be changed at any time; in
+ * practice, it should be set right after a "start of object" event has
+ * been obtained.
+ *
+ * \param ctx        decoder context.
+ * \param dest       function to call with each chunk of decoded data.
+ * \param dest_ctx   opaque pointer handed back to `dest` on each call.
+ */
+static inline void
+br_pem_decoder_setdest(br_pem_decoder_context *ctx,
+	void (*dest)(void *dest_ctx, const void *src, size_t len),
+	void *dest_ctx)
+{
+	/* The two stores are independent; record the argument, then the callback. */
+	ctx->dest_ctx = dest_ctx;
+	ctx->dest = dest;
+}
+
+/**
+ * \brief Get the last event.
+ *
+ * If an event was raised, then this function returns the event value, and
+ * also clears it, thereby allowing the decoder to proceed. If no event
+ * was raised since the last call to `br_pem_decoder_event()`, then this
+ * function returns 0.
+ *
+ * \param ctx   decoder context.
+ * \return  the raised event, or 0.
+ */
+int br_pem_decoder_event(br_pem_decoder_context *ctx);
+
+/**
+ * \brief Event: start of object.
+ *
+ * This event is raised when the start of a new object has been detected.
+ * The object name (normalised to uppercase) can be accessed with
+ * `br_pem_decoder_name()`.
+ */
+#define BR_PEM_BEGIN_OBJ   1
+
+/**
+ * \brief Event: end of object.
+ *
+ * This event is raised when the end of the current object is reached
+ * (normally, i.e. with no decoding error).
+ */
+#define BR_PEM_END_OBJ     2
+
+/**
+ * \brief Event: decoding error.
+ *
+ * This event is raised when decoding fails within an object.
+ * This formally closes the current object and brings the decoder back
+ * to the "out of any object" state. The offending line in the source
+ * is consumed.
+ */
+#define BR_PEM_ERROR       3
+
+/**
+ * \brief Retrieve the name of the object that was just entered.
+ *
+ * The name is meaningful only once the "start of object" event has been
+ * raised. It is normalised to uppercase (ASCII letters only), and
+ * trailing dashes are not part of it.
+ *
+ * \param ctx   decoder context.
+ * \return  the current object name.
+ */
+static inline const char *
+br_pem_decoder_name(br_pem_decoder_context *ctx)
+{
+	/* The name is stored inside the context itself; no copy is made. */
+	const char *obj_name = ctx->name;
+
+	return obj_name;
+}
+
+/**
+ * \brief Encode an object in PEM.
+ *
+ * This function encodes the provided binary object (`data`, of length `len`
+ * bytes) into PEM. The `banner` text will be included in the header and
+ * footer (e.g. use `"CERTIFICATE"` to get a `"BEGIN CERTIFICATE"` header).
+ *
+ * The length (in characters) of the PEM output is returned; that length
+ * does NOT include the terminating zero, that this function nevertheless
+ * adds. If using the returned value for allocation purposes, the allocated
+ * buffer size MUST be at least one byte larger than the returned size.
+ *
+ * If `dest` is `NULL`, then the encoding does not happen; however, the
+ * length of the encoded object is still computed and returned.
+ *
+ * The `data` pointer may be `NULL` only if `len` is zero (when encoding
+ * an object of length zero, which is not very useful), or when `dest`
+ * is `NULL` (in that case, source data bytes are ignored).
+ *
+ * Some `flags` can be specified to alter the encoding behaviour:
+ *
+ *   - If `BR_PEM_LINE64` is set, then line-breaking will occur after
+ *     every 64 characters of output, instead of the default of 76.
+ *
+ *   - If `BR_PEM_CRLF` is set, then end-of-line sequence will use
+ *     CR+LF instead of a single LF.
+ *
+ * The `data` and `dest` buffers may overlap, in which case the source
+ * binary data is destroyed in the process. Note that the PEM-encoded output
+ * is always larger than the source binary.
+ *
+ * \param dest     the destination buffer (or `NULL`).
+ * \param data     the source buffer (can be `NULL` in some cases).
+ * \param len      the source length (in bytes).
+ * \param banner   the PEM banner expression.
+ * \param flags    the behavioural flags.
+ * \return  the PEM object length (in characters), EXCLUDING the final zero.
+ */
+size_t br_pem_encode(void *dest, const void *data, size_t len,
+	const char *banner, unsigned flags);
+
+/**
+ * \brief PEM encoding flag: split lines at 64 characters.
+ */
+#define BR_PEM_LINE64   0x0001
+
+/**
+ * \brief PEM encoding flag: use CR+LF line endings.
+ */
+#define BR_PEM_CRLF     0x0002
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/third_party/bearssl/inc/bearssl_prf.h
+++ b/third_party/bearssl/inc/bearssl_prf.h
@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef BR_BEARSSL_PRF_H__
+#define BR_BEARSSL_PRF_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \file bearssl_prf.h
+ *
+ * # The TLS PRF
+ *
+ * The "PRF" is the pseudorandom function used internally during the
+ * SSL/TLS handshake, notably to expand negotiated shared secrets into
+ * the symmetric encryption keys that will be used to process the
+ * application data.
+ *
+ * TLS 1.0 and 1.1 define a PRF that is based on both MD5 and SHA-1. This
+ * is implemented by the `br_tls10_prf()` function.
+ *
+ * TLS 1.2 redefines the PRF, using an explicit hash function. The
+ * `br_tls12_sha256_prf()` and `br_tls12_sha384_prf()` functions apply that
+ * PRF with, respectively, SHA-256 and SHA-384. Most standard cipher suites
+ * rely on the SHA-256 based PRF, but some use SHA-384.
+ *
+ * The PRF always uses as input three parameters: a "secret" (some
+ * bytes), a "label" (ASCII string), and a "seed" (again some bytes). An
+ * arbitrary output length can be produced. The "seed" is provided as an
+ * arbitrary number of binary chunks, that gets internally concatenated.
+ */
+
+/**
+ * \brief Type for a seed chunk.
+ *
+ * Each chunk may have an arbitrary length, and may be empty (no byte at
+ * all). If the chunk length is zero, then the pointer to the chunk data
+ * may be `NULL`.
+ */
+typedef struct {
+	/**
+	 * \brief Pointer to the chunk data.
+	 *
+	 * May be `NULL` when `len` is zero.
+	 */
+	const void *data;
+
+	/**
+	 * \brief Chunk length (in bytes).
+	 *
+	 * May be zero, in which case the chunk is empty.
+	 */
+	size_t len;
+} br_tls_prf_seed_chunk;
+
+/**
+ * \brief PRF implementation for TLS 1.0 and 1.1.
+ *
+ * This PRF is the one specified by TLS 1.0 and 1.1. It internally uses
+ * MD5 and SHA-1.
+ *
+ * \param dst          destination buffer.
+ * \param len          output length (in bytes).
+ * \param secret       secret value (key) for this computation.
+ * \param secret_len   length of "secret" (in bytes).
+ * \param label        PRF label (zero-terminated ASCII string).
+ * \param seed_num     number of seed chunks.
+ * \param seed         seed chunks for this computation (usually non-secret).
+ */
+void br_tls10_prf(void *dst, size_t len,
+	const void *secret, size_t secret_len, const char *label,
+	size_t seed_num, const br_tls_prf_seed_chunk *seed);
+
+/**
+ * \brief PRF implementation for TLS 1.2, with SHA-256.
+ *
+ * This PRF is the one specified by TLS 1.2, when the underlying hash
+ * function is SHA-256.
+ *
+ * \param dst          destination buffer.
+ * \param len          output length (in bytes).
+ * \param secret       secret value (key) for this computation.
+ * \param secret_len   length of "secret" (in bytes).
+ * \param label        PRF label (zero-terminated ASCII string).
+ * \param seed_num     number of seed chunks.
+ * \param seed         seed chunks for this computation (usually non-secret).
+ */
+void br_tls12_sha256_prf(void *dst, size_t len,
+	const void *secret, size_t secret_len, const char *label,
+	size_t seed_num, const br_tls_prf_seed_chunk *seed);
+
+/**
+ * \brief PRF implementation for TLS 1.2, with SHA-384.
+ *
+ * This PRF is the one specified by TLS 1.2, when the underlying hash
+ * function is SHA-384.
+ *
+ * \param dst          destination buffer.
+ * \param len          output length (in bytes).
+ * \param secret       secret value (key) for this computation.
+ * \param secret_len   length of "secret" (in bytes).
+ * \param label        PRF label (zero-terminated ASCII string).
+ * \param seed_num     number of seed chunks.
+ * \param seed         seed chunks for this computation (usually non-secret).
+ */
+void br_tls12_sha384_prf(void *dst, size_t len,
+	const void *secret, size_t secret_len, const char *label,
+	size_t seed_num, const br_tls_prf_seed_chunk *seed);
+
+/** 
+ * \brief A convenient type name for a PRF implementation.
+ *
+ * \param dst          destination buffer.
+ * \param len          output length (in bytes).
+ * \param secret       secret value (key) for this computation.
+ * \param secret_len   length of "secret" (in bytes).
+ * \param label        PRF label (zero-terminated ASCII string).
+ * \param seed_num     number of seed chunks.
+ * \param seed         seed chunks for this computation (usually non-secret).
+ */
+typedef void (*br_tls_prf_impl)(void *dst, size_t len,
+	const void *secret, size_t secret_len, const char *label,
+	size_t seed_num, const br_tls_prf_seed_chunk *seed);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/third_party/bearssl/inc/bearssl_rand.h
+++ b/third_party/bearssl/inc/bearssl_rand.h
@ -0,0 +1,397 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef BR_BEARSSL_RAND_H__
+#define BR_BEARSSL_RAND_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "bearssl_block.h"
+#include "bearssl_hash.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \file bearssl_rand.h
+ *
+ * # Pseudo-Random Generators
+ *
+ * A PRNG is a state-based engine that outputs pseudo-random bytes on
+ * demand. It is initialized with an initial seed, and additional seed
+ * bytes can be added afterwards. Bytes produced depend on the seeds and
+ * also on the exact sequence of calls (including sizes requested for
+ * each call).
+ *
+ *
+ * ## Procedural and OOP API
+ *
+ * For the PRNG of name "`xxx`", two APIs are provided. The _procedural_
+ * API defines a context structure `br_xxx_context` and three functions:
+ *
+ *   - `br_xxx_init()`
+ *
+ *     Initialise the context with an initial seed.
+ *
+ *   - `br_xxx_generate()`
+ *
+ *     Produce some pseudo-random bytes.
+ *
+ *   - `br_xxx_update()`
+ *
+ *     Inject some additional seed.
+ *
+ * The initialisation function sets the first context field (`vtable`)
+ * to a pointer to the vtable that supports the OOP API. The OOP API
+ * provides access to the same functions through function pointers,
+ * named `init()`, `generate()` and `update()`.
+ *
+ * Note that the context initialisation method may accept additional
+ * parameters, provided as a 'const void *' pointer at API level. These
+ * additional parameters depend on the implemented PRNG.
+ *
+ *
+ * ## HMAC_DRBG
+ *
+ * HMAC_DRBG is defined in [NIST SP 800-90A Revision
+ * 1](http://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-90Ar1.pdf).
+ * It uses HMAC repeatedly, over some configurable underlying hash
+ * function. In BearSSL, it is implemented under the "`hmac_drbg`" name.
+ * The "extra parameters" pointer for context initialisation should be
+ * set to a pointer to the vtable for the underlying hash function (e.g.
+ * pointer to `br_sha256_vtable` to use HMAC_DRBG with SHA-256).
+ *
+ * According to the NIST standard, each request shall produce up to
+ * 2<sup>19</sup> bits (i.e. 64 kB of data); moreover, the context shall
+ * be reseeded at least once every 2<sup>48</sup> requests. This
+ * implementation does not maintain the reseed counter (the threshold is
+ * too high to be reached in practice) and does not object to producing
+ * more than 64 kB in a single request; thus, the code cannot fail,
+ * which corresponds to the fact that the API has no room for error
+ * codes. However, this implies that requesting more than 64 kB in one
+ * `generate()` request, or making more than 2<sup>48</sup> requests
+ * without reseeding, is formally out of NIST specification. There is
+ * no currently known security penalty for exceeding the NIST limits,
+ * and, in any case, HMAC_DRBG usage in implementing SSL/TLS always
+ * stays much below these thresholds.
+ *
+ *
+ * ## AESCTR_DRBG
+ *
+ * AESCTR_DRBG is a custom PRNG based on AES-128 in CTR mode. This is
+ * meant to be used only in situations where you are desperate for
+ * speed, and have a hardware-optimized AES/CTR implementation. Whether
+ * this will yield perceptible improvements depends on what you use the
+ * pseudorandom bytes for, and how many you want; for instance, RSA key
+ * pair generation uses a substantial amount of randomness, and using
+ * AESCTR_DRBG instead of HMAC_DRBG yields a 15 to 20% increase in key
+ * generation speed on a recent x86 CPU (Intel Core i7-6567U at 3.30 GHz).
+ *
+ * Internally, it uses CTR mode with successive counter values, starting
+ * at zero (counter value expressed over 128 bits, big-endian convention).
+ * The counter is not allowed to reach 32768; thus, every 32768*16 bytes
+ * at most, the `update()` function is run (on an empty seed, if none is
+ * provided). The `update()` function computes the new AES-128 key by
+ * applying a custom hash function to the concatenation of a state-dependent
+ * word (encryption of an all-one block with the current key) and the new
+ * seed. The custom hash function uses Hirose's construction over AES-256;
+ * see the comments in `aesctr_drbg.c` for details.
+ *
+ * This DRBG does not follow an existing standard, and thus should be
+ * considered as inadequate for production use until it has been properly
+ * analysed.
+ */
+
+/**
+ * \brief Class type for PRNG implementations.
+ *
+ * A `br_prng_class` instance references the methods implementing a PRNG.
+ * Constant instances of this structure are defined for each implemented
+ * PRNG. Such instances are also called "vtables".
+ *
+ * All three methods receive the context as a `const br_prng_class **`,
+ * i.e. a pointer to the vtable pointer that is the first field of the
+ * context structure.
+ */
+typedef struct br_prng_class_ br_prng_class;
+struct br_prng_class_ {
+	/**
+	 * \brief Size (in bytes) of the context structure appropriate for
+	 * running this PRNG.
+	 */
+	size_t context_size;
+
+	/**
+	 * \brief Initialisation method.
+	 *
+	 * The context to initialise is provided as a pointer to its
+	 * first field (the vtable pointer); this function sets that
+	 * first field to a pointer to the vtable.
+	 *
+	 * The extra parameters depend on the implementation; each
+	 * implementation defines what kind of extra parameters it
+	 * expects (if any).
+	 *
+	 * Requirements on the initial seed depend on the implemented
+	 * PRNG.
+	 *
+	 * \param ctx        PRNG context to initialise.
+	 * \param params     extra parameters for the PRNG.
+	 * \param seed       initial seed.
+	 * \param seed_len   initial seed length (in bytes).
+	 */
+	void (*init)(const br_prng_class **ctx, const void *params,
+		const void *seed, size_t seed_len);
+
+	/**
+	 * \brief Random bytes generation.
+	 *
+	 * This method produces `len` pseudorandom bytes, in the `out`
+	 * buffer. The context is updated accordingly.
+	 *
+	 * \param ctx   PRNG context.
+	 * \param out   output buffer.
+	 * \param len   number of pseudorandom bytes to produce.
+	 */
+	void (*generate)(const br_prng_class **ctx, void *out, size_t len);
+
+	/**
+	 * \brief Inject additional seed bytes.
+	 *
+	 * The provided seed bytes are added into the PRNG internal
+	 * entropy pool.
+	 *
+	 * \param ctx        PRNG context.
+	 * \param seed       additional seed.
+	 * \param seed_len   additional seed length (in bytes).
+	 */
+	void (*update)(const br_prng_class **ctx,
+		const void *seed, size_t seed_len);
+};
+
+/**
+ * \brief Context for HMAC_DRBG.
+ *
+ * The context contents are opaque, except the first field, which
+ * supports OOP.
+ */
+typedef struct {
+	/**
+	 * \brief Pointer to the vtable.
+	 *
+	 * This field is set with the initialisation method/function.
+	 */
+	const br_prng_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	/*
+	 * Internal state, hidden from Doxygen. K and V are 64-byte
+	 * buffers (presumably the HMAC_DRBG key and chaining value, sized
+	 * for the largest supported hash output -- confirm in the
+	 * implementation). digest_class is the hash function vtable that
+	 * br_hmac_drbg_get_hash() returns.
+	 */
+	unsigned char K[64];
+	unsigned char V[64];
+	const br_hash_class *digest_class;
+#endif
+} br_hmac_drbg_context;
+
+/**
+ * \brief Statically allocated, constant vtable for HMAC_DRBG.
+ */
+extern const br_prng_class br_hmac_drbg_vtable;
+
+/**
+ * \brief HMAC_DRBG initialisation.
+ *
+ * The context to initialise is provided as a pointer to its first field
+ * (the vtable pointer); this function sets that first field to a
+ * pointer to the vtable.
+ *
+ * The `seed` value is what is called, in NIST terminology, the
+ * concatenation of the "seed", "nonce" and "personalization string", in
+ * that order.
+ *
+ * The `digest_class` parameter defines the underlying hash function.
+ * Formally, the NIST standard specifies that the hash function shall
+ * be only SHA-1 or one of the SHA-2 functions. This implementation also
+ * works with any other implemented hash function (such as MD5), but
+ * this is non-standard and therefore not recommended.
+ *
+ * \param ctx            HMAC_DRBG context to initialise.
+ * \param digest_class   vtable for the underlying hash function.
+ * \param seed           initial seed.
+ * \param seed_len       initial seed length (in bytes).
+ */
+void br_hmac_drbg_init(br_hmac_drbg_context *ctx,
+	const br_hash_class *digest_class, const void *seed, size_t seed_len);
+
+/**
+ * \brief Random bytes generation with HMAC_DRBG.
+ *
+ * This method produces `len` pseudorandom bytes, in the `out`
+ * buffer. The context is updated accordingly. Formally, requesting
+ * more than 65536 bytes in one request falls out of specification
+ * limits (but it won't fail).
+ *
+ * \param ctx   HMAC_DRBG context.
+ * \param out   output buffer.
+ * \param len   number of pseudorandom bytes to produce.
+ */
+void br_hmac_drbg_generate(br_hmac_drbg_context *ctx, void *out, size_t len);
+
+/**
+ * \brief Inject additional seed bytes in HMAC_DRBG.
+ *
+ * The provided seed bytes are added into the HMAC_DRBG internal
+ * entropy pool. The process does not _replace_ existing entropy,
+ * thus pushing non-random bytes (i.e. bytes which are known to the
+ * attackers) does not degrade the overall quality of generated bytes.
+ *
+ * \param ctx        HMAC_DRBG context.
+ * \param seed       additional seed.
+ * \param seed_len   additional seed length (in bytes).
+ */
+void br_hmac_drbg_update(br_hmac_drbg_context *ctx,
+	const void *seed, size_t seed_len);
+
+/**
+ * \brief Get the hash function implementation used by a given instance of
+ * HMAC_DRBG.
+ *
+ * This call MUST NOT be performed on a context which was not
+ * previously initialised. The function simply reads the `digest_class`
+ * field of the context; it does not modify the context.
+ *
+ * \param ctx   HMAC_DRBG context.
+ * \return  the hash function vtable.
+ */
+static inline const br_hash_class *
+br_hmac_drbg_get_hash(const br_hmac_drbg_context *ctx)
+{
+	return ctx->digest_class;
+}
+
+/**
+ * \brief Type for a provider of entropy seeds.
+ *
+ * A "seeder" is a function that is able to obtain random values from
+ * some source and inject them as entropy seed in a PRNG. A seeder
+ * shall guarantee that the total entropy of the injected seed is large
+ * enough to seed a PRNG for purposes of cryptographic key generation
+ * (i.e. at least 128 bits).
+ *
+ * A seeder may report a failure to obtain adequate entropy. Seeders
+ * shall endeavour to recover from transient errors by themselves (by
+ * trying again); thus, callers may consider reported errors as
+ * permanent.
+ *
+ * \param ctx   PRNG context to seed.
+ * \return  1 on success, 0 on error.
+ */
+typedef int (*br_prng_seeder)(const br_prng_class **ctx);
+
+/**
+ * \brief Get a seeder backed by the operating system or hardware.
+ *
+ * Get a seeder that feeds on RNG facilities provided by the current
+ * operating system or hardware. If no such facility is known, then 0
+ * is returned.
+ *
+ * If `name` is not `NULL`, then `*name` is set to a symbolic string
+ * that identifies the seeder implementation. If no seeder is returned
+ * and `name` is not `NULL`, then `*name` is set to a pointer to the
+ * constant string `"none"`.
+ *
+ * \param name   receiver for seeder name, or `NULL`.
+ * \return  the system seeder, if available, or 0.
+ */
+br_prng_seeder br_prng_seeder_system(const char **name);
+
+/**
+ * \brief Context for AESCTR_DRBG.
+ *
+ * The context contents are opaque, except the first field, which
+ * supports OOP.
+ */
+typedef struct {
+	/**
+	 * \brief Pointer to the vtable.
+	 *
+	 * This field is set with the initialisation method/function.
+	 */
+	const br_prng_class *vtable;
+#ifndef BR_DOXYGEN_IGNORE
+	/*
+	 * Internal state, hidden from Doxygen: `sk` is a
+	 * br_aes_gen_ctr_keys key context and `cc` a 32-bit value
+	 * (presumably the running CTR block counter -- confirm in the
+	 * implementation).
+	 */
+	br_aes_gen_ctr_keys sk;
+	uint32_t cc;
+#endif
+} br_aesctr_drbg_context;
+
+/**
+ * \brief Statically allocated, constant vtable for AESCTR_DRBG.
+ */
+extern const br_prng_class br_aesctr_drbg_vtable;
+
+/**
+ * \brief AESCTR_DRBG initialisation.
+ *
+ * The context to initialise is provided as a pointer to its first field
+ * (the vtable pointer); this function sets that first field to a
+ * pointer to the vtable.
+ *
+ * The internal AES key is first set to the all-zero key; then, the
+ * `br_aesctr_drbg_update()` function is called with the provided `seed`.
+ * The call is performed even if the seed length (`seed_len`) is zero.
+ *
+ * The `aesctr` parameter defines the underlying AES/CTR implementation.
+ *
+ * \param ctx        AESCTR_DRBG context to initialise.
+ * \param aesctr     vtable for the AES/CTR implementation.
+ * \param seed       initial seed (can be `NULL` if `seed_len` is zero).
+ * \param seed_len   initial seed length (in bytes).
+ */
+void br_aesctr_drbg_init(br_aesctr_drbg_context *ctx,
+	const br_block_ctr_class *aesctr, const void *seed, size_t seed_len);
+
+/**
+ * \brief Random bytes generation with AESCTR_DRBG.
+ *
+ * This method produces `len` pseudorandom bytes, in the `out`
+ * buffer. The context is updated accordingly.
+ *
+ * \param ctx   AESCTR_DRBG context.
+ * \param out   output buffer.
+ * \param len   number of pseudorandom bytes to produce.
+ */
+void br_aesctr_drbg_generate(br_aesctr_drbg_context *ctx,
+	void *out, size_t len);
+
+/**
+ * \brief Inject additional seed bytes in AESCTR_DRBG.
+ *
+ * The provided seed bytes are added into the AESCTR_DRBG internal
+ * entropy pool. The process does not _replace_ existing entropy,
+ * thus pushing non-random bytes (i.e. bytes which are known to the
+ * attackers) does not degrade the overall quality of generated bytes.
+ *
+ * \param ctx        AESCTR_DRBG context.
+ * \param seed       additional seed.
+ * \param seed_len   additional seed length (in bytes).
+ */
+void br_aesctr_drbg_update(br_aesctr_drbg_context *ctx,
+	const void *seed, size_t seed_len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/third_party/bearssl/inc/bearssl_rsa.h
+++ b/third_party/bearssl/inc/bearssl_rsa.h
--- a/third_party/bearssl/inc/bearssl_ssl.h
+++ b/third_party/bearssl/inc/bearssl_ssl.h
--- a/third_party/bearssl/inc/bearssl_x509.h
+++ b/third_party/bearssl/inc/bearssl_x509.h
--- a/third_party/bearssl/src/aes_big_cbcdec.c
+++ b/third_party/bearssl/src/aes_big_cbcdec.c
@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_block.h
+ *
+ * Initialise an AES CBC-decryption key context ("big" implementation).
+ *
+ *   ctx   context to fill in.
+ *   key   AES key.
+ *   len   key length, handed unchanged to the key schedule (presumably
+ *         in bytes -- confirm in bearssl_block.h).
+ *
+ * The vtable pointer is set first; br_aes_big_keysched_inv() then fills
+ * ctx->skey and its return value is stored in ctx->num_rounds.
+ */
+void
+br_aes_big_cbcdec_init(br_aes_big_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_big_cbcdec_vtable;
+	ctx->num_rounds = br_aes_big_keysched_inv(ctx->skey, key, len);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CBC decryption, in place, one 16-byte block per pass. On return the
+ * buffer at `iv` holds the last ciphertext block that was processed.
+ * `len` is stepped down by 16 per block.
+ */
+void
+br_aes_big_cbcdec_run(const br_aes_big_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *chain = iv;
+	unsigned char *block = data;
+
+	for (; len > 0; len -= 16, block += 16) {
+		unsigned char saved[16];
+		size_t u;
+
+		/*
+		 * Decryption overwrites the block, so keep a copy of the
+		 * ciphertext: it is the chaining value for the next block.
+		 */
+		memcpy(saved, block, sizeof saved);
+		br_aes_big_decrypt(ctx->num_rounds, ctx->skey, block);
+		for (u = 0; u < sizeof saved; u ++) {
+			block[u] ^= chain[u];
+		}
+		memcpy(chain, saved, sizeof saved);
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Vtable exposing this implementation through the generic
+ * br_block_cbcdec_class interface. The initialiser is positional:
+ * context size, then the constants 16 and 4 (presumably the block size
+ * in bytes and its base-2 logarithm -- confirm against the class
+ * definition), then the init and run entry points. The casts adapt the
+ * concrete-context signatures to the class-pointer signatures.
+ */
+const br_block_cbcdec_class br_aes_big_cbcdec_vtable = {
+	sizeof(br_aes_big_cbcdec_keys),
+	16,
+	4,
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))
+		&br_aes_big_cbcdec_init,
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+		&br_aes_big_cbcdec_run
+};
--- a/third_party/bearssl/src/aes_big_cbcenc.c
+++ b/third_party/bearssl/src/aes_big_cbcenc.c
@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_block.h
+ *
+ * Initialise an AES CBC-encryption key context ("big" implementation).
+ *
+ *   ctx   context to fill in.
+ *   key   AES key.
+ *   len   key length, handed unchanged to the key schedule (presumably
+ *         in bytes -- confirm in bearssl_block.h).
+ *
+ * The vtable pointer is set first; br_aes_keysched() then fills
+ * ctx->skey and its return value is stored in ctx->num_rounds.
+ */
+void
+br_aes_big_cbcenc_init(br_aes_big_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_big_cbcenc_vtable;
+	ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CBC encryption, in place, one 16-byte block per pass. On return the
+ * buffer at `iv` holds the last ciphertext block that was produced.
+ * `len` is stepped down by 16 per block.
+ */
+void
+br_aes_big_cbcenc_run(const br_aes_big_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *chain = iv;
+	unsigned char *block = data;
+
+	for (; len > 0; len -= 16, block += 16) {
+		size_t u;
+
+		/* mix in the chaining value before encrypting */
+		for (u = 0; u < 16; u ++) {
+			block[u] ^= chain[u];
+		}
+		br_aes_big_encrypt(ctx->num_rounds, ctx->skey, block);
+		/* the fresh ciphertext block chains into the next one */
+		memcpy(chain, block, 16);
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Vtable exposing this implementation through the generic
+ * br_block_cbcenc_class interface. The initialiser is positional:
+ * context size, then the constants 16 and 4 (presumably the block size
+ * in bytes and its base-2 logarithm -- confirm against the class
+ * definition), then the init and run entry points. The casts adapt the
+ * concrete-context signatures to the class-pointer signatures.
+ */
+const br_block_cbcenc_class br_aes_big_cbcenc_vtable = {
+	sizeof(br_aes_big_cbcenc_keys),
+	16,
+	4,
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
+		&br_aes_big_cbcenc_init,
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+		&br_aes_big_cbcenc_run
+};
--- a/third_party/bearssl/src/aes_big_ctr.c
+++ b/third_party/bearssl/src/aes_big_ctr.c
@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_block.h
+ *
+ * Initialise an AES CTR key context ("big" implementation).
+ *
+ *   ctx   context to fill in.
+ *   key   AES key.
+ *   len   key length, handed unchanged to the key schedule (presumably
+ *         in bytes -- confirm in bearssl_block.h).
+ *
+ * The vtable pointer is set first; br_aes_keysched() then fills
+ * ctx->skey and its return value is stored in ctx->num_rounds.
+ */
+void
+br_aes_big_ctr_init(br_aes_big_ctr_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_big_ctr_vtable;
+	ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+}
+
+/*
+ * XOR the first `len` bytes of `src` into `dst`, byte by byte.
+ * Nothing is touched when `len` is zero.
+ */
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *out = dst;
+	const unsigned char *in = src;
+	size_t u;
+
+	for (u = 0; u < len; u ++) {
+		out[u] ^= in[u];
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CTR-mode processing, in place. Each counter block is the first 12
+ * bytes of `iv` followed by the 32-bit counter `cc` in big-endian
+ * order; the encrypted counter block is XORed into the data. A final
+ * chunk shorter than 16 bytes is allowed and still uses up one counter
+ * value. Returns the counter value following the last one used.
+ */
+uint32_t
+br_aes_big_ctr_run(const br_aes_big_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char *p = data;
+
+	while (len > 0) {
+		unsigned char ks[16];
+		size_t n;
+
+		/* build and encrypt the counter block for this chunk */
+		memcpy(ks, iv, 12);
+		br_enc32be(ks + 12, cc);
+		cc ++;
+		br_aes_big_encrypt(ctx->num_rounds, ctx->skey, ks);
+
+		/* consume a full block, or whatever is left for the tail */
+		n = len < 16 ? len : 16;
+		xorbuf(p, ks, n);
+		p += n;
+		len -= n;
+	}
+	return cc;
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Vtable exposing this implementation through the generic
+ * br_block_ctr_class interface. The initialiser is positional:
+ * context size, then the constants 16 and 4 (presumably the block size
+ * in bytes and its base-2 logarithm -- confirm against the class
+ * definition), then the init and run entry points. The casts adapt the
+ * concrete-context signatures to the class-pointer signatures.
+ */
+const br_block_ctr_class br_aes_big_ctr_vtable = {
+	sizeof(br_aes_big_ctr_keys),
+	16,
+	4,
+	(void (*)(const br_block_ctr_class **, const void *, size_t))
+		&br_aes_big_ctr_init,
+	(uint32_t (*)(const br_block_ctr_class *const *,
+		const void *, uint32_t, void *, size_t))
+		&br_aes_big_ctr_run
+};
--- a/third_party/bearssl/src/aes_big_ctrcbc.c
+++ b/third_party/bearssl/src/aes_big_ctrcbc.c
@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_block.h
+ *
+ * Initialise an AES CTR + CBC-MAC key context ("big" implementation).
+ *
+ *   ctx   context to fill in.
+ *   key   AES key.
+ *   len   key length, handed unchanged to the key schedule (presumably
+ *         in bytes -- confirm in bearssl_block.h).
+ *
+ * The vtable pointer is set first; br_aes_keysched() then fills
+ * ctx->skey and its return value is stored in ctx->num_rounds.
+ */
+void
+br_aes_big_ctrcbc_init(br_aes_big_ctrcbc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_big_ctrcbc_vtable;
+	ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+}
+
+/*
+ * XOR `len` bytes from `src` into `dst` (dst[i] ^= src[i]).
+ * A zero `len` leaves `dst` untouched.
+ */
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *out = dst;
+	const unsigned char *in = src;
+	size_t k;
+
+	for (k = 0; k < len; k ++) {
+		out[k] ^= in[k];
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CTR-mode processing, in place, with a full 128-bit counter.
+ *
+ *   ctx    key context (round count and subkeys are read from it).
+ *   ctr    16-byte counter, big-endian; updated on return.
+ *   data   buffer to process in place.
+ *   len    data length; consumed 16 bytes per iteration, so a value
+ *          that is not a multiple of 16 would wrap the unsigned
+ *          counter (callers are presumably required to pass whole
+ *          blocks -- confirm in bearssl_block.h).
+ *
+ * The counter is held as four 32-bit words, cc3 (bytes 0..3, most
+ * significant) down to cc0 (bytes 12..15, least significant).
+ */
+void
+br_aes_big_ctrcbc_ctr(const br_aes_big_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	unsigned char *buf, *bctr;
+	uint32_t cc0, cc1, cc2, cc3;
+
+	buf = data;
+	bctr = ctr;
+	cc3 = br_dec32be(bctr +  0);
+	cc2 = br_dec32be(bctr +  4);
+	cc1 = br_dec32be(bctr +  8);
+	cc0 = br_dec32be(bctr + 12);
+	while (len > 0) {
+		unsigned char tmp[16];
+		uint32_t carry;
+
+		/* serialise the current counter and encrypt it */
+		br_enc32be(tmp +  0, cc3);
+		br_enc32be(tmp +  4, cc2);
+		br_enc32be(tmp +  8, cc1);
+		br_enc32be(tmp + 12, cc0);
+		br_aes_big_encrypt(ctx->num_rounds, ctx->skey, tmp);
+		xorbuf(buf, tmp, 16);
+		buf += 16;
+		len -= 16;
+		/*
+		 * 128-bit increment without branches: (~(x | -x)) >> 31
+		 * is 1 exactly when x == 0, i.e. when the word just
+		 * wrapped around, and 0 otherwise. The carry is ANDed
+		 * along so it only propagates while every lower word
+		 * wrapped.
+		 */
+		cc0 ++;
+		carry = (~(cc0 | -cc0)) >> 31;
+		cc1 += carry;
+		carry &= (~(cc1 | -cc1)) >> 31;
+		cc2 += carry;
+		carry &= (~(cc2 | -cc2)) >> 31;
+		cc3 += carry;
+	}
+	/* write the advanced counter back for the caller */
+	br_enc32be(bctr +  0, cc3);
+	br_enc32be(bctr +  4, cc2);
+	br_enc32be(bctr +  8, cc1);
+	br_enc32be(bctr + 12, cc0);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CBC-MAC update: each 16-byte block of `data` is XORed into the
+ * 16-byte state at `cbcmac`, which is then encrypted in place. The
+ * data itself is only read. `len` is stepped down by 16 per block.
+ */
+void
+br_aes_big_ctrcbc_mac(const br_aes_big_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	const unsigned char *block = data;
+
+	for (; len > 0; len -= 16, block += 16) {
+		xorbuf(cbcmac, block, 16);
+		br_aes_big_encrypt(ctx->num_rounds, ctx->skey, cbcmac);
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Combined CTR encryption and CBC-MAC. The CTR pass runs first and
+ * transforms `data` in place; the CBC-MAC pass then runs over that
+ * transformed data, so the MAC state at `cbcmac` is updated with the
+ * output of the CTR pass. `ctr` is advanced by the CTR pass.
+ */
+void
+br_aes_big_ctrcbc_encrypt(const br_aes_big_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	br_aes_big_ctrcbc_ctr(ctx, ctr, data, len);
+	br_aes_big_ctrcbc_mac(ctx, cbcmac, data, len);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Combined CBC-MAC and CTR decryption. The CBC-MAC pass runs first,
+ * over `data` as received; only then does the CTR pass transform
+ * `data` in place. The MAC state at `cbcmac` is therefore updated with
+ * the input of the CTR pass. `ctr` is advanced by the CTR pass.
+ */
+void
+br_aes_big_ctrcbc_decrypt(const br_aes_big_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	br_aes_big_ctrcbc_mac(ctx, cbcmac, data, len);
+	br_aes_big_ctrcbc_ctr(ctx, ctr, data, len);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Vtable exposing this implementation through the generic
+ * br_block_ctrcbc_class interface. The initialiser is positional:
+ * context size, then the constants 16 and 4 (presumably the block size
+ * in bytes and its base-2 logarithm -- confirm against the class
+ * definition), then the init, encrypt, decrypt, ctr and mac entry
+ * points, in that order. The casts adapt the concrete-context
+ * signatures to the class-pointer signatures.
+ */
+const br_block_ctrcbc_class br_aes_big_ctrcbc_vtable = {
+	sizeof(br_aes_big_ctrcbc_keys),
+	16,
+	4,
+	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+		&br_aes_big_ctrcbc_init,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_big_ctrcbc_encrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_big_ctrcbc_decrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, size_t))
+		&br_aes_big_ctrcbc_ctr,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, const void *, size_t))
+		&br_aes_big_ctrcbc_mac
+};
--- a/third_party/bearssl/src/aes_big_dec.c
+++ b/third_party/bearssl/src/aes_big_dec.c
@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Inverse S-box, 256 entries indexed by byte value. In this file it
+ * is indexed directly only by the final step of br_aes_big_decrypt();
+ * the inner rounds go through iSsm0 instead, and the key schedule
+ * (br_aes_big_keysched_inv) does not read it.
+ */
+static const unsigned char iS[] = {
+	0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E,
+	0x81, 0xF3, 0xD7, 0xFB, 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
+	0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB, 0x54, 0x7B, 0x94, 0x32,
+	0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
+	0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49,
+	0x6D, 0x8B, 0xD1, 0x25, 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
+	0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, 0x6C, 0x70, 0x48, 0x50,
+	0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
+	0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05,
+	0xB8, 0xB3, 0x45, 0x06, 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
+	0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, 0x3A, 0x91, 0x11, 0x41,
+	0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
+	0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8,
+	0x1C, 0x75, 0xDF, 0x6E, 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
+	0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B, 0xFC, 0x56, 0x3E, 0x4B,
+	0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
+	0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59,
+	0x27, 0x80, 0xEC, 0x5F, 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
+	0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, 0xA0, 0xE0, 0x3B, 0x4D,
+	0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
+	0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63,
+	0x55, 0x21, 0x0C, 0x7D
+};
+/*
+ * Lookup table for the inner decryption rounds, read through the
+ * iSboxExt0..iSboxExt3 macros (the stored word and its rotations by
+ * 8, 16 and 24 bits). Entries appear to pack iS[x] multiplied in
+ * GF(2^8) by 0x0E, 0x09, 0x0D and 0x0B, from high byte to low byte
+ * (checked for entry 0: iS[0] = 0x52 gives 0x51F4A750).
+ */
+static const uint32_t iSsm0[] = {
+	0x51F4A750, 0x7E416553, 0x1A17A4C3, 0x3A275E96, 0x3BAB6BCB, 0x1F9D45F1,
+	0xACFA58AB, 0x4BE30393, 0x2030FA55, 0xAD766DF6, 0x88CC7691, 0xF5024C25,
+	0x4FE5D7FC, 0xC52ACBD7, 0x26354480, 0xB562A38F, 0xDEB15A49, 0x25BA1B67,
+	0x45EA0E98, 0x5DFEC0E1, 0xC32F7502, 0x814CF012, 0x8D4697A3, 0x6BD3F9C6,
+	0x038F5FE7, 0x15929C95, 0xBF6D7AEB, 0x955259DA, 0xD4BE832D, 0x587421D3,
+	0x49E06929, 0x8EC9C844, 0x75C2896A, 0xF48E7978, 0x99583E6B, 0x27B971DD,
+	0xBEE14FB6, 0xF088AD17, 0xC920AC66, 0x7DCE3AB4, 0x63DF4A18, 0xE51A3182,
+	0x97513360, 0x62537F45, 0xB16477E0, 0xBB6BAE84, 0xFE81A01C, 0xF9082B94,
+	0x70486858, 0x8F45FD19, 0x94DE6C87, 0x527BF8B7, 0xAB73D323, 0x724B02E2,
+	0xE31F8F57, 0x6655AB2A, 0xB2EB2807, 0x2FB5C203, 0x86C57B9A, 0xD33708A5,
+	0x302887F2, 0x23BFA5B2, 0x02036ABA, 0xED16825C, 0x8ACF1C2B, 0xA779B492,
+	0xF307F2F0, 0x4E69E2A1, 0x65DAF4CD, 0x0605BED5, 0xD134621F, 0xC4A6FE8A,
+	0x342E539D, 0xA2F355A0, 0x058AE132, 0xA4F6EB75, 0x0B83EC39, 0x4060EFAA,
+	0x5E719F06, 0xBD6E1051, 0x3E218AF9, 0x96DD063D, 0xDD3E05AE, 0x4DE6BD46,
+	0x91548DB5, 0x71C45D05, 0x0406D46F, 0x605015FF, 0x1998FB24, 0xD6BDE997,
+	0x894043CC, 0x67D99E77, 0xB0E842BD, 0x07898B88, 0xE7195B38, 0x79C8EEDB,
+	0xA17C0A47, 0x7C420FE9, 0xF8841EC9, 0x00000000, 0x09808683, 0x322BED48,
+	0x1E1170AC, 0x6C5A724E, 0xFD0EFFFB, 0x0F853856, 0x3DAED51E, 0x362D3927,
+	0x0A0FD964, 0x685CA621, 0x9B5B54D1, 0x24362E3A, 0x0C0A67B1, 0x9357E70F,
+	0xB4EE96D2, 0x1B9B919E, 0x80C0C54F, 0x61DC20A2, 0x5A774B69, 0x1C121A16,
+	0xE293BA0A, 0xC0A02AE5, 0x3C22E043, 0x121B171D, 0x0E090D0B, 0xF28BC7AD,
+	0x2DB6A8B9, 0x141EA9C8, 0x57F11985, 0xAF75074C, 0xEE99DDBB, 0xA37F60FD,
+	0xF701269F, 0x5C72F5BC, 0x44663BC5, 0x5BFB7E34, 0x8B432976, 0xCB23C6DC,
+	0xB6EDFC68, 0xB8E4F163, 0xD731DCCA, 0x42638510, 0x13972240, 0x84C61120,
+	0x854A247D, 0xD2BB3DF8, 0xAEF93211, 0xC729A16D, 0x1D9E2F4B, 0xDCB230F3,
+	0x0D8652EC, 0x77C1E3D0, 0x2BB3166C, 0xA970B999, 0x119448FA, 0x47E96422,
+	0xA8FC8CC4, 0xA0F03F1A, 0x567D2CD8, 0x223390EF, 0x87494EC7, 0xD938D1C1,
+	0x8CCAA2FE, 0x98D40B36, 0xA6F581CF, 0xA57ADE28, 0xDAB78E26, 0x3FADBFA4,
+	0x2C3A9DE4, 0x5078920D, 0x6A5FCC9B, 0x547E4662, 0xF68D13C2, 0x90D8B8E8,
+	0x2E39F75E, 0x82C3AFF5, 0x9F5D80BE, 0x69D0937C, 0x6FD52DA9, 0xCF2512B3,
+	0xC8AC993B, 0x10187DA7, 0xE89C636E, 0xDB3BBB7B, 0xCD267809, 0x6E5918F4,
+	0xEC9AB701, 0x834F9AA8, 0xE6956E65, 0xAAFFE67E, 0x21BCCF08, 0xEF15E8E6,
+	0xBAE79BD9, 0x4A6F36CE, 0xEA9F09D4, 0x29B07CD6, 0x31A4B2AF, 0x2A3F2331,
+	0xC6A59430, 0x35A266C0, 0x744EBC37, 0xFC82CAA6, 0xE090D0B0, 0x33A7D815,
+	0xF104984A, 0x41ECDAF7, 0x7FCD500E, 0x1791F62F, 0x764DD68D, 0x43EFB04D,
+	0xCCAA4D54, 0xE49604DF, 0x9ED1B5E3, 0x4C6A881B, 0xC12C1FB8, 0x4665517F,
+	0x9D5EEA04, 0x018C355D, 0xFA877473, 0xFB0B412E, 0xB3671D5A, 0x92DBD252,
+	0xE9105633, 0x6DD64713, 0x9AD7618C, 0x37A10C7A, 0x59F8148E, 0xEB133C89,
+	0xCEA927EE, 0xB761C935, 0xE11CE5ED, 0x7A47B13C, 0x9CD2DF59, 0x55F2733F,
+	0x1814CE79, 0x73C737BF, 0x53F7CDEA, 0x5FFDAA5B, 0xDF3D6F14, 0x7844DB86,
+	0xCAAFF381, 0xB968C43E, 0x3824342C, 0xC2A3405F, 0x161DC372, 0xBCE2250C,
+	0x283C498B, 0xFF0D9541, 0x39A80171, 0x080CB3DE, 0xD8B4E49C, 0x6456C190,
+	0x7BCB8461, 0xD532B670, 0x486C5C74, 0xD0B85742
+};
+
+/*
+ * Doubles a byte value in GF(2^8): shift left by one, then fold the
+ * bit that left the low byte back in by XORing with 0x11B. The
+ * reduction is applied through an all-ones/all-zeros mask rather
+ * than a conditional.
+ */
+static unsigned
+mul2(unsigned x)
+{
+	unsigned doubled, mask;
+
+	doubled = x << 1;
+	mask = (unsigned)(-(int)(doubled >> 8));
+	return doubled ^ (mask & 0x11B);
+}
+
+/*
+ * Multiplies x by 9 using mul2(): 9*x = 8*x + x, with addition being
+ * XOR.
+ */
+static unsigned
+mul9(unsigned x)
+{
+	unsigned x8;
+
+	x8 = mul2(mul2(mul2(x)));
+	return x8 ^ x;
+}
+
+/*
+ * Multiplies x by 0x0B using mul2(): 11*x = 8*x + 2*x + x, with
+ * addition being XOR.
+ */
+static unsigned
+mulb(unsigned x)
+{
+	unsigned twice, eight;
+
+	twice = mul2(x);
+	eight = mul2(mul2(twice));
+	return eight ^ twice ^ x;
+}
+
+/*
+ * Multiplies x by 0x0D using mul2(): 13*x = 8*x + 4*x + x, with
+ * addition being XOR.
+ */
+static unsigned
+muld(unsigned x)
+{
+	unsigned four, eight;
+
+	four = mul2(mul2(x));
+	eight = mul2(four);
+	return eight ^ four ^ x;
+}
+
+/*
+ * Multiplies x by 0x0E using mul2(): 14*x = 8*x + 4*x + 2*x, with
+ * addition being XOR.
+ */
+static unsigned
+mule(unsigned x)
+{
+	unsigned twice, four, eight;
+
+	twice = mul2(x);
+	four = mul2(twice);
+	eight = mul2(four);
+	return eight ^ four ^ twice;
+}
+
+/*
+ * see inner.h
+ *
+ * Runs the regular key schedule, then rewrites subkey words 4 up to
+ * (num_rounds * 4) - 1 so that InvMixColumns() is already applied to
+ * them; the first and the last group of four words are left as
+ * br_aes_keysched() produced them. Returns the round count reported
+ * by br_aes_keysched() (0 means nothing gets rewritten).
+ */
+unsigned
+br_aes_big_keysched_inv(uint32_t *skey, const void *key, size_t key_len)
+{
+	unsigned num_rounds;
+	int idx, end;
+
+	num_rounds = br_aes_keysched(skey, key, key_len);
+	end = (int)(num_rounds << 2);
+	for (idx = 4; idx < end; idx ++) {
+		uint32_t word, col;
+		unsigned b0, b1, b2, b3;
+
+		/* Split the word into its four bytes, high byte first. */
+		word = skey[idx];
+		b0 = word >> 24;
+		b1 = (word >> 16) & 0xFF;
+		b2 = (word >> 8) & 0xFF;
+		b3 = word & 0xFF;
+
+		/* One output byte per row of the 0E/0B/0D/09 matrix. */
+		col = (uint32_t)(mule(b0) ^ mulb(b1) ^ muld(b2) ^ mul9(b3)) << 24;
+		col |= (uint32_t)(mul9(b0) ^ mule(b1) ^ mulb(b2) ^ muld(b3)) << 16;
+		col |= (uint32_t)(muld(b0) ^ mul9(b1) ^ mule(b2) ^ mulb(b3)) << 8;
+		col |= (uint32_t)(mulb(b0) ^ muld(b1) ^ mul9(b2) ^ mule(b3));
+		skey[idx] = col;
+	}
+	return num_rounds;
+}
+
+/*
+ * Rotates a 32-bit word right by n bits. n is expected to be strictly
+ * between 0 and 32 (the callers in this file use 8, 16 and 24), since
+ * the complementary shift is by 32 - n.
+ */
+static inline uint32_t
+rotr(uint32_t x, int n)
+{
+	uint32_t low, high;
+
+	high = x << (32 - n);
+	low = x >> n;
+	return high | low;
+}
+/*
+ * Four views of iSsm0: the stored word, and that word rotated right
+ * by 8, 16 and 24 bits. The argument is used directly as the table
+ * index, so it must be in the 0..255 range of the initializer.
+ */
+#define iSboxExt0(x)   (iSsm0[x])
+#define iSboxExt1(x)   (rotr(iSsm0[x], 8))
+#define iSboxExt2(x)   (rotr(iSsm0[x], 16))
+#define iSboxExt3(x)   (rotr(iSsm0[x], 24))
+
+/*
+ * see bearssl.h
+ *
+ * Decrypts one 16-byte block in place. data is read as four 32-bit
+ * words with br_dec32be() and written back with br_enc32be(). skey
+ * is read from word (num_rounds << 2) + 3 down to word 0; it is
+ * presumably expected in the form produced by
+ * br_aes_big_keysched_inv() -- confirm against callers.
+ * The round counter is unsigned and starts at num_rounds - 1, so
+ * num_rounds must be at least 1.
+ */
+void
+br_aes_big_decrypt(unsigned num_rounds, const uint32_t *skey, void *data)
+{
+	unsigned char *buf;
+	uint32_t s0, s1, s2, s3;
+	uint32_t t0, t1, t2, t3;
+	unsigned u;
+	/* Load the block and apply the last four subkey words first. */
+	buf = data;
+	s0 = br_dec32be(buf);
+	s1 = br_dec32be(buf + 4);
+	s2 = br_dec32be(buf + 8);
+	s3 = br_dec32be(buf + 12);
+	s0 ^= skey[(num_rounds << 2) + 0];
+	s1 ^= skey[(num_rounds << 2) + 1];
+	s2 ^= skey[(num_rounds << 2) + 2];
+	s3 ^= skey[(num_rounds << 2) + 3];
+	for (u = num_rounds - 1; u > 0; u --) { /* subkey groups num_rounds-1 .. 1 */
+		uint32_t v0 = iSboxExt0(s0 >> 24) /* each output word mixes one byte of every state word */
+			^ iSboxExt1((s3 >> 16) & 0xFF)
+			^ iSboxExt2((s2 >> 8) & 0xFF)
+			^ iSboxExt3(s1 & 0xFF);
+		uint32_t v1 = iSboxExt0(s1 >> 24)
+			^ iSboxExt1((s0 >> 16) & 0xFF)
+			^ iSboxExt2((s3 >> 8) & 0xFF)
+			^ iSboxExt3(s2 & 0xFF);
+		uint32_t v2 = iSboxExt0(s2 >> 24)
+			^ iSboxExt1((s1 >> 16) & 0xFF)
+			^ iSboxExt2((s0 >> 8) & 0xFF)
+			^ iSboxExt3(s3 & 0xFF);
+		uint32_t v3 = iSboxExt0(s3 >> 24)
+			^ iSboxExt1((s2 >> 16) & 0xFF)
+			^ iSboxExt2((s1 >> 8) & 0xFF)
+			^ iSboxExt3(s0 & 0xFF);
+		s0 = v0;
+		s1 = v1;
+		s2 = v2;
+		s3 = v3;
+		s0 ^= skey[u << 2];
+		s1 ^= skey[(u << 2) + 1];
+		s2 ^= skey[(u << 2) + 2];
+		s3 ^= skey[(u << 2) + 3];
+	}
+	t0 = ((uint32_t)iS[s0 >> 24] << 24) /* final step: plain iS lookups, same byte selection, no iSsm0 */
+		| ((uint32_t)iS[(s3 >> 16) & 0xFF] << 16)
+		| ((uint32_t)iS[(s2 >> 8) & 0xFF] << 8)
+		| (uint32_t)iS[s1 & 0xFF];
+	t1 = ((uint32_t)iS[s1 >> 24] << 24)
+		| ((uint32_t)iS[(s0 >> 16) & 0xFF] << 16)
+		| ((uint32_t)iS[(s3 >> 8) & 0xFF] << 8)
+		| (uint32_t)iS[s2 & 0xFF];
+	t2 = ((uint32_t)iS[s2 >> 24] << 24)
+		| ((uint32_t)iS[(s1 >> 16) & 0xFF] << 16)
+		| ((uint32_t)iS[(s0 >> 8) & 0xFF] << 8)
+		| (uint32_t)iS[s3 & 0xFF];
+	t3 = ((uint32_t)iS[s3 >> 24] << 24)
+		| ((uint32_t)iS[(s2 >> 16) & 0xFF] << 16)
+		| ((uint32_t)iS[(s1 >> 8) & 0xFF] << 8)
+		| (uint32_t)iS[s0 & 0xFF];
+	s0 = t0 ^ skey[0]; /* the first four subkey words are applied last */
+	s1 = t1 ^ skey[1];
+	s2 = t2 ^ skey[2];
+	s3 = t3 ^ skey[3];
+	br_enc32be(buf, s0);
+	br_enc32be(buf + 4, s1);
+	br_enc32be(buf + 8, s2);
+	br_enc32be(buf + 12, s3);
+}
--- a/third_party/bearssl/src/aes_big_enc.c
+++ b/third_party/bearssl/src/aes_big_enc.c
@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define S   br_aes_S /* local shorthand; br_aes_S itself is not defined in this file */
+/*
+ * Lookup table for the inner encryption rounds, read through the
+ * SboxExt0..SboxExt3 macros (the stored word and its rotations by 8,
+ * 16 and 24 bits). Entries appear to pack S[x] multiplied in GF(2^8)
+ * by 2, 1, 1 and 3, from high byte to low byte (checked for entry 0:
+ * an S-box value of 0x63 gives 0xC66363A5).
+ */
+static const uint32_t Ssm0[] = {
+	0xC66363A5, 0xF87C7C84, 0xEE777799, 0xF67B7B8D, 0xFFF2F20D, 0xD66B6BBD,
+	0xDE6F6FB1, 0x91C5C554, 0x60303050, 0x02010103, 0xCE6767A9, 0x562B2B7D,
+	0xE7FEFE19, 0xB5D7D762, 0x4DABABE6, 0xEC76769A, 0x8FCACA45, 0x1F82829D,
+	0x89C9C940, 0xFA7D7D87, 0xEFFAFA15, 0xB25959EB, 0x8E4747C9, 0xFBF0F00B,
+	0x41ADADEC, 0xB3D4D467, 0x5FA2A2FD, 0x45AFAFEA, 0x239C9CBF, 0x53A4A4F7,
+	0xE4727296, 0x9BC0C05B, 0x75B7B7C2, 0xE1FDFD1C, 0x3D9393AE, 0x4C26266A,
+	0x6C36365A, 0x7E3F3F41, 0xF5F7F702, 0x83CCCC4F, 0x6834345C, 0x51A5A5F4,
+	0xD1E5E534, 0xF9F1F108, 0xE2717193, 0xABD8D873, 0x62313153, 0x2A15153F,
+	0x0804040C, 0x95C7C752, 0x46232365, 0x9DC3C35E, 0x30181828, 0x379696A1,
+	0x0A05050F, 0x2F9A9AB5, 0x0E070709, 0x24121236, 0x1B80809B, 0xDFE2E23D,
+	0xCDEBEB26, 0x4E272769, 0x7FB2B2CD, 0xEA75759F, 0x1209091B, 0x1D83839E,
+	0x582C2C74, 0x341A1A2E, 0x361B1B2D, 0xDC6E6EB2, 0xB45A5AEE, 0x5BA0A0FB,
+	0xA45252F6, 0x763B3B4D, 0xB7D6D661, 0x7DB3B3CE, 0x5229297B, 0xDDE3E33E,
+	0x5E2F2F71, 0x13848497, 0xA65353F5, 0xB9D1D168, 0x00000000, 0xC1EDED2C,
+	0x40202060, 0xE3FCFC1F, 0x79B1B1C8, 0xB65B5BED, 0xD46A6ABE, 0x8DCBCB46,
+	0x67BEBED9, 0x7239394B, 0x944A4ADE, 0x984C4CD4, 0xB05858E8, 0x85CFCF4A,
+	0xBBD0D06B, 0xC5EFEF2A, 0x4FAAAAE5, 0xEDFBFB16, 0x864343C5, 0x9A4D4DD7,
+	0x66333355, 0x11858594, 0x8A4545CF, 0xE9F9F910, 0x04020206, 0xFE7F7F81,
+	0xA05050F0, 0x783C3C44, 0x259F9FBA, 0x4BA8A8E3, 0xA25151F3, 0x5DA3A3FE,
+	0x804040C0, 0x058F8F8A, 0x3F9292AD, 0x219D9DBC, 0x70383848, 0xF1F5F504,
+	0x63BCBCDF, 0x77B6B6C1, 0xAFDADA75, 0x42212163, 0x20101030, 0xE5FFFF1A,
+	0xFDF3F30E, 0xBFD2D26D, 0x81CDCD4C, 0x180C0C14, 0x26131335, 0xC3ECEC2F,
+	0xBE5F5FE1, 0x359797A2, 0x884444CC, 0x2E171739, 0x93C4C457, 0x55A7A7F2,
+	0xFC7E7E82, 0x7A3D3D47, 0xC86464AC, 0xBA5D5DE7, 0x3219192B, 0xE6737395,
+	0xC06060A0, 0x19818198, 0x9E4F4FD1, 0xA3DCDC7F, 0x44222266, 0x542A2A7E,
+	0x3B9090AB, 0x0B888883, 0x8C4646CA, 0xC7EEEE29, 0x6BB8B8D3, 0x2814143C,
+	0xA7DEDE79, 0xBC5E5EE2, 0x160B0B1D, 0xADDBDB76, 0xDBE0E03B, 0x64323256,
+	0x743A3A4E, 0x140A0A1E, 0x924949DB, 0x0C06060A, 0x4824246C, 0xB85C5CE4,
+	0x9FC2C25D, 0xBDD3D36E, 0x43ACACEF, 0xC46262A6, 0x399191A8, 0x319595A4,
+	0xD3E4E437, 0xF279798B, 0xD5E7E732, 0x8BC8C843, 0x6E373759, 0xDA6D6DB7,
+	0x018D8D8C, 0xB1D5D564, 0x9C4E4ED2, 0x49A9A9E0, 0xD86C6CB4, 0xAC5656FA,
+	0xF3F4F407, 0xCFEAEA25, 0xCA6565AF, 0xF47A7A8E, 0x47AEAEE9, 0x10080818,
+	0x6FBABAD5, 0xF0787888, 0x4A25256F, 0x5C2E2E72, 0x381C1C24, 0x57A6A6F1,
+	0x73B4B4C7, 0x97C6C651, 0xCBE8E823, 0xA1DDDD7C, 0xE874749C, 0x3E1F1F21,
+	0x964B4BDD, 0x61BDBDDC, 0x0D8B8B86, 0x0F8A8A85, 0xE0707090, 0x7C3E3E42,
+	0x71B5B5C4, 0xCC6666AA, 0x904848D8, 0x06030305, 0xF7F6F601, 0x1C0E0E12,
+	0xC26161A3, 0x6A35355F, 0xAE5757F9, 0x69B9B9D0, 0x17868691, 0x99C1C158,
+	0x3A1D1D27, 0x279E9EB9, 0xD9E1E138, 0xEBF8F813, 0x2B9898B3, 0x22111133,
+	0xD26969BB, 0xA9D9D970, 0x078E8E89, 0x339494A7, 0x2D9B9BB6, 0x3C1E1E22,
+	0x15878792, 0xC9E9E920, 0x87CECE49, 0xAA5555FF, 0x50282878, 0xA5DFDF7A,
+	0x038C8C8F, 0x59A1A1F8, 0x09898980, 0x1A0D0D17, 0x65BFBFDA, 0xD7E6E631,
+	0x844242C6, 0xD06868B8, 0x824141C3, 0x299999B0, 0x5A2D2D77, 0x1E0F0F11,
+	0x7BB0B0CB, 0xA85454FC, 0x6DBBBBD6, 0x2C16163A
+};
+
+/*
+ * Rotates a 32-bit word right by n bits. n is expected to be strictly
+ * between 0 and 32 (the callers in this file use 8, 16 and 24), since
+ * the complementary shift is by 32 - n.
+ */
+static inline uint32_t
+rotr(uint32_t x, int n)
+{
+	uint32_t low, high;
+
+	high = x << (32 - n);
+	low = x >> n;
+	return high | low;
+}
+/*
+ * Four views of Ssm0: the stored word, and that word rotated right by
+ * 8, 16 and 24 bits. The argument is used directly as the table
+ * index, so it must be in the 0..255 range of the initializer.
+ */
+#define SboxExt0(x)   (Ssm0[x])
+#define SboxExt1(x)   (rotr(Ssm0[x], 8))
+#define SboxExt2(x)   (rotr(Ssm0[x], 16))
+#define SboxExt3(x)   (rotr(Ssm0[x], 24))
+
+
+/*
+ * see bearssl.h
+ *
+ * Encrypts one 16-byte block in place. data is read as four 32-bit
+ * words with br_dec32be() and written back with br_enc32be(). skey
+ * is read from word 0 up to word (num_rounds << 2) + 3: words 0..3
+ * before the loop, groups 1 .. num_rounds-1 inside it, and the last
+ * group after the final S-box step.
+ */
+void
+br_aes_big_encrypt(unsigned num_rounds, const uint32_t *skey, void *data)
+{
+	unsigned char *buf;
+	uint32_t s0, s1, s2, s3;
+	uint32_t t0, t1, t2, t3;
+	unsigned u;
+	/* Load the block and apply the first four subkey words. */
+	buf = data;
+	s0 = br_dec32be(buf);
+	s1 = br_dec32be(buf + 4);
+	s2 = br_dec32be(buf + 8);
+	s3 = br_dec32be(buf + 12);
+	s0 ^= skey[0];
+	s1 ^= skey[1];
+	s2 ^= skey[2];
+	s3 ^= skey[3];
+	for (u = 1; u < num_rounds; u ++) {
+		uint32_t v0, v1, v2, v3;
+		/* Each output word mixes one byte of every state word. */
+		v0 = SboxExt0(s0 >> 24)
+			^ SboxExt1((s1 >> 16) & 0xFF)
+			^ SboxExt2((s2 >> 8) & 0xFF)
+			^ SboxExt3(s3 & 0xFF);
+		v1 = SboxExt0(s1 >> 24)
+			^ SboxExt1((s2 >> 16) & 0xFF)
+			^ SboxExt2((s3 >> 8) & 0xFF)
+			^ SboxExt3(s0 & 0xFF);
+		v2 = SboxExt0(s2 >> 24)
+			^ SboxExt1((s3 >> 16) & 0xFF)
+			^ SboxExt2((s0 >> 8) & 0xFF)
+			^ SboxExt3(s1 & 0xFF);
+		v3 = SboxExt0(s3 >> 24)
+			^ SboxExt1((s0 >> 16) & 0xFF)
+			^ SboxExt2((s1 >> 8) & 0xFF)
+			^ SboxExt3(s2 & 0xFF);
+		s0 = v0;
+		s1 = v1;
+		s2 = v2;
+		s3 = v3;
+		s0 ^= skey[u << 2];
+		s1 ^= skey[(u << 2) + 1];
+		s2 ^= skey[(u << 2) + 2];
+		s3 ^= skey[(u << 2) + 3];
+	}
+	t0 = ((uint32_t)S[s0 >> 24] << 24) /* final step: plain S lookups, same byte selection, no Ssm0 */
+		| ((uint32_t)S[(s1 >> 16) & 0xFF] << 16)
+		| ((uint32_t)S[(s2 >> 8) & 0xFF] << 8)
+		| (uint32_t)S[s3 & 0xFF];
+	t1 = ((uint32_t)S[s1 >> 24] << 24)
+		| ((uint32_t)S[(s2 >> 16) & 0xFF] << 16)
+		| ((uint32_t)S[(s3 >> 8) & 0xFF] << 8)
+		| (uint32_t)S[s0 & 0xFF];
+	t2 = ((uint32_t)S[s2 >> 24] << 24)
+		| ((uint32_t)S[(s3 >> 16) & 0xFF] << 16)
+		| ((uint32_t)S[(s0 >> 8) & 0xFF] << 8)
+		| (uint32_t)S[s1 & 0xFF];
+	t3 = ((uint32_t)S[s3 >> 24] << 24)
+		| ((uint32_t)S[(s0 >> 16) & 0xFF] << 16)
+		| ((uint32_t)S[(s1 >> 8) & 0xFF] << 8)
+		| (uint32_t)S[s2 & 0xFF];
+	s0 = t0 ^ skey[num_rounds << 2]; /* the last four subkey words close the block */
+	s1 = t1 ^ skey[(num_rounds << 2) + 1];
+	s2 = t2 ^ skey[(num_rounds << 2) + 2];
+	s3 = t3 ^ skey[(num_rounds << 2) + 3];
+	br_enc32be(buf, s0);
+	br_enc32be(buf + 4, s1);
+	br_enc32be(buf + 8, s2);
+	br_enc32be(buf + 12, s3);
+}
--- a/third_party/bearssl/src/aes_common.c
+++ b/third_party/bearssl/src/aes_common.c
@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+/*
+ * Round constants for br_aes_keysched(): one is XORed in each time
+ * the expansion starts a new group of nk words. Each constant sits in
+ * the top byte of its word. The table has ten entries.
+ */
+static const uint32_t Rcon[] = {
+	0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000,
+	0x40000000, 0x80000000, 0x1B000000, 0x36000000
+};
+
+#define S   br_aes_S /* local shorthand for the S-box table defined in this file */
+
+/*
+ * see inner.h
+ *
+ * AES S-box, 256 entries indexed by byte value. Used through the S
+ * alias by SubWord() in this file; it is not declared static, and
+ * aes_big_enc.c refers to it through its own S alias.
+ */
+const unsigned char br_aes_S[] = {
+	0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B,
+	0xFE, 0xD7, 0xAB, 0x76, 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0,
+	0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, 0xB7, 0xFD, 0x93, 0x26,
+	0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
+	0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2,
+	0xEB, 0x27, 0xB2, 0x75, 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0,
+	0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, 0x53, 0xD1, 0x00, 0xED,
+	0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
+	0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F,
+	0x50, 0x3C, 0x9F, 0xA8, 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5,
+	0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, 0xCD, 0x0C, 0x13, 0xEC,
+	0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
+	0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14,
+	0xDE, 0x5E, 0x0B, 0xDB, 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C,
+	0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, 0xE7, 0xC8, 0x37, 0x6D,
+	0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
+	0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F,
+	0x4B, 0xBD, 0x8B, 0x8A, 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E,
+	0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, 0xE1, 0xF8, 0x98, 0x11,
+	0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
+	0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F,
+	0xB0, 0x54, 0xBB, 0x16
+};
+
+/*
+ * Substitutes each of the four bytes of x through the S table,
+ * keeping every byte at its original position in the word.
+ */
+static uint32_t
+SubWord(uint32_t x)
+{
+	uint32_t out;
+	int shift;
+
+	out = 0;
+	for (shift = 24; shift >= 0; shift -= 8) {
+		out |= (uint32_t)S[(x >> shift) & 0xFF] << shift;
+	}
+	return out;
+}
+
+/*
+ * see inner.h
+ *
+ * Expands a key into (num_rounds + 1) * 4 subkey words written to
+ * skey[]; the key bytes are decoded four at a time with br_dec32be().
+ * Returns the number of rounds (10, 12 or 14 for a key of 16, 24 or
+ * 32 bytes), or 0 without touching skey[] for any other key_len.
+ */
+unsigned
+br_aes_keysched(uint32_t *skey, const void *key, size_t key_len)
+{
+	unsigned num_rounds;
+	int pos, step, rc, nk, total;
+
+	if (key_len == 16) {
+		num_rounds = 10;
+	} else if (key_len == 24) {
+		num_rounds = 12;
+	} else if (key_len == 32) {
+		num_rounds = 14;
+	} else {
+		/* Unsupported key length: reported with a 0 return. */
+		return 0;
+	}
+	nk = (int)(key_len >> 2);
+	total = (int)((num_rounds + 1) << 2);
+
+	/* The first nk words are the key itself. */
+	for (pos = 0; pos < nk; pos ++) {
+		skey[pos] = br_dec32be((const unsigned char *)key + (pos << 2));
+	}
+
+	/*
+	 * step is the position inside the current group of nk words,
+	 * rc the index of the round constant for that group.
+	 */
+	step = 0;
+	rc = 0;
+	for (pos = nk; pos < total; pos ++) {
+		uint32_t w;
+
+		w = skey[pos - 1];
+		if (step == 0) {
+			/* Rotate left by one byte, substitute, add Rcon. */
+			w = (w << 8) | (w >> 24);
+			w = SubWord(w) ^ Rcon[rc];
+		} else if (nk > 6 && step == 4) {
+			/* Extra substitution used only with 8-word keys. */
+			w = SubWord(w);
+		}
+		skey[pos] = skey[pos - nk] ^ w;
+		step ++;
+		if (step == nk) {
+			step = 0;
+			rc ++;
+		}
+	}
+	return num_rounds;
+}
--- a/third_party/bearssl/src/aes_ct.c
+++ b/third_party/bearssl/src/aes_ct.c
@ -0,0 +1,328 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see inner.h
+ *
+ * Applies the S-box circuit to the eight words q[0..7], in place,
+ * using only XOR, AND and NOT on whole words. q[0] is read as the low
+ * bit (x7) and q[7] as the high bit (x0) of the input; the outputs
+ * are written back with the same numbering (q[0] = s7, q[7] = s0).
+ */
+void
+br_aes_ct_bitslice_Sbox(uint32_t *q)
+{
+	/*
+	 * This S-box implementation is a straightforward translation of
+	 * the circuit described by Boyar and Peralta in "A new
+	 * combinational logic minimization technique with applications
+	 * to cryptology" (https://eprint.iacr.org/2009/191.pdf).
+	 *
+	 * Note that variables x* (input) and s* (output) are numbered
+	 * in "reverse" order (x0 is the high bit, x7 is the low bit).
+	 */
+
+	uint32_t x0, x1, x2, x3, x4, x5, x6, x7;
+	uint32_t y1, y2, y3, y4, y5, y6, y7, y8, y9;
+	uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
+	uint32_t y20, y21;
+	uint32_t z0, z1, z2, z3, z4, z5, z6, z7, z8, z9;
+	uint32_t z10, z11, z12, z13, z14, z15, z16, z17;
+	uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
+	uint32_t t10, t11, t12, t13, t14, t15, t16, t17, t18, t19;
+	uint32_t t20, t21, t22, t23, t24, t25, t26, t27, t28, t29;
+	uint32_t t30, t31, t32, t33, t34, t35, t36, t37, t38, t39;
+	uint32_t t40, t41, t42, t43, t44, t45, t46, t47, t48, t49;
+	uint32_t t50, t51, t52, t53, t54, t55, t56, t57, t58, t59;
+	uint32_t t60, t61, t62, t63, t64, t65, t66, t67;
+	uint32_t s0, s1, s2, s3, s4, s5, s6, s7;
+
+	x0 = q[7];
+	x1 = q[6];
+	x2 = q[5];
+	x3 = q[4];
+	x4 = q[3];
+	x5 = q[2];
+	x6 = q[1];
+	x7 = q[0];
+
+	/*
+	 * Top linear transformation.
+	 */
+	y14 = x3 ^ x5;
+	y13 = x0 ^ x6;
+	y9 = x0 ^ x3;
+	y8 = x0 ^ x5;
+	t0 = x1 ^ x2;
+	y1 = t0 ^ x7;
+	y4 = y1 ^ x3;
+	y12 = y13 ^ y14;
+	y2 = y1 ^ x0;
+	y5 = y1 ^ x6;
+	y3 = y5 ^ y8;
+	t1 = x4 ^ y12;
+	y15 = t1 ^ x5;
+	y20 = t1 ^ x1;
+	y6 = y15 ^ x7;
+	y10 = y15 ^ t0;
+	y11 = y20 ^ y9;
+	y7 = x7 ^ y11;
+	y17 = y10 ^ y11;
+	y19 = y10 ^ y8;
+	y16 = t0 ^ y11;
+	y21 = y13 ^ y16;
+	y18 = x0 ^ y16;
+
+	/*
+	 * Non-linear section.
+	 */
+	t2 = y12 & y15;
+	t3 = y3 & y6;
+	t4 = t3 ^ t2;
+	t5 = y4 & x7;
+	t6 = t5 ^ t2;
+	t7 = y13 & y16;
+	t8 = y5 & y1;
+	t9 = t8 ^ t7;
+	t10 = y2 & y7;
+	t11 = t10 ^ t7;
+	t12 = y9 & y11;
+	t13 = y14 & y17;
+	t14 = t13 ^ t12;
+	t15 = y8 & y10;
+	t16 = t15 ^ t12;
+	t17 = t4 ^ t14;
+	t18 = t6 ^ t16;
+	t19 = t9 ^ t14;
+	t20 = t11 ^ t16;
+	t21 = t17 ^ y20;
+	t22 = t18 ^ y19;
+	t23 = t19 ^ y21;
+	t24 = t20 ^ y18;
+
+	t25 = t21 ^ t22;
+	t26 = t21 & t23;
+	t27 = t24 ^ t26;
+	t28 = t25 & t27;
+	t29 = t28 ^ t22;
+	t30 = t23 ^ t24;
+	t31 = t22 ^ t26;
+	t32 = t31 & t30;
+	t33 = t32 ^ t24;
+	t34 = t23 ^ t33;
+	t35 = t27 ^ t33;
+	t36 = t24 & t35;
+	t37 = t36 ^ t34;
+	t38 = t27 ^ t36;
+	t39 = t29 & t38;
+	t40 = t25 ^ t39;
+
+	t41 = t40 ^ t37;
+	t42 = t29 ^ t33;
+	t43 = t29 ^ t40;
+	t44 = t33 ^ t37;
+	t45 = t42 ^ t41;
+	z0 = t44 & y15;
+	z1 = t37 & y6;
+	z2 = t33 & x7;
+	z3 = t43 & y16;
+	z4 = t40 & y1;
+	z5 = t29 & y7;
+	z6 = t42 & y11;
+	z7 = t45 & y17;
+	z8 = t41 & y10;
+	z9 = t44 & y12;
+	z10 = t37 & y3;
+	z11 = t33 & y4;
+	z12 = t43 & y13;
+	z13 = t40 & y5;
+	z14 = t29 & y2;
+	z15 = t42 & y9;
+	z16 = t45 & y14;
+	z17 = t41 & y8;
+
+	/*
+	 * Bottom linear transformation.
+	 */
+	t46 = z15 ^ z16;
+	t47 = z10 ^ z11;
+	t48 = z5 ^ z13;
+	t49 = z9 ^ z10;
+	t50 = z2 ^ z12;
+	t51 = z2 ^ z5;
+	t52 = z7 ^ z8;
+	t53 = z0 ^ z3;
+	t54 = z6 ^ z7;
+	t55 = z16 ^ z17;
+	t56 = z12 ^ t48;
+	t57 = t50 ^ t53;
+	t58 = z4 ^ t46;
+	t59 = z3 ^ t54;
+	t60 = t46 ^ t57;
+	t61 = z14 ^ t57;
+	t62 = t52 ^ t58;
+	t63 = t49 ^ t58;
+	t64 = z4 ^ t59;
+	t65 = t61 ^ t62;
+	t66 = z1 ^ t63;
+	s0 = t59 ^ t63;
+	s6 = t56 ^ ~t62;
+	s7 = t48 ^ ~t60;
+	t67 = t64 ^ t65;
+	s3 = t53 ^ t66;
+	s4 = t51 ^ t66;
+	s5 = t47 ^ t65;
+	s1 = t64 ^ ~s3;
+	s2 = t55 ^ ~t67;
+
+	q[7] = s0;
+	q[6] = s1;
+	q[5] = s2;
+	q[4] = s3;
+	q[3] = s4;
+	q[2] = s5;
+	q[1] = s6;
+	q[0] = s7;
+}
+
+/*
+ * see inner.h
+ *
+ * In-place bit permutation across the eight words q[0..7]. Three
+ * passes exchange groups of 1, 2 and then 4 bits between words that
+ * are 1, 2 and 4 positions apart. Each exchange is its own inverse.
+ */
+void
+br_aes_ct_ortho(uint32_t *q)
+{
+	int i;
+
+#define SWAPN(cl, ch, s, x, y)   do { \
+		uint32_t a, b; \
+		a = (x); \
+		b = (y); \
+		(x) = (a & (uint32_t)cl) | ((b & (uint32_t)cl) << (s)); \
+		(y) = ((a & (uint32_t)ch) >> (s)) | (b & (uint32_t)ch); \
+	} while (0)
+
+#define SWAP2(x, y)   SWAPN(0x55555555, 0xAAAAAAAA, 1, x, y)
+#define SWAP4(x, y)   SWAPN(0x33333333, 0xCCCCCCCC, 2, x, y)
+#define SWAP8(x, y)   SWAPN(0x0F0F0F0F, 0xF0F0F0F0, 4, x, y)
+
+	/* Neighbouring words: (0,1), (2,3), (4,5), (6,7). */
+	for (i = 0; i < 8; i += 2) {
+		SWAP2(q[i], q[i + 1]);
+	}
+
+	/* Words two apart: (0,2), (1,3), (4,6), (5,7). */
+	for (i = 0; i < 8; i ++) {
+		if ((i & 2) == 0) {
+			SWAP4(q[i], q[i + 2]);
+		}
+	}
+
+	/* Words four apart: (0,4), (1,5), (2,6), (3,7). */
+	for (i = 0; i < 4; i ++) {
+		SWAP8(q[i], q[i + 4]);
+	}
+}
+/*
+ * Round constants for br_aes_ct_keysched(), stored as single bytes
+ * and XORed into the word without any shift. Ten entries.
+ */
+static const unsigned char Rcon[] = {
+	0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36
+};
+
+/*
+ * S-box substitution of one 32-bit word through the bitsliced
+ * circuit: x is copied into all eight slots, the slots go through
+ * ortho / S-box / ortho, and slot 0 is returned.
+ */
+static uint32_t
+sub_word(uint32_t x)
+{
+	uint32_t lanes[8];
+	uint32_t *p;
+
+	for (p = lanes; p != lanes + 8; p ++) {
+		*p = x;
+	}
+	br_aes_ct_ortho(lanes);
+	br_aes_ct_bitslice_Sbox(lanes);
+	br_aes_ct_ortho(lanes);
+	return lanes[0];
+}
+
+/*
+ * see inner.h
+ *
+ * Key schedule producing the compact subkey array comp_skey[], with
+ * (num_rounds + 1) * 4 words written. Returns the number of rounds
+ * (10, 12 or 14 for a key of 16, 24 or 32 bytes), or 0 without
+ * writing comp_skey[] for any other key_len.
+ *
+ * Every schedule word is stored twice in the local skey[] (at indices
+ * 2*i and 2*i+1) so that groups of four words fill the eight slots
+ * br_aes_ct_ortho() works on. The compact form keeps the 0x55555555
+ * bits of the first copy and the 0xAAAAAAAA bits of the second; this
+ * is presumably the layout br_aes_ct_skey_expand() unpacks -- confirm.
+ * NOTE(review): skey[] holds expanded key material and is not cleared
+ * before returning.
+ */
+unsigned
+br_aes_ct_keysched(uint32_t *comp_skey, const void *key, size_t key_len)
+{
+	unsigned num_rounds;
+	int i, j, k, nk, nkf;
+	uint32_t tmp;
+	uint32_t skey[120];
+	/* Map the key length to a round count; other lengths are rejected. */
+	switch (key_len) {
+	case 16:
+		num_rounds = 10;
+		break;
+	case 24:
+		num_rounds = 12;
+		break;
+	case 32:
+		num_rounds = 14;
+		break;
+	default:
+		/* abort(); */
+		return 0;
+	}
+	nk = (int)(key_len >> 2);
+	nkf = (int)((num_rounds + 1) << 2);
+	tmp = 0;
+	for (i = 0; i < nk; i ++) { /* key words, decoded with br_dec32le, each stored twice */
+		tmp = br_dec32le((const unsigned char *)key + (i << 2));
+		skey[(i << 1) + 0] = tmp;
+		skey[(i << 1) + 1] = tmp;
+	}
+	for (i = nk, j = 0, k = 0; i < nkf; i ++) { /* tmp enters as the previous schedule word */
+		if (j == 0) {
+			tmp = (tmp << 24) | (tmp >> 8);
+			tmp = sub_word(tmp) ^ Rcon[k];
+		} else if (nk > 6 && j == 4) {
+			tmp = sub_word(tmp);
+		}
+		tmp ^= skey[(i - nk) << 1];
+		skey[(i << 1) + 0] = tmp;
+		skey[(i << 1) + 1] = tmp;
+		if (++ j == nk) {
+			j = 0;
+			k ++;
+		}
+	}
+	for (i = 0; i < nkf; i += 4) { /* four duplicated words = the eight slots ortho needs */
+		br_aes_ct_ortho(skey + (i << 1));
+	}
+	for (i = 0, j = 0; i < nkf; i ++, j += 2) { /* merge each pair into one compact word */
+		comp_skey[i] = (skey[j + 0] & 0x55555555)
+			| (skey[j + 1] & 0xAAAAAAAA);
+	}
+	return num_rounds;
+}
+
+/*
+ * see inner.h
+ *
+ * Unpacks (num_rounds + 1) * 4 compact subkey words into twice as
+ * many words in skey[]. For each input word, the bits selected by
+ * 0x55555555 are duplicated into their upper neighbours and stored at
+ * the even output index; the bits selected by 0xAAAAAAAA are
+ * duplicated into their lower neighbours and stored at the odd index.
+ */
+void
+br_aes_ct_skey_expand(uint32_t *skey,
+	unsigned num_rounds, const uint32_t *comp_skey)
+{
+	unsigned idx, count;
+
+	count = (num_rounds + 1) << 2;
+	for (idx = 0; idx < count; idx ++) {
+		uint32_t even, odd;
+
+		even = comp_skey[idx] & 0x55555555;
+		odd = comp_skey[idx] & 0xAAAAAAAA;
+		skey[(idx << 1) + 0] = even | (even << 1);
+		skey[(idx << 1) + 1] = odd | (odd >> 1);
+	}
+}
--- a/third_party/bearssl/src/aes_ct64.c
+++ b/third_party/bearssl/src/aes_ct64.c
@ -0,0 +1,398 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h */
+/*
+ * Bitsliced S-box, applied in place to the eight 64-bit words q[0..7].
+ * q[0] carries the low bit of every byte and q[7] the high bit (see the
+ * x7 = q[0] ... x0 = q[7] loads below). The body uses only XOR, AND and
+ * NOT on whole words, so every bit position is processed independently
+ * and there are no data-dependent branches or table lookups.
+ */
+void
+br_aes_ct64_bitslice_Sbox(uint64_t *q)
+{
+	/*
+	 * This S-box implementation is a straightforward translation of
+	 * the circuit described by Boyar and Peralta in "A new
+	 * combinational logic minimization technique with applications
+	 * to cryptology" (https://eprint.iacr.org/2009/191.pdf).
+	 *
+	 * Note that variables x* (input) and s* (output) are numbered
+	 * in "reverse" order (x0 is the high bit, x7 is the low bit).
+	 */
+
+	uint64_t x0, x1, x2, x3, x4, x5, x6, x7;
+	uint64_t y1, y2, y3, y4, y5, y6, y7, y8, y9;
+	uint64_t y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
+	uint64_t y20, y21;
+	uint64_t z0, z1, z2, z3, z4, z5, z6, z7, z8, z9;
+	uint64_t z10, z11, z12, z13, z14, z15, z16, z17;
+	uint64_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
+	uint64_t t10, t11, t12, t13, t14, t15, t16, t17, t18, t19;
+	uint64_t t20, t21, t22, t23, t24, t25, t26, t27, t28, t29;
+	uint64_t t30, t31, t32, t33, t34, t35, t36, t37, t38, t39;
+	uint64_t t40, t41, t42, t43, t44, t45, t46, t47, t48, t49;
+	uint64_t t50, t51, t52, t53, t54, t55, t56, t57, t58, t59;
+	uint64_t t60, t61, t62, t63, t64, t65, t66, t67;
+	uint64_t s0, s1, s2, s3, s4, s5, s6, s7;
+
+	/* Load inputs in reverse index order: x0 is the high bit plane. */
+	x0 = q[7];
+	x1 = q[6];
+	x2 = q[5];
+	x3 = q[4];
+	x4 = q[3];
+	x5 = q[2];
+	x6 = q[1];
+	x7 = q[0];
+
+	/*
+	 * Top linear transformation.
+	 */
+	y14 = x3 ^ x5;
+	y13 = x0 ^ x6;
+	y9 = x0 ^ x3;
+	y8 = x0 ^ x5;
+	t0 = x1 ^ x2;
+	y1 = t0 ^ x7;
+	y4 = y1 ^ x3;
+	y12 = y13 ^ y14;
+	y2 = y1 ^ x0;
+	y5 = y1 ^ x6;
+	y3 = y5 ^ y8;
+	t1 = x4 ^ y12;
+	y15 = t1 ^ x5;
+	y20 = t1 ^ x1;
+	y6 = y15 ^ x7;
+	y10 = y15 ^ t0;
+	y11 = y20 ^ y9;
+	y7 = x7 ^ y11;
+	y17 = y10 ^ y11;
+	y19 = y10 ^ y8;
+	y16 = t0 ^ y11;
+	y21 = y13 ^ y16;
+	y18 = x0 ^ y16;
+
+	/*
+	 * Non-linear section.
+	 */
+	t2 = y12 & y15;
+	t3 = y3 & y6;
+	t4 = t3 ^ t2;
+	t5 = y4 & x7;
+	t6 = t5 ^ t2;
+	t7 = y13 & y16;
+	t8 = y5 & y1;
+	t9 = t8 ^ t7;
+	t10 = y2 & y7;
+	t11 = t10 ^ t7;
+	t12 = y9 & y11;
+	t13 = y14 & y17;
+	t14 = t13 ^ t12;
+	t15 = y8 & y10;
+	t16 = t15 ^ t12;
+	t17 = t4 ^ t14;
+	t18 = t6 ^ t16;
+	t19 = t9 ^ t14;
+	t20 = t11 ^ t16;
+	t21 = t17 ^ y20;
+	t22 = t18 ^ y19;
+	t23 = t19 ^ y21;
+	t24 = t20 ^ y18;
+
+	t25 = t21 ^ t22;
+	t26 = t21 & t23;
+	t27 = t24 ^ t26;
+	t28 = t25 & t27;
+	t29 = t28 ^ t22;
+	t30 = t23 ^ t24;
+	t31 = t22 ^ t26;
+	t32 = t31 & t30;
+	t33 = t32 ^ t24;
+	t34 = t23 ^ t33;
+	t35 = t27 ^ t33;
+	t36 = t24 & t35;
+	t37 = t36 ^ t34;
+	t38 = t27 ^ t36;
+	t39 = t29 & t38;
+	t40 = t25 ^ t39;
+
+	t41 = t40 ^ t37;
+	t42 = t29 ^ t33;
+	t43 = t29 ^ t40;
+	t44 = t33 ^ t37;
+	t45 = t42 ^ t41;
+	z0 = t44 & y15;
+	z1 = t37 & y6;
+	z2 = t33 & x7;
+	z3 = t43 & y16;
+	z4 = t40 & y1;
+	z5 = t29 & y7;
+	z6 = t42 & y11;
+	z7 = t45 & y17;
+	z8 = t41 & y10;
+	z9 = t44 & y12;
+	z10 = t37 & y3;
+	z11 = t33 & y4;
+	z12 = t43 & y13;
+	z13 = t40 & y5;
+	z14 = t29 & y2;
+	z15 = t42 & y9;
+	z16 = t45 & y14;
+	z17 = t41 & y8;
+
+	/*
+	 * Bottom linear transformation.
+	 *
+	 * The complemented outputs are s1, s2, s6 and s7, i.e. byte bits
+	 * 6, 5, 1 and 0 (bit pattern 0x63).
+	 */
+	t46 = z15 ^ z16;
+	t47 = z10 ^ z11;
+	t48 = z5 ^ z13;
+	t49 = z9 ^ z10;
+	t50 = z2 ^ z12;
+	t51 = z2 ^ z5;
+	t52 = z7 ^ z8;
+	t53 = z0 ^ z3;
+	t54 = z6 ^ z7;
+	t55 = z16 ^ z17;
+	t56 = z12 ^ t48;
+	t57 = t50 ^ t53;
+	t58 = z4 ^ t46;
+	t59 = z3 ^ t54;
+	t60 = t46 ^ t57;
+	t61 = z14 ^ t57;
+	t62 = t52 ^ t58;
+	t63 = t49 ^ t58;
+	t64 = z4 ^ t59;
+	t65 = t61 ^ t62;
+	t66 = z1 ^ t63;
+	s0 = t59 ^ t63;
+	s6 = t56 ^ ~t62;
+	s7 = t48 ^ ~t60;
+	t67 = t64 ^ t65;
+	s3 = t53 ^ t66;
+	s4 = t51 ^ t66;
+	s5 = t47 ^ t65;
+	s1 = t64 ^ ~s3;
+	s2 = t55 ^ ~t67;
+
+	/* Store outputs with the same reversed numbering as the inputs. */
+	q[7] = s0;
+	q[6] = s1;
+	q[5] = s2;
+	q[4] = s3;
+	q[3] = s4;
+	q[2] = s5;
+	q[1] = s6;
+	q[0] = s7;
+}
+
+/*
+ * One exchange step of br_aes_ct64_ortho(): the bits of *x selected by
+ * ch trade places with the bits of *y selected by cl, moving by s
+ * positions; the remaining bits of both words stay where they are.
+ */
+static void
+ct64_swap_lanes(uint64_t *x, uint64_t *y, uint64_t cl, uint64_t ch, int s)
+{
+	uint64_t a, b;
+
+	a = *x;
+	b = *y;
+	*x = (a & cl) | ((b & cl) << s);
+	*y = ((a & ch) >> s) | (b & ch);
+}
+
+/* see inner.h */
+/*
+ * In-place transform of the eight words q[0..7], done as three passes of
+ * pairwise exchanges: words one apart with 1-bit groups, words two apart
+ * with 2-bit groups, then words four apart with 4-bit groups.
+ */
+void
+br_aes_ct64_ortho(uint64_t *q)
+{
+	int base, i;
+
+	/* Pairs (0,1) (2,3) (4,5) (6,7). */
+	for (i = 0; i < 8; i += 2) {
+		ct64_swap_lanes(&q[i], &q[i + 1],
+			(uint64_t)0x5555555555555555,
+			(uint64_t)0xAAAAAAAAAAAAAAAA, 1);
+	}
+
+	/* Pairs (0,2) (1,3) (4,6) (5,7). */
+	for (base = 0; base < 8; base += 4) {
+		for (i = base; i < base + 2; i ++) {
+			ct64_swap_lanes(&q[i], &q[i + 2],
+				(uint64_t)0x3333333333333333,
+				(uint64_t)0xCCCCCCCCCCCCCCCC, 2);
+		}
+	}
+
+	/* Pairs (0,4) (1,5) (2,6) (3,7). */
+	for (i = 0; i < 4; i ++) {
+		ct64_swap_lanes(&q[i], &q[i + 4],
+			(uint64_t)0x0F0F0F0F0F0F0F0F,
+			(uint64_t)0xF0F0F0F0F0F0F0F0, 4);
+	}
+}
+
+/* see inner.h */
+/*
+ * Pack four 32-bit words into two 64-bit words. Each input word has its
+ * four bytes spread to the even byte positions of a 64-bit value;
+ * w[0] and w[2] are then merged into *q0 (w[2] in the odd bytes), and
+ * w[1] and w[3] into *q1 (w[3] in the odd bytes).
+ */
+void
+br_aes_ct64_interleave_in(uint64_t *q0, uint64_t *q1, const uint32_t *w)
+{
+	uint64_t lane[4];
+	int i;
+
+	for (i = 0; i < 4; i ++) {
+		uint64_t v;
+
+		v = w[i];
+		/* Move the upper 16 bits up to bits 32..47. */
+		v |= (v << 16);
+		v &= (uint64_t)0x0000FFFF0000FFFF;
+		/* Split every 16-bit half into two bytes, one per 16-bit slot. */
+		v |= (v << 8);
+		v &= (uint64_t)0x00FF00FF00FF00FF;
+		lane[i] = v;
+	}
+	*q0 = lane[0] | (lane[2] << 8);
+	*q1 = lane[1] | (lane[3] << 8);
+}
+
+/* see inner.h */
+/*
+ * Unpack two 64-bit words into four 32-bit words: the even bytes of q0
+ * and q1 are gathered into w[0] and w[1], the odd bytes into w[2] and
+ * w[3].
+ */
+void
+br_aes_ct64_interleave_out(uint32_t *w, uint64_t q0, uint64_t q1)
+{
+	uint64_t lane[4];
+	int i;
+
+	lane[0] = q0;
+	lane[1] = q1;
+	lane[2] = q0 >> 8;
+	lane[3] = q1 >> 8;
+	for (i = 0; i < 4; i ++) {
+		uint64_t v;
+
+		/* Keep one byte per 16-bit slot, then close the gaps. */
+		v = lane[i] & (uint64_t)0x00FF00FF00FF00FF;
+		v |= (v >> 8);
+		v &= (uint64_t)0x0000FFFF0000FFFF;
+		w[i] = (uint32_t)v | (uint32_t)(v >> 16);
+	}
+}
+
+/*
+ * Round constants for the key schedule: br_aes_ct64_keysched() XORs
+ * Rcon[k] into the substituted word each time a new group of nk
+ * schedule words begins, and advances k once per group.
+ */
+static const unsigned char Rcon[] = {
+	0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36
+};
+
+/*
+ * Run a 32-bit word through the bitsliced S-box. The word is placed in
+ * slot 0 with all other slots zero, br_aes_ct64_ortho() is applied
+ * before and after br_aes_ct64_bitslice_Sbox(), and the low 32 bits of
+ * slot 0 are returned.
+ */
+static uint32_t
+sub_word(uint32_t x)
+{
+	uint64_t slots[8];
+	int i;
+
+	slots[0] = x;
+	for (i = 1; i < 8; i ++) {
+		slots[i] = 0;
+	}
+	br_aes_ct64_ortho(slots);
+	br_aes_ct64_bitslice_Sbox(slots);
+	br_aes_ct64_ortho(slots);
+	return (uint32_t)slots[0];
+}
+
+/* see inner.h */
+/*
+ * Expand an AES key into the compressed 64-bit bitsliced key schedule.
+ *
+ *   comp_skey   output; receives 2*(num_rounds+1) words
+ *   key         raw key bytes
+ *   key_len     key length in bytes: 16, 24 or 32
+ *
+ * Returns the number of rounds (10, 12 or 14), or 0 when key_len is
+ * not one of the supported sizes; comp_skey is not written in that case.
+ */
+unsigned
+br_aes_ct64_keysched(uint64_t *comp_skey, const void *key, size_t key_len)
+{
+	unsigned num_rounds;
+	int i, j, k, nk, nkf;
+	uint32_t tmp;
+	uint32_t skey[60];
+
+	switch (key_len) {
+	case 16:
+		num_rounds = 10;
+		break;
+	case 24:
+		num_rounds = 12;
+		break;
+	case 32:
+		num_rounds = 14;
+		break;
+	default:
+		/* abort(); */
+		return 0;
+	}
+	/* nk: key size in 32-bit words; nkf: total schedule words (max 60). */
+	nk = (int)(key_len >> 2);
+	nkf = (int)((num_rounds + 1) << 2);
+	/* The first nk schedule words are the key itself, decoded little-endian. */
+	br_range_dec32le(skey, (key_len >> 2), key);
+	tmp = skey[(key_len >> 2) - 1];
+	/*
+	 * j counts words within the current group of nk; k indexes Rcon
+	 * and advances once per completed group.
+	 */
+	for (i = nk, j = 0, k = 0; i < nkf; i ++) {
+		if (j == 0) {
+			/* Rotate right by 8 bits, substitute, add round constant. */
+			tmp = (tmp << 24) | (tmp >> 8);
+			tmp = sub_word(tmp) ^ Rcon[k];
+		} else if (nk > 6 && j == 4) {
+			/* Extra substitution; nk > 6 only holds for 32-byte keys. */
+			tmp = sub_word(tmp);
+		}
+		tmp ^= skey[i - nk];
+		skey[i] = tmp;
+		if (++ j == nk) {
+			j = 0;
+			k ++;
+		}
+	}
+
+	/*
+	 * Convert four schedule words at a time. The interleaved pair is
+	 * replicated into q[0..3] and q[4..7], br_aes_ct64_ortho() is
+	 * applied, and the result is compressed into two words by keeping
+	 * one bit out of every four from each q[] entry.
+	 */
+	for (i = 0, j = 0; i < nkf; i += 4, j += 2) {
+		uint64_t q[8];
+
+		br_aes_ct64_interleave_in(&q[0], &q[4], skey + i);
+		q[1] = q[0];
+		q[2] = q[0];
+		q[3] = q[0];
+		q[5] = q[4];
+		q[6] = q[4];
+		q[7] = q[4];
+		br_aes_ct64_ortho(q);
+		comp_skey[j + 0] =
+			  (q[0] & (uint64_t)0x1111111111111111)
+			| (q[1] & (uint64_t)0x2222222222222222)
+			| (q[2] & (uint64_t)0x4444444444444444)
+			| (q[3] & (uint64_t)0x8888888888888888);
+		comp_skey[j + 1] =
+			  (q[4] & (uint64_t)0x1111111111111111)
+			| (q[5] & (uint64_t)0x2222222222222222)
+			| (q[6] & (uint64_t)0x4444444444444444)
+			| (q[7] & (uint64_t)0x8888888888888888);
+	}
+	return num_rounds;
+}
+
+/* see inner.h */
+/*
+ * Undo the compression done by br_aes_ct64_keysched(): each of the
+ * 2*(num_rounds+1) words of comp_skey yields four words of skey.
+ * Output k takes bit k of every 4-bit group of the input and
+ * replicates it across the whole group.
+ */
+void
+br_aes_ct64_skey_expand(uint64_t *skey,
+	unsigned num_rounds, const uint64_t *comp_skey)
+{
+	unsigned idx, count;
+
+	count = (num_rounds + 1) << 1;
+	for (idx = 0; idx < count; idx ++) {
+		uint64_t packed;
+		uint64_t *dst;
+		int k;
+
+		packed = comp_skey[idx];
+		dst = skey + (idx << 2);
+		for (k = 0; k < 4; k ++) {
+			uint64_t bit;
+
+			/* Bit k of each nibble, moved down to the nibble's bit 0. */
+			bit = (packed >> k) & (uint64_t)0x1111111111111111;
+			/* Multiply by 15: a 1 becomes 0xF, a 0 stays 0. */
+			dst[k] = (bit << 4) - bit;
+		}
+	}
+}
--- a/third_party/bearssl/src/aes_ct64_cbcdec.c
+++ b/third_party/bearssl/src/aes_ct64_cbcdec.c
@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h */
+/*
+ * Set up a CBC-decryption context from a raw key of len bytes: the
+ * compressed key schedule goes to ctx->skey, the round count to
+ * ctx->num_rounds, and the vtable pointer is installed.
+ */
+void
+br_aes_ct64_cbcdec_init(br_aes_ct64_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	unsigned rounds;
+
+	rounds = br_aes_ct64_keysched(ctx->skey, key, len);
+	ctx->num_rounds = rounds;
+	ctx->vtable = &br_aes_ct64_cbcdec_vtable;
+}
+
+/* see bearssl_block.h */
+/*
+ * CBC-decrypt data in place.
+ *
+ *   ctx    key context set up by br_aes_ct64_cbcdec_init()
+ *   iv     16-byte IV; overwritten with the last ciphertext block so
+ *          that a following call continues the chain
+ *   data   buffer holding the ciphertext, replaced by the plaintext
+ *   len    length of data in bytes
+ *
+ * Up to four blocks (64 bytes) are decrypted per pass. The final pass
+ * may be shorter; it handles len >> 2 words and reads the last four of
+ * them as the next IV, so callers are expected to pass whole 16-byte
+ * blocks (NOTE(review): confirm against the contract in bearssl_block.h).
+ */
+void
+br_aes_ct64_cbcdec_run(const br_aes_ct64_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf;
+	uint64_t sk_exp[120];
+	uint32_t ivw[4];
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	br_range_dec32le(ivw, 4, iv);
+	buf = data;
+	while (len > 0) {
+		uint64_t q[8];
+		uint32_t w1[16], w2[16];
+		int i;
+
+		if (len >= 64) {
+			br_range_dec32le(w1, 16, buf);
+		} else {
+			size_t nw;
+
+			/*
+			 * Short final chunk: zero the unused tail of w1 so
+			 * the interleave/decrypt steps below never read
+			 * uninitialised stack words. The padded lanes are
+			 * decrypted but never written back.
+			 */
+			nw = len >> 2;
+			br_range_dec32le(w1, nw, buf);
+			memset(w1 + nw, 0, (16 - nw) * sizeof w1[0]);
+		}
+		for (i = 0; i < 4; i ++) {
+			br_aes_ct64_interleave_in(
+				&q[i], &q[i + 4], w1 + (i << 2));
+		}
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_decrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		for (i = 0; i < 4; i ++) {
+			br_aes_ct64_interleave_out(
+				w2 + (i << 2), q[i], q[i + 4]);
+		}
+		/* First block of the chunk is chained with the running IV. */
+		for (i = 0; i < 4; i ++) {
+			w2[i] ^= ivw[i];
+		}
+		if (len >= 64) {
+			/* Remaining blocks are chained with the previous ciphertext. */
+			for (i = 4; i < 16; i ++) {
+				w2[i] ^= w1[i - 4];
+			}
+			/* Last ciphertext block becomes the IV for the next chunk. */
+			memcpy(ivw, w1 + 12, sizeof ivw);
+			br_range_enc32le(buf, w2, 16);
+		} else {
+			int j;
+
+			j = (int)(len >> 2);
+			for (i = 4; i < j; i ++) {
+				w2[i] ^= w1[i - 4];
+			}
+			memcpy(ivw, w1 + j - 4, sizeof ivw);
+			br_range_enc32le(buf, w2, j);
+			break;
+		}
+		buf += 64;
+		len -= 64;
+	}
+	br_range_enc32le(iv, ivw, 4);
+}
+
+/* see bearssl_block.h */
+/*
+ * Vtable for the CBC-decryption implementation. The function-pointer
+ * casts adapt the concrete br_aes_ct64_cbcdec_keys signatures to the
+ * generic br_block_cbcdec_class ones.
+ * NOTE(review): 16 and 4 are presumably the block size in bytes and its
+ * base-2 logarithm; confirm against br_block_cbcdec_class.
+ */
+const br_block_cbcdec_class br_aes_ct64_cbcdec_vtable = {
+	sizeof(br_aes_ct64_cbcdec_keys),
+	16,
+	4,
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))
+		&br_aes_ct64_cbcdec_init,
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+		&br_aes_ct64_cbcdec_run
+};
--- a/third_party/bearssl/src/aes_ct64_cbcenc.c
+++ b/third_party/bearssl/src/aes_ct64_cbcenc.c
@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h */
+/*
+ * Set up a CBC-encryption context from a raw key of len bytes: the
+ * compressed key schedule goes to ctx->skey, the round count to
+ * ctx->num_rounds, and the vtable pointer is installed.
+ */
+void
+br_aes_ct64_cbcenc_init(br_aes_ct64_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	unsigned rounds;
+
+	rounds = br_aes_ct64_keysched(ctx->skey, key, len);
+	ctx->num_rounds = rounds;
+	ctx->vtable = &br_aes_ct64_cbcenc_vtable;
+}
+
+/* see bearssl_block.h */
+/*
+ * CBC-encrypt data in place, one 16-byte block per iteration.
+ *
+ *   ctx    key context set up by br_aes_ct64_cbcenc_init()
+ *   iv     16-byte IV; overwritten with the last ciphertext block so
+ *          that a following call continues the chain
+ *   data   buffer holding the plaintext, replaced by the ciphertext
+ *   len    length of data in bytes; consumed 16 at a time, so it must
+ *          be a multiple of 16 (the unsigned counter would otherwise
+ *          wrap around instead of reaching zero)
+ */
+void
+br_aes_ct64_cbcenc_run(const br_aes_ct64_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf;
+	uint64_t sk_exp[120];
+	uint64_t q[8];
+	uint32_t ivw[4];
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	br_range_dec32le(ivw, 4, iv);
+	buf = data;
+
+	/*
+	 * Only q[0] and q[4] are loaded with data, but the ortho and
+	 * encrypt steps read all eight words. Clear the array once so the
+	 * unused lanes never hold uninitialised stack contents (same
+	 * approach as br_aes_ct64_ctrcbc_mac()). The output comes from
+	 * q[0] and q[4] only and is unchanged.
+	 */
+	memset(q, 0, sizeof q);
+	while (len > 0) {
+		uint32_t w[4];
+
+		/* Chain: XOR the plaintext block with the previous ciphertext. */
+		w[0] = ivw[0] ^ br_dec32le(buf);
+		w[1] = ivw[1] ^ br_dec32le(buf + 4);
+		w[2] = ivw[2] ^ br_dec32le(buf + 8);
+		w[3] = ivw[3] ^ br_dec32le(buf + 12);
+		br_aes_ct64_interleave_in(&q[0], &q[4], w);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_interleave_out(w, q[0], q[4]);
+		/* The fresh ciphertext block is the chaining value for the next one. */
+		memcpy(ivw, w, sizeof w);
+		br_enc32le(buf, w[0]);
+		br_enc32le(buf + 4, w[1]);
+		br_enc32le(buf + 8, w[2]);
+		br_enc32le(buf + 12, w[3]);
+		buf += 16;
+		len -= 16;
+	}
+	br_range_enc32le(iv, ivw, 4);
+}
+
+/* see bearssl_block.h */
+/*
+ * Vtable for the CBC-encryption implementation. The function-pointer
+ * casts adapt the concrete br_aes_ct64_cbcenc_keys signatures to the
+ * generic br_block_cbcenc_class ones.
+ * NOTE(review): 16 and 4 are presumably the block size in bytes and its
+ * base-2 logarithm; confirm against br_block_cbcenc_class.
+ */
+const br_block_cbcenc_class br_aes_ct64_cbcenc_vtable = {
+	sizeof(br_aes_ct64_cbcenc_keys),
+	16,
+	4,
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
+		&br_aes_ct64_cbcenc_init,
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+		&br_aes_ct64_cbcenc_run
+};
--- a/third_party/bearssl/src/aes_ct64_ctr.c
+++ b/third_party/bearssl/src/aes_ct64_ctr.c
@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h */
+/*
+ * Set up a CTR context from a raw key of len bytes: the compressed key
+ * schedule goes to ctx->skey, the round count to ctx->num_rounds, and
+ * the vtable pointer is installed.
+ */
+void
+br_aes_ct64_ctr_init(br_aes_ct64_ctr_keys *ctx,
+	const void *key, size_t len)
+{
+	unsigned rounds;
+
+	rounds = br_aes_ct64_keysched(ctx->skey, key, len);
+	ctx->num_rounds = rounds;
+	ctx->vtable = &br_aes_ct64_ctr_vtable;
+}
+
+/*
+ * XOR the first len bytes of src into dst (dst[i] ^= src[i]).
+ */
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *out;
+	const unsigned char *in;
+	size_t u;
+
+	out = dst;
+	in = src;
+	for (u = 0; u < len; u ++) {
+		out[u] ^= in[u];
+	}
+}
+
+/* see bearssl_block.h */
+/*
+ * CTR encryption/decryption in place.
+ *
+ *   ctx    key context set up by br_aes_ct64_ctr_init()
+ *   iv     12 bytes (three 32-bit words are read)
+ *   cc     32-bit block counter for the first block
+ *   data   buffer, XORed in place with the key stream
+ *   len    length of data in bytes
+ *
+ * Returns the updated counter. Every full 64-byte pass adds 4; the
+ * final pass adds len >> 4, so a trailing partial block is not counted.
+ */
+uint32_t
+br_aes_ct64_ctr_run(const br_aes_ct64_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char *buf;
+	uint32_t ivw[16];
+	uint64_t sk_exp[120];
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	/*
+	 * Template for four counter blocks: the three IV words are
+	 * replicated at word offsets 0, 4, 8 and 12. Words 3, 7, 11 and 15
+	 * are left unset here and filled with the counter on every pass.
+	 */
+	br_range_dec32le(ivw, 3, iv);
+	memcpy(ivw + 4, ivw, 3 * sizeof(uint32_t));
+	memcpy(ivw + 8, ivw, 3 * sizeof(uint32_t));
+	memcpy(ivw + 12, ivw, 3 * sizeof(uint32_t));
+	buf = data;
+	while (len > 0) {
+		uint64_t q[8];
+		uint32_t w[16];
+		unsigned char tmp[64];
+		int i;
+
+		/*
+		 * TODO: see if we can save on the first br_aes_ct64_ortho()
+		 * call, since iv0/iv1/iv2 are constant for the whole run.
+		 */
+		memcpy(w, ivw, sizeof ivw);
+		/* Counter values are byte-swapped before being placed in the block. */
+		w[3] = br_swap32(cc);
+		w[7] = br_swap32(cc + 1);
+		w[11] = br_swap32(cc + 2);
+		w[15] = br_swap32(cc + 3);
+		for (i = 0; i < 4; i ++) {
+			br_aes_ct64_interleave_in(
+				&q[i], &q[i + 4], w + (i << 2));
+		}
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		for (i = 0; i < 4; i ++) {
+			br_aes_ct64_interleave_out(
+				w + (i << 2), q[i], q[i + 4]);
+		}
+		/* tmp now holds 64 bytes of key stream. */
+		br_range_enc32le(tmp, w, 16);
+		if (len <= 64) {
+			/* Final pass: use only len bytes of the key stream. */
+			xorbuf(buf, tmp, len);
+			cc += (uint32_t)len >> 4;
+			break;
+		}
+		xorbuf(buf, tmp, 64);
+		buf += 64;
+		len -= 64;
+		cc += 4;
+	}
+	return cc;
+}
+
+/* see bearssl_block.h */
+/*
+ * Vtable for the CTR implementation. The function-pointer casts adapt
+ * the concrete br_aes_ct64_ctr_keys signatures to the generic
+ * br_block_ctr_class ones.
+ * NOTE(review): 16 and 4 are presumably the block size in bytes and its
+ * base-2 logarithm; confirm against br_block_ctr_class.
+ */
+const br_block_ctr_class br_aes_ct64_ctr_vtable = {
+	sizeof(br_aes_ct64_ctr_keys),
+	16,
+	4,
+	(void (*)(const br_block_ctr_class **, const void *, size_t))
+		&br_aes_ct64_ctr_init,
+	(uint32_t (*)(const br_block_ctr_class *const *,
+		const void *, uint32_t, void *, size_t))
+		&br_aes_ct64_ctr_run
+};
--- a/third_party/bearssl/src/aes_ct64_ctrcbc.c
+++ b/third_party/bearssl/src/aes_ct64_ctrcbc.c
@ -0,0 +1,433 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h */
+/*
+ * Set up a CTR + CBC-MAC context from a raw key of len bytes: the
+ * compressed key schedule goes to ctx->skey, the round count to
+ * ctx->num_rounds, and the vtable pointer is installed.
+ */
+void
+br_aes_ct64_ctrcbc_init(br_aes_ct64_ctrcbc_keys *ctx,
+	const void *key, size_t len)
+{
+	unsigned rounds;
+
+	rounds = br_aes_ct64_keysched(ctx->skey, key, len);
+	ctx->num_rounds = rounds;
+	ctx->vtable = &br_aes_ct64_ctrcbc_vtable;
+}
+
+/*
+ * XOR the first len bytes of src into dst (dst[i] ^= src[i]).
+ */
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *out;
+	const unsigned char *in;
+	size_t u;
+
+	out = dst;
+	in = src;
+	for (u = 0; u < len; u ++) {
+		out[u] ^= in[u];
+	}
+}
+
+/* see bearssl_block.h */
+/*
+ * CTR-only processing: XOR data in place with the key stream generated
+ * from the 16-byte counter block ctr.
+ *
+ *   ctx    key context set up by br_aes_ct64_ctrcbc_init()
+ *   ctr    16-byte counter, read as four big-endian words; incremented
+ *          once per generated block and written back on return
+ *   data   buffer processed in place
+ *   len    length of data in bytes
+ */
+void
+br_aes_ct64_ctrcbc_ctr(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint64_t sk_exp[120];
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * We keep the counter as four 32-bit values, with big-endian
+	 * convention, because that's what is expected for purposes of
+	 * incrementing the counter value.
+	 */
+	ivbuf = ctr;
+	iv0 = br_dec32be(ivbuf +  0);
+	iv1 = br_dec32be(ivbuf +  4);
+	iv2 = br_dec32be(ivbuf +  8);
+	iv3 = br_dec32be(ivbuf + 12);
+
+	buf = data;
+	while (len > 0) {
+		uint64_t q[8];
+		uint32_t w[16];
+		unsigned char tmp[64];
+		int i, j;
+
+		/*
+		 * The bitslice implementation expects values in
+		 * little-endian convention, so we have to byteswap them.
+		 */
+		j = (len >= 64) ? 16 : (int)(len >> 2);
+		for (i = 0; i < j; i += 4) {
+			uint32_t carry;
+
+			w[i + 0] = br_swap32(iv0);
+			w[i + 1] = br_swap32(iv1);
+			w[i + 2] = br_swap32(iv2);
+			w[i + 3] = br_swap32(iv3);
+			/*
+			 * Branch-free 128-bit increment: ~(x | -x) >> 31 is
+			 * 1 exactly when x == 0, i.e. when the word just
+			 * wrapped around; the carry then ripples upwards.
+			 */
+			iv3 ++;
+			carry = ~(iv3 | -iv3) >> 31;
+			iv2 += carry;
+			carry &= -(~(iv2 | -iv2) >> 31);
+			iv1 += carry;
+			carry &= -(~(iv1 | -iv1) >> 31);
+			iv0 += carry;
+		}
+		/* Zero the block slots not needed for a short final chunk. */
+		memset(w + i, 0, (16 - i) * sizeof(uint32_t));
+
+		for (i = 0; i < 4; i ++) {
+			br_aes_ct64_interleave_in(
+				&q[i], &q[i + 4], w + (i << 2));
+		}
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		for (i = 0; i < 4; i ++) {
+			br_aes_ct64_interleave_out(
+				w + (i << 2), q[i], q[i + 4]);
+		}
+
+		/* tmp holds the key stream for this chunk. */
+		br_range_enc32le(tmp, w, 16);
+		if (len <= 64) {
+			xorbuf(buf, tmp, len);
+			break;
+		}
+		xorbuf(buf, tmp, 64);
+		buf += 64;
+		len -= 64;
+	}
+	/* Write the advanced counter back for the next call. */
+	br_enc32be(ivbuf +  0, iv0);
+	br_enc32be(ivbuf +  4, iv1);
+	br_enc32be(ivbuf +  8, iv2);
+	br_enc32be(ivbuf + 12, iv3);
+}
+
+/* see bearssl_block.h */
+/*
+ * CBC-MAC update: for each 16-byte block of data, XOR it into the
+ * running MAC value and encrypt the result.
+ *
+ *   ctx      key context set up by br_aes_ct64_ctrcbc_init()
+ *   cbcmac   16-byte running MAC value, read on entry and written back
+ *   data     input bytes (not modified)
+ *   len      length of data in bytes; consumed 16 at a time, so it
+ *            must be a multiple of 16 (the unsigned counter would
+ *            otherwise wrap around instead of reaching zero)
+ */
+void
+br_aes_ct64_ctrcbc_mac(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	const unsigned char *buf;
+	uint32_t cm0, cm1, cm2, cm3;
+	uint64_t q[8];
+	uint64_t sk_exp[120];
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
+	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
+	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
+	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+
+	buf = data;
+	/* Only q[0] and q[4] are loaded below; clear the rest up front. */
+	memset(q, 0, sizeof q);
+	while (len > 0) {
+		uint32_t w[4];
+
+		w[0] = cm0 ^ br_dec32le(buf +  0);
+		w[1] = cm1 ^ br_dec32le(buf +  4);
+		w[2] = cm2 ^ br_dec32le(buf +  8);
+		w[3] = cm3 ^ br_dec32le(buf + 12);
+
+		br_aes_ct64_interleave_in(&q[0], &q[4], w);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_interleave_out(w, q[0], q[4]);
+
+		cm0 = w[0];
+		cm1 = w[1];
+		cm2 = w[2];
+		cm3 = w[3];
+		buf += 16;
+		len -= 16;
+	}
+
+	br_enc32le((unsigned char *)cbcmac +  0, cm0);
+	br_enc32le((unsigned char *)cbcmac +  4, cm1);
+	br_enc32le((unsigned char *)cbcmac +  8, cm2);
+	br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* see bearssl_block.h */
+/*
+ * Combined CTR encryption and CBC-MAC: data is encrypted in place with
+ * the CTR key stream, and the CBC-MAC is computed over the resulting
+ * ciphertext.
+ *
+ *   ctx      key context set up by br_aes_ct64_ctrcbc_init()
+ *   ctr      16-byte counter (big-endian words), updated on return
+ *   cbcmac   16-byte running MAC value, updated on return
+ *   data     plaintext in, ciphertext out
+ *   len      length of data in bytes; consumed 16 at a time, so it
+ *            must be a multiple of 16
+ */
+void
+br_aes_ct64_ctrcbc_encrypt(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	/*
+	 * When encrypting, the CBC-MAC processing must be lagging by
+	 * one block, since it operates on the encrypted values, so
+	 * it must wait for that encryption to complete.
+	 */
+
+	unsigned char *buf;
+	unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t cm0, cm1, cm2, cm3;
+	uint64_t sk_exp[120];
+	uint64_t q[8];
+	int first_iter;
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * We keep the counter as four 32-bit values, with big-endian
+	 * convention, because that's what is expected for purposes of
+	 * incrementing the counter value.
+	 */
+	ivbuf = ctr;
+	iv0 = br_dec32be(ivbuf +  0);
+	iv1 = br_dec32be(ivbuf +  4);
+	iv2 = br_dec32be(ivbuf +  8);
+	iv3 = br_dec32be(ivbuf + 12);
+
+	/*
+	 * The current CBC-MAC value is kept in little-endian convention.
+	 */
+	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
+	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
+	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
+	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+
+	buf = data;
+	first_iter = 1;
+	/* Only q[0], q[1], q[4] and q[5] are loaded below; clear the rest. */
+	memset(q, 0, sizeof q);
+	while (len > 0) {
+		uint32_t w[8], carry;
+
+		/*
+		 * The bitslice implementation expects values in
+		 * little-endian convention, so we have to byteswap them.
+		 */
+		w[0] = br_swap32(iv0);
+		w[1] = br_swap32(iv1);
+		w[2] = br_swap32(iv2);
+		w[3] = br_swap32(iv3);
+		/*
+		 * Branch-free 128-bit increment: ~(x | -x) >> 31 is 1
+		 * exactly when x == 0, i.e. when the word just wrapped.
+		 */
+		iv3 ++;
+		carry = ~(iv3 | -iv3) >> 31;
+		iv2 += carry;
+		carry &= -(~(iv2 | -iv2) >> 31);
+		iv1 += carry;
+		carry &= -(~(iv1 | -iv1) >> 31);
+		iv0 += carry;
+
+		/*
+		 * The block for CBC-MAC.
+		 */
+		w[4] = cm0;
+		w[5] = cm1;
+		w[6] = cm2;
+		w[7] = cm3;
+
+		/* Counter block and MAC block are encrypted in the same pass. */
+		br_aes_ct64_interleave_in(&q[0], &q[4], w);
+		br_aes_ct64_interleave_in(&q[1], &q[5], w + 4);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_interleave_out(w, q[0], q[4]);
+		br_aes_ct64_interleave_out(w + 4, q[1], q[5]);
+
+		/*
+		 * We do the XOR with the plaintext in 32-bit registers,
+		 * so that the value are available for CBC-MAC processing
+		 * as well.
+		 */
+		w[0] ^= br_dec32le(buf +  0);
+		w[1] ^= br_dec32le(buf +  4);
+		w[2] ^= br_dec32le(buf +  8);
+		w[3] ^= br_dec32le(buf + 12);
+		br_enc32le(buf +  0, w[0]);
+		br_enc32le(buf +  4, w[1]);
+		br_enc32le(buf +  8, w[2]);
+		br_enc32le(buf + 12, w[3]);
+
+		buf += 16;
+		len -= 16;
+
+		/*
+		 * We set the cm* values to the block to encrypt in the
+		 * next iteration.
+		 *
+		 * On the first iteration the encrypted w[4..7] is ignored:
+		 * the incoming MAC value is XORed directly with the first
+		 * ciphertext block. On later iterations w[4..7] is the
+		 * encrypted previous MAC input.
+		 */
+		if (first_iter) {
+			first_iter = 0;
+			cm0 ^= w[0];
+			cm1 ^= w[1];
+			cm2 ^= w[2];
+			cm3 ^= w[3];
+		} else {
+			cm0 = w[0] ^ w[4];
+			cm1 = w[1] ^ w[5];
+			cm2 = w[2] ^ w[6];
+			cm3 = w[3] ^ w[7];
+		}
+
+		/*
+		 * If this was the last iteration, then compute the
+		 * extra block encryption to complete CBC-MAC.
+		 */
+		if (len == 0) {
+			w[0] = cm0;
+			w[1] = cm1;
+			w[2] = cm2;
+			w[3] = cm3;
+			br_aes_ct64_interleave_in(&q[0], &q[4], w);
+			br_aes_ct64_ortho(q);
+			br_aes_ct64_bitslice_encrypt(
+				ctx->num_rounds, sk_exp, q);
+			br_aes_ct64_ortho(q);
+			br_aes_ct64_interleave_out(w, q[0], q[4]);
+			cm0 = w[0];
+			cm1 = w[1];
+			cm2 = w[2];
+			cm3 = w[3];
+			break;
+		}
+	}
+
+	/* Write the advanced counter and the new MAC value back. */
+	br_enc32be(ivbuf +  0, iv0);
+	br_enc32be(ivbuf +  4, iv1);
+	br_enc32be(ivbuf +  8, iv2);
+	br_enc32be(ivbuf + 12, iv3);
+	br_enc32le((unsigned char *)cbcmac +  0, cm0);
+	br_enc32le((unsigned char *)cbcmac +  4, cm1);
+	br_enc32le((unsigned char *)cbcmac +  8, cm2);
+	br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* see bearssl_block.h */
+/*
+ * Combined CTR decryption and CBC-MAC: the CBC-MAC is computed over the
+ * incoming ciphertext, and data is decrypted in place with the CTR key
+ * stream. Both block encryptions of an iteration share one pass.
+ *
+ *   ctx      key context set up by br_aes_ct64_ctrcbc_init()
+ *   ctr      16-byte counter (big-endian words), updated on return
+ *   cbcmac   16-byte running MAC value, updated on return
+ *   data     ciphertext in, plaintext out
+ *   len      length of data in bytes; consumed 16 at a time, so it
+ *            must be a multiple of 16
+ */
+void
+br_aes_ct64_ctrcbc_decrypt(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t cm0, cm1, cm2, cm3;
+	uint64_t sk_exp[120];
+	uint64_t q[8];
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * We keep the counter as four 32-bit values, with big-endian
+	 * convention, because that's what is expected for purposes of
+	 * incrementing the counter value.
+	 */
+	ivbuf = ctr;
+	iv0 = br_dec32be(ivbuf +  0);
+	iv1 = br_dec32be(ivbuf +  4);
+	iv2 = br_dec32be(ivbuf +  8);
+	iv3 = br_dec32be(ivbuf + 12);
+
+	/*
+	 * The current CBC-MAC value is kept in little-endian convention.
+	 */
+	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
+	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
+	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
+	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+
+	buf = data;
+	/* Only q[0], q[1], q[4] and q[5] are loaded below; clear the rest. */
+	memset(q, 0, sizeof q);
+	while (len > 0) {
+		uint32_t w[8], carry;
+		unsigned char tmp[16];
+
+		/*
+		 * The bitslice implementation expects values in
+		 * little-endian convention, so we have to byteswap them.
+		 */
+		w[0] = br_swap32(iv0);
+		w[1] = br_swap32(iv1);
+		w[2] = br_swap32(iv2);
+		w[3] = br_swap32(iv3);
+		/*
+		 * Branch-free 128-bit increment: ~(x | -x) >> 31 is 1
+		 * exactly when x == 0, i.e. when the word just wrapped.
+		 */
+		iv3 ++;
+		carry = ~(iv3 | -iv3) >> 31;
+		iv2 += carry;
+		carry &= -(~(iv2 | -iv2) >> 31);
+		iv1 += carry;
+		carry &= -(~(iv1 | -iv1) >> 31);
+		iv0 += carry;
+
+		/*
+		 * The block for CBC-MAC.
+		 */
+		w[4] = cm0 ^ br_dec32le(buf +  0);
+		w[5] = cm1 ^ br_dec32le(buf +  4);
+		w[6] = cm2 ^ br_dec32le(buf +  8);
+		w[7] = cm3 ^ br_dec32le(buf + 12);
+
+		br_aes_ct64_interleave_in(&q[0], &q[4], w);
+		br_aes_ct64_interleave_in(&q[1], &q[5], w + 4);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_interleave_out(w, q[0], q[4]);
+		br_aes_ct64_interleave_out(w + 4, q[1], q[5]);
+
+		/* w[0..3] is the key stream block; w[4..7] is the new MAC value. */
+		br_enc32le(tmp +  0, w[0]);
+		br_enc32le(tmp +  4, w[1]);
+		br_enc32le(tmp +  8, w[2]);
+		br_enc32le(tmp + 12, w[3]);
+		xorbuf(buf, tmp, 16);
+		cm0 = w[4];
+		cm1 = w[5];
+		cm2 = w[6];
+		cm3 = w[7];
+		buf += 16;
+		len -= 16;
+	}
+
+	/* Write the advanced counter and the new MAC value back. */
+	br_enc32be(ivbuf +  0, iv0);
+	br_enc32be(ivbuf +  4, iv1);
+	br_enc32be(ivbuf +  8, iv2);
+	br_enc32be(ivbuf + 12, iv3);
+	br_enc32le((unsigned char *)cbcmac +  0, cm0);
+	br_enc32le((unsigned char *)cbcmac +  4, cm1);
+	br_enc32le((unsigned char *)cbcmac +  8, cm2);
+	br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* see bearssl_block.h */
+/*
+ * Vtable for the CTR + CBC-MAC implementation, listing init, encrypt,
+ * decrypt, ctr and mac in that order. The function-pointer casts adapt
+ * the concrete br_aes_ct64_ctrcbc_keys signatures to the generic
+ * br_block_ctrcbc_class ones.
+ * NOTE(review): 16 and 4 are presumably the block size in bytes and its
+ * base-2 logarithm; confirm against br_block_ctrcbc_class.
+ */
+const br_block_ctrcbc_class br_aes_ct64_ctrcbc_vtable = {
+	sizeof(br_aes_ct64_ctrcbc_keys),
+	16,
+	4,
+	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+		&br_aes_ct64_ctrcbc_init,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_ct64_ctrcbc_encrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_ct64_ctrcbc_decrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, size_t))
+		&br_aes_ct64_ctrcbc_ctr,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, const void *, size_t))
+		&br_aes_ct64_ctrcbc_mac
+};
--- a/third_party/bearssl/src/aes_ct64_dec.c
+++ b/third_party/bearssl/src/aes_ct64_dec.c
@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h */
+void
+br_aes_ct64_bitslice_invSbox(uint64_t *q)
+{
+	/*
+	 * See br_aes_ct_bitslice_invSbox(). This is the natural extension
+	 * to 64-bit registers.
+	 *
+	 * The eight words of q are rewritten in place in three steps: a
+	 * linear step (words 0, 1, 5 and 6 are complemented, then every
+	 * output word is the XOR of three of the inputs), the forward
+	 * S-box br_aes_ct64_bitslice_Sbox(), and then the very same
+	 * linear step a second time.
+	 */
+	uint64_t q0, q1, q2, q3, q4, q5, q6, q7;
+
+	q0 = ~q[0]; /* first linear step: copy to locals, then overwrite q[] */
+	q1 = ~q[1];
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = ~q[5];
+	q6 = ~q[6];
+	q7 = q[7];
+	q[7] = q1 ^ q4 ^ q6;
+	q[6] = q0 ^ q3 ^ q5;
+	q[5] = q7 ^ q2 ^ q4;
+	q[4] = q6 ^ q1 ^ q3;
+	q[3] = q5 ^ q0 ^ q2;
+	q[2] = q4 ^ q7 ^ q1;
+	q[1] = q3 ^ q6 ^ q0;
+	q[0] = q2 ^ q5 ^ q7;
+
+	br_aes_ct64_bitslice_Sbox(q);
+
+	q0 = ~q[0]; /* second linear step, identical to the first */
+	q1 = ~q[1];
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = ~q[5];
+	q6 = ~q[6];
+	q7 = q[7];
+	q[7] = q1 ^ q4 ^ q6;
+	q[6] = q0 ^ q3 ^ q5;
+	q[5] = q7 ^ q2 ^ q4;
+	q[4] = q6 ^ q1 ^ q3;
+	q[3] = q5 ^ q0 ^ q2;
+	q[2] = q4 ^ q7 ^ q1;
+	q[1] = q3 ^ q6 ^ q0;
+	q[0] = q2 ^ q5 ^ q7;
+}
+
+static void
+add_round_key(uint64_t *q, const uint64_t *sk)
+{	/*
+	 * XOR the eight words sk[0..7] into q[0..7], in place.
+	 */
+	int i;
+
+	for (i = 0; i < 8; i ++) {
+		q[i] ^= sk[i];
+	}
+}
+
+static void
+inv_shift_rows(uint64_t *q)
+{	/*
+	 * Apply one fixed bit permutation to each of the eight words.
+	 * Viewing a word as four 16-bit groups (bits 0-15, 16-31, 32-47,
+	 * 48-63), group 0 is left unchanged and groups 1, 2 and 3 are
+	 * rotated left inside their own 16 bits by 4, 8 and 12 positions
+	 * respectively, as the mask/shift pairs below show.
+	 */
+	int i;
+
+	for (i = 0; i < 8; i ++) {
+		uint64_t x;
+
+		x = q[i];
+		q[i] = (x & (uint64_t)0x000000000000FFFF)
+			| ((x & (uint64_t)0x000000000FFF0000) << 4)
+			| ((x & (uint64_t)0x00000000F0000000) >> 12)
+			| ((x & (uint64_t)0x000000FF00000000) << 8)
+			| ((x & (uint64_t)0x0000FF0000000000) >> 8)
+			| ((x & (uint64_t)0x000F000000000000) << 12)
+			| ((x & (uint64_t)0xFFF0000000000000) >> 4);
+	}
+}
+
+static inline uint64_t
+rotr32(uint64_t x)
+{	/* Rotate x by 32 bits, i.e. exchange its two 32-bit halves. */
+	return (x << 32) | (x >> 32);
+}
+
+static void
+inv_mix_columns(uint64_t *q)
+{	/*
+	 * Rewrite the eight words of q in place. All inputs are first
+	 * copied into q0..q7, and r0..r7 are those same values rotated
+	 * right by 16 bits, so the stores to q[] below never feed back
+	 * into later lines. Every output word is the XOR of a fixed
+	 * selection of q/r values with a 32-bit rotation (rotr32) of a
+	 * second fixed selection.
+	 */
+	uint64_t q0, q1, q2, q3, q4, q5, q6, q7;
+	uint64_t r0, r1, r2, r3, r4, r5, r6, r7;
+
+	q0 = q[0];
+	q1 = q[1];
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = q[5];
+	q6 = q[6];
+	q7 = q[7];
+	r0 = (q0 >> 16) | (q0 << 48);
+	r1 = (q1 >> 16) | (q1 << 48);
+	r2 = (q2 >> 16) | (q2 << 48);
+	r3 = (q3 >> 16) | (q3 << 48);
+	r4 = (q4 >> 16) | (q4 << 48);
+	r5 = (q5 >> 16) | (q5 << 48);
+	r6 = (q6 >> 16) | (q6 << 48);
+	r7 = (q7 >> 16) | (q7 << 48);
+
+	q[0] = q5 ^ q6 ^ q7 ^ r0 ^ r5 ^ r7 ^ rotr32(q0 ^ q5 ^ q6 ^ r0 ^ r5);
+	q[1] = q0 ^ q5 ^ r0 ^ r1 ^ r5 ^ r6 ^ r7 ^ rotr32(q1 ^ q5 ^ q7 ^ r1 ^ r5 ^ r6);
+	q[2] = q0 ^ q1 ^ q6 ^ r1 ^ r2 ^ r6 ^ r7 ^ rotr32(q0 ^ q2 ^ q6 ^ r2 ^ r6 ^ r7);
+	q[3] = q0 ^ q1 ^ q2 ^ q5 ^ q6 ^ r0 ^ r2 ^ r3 ^ r5 ^ rotr32(q0 ^ q1 ^ q3 ^ q5 ^ q6 ^ q7 ^ r0 ^ r3 ^ r5 ^ r7);
+	q[4] = q1 ^ q2 ^ q3 ^ q5 ^ r1 ^ r3 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr32(q1 ^ q2 ^ q4 ^ q5 ^ q7 ^ r1 ^ r4 ^ r5 ^ r6);
+	q[5] = q2 ^ q3 ^ q4 ^ q6 ^ r2 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr32(q2 ^ q3 ^ q5 ^ q6 ^ r2 ^ r5 ^ r6 ^ r7);
+	q[6] = q3 ^ q4 ^ q5 ^ q7 ^ r3 ^ r5 ^ r6 ^ r7 ^ rotr32(q3 ^ q4 ^ q6 ^ q7 ^ r3 ^ r6 ^ r7);
+	q[7] = q4 ^ q5 ^ q6 ^ r4 ^ r6 ^ r7 ^ rotr32(q4 ^ q5 ^ q7 ^ r4 ^ r7);
+}
+
+/* see inner.h
+ *
+ * Runs the decryption rounds in place on the eight words at q. skey is
+ * read as consecutive groups of eight words: group num_rounds is XORed
+ * in first, groups num_rounds-1 down to 1 are used inside the loop
+ * (each followed by inv_mix_columns()), and group 0 is XORed in last,
+ * with no inv_mix_columns() after it.
+ */
+void
+br_aes_ct64_bitslice_decrypt(unsigned num_rounds,
+	const uint64_t *skey, uint64_t *q)
+{
+	unsigned u;
+
+	add_round_key(q, skey + (num_rounds << 3));
+	for (u = num_rounds - 1; u > 0; u --) {
+		inv_shift_rows(q);
+		br_aes_ct64_bitslice_invSbox(q);
+		add_round_key(q, skey + (u << 3));
+		inv_mix_columns(q);
+	}
+	inv_shift_rows(q);
+	br_aes_ct64_bitslice_invSbox(q);
+	add_round_key(q, skey);
+}
--- a/third_party/bearssl/src/aes_ct64_enc.c
+++ b/third_party/bearssl/src/aes_ct64_enc.c
@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static inline void
+add_round_key(uint64_t *q, const uint64_t *sk)
+{	/*
+	 * XOR the eight words sk[0..7] into q[0..7], in place (written
+	 * out word by word rather than as a loop).
+	 */
+	q[0] ^= sk[0];
+	q[1] ^= sk[1];
+	q[2] ^= sk[2];
+	q[3] ^= sk[3];
+	q[4] ^= sk[4];
+	q[5] ^= sk[5];
+	q[6] ^= sk[6];
+	q[7] ^= sk[7];
+}
+
+static inline void
+shift_rows(uint64_t *q)
+{	/*
+	 * Apply one fixed bit permutation to each of the eight words.
+	 * Viewing a word as four 16-bit groups (bits 0-15, 16-31, 32-47,
+	 * 48-63), group 0 is left unchanged and groups 1, 2 and 3 are
+	 * rotated right inside their own 16 bits by 4, 8 and 12 positions
+	 * respectively, as the mask/shift pairs below show.
+	 */
+	int i;
+
+	for (i = 0; i < 8; i ++) {
+		uint64_t x;
+
+		x = q[i];
+		q[i] = (x & (uint64_t)0x000000000000FFFF)
+			| ((x & (uint64_t)0x00000000FFF00000) >> 4)
+			| ((x & (uint64_t)0x00000000000F0000) << 12)
+			| ((x & (uint64_t)0x0000FF0000000000) >> 8)
+			| ((x & (uint64_t)0x000000FF00000000) << 8)
+			| ((x & (uint64_t)0xF000000000000000) >> 12)
+			| ((x & (uint64_t)0x0FFF000000000000) << 4);
+	}
+}
+
+static inline uint64_t
+rotr32(uint64_t x)
+{	/* Rotate x by 32 bits, i.e. exchange its two 32-bit halves. */
+	return (x << 32) | (x >> 32);
+}
+
+static inline void
+mix_columns(uint64_t *q)
+{	/*
+	 * Rewrite the eight words of q in place. All inputs are first
+	 * copied into q0..q7, and r0..r7 are those same values rotated
+	 * right by 16 bits, so the stores to q[] below never feed back
+	 * into later lines. Output word i combines q(i-1) ^ r(i-1) (q7 ^
+	 * r7 for i = 0), r(i) and rotr32(q(i) ^ r(i)); words 1, 3 and 4
+	 * additionally include q7 ^ r7.
+	 */
+	uint64_t q0, q1, q2, q3, q4, q5, q6, q7;
+	uint64_t r0, r1, r2, r3, r4, r5, r6, r7;
+
+	q0 = q[0];
+	q1 = q[1];
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = q[5];
+	q6 = q[6];
+	q7 = q[7];
+	r0 = (q0 >> 16) | (q0 << 48);
+	r1 = (q1 >> 16) | (q1 << 48);
+	r2 = (q2 >> 16) | (q2 << 48);
+	r3 = (q3 >> 16) | (q3 << 48);
+	r4 = (q4 >> 16) | (q4 << 48);
+	r5 = (q5 >> 16) | (q5 << 48);
+	r6 = (q6 >> 16) | (q6 << 48);
+	r7 = (q7 >> 16) | (q7 << 48);
+
+	q[0] = q7 ^ r7 ^ r0 ^ rotr32(q0 ^ r0);
+	q[1] = q0 ^ r0 ^ q7 ^ r7 ^ r1 ^ rotr32(q1 ^ r1);
+	q[2] = q1 ^ r1 ^ r2 ^ rotr32(q2 ^ r2);
+	q[3] = q2 ^ r2 ^ q7 ^ r7 ^ r3 ^ rotr32(q3 ^ r3);
+	q[4] = q3 ^ r3 ^ q7 ^ r7 ^ r4 ^ rotr32(q4 ^ r4);
+	q[5] = q4 ^ r4 ^ r5 ^ rotr32(q5 ^ r5);
+	q[6] = q5 ^ r5 ^ r6 ^ rotr32(q6 ^ r6);
+	q[7] = q6 ^ r6 ^ r7 ^ rotr32(q7 ^ r7);
+}
+
+/* see inner.h
+ *
+ * Runs the encryption rounds in place on the eight words at q. skey is
+ * read as consecutive groups of eight words: group 0 is XORed in first,
+ * groups 1 to num_rounds-1 are added at the end of each loop iteration
+ * (after S-box, shift_rows() and mix_columns()), and group num_rounds
+ * is added after a final S-box + shift_rows() with no mix_columns().
+ */
+void
+br_aes_ct64_bitslice_encrypt(unsigned num_rounds,
+	const uint64_t *skey, uint64_t *q)
+{
+	unsigned u;
+
+	add_round_key(q, skey);
+	for (u = 1; u < num_rounds; u ++) {
+		br_aes_ct64_bitslice_Sbox(q);
+		shift_rows(q);
+		mix_columns(q);
+		add_round_key(q, skey + (u << 3));
+	}
+	br_aes_ct64_bitslice_Sbox(q);
+	shift_rows(q);
+	add_round_key(q, skey + (num_rounds << 3));
+}
--- a/third_party/bearssl/src/aes_ct_cbcdec.c
+++ b/third_party/bearssl/src/aes_ct_cbcdec.c
@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h
+ *
+ * Stores the address of br_aes_ct_cbcdec_vtable in ctx->vtable, fills
+ * ctx->skey through br_aes_ct_keysched(key, len) and keeps the value
+ * returned by that call in ctx->num_rounds.
+ */
+void
+br_aes_ct_cbcdec_init(br_aes_ct_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_ct_cbcdec_vtable;
+	ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len);
+}
+
+/* see bearssl_block.h
+ *
+ * CBC decryption of data, in place. The 16 bytes at iv are read as four
+ * little-endian words on entry and overwritten on exit with the last
+ * input (ciphertext) block that was processed. Each loop iteration
+ * handles two 16-byte blocks at once (first block in the even-indexed
+ * words of q, second block in the odd-indexed ones), or a single block
+ * when fewer than 32 bytes remain.
+ *
+ * NOTE(review): every iteration reads and writes at least 16 bytes, so
+ * len is presumably required to be a multiple of 16 -- confirm with the
+ * API documentation.
+ */
+void
+br_aes_ct_cbcdec_run(const br_aes_ct_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf, *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t sk_exp[120];
+
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	ivbuf = iv;
+	iv0 = br_dec32le(ivbuf);
+	iv1 = br_dec32le(ivbuf + 4);
+	iv2 = br_dec32le(ivbuf + 8);
+	iv3 = br_dec32le(ivbuf + 12);
+	buf = data;
+	while (len > 0) {
+		uint32_t q[8], sq[8]; /* sq: copy of the input words, taken before decryption */
+
+		q[0] = br_dec32le(buf);
+		q[2] = br_dec32le(buf + 4);
+		q[4] = br_dec32le(buf + 8);
+		q[6] = br_dec32le(buf + 12);
+		if (len >= 32) {
+			q[1] = br_dec32le(buf + 16);
+			q[3] = br_dec32le(buf + 20);
+			q[5] = br_dec32le(buf + 24);
+			q[7] = br_dec32le(buf + 28);
+		} else { /* only one block left: the second slot is filled with zeros */
+			q[1] = 0;
+			q[3] = 0;
+			q[5] = 0;
+			q[7] = 0;
+		}
+		memcpy(sq, q, sizeof q);
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_decrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+		br_enc32le(buf, q[0] ^ iv0); /* first block: XOR with the current IV */
+		br_enc32le(buf + 4, q[2] ^ iv1);
+		br_enc32le(buf + 8, q[4] ^ iv2);
+		br_enc32le(buf + 12, q[6] ^ iv3);
+		if (len < 32) { /* single-block case: its input becomes the new IV, then stop */
+			iv0 = sq[0];
+			iv1 = sq[2];
+			iv2 = sq[4];
+			iv3 = sq[6];
+			break;
+		}
+		br_enc32le(buf + 16, q[1] ^ sq[0]); /* second block: XOR with the first block's input */
+		br_enc32le(buf + 20, q[3] ^ sq[2]);
+		br_enc32le(buf + 24, q[5] ^ sq[4]);
+		br_enc32le(buf + 28, q[7] ^ sq[6]);
+		iv0 = sq[1]; /* the second block's input is the IV for the next iteration */
+		iv1 = sq[3];
+		iv2 = sq[5];
+		iv3 = sq[7];
+		buf += 32;
+		len -= 32;
+	}
+	br_enc32le(ivbuf, iv0);
+	br_enc32le(ivbuf + 4, iv1);
+	br_enc32le(ivbuf + 8, iv2);
+	br_enc32le(ivbuf + 12, iv3);
+}
+
+/* see bearssl_block.h
+ *
+ * Descriptor table for the aes_ct CBC-decryption implementation: size
+ * of the key structure, the constants 16 and 4 (NOTE(review):
+ * presumably block size in bytes and its base-2 logarithm -- confirm
+ * against br_block_cbcdec_class), then the init and run functions cast
+ * to the pointer types taking a br_block_cbcdec_class pointer first.
+ */
+const br_block_cbcdec_class br_aes_ct_cbcdec_vtable = {
+	sizeof(br_aes_ct_cbcdec_keys),
+	16,
+	4,
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))
+		&br_aes_ct_cbcdec_init,
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+		&br_aes_ct_cbcdec_run
+};
--- a/third_party/bearssl/src/aes_ct_cbcenc.c
+++ b/third_party/bearssl/src/aes_ct_cbcenc.c
@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h
+ *
+ * Stores the address of br_aes_ct_cbcenc_vtable in ctx->vtable, fills
+ * ctx->skey through br_aes_ct_keysched(key, len) and keeps the value
+ * returned by that call in ctx->num_rounds.
+ */
+void
+br_aes_ct_cbcenc_init(br_aes_ct_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_ct_cbcenc_vtable;
+	ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len);
+}
+
+/* see bearssl_block.h
+ *
+ * CBC encryption of data, in place, one 16-byte block per iteration.
+ * The 16 bytes at iv are read as four little-endian words on entry and
+ * overwritten on exit with the last output block. Only the
+ * even-indexed words of q carry data; the odd-indexed words are zeroed
+ * once before the loop and whatever they contain after each encryption
+ * is never read.
+ *
+ * NOTE(review): len is decremented by 16 per iteration with no partial
+ * block handling, so it is presumably required to be a multiple of 16
+ * -- confirm with the API documentation.
+ */
+void
+br_aes_ct_cbcenc_run(const br_aes_ct_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf, *ivbuf;
+	uint32_t q[8];
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t sk_exp[120];
+
+	q[1] = 0;
+	q[3] = 0;
+	q[5] = 0;
+	q[7] = 0;
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	ivbuf = iv;
+	iv0 = br_dec32le(ivbuf);
+	iv1 = br_dec32le(ivbuf + 4);
+	iv2 = br_dec32le(ivbuf + 8);
+	iv3 = br_dec32le(ivbuf + 12);
+	buf = data;
+	while (len > 0) {
+		q[0] = iv0 ^ br_dec32le(buf); /* chain: input block XOR previous output (or IV) */
+		q[2] = iv1 ^ br_dec32le(buf + 4);
+		q[4] = iv2 ^ br_dec32le(buf + 8);
+		q[6] = iv3 ^ br_dec32le(buf + 12);
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+		iv0 = q[0]; /* the output block is also the chaining value for the next block */
+		iv1 = q[2];
+		iv2 = q[4];
+		iv3 = q[6];
+		br_enc32le(buf, iv0);
+		br_enc32le(buf + 4, iv1);
+		br_enc32le(buf + 8, iv2);
+		br_enc32le(buf + 12, iv3);
+		buf += 16;
+		len -= 16;
+	}
+	br_enc32le(ivbuf, iv0);
+	br_enc32le(ivbuf + 4, iv1);
+	br_enc32le(ivbuf + 8, iv2);
+	br_enc32le(ivbuf + 12, iv3);
+}
+
+/* see bearssl_block.h
+ *
+ * Descriptor table for the aes_ct CBC-encryption implementation: size
+ * of the key structure, the constants 16 and 4 (NOTE(review):
+ * presumably block size in bytes and its base-2 logarithm -- confirm
+ * against br_block_cbcenc_class), then the init and run functions cast
+ * to the pointer types taking a br_block_cbcenc_class pointer first.
+ */
+const br_block_cbcenc_class br_aes_ct_cbcenc_vtable = {
+	sizeof(br_aes_ct_cbcenc_keys),
+	16,
+	4,
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
+		&br_aes_ct_cbcenc_init,
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+		&br_aes_ct_cbcenc_run
+};
--- a/third_party/bearssl/src/aes_ct_ctr.c
+++ b/third_party/bearssl/src/aes_ct_ctr.c
@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h
+ *
+ * Stores the address of br_aes_ct_ctr_vtable in ctx->vtable, fills
+ * ctx->skey through br_aes_ct_keysched(key, len) and keeps the value
+ * returned by that call in ctx->num_rounds.
+ */
+void
+br_aes_ct_ctr_init(br_aes_ct_ctr_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_ct_ctr_vtable;
+	ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len);
+}
+
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{	/*
+	 * XOR len bytes read from src into dst, one byte at a time, in
+	 * increasing address order. A len of 0 does nothing.
+	 */
+	unsigned char *d;
+	const unsigned char *s;
+
+	d = dst;
+	s = src;
+	while (len -- > 0) {
+		*d ++ ^= *s ++;
+	}
+}
+
+/* see bearssl_block.h
+ *
+ * XORs the len bytes at data, in place, with encrypted counter blocks,
+ * two 16-byte blocks per loop iteration. Each encrypted input block is
+ * made of the three 32-bit words decoded (little-endian) from the
+ * first 12 bytes at iv, followed by br_swap32() of the running counter
+ * (cc for the first block, cc + 1 for the second). The return value is
+ * cc advanced by one for every 16-byte block consumed, a trailing
+ * partial block counting as one.
+ */
+uint32_t
+br_aes_ct_ctr_run(const br_aes_ct_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char *buf;
+	const unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2;
+	uint32_t sk_exp[120];
+
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	ivbuf = iv;
+	iv0 = br_dec32le(ivbuf);
+	iv1 = br_dec32le(ivbuf + 4);
+	iv2 = br_dec32le(ivbuf + 8);
+	buf = data;
+	while (len > 0) {
+		uint32_t q[8];
+		unsigned char tmp[32]; /* the two encrypted blocks, first block in bytes 0-15 */
+
+		/*
+		 * TODO: see if we can save on the first br_aes_ct_ortho()
+		 * call, since iv0/iv1/iv2 are constant for the whole run.
+		 */
+		q[0] = q[1] = iv0;
+		q[2] = q[3] = iv1;
+		q[4] = q[5] = iv2;
+		q[6] = br_swap32(cc);
+		q[7] = br_swap32(cc + 1);
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+		br_enc32le(tmp, q[0]); /* even-indexed words: block built from cc */
+		br_enc32le(tmp + 4, q[2]);
+		br_enc32le(tmp + 8, q[4]);
+		br_enc32le(tmp + 12, q[6]);
+		br_enc32le(tmp + 16, q[1]); /* odd-indexed words: block built from cc + 1 */
+		br_enc32le(tmp + 20, q[3]);
+		br_enc32le(tmp + 24, q[5]);
+		br_enc32le(tmp + 28, q[7]);
+
+		if (len <= 32) { /* final chunk: use only len bytes of tmp */
+			xorbuf(buf, tmp, len);
+			cc ++;
+			if (len > 16) { /* the second block was (at least partly) used */
+				cc ++;
+			}
+			break;
+		}
+		xorbuf(buf, tmp, 32);
+		buf += 32;
+		len -= 32;
+		cc += 2;
+	}
+	return cc;
+}
+
+/* see bearssl_block.h
+ *
+ * Descriptor table for the aes_ct CTR implementation: size of the key
+ * structure, the constants 16 and 4 (NOTE(review): presumably block
+ * size in bytes and its base-2 logarithm -- confirm against
+ * br_block_ctr_class), then the init and run functions cast to the
+ * pointer types taking a br_block_ctr_class pointer first.
+ */
+const br_block_ctr_class br_aes_ct_ctr_vtable = {
+	sizeof(br_aes_ct_ctr_keys),
+	16,
+	4,
+	(void (*)(const br_block_ctr_class **, const void *, size_t))
+		&br_aes_ct_ctr_init,
+	(uint32_t (*)(const br_block_ctr_class *const *,
+		const void *, uint32_t, void *, size_t))
+		&br_aes_ct_ctr_run
+};
--- a/third_party/bearssl/src/aes_ct_ctrcbc.c
+++ b/third_party/bearssl/src/aes_ct_ctrcbc.c
@ -0,0 +1,422 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h
+ *
+ * Stores the address of br_aes_ct_ctrcbc_vtable in ctx->vtable, fills
+ * ctx->skey through br_aes_ct_keysched(key, len) and keeps the value
+ * returned by that call in ctx->num_rounds.
+ */
+void
+br_aes_ct_ctrcbc_init(br_aes_ct_ctrcbc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_ct_ctrcbc_vtable;
+	ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len);
+}
+
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{	/*
+	 * XOR len bytes read from src into dst, one byte at a time, in
+	 * increasing address order. A len of 0 does nothing.
+	 */
+	unsigned char *d;
+	const unsigned char *s;
+
+	d = dst;
+	s = src;
+	while (len -- > 0) {
+		*d ++ ^= *s ++;
+	}
+}
+
+/* see bearssl_block.h
+ *
+ * XORs the len bytes at data, in place, with encrypted counter blocks.
+ * The 16 bytes at ctr hold the counter as four big-endian 32-bit words;
+ * it is incremented (with carry across all four words) once per
+ * 16-byte block consumed, a trailing partial block counting as one, and
+ * the updated value is written back to ctr on exit. Two counter blocks
+ * are encrypted per loop iteration.
+ */
+void
+br_aes_ct_ctrcbc_ctr(const br_aes_ct_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t sk_exp[120];
+
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * We keep the counter as four 32-bit values, with big-endian
+	 * convention, because that's what is expected for purposes of
+	 * incrementing the counter value.
+	 */
+	ivbuf = ctr;
+	iv0 = br_dec32be(ivbuf +  0);
+	iv1 = br_dec32be(ivbuf +  4);
+	iv2 = br_dec32be(ivbuf +  8);
+	iv3 = br_dec32be(ivbuf + 12);
+
+	buf = data;
+	while (len > 0) {
+		uint32_t q[8], carry;
+		unsigned char tmp[32];
+
+		/*
+		 * The bitslice implementation expects values in
+		 * little-endian convention, so we have to byteswap them.
+		 */
+		q[0] = br_swap32(iv0);
+		q[2] = br_swap32(iv1);
+		q[4] = br_swap32(iv2);
+		q[6] = br_swap32(iv3);
+		iv3 ++;
+		carry = ~(iv3 | -iv3) >> 31; /* 1 if iv3 wrapped around to zero, else 0 */
+		iv2 += carry;
+		carry &= -(~(iv2 | -iv2) >> 31); /* stays 1 only if iv2 is zero after the addition */
+		iv1 += carry;
+		carry &= -(~(iv1 | -iv1) >> 31);
+		iv0 += carry;
+		q[1] = br_swap32(iv0); /* second slot: the counter value after the first increment */
+		q[3] = br_swap32(iv1);
+		q[5] = br_swap32(iv2);
+		q[7] = br_swap32(iv3);
+		if (len > 16) { /* second increment only when the second block is actually used */
+			iv3 ++;
+			carry = ~(iv3 | -iv3) >> 31;
+			iv2 += carry;
+			carry &= -(~(iv2 | -iv2) >> 31);
+			iv1 += carry;
+			carry &= -(~(iv1 | -iv1) >> 31);
+			iv0 += carry;
+		}
+
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+
+		br_enc32le(tmp, q[0]);
+		br_enc32le(tmp + 4, q[2]);
+		br_enc32le(tmp + 8, q[4]);
+		br_enc32le(tmp + 12, q[6]);
+		br_enc32le(tmp + 16, q[1]);
+		br_enc32le(tmp + 20, q[3]);
+		br_enc32le(tmp + 24, q[5]);
+		br_enc32le(tmp + 28, q[7]);
+
+		if (len <= 32) { /* final chunk: use only len bytes of tmp */
+			xorbuf(buf, tmp, len);
+			break;
+		}
+		xorbuf(buf, tmp, 32);
+		buf += 32;
+		len -= 32;
+	}
+	br_enc32be(ivbuf +  0, iv0);
+	br_enc32be(ivbuf +  4, iv1);
+	br_enc32be(ivbuf +  8, iv2);
+	br_enc32be(ivbuf + 12, iv3);
+}
+
+/* see bearssl_block.h
+ *
+ * Updates the 16-byte value at cbcmac (handled as four little-endian
+ * words) with the bytes at data, which are only read: for each 16-byte
+ * block, the current value is XORed with the block and the result is
+ * encrypted to give the new value. Only the even-indexed words of q
+ * carry data; the odd-indexed words are zeroed once before the loop
+ * and whatever they contain after each encryption is never read.
+ *
+ * NOTE(review): len is decremented by 16 per iteration with no partial
+ * block handling, so it is presumably required to be a multiple of 16
+ * -- confirm with the API documentation.
+ */
+void
+br_aes_ct_ctrcbc_mac(const br_aes_ct_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	const unsigned char *buf;
+	uint32_t cm0, cm1, cm2, cm3;
+	uint32_t q[8];
+	uint32_t sk_exp[120];
+
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	buf = data;
+	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
+	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
+	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
+	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+	q[1] = 0;
+	q[3] = 0;
+	q[5] = 0;
+	q[7] = 0;
+
+	while (len > 0) {
+		q[0] = cm0 ^ br_dec32le(buf +  0);
+		q[2] = cm1 ^ br_dec32le(buf +  4);
+		q[4] = cm2 ^ br_dec32le(buf +  8);
+		q[6] = cm3 ^ br_dec32le(buf + 12);
+
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+
+		cm0 = q[0];
+		cm1 = q[2];
+		cm2 = q[4];
+		cm3 = q[6];
+		buf += 16;
+		len -= 16;
+	}
+
+	br_enc32le((unsigned char *)cbcmac +  0, cm0);
+	br_enc32le((unsigned char *)cbcmac +  4, cm1);
+	br_enc32le((unsigned char *)cbcmac +  8, cm2);
+	br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* see bearssl_block.h
+ *
+ * Combined CTR encryption and CBC-MAC, in place on data. The counter
+ * at ctr (four big-endian words) is incremented once per 16-byte block
+ * and written back on exit; the 16-byte value at cbcmac (four
+ * little-endian words) is updated with the encrypted blocks and written
+ * back on exit. Each bitsliced encryption call carries the counter
+ * block in the even-indexed words of q and the pending CBC-MAC input in
+ * the odd-indexed words.
+ *
+ * NOTE(review): len is decremented by 16 per iteration and the loop
+ * only finishes the CBC-MAC when len reaches exactly 0, so len is
+ * presumably required to be a multiple of 16 -- confirm with the API
+ * documentation.
+ */
+void
+br_aes_ct_ctrcbc_encrypt(const br_aes_ct_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	/*
+	 * When encrypting, the CBC-MAC processing must be lagging by
+	 * one block, since it operates on the encrypted values, so
+	 * it must wait for that encryption to complete.
+	 */
+
+	unsigned char *buf;
+	unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t cm0, cm1, cm2, cm3;
+	uint32_t sk_exp[120];
+	int first_iter;
+
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * We keep the counter as four 32-bit values, with big-endian
+	 * convention, because that's what is expected for purposes of
+	 * incrementing the counter value.
+	 */
+	ivbuf = ctr;
+	iv0 = br_dec32be(ivbuf +  0);
+	iv1 = br_dec32be(ivbuf +  4);
+	iv2 = br_dec32be(ivbuf +  8);
+	iv3 = br_dec32be(ivbuf + 12);
+
+	/*
+	 * The current CBC-MAC value is kept in little-endian convention.
+	 */
+	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
+	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
+	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
+	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+
+	buf = data;
+	first_iter = 1;
+	while (len > 0) {
+		uint32_t q[8], carry;
+
+		/*
+		 * The bitslice implementation expects values in
+		 * little-endian convention, so we have to byteswap them.
+		 */
+		q[0] = br_swap32(iv0);
+		q[2] = br_swap32(iv1);
+		q[4] = br_swap32(iv2);
+		q[6] = br_swap32(iv3);
+		iv3 ++;
+		carry = ~(iv3 | -iv3) >> 31; /* 1 if iv3 wrapped around to zero, else 0 */
+		iv2 += carry;
+		carry &= -(~(iv2 | -iv2) >> 31); /* stays 1 only if iv2 is zero after the addition */
+		iv1 += carry;
+		carry &= -(~(iv1 | -iv1) >> 31);
+		iv0 += carry;
+
+		/*
+		 * The odd values are used for CBC-MAC.
+		 */
+		q[1] = cm0;
+		q[3] = cm1;
+		q[5] = cm2;
+		q[7] = cm3;
+
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+
+		/*
+		 * We do the XOR with the plaintext in 32-bit registers,
+		 * so that the values are available for CBC-MAC processing
+		 * as well.
+		 */
+		q[0] ^= br_dec32le(buf +  0);
+		q[2] ^= br_dec32le(buf +  4);
+		q[4] ^= br_dec32le(buf +  8);
+		q[6] ^= br_dec32le(buf + 12);
+		br_enc32le(buf +  0, q[0]);
+		br_enc32le(buf +  4, q[2]);
+		br_enc32le(buf +  8, q[4]);
+		br_enc32le(buf + 12, q[6]);
+
+		buf += 16;
+		len -= 16;
+
+		/*
+		 * We set the cm* values to the block to encrypt in the
+		 * next iteration.
+		 */
+		if (first_iter) { /* first block: combine with the incoming cbcmac value itself; the encrypted odd words are not used */
+			first_iter = 0;
+			cm0 ^= q[0];
+			cm1 ^= q[2];
+			cm2 ^= q[4];
+			cm3 ^= q[6];
+		} else { /* later blocks: combine with the encryption (odd words) of the previous cm* values */
+			cm0 = q[0] ^ q[1];
+			cm1 = q[2] ^ q[3];
+			cm2 = q[4] ^ q[5];
+			cm3 = q[6] ^ q[7];
+		}
+
+		/*
+		 * If this was the last iteration, then compute the
+		 * extra block encryption to complete CBC-MAC.
+		 */
+		if (len == 0) {
+			q[0] = cm0;
+			q[2] = cm1;
+			q[4] = cm2;
+			q[6] = cm3;
+			br_aes_ct_ortho(q);
+			br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+			br_aes_ct_ortho(q);
+			cm0 = q[0];
+			cm1 = q[2];
+			cm2 = q[4];
+			cm3 = q[6];
+			break;
+		}
+	}
+
+	br_enc32be(ivbuf +  0, iv0);
+	br_enc32be(ivbuf +  4, iv1);
+	br_enc32be(ivbuf +  8, iv2);
+	br_enc32be(ivbuf + 12, iv3);
+	br_enc32le((unsigned char *)cbcmac +  0, cm0);
+	br_enc32le((unsigned char *)cbcmac +  4, cm1);
+	br_enc32le((unsigned char *)cbcmac +  8, cm2);
+	br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* see bearssl_block.h
+ *
+ * Combined CTR decryption and CBC-MAC, in place on data. For each
+ * 16-byte block, one bitsliced encryption call processes the counter
+ * block (even-indexed words of q) together with the current CBC-MAC
+ * value XORed with the still-encrypted input block (odd-indexed
+ * words); the first result is XORed into the data and the second
+ * becomes the new CBC-MAC value. The counter at ctr (four big-endian
+ * words) and the value at cbcmac (four little-endian words) are both
+ * written back on exit.
+ *
+ * NOTE(review): len is decremented by 16 per iteration with no partial
+ * block handling, so it is presumably required to be a multiple of 16
+ * -- confirm with the API documentation.
+ */
+void
+br_aes_ct_ctrcbc_decrypt(const br_aes_ct_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t cm0, cm1, cm2, cm3;
+	uint32_t sk_exp[120];
+
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * We keep the counter as four 32-bit values, with big-endian
+	 * convention, because that's what is expected for purposes of
+	 * incrementing the counter value.
+	 */
+	ivbuf = ctr;
+	iv0 = br_dec32be(ivbuf +  0);
+	iv1 = br_dec32be(ivbuf +  4);
+	iv2 = br_dec32be(ivbuf +  8);
+	iv3 = br_dec32be(ivbuf + 12);
+
+	/*
+	 * The current CBC-MAC value is kept in little-endian convention.
+	 */
+	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
+	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
+	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
+	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+
+	buf = data;
+	while (len > 0) {
+		uint32_t q[8], carry;
+		unsigned char tmp[16];
+
+		/*
+		 * The bitslice implementation expects values in
+		 * little-endian convention, so we have to byteswap them.
+		 */
+		q[0] = br_swap32(iv0);
+		q[2] = br_swap32(iv1);
+		q[4] = br_swap32(iv2);
+		q[6] = br_swap32(iv3);
+		iv3 ++;
+		carry = ~(iv3 | -iv3) >> 31; /* 1 if iv3 wrapped around to zero, else 0 */
+		iv2 += carry;
+		carry &= -(~(iv2 | -iv2) >> 31); /* stays 1 only if iv2 is zero after the addition */
+		iv1 += carry;
+		carry &= -(~(iv1 | -iv1) >> 31);
+		iv0 += carry;
+
+		/*
+		 * The odd values are used for CBC-MAC.
+		 */
+		q[1] = cm0 ^ br_dec32le(buf +  0);
+		q[3] = cm1 ^ br_dec32le(buf +  4);
+		q[5] = cm2 ^ br_dec32le(buf +  8);
+		q[7] = cm3 ^ br_dec32le(buf + 12);
+
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+
+		br_enc32le(tmp +  0, q[0]);
+		br_enc32le(tmp +  4, q[2]);
+		br_enc32le(tmp +  8, q[4]);
+		br_enc32le(tmp + 12, q[6]);
+		xorbuf(buf, tmp, 16); /* the input block was already folded into q[odd] above, so it can be overwritten now */
+		cm0 = q[1];
+		cm1 = q[3];
+		cm2 = q[5];
+		cm3 = q[7];
+		buf += 16;
+		len -= 16;
+	}
+
+	br_enc32be(ivbuf +  0, iv0);
+	br_enc32be(ivbuf +  4, iv1);
+	br_enc32be(ivbuf +  8, iv2);
+	br_enc32be(ivbuf + 12, iv3);
+	br_enc32le((unsigned char *)cbcmac +  0, cm0);
+	br_enc32le((unsigned char *)cbcmac +  4, cm1);
+	br_enc32le((unsigned char *)cbcmac +  8, cm2);
+	br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* see bearssl_block.h
+ *
+ * Descriptor table for the aes_ct CTR + CBC-MAC implementation. In
+ * order, the initializers are: the size of the key structure, the
+ * constants 16 and 4 (NOTE(review): presumably the block size in bytes
+ * and its base-2 logarithm -- confirm against br_block_ctrcbc_class),
+ * then the init, encrypt, decrypt, ctr and mac functions, each cast to
+ * a function-pointer type whose first parameter is a pointer to a
+ * br_block_ctrcbc_class pointer.
+ */
+const br_block_ctrcbc_class br_aes_ct_ctrcbc_vtable = {
+	sizeof(br_aes_ct_ctrcbc_keys),
+	16,
+	4,
+	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+		&br_aes_ct_ctrcbc_init,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_ct_ctrcbc_encrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_ct_ctrcbc_decrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, size_t))
+		&br_aes_ct_ctrcbc_ctr,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, const void *, size_t))
+		&br_aes_ct_ctrcbc_mac
+};
--- a/third_party/bearssl/src/aes_ct_dec.c
+++ b/third_party/bearssl/src/aes_ct_dec.c
@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h */
+void
+br_aes_ct_bitslice_invSbox(uint32_t *q)
+{
+	/*
+	 * AES S-box is:
+	 *   S(x) = A(I(x)) ^ 0x63
+	 * where I() is inversion in GF(256), and A() is a linear
+	 * transform (0 is formally defined to be its own inverse).
+	 * Since inversion is an involution, the inverse S-box can be
+	 * computed from the S-box as:
+	 *   iS(x) = B(S(B(x ^ 0x63)) ^ 0x63)
+	 * where B() is the inverse of A(). Indeed, for any y in GF(256):
+	 *   iS(S(y)) = B(A(I(B(A(I(y)) ^ 0x63 ^ 0x63))) ^ 0x63 ^ 0x63) = y
+	 *
+	 * Note: we reuse the implementation of the forward S-box,
+	 * instead of duplicating it here, so that total code size is
+	 * lower. By merging the B() transforms into the S-box circuit
+	 * we could make faster CBC decryption, but CBC decryption is
+	 * already considerably faster than CBC encryption because we
+	 * can process two blocks in parallel.
+	 *
+	 * In the code below, the eight words of q are rewritten in
+	 * place by a linear step (words 0, 1, 5 and 6 complemented,
+	 * then each output word is the XOR of three inputs), then the
+	 * forward S-box, then the same linear step again. These two
+	 * linear steps are presumably the two B(. ^ 0x63) applications
+	 * of the formula above (not re-derived here).
+	 */
+	uint32_t q0, q1, q2, q3, q4, q5, q6, q7;
+
+	q0 = ~q[0];
+	q1 = ~q[1];
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = ~q[5];
+	q6 = ~q[6];
+	q7 = q[7];
+	q[7] = q1 ^ q4 ^ q6;
+	q[6] = q0 ^ q3 ^ q5;
+	q[5] = q7 ^ q2 ^ q4;
+	q[4] = q6 ^ q1 ^ q3;
+	q[3] = q5 ^ q0 ^ q2;
+	q[2] = q4 ^ q7 ^ q1;
+	q[1] = q3 ^ q6 ^ q0;
+	q[0] = q2 ^ q5 ^ q7;
+
+	br_aes_ct_bitslice_Sbox(q);
+
+	q0 = ~q[0];
+	q1 = ~q[1];
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = ~q[5];
+	q6 = ~q[6];
+	q7 = q[7];
+	q[7] = q1 ^ q4 ^ q6;
+	q[6] = q0 ^ q3 ^ q5;
+	q[5] = q7 ^ q2 ^ q4;
+	q[4] = q6 ^ q1 ^ q3;
+	q[3] = q5 ^ q0 ^ q2;
+	q[2] = q4 ^ q7 ^ q1;
+	q[1] = q3 ^ q6 ^ q0;
+	q[0] = q2 ^ q5 ^ q7;
+}
+
+static void
+add_round_key(uint32_t *q, const uint32_t *sk)
+{	/*
+	 * XOR the eight words sk[0..7] into q[0..7], in place.
+	 */
+	int i;
+
+	for (i = 0; i < 8; i ++) {
+		q[i] ^= sk[i];
+	}
+}
+
+static void
+inv_shift_rows(uint32_t *q)
+{	/*
+	 * Apply one fixed bit permutation to each of the eight words:
+	 * byte 0 (bits 0-7) is left unchanged, and the bits of bytes 1,
+	 * 2 and 3 are rotated left inside their own byte by 2, 4 and 6
+	 * positions respectively, as the mask/shift pairs below show.
+	 */
+	int i;
+
+	for (i = 0; i < 8; i ++) {
+		uint32_t x;
+
+		x = q[i];
+		q[i] = (x & 0x000000FF)
+			| ((x & 0x00003F00) << 2) | ((x & 0x0000C000) >> 6)
+			| ((x & 0x000F0000) << 4) | ((x & 0x00F00000) >> 4)
+			| ((x & 0x03000000) << 6) | ((x & 0xFC000000) >> 2);
+	}
+}
+
+static inline uint32_t
+rotr16(uint32_t x)
+{	/* Rotate x by 16 bits, i.e. exchange its two 16-bit halves. */
+	return (x << 16) | (x >> 16);
+}
+
+static void
+inv_mix_columns(uint32_t *q)
+{	/*
+	 * Rewrite the eight words of q in place. All inputs are first
+	 * copied into q0..q7, and r0..r7 are those same values rotated
+	 * right by 8 bits, so the stores to q[] below never feed back
+	 * into later lines. Every output word is the XOR of a fixed
+	 * selection of q/r values with a 16-bit rotation (rotr16) of a
+	 * second fixed selection.
+	 */
+	uint32_t q0, q1, q2, q3, q4, q5, q6, q7;
+	uint32_t r0, r1, r2, r3, r4, r5, r6, r7;
+
+	q0 = q[0];
+	q1 = q[1];
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = q[5];
+	q6 = q[6];
+	q7 = q[7];
+	r0 = (q0 >> 8) | (q0 << 24);
+	r1 = (q1 >> 8) | (q1 << 24);
+	r2 = (q2 >> 8) | (q2 << 24);
+	r3 = (q3 >> 8) | (q3 << 24);
+	r4 = (q4 >> 8) | (q4 << 24);
+	r5 = (q5 >> 8) | (q5 << 24);
+	r6 = (q6 >> 8) | (q6 << 24);
+	r7 = (q7 >> 8) | (q7 << 24);
+
+	q[0] = q5 ^ q6 ^ q7 ^ r0 ^ r5 ^ r7 ^ rotr16(q0 ^ q5 ^ q6 ^ r0 ^ r5);
+	q[1] = q0 ^ q5 ^ r0 ^ r1 ^ r5 ^ r6 ^ r7 ^ rotr16(q1 ^ q5 ^ q7 ^ r1 ^ r5 ^ r6);
+	q[2] = q0 ^ q1 ^ q6 ^ r1 ^ r2 ^ r6 ^ r7 ^ rotr16(q0 ^ q2 ^ q6 ^ r2 ^ r6 ^ r7);
+	q[3] = q0 ^ q1 ^ q2 ^ q5 ^ q6 ^ r0 ^ r2 ^ r3 ^ r5 ^ rotr16(q0 ^ q1 ^ q3 ^ q5 ^ q6 ^ q7 ^ r0 ^ r3 ^ r5 ^ r7);
+	q[4] = q1 ^ q2 ^ q3 ^ q5 ^ r1 ^ r3 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr16(q1 ^ q2 ^ q4 ^ q5 ^ q7 ^ r1 ^ r4 ^ r5 ^ r6);
+	q[5] = q2 ^ q3 ^ q4 ^ q6 ^ r2 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr16(q2 ^ q3 ^ q5 ^ q6 ^ r2 ^ r5 ^ r6 ^ r7);
+	q[6] = q3 ^ q4 ^ q5 ^ q7 ^ r3 ^ r5 ^ r6 ^ r7 ^ rotr16(q3 ^ q4 ^ q6 ^ q7 ^ r3 ^ r6 ^ r7);
+	q[7] = q4 ^ q5 ^ q6 ^ r4 ^ r6 ^ r7 ^ rotr16(q4 ^ q5 ^ q7 ^ r4 ^ r7);
+}
+
+/* see inner.h
+ *
+ * Runs the decryption rounds in place on the eight words at q. skey is
+ * read as consecutive groups of eight words: group num_rounds is XORed
+ * in first, groups num_rounds-1 down to 1 are used inside the loop
+ * (each followed by inv_mix_columns()), and group 0 is XORed in last,
+ * with no inv_mix_columns() after it.
+ */
+void
+br_aes_ct_bitslice_decrypt(unsigned num_rounds,
+	const uint32_t *skey, uint32_t *q)
+{
+	unsigned u;
+
+	add_round_key(q, skey + (num_rounds << 3));
+	for (u = num_rounds - 1; u > 0; u --) {
+		inv_shift_rows(q);
+		br_aes_ct_bitslice_invSbox(q);
+		add_round_key(q, skey + (u << 3));
+		inv_mix_columns(q);
+	}
+	inv_shift_rows(q);
+	br_aes_ct_bitslice_invSbox(q);
+	add_round_key(q, skey);
+}
--- a/third_party/bearssl/src/aes_ct_enc.c
+++ b/third_party/bearssl/src/aes_ct_enc.c
@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * XOR the eight round-key words sk[0..7] into the eight state words
+ * q[0..7], in place.
+ */
+static inline void
+add_round_key(uint32_t *q, const uint32_t *sk)
+{
+	int i;
+
+	for (i = 0; i < 8; i ++) {
+		q[i] ^= sk[i];
+	}
+}
+
+/*
+ * ShiftRows step on the eight state words q[0..7], in place.
+ *
+ * Each 32-bit word is handled as four bytes: the low byte is kept, the
+ * next byte is rotated right by 2 bits, the third byte is rotated by 4
+ * bits and the top byte is rotated right by 6 bits.
+ */
+static inline void
+shift_rows(uint32_t *q)
+{
+	uint32_t *p;
+
+	for (p = q; p != q + 8; p ++) {
+		uint32_t w, b0, b1, b2, b3;
+
+		w = *p;
+		b0 = w & 0x000000FF;
+		b1 = ((w >> 2) & 0x00003F00) | ((w << 6) & 0x0000C000);
+		b2 = ((w >> 4) & 0x000F0000) | ((w << 4) & 0x00F00000);
+		b3 = ((w >> 6) & 0x03000000) | ((w << 2) & 0xFC000000);
+		*p = b0 | b1 | b2 | b3;
+	}
+}
+
+/*
+ * Rotate a 32-bit word by 16 bits, i.e. exchange its upper and lower
+ * 16-bit halves.
+ */
+static inline uint32_t
+rotr16(uint32_t x)
+{
+	uint32_t hi, lo;
+
+	hi = x >> 16;
+	lo = x << 16;
+	return lo | hi;
+}
+
+/*
+ * MixColumns step used by br_aes_ct_bitslice_encrypt().
+ *
+ * q points to the eight 32-bit state words; they are replaced in place.
+ * With r[i] = q[i] rotated right by 8 bits and t[i] = q[i] ^ r[i], each
+ * output word is r[i] ^ rotr16(t[i]) XORed with t[i-1], and additionally
+ * with t[7] for outputs 1, 3 and 4 (output 0 uses t[7] as its t[i-1]).
+ */
+static inline void
+mix_columns(uint32_t *q)
+{
+	uint32_t r[8], t[8];
+	int i;
+
+	/* All inputs are captured before q[] is overwritten. */
+	for (i = 0; i < 8; i ++) {
+		uint32_t w;
+
+		w = q[i];
+		r[i] = (w >> 8) | (w << 24);
+		t[i] = w ^ r[i];
+	}
+
+	q[0] = t[7] ^ r[0] ^ rotr16(t[0]);
+	q[1] = t[0] ^ t[7] ^ r[1] ^ rotr16(t[1]);
+	q[2] = t[1] ^ r[2] ^ rotr16(t[2]);
+	q[3] = t[2] ^ t[7] ^ r[3] ^ rotr16(t[3]);
+	q[4] = t[3] ^ t[7] ^ r[4] ^ rotr16(t[4]);
+	q[5] = t[4] ^ r[5] ^ rotr16(t[5]);
+	q[6] = t[5] ^ r[6] ^ rotr16(t[6]);
+	q[7] = t[6] ^ r[7] ^ rotr16(t[7]);
+}
+
+/*
+ * see inner.h
+ *
+ * Encrypt the state words in q[] in place. skey holds the round keys as
+ * consecutive groups of eight 32-bit words; group u is at skey + 8*u and
+ * groups 0..num_rounds are read, in ascending order.
+ */
+void
+br_aes_ct_bitslice_encrypt(unsigned num_rounds,
+	const uint32_t *skey, uint32_t *q)
+{
+	unsigned round;
+
+	/* Initial whitening with round key 0. */
+	add_round_key(q, skey);
+
+	/* Rounds 1 to num_rounds-1 include MixColumns. */
+	round = 1;
+	while (round < num_rounds) {
+		br_aes_ct_bitslice_Sbox(q);
+		shift_rows(q);
+		mix_columns(q);
+		add_round_key(q, skey + (round << 3));
+		round ++;
+	}
+
+	/* Final round: no MixColumns, last round key. */
+	br_aes_ct_bitslice_Sbox(q);
+	shift_rows(q);
+	add_round_key(q, skey + (num_rounds << 3));
+}
--- a/third_party/bearssl/src/aes_pwr8.c
+++ b/third_party/bearssl/src/aes_pwr8.c
@ -0,0 +1,445 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS   1
+#include "inner.h"
+
+/*
+ * This code contains the AES key schedule implementation using the
+ * POWER8 opcodes.
+ */
+
+#if BR_POWER8
+
+/*
+ * AES-128 key schedule.
+ *
+ *   sk   output buffer; 11 subkeys of 16 bytes each are stored
+ *        consecutively (the key itself, then 10 derived subkeys)
+ *   key  the 16-byte key
+ */
+static void
+key_schedule_128(unsigned char *sk, const unsigned char *key)
+{
+	long cc;
+
+	static const uint32_t fmod[] = { 0x11B, 0x11B, 0x11B, 0x11B };
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc = 0;
+
+	/*
+	 * We use the VSX instructions for loading and storing the
+	 * key/subkeys, since they support unaligned accesses. The rest
+	 * of the computation is VMX only. VMX register 0 is VSX
+	 * register 32.
+	 */
+	asm volatile (
+
+		/*
+		 * v0 = all-zero word
+		 * v1 = constant -8 / +8, copied into four words
+		 * v2 = current subkey
+		 * v3 = Rcon (x4 words)
+		 * v6 = constant 8, copied into four words
+		 * v7 = constant 0x11B, copied into four words
+		 * v8 = constant for byteswapping words
+		 */
+		vspltisw(0, 0)
+#if BR_POWER8_LE
+		vspltisw(1, -8)
+#else
+		vspltisw(1, 8)
+#endif
+		lxvw4x(34, 0, %[key])
+		vspltisw(3, 1)
+		vspltisw(6, 8)
+		lxvw4x(39, 0, %[fmod])
+#if BR_POWER8_LE
+		lxvw4x(40, 0, %[idx2be])
+#endif
+
+		/*
+		 * First subkey is a copy of the key itself.
+		 */
+#if BR_POWER8_LE
+		vperm(4, 2, 2, 8)
+		stxvw4x(36, 0, %[sk])
+#else
+		stxvw4x(34, 0, %[sk])
+#endif
+
+		/*
+		 * Loop must run 10 times.
+		 */
+		li(%[cc], 10)
+		mtctr(%[cc])
+	label(loop)
+		/* Increment subkey address */
+		addi(%[sk], %[sk], 16)
+
+		/* Compute SubWord(RotWord(temp)) xor Rcon  (into v4, splat) */
+		vrlw(4, 2, 1)
+		vsbox(4, 4)
+#if BR_POWER8_LE
+		vxor(4, 4, 3)
+#else
+		vsldoi(5, 3, 0, 3)
+		vxor(4, 4, 5)
+#endif
+		vspltw(4, 4, 3)
+
+		/* XOR words for next subkey */
+		vsldoi(5, 0, 2, 12)
+		vxor(2, 2, 5)
+		vsldoi(5, 0, 2, 12)
+		vxor(2, 2, 5)
+		vsldoi(5, 0, 2, 12)
+		vxor(2, 2, 5)
+		vxor(2, 2, 4)
+
+		/* Store next subkey */
+#if BR_POWER8_LE
+		vperm(4, 2, 2, 8)
+		stxvw4x(36, 0, %[sk])
+#else
+		stxvw4x(34, 0, %[sk])
+#endif
+
+		/* Update Rcon */
+		vadduwm(3, 3, 3)
+		vsrw(4, 3, 6)
+		vsubuwm(4, 0, 4)
+		vand(4, 4, 7)
+		vxor(3, 3, 4)
+
+		bdnz(loop)
+
+: [sk] "+b" (sk), [cc] "+b" (cc)
+: [key] "b" (key), [fmod] "b" (fmod)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+	/*
+	 * v8 is written on little-endian builds (byteswap constant), so
+	 * it must be declared as clobbered along with v0..v7.
+	 */
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "ctr", "memory"
+	);
+}
+
+/*
+ * AES-192 key schedule.
+ *
+ *   sk   output buffer; 52 subkey words (13 subkeys of 16 bytes) are
+ *        stored consecutively
+ *   key  the 24-byte key
+ */
+static void
+key_schedule_192(unsigned char *sk, const unsigned char *key)
+{
+	long cc;
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc = 0;
+
+	/*
+	 * We use the VSX instructions for loading and storing the
+	 * key/subkeys, since they support unaligned accesses. The rest
+	 * of the computation is VMX only. VMX register 0 is VSX
+	 * register 32.
+	 */
+	asm volatile (
+
+		/*
+		 * v0 = all-zero word
+		 * v1 = constant -8 / +8, copied into four words
+		 * v2, v3 = current subkey
+		 * v5 = Rcon (x4 words) (already shifted on big-endian)
+		 * v6 = constant 8, copied into four words
+		 * v8 = constant for byteswapping words
+		 *
+		 * The left two words of v3 are ignored.
+		 */
+		vspltisw(0, 0)
+#if BR_POWER8_LE
+		vspltisw(1, -8)
+#else
+		vspltisw(1, 8)
+#endif
+		/* The second load starts at byte offset 8 of the key. */
+		li(%[cc], 8)
+		lxvw4x(34, 0, %[key])
+		lxvw4x(35, %[cc], %[key])
+		vsldoi(3, 3, 0, 8)
+		vspltisw(5, 1)
+#if !BR_POWER8_LE
+		vsldoi(5, 5, 0, 3)
+#endif
+		vspltisw(6, 8)
+#if BR_POWER8_LE
+		lxvw4x(40, 0, %[idx2be])
+#endif
+
+		/*
+		 * Loop must run 8 times. Each iteration produces 256
+		 * bits of subkeys, with a 64-bit overlap.
+		 */
+		li(%[cc], 8)
+		mtctr(%[cc])
+		/* From here on cc is the offset (16) of the second store. */
+		li(%[cc], 16)
+	label(loop)
+
+		/*
+		 * Last 6 words in v2:v3l. Compute next 6 words into
+		 * v3r:v4.
+		 */
+		vrlw(10, 3, 1)
+		vsbox(10, 10)
+		vxor(10, 10, 5)
+		vspltw(10, 10, 1)
+		vsldoi(11, 0, 10, 8)
+
+		vsldoi(12, 0, 2, 12)
+		vxor(12, 2, 12)
+		vsldoi(13, 0, 12, 12)
+		vxor(12, 12, 13)
+		vsldoi(13, 0, 12, 12)
+		vxor(12, 12, 13)
+
+		vspltw(13, 12, 3)
+		vxor(13, 13, 3)
+		vsldoi(14, 0, 3, 12)
+		vxor(13, 13, 14)
+
+		vsldoi(4, 12, 13, 8)
+		vsldoi(14, 0, 3, 8)
+		vsldoi(3, 14, 12, 8)
+
+		vxor(3, 3, 11)
+		vxor(4, 4, 10)
+
+		/*
+		 * Update Rcon. Since for a 192-bit key, we use only 8
+		 * such constants, we will not hit the field modulus,
+		 * so a simple shift (addition) works well.
+		 */
+		vadduwm(5, 5, 5)
+
+		/*
+		 * Write out the two left 128-bit words
+		 */
+#if BR_POWER8_LE
+		vperm(10, 2, 2, 8)
+		vperm(11, 3, 3, 8)
+		stxvw4x(42, 0, %[sk])
+		stxvw4x(43, %[cc], %[sk])
+#else
+		stxvw4x(34, 0, %[sk])
+		stxvw4x(35, %[cc], %[sk])
+#endif
+		/*
+		 * 32 bytes were stored but sk advances by 24 only: the
+		 * next iteration rewrites the last 8 bytes (the 64-bit
+		 * overlap mentioned above).
+		 */
+		addi(%[sk], %[sk], 24)
+
+		/*
+		 * Shift words for next iteration.
+		 */
+		vsldoi(2, 3, 4, 8)
+		vsldoi(3, 4, 0, 8)
+
+		bdnz(loop)
+
+		/*
+		 * The loop wrote the first 50 subkey words, but we need
+		 * to produce 52, so we must do one last write.
+		 */
+#if BR_POWER8_LE
+		vperm(10, 2, 2, 8)
+		stxvw4x(42, 0, %[sk])
+#else
+		stxvw4x(34, 0, %[sk])
+#endif
+
+: [sk] "+b" (sk), [cc] "+b" (cc)
+: [key] "b" (key)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+  "v8", "v9", "v10", "v11", "v12", "v13", "v14", "ctr", "memory"
+	);
+}
+
+/*
+ * AES-256 key schedule.
+ *
+ *   sk   output buffer; 15 subkeys of 16 bytes each are stored
+ *        consecutively
+ *   key  the 32-byte key
+ */
+static void
+key_schedule_256(unsigned char *sk, const unsigned char *key)
+{
+	long cc;
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc = 0;
+
+	/*
+	 * We use the VSX instructions for loading and storing the
+	 * key/subkeys, since they support unaligned accesses. The rest
+	 * of the computation is VMX only. VMX register 0 is VSX
+	 * register 32.
+	 */
+	asm volatile (
+
+		/*
+		 * v0 = all-zero word
+		 * v1 = constant -8 / +8, copied into four words
+		 * v2, v3 = current subkey
+		 * v6 = Rcon (x4 words) (already shifted on big-endian)
+		 * v7 = constant 8, copied into four words
+		 * v8 = constant for byteswapping words
+		 *
+		 * NOTE(review): unlike the 192-bit variant, both v2 and
+		 * v3 are loaded from, fed back into and stored as full
+		 * 16-byte values here, so no part of v3 appears to be
+		 * ignored.
+		 */
+		vspltisw(0, 0)
+#if BR_POWER8_LE
+		vspltisw(1, -8)
+#else
+		vspltisw(1, 8)
+#endif
+		/* The second load starts at byte offset 16 of the key. */
+		li(%[cc], 16)
+		lxvw4x(34, 0, %[key])
+		lxvw4x(35, %[cc], %[key])
+		vspltisw(6, 1)
+#if !BR_POWER8_LE
+		vsldoi(6, 6, 0, 3)
+#endif
+		vspltisw(7, 8)
+#if BR_POWER8_LE
+		lxvw4x(40, 0, %[idx2be])
+#endif
+
+		/*
+		 * Loop must run 7 times. Each iteration produces two
+		 * subkeys.
+		 */
+		li(%[cc], 7)
+		mtctr(%[cc])
+		/* From here on cc is the offset (16) of the second store. */
+		li(%[cc], 16)
+	label(loop)
+
+		/*
+		 * Current words are in v2:v3. Compute next word in v4.
+		 */
+		vrlw(10, 3, 1)
+		vsbox(10, 10)
+		vxor(10, 10, 6)
+		vspltw(10, 10, 3)
+
+		vsldoi(4, 0, 2, 12)
+		vxor(4, 2, 4)
+		vsldoi(5, 0, 4, 12)
+		vxor(4, 4, 5)
+		vsldoi(5, 0, 4, 12)
+		vxor(4, 4, 5)
+		vxor(4, 4, 10)
+
+		/*
+		 * Then other word in v5.
+		 */
+		vsbox(10, 4)
+		vspltw(10, 10, 3)
+
+		vsldoi(5, 0, 3, 12)
+		vxor(5, 3, 5)
+		vsldoi(11, 0, 5, 12)
+		vxor(5, 5, 11)
+		vsldoi(11, 0, 5, 12)
+		vxor(5, 5, 11)
+		vxor(5, 5, 10)
+
+		/*
+		 * Update Rcon. Since for a 256-bit key, we use only 7
+		 * such constants, we will not hit the field modulus,
+		 * so a simple shift (addition) works well.
+		 */
+		vadduwm(6, 6, 6)
+
+		/*
+		 * Write out the two left 128-bit words
+		 */
+#if BR_POWER8_LE
+		vperm(10, 2, 2, 8)
+		vperm(11, 3, 3, 8)
+		stxvw4x(42, 0, %[sk])
+		stxvw4x(43, %[cc], %[sk])
+#else
+		stxvw4x(34, 0, %[sk])
+		stxvw4x(35, %[cc], %[sk])
+#endif
+		addi(%[sk], %[sk], 32)
+
+		/*
+		 * Replace v2:v3 with v4:v5.
+		 */
+		vxor(2, 0, 4)
+		vxor(3, 0, 5)
+
+		bdnz(loop)
+
+		/*
+		 * The loop wrote the first 14 subkeys, but we need 15,
+		 * so we must do an extra write.
+		 */
+#if BR_POWER8_LE
+		vperm(10, 2, 2, 8)
+		stxvw4x(42, 0, %[sk])
+#else
+		stxvw4x(34, 0, %[sk])
+#endif
+
+: [sk] "+b" (sk), [cc] "+b" (cc)
+: [key] "b" (key)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+  "v8", "v9", "v10", "v11", "v12", "v13", "v14", "ctr", "memory"
+	);
+}
+
+/*
+ * see inner.h
+ *
+ * This definition is only compiled when BR_POWER8 is non-zero, and in
+ * that case it unconditionally reports support (returns 1); no runtime
+ * detection is performed here.
+ */
+int
+br_aes_pwr8_supported(void)
+{
+	return 1;
+}
+
+/*
+ * see inner.h
+ *
+ * Run the key schedule matching the key length (in bytes) and return
+ * the corresponding number of rounds: 10 for 16 bytes, 12 for 24 bytes,
+ * 14 otherwise. Any length other than 16 or 24 is handled as a 32-byte
+ * key, without validation.
+ */
+unsigned
+br_aes_pwr8_keysched(unsigned char *sk, const void *key, size_t len)
+{
+	if (len == 16) {
+		key_schedule_128(sk, key);
+		return 10;
+	}
+	if (len == 24) {
+		key_schedule_192(sk, key);
+		return 12;
+	}
+	key_schedule_256(sk, key);
+	return 14;
+}
+
+#endif
--- a/third_party/bearssl/src/aes_pwr8_cbcdec.c
+++ b/third_party/bearssl/src/aes_pwr8_cbcdec.c
@ -0,0 +1,670 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS   1
+#include "inner.h"
+
+#if BR_POWER8
+
+/*
+ * see bearssl_block.h
+ *
+ * Initialise a CBC decryption context: the subkeys are computed into
+ * ctx->skey.skni, the round count returned by the key schedule is
+ * recorded, and the vtable pointer is set.
+ */
+void
+br_aes_pwr8_cbcdec_init(br_aes_pwr8_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	unsigned rounds;
+
+	rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
+	ctx->num_rounds = rounds;
+	ctx->vtable = &br_aes_pwr8_cbcdec_vtable;
+}
+
+/*
+ * In-place CBC decryption with 11 subkeys (AES-128, 10 rounds).
+ *
+ *   sk          subkeys, read as 11 consecutive 16-byte values
+ *   iv          16-byte IV; it is only read here
+ *   buf         ciphertext, overwritten with the plaintext
+ *   num_blocks  number of 16-byte blocks; the loop handles four blocks
+ *               per iteration and runs (num_blocks >> 2) times. The
+ *               callers in this file only pass non-zero multiples of 4.
+ */
+static void
+cbcdec_128(const unsigned char *sk,
+	const unsigned char *iv, unsigned char *buf, size_t num_blocks)
+{
+	long cc0, cc1, cc2, cc3;
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	/* Byte offsets of the four blocks processed per loop iteration. */
+	cc0 = 0;
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v10
+		 * (cc0 is used as running offset, then reset to 0)
+		 */
+		lxvw4x(32, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(33, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(34, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(35, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(36, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(37, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(38, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(39, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(40, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(41, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(42, %[cc0], %[sk])
+		li(%[cc0], 0)
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * Load IV into v24.
+		 */
+		lxvw4x(56, 0, %[iv])
+#if BR_POWER8_LE
+		vperm(24, 24, 24, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Load next ciphertext words in v16..v19. Also save them
+		 * in v20..v23.
+		 */
+		lxvw4x(48, %[cc0], %[buf])
+		lxvw4x(49, %[cc1], %[buf])
+		lxvw4x(50, %[cc2], %[buf])
+		lxvw4x(51, %[cc3], %[buf])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		vand(20, 16, 16)
+		vand(21, 17, 17)
+		vand(22, 18, 18)
+		vand(23, 19, 19)
+
+		/*
+		 * Decrypt the blocks.
+		 * (subkeys are applied in reverse order, v10 down to v0)
+		 */
+		vxor(16, 16, 10)
+		vxor(17, 17, 10)
+		vxor(18, 18, 10)
+		vxor(19, 19, 10)
+		vncipher(16, 16, 9)
+		vncipher(17, 17, 9)
+		vncipher(18, 18, 9)
+		vncipher(19, 19, 9)
+		vncipher(16, 16, 8)
+		vncipher(17, 17, 8)
+		vncipher(18, 18, 8)
+		vncipher(19, 19, 8)
+		vncipher(16, 16, 7)
+		vncipher(17, 17, 7)
+		vncipher(18, 18, 7)
+		vncipher(19, 19, 7)
+		vncipher(16, 16, 6)
+		vncipher(17, 17, 6)
+		vncipher(18, 18, 6)
+		vncipher(19, 19, 6)
+		vncipher(16, 16, 5)
+		vncipher(17, 17, 5)
+		vncipher(18, 18, 5)
+		vncipher(19, 19, 5)
+		vncipher(16, 16, 4)
+		vncipher(17, 17, 4)
+		vncipher(18, 18, 4)
+		vncipher(19, 19, 4)
+		vncipher(16, 16, 3)
+		vncipher(17, 17, 3)
+		vncipher(18, 18, 3)
+		vncipher(19, 19, 3)
+		vncipher(16, 16, 2)
+		vncipher(17, 17, 2)
+		vncipher(18, 18, 2)
+		vncipher(19, 19, 2)
+		vncipher(16, 16, 1)
+		vncipher(17, 17, 1)
+		vncipher(18, 18, 1)
+		vncipher(19, 19, 1)
+		vncipherlast(16, 16, 0)
+		vncipherlast(17, 17, 0)
+		vncipherlast(18, 18, 0)
+		vncipherlast(19, 19, 0)
+
+		/*
+		 * XOR decrypted blocks with IV / previous block.
+		 */
+		vxor(16, 16, 24)
+		vxor(17, 17, 20)
+		vxor(18, 18, 21)
+		vxor(19, 19, 22)
+
+		/*
+		 * Store back result (with byteswap)
+		 */
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		stxvw4x(48, %[cc0], %[buf])
+		stxvw4x(49, %[cc1], %[buf])
+		stxvw4x(50, %[cc2], %[buf])
+		stxvw4x(51, %[cc3], %[buf])
+
+		/*
+		 * Fourth encrypted block is IV for next run.
+		 */
+		vand(24, 23, 23)
+
+		addi(%[buf], %[buf], 64)
+
+		bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+  [buf] "+b" (buf)
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"
+	);
+}
+
+/*
+ * In-place CBC decryption with 13 subkeys (AES-192, 12 rounds).
+ *
+ *   sk          subkeys, read as 13 consecutive 16-byte values
+ *   iv          16-byte IV; it is only read here
+ *   buf         ciphertext, overwritten with the plaintext
+ *   num_blocks  number of 16-byte blocks; the loop handles four blocks
+ *               per iteration and runs (num_blocks >> 2) times. The
+ *               callers in this file only pass non-zero multiples of 4.
+ */
+static void
+cbcdec_192(const unsigned char *sk,
+	const unsigned char *iv, unsigned char *buf, size_t num_blocks)
+{
+	long cc0, cc1, cc2, cc3;
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	/* Byte offsets of the four blocks processed per loop iteration. */
+	cc0 = 0;
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v12
+		 * (cc0 is used as running offset, then reset to 0)
+		 */
+		lxvw4x(32, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(33, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(34, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(35, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(36, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(37, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(38, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(39, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(40, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(41, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(42, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(43, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(44, %[cc0], %[sk])
+		li(%[cc0], 0)
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * Load IV into v24.
+		 */
+		lxvw4x(56, 0, %[iv])
+#if BR_POWER8_LE
+		vperm(24, 24, 24, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Load next ciphertext words in v16..v19. Also save them
+		 * in v20..v23.
+		 */
+		lxvw4x(48, %[cc0], %[buf])
+		lxvw4x(49, %[cc1], %[buf])
+		lxvw4x(50, %[cc2], %[buf])
+		lxvw4x(51, %[cc3], %[buf])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		vand(20, 16, 16)
+		vand(21, 17, 17)
+		vand(22, 18, 18)
+		vand(23, 19, 19)
+
+		/*
+		 * Decrypt the blocks.
+		 * (subkeys are applied in reverse order, v12 down to v0)
+		 */
+		vxor(16, 16, 12)
+		vxor(17, 17, 12)
+		vxor(18, 18, 12)
+		vxor(19, 19, 12)
+		vncipher(16, 16, 11)
+		vncipher(17, 17, 11)
+		vncipher(18, 18, 11)
+		vncipher(19, 19, 11)
+		vncipher(16, 16, 10)
+		vncipher(17, 17, 10)
+		vncipher(18, 18, 10)
+		vncipher(19, 19, 10)
+		vncipher(16, 16, 9)
+		vncipher(17, 17, 9)
+		vncipher(18, 18, 9)
+		vncipher(19, 19, 9)
+		vncipher(16, 16, 8)
+		vncipher(17, 17, 8)
+		vncipher(18, 18, 8)
+		vncipher(19, 19, 8)
+		vncipher(16, 16, 7)
+		vncipher(17, 17, 7)
+		vncipher(18, 18, 7)
+		vncipher(19, 19, 7)
+		vncipher(16, 16, 6)
+		vncipher(17, 17, 6)
+		vncipher(18, 18, 6)
+		vncipher(19, 19, 6)
+		vncipher(16, 16, 5)
+		vncipher(17, 17, 5)
+		vncipher(18, 18, 5)
+		vncipher(19, 19, 5)
+		vncipher(16, 16, 4)
+		vncipher(17, 17, 4)
+		vncipher(18, 18, 4)
+		vncipher(19, 19, 4)
+		vncipher(16, 16, 3)
+		vncipher(17, 17, 3)
+		vncipher(18, 18, 3)
+		vncipher(19, 19, 3)
+		vncipher(16, 16, 2)
+		vncipher(17, 17, 2)
+		vncipher(18, 18, 2)
+		vncipher(19, 19, 2)
+		vncipher(16, 16, 1)
+		vncipher(17, 17, 1)
+		vncipher(18, 18, 1)
+		vncipher(19, 19, 1)
+		vncipherlast(16, 16, 0)
+		vncipherlast(17, 17, 0)
+		vncipherlast(18, 18, 0)
+		vncipherlast(19, 19, 0)
+
+		/*
+		 * XOR decrypted blocks with IV / previous block.
+		 */
+		vxor(16, 16, 24)
+		vxor(17, 17, 20)
+		vxor(18, 18, 21)
+		vxor(19, 19, 22)
+
+		/*
+		 * Store back result (with byteswap)
+		 */
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		stxvw4x(48, %[cc0], %[buf])
+		stxvw4x(49, %[cc1], %[buf])
+		stxvw4x(50, %[cc2], %[buf])
+		stxvw4x(51, %[cc3], %[buf])
+
+		/*
+		 * Fourth encrypted block is IV for next run.
+		 */
+		vand(24, 23, 23)
+
+		addi(%[buf], %[buf], 64)
+
+		bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+  [buf] "+b" (buf)
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"
+	);
+}
+
+/*
+ * In-place CBC decryption with 15 subkeys (AES-256, 14 rounds).
+ *
+ *   sk          subkeys, read as 15 consecutive 16-byte values
+ *   iv          16-byte IV; it is only read here
+ *   buf         ciphertext, overwritten with the plaintext
+ *   num_blocks  number of 16-byte blocks; the loop handles four blocks
+ *               per iteration and runs (num_blocks >> 2) times. The
+ *               callers in this file only pass non-zero multiples of 4.
+ */
+static void
+cbcdec_256(const unsigned char *sk,
+	const unsigned char *iv, unsigned char *buf, size_t num_blocks)
+{
+	long cc0, cc1, cc2, cc3;
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	/* Byte offsets of the four blocks processed per loop iteration. */
+	cc0 = 0;
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v14
+		 * (cc0 is used as running offset, then reset to 0)
+		 */
+		lxvw4x(32, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(33, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(34, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(35, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(36, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(37, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(38, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(39, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(40, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(41, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(42, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(43, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(44, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(45, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(46, %[cc0], %[sk])
+		li(%[cc0], 0)
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * Load IV into v24.
+		 */
+		lxvw4x(56, 0, %[iv])
+#if BR_POWER8_LE
+		vperm(24, 24, 24, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Load next ciphertext words in v16..v19. Also save them
+		 * in v20..v23.
+		 */
+		lxvw4x(48, %[cc0], %[buf])
+		lxvw4x(49, %[cc1], %[buf])
+		lxvw4x(50, %[cc2], %[buf])
+		lxvw4x(51, %[cc3], %[buf])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		vand(20, 16, 16)
+		vand(21, 17, 17)
+		vand(22, 18, 18)
+		vand(23, 19, 19)
+
+		/*
+		 * Decrypt the blocks.
+		 * (subkeys are applied in reverse order, v14 down to v0)
+		 */
+		vxor(16, 16, 14)
+		vxor(17, 17, 14)
+		vxor(18, 18, 14)
+		vxor(19, 19, 14)
+		vncipher(16, 16, 13)
+		vncipher(17, 17, 13)
+		vncipher(18, 18, 13)
+		vncipher(19, 19, 13)
+		vncipher(16, 16, 12)
+		vncipher(17, 17, 12)
+		vncipher(18, 18, 12)
+		vncipher(19, 19, 12)
+		vncipher(16, 16, 11)
+		vncipher(17, 17, 11)
+		vncipher(18, 18, 11)
+		vncipher(19, 19, 11)
+		vncipher(16, 16, 10)
+		vncipher(17, 17, 10)
+		vncipher(18, 18, 10)
+		vncipher(19, 19, 10)
+		vncipher(16, 16, 9)
+		vncipher(17, 17, 9)
+		vncipher(18, 18, 9)
+		vncipher(19, 19, 9)
+		vncipher(16, 16, 8)
+		vncipher(17, 17, 8)
+		vncipher(18, 18, 8)
+		vncipher(19, 19, 8)
+		vncipher(16, 16, 7)
+		vncipher(17, 17, 7)
+		vncipher(18, 18, 7)
+		vncipher(19, 19, 7)
+		vncipher(16, 16, 6)
+		vncipher(17, 17, 6)
+		vncipher(18, 18, 6)
+		vncipher(19, 19, 6)
+		vncipher(16, 16, 5)
+		vncipher(17, 17, 5)
+		vncipher(18, 18, 5)
+		vncipher(19, 19, 5)
+		vncipher(16, 16, 4)
+		vncipher(17, 17, 4)
+		vncipher(18, 18, 4)
+		vncipher(19, 19, 4)
+		vncipher(16, 16, 3)
+		vncipher(17, 17, 3)
+		vncipher(18, 18, 3)
+		vncipher(19, 19, 3)
+		vncipher(16, 16, 2)
+		vncipher(17, 17, 2)
+		vncipher(18, 18, 2)
+		vncipher(19, 19, 2)
+		vncipher(16, 16, 1)
+		vncipher(17, 17, 1)
+		vncipher(18, 18, 1)
+		vncipher(19, 19, 1)
+		vncipherlast(16, 16, 0)
+		vncipherlast(17, 17, 0)
+		vncipherlast(18, 18, 0)
+		vncipherlast(19, 19, 0)
+
+		/*
+		 * XOR decrypted blocks with IV / previous block.
+		 */
+		vxor(16, 16, 24)
+		vxor(17, 17, 20)
+		vxor(18, 18, 21)
+		vxor(19, 19, 22)
+
+		/*
+		 * Store back result (with byteswap)
+		 */
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		stxvw4x(48, %[cc0], %[buf])
+		stxvw4x(49, %[cc1], %[buf])
+		stxvw4x(50, %[cc2], %[buf])
+		stxvw4x(51, %[cc3], %[buf])
+
+		/*
+		 * Fourth encrypted block is IV for next run.
+		 */
+		vand(24, 23, 23)
+
+		addi(%[buf], %[buf], 64)
+
+		bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+  [buf] "+b" (buf)
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"
+	);
+}
+
+/*
+ * Call the CBC decryption routine that matches the round count stored
+ * in the context: 10 -> cbcdec_128, 12 -> cbcdec_192, anything else ->
+ * cbcdec_256.
+ */
+static void
+cbcdec_dispatch(const br_aes_pwr8_cbcdec_keys *ctx,
+	const unsigned char *iv, unsigned char *buf, size_t num_blocks)
+{
+	if (ctx->num_rounds == 10) {
+		cbcdec_128(ctx->skey.skni, iv, buf, num_blocks);
+	} else if (ctx->num_rounds == 12) {
+		cbcdec_192(ctx->skey.skni, iv, buf, num_blocks);
+	} else {
+		cbcdec_256(ctx->skey.skni, iv, buf, num_blocks);
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * In-place CBC decryption of len bytes at data; iv (16 bytes) is
+ * updated to the last ciphertext block so that calls can be chained.
+ * Nothing is done when len is 0.
+ *
+ * The low-level routines work on groups of four blocks. Whole groups
+ * are decrypted directly in the caller's buffer; a trailing partial
+ * group is copied into a zero-padded 64-byte scratch buffer, decrypted
+ * there as four blocks, and only the first len bytes are copied back.
+ */
+void
+br_aes_pwr8_cbcdec_run(const br_aes_pwr8_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char nextiv[16];
+	unsigned char *buf;
+
+	if (len == 0) {
+		return;
+	}
+	buf = data;
+
+	/* Save the last ciphertext block before it gets overwritten. */
+	memcpy(nextiv, buf + len - 16, 16);
+
+	if (len >= 64) {
+		unsigned char last_ct[16];
+		size_t num_blocks, bulk_len;
+
+		num_blocks = (len >> 4) & ~(size_t)3;
+		bulk_len = num_blocks << 4;
+
+		/*
+		 * The last ciphertext block of the bulk part is the IV
+		 * for the remaining blocks (if any).
+		 */
+		memcpy(last_ct, buf + bulk_len - 16, 16);
+		cbcdec_dispatch(ctx, iv, buf, num_blocks);
+		buf += bulk_len;
+		len &= 63;
+		memcpy(iv, last_ct, 16);
+	}
+
+	if (len > 0) {
+		unsigned char scratch[64];
+
+		memcpy(scratch, buf, len);
+		memset(scratch + len, 0, (sizeof scratch) - len);
+		cbcdec_dispatch(ctx, iv, scratch, 4);
+		memcpy(buf, scratch, len);
+	}
+
+	memcpy(iv, nextiv, 16);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Vtable for the POWER8 AES/CBC decryption implementation. It records
+ * the size of the context structure, two numeric parameters (16 and 4;
+ * presumably the block size in bytes and its base-2 logarithm -- confirm
+ * against the br_block_cbcdec_class declaration) and the init/run entry
+ * points, cast to the generic signatures used by the class.
+ */
+const br_block_cbcdec_class br_aes_pwr8_cbcdec_vtable = {
+	sizeof(br_aes_pwr8_cbcdec_keys),
+	16,
+	4,
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))
+		&br_aes_pwr8_cbcdec_init,
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+		&br_aes_pwr8_cbcdec_run
+};
+
+/*
+ * see bearssl_block.h
+ *
+ * Return the POWER8 CBC decryption vtable when
+ * br_aes_pwr8_supported() reports support, NULL otherwise.
+ */
+const br_block_cbcdec_class *
+br_aes_pwr8_cbcdec_get_vtable(void)
+{
+	if (!br_aes_pwr8_supported()) {
+		return NULL;
+	}
+	return &br_aes_pwr8_cbcdec_vtable;
+}
+
+#else
+
+/*
+ * see bearssl_block.h
+ *
+ * Fallback used when BR_POWER8 is not set: the POWER8 implementation
+ * is not compiled in, so no vtable is available and NULL is returned.
+ */
+const br_block_cbcdec_class *
+br_aes_pwr8_cbcdec_get_vtable(void)
+{
+	return NULL;
+}
+
+#endif
--- a/third_party/bearssl/src/aes_pwr8_cbcenc.c
+++ b/third_party/bearssl/src/aes_pwr8_cbcenc.c
@ -0,0 +1,417 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS   1
+#include "inner.h"
+
+#if BR_POWER8
+
+/* see bearssl_block.h; sets the vtable and stores the key schedule's return value as num_rounds */
+void
+br_aes_pwr8_cbcenc_init(br_aes_pwr8_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_pwr8_cbcenc_vtable;
+	ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
+}
+
+static void
+cbcenc_128(const unsigned char *sk,
+	const unsigned char *iv, unsigned char *buf, size_t len)
+{
+	long cc; /* byte offset into sk, advanced by 16 per subkey load */
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc = 0;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v10 (lxvw4x register numbers are vN + 32)
+		 */
+		lxvw4x(32, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(33, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(34, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(35, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(36, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(37, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(38, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(39, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(40, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(41, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(42, %[cc], %[sk])
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * Load IV into v16.
+		 */
+		lxvw4x(48, 0, %[iv])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Load next 16-byte plaintext block and XOR with current IV.
+		 */
+		lxvw4x(49, 0, %[buf])
+#if BR_POWER8_LE
+		vperm(17, 17, 17, 15)
+#endif
+		vxor(16, 16, 17)
+
+		/*
+		 * Encrypt the block: subkey 0 XOR, 9 vcipher rounds, vcipherlast.
+		 */
+		vxor(16, 16, 0)
+		vcipher(16, 16, 1)
+		vcipher(16, 16, 2)
+		vcipher(16, 16, 3)
+		vcipher(16, 16, 4)
+		vcipher(16, 16, 5)
+		vcipher(16, 16, 6)
+		vcipher(16, 16, 7)
+		vcipher(16, 16, 8)
+		vcipher(16, 16, 9)
+		vcipherlast(16, 16, 10)
+
+		/*
+		 * Store back result (with byteswap); v16 stays as the next IV.
+		 */
+#if BR_POWER8_LE
+		vperm(17, 16, 16, 15)
+		stxvw4x(49, 0, %[buf])
+#else
+		stxvw4x(48, 0, %[buf])
+#endif
+		addi(%[buf], %[buf], 16)
+
+		bdnz(loop)
+
+: [cc] "+b" (cc), [buf] "+b" (buf)
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "ctr", "memory"
+	);
+}
+
+static void
+cbcenc_192(const unsigned char *sk,
+	const unsigned char *iv, unsigned char *buf, size_t len)
+{
+	long cc; /* byte offset into sk, advanced by 16 per subkey load */
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc = 0;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v12 (lxvw4x register numbers are vN + 32)
+		 */
+		lxvw4x(32, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(33, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(34, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(35, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(36, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(37, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(38, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(39, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(40, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(41, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(42, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(43, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(44, %[cc], %[sk])
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * Load IV into v16.
+		 */
+		lxvw4x(48, 0, %[iv])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Load next 16-byte plaintext block and XOR with current IV.
+		 */
+		lxvw4x(49, 0, %[buf])
+#if BR_POWER8_LE
+		vperm(17, 17, 17, 15)
+#endif
+		vxor(16, 16, 17)
+
+		/*
+		 * Encrypt the block: subkey 0 XOR, 11 vcipher rounds, vcipherlast.
+		 */
+		vxor(16, 16, 0)
+		vcipher(16, 16, 1)
+		vcipher(16, 16, 2)
+		vcipher(16, 16, 3)
+		vcipher(16, 16, 4)
+		vcipher(16, 16, 5)
+		vcipher(16, 16, 6)
+		vcipher(16, 16, 7)
+		vcipher(16, 16, 8)
+		vcipher(16, 16, 9)
+		vcipher(16, 16, 10)
+		vcipher(16, 16, 11)
+		vcipherlast(16, 16, 12)
+
+		/*
+		 * Store back result (with byteswap); v16 stays as the next IV.
+		 */
+#if BR_POWER8_LE
+		vperm(17, 16, 16, 15)
+		stxvw4x(49, 0, %[buf])
+#else
+		stxvw4x(48, 0, %[buf])
+#endif
+		addi(%[buf], %[buf], 16)
+
+		bdnz(loop)
+
+: [cc] "+b" (cc), [buf] "+b" (buf)
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "ctr", "memory"
+	);
+}
+
+static void
+cbcenc_256(const unsigned char *sk,
+	const unsigned char *iv, unsigned char *buf, size_t len)
+{
+	long cc; /* byte offset into sk, advanced by 16 per subkey load */
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc = 0;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v14 (lxvw4x register numbers are vN + 32)
+		 */
+		lxvw4x(32, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(33, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(34, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(35, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(36, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(37, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(38, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(39, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(40, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(41, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(42, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(43, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(44, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(45, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(46, %[cc], %[sk])
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * Load IV into v16.
+		 */
+		lxvw4x(48, 0, %[iv])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Load next 16-byte plaintext block and XOR with current IV.
+		 */
+		lxvw4x(49, 0, %[buf])
+#if BR_POWER8_LE
+		vperm(17, 17, 17, 15)
+#endif
+		vxor(16, 16, 17)
+
+		/*
+		 * Encrypt the block: subkey 0 XOR, 13 vcipher rounds, vcipherlast.
+		 */
+		vxor(16, 16, 0)
+		vcipher(16, 16, 1)
+		vcipher(16, 16, 2)
+		vcipher(16, 16, 3)
+		vcipher(16, 16, 4)
+		vcipher(16, 16, 5)
+		vcipher(16, 16, 6)
+		vcipher(16, 16, 7)
+		vcipher(16, 16, 8)
+		vcipher(16, 16, 9)
+		vcipher(16, 16, 10)
+		vcipher(16, 16, 11)
+		vcipher(16, 16, 12)
+		vcipher(16, 16, 13)
+		vcipherlast(16, 16, 14)
+
+		/*
+		 * Store back result (with byteswap); v16 stays as the next IV.
+		 */
+#if BR_POWER8_LE
+		vperm(17, 16, 16, 15)
+		stxvw4x(49, 0, %[buf])
+#else
+		stxvw4x(48, 0, %[buf])
+#endif
+		addi(%[buf], %[buf], 16)
+
+		bdnz(loop)
+
+: [cc] "+b" (cc), [buf] "+b" (buf)
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "ctr", "memory"
+	);
+}
+
+/* see bearssl_block.h; encrypts data in place, then copies the last 16 output bytes into iv */
+void
+br_aes_pwr8_cbcenc_run(const br_aes_pwr8_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	if (len > 0) { /* len == 0 leaves data and iv untouched */
+		switch (ctx->num_rounds) {
+		case 10:
+			cbcenc_128(ctx->skey.skni, iv, data, len);
+			break;
+		case 12:
+			cbcenc_192(ctx->skey.skni, iv, data, len);
+			break;
+		default: /* every other num_rounds value takes the 256-bit path */
+			cbcenc_256(ctx->skey.skni, iv, data, len);
+			break;
+		}
+		memcpy(iv, (unsigned char *)data + (len - 16), 16);
+	}
+}
+
+/* see bearssl_block.h; dispatch table exposing the POWER8 CBC-encrypt init/run pair */
+const br_block_cbcenc_class br_aes_pwr8_cbcenc_vtable = {
+	sizeof(br_aes_pwr8_cbcenc_keys), /* size of the key/context structure */
+	16, /* presumably the block size in bytes -- confirm against bearssl_block.h */
+	4, /* presumably log2 of the block size -- confirm against bearssl_block.h */
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
+		&br_aes_pwr8_cbcenc_init,
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+		&br_aes_pwr8_cbcenc_run
+};
+
+/* see bearssl_block.h; returns the vtable above, or NULL when br_aes_pwr8_supported() is false */
+const br_block_cbcenc_class *
+br_aes_pwr8_cbcenc_get_vtable(void)
+{
+	return br_aes_pwr8_supported() ? &br_aes_pwr8_cbcenc_vtable : NULL;
+}
+
+#else
+
+/* see bearssl_block.h; built when BR_POWER8 is false: no implementation, always returns NULL */
+const br_block_cbcenc_class *
+br_aes_pwr8_cbcenc_get_vtable(void)
+{
+	return NULL;
+}
+
+#endif
--- a/third_party/bearssl/src/aes_pwr8_ctr.c
+++ b/third_party/bearssl/src/aes_pwr8_ctr.c
@ -0,0 +1,717 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS   1
+#include "inner.h"
+
+#if BR_POWER8
+
+/* see bearssl_block.h; sets the vtable and stores the key schedule's return value as num_rounds */
+void
+br_aes_pwr8_ctr_init(br_aes_pwr8_ctr_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_pwr8_ctr_vtable;
+	ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
+}
+
+static void
+ctr_128(const unsigned char *sk, const unsigned char *ivbuf,
+	unsigned char *buf, size_t num_blocks)
+{
+	long cc0, cc1, cc2, cc3; /* byte offsets of the 4 parallel blocks; cc0 first walks sk */
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+	static const uint32_t ctrinc[] = {
+		0, 0, 0, 4 /* vadduwm adds this per 32-bit word: only the last word advances, by 4 */
+	};
+
+	cc0 = 0;
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v10 (lxvw4x register numbers are vN + 32)
+		 */
+		lxvw4x(32, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(33, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(34, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(35, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(36, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(37, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(38, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(39, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(40, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(41, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(42, %[cc0], %[sk])
+		li(%[cc0], 0)
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * v28 = increment for IV counter.
+		 */
+		lxvw4x(60, 0, %[ctrinc])
+
+		/*
+		 * Load the four 16-byte IV/counter blocks of ivbuf into v16..v19
+		 */
+		lxvw4x(48, %[cc0], %[ivbuf])
+		lxvw4x(49, %[cc1], %[ivbuf])
+		lxvw4x(50, %[cc2], %[ivbuf])
+		lxvw4x(51, %[cc3], %[ivbuf])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Compute next IV into v24..v27
+		 */
+		vadduwm(24, 16, 28)
+		vadduwm(25, 17, 28)
+		vadduwm(26, 18, 28)
+		vadduwm(27, 19, 28)
+
+		/*
+		 * Load next data blocks. We do this early on but we
+		 * won't need them until IV encryption is done.
+		 */
+		lxvw4x(52, %[cc0], %[buf])
+		lxvw4x(53, %[cc1], %[buf])
+		lxvw4x(54, %[cc2], %[buf])
+		lxvw4x(55, %[cc3], %[buf])
+
+		/*
+		 * Encrypt the current IV (four blocks, interleaved round by round).
+		 */
+		vxor(16, 16, 0)
+		vxor(17, 17, 0)
+		vxor(18, 18, 0)
+		vxor(19, 19, 0)
+		vcipher(16, 16, 1)
+		vcipher(17, 17, 1)
+		vcipher(18, 18, 1)
+		vcipher(19, 19, 1)
+		vcipher(16, 16, 2)
+		vcipher(17, 17, 2)
+		vcipher(18, 18, 2)
+		vcipher(19, 19, 2)
+		vcipher(16, 16, 3)
+		vcipher(17, 17, 3)
+		vcipher(18, 18, 3)
+		vcipher(19, 19, 3)
+		vcipher(16, 16, 4)
+		vcipher(17, 17, 4)
+		vcipher(18, 18, 4)
+		vcipher(19, 19, 4)
+		vcipher(16, 16, 5)
+		vcipher(17, 17, 5)
+		vcipher(18, 18, 5)
+		vcipher(19, 19, 5)
+		vcipher(16, 16, 6)
+		vcipher(17, 17, 6)
+		vcipher(18, 18, 6)
+		vcipher(19, 19, 6)
+		vcipher(16, 16, 7)
+		vcipher(17, 17, 7)
+		vcipher(18, 18, 7)
+		vcipher(19, 19, 7)
+		vcipher(16, 16, 8)
+		vcipher(17, 17, 8)
+		vcipher(18, 18, 8)
+		vcipher(19, 19, 8)
+		vcipher(16, 16, 9)
+		vcipher(17, 17, 9)
+		vcipher(18, 18, 9)
+		vcipher(19, 19, 9)
+		vcipherlast(16, 16, 10)
+		vcipherlast(17, 17, 10)
+		vcipherlast(18, 18, 10)
+		vcipherlast(19, 19, 10)
+
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+
+		/*
+		 * XOR the data blocks loaded above (v20..v23) with the encrypted IV, store.
+		 */
+		vxor(16, 20, 16)
+		vxor(17, 21, 17)
+		vxor(18, 22, 18)
+		vxor(19, 23, 19)
+		stxvw4x(48, %[cc0], %[buf])
+		stxvw4x(49, %[cc1], %[buf])
+		stxvw4x(50, %[cc2], %[buf])
+		stxvw4x(51, %[cc3], %[buf])
+
+		addi(%[buf], %[buf], 64)
+
+		/*
+		 * Update IV: vand of a register with itself copies v24..v27 to v16..v19.
+		 */
+		vand(16, 24, 24)
+		vand(17, 25, 25)
+		vand(18, 26, 26)
+		vand(19, 27, 27)
+
+		bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+  [buf] "+b" (buf)
+: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
+  [ctrinc] "b" (ctrinc)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"
+	);
+}
+
+static void
+ctr_192(const unsigned char *sk, const unsigned char *ivbuf,
+	unsigned char *buf, size_t num_blocks)
+{
+	long cc0, cc1, cc2, cc3; /* byte offsets of the 4 parallel blocks; cc0 first walks sk */
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+	static const uint32_t ctrinc[] = {
+		0, 0, 0, 4 /* vadduwm adds this per 32-bit word: only the last word advances, by 4 */
+	};
+
+	cc0 = 0;
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v12 (lxvw4x register numbers are vN + 32)
+		 */
+		lxvw4x(32, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(33, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(34, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(35, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(36, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(37, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(38, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(39, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(40, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(41, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(42, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(43, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(44, %[cc0], %[sk])
+		li(%[cc0], 0)
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * v28 = increment for IV counter.
+		 */
+		lxvw4x(60, 0, %[ctrinc])
+
+		/*
+		 * Load the four 16-byte IV/counter blocks of ivbuf into v16..v19
+		 */
+		lxvw4x(48, %[cc0], %[ivbuf])
+		lxvw4x(49, %[cc1], %[ivbuf])
+		lxvw4x(50, %[cc2], %[ivbuf])
+		lxvw4x(51, %[cc3], %[ivbuf])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Compute next IV into v24..v27
+		 */
+		vadduwm(24, 16, 28)
+		vadduwm(25, 17, 28)
+		vadduwm(26, 18, 28)
+		vadduwm(27, 19, 28)
+
+		/*
+		 * Load next data blocks. We do this early on but we
+		 * won't need them until IV encryption is done.
+		 */
+		lxvw4x(52, %[cc0], %[buf])
+		lxvw4x(53, %[cc1], %[buf])
+		lxvw4x(54, %[cc2], %[buf])
+		lxvw4x(55, %[cc3], %[buf])
+
+		/*
+		 * Encrypt the current IV (four blocks, interleaved round by round).
+		 */
+		vxor(16, 16, 0)
+		vxor(17, 17, 0)
+		vxor(18, 18, 0)
+		vxor(19, 19, 0)
+		vcipher(16, 16, 1)
+		vcipher(17, 17, 1)
+		vcipher(18, 18, 1)
+		vcipher(19, 19, 1)
+		vcipher(16, 16, 2)
+		vcipher(17, 17, 2)
+		vcipher(18, 18, 2)
+		vcipher(19, 19, 2)
+		vcipher(16, 16, 3)
+		vcipher(17, 17, 3)
+		vcipher(18, 18, 3)
+		vcipher(19, 19, 3)
+		vcipher(16, 16, 4)
+		vcipher(17, 17, 4)
+		vcipher(18, 18, 4)
+		vcipher(19, 19, 4)
+		vcipher(16, 16, 5)
+		vcipher(17, 17, 5)
+		vcipher(18, 18, 5)
+		vcipher(19, 19, 5)
+		vcipher(16, 16, 6)
+		vcipher(17, 17, 6)
+		vcipher(18, 18, 6)
+		vcipher(19, 19, 6)
+		vcipher(16, 16, 7)
+		vcipher(17, 17, 7)
+		vcipher(18, 18, 7)
+		vcipher(19, 19, 7)
+		vcipher(16, 16, 8)
+		vcipher(17, 17, 8)
+		vcipher(18, 18, 8)
+		vcipher(19, 19, 8)
+		vcipher(16, 16, 9)
+		vcipher(17, 17, 9)
+		vcipher(18, 18, 9)
+		vcipher(19, 19, 9)
+		vcipher(16, 16, 10)
+		vcipher(17, 17, 10)
+		vcipher(18, 18, 10)
+		vcipher(19, 19, 10)
+		vcipher(16, 16, 11)
+		vcipher(17, 17, 11)
+		vcipher(18, 18, 11)
+		vcipher(19, 19, 11)
+		vcipherlast(16, 16, 12)
+		vcipherlast(17, 17, 12)
+		vcipherlast(18, 18, 12)
+		vcipherlast(19, 19, 12)
+
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+
+		/*
+		 * XOR the data blocks loaded above (v20..v23) with the encrypted IV, store.
+		 */
+		vxor(16, 20, 16)
+		vxor(17, 21, 17)
+		vxor(18, 22, 18)
+		vxor(19, 23, 19)
+		stxvw4x(48, %[cc0], %[buf])
+		stxvw4x(49, %[cc1], %[buf])
+		stxvw4x(50, %[cc2], %[buf])
+		stxvw4x(51, %[cc3], %[buf])
+
+		addi(%[buf], %[buf], 64)
+
+		/*
+		 * Update IV: vand of a register with itself copies v24..v27 to v16..v19.
+		 */
+		vand(16, 24, 24)
+		vand(17, 25, 25)
+		vand(18, 26, 26)
+		vand(19, 27, 27)
+
+		bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+  [buf] "+b" (buf)
+: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
+  [ctrinc] "b" (ctrinc)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"
+	);
+}
+
+static void
+ctr_256(const unsigned char *sk, const unsigned char *ivbuf,
+	unsigned char *buf, size_t num_blocks)
+{
+	long cc0, cc1, cc2, cc3; /* byte offsets of the 4 parallel blocks; cc0 first walks sk */
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+	static const uint32_t ctrinc[] = {
+		0, 0, 0, 4 /* vadduwm adds this per 32-bit word: only the last word advances, by 4 */
+	};
+
+	cc0 = 0;
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v14 (lxvw4x register numbers are vN + 32)
+		 */
+		lxvw4x(32, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(33, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(34, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(35, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(36, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(37, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(38, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(39, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(40, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(41, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(42, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(43, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(44, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(45, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(46, %[cc0], %[sk])
+		li(%[cc0], 0)
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * v28 = increment for IV counter.
+		 */
+		lxvw4x(60, 0, %[ctrinc])
+
+		/*
+		 * Load the four 16-byte IV/counter blocks of ivbuf into v16..v19
+		 */
+		lxvw4x(48, %[cc0], %[ivbuf])
+		lxvw4x(49, %[cc1], %[ivbuf])
+		lxvw4x(50, %[cc2], %[ivbuf])
+		lxvw4x(51, %[cc3], %[ivbuf])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Compute next IV into v24..v27
+		 */
+		vadduwm(24, 16, 28)
+		vadduwm(25, 17, 28)
+		vadduwm(26, 18, 28)
+		vadduwm(27, 19, 28)
+
+		/*
+		 * Load next data blocks. We do this early on but we
+		 * won't need them until IV encryption is done.
+		 */
+		lxvw4x(52, %[cc0], %[buf])
+		lxvw4x(53, %[cc1], %[buf])
+		lxvw4x(54, %[cc2], %[buf])
+		lxvw4x(55, %[cc3], %[buf])
+
+		/*
+		 * Encrypt the current IV (four blocks, interleaved round by round).
+		 */
+		vxor(16, 16, 0)
+		vxor(17, 17, 0)
+		vxor(18, 18, 0)
+		vxor(19, 19, 0)
+		vcipher(16, 16, 1)
+		vcipher(17, 17, 1)
+		vcipher(18, 18, 1)
+		vcipher(19, 19, 1)
+		vcipher(16, 16, 2)
+		vcipher(17, 17, 2)
+		vcipher(18, 18, 2)
+		vcipher(19, 19, 2)
+		vcipher(16, 16, 3)
+		vcipher(17, 17, 3)
+		vcipher(18, 18, 3)
+		vcipher(19, 19, 3)
+		vcipher(16, 16, 4)
+		vcipher(17, 17, 4)
+		vcipher(18, 18, 4)
+		vcipher(19, 19, 4)
+		vcipher(16, 16, 5)
+		vcipher(17, 17, 5)
+		vcipher(18, 18, 5)
+		vcipher(19, 19, 5)
+		vcipher(16, 16, 6)
+		vcipher(17, 17, 6)
+		vcipher(18, 18, 6)
+		vcipher(19, 19, 6)
+		vcipher(16, 16, 7)
+		vcipher(17, 17, 7)
+		vcipher(18, 18, 7)
+		vcipher(19, 19, 7)
+		vcipher(16, 16, 8)
+		vcipher(17, 17, 8)
+		vcipher(18, 18, 8)
+		vcipher(19, 19, 8)
+		vcipher(16, 16, 9)
+		vcipher(17, 17, 9)
+		vcipher(18, 18, 9)
+		vcipher(19, 19, 9)
+		vcipher(16, 16, 10)
+		vcipher(17, 17, 10)
+		vcipher(18, 18, 10)
+		vcipher(19, 19, 10)
+		vcipher(16, 16, 11)
+		vcipher(17, 17, 11)
+		vcipher(18, 18, 11)
+		vcipher(19, 19, 11)
+		vcipher(16, 16, 12)
+		vcipher(17, 17, 12)
+		vcipher(18, 18, 12)
+		vcipher(19, 19, 12)
+		vcipher(16, 16, 13)
+		vcipher(17, 17, 13)
+		vcipher(18, 18, 13)
+		vcipher(19, 19, 13)
+		vcipherlast(16, 16, 14)
+		vcipherlast(17, 17, 14)
+		vcipherlast(18, 18, 14)
+		vcipherlast(19, 19, 14)
+
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+
+		/*
+		 * XOR the data blocks loaded above (v20..v23) with the encrypted IV, store.
+		 */
+		vxor(16, 20, 16)
+		vxor(17, 21, 17)
+		vxor(18, 22, 18)
+		vxor(19, 23, 19)
+		stxvw4x(48, %[cc0], %[buf])
+		stxvw4x(49, %[cc1], %[buf])
+		stxvw4x(50, %[cc2], %[buf])
+		stxvw4x(51, %[cc3], %[buf])
+
+		addi(%[buf], %[buf], 64)
+
+		/*
+		 * Update IV: vand of a register with itself copies v24..v27 to v16..v19.
+		 */
+		vand(16, 24, 24)
+		vand(17, 25, 25)
+		vand(18, 26, 26)
+		vand(19, 27, 27)
+
+		bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+  [buf] "+b" (buf)
+: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
+  [ctrinc] "b" (ctrinc)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"
+	);
+}
+
+/* see bearssl_block.h; XORs data in place with the AES-CTR keystream, returns the updated counter */
+uint32_t
+br_aes_pwr8_ctr_run(const br_aes_pwr8_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned char ivbuf[64]; /* four 16-byte blocks: 12 bytes of iv + 32-bit big-endian counter */
+
+	buf = data;
+	memcpy(ivbuf +  0, iv, 12);
+	memcpy(ivbuf + 16, iv, 12);
+	memcpy(ivbuf + 32, iv, 12);
+	memcpy(ivbuf + 48, iv, 12);
+	if (len >= 64) { /* bulk part: whole groups of four 16-byte blocks */
+		br_enc32be(ivbuf + 12, cc + 0);
+		br_enc32be(ivbuf + 28, cc + 1);
+		br_enc32be(ivbuf + 44, cc + 2);
+		br_enc32be(ivbuf + 60, cc + 3);
+		switch (ctx->num_rounds) {
+		case 10:
+			ctr_128(ctx->skey.skni, ivbuf, buf,
+				(len >> 4) & ~(size_t)3);
+			break;
+		case 12:
+			ctr_192(ctx->skey.skni, ivbuf, buf,
+				(len >> 4) & ~(size_t)3);
+			break;
+		default: /* every other num_rounds value takes the 256-bit path */
+			ctr_256(ctx->skey.skni, ivbuf, buf,
+				(len >> 4) & ~(size_t)3);
+			break;
+		}
+		cc += (len >> 4) & ~(size_t)3; /* one counter value per 16-byte block processed */
+		buf += len & ~(size_t)63;
+		len &= 63; /* bytes remaining after the bulk part */
+	}
+	if (len > 0) {
+		unsigned char tmp[64];
+
+		memcpy(tmp, buf, len);
+		memset(tmp + len, 0, (sizeof tmp) - len); /* zero-pad to 4 full blocks */
+		br_enc32be(ivbuf + 12, cc + 0);
+		br_enc32be(ivbuf + 28, cc + 1);
+		br_enc32be(ivbuf + 44, cc + 2);
+		br_enc32be(ivbuf + 60, cc + 3);
+		switch (ctx->num_rounds) {
+		case 10:
+			ctr_128(ctx->skey.skni, ivbuf, tmp, 4);
+			break;
+		case 12:
+			ctr_192(ctx->skey.skni, ivbuf, tmp, 4);
+			break;
+		default: /* every other num_rounds value takes the 256-bit path */
+			ctr_256(ctx->skey.skni, ivbuf, tmp, 4);
+			break;
+		}
+		memcpy(buf, tmp, len); /* only the len real bytes are written back */
+		cc += (len + 15) >> 4; /* a trailing partial block counts as a whole one */
+	}
+	return cc;
+}
+
+/* see bearssl_block.h; dispatch table exposing the POWER8 CTR init/run pair */
+const br_block_ctr_class br_aes_pwr8_ctr_vtable = {
+	sizeof(br_aes_pwr8_ctr_keys), /* size of the key/context structure */
+	16, /* presumably the block size in bytes -- confirm against bearssl_block.h */
+	4, /* presumably log2 of the block size -- confirm against bearssl_block.h */
+	(void (*)(const br_block_ctr_class **, const void *, size_t))
+		&br_aes_pwr8_ctr_init,
+	(uint32_t (*)(const br_block_ctr_class *const *,
+		const void *, uint32_t, void *, size_t))
+		&br_aes_pwr8_ctr_run
+};
+
+/* see bearssl_block.h; returns the vtable above, or NULL when br_aes_pwr8_supported() is false */
+const br_block_ctr_class *
+br_aes_pwr8_ctr_get_vtable(void)
+{
+	return br_aes_pwr8_supported() ? &br_aes_pwr8_ctr_vtable : NULL;
+}
+
+#else
+
+/* see bearssl_block.h; built when BR_POWER8 is false: no implementation, always returns NULL */
+const br_block_ctr_class *
+br_aes_pwr8_ctr_get_vtable(void)
+{
+	return NULL;
+}
+
+#endif
--- a/third_party/bearssl/src/aes_pwr8_ctrcbc.c
+++ b/third_party/bearssl/src/aes_pwr8_ctrcbc.c
@ -0,0 +1,946 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS   1
+#include "inner.h"
+
+#if BR_POWER8
+
+/*
+ * see bearssl_block.h
+ *
+ * Returns the address of br_aes_pwr8_ctrcbc_vtable when
+ * br_aes_pwr8_supported() yields a non-zero value, and NULL otherwise.
+ */
+const br_block_ctrcbc_class *
+br_aes_pwr8_ctrcbc_get_vtable(void)
+{
+	if (!br_aes_pwr8_supported()) {
+		return NULL;
+	}
+	return &br_aes_pwr8_ctrcbc_vtable;
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Prepares a POWER8 AES CTR + CBC-MAC context: the key schedule is run
+ * over the len bytes at key into ctx->skey.skni, its return value is
+ * recorded as the round count, and the vtable pointer is installed.
+ */
+void
+br_aes_pwr8_ctrcbc_init(br_aes_pwr8_ctrcbc_keys *ctx,
+	const void *key, size_t len)
+{
+	/* The two assignments touch distinct fields and are independent. */
+	ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
+	ctx->vtable = &br_aes_pwr8_ctrcbc_vtable;
+}
+
+/*
+ * Register conventions for CTR + CBC-MAC:
+ *
+ *   AES subkeys are in registers 0 to 10/12/14 (depending on key size)
+ *   Register v15 contains the byteswap index register (little-endian only)
+ *   Register v16 contains the CTR counter value
+ *   Register v17 contains the CBC-MAC current value
+ *   Registers v18 to v27 are scratch
+ *   Counter increment uses v28, v29 and v30
+ *
+ * For CTR alone:
+ *
+ *   AES subkeys are in registers 0 to 10/12/14 (depending on key size)
+ *   Register v15 contains the byteswap index register (little-endian only)
+ *   Registers v16 to v19 contain the CTR counter values (four blocks)
+ *   Registers v20 to v27 are scratch
+ *   Counter increment uses v28, v29 and v30
+ */
+
+#define LOAD_SUBKEYS_128 /* loads 11 subkeys from %[sk] into VSX 32..42 (v0..v10), adding 16 to %[cc] between loads */ \
+		lxvw4x(32, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(33, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(34, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(35, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(36, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(37, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(38, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(39, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(40, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(41, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(42, %[cc], %[sk])
+
+#define LOAD_SUBKEYS_192 /* LOAD_SUBKEYS_128 plus two more subkeys into VSX 43..44 (v11..v12) */ \
+		LOAD_SUBKEYS_128 \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(43, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(44, %[cc], %[sk])
+
+#define LOAD_SUBKEYS_256 /* LOAD_SUBKEYS_192 plus two more subkeys into VSX 45..46 (v13..v14) */ \
+		LOAD_SUBKEYS_192 \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(45, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(46, %[cc], %[sk])
+
+#define BLOCK_ENCRYPT_128(x) /* encrypts vector x in place: XOR with v0, vcipher with v1..v9, vcipherlast with v10 */ \
+		vxor(x, x, 0) \
+		vcipher(x, x, 1) \
+		vcipher(x, x, 2) \
+		vcipher(x, x, 3) \
+		vcipher(x, x, 4) \
+		vcipher(x, x, 5) \
+		vcipher(x, x, 6) \
+		vcipher(x, x, 7) \
+		vcipher(x, x, 8) \
+		vcipher(x, x, 9) \
+		vcipherlast(x, x, 10)
+
+#define BLOCK_ENCRYPT_192(x) /* encrypts vector x in place: XOR with v0, vcipher with v1..v11, vcipherlast with v12 */ \
+		vxor(x, x, 0) \
+		vcipher(x, x, 1) \
+		vcipher(x, x, 2) \
+		vcipher(x, x, 3) \
+		vcipher(x, x, 4) \
+		vcipher(x, x, 5) \
+		vcipher(x, x, 6) \
+		vcipher(x, x, 7) \
+		vcipher(x, x, 8) \
+		vcipher(x, x, 9) \
+		vcipher(x, x, 10) \
+		vcipher(x, x, 11) \
+		vcipherlast(x, x, 12)
+
+#define BLOCK_ENCRYPT_256(x) /* encrypts vector x in place: XOR with v0, vcipher with v1..v13, vcipherlast with v14 */ \
+		vxor(x, x, 0) \
+		vcipher(x, x, 1) \
+		vcipher(x, x, 2) \
+		vcipher(x, x, 3) \
+		vcipher(x, x, 4) \
+		vcipher(x, x, 5) \
+		vcipher(x, x, 6) \
+		vcipher(x, x, 7) \
+		vcipher(x, x, 8) \
+		vcipher(x, x, 9) \
+		vcipher(x, x, 10) \
+		vcipher(x, x, 11) \
+		vcipher(x, x, 12) \
+		vcipher(x, x, 13) \
+		vcipherlast(x, x, 14)
+
+#define BLOCK_ENCRYPT_X2_128(x, y) /* same rounds as BLOCK_ENCRYPT_128, applied to vectors x and y with the two streams interleaved */ \
+		vxor(x, x, 0) \
+		vxor(y, y, 0) \
+		vcipher(x, x, 1) \
+		vcipher(y, y, 1) \
+		vcipher(x, x, 2) \
+		vcipher(y, y, 2) \
+		vcipher(x, x, 3) \
+		vcipher(y, y, 3) \
+		vcipher(x, x, 4) \
+		vcipher(y, y, 4) \
+		vcipher(x, x, 5) \
+		vcipher(y, y, 5) \
+		vcipher(x, x, 6) \
+		vcipher(y, y, 6) \
+		vcipher(x, x, 7) \
+		vcipher(y, y, 7) \
+		vcipher(x, x, 8) \
+		vcipher(y, y, 8) \
+		vcipher(x, x, 9) \
+		vcipher(y, y, 9) \
+		vcipherlast(x, x, 10) \
+		vcipherlast(y, y, 10)
+
+#define BLOCK_ENCRYPT_X2_192(x, y) /* same rounds as BLOCK_ENCRYPT_192, applied to vectors x and y with the two streams interleaved */ \
+		vxor(x, x, 0) \
+		vxor(y, y, 0) \
+		vcipher(x, x, 1) \
+		vcipher(y, y, 1) \
+		vcipher(x, x, 2) \
+		vcipher(y, y, 2) \
+		vcipher(x, x, 3) \
+		vcipher(y, y, 3) \
+		vcipher(x, x, 4) \
+		vcipher(y, y, 4) \
+		vcipher(x, x, 5) \
+		vcipher(y, y, 5) \
+		vcipher(x, x, 6) \
+		vcipher(y, y, 6) \
+		vcipher(x, x, 7) \
+		vcipher(y, y, 7) \
+		vcipher(x, x, 8) \
+		vcipher(y, y, 8) \
+		vcipher(x, x, 9) \
+		vcipher(y, y, 9) \
+		vcipher(x, x, 10) \
+		vcipher(y, y, 10) \
+		vcipher(x, x, 11) \
+		vcipher(y, y, 11) \
+		vcipherlast(x, x, 12) \
+		vcipherlast(y, y, 12)
+
+#define BLOCK_ENCRYPT_X2_256(x, y) /* same rounds as BLOCK_ENCRYPT_256, applied to vectors x and y with the two streams interleaved */ \
+		vxor(x, x, 0) \
+		vxor(y, y, 0) \
+		vcipher(x, x, 1) \
+		vcipher(y, y, 1) \
+		vcipher(x, x, 2) \
+		vcipher(y, y, 2) \
+		vcipher(x, x, 3) \
+		vcipher(y, y, 3) \
+		vcipher(x, x, 4) \
+		vcipher(y, y, 4) \
+		vcipher(x, x, 5) \
+		vcipher(y, y, 5) \
+		vcipher(x, x, 6) \
+		vcipher(y, y, 6) \
+		vcipher(x, x, 7) \
+		vcipher(y, y, 7) \
+		vcipher(x, x, 8) \
+		vcipher(y, y, 8) \
+		vcipher(x, x, 9) \
+		vcipher(y, y, 9) \
+		vcipher(x, x, 10) \
+		vcipher(y, y, 10) \
+		vcipher(x, x, 11) \
+		vcipher(y, y, 11) \
+		vcipher(x, x, 12) \
+		vcipher(y, y, 12) \
+		vcipher(x, x, 13) \
+		vcipher(y, y, 13) \
+		vcipherlast(x, x, 14) \
+		vcipherlast(y, y, 14)
+
+#define BLOCK_ENCRYPT_X4_128(x0, x1, x2, x3) /* same rounds as BLOCK_ENCRYPT_128, applied to four vectors with the streams interleaved */ \
+		vxor(x0, x0, 0) \
+		vxor(x1, x1, 0) \
+		vxor(x2, x2, 0) \
+		vxor(x3, x3, 0) \
+		vcipher(x0, x0, 1) \
+		vcipher(x1, x1, 1) \
+		vcipher(x2, x2, 1) \
+		vcipher(x3, x3, 1) \
+		vcipher(x0, x0, 2) \
+		vcipher(x1, x1, 2) \
+		vcipher(x2, x2, 2) \
+		vcipher(x3, x3, 2) \
+		vcipher(x0, x0, 3) \
+		vcipher(x1, x1, 3) \
+		vcipher(x2, x2, 3) \
+		vcipher(x3, x3, 3) \
+		vcipher(x0, x0, 4) \
+		vcipher(x1, x1, 4) \
+		vcipher(x2, x2, 4) \
+		vcipher(x3, x3, 4) \
+		vcipher(x0, x0, 5) \
+		vcipher(x1, x1, 5) \
+		vcipher(x2, x2, 5) \
+		vcipher(x3, x3, 5) \
+		vcipher(x0, x0, 6) \
+		vcipher(x1, x1, 6) \
+		vcipher(x2, x2, 6) \
+		vcipher(x3, x3, 6) \
+		vcipher(x0, x0, 7) \
+		vcipher(x1, x1, 7) \
+		vcipher(x2, x2, 7) \
+		vcipher(x3, x3, 7) \
+		vcipher(x0, x0, 8) \
+		vcipher(x1, x1, 8) \
+		vcipher(x2, x2, 8) \
+		vcipher(x3, x3, 8) \
+		vcipher(x0, x0, 9) \
+		vcipher(x1, x1, 9) \
+		vcipher(x2, x2, 9) \
+		vcipher(x3, x3, 9) \
+		vcipherlast(x0, x0, 10) \
+		vcipherlast(x1, x1, 10) \
+		vcipherlast(x2, x2, 10) \
+		vcipherlast(x3, x3, 10)
+
+#define BLOCK_ENCRYPT_X4_192(x0, x1, x2, x3) /* same rounds as BLOCK_ENCRYPT_192, applied to four vectors with the streams interleaved */ \
+		vxor(x0, x0, 0) \
+		vxor(x1, x1, 0) \
+		vxor(x2, x2, 0) \
+		vxor(x3, x3, 0) \
+		vcipher(x0, x0, 1) \
+		vcipher(x1, x1, 1) \
+		vcipher(x2, x2, 1) \
+		vcipher(x3, x3, 1) \
+		vcipher(x0, x0, 2) \
+		vcipher(x1, x1, 2) \
+		vcipher(x2, x2, 2) \
+		vcipher(x3, x3, 2) \
+		vcipher(x0, x0, 3) \
+		vcipher(x1, x1, 3) \
+		vcipher(x2, x2, 3) \
+		vcipher(x3, x3, 3) \
+		vcipher(x0, x0, 4) \
+		vcipher(x1, x1, 4) \
+		vcipher(x2, x2, 4) \
+		vcipher(x3, x3, 4) \
+		vcipher(x0, x0, 5) \
+		vcipher(x1, x1, 5) \
+		vcipher(x2, x2, 5) \
+		vcipher(x3, x3, 5) \
+		vcipher(x0, x0, 6) \
+		vcipher(x1, x1, 6) \
+		vcipher(x2, x2, 6) \
+		vcipher(x3, x3, 6) \
+		vcipher(x0, x0, 7) \
+		vcipher(x1, x1, 7) \
+		vcipher(x2, x2, 7) \
+		vcipher(x3, x3, 7) \
+		vcipher(x0, x0, 8) \
+		vcipher(x1, x1, 8) \
+		vcipher(x2, x2, 8) \
+		vcipher(x3, x3, 8) \
+		vcipher(x0, x0, 9) \
+		vcipher(x1, x1, 9) \
+		vcipher(x2, x2, 9) \
+		vcipher(x3, x3, 9) \
+		vcipher(x0, x0, 10) \
+		vcipher(x1, x1, 10) \
+		vcipher(x2, x2, 10) \
+		vcipher(x3, x3, 10) \
+		vcipher(x0, x0, 11) \
+		vcipher(x1, x1, 11) \
+		vcipher(x2, x2, 11) \
+		vcipher(x3, x3, 11) \
+		vcipherlast(x0, x0, 12) \
+		vcipherlast(x1, x1, 12) \
+		vcipherlast(x2, x2, 12) \
+		vcipherlast(x3, x3, 12)
+
+#define BLOCK_ENCRYPT_X4_256(x0, x1, x2, x3) /* same rounds as BLOCK_ENCRYPT_256, applied to four vectors with the streams interleaved */ \
+		vxor(x0, x0, 0) \
+		vxor(x1, x1, 0) \
+		vxor(x2, x2, 0) \
+		vxor(x3, x3, 0) \
+		vcipher(x0, x0, 1) \
+		vcipher(x1, x1, 1) \
+		vcipher(x2, x2, 1) \
+		vcipher(x3, x3, 1) \
+		vcipher(x0, x0, 2) \
+		vcipher(x1, x1, 2) \
+		vcipher(x2, x2, 2) \
+		vcipher(x3, x3, 2) \
+		vcipher(x0, x0, 3) \
+		vcipher(x1, x1, 3) \
+		vcipher(x2, x2, 3) \
+		vcipher(x3, x3, 3) \
+		vcipher(x0, x0, 4) \
+		vcipher(x1, x1, 4) \
+		vcipher(x2, x2, 4) \
+		vcipher(x3, x3, 4) \
+		vcipher(x0, x0, 5) \
+		vcipher(x1, x1, 5) \
+		vcipher(x2, x2, 5) \
+		vcipher(x3, x3, 5) \
+		vcipher(x0, x0, 6) \
+		vcipher(x1, x1, 6) \
+		vcipher(x2, x2, 6) \
+		vcipher(x3, x3, 6) \
+		vcipher(x0, x0, 7) \
+		vcipher(x1, x1, 7) \
+		vcipher(x2, x2, 7) \
+		vcipher(x3, x3, 7) \
+		vcipher(x0, x0, 8) \
+		vcipher(x1, x1, 8) \
+		vcipher(x2, x2, 8) \
+		vcipher(x3, x3, 8) \
+		vcipher(x0, x0, 9) \
+		vcipher(x1, x1, 9) \
+		vcipher(x2, x2, 9) \
+		vcipher(x3, x3, 9) \
+		vcipher(x0, x0, 10) \
+		vcipher(x1, x1, 10) \
+		vcipher(x2, x2, 10) \
+		vcipher(x3, x3, 10) \
+		vcipher(x0, x0, 11) \
+		vcipher(x1, x1, 11) \
+		vcipher(x2, x2, 11) \
+		vcipher(x3, x3, 11) \
+		vcipher(x0, x0, 12) \
+		vcipher(x1, x1, 12) \
+		vcipher(x2, x2, 12) \
+		vcipher(x3, x3, 12) \
+		vcipher(x0, x0, 13) \
+		vcipher(x1, x1, 13) \
+		vcipher(x2, x2, 13) \
+		vcipher(x3, x3, 13) \
+		vcipherlast(x0, x0, 14) \
+		vcipherlast(x1, x1, 14) \
+		vcipherlast(x2, x2, 14) \
+		vcipherlast(x3, x3, 14)
+
+#if BR_POWER8_LE /* little-endian only: v15 holds the byteswap index used by the vperm-based macros below */
+static const uint32_t idx2be[] = { /* index vector loaded into v15 by BYTESWAP_INIT */
+	0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+};
+#define BYTESWAP_INIT     lxvw4x(47, 0, %[idx2be]) /* VSX 47 (v15) <- idx2be */
+#define BYTESWAP(x)       vperm(x, x, x, 15) /* permute x in place through v15 */
+#define BYTESWAPX(d, s)   vperm(d, s, s, 15) /* d <- s permuted through v15; s is left untouched */
+#define BYTESWAP_REG      , [idx2be] "b" (idx2be) /* extra asm input operand naming idx2be */
+#else
+#define BYTESWAP_INIT /* nothing to load in this configuration */
+#define BYTESWAP(x) /* expands to nothing */
+#define BYTESWAPX(d, s)   vand(d, s, s) /* plain copy: d <- s AND s */
+#define BYTESWAP_REG /* no extra asm operand */
+#endif
+
+static const uint32_t ctrinc[] = { /* increment of 1, held in the last of four 32-bit words */
+	0, 0, 0, 1
+};
+static const uint32_t ctrinc_x4[] = { /* increment of 4, used by the four-counters-per-pass CTR loop */
+	0, 0, 0, 4
+};
+#define INCR_128_INIT      lxvw4x(60, 0, %[ctrinc]) /* VSX 60 (v28) <- ctrinc */
+#define INCR_128_X4_INIT   lxvw4x(60, 0, %[ctrinc_x4]) /* VSX 60 (v28) <- ctrinc_x4 */
+#define INCR_128(d, s) /* d <- s + v28, followed by three carry-propagation steps; uses v29 and v30 as scratch */ \
+		vaddcuw(29, s, 28) \
+		vadduwm(d, s, 28) \
+		vsldoi(30, 29, 29, 4) \
+		vaddcuw(29, d, 30) \
+		vadduwm(d, d, 30) \
+		vsldoi(30, 29, 29, 4) \
+		vaddcuw(29, d, 30) \
+		vadduwm(d, d, 30) \
+		vsldoi(30, 29, 29, 4) \
+		vadduwm(d, d, 30)
+
+#define MKCTR(size) /* \
+ * Defines ctr_<size>(): CTR-mode XOR over num_blocks_x4 groups of four \
+ * 16-byte blocks. ctrbuf holds four counter blocks (64 bytes) that are \
+ * read at entry and overwritten with the next four counter values at \
+ * exit; buf is transformed in place, 64 bytes per loop pass. The loop \
+ * body is placed before the bdnz test, and the callers in this file \
+ * always pass a count of at least 1. \
+ */ \
+static void \
+ctr_ ## size(const unsigned char *sk, \
+	unsigned char *ctrbuf, unsigned char *buf, size_t num_blocks_x4) \
+{ \
+	long cc, cc0, cc1, cc2, cc3; \
+ \
+	cc = 0; /* running offset used while loading the subkeys */ \
+	cc0 = 0; /* byte offsets of the four blocks inside a 64-byte group */ \
+	cc1 = 16; \
+	cc2 = 32; \
+	cc3 = 48; \
+	asm volatile ( \
+ \
+		/* \
+		 * Load subkeys into v0..v10 (up to v12 for the 192 variant \
+		 * and v14 for the 256 variant), then reset the %[cc] offset \
+		 * that the loads advanced. \
+		 */ \
+		LOAD_SUBKEYS_ ## size \
+		li(%[cc], 0) \
+ \
+		BYTESWAP_INIT \
+		INCR_128_X4_INIT \
+ \
+		/* \
+		 * Load current CTR counters into v16 to v19. \
+		 */ \
+		lxvw4x(48, %[cc0], %[ctrbuf]) \
+		lxvw4x(49, %[cc1], %[ctrbuf]) \
+		lxvw4x(50, %[cc2], %[ctrbuf]) \
+		lxvw4x(51, %[cc3], %[ctrbuf]) \
+		BYTESWAP(16) \
+		BYTESWAP(17) \
+		BYTESWAP(18) \
+		BYTESWAP(19) \
+ \
+		mtctr(%[num_blocks_x4]) \
+ \
+	label(loop) \
+		/* \
+		 * Compute next counter values into v20..v23 (each current \
+		 * counter plus the ctrinc_x4 constant held in v28). \
+		 */ \
+		INCR_128(20, 16) \
+		INCR_128(21, 17) \
+		INCR_128(22, 18) \
+		INCR_128(23, 19) \
+ \
+		/* \
+		 * Encrypt counter values and XOR into next data blocks. \
+		 * The data blocks are loaded into v24..v27 and the result \
+		 * is stored back over them in buf. \
+		 */ \
+		lxvw4x(56, %[cc0], %[buf]) \
+		lxvw4x(57, %[cc1], %[buf]) \
+		lxvw4x(58, %[cc2], %[buf]) \
+		lxvw4x(59, %[cc3], %[buf]) \
+		BYTESWAP(24) \
+		BYTESWAP(25) \
+		BYTESWAP(26) \
+		BYTESWAP(27) \
+		BLOCK_ENCRYPT_X4_ ## size(16, 17, 18, 19) \
+		vxor(16, 16, 24) \
+		vxor(17, 17, 25) \
+		vxor(18, 18, 26) \
+		vxor(19, 19, 27) \
+		BYTESWAP(16) \
+		BYTESWAP(17) \
+		BYTESWAP(18) \
+		BYTESWAP(19) \
+		stxvw4x(48, %[cc0], %[buf]) \
+		stxvw4x(49, %[cc1], %[buf]) \
+		stxvw4x(50, %[cc2], %[buf]) \
+		stxvw4x(51, %[cc3], %[buf]) \
+ \
+		/* \
+		 * Update counters and data pointer. The vand of a register \
+		 * with itself is used as a register-to-register copy. \
+		 */ \
+		vand(16, 20, 20) \
+		vand(17, 21, 21) \
+		vand(18, 22, 22) \
+		vand(19, 23, 23) \
+		addi(%[buf], %[buf], 64) \
+ \
+		bdnz(loop) \
+ \
+		/* \
+		 * Write back new counter values. \
+		 */ \
+		BYTESWAP(16) \
+		BYTESWAP(17) \
+		BYTESWAP(18) \
+		BYTESWAP(19) \
+		stxvw4x(48, %[cc0], %[ctrbuf]) \
+		stxvw4x(49, %[cc1], %[ctrbuf]) \
+		stxvw4x(50, %[cc2], %[ctrbuf]) \
+		stxvw4x(51, %[cc3], %[ctrbuf]) \
+ \
+: [cc] "+b" (cc), [buf] "+b" (buf), \
+	[cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3) \
+: [sk] "b" (sk), [ctrbuf] "b" (ctrbuf), \
+	[num_blocks_x4] "b" (num_blocks_x4), [ctrinc_x4] "b" (ctrinc_x4) \
+	BYTESWAP_REG \
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
+  "v30", "ctr", "memory" \
+	); \
+}
+
+MKCTR(128)
+MKCTR(192)
+MKCTR(256)
+
+#define MKCBCMAC(size) /* \
+ * Defines cbcmac_<size>(): folds num_blocks 16-byte blocks read from \
+ * buf into the 16-byte CBC-MAC value stored at cbcmac. For each block \
+ * the value becomes the encryption of (value XOR block); the final \
+ * value is written back to cbcmac. The loop body is placed before the \
+ * bdnz test, and the caller in this file only invokes it when len > 0. \
+ */ \
+static void \
+cbcmac_ ## size(const unsigned char *sk, \
+	unsigned char *cbcmac, const unsigned char *buf, size_t num_blocks) \
+{ \
+	long cc; \
+ \
+	cc = 0; \
+	asm volatile ( \
+ \
+		/* \
+		 * Load subkeys into v0..v10 (up to v12 for the 192 variant \
+		 * and v14 for the 256 variant), then reset the %[cc] offset \
+		 * that the loads advanced. \
+		 */ \
+		LOAD_SUBKEYS_ ## size \
+		li(%[cc], 0) \
+ \
+		BYTESWAP_INIT \
+ \
+		/* \
+		 * Load current CBC-MAC value into v16. \
+		 */ \
+		lxvw4x(48, %[cc], %[cbcmac]) \
+		BYTESWAP(16) \
+ \
+		mtctr(%[num_blocks]) \
+ \
+	label(loop) \
+		/* \
+		 * Load next block, XOR into current CBC-MAC value, \
+		 * and then encrypt it. \
+		 */ \
+		lxvw4x(49, %[cc], %[buf]) \
+		BYTESWAP(17) \
+		vxor(16, 16, 17) \
+		BLOCK_ENCRYPT_ ## size(16) \
+		addi(%[buf], %[buf], 16) \
+ \
+		bdnz(loop) \
+ \
+		/* \
+		 * Write back new CBC-MAC value. \
+		 */ \
+		BYTESWAP(16) \
+		stxvw4x(48, %[cc], %[cbcmac]) \
+ \
+: [cc] "+b" (cc), [buf] "+b" (buf) \
+: [sk] "b" (sk), [cbcmac] "b" (cbcmac), [num_blocks] "b" (num_blocks) \
+	BYTESWAP_REG \
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
+  "v30", "ctr", "memory" \
+	); \
+}
+
+MKCBCMAC(128)
+MKCBCMAC(192)
+MKCBCMAC(256)
+
+#define MKENCRYPT(size) /* \
+ * Defines ctrcbc_<size>_encrypt(): CTR-encrypts num_blocks 16-byte \
+ * blocks of buf in place and runs CBC-MAC over the resulting encrypted \
+ * blocks. The 16-byte counter at ctr and the 16-byte CBC-MAC value at \
+ * cbcmac are read at entry and written back at exit. The first block \
+ * is processed before the loop counter is examined, and the public \
+ * wrapper in this file returns early when len is 0. \
+ */ \
+static void \
+ctrcbc_ ## size ## _encrypt(const unsigned char *sk, \
+	unsigned char *ctr, unsigned char *cbcmac, unsigned char *buf, \
+	size_t num_blocks) \
+{ \
+	long cc; \
+ \
+	cc = 0; \
+	asm volatile ( \
+ \
+		/* \
+		 * Load subkeys into v0..v10 (up to v12 for the 192 variant \
+		 * and v14 for the 256 variant), then reset the %[cc] offset \
+		 * that the loads advanced. \
+		 */ \
+		LOAD_SUBKEYS_ ## size \
+		li(%[cc], 0) \
+ \
+		BYTESWAP_INIT \
+		INCR_128_INIT \
+ \
+		/* \
+		 * Load current CTR counter into v16, and current \
+		 * CBC-MAC IV into v17. \
+		 */ \
+		lxvw4x(48, %[cc], %[ctr]) \
+		lxvw4x(49, %[cc], %[cbcmac]) \
+		BYTESWAP(16) \
+		BYTESWAP(17) \
+ \
+		/* \
+		 * At each iteration, we do two parallel encryptions: \
+		 *  - new counter value for encryption of the next block; \
+		 *  - CBC-MAC over the previous encrypted block. \
+		 * Thus, each plaintext block implies two AES instances, \
+		 * over two successive iterations. This requires a single \
+		 * counter encryption before the loop, and a single \
+		 * CBC-MAC encryption after the loop. \
+		 */ \
+ \
+		/* \
+		 * Encrypt first block (into v20). The next counter value \
+		 * is computed into v22 and copied to v16 afterwards; v21 \
+		 * receives the copy of v20 that is stored to memory. \
+		 */ \
+		lxvw4x(52, %[cc], %[buf]) \
+		BYTESWAP(20) \
+		INCR_128(22, 16) \
+		BLOCK_ENCRYPT_ ## size(16) \
+		vxor(20, 20, 16) \
+		BYTESWAPX(21, 20) \
+		stxvw4x(53, %[cc], %[buf]) \
+		vand(16, 22, 22) \
+		addi(%[buf], %[buf], 16) \
+ \
+		/* \
+		 * Load loop counter; skip the loop if there is only \
+		 * one block in total (already handled by the boundary \
+		 * conditions). \
+		 */ \
+		mtctr(%[num_blocks]) \
+		bdz(fastexit) \
+ \
+	label(loop) \
+		/* \
+		 * Upon loop entry: \
+		 *    v16   counter value for next block \
+		 *    v17   current CBC-MAC value \
+		 *    v20   encrypted previous block \
+		 */ \
+		vxor(17, 17, 20) \
+		INCR_128(22, 16) \
+		lxvw4x(52, %[cc], %[buf]) \
+		BYTESWAP(20) \
+		BLOCK_ENCRYPT_X2_ ## size(16, 17) \
+		vxor(20, 20, 16) \
+		BYTESWAPX(21, 20) \
+		stxvw4x(53, %[cc], %[buf]) \
+		addi(%[buf], %[buf], 16) \
+		vand(16, 22, 22) \
+ \
+		bdnz(loop) \
+ \
+	label(fastexit) \
+		vxor(17, 17, 20) \
+		BLOCK_ENCRYPT_ ## size(17) \
+		BYTESWAP(16) \
+		BYTESWAP(17) \
+		stxvw4x(48, %[cc], %[ctr]) \
+		stxvw4x(49, %[cc], %[cbcmac]) \
+ \
+: [cc] "+b" (cc), [buf] "+b" (buf) \
+: [sk] "b" (sk), [ctr] "b" (ctr), [cbcmac] "b" (cbcmac), \
+	[num_blocks] "b" (num_blocks), [ctrinc] "b" (ctrinc) \
+	BYTESWAP_REG \
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
+  "v30", "ctr", "memory" \
+	); \
+}
+
+MKENCRYPT(128)
+MKENCRYPT(192)
+MKENCRYPT(256)
+
+#define MKDECRYPT(size) /* \
+ * Defines ctrcbc_<size>_decrypt(): for each of num_blocks 16-byte \
+ * blocks of buf, folds the block as loaded (still encrypted) into the \
+ * CBC-MAC value, then XORs it with the encrypted counter and stores \
+ * the result back in place. The 16-byte counter at ctr and the 16-byte \
+ * CBC-MAC value at cbcmac are read at entry and written back at exit. \
+ * The loop body is placed before the bdnz test, and the public wrapper \
+ * in this file returns early when len is 0. \
+ */ \
+static void \
+ctrcbc_ ## size ## _decrypt(const unsigned char *sk, \
+	unsigned char *ctr, unsigned char *cbcmac, unsigned char *buf, \
+	size_t num_blocks) \
+{ \
+	long cc; \
+ \
+	cc = 0; \
+	asm volatile ( \
+ \
+		/* \
+		 * Load subkeys into v0..v10 (up to v12 for the 192 variant \
+		 * and v14 for the 256 variant), then reset the %[cc] offset \
+		 * that the loads advanced. \
+		 */ \
+		LOAD_SUBKEYS_ ## size \
+		li(%[cc], 0) \
+ \
+		BYTESWAP_INIT \
+		INCR_128_INIT \
+ \
+		/* \
+		 * Load current CTR counter into v16, and current \
+		 * CBC-MAC IV into v17. \
+		 */ \
+		lxvw4x(48, %[cc], %[ctr]) \
+		lxvw4x(49, %[cc], %[cbcmac]) \
+		BYTESWAP(16) \
+		BYTESWAP(17) \
+ \
+		/* \
+		 * At each iteration, we do two parallel encryptions: \
+		 *  - new counter value for decryption of the next block; \
+		 *  - CBC-MAC over the next encrypted block. \
+		 * Each iteration performs the two AES instances related \
+		 * to the current block; there is thus no need for some \
+		 * extra pre-loop and post-loop work as in encryption. \
+		 */ \
+ \
+		mtctr(%[num_blocks]) \
+ \
+	label(loop) \
+		/* \
+		 * Upon loop entry: \
+		 *    v16   counter value for next block \
+		 *    v17   current CBC-MAC value \
+		 */ \
+		lxvw4x(52, %[cc], %[buf]) \
+		BYTESWAP(20) \
+		vxor(17, 17, 20) \
+		INCR_128(22, 16) \
+		BLOCK_ENCRYPT_X2_ ## size(16, 17) \
+		vxor(20, 20, 16) \
+		BYTESWAPX(21, 20) \
+		stxvw4x(53, %[cc], %[buf]) \
+		addi(%[buf], %[buf], 16) \
+		vand(16, 22, 22) \
+ \
+		bdnz(loop) \
+ \
+		/* \
+		 * Store back counter and CBC-MAC value. \
+		 */ \
+		BYTESWAP(16) \
+		BYTESWAP(17) \
+		stxvw4x(48, %[cc], %[ctr]) \
+		stxvw4x(49, %[cc], %[cbcmac]) \
+ \
+: [cc] "+b" (cc), [buf] "+b" (buf) \
+: [sk] "b" (sk), [ctr] "b" (ctr), [cbcmac] "b" (cbcmac), \
+	[num_blocks] "b" (num_blocks), [ctrinc] "b" (ctrinc) \
+	BYTESWAP_REG \
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
+  "v30", "ctr", "memory" \
+	); \
+}
+
+MKDECRYPT(128)
+MKDECRYPT(192)
+MKDECRYPT(256)
+
+/*
+ * see bearssl_block.h
+ *
+ * Combined CTR encryption and CBC-MAC over the len bytes at data.
+ * Nothing is done when len is 0. Otherwise len >> 4 blocks are handed
+ * to the assembly routine selected by ctx->num_rounds (10 selects the
+ * 128 variant, 12 the 192 variant, any other value the 256 variant);
+ * that routine updates ctr, cbcmac and data in place.
+ */
+void
+br_aes_pwr8_ctrcbc_encrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	size_t num_blocks;
+
+	if (len == 0) {
+		return;
+	}
+	num_blocks = len >> 4;
+	if (ctx->num_rounds == 10) {
+		ctrcbc_128_encrypt(ctx->skey.skni,
+			ctr, cbcmac, data, num_blocks);
+	} else if (ctx->num_rounds == 12) {
+		ctrcbc_192_encrypt(ctx->skey.skni,
+			ctr, cbcmac, data, num_blocks);
+	} else {
+		ctrcbc_256_encrypt(ctx->skey.skni,
+			ctr, cbcmac, data, num_blocks);
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Combined CBC-MAC and CTR decryption over the len bytes at data.
+ * Nothing is done when len is 0. Otherwise len >> 4 blocks are handed
+ * to the assembly routine selected by ctx->num_rounds (10 selects the
+ * 128 variant, 12 the 192 variant, any other value the 256 variant);
+ * that routine updates ctr, cbcmac and data in place.
+ */
+void
+br_aes_pwr8_ctrcbc_decrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	size_t num_blocks;
+
+	if (len == 0) {
+		return;
+	}
+	num_blocks = len >> 4;
+	if (ctx->num_rounds == 10) {
+		ctrcbc_128_decrypt(ctx->skey.skni,
+			ctr, cbcmac, data, num_blocks);
+	} else if (ctx->num_rounds == 12) {
+		ctrcbc_192_decrypt(ctx->skey.skni,
+			ctr, cbcmac, data, num_blocks);
+	} else {
+		ctrcbc_256_decrypt(ctx->skey.skni,
+			ctr, cbcmac, data, num_blocks);
+	}
+}
+
+/*
+ * Adds 1 to the 16-byte counter at src and stores the 16-byte result
+ * at dst. The counter is handled as two 64-bit halves through
+ * br_dec64be / br_enc64be, the second half being the low one. Both
+ * halves are read before anything is written.
+ */
+static inline void
+incr_ctr(void *dst, const void *src)
+{
+	const unsigned char *in = src;
+	unsigned char *out = dst;
+	uint64_t upper = br_dec64be(in);
+	uint64_t lower = br_dec64be(in + 8) + 1;
+	/*
+	 * (lower | -lower) has its top bit set exactly when lower is
+	 * non-zero, so the carry is 1 only when the low half wrapped to 0.
+	 */
+	uint64_t carry = ((lower | -lower) >> 63) ^ (uint64_t)1;
+
+	br_enc64be(out, upper + carry);
+	br_enc64be(out + 8, lower);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CTR-only processing of the len bytes at data, in place, starting from
+ * the 16-byte counter at ctr. Four consecutive counter values are staged
+ * in a 64-byte buffer; whole 64-byte groups go straight to the assembly
+ * routine, and a remainder of 1..63 bytes is processed through a
+ * zero-padded 64-byte scratch copy. On return ctr holds the counter
+ * advanced by the number of whole 16-byte blocks processed.
+ */
+void
+br_aes_pwr8_ctrcbc_ctr(const br_aes_pwr8_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	void (*run)(const unsigned char *, unsigned char *,
+		unsigned char *, size_t);
+	unsigned char ctrbuf[64], tmp[64];
+	unsigned char *buf = data;
+	size_t whole;
+	size_t u;
+
+	/* Pick the assembly routine once; it is used for both passes. */
+	if (ctx->num_rounds == 10) {
+		run = &ctr_128;
+	} else if (ctx->num_rounds == 12) {
+		run = &ctr_192;
+	} else {
+		run = &ctr_256;
+	}
+
+	/* ctrbuf = counter, counter + 1, counter + 2, counter + 3. */
+	memcpy(ctrbuf, ctr, 16);
+	for (u = 16; u < sizeof ctrbuf; u += 16) {
+		incr_ctr(ctrbuf + u, ctrbuf + u - 16);
+	}
+
+	if (len >= 64) {
+		run(ctx->skey.skni, ctrbuf, buf, len >> 6);
+		buf += len & ~(size_t)63;
+		len &= 63;
+	}
+	if (len == 0) {
+		/* No remainder: the first staged counter is the next one. */
+		memcpy(ctr, ctrbuf, 16);
+		return;
+	}
+
+	/*
+	 * Remainder of 1..63 bytes: the new counter is the staged value
+	 * that follows the last whole block (none if fewer than 16 bytes
+	 * remain, in which case ctr is left as it is).
+	 */
+	whole = len & ~(size_t)15;
+	if (whole != 0) {
+		memcpy(ctr, ctrbuf + whole, 16);
+	}
+	memcpy(tmp, buf, len);
+	memset(tmp + len, 0, (sizeof tmp) - len);
+	run(ctx->skey.skni, ctrbuf, tmp, 1);
+	memcpy(buf, tmp, len);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CBC-MAC update over the len bytes at data. Nothing is done when len
+ * is 0. Otherwise len >> 4 blocks are folded into the value at cbcmac
+ * by the assembly routine selected by ctx->num_rounds (10 selects the
+ * 128 variant, 12 the 192 variant, any other value the 256 variant).
+ */
+void
+br_aes_pwr8_ctrcbc_mac(const br_aes_pwr8_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	size_t num_blocks;
+
+	if (len == 0) {
+		return;
+	}
+	num_blocks = len >> 4;
+	if (ctx->num_rounds == 10) {
+		cbcmac_128(ctx->skey.skni, cbcmac, data, num_blocks);
+	} else if (ctx->num_rounds == 12) {
+		cbcmac_192(ctx->skey.skni, cbcmac, data, num_blocks);
+	} else {
+		cbcmac_256(ctx->skey.skni, cbcmac, data, num_blocks);
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Method table for the POWER8 AES CTR + CBC-MAC implementation. It
+ * lists the size of the concrete key context, two numeric constants,
+ * and the init, encrypt, decrypt, ctr and mac entry points, each cast
+ * to the function-pointer type used by the generic
+ * br_block_ctrcbc_class slots (the concrete functions take a
+ * br_aes_pwr8_ctrcbc_keys pointer as first argument).
+ */
+const br_block_ctrcbc_class br_aes_pwr8_ctrcbc_vtable = {
+	sizeof(br_aes_pwr8_ctrcbc_keys), /* bytes needed for one key context */
+	16, /* NOTE(review): presumably the block size in bytes -- confirm in bearssl_block.h */
+	4, /* NOTE(review): presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+		&br_aes_pwr8_ctrcbc_init,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_pwr8_ctrcbc_encrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_pwr8_ctrcbc_decrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, size_t))
+		&br_aes_pwr8_ctrcbc_ctr,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, const void *, size_t))
+		&br_aes_pwr8_ctrcbc_mac
+};
+
+#else
+
+/*
+ * see bearssl_block.h
+ *
+ * Fallback definition compiled when BR_POWER8 is false: it always
+ * reports that no vtable is available.
+ */
+const br_block_ctrcbc_class *
+br_aes_pwr8_ctrcbc_get_vtable(void)
+{
+	return NULL;
+}
+
+#endif
--- a/third_party/bearssl/src/aes_small_cbcdec.c
+++ b/third_party/bearssl/src/aes_small_cbcdec.c
@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_block.h
+ *
+ * Prepares a "small" AES CBC-decryption context: the key schedule is
+ * run over the len bytes at key into ctx->skey, its return value is
+ * recorded as the round count, and the vtable pointer is installed.
+ */
+void
+br_aes_small_cbcdec_init(br_aes_small_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	/* The two assignments touch distinct fields and are independent. */
+	ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_small_cbcdec_vtable;
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CBC decryption of the len bytes at data, in place. The 16 bytes at iv
+ * are the chaining value: they are XORed into each decrypted block and
+ * then replaced by that block's original (encrypted) content, so on
+ * return iv holds the last encrypted block. The loop consumes 16 bytes
+ * per pass and only stops when the remaining count is exactly 0, so len
+ * must be a multiple of 16.
+ */
+void
+br_aes_small_cbcdec_run(const br_aes_small_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *chain = iv;
+	unsigned char *block = data;
+
+	for (; len > 0; len -= 16, block += 16) {
+		unsigned char saved[16];
+		size_t k;
+
+		/* Keep the encrypted block: it is the next chaining value. */
+		memcpy(saved, block, sizeof saved);
+		br_aes_small_decrypt(ctx->num_rounds, ctx->skey, block);
+		for (k = 0; k < sizeof saved; k ++) {
+			block[k] ^= chain[k];
+		}
+		memcpy(chain, saved, sizeof saved);
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Method table for the "small" AES CBC-decryption implementation. It
+ * lists the size of the concrete key context, two numeric constants,
+ * and the init and run entry points, each cast to the function-pointer
+ * type used by the generic br_block_cbcdec_class slots.
+ */
+const br_block_cbcdec_class br_aes_small_cbcdec_vtable = {
+	sizeof(br_aes_small_cbcdec_keys), /* bytes needed for one key context */
+	16, /* NOTE(review): presumably the block size in bytes -- confirm in bearssl_block.h */
+	4, /* NOTE(review): presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))
+		&br_aes_small_cbcdec_init,
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+		&br_aes_small_cbcdec_run
+};
--- a/third_party/bearssl/src/aes_small_cbcenc.c
+++ b/third_party/bearssl/src/aes_small_cbcenc.c
@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_block.h
+ *
+ * Prepares a "small" AES CBC-encryption context: the key schedule is
+ * run over the len bytes at key into ctx->skey, its return value is
+ * recorded as the round count, and the vtable pointer is installed.
+ */
+void
+br_aes_small_cbcenc_init(br_aes_small_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	/* The two assignments touch distinct fields and are independent. */
+	ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_small_cbcenc_vtable;
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CBC encryption of the len bytes at data, in place. The 16 bytes at iv
+ * are the chaining value: they are XORed into each block before it is
+ * encrypted and then replaced by the encrypted block, so on return iv
+ * holds the last encrypted block. The loop consumes 16 bytes per pass
+ * and only stops when the remaining count is exactly 0, so len must be
+ * a multiple of 16.
+ */
+void
+br_aes_small_cbcenc_run(const br_aes_small_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *chain = iv;
+	unsigned char *block = data;
+
+	for (; len > 0; len -= 16, block += 16) {
+		size_t k;
+
+		for (k = 0; k < 16; k ++) {
+			block[k] ^= chain[k];
+		}
+		br_aes_small_encrypt(ctx->num_rounds, ctx->skey, block);
+		/* The fresh encrypted block chains into the next one. */
+		memcpy(chain, block, 16);
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Method table for the "small" AES CBC-encryption implementation. It
+ * lists the size of the concrete key context, two numeric constants,
+ * and the init and run entry points, each cast to the function-pointer
+ * type used by the generic br_block_cbcenc_class slots.
+ */
+const br_block_cbcenc_class br_aes_small_cbcenc_vtable = {
+	sizeof(br_aes_small_cbcenc_keys), /* bytes needed for one key context */
+	16, /* NOTE(review): presumably the block size in bytes -- confirm in bearssl_block.h */
+	4, /* NOTE(review): presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
+		&br_aes_small_cbcenc_init,
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+		&br_aes_small_cbcenc_run
+};
--- a/third_party/bearssl/src/aes_small_ctr.c
+++ b/third_party/bearssl/src/aes_small_ctr.c
@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_block.h
+ *
+ * Prepares a "small" AES CTR context: the key schedule is run over the
+ * len bytes at key into ctx->skey, its return value is recorded as the
+ * round count, and the vtable pointer is installed.
+ */
+void
+br_aes_small_ctr_init(br_aes_small_ctr_keys *ctx,
+	const void *key, size_t len)
+{
+	/* The two assignments touch distinct fields and are independent. */
+	ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_small_ctr_vtable;
+}
+
+/*
+ * XORs the first len bytes of src into dst, byte by byte, from the
+ * lowest address upward. Nothing is touched when len is 0.
+ */
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *out = dst;
+	const unsigned char *in = src;
+	size_t k;
+
+	for (k = 0; k < len; k ++) {
+		out[k] ^= in[k];
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CTR processing of the len bytes at data, in place. Each keystream
+ * block is the encryption of the 12 bytes at iv followed by the 32-bit
+ * counter cc written by br_enc32be; cc is advanced once per keystream
+ * block produced, including a final partial one. Returns the updated
+ * counter value.
+ */
+uint32_t
+br_aes_small_ctr_run(const br_aes_small_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char *out = data;
+	size_t left = len;
+
+	while (left > 0) {
+		unsigned char ks[16];
+		size_t take = left < 16 ? left : 16;
+
+		memcpy(ks, iv, 12);
+		br_enc32be(ks + 12, cc);
+		cc ++;
+		br_aes_small_encrypt(ctx->num_rounds, ctx->skey, ks);
+		xorbuf(out, ks, take);
+		out += take;
+		left -= take;
+	}
+	return cc;
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Method table for the "small" AES CTR implementation. It lists the
+ * size of the concrete key context, two numeric constants, and the init
+ * and run entry points, each cast to the function-pointer type used by
+ * the generic br_block_ctr_class slots.
+ */
+const br_block_ctr_class br_aes_small_ctr_vtable = {
+	sizeof(br_aes_small_ctr_keys), /* bytes needed for one key context */
+	16, /* NOTE(review): presumably the block size in bytes -- confirm in bearssl_block.h */
+	4, /* NOTE(review): presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_ctr_class **, const void *, size_t))
+		&br_aes_small_ctr_init,
+	(uint32_t (*)(const br_block_ctr_class *const *,
+		const void *, uint32_t, void *, size_t))
+		&br_aes_small_ctr_run
+};
--- a/third_party/bearssl/src/aes_small_ctrcbc.c
+++ b/third_party/bearssl/src/aes_small_ctrcbc.c
@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h: set up an AES "small" CTR+CBC-MAC context from a raw key */
+void
+br_aes_small_ctrcbc_init(br_aes_small_ctrcbc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_small_ctrcbc_vtable;
+	ctx->num_rounds = br_aes_keysched(ctx->skey, key, len); /* return value is stored as the round count; presumably also expands key into ctx->skey */
+}
+
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{ /* XOR the first len bytes of src into dst, in place */
+	unsigned char *d;
+	const unsigned char *s;
+
+	d = dst;
+	s = src;
+	while (len -- > 0) { /* one byte per iteration; len == 0 is a no-op */
+		*d ++ ^= *s ++;
+	}
+}
+
+/* see bearssl_block.h: CTR-process data in place with a 128-bit counter; the updated counter is written back to ctr */
+void
+br_aes_small_ctrcbc_ctr(const br_aes_small_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	unsigned char *buf, *bctr;
+	uint32_t cc0, cc1, cc2, cc3; /* counter as four 32-bit words; cc0 is the word at offset 12, cc3 the word at offset 0 */
+
+	buf = data;
+	bctr = ctr;
+	cc3 = br_dec32be(bctr +  0);
+	cc2 = br_dec32be(bctr +  4);
+	cc1 = br_dec32be(bctr +  8);
+	cc0 = br_dec32be(bctr + 12);
+	while (len > 0) { /* len is reduced by 16 unconditionally below, so it must be a multiple of 16 or the size_t wraps */
+		unsigned char tmp[16];
+		uint32_t carry;
+
+		br_enc32be(tmp +  0, cc3);
+		br_enc32be(tmp +  4, cc2);
+		br_enc32be(tmp +  8, cc1);
+		br_enc32be(tmp + 12, cc0);
+		br_aes_small_encrypt(ctx->num_rounds, ctx->skey, tmp); /* tmp now holds the keystream block */
+		xorbuf(buf, tmp, 16);
+		buf += 16;
+		len -= 16;
+		cc0 ++;
+		carry = (~(cc0 | -cc0)) >> 31; /* 1 exactly when cc0 wrapped to 0, else 0; computed without a branch */
+		cc1 += carry;
+		carry &= (~(cc1 | -cc1)) >> 31; /* carry continues only if cc1 is now 0 as well */
+		cc2 += carry;
+		carry &= (~(cc2 | -cc2)) >> 31;
+		cc3 += carry;
+	}
+	br_enc32be(bctr +  0, cc3); /* store the advanced counter for the next call */
+	br_enc32be(bctr +  4, cc2);
+	br_enc32be(bctr +  8, cc1);
+	br_enc32be(bctr + 12, cc0);
+}
+
+/* see bearssl_block.h: fold data into the 16-byte running CBC-MAC value held in cbcmac */
+void
+br_aes_small_ctrcbc_mac(const br_aes_small_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	const unsigned char *buf;
+
+	buf = data;
+	while (len > 0) { /* len is reduced by 16 unconditionally, so it must be a multiple of 16 or the size_t wraps */
+		xorbuf(cbcmac, buf, 16); /* XOR the next 16 input bytes into the MAC state */
+		br_aes_small_encrypt(ctx->num_rounds, ctx->skey, cbcmac); /* then encrypt the MAC state in place */
+		buf += 16;
+		len -= 16;
+	}
+}
+
+/* see bearssl_block.h: CTR-process data in place, then CBC-MAC the result */
+void
+br_aes_small_ctrcbc_encrypt(const br_aes_small_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	br_aes_small_ctrcbc_ctr(ctx, ctr, data, len); /* data is transformed first ... */
+	br_aes_small_ctrcbc_mac(ctx, cbcmac, data, len); /* ... so the MAC covers the CTR output */
+}
+
+/* see bearssl_block.h: CBC-MAC the incoming data, then CTR-process it in place */
+void
+br_aes_small_ctrcbc_decrypt(const br_aes_small_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	br_aes_small_ctrcbc_mac(ctx, cbcmac, data, len); /* MAC is taken over data as received, before it is transformed */
+	br_aes_small_ctrcbc_ctr(ctx, ctr, data, len);
+}
+
+/* see bearssl_block.h: dispatch table for the AES "small" CTR+CBC-MAC implementation */
+const br_block_ctrcbc_class br_aes_small_ctrcbc_vtable = {
+	sizeof(br_aes_small_ctrcbc_keys), /* size of the context structure */
+	16, /* presumably the block size in bytes -- confirm against br_block_ctrcbc_class */
+	4, /* presumably log2 of the block size -- confirm against br_block_ctrcbc_class */
+	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+		&br_aes_small_ctrcbc_init, /* each entry is cast from the concrete-context signature to the generic one */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_small_ctrcbc_encrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_small_ctrcbc_decrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, size_t))
+		&br_aes_small_ctrcbc_ctr,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, const void *, size_t))
+		&br_aes_small_ctrcbc_mac
+};
--- a/third_party/bearssl/src/aes_small_dec.c
+++ b/third_party/bearssl/src/aes_small_dec.c
@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Inverse S-box: 256-entry byte table, indexed by a state byte in inv_sub_bytes().
+ */
+static const unsigned char iS[] = {
+	0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E,
+	0x81, 0xF3, 0xD7, 0xFB, 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
+	0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB, 0x54, 0x7B, 0x94, 0x32,
+	0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
+	0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49,
+	0x6D, 0x8B, 0xD1, 0x25, 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
+	0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, 0x6C, 0x70, 0x48, 0x50,
+	0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
+	0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05,
+	0xB8, 0xB3, 0x45, 0x06, 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
+	0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, 0x3A, 0x91, 0x11, 0x41,
+	0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
+	0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8,
+	0x1C, 0x75, 0xDF, 0x6E, 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
+	0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B, 0xFC, 0x56, 0x3E, 0x4B,
+	0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
+	0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59,
+	0x27, 0x80, 0xEC, 0x5F, 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
+	0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, 0xA0, 0xE0, 0x3B, 0x4D,
+	0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
+	0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63,
+	0x55, 0x21, 0x0C, 0x7D
+};
+
+static void
+add_round_key(unsigned *state, const uint32_t *skeys)
+{ /* XOR four 32-bit subkey words into the 16-entry state, most significant byte first */
+	int i;
+
+	for (i = 0; i < 16; i += 4) { /* one subkey word per group of four state entries */
+		uint32_t k;
+
+		k = *skeys ++;
+		state[i + 0] ^= (unsigned)(k >> 24); /* top byte: no mask needed on a 32-bit value */
+		state[i + 1] ^= (unsigned)(k >> 16) & 0xFF;
+		state[i + 2] ^= (unsigned)(k >> 8) & 0xFF;
+		state[i + 3] ^= (unsigned)k & 0xFF;
+	}
+}
+
+static void
+inv_sub_bytes(unsigned *state)
+{ /* replace each of the 16 state entries by its inverse S-box image */
+	int i;
+
+	for (i = 0; i < 16; i ++) {
+		state[i] = iS[state[i]]; /* table lookup; state[i] must be in 0..255 to stay inside iS */
+	}
+}
+
+static void
+inv_shift_rows(unsigned *state)
+{ /* permute state entries; exact reverse of shift_rows() in aes_small_enc.c */
+	unsigned tmp;
+
+	tmp = state[13]; /* entries 1,5,9,13: values move 1->5->9->13->1 */
+	state[13] = state[9];
+	state[9] = state[5];
+	state[5] = state[1];
+	state[1] = tmp;
+
+	tmp = state[2]; /* entries 2,6,10,14: two swaps (2<->10, 6<->14) */
+	state[2] = state[10];
+	state[10] = tmp;
+	tmp = state[6];
+	state[6] = state[14];
+	state[14] = tmp;
+
+	tmp = state[3]; /* entries 3,7,11,15: values move 15->11->7->3->15 */
+	state[3] = state[7];
+	state[7] = state[11];
+	state[11] = state[15];
+	state[15] = tmp;
+}
+
+static inline unsigned
+gf256red(unsigned x)
+{ /* fold the bits of x above bit 7 back into the low byte (reduction by x^8 = x^4+x^3+x+1) */
+	unsigned y;
+
+	y = x >> 8; /* overflow bits; callers in this file pass at most 11-bit values, so y < 8 */
+	return (x ^ y ^ (y << 1) ^ (y << 3) ^ (y << 4)) & 0xFF; /* y * 0x1B, then drop everything above bit 7 */
+}
+
+static void
+inv_mix_columns(unsigned *state)
+{ /* apply the inverse column mix to each group of four state entries */
+	int i;
+
+	for (i = 0; i < 16; i += 4) {
+		unsigned s0, s1, s2, s3;
+		unsigned t0, t1, t2, t3; /* unreduced products, up to 11 bits wide */
+
+		s0 = state[i + 0];
+		s1 = state[i + 1];
+		s2 = state[i + 2];
+		s3 = state[i + 3];
+		t0 = (s0 << 1) ^ (s0 << 2) ^ (s0 << 3)
+			^ s1 ^ (s1 << 1) ^ (s1 << 3)
+			^ s2 ^ (s2 << 2) ^ (s2 << 3)
+			^ s3 ^ (s3 << 3); /* 0E*s0 ^ 0B*s1 ^ 0D*s2 ^ 09*s3, as shift-and-XOR */
+		t1 = s0 ^ (s0 << 3)
+			^ (s1 << 1) ^ (s1 << 2) ^ (s1 << 3)
+			^ s2 ^ (s2 << 1) ^ (s2 << 3)
+			^ s3 ^ (s3 << 2) ^ (s3 << 3); /* 09*s0 ^ 0E*s1 ^ 0B*s2 ^ 0D*s3 */
+		t2 = s0 ^ (s0 << 2) ^ (s0 << 3)
+			^ s1 ^ (s1 << 3)
+			^ (s2 << 1) ^ (s2 << 2) ^ (s2 << 3)
+			^ s3 ^ (s3 << 1) ^ (s3 << 3); /* 0D*s0 ^ 09*s1 ^ 0E*s2 ^ 0B*s3 */
+		t3 = s0 ^ (s0 << 1) ^ (s0 << 3)
+			^ s1 ^ (s1 << 2) ^ (s1 << 3)
+			^ s2 ^ (s2 << 3)
+			^ (s3 << 1) ^ (s3 << 2) ^ (s3 << 3); /* 0B*s0 ^ 0D*s1 ^ 09*s2 ^ 0E*s3 */
+		state[i + 0] = gf256red(t0); /* reduce each product back to one byte */
+		state[i + 1] = gf256red(t1);
+		state[i + 2] = gf256red(t2);
+		state[i + 3] = gf256red(t3);
+	}
+}
+
+/* see inner.h: decrypt one 16-byte block in place using the expanded key skey */
+void
+br_aes_small_decrypt(unsigned num_rounds, const uint32_t *skey, void *data)
+{
+	unsigned char *buf;
+	unsigned state[16]; /* one block byte per entry */
+	unsigned u;
+
+	buf = data;
+	for (u = 0; u < 16; u ++) {
+		state[u] = buf[u];
+	}
+	add_round_key(state, skey + (num_rounds << 2)); /* start with the last subkey (4 words per round) */
+	for (u = num_rounds - 1; u > 0; u --) { /* subkeys num_rounds-1 down to 1 */
+		inv_shift_rows(state);
+		inv_sub_bytes(state);
+		add_round_key(state, skey + (u << 2));
+		inv_mix_columns(state);
+	}
+	inv_shift_rows(state); /* final round has no inv_mix_columns and uses subkey 0 */
+	inv_sub_bytes(state);
+	add_round_key(state, skey);
+	for (u = 0; u < 16; u ++) {
+		buf[u] = state[u]; /* write the result back over the input */
+	}
+}
--- a/third_party/bearssl/src/aes_small_enc.c
+++ b/third_party/bearssl/src/aes_small_enc.c
@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define S   br_aes_S /* short local alias for the br_aes_S table used by sub_bytes() */
+
+static void
+add_round_key(unsigned *state, const uint32_t *skeys)
+{ /* XOR four 32-bit subkey words into the 16-entry state, most significant byte first */
+	int i;
+
+	for (i = 0; i < 16; i += 4) { /* one subkey word per group of four state entries */
+		uint32_t k;
+
+		k = *skeys ++;
+		state[i + 0] ^= (unsigned)(k >> 24); /* top byte: no mask needed on a 32-bit value */
+		state[i + 1] ^= (unsigned)(k >> 16) & 0xFF;
+		state[i + 2] ^= (unsigned)(k >> 8) & 0xFF;
+		state[i + 3] ^= (unsigned)k & 0xFF;
+	}
+}
+
+static void
+sub_bytes(unsigned *state)
+{ /* replace each of the 16 state entries by its image in the S table (br_aes_S) */
+	int i;
+
+	for (i = 0; i < 16; i ++) {
+		state[i] = S[state[i]]; /* table lookup indexed by the current entry value */
+	}
+}
+
+static void
+shift_rows(unsigned *state)
+{ /* permute state entries; exact reverse of inv_shift_rows() in aes_small_dec.c */
+	unsigned tmp;
+
+	tmp = state[1]; /* entries 1,5,9,13: values move 13->9->5->1->13 */
+	state[1] = state[5];
+	state[5] = state[9];
+	state[9] = state[13];
+	state[13] = tmp;
+
+	tmp = state[2]; /* entries 2,6,10,14: two swaps (2<->10, 6<->14) */
+	state[2] = state[10];
+	state[10] = tmp;
+	tmp = state[6];
+	state[6] = state[14];
+	state[14] = tmp;
+
+	tmp = state[15]; /* entries 3,7,11,15: values move 3->7->11->15->3 */
+	state[15] = state[11];
+	state[11] = state[7];
+	state[7] = state[3];
+	state[3] = tmp;
+}
+
+static void
+mix_columns(unsigned *state)
+{ /* apply the forward column mix to each group of four state entries */
+	int i;
+
+	for (i = 0; i < 16; i += 4) {
+		unsigned s0, s1, s2, s3;
+		unsigned t0, t1, t2, t3; /* unreduced products, up to 9 bits wide */
+
+		s0 = state[i + 0];
+		s1 = state[i + 1];
+		s2 = state[i + 2];
+		s3 = state[i + 3];
+		t0 = (s0 << 1) ^ s1 ^ (s1 << 1) ^ s2 ^ s3; /* 02*s0 ^ 03*s1 ^ s2 ^ s3 */
+		t1 = s0 ^ (s1 << 1) ^ s2 ^ (s2 << 1) ^ s3; /* s0 ^ 02*s1 ^ 03*s2 ^ s3 */
+		t2 = s0 ^ s1 ^ (s2 << 1) ^ s3 ^ (s3 << 1); /* s0 ^ s1 ^ 02*s2 ^ 03*s3 */
+		t3 = s0 ^ (s0 << 1) ^ s1 ^ s2 ^ (s3 << 1); /* 03*s0 ^ s1 ^ s2 ^ 02*s3 */
+		state[i + 0] = t0 ^ ((unsigned)(-(int)(t0 >> 8)) & 0x11B); /* if bit 8 is set, XOR with 0x11B (clears bit 8, reduces); done with a mask instead of a branch */
+		state[i + 1] = t1 ^ ((unsigned)(-(int)(t1 >> 8)) & 0x11B);
+		state[i + 2] = t2 ^ ((unsigned)(-(int)(t2 >> 8)) & 0x11B);
+		state[i + 3] = t3 ^ ((unsigned)(-(int)(t3 >> 8)) & 0x11B);
+	}
+}
+
+/* see inner.h: encrypt one 16-byte block in place using the expanded key skey */
+void
+br_aes_small_encrypt(unsigned num_rounds, const uint32_t *skey, void *data)
+{
+	unsigned char *buf;
+	unsigned state[16]; /* one block byte per entry */
+	unsigned u;
+
+	buf = data;
+	for (u = 0; u < 16; u ++) {
+		state[u] = buf[u];
+	}
+	add_round_key(state, skey); /* subkey 0 */
+	for (u = 1; u < num_rounds; u ++) { /* subkeys 1 .. num_rounds-1, 4 words each */
+		sub_bytes(state);
+		shift_rows(state);
+		mix_columns(state);
+		add_round_key(state, skey + (u << 2));
+	}
+	sub_bytes(state); /* final round has no mix_columns and uses the last subkey */
+	shift_rows(state);
+	add_round_key(state, skey + (num_rounds << 2));
+	for (u = 0; u < 16; u ++) {
+		buf[u] = state[u]; /* write the result back over the input */
+	}
+}
--- a/third_party/bearssl/src/aes_x86ni.c
+++ b/third_party/bearssl/src/aes_x86ni.c
@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+/*
+ * This code contains the AES key schedule implementation using the
+ * AES-NI opcodes.
+ */
+
+#if BR_AES_X86NI
+
+/* see inner.h: report whether the CPU features needed by the AES-NI code are present */
+int
+br_aes_x86ni_supported(void)
+{
+	/*
+	 * Bit mask for features in ECX:
+	 *   19   SSE4.1 (used for _mm_insert_epi32(), for AES-CTR)
+	 *   25   AES-NI
+	 */
+	return br_cpuid(0, 0, 0x02080000, 0); /* 0x02080000 = bits 19 and 25; presumably nonzero only when both are set -- confirm in br_cpuid */
+}
+
+BR_TARGETS_X86_UP
+
+BR_TARGET("sse2,aes")
+static inline __m128i
+expand_step128(__m128i k, __m128i k2)
+{ /* one AES-128 key-schedule step: k is the previous round key, k2 the aeskeygenassist output for it */
+	k = _mm_xor_si128(k, _mm_slli_si128(k, 4)); /* three shift-by-4-bytes-and-XOR steps: each 32-bit word becomes the XOR of itself and all lower words */
+	k = _mm_xor_si128(k, _mm_slli_si128(k, 4));
+	k = _mm_xor_si128(k, _mm_slli_si128(k, 4));
+	k2 = _mm_shuffle_epi32(k2, 0xFF); /* broadcast the top 32-bit word of k2 to all four lanes */
+	return _mm_xor_si128(k, k2);
+}
+
+BR_TARGET("sse2,aes")
+static inline void
+expand_step192(__m128i *t1, __m128i *t2, __m128i *t3)
+{ /* one AES-192 key-schedule step; updates *t1, *t2 and *t3 in place */
+	__m128i t4;
+
+	*t2 = _mm_shuffle_epi32(*t2, 0x55); /* broadcast 32-bit word 1 of *t2 */
+	t4 = _mm_slli_si128(*t1, 0x4); /* XOR *t1 with itself shifted by 4, 8 and 12 bytes */
+	*t1 = _mm_xor_si128(*t1, t4);
+	t4 = _mm_slli_si128(t4, 0x4);
+	*t1 = _mm_xor_si128(*t1, t4);
+	t4 = _mm_slli_si128(t4, 0x4);
+	*t1 = _mm_xor_si128(*t1, t4);
+	*t1 = _mm_xor_si128(*t1, *t2);
+	*t2 = _mm_shuffle_epi32(*t1, 0xFF); /* broadcast the top word of the new *t1 */
+	t4 = _mm_slli_si128(*t3, 0x4); /* XOR *t3 with itself shifted by 4 bytes */
+	*t3 = _mm_xor_si128(*t3, t4);
+	*t3 = _mm_xor_si128(*t3, *t2);
+}
+
+BR_TARGET("sse2,aes")
+static inline void
+expand_step256_1(__m128i *t1, __m128i *t2)
+{ /* first half of an AES-256 key-schedule step; updates *t1 (and overwrites *t2 with its broadcast) */
+	__m128i t4;
+
+	*t2 = _mm_shuffle_epi32(*t2, 0xFF); /* broadcast the top 32-bit word of *t2 */
+	t4 = _mm_slli_si128(*t1, 0x4); /* XOR *t1 with itself shifted by 4, 8 and 12 bytes */
+	*t1 = _mm_xor_si128(*t1, t4);
+	t4 = _mm_slli_si128(t4, 0x4);
+	*t1 = _mm_xor_si128(*t1, t4);
+	t4 = _mm_slli_si128(t4, 0x4);
+	*t1 = _mm_xor_si128(*t1, t4);
+	*t1 = _mm_xor_si128(*t1, *t2);
+}
+
+BR_TARGET("sse2,aes")
+static inline void
+expand_step256_2(__m128i *t1, __m128i *t3)
+{ /* second half of an AES-256 key-schedule step; reads *t1, updates *t3 */
+	__m128i t2, t4;
+
+	t4 = _mm_aeskeygenassist_si128(*t1, 0x0); /* round constant 0 for this half-step */
+	t2 = _mm_shuffle_epi32(t4, 0xAA); /* broadcast 32-bit word 2 of the assist output */
+	t4 = _mm_slli_si128(*t3, 0x4); /* XOR *t3 with itself shifted by 4, 8 and 12 bytes */
+	*t3 = _mm_xor_si128(*t3, t4);
+	t4 = _mm_slli_si128(t4, 0x4);
+	*t3 = _mm_xor_si128(*t3, t4);
+	t4 = _mm_slli_si128(t4, 0x4);
+	*t3 = _mm_xor_si128(*t3, t4);
+	*t3 = _mm_xor_si128(*t3, t2);
+}
+
+/*
+ * Perform key schedule for AES, encryption direction. Subkeys are written
+ * in sk[], and the number of rounds is returned. Key length MUST be 16,
+ * 24 or 32 bytes. Any other length returns 0 and leaves sk[] untouched.
+ */
+BR_TARGET("sse2,aes")
+static unsigned
+x86ni_keysched(__m128i *sk, const void *key, size_t len)
+{
+	const unsigned char *kb;
+
+#define KEXP128(k, i, rcon)   do { \
+		k = expand_step128(k, _mm_aeskeygenassist_si128(k, rcon)); \
+		sk[i] = k; \
+	} while (0)
+
+#define KEXP192(i, rcon1, rcon2)   do { \
+		sk[(i) + 0] = t1; \
+		sk[(i) + 1] = t3; \
+		t2 = _mm_aeskeygenassist_si128(t3, rcon1); \
+		expand_step192(&t1, &t2, &t3); \
+		sk[(i) + 1] = _mm_castpd_si128(_mm_shuffle_pd( \
+			_mm_castsi128_pd(sk[(i) + 1]), \
+			_mm_castsi128_pd(t1), 0)); \
+		sk[(i) + 2] = _mm_castpd_si128(_mm_shuffle_pd( \
+			_mm_castsi128_pd(t1), \
+			_mm_castsi128_pd(t3), 1)); \
+		t2 = _mm_aeskeygenassist_si128(t3, rcon2); \
+		expand_step192(&t1, &t2, &t3); \
+	} while (0)
+
+#define KEXP256(i, rcon)   do { \
+		sk[(i) + 0] = t3; \
+		t2 = _mm_aeskeygenassist_si128(t3, rcon); \
+		expand_step256_1(&t1, &t2); \
+		sk[(i) + 1] = t1; \
+		expand_step256_2(&t1, &t3); \
+	} while (0)
+
+	kb = key;
+	switch (len) {
+		__m128i t1, t2, t3; /* working registers shared by all cases; declared before the first label, so not initialised here */
+
+	case 16: /* 16-byte key: writes sk[0..10], 10 rounds */
+		t1 = _mm_loadu_si128((const void *)kb);
+		sk[0] = t1;
+		KEXP128(t1,  1, 0x01);
+		KEXP128(t1,  2, 0x02);
+		KEXP128(t1,  3, 0x04);
+		KEXP128(t1,  4, 0x08);
+		KEXP128(t1,  5, 0x10);
+		KEXP128(t1,  6, 0x20);
+		KEXP128(t1,  7, 0x40);
+		KEXP128(t1,  8, 0x80);
+		KEXP128(t1,  9, 0x1B);
+		KEXP128(t1, 10, 0x36);
+		return 10;
+
+	case 24: /* 24-byte key: writes sk[0..12], 12 rounds */
+		t1 = _mm_loadu_si128((const void *)kb); /* key bytes 0..15 */
+		t3 = _mm_loadu_si128((const void *)(kb + 8)); /* key bytes 8..23 (overlapping load, stays inside the key) */
+		t3 = _mm_shuffle_epi32(t3, 0x4E); /* swap the two 64-bit halves so bytes 16..23 sit in the low half */
+		KEXP192(0, 0x01, 0x02); /* each KEXP192(i, ...) writes sk[i], sk[i+1], sk[i+2] */
+		KEXP192(3, 0x04, 0x08);
+		KEXP192(6, 0x10, 0x20);
+		KEXP192(9, 0x40, 0x80);
+		sk[12] = t1;
+		return 12;
+
+	case 32: /* 32-byte key: writes sk[0..14], 14 rounds */
+		t1 = _mm_loadu_si128((const void *)kb); /* key bytes 0..15 */
+		t3 = _mm_loadu_si128((const void *)(kb + 16)); /* key bytes 16..31 */
+		sk[0] = t1;
+		KEXP256( 1, 0x01); /* each KEXP256(i, ...) writes sk[i] and sk[i+1] */
+		KEXP256( 3, 0x02);
+		KEXP256( 5, 0x04);
+		KEXP256( 7, 0x08);
+		KEXP256( 9, 0x10);
+		KEXP256(11, 0x20);
+		sk[13] = t3; /* last pair done by hand: no second half-step is needed after sk[14] */
+		t2 = _mm_aeskeygenassist_si128(t3, 0x40);
+		expand_step256_1(&t1, &t2);
+		sk[14] = t1;
+		return 14;
+
+	default: /* unsupported key length: nothing is written to sk[] */
+		return 0;
+	}
+
+#undef KEXP128
+#undef KEXP192
+#undef KEXP256
+}
+
+/* see inner.h: expand key and store the encryption subkeys as raw bytes in skni; returns the round count */
+BR_TARGET("sse2,aes")
+unsigned
+br_aes_x86ni_keysched_enc(unsigned char *skni, const void *key, size_t len)
+{
+	__m128i sk[15]; /* room for the largest schedule (14 rounds + 1) */
+	unsigned num_rounds;
+
+	num_rounds = x86ni_keysched(sk, key, len);
+	memcpy(skni, sk, (num_rounds + 1) << 4); /* num_rounds+1 subkeys of 16 bytes; NOTE(review): a 0 return (bad len) still copies 16 bytes of unset sk[0] */
+	return num_rounds;
+}
+
+/* see inner.h: expand key and store the decryption subkeys as raw bytes in skni; returns the round count */
+BR_TARGET("sse2,aes")
+unsigned
+br_aes_x86ni_keysched_dec(unsigned char *skni, const void *key, size_t len)
+{
+	__m128i sk[15]; /* encryption-order schedule, reordered below */
+	unsigned u, num_rounds;
+
+	num_rounds = x86ni_keysched(sk, key, len);
+	_mm_storeu_si128((void *)skni, sk[num_rounds]); /* last encryption subkey becomes the first decryption subkey */
+	for (u = 1; u < num_rounds; u ++) { /* inner subkeys: reversed order, each passed through aesimc for use with aesdec */
+		_mm_storeu_si128((void *)(skni + (u << 4)),
+			_mm_aesimc_si128(sk[num_rounds - u]));
+	}
+	_mm_storeu_si128((void *)(skni + (num_rounds << 4)), sk[0]); /* first encryption subkey goes last, unchanged */
+	return num_rounds;
+}
+
+BR_TARGETS_X86_DOWN
+
+#endif
--- a/third_party/bearssl/src/aes_x86ni_cbcdec.c
+++ b/third_party/bearssl/src/aes_x86ni_cbcdec.c
@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+#if BR_AES_X86NI
+
+/* see bearssl_block.h: return the AES-NI CBC-decrypt vtable, or NULL when the CPU check fails */
+const br_block_cbcdec_class *
+br_aes_x86ni_cbcdec_get_vtable(void)
+{
+	return br_aes_x86ni_supported() ? &br_aes_x86ni_cbcdec_vtable : NULL; /* support is probed on every call */
+}
+
+/* see bearssl_block.h: set up an AES-NI CBC-decrypt context from a raw key */
+void
+br_aes_x86ni_cbcdec_init(br_aes_x86ni_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_x86ni_cbcdec_vtable;
+	ctx->num_rounds = br_aes_x86ni_keysched_dec(ctx->skey.skni, key, len); /* decryption-order subkeys are stored as bytes in ctx->skey.skni */
+}
+
+BR_TARGETS_X86_UP
+
+/* see bearssl_block.h: CBC-decrypt data in place, up to four blocks per iteration; iv is updated on exit */
+BR_TARGET("sse2,aes")
+void
+br_aes_x86ni_cbcdec_run(const br_aes_x86ni_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned num_rounds;
+	__m128i sk[15], ivx; /* local copy of the subkeys; ivx is the running chaining value */
+	unsigned u;
+
+	buf = data;
+	ivx = _mm_loadu_si128(iv);
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) { /* load num_rounds+1 subkeys of 16 bytes each */
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+	while (len > 0) {
+		__m128i x0, x1, x2, x3, e0, e1, e2, e3; /* x*: four lanes being decrypted; e*: saved ciphertext of each lane */
+
+		x0 = _mm_loadu_si128((void *)(buf +  0));
+		if (len >= 64) { /* four full blocks available */
+			x1 = _mm_loadu_si128((void *)(buf + 16));
+			x2 = _mm_loadu_si128((void *)(buf + 32));
+			x3 = _mm_loadu_si128((void *)(buf + 48));
+		} else { /* tail: fill unused lanes with copies so that lane 3 always holds the last real block */
+			x0 = _mm_loadu_si128((void *)(buf +  0)); /* same load as above the if; redundant but harmless */
+			if (len >= 32) {
+				x1 = _mm_loadu_si128((void *)(buf + 16));
+				if (len >= 48) { /* three blocks: lane 3 duplicates block 2 */
+					x2 = _mm_loadu_si128(
+						(void *)(buf + 32));
+					x3 = x2;
+				} else { /* two blocks: lanes 2,3 duplicate blocks 0,1 */
+					x2 = x0;
+					x3 = x1;
+				}
+			} else { /* one block: every lane duplicates block 0 */
+				x1 = x0;
+				x2 = x0;
+				x3 = x0;
+			}
+		}
+		e0 = x0; /* keep the ciphertext: it is the chaining input of the following block */
+		e1 = x1;
+		e2 = x2;
+		e3 = x3;
+		x0 = _mm_xor_si128(x0, sk[0]); /* initial subkey XOR */
+		x1 = _mm_xor_si128(x1, sk[0]);
+		x2 = _mm_xor_si128(x2, sk[0]);
+		x3 = _mm_xor_si128(x3, sk[0]);
+		x0 = _mm_aesdec_si128(x0, sk[1]); /* rounds 1..9 are common to every key size; lanes are interleaved round by round */
+		x1 = _mm_aesdec_si128(x1, sk[1]);
+		x2 = _mm_aesdec_si128(x2, sk[1]);
+		x3 = _mm_aesdec_si128(x3, sk[1]);
+		x0 = _mm_aesdec_si128(x0, sk[2]);
+		x1 = _mm_aesdec_si128(x1, sk[2]);
+		x2 = _mm_aesdec_si128(x2, sk[2]);
+		x3 = _mm_aesdec_si128(x3, sk[2]);
+		x0 = _mm_aesdec_si128(x0, sk[3]);
+		x1 = _mm_aesdec_si128(x1, sk[3]);
+		x2 = _mm_aesdec_si128(x2, sk[3]);
+		x3 = _mm_aesdec_si128(x3, sk[3]);
+		x0 = _mm_aesdec_si128(x0, sk[4]);
+		x1 = _mm_aesdec_si128(x1, sk[4]);
+		x2 = _mm_aesdec_si128(x2, sk[4]);
+		x3 = _mm_aesdec_si128(x3, sk[4]);
+		x0 = _mm_aesdec_si128(x0, sk[5]);
+		x1 = _mm_aesdec_si128(x1, sk[5]);
+		x2 = _mm_aesdec_si128(x2, sk[5]);
+		x3 = _mm_aesdec_si128(x3, sk[5]);
+		x0 = _mm_aesdec_si128(x0, sk[6]);
+		x1 = _mm_aesdec_si128(x1, sk[6]);
+		x2 = _mm_aesdec_si128(x2, sk[6]);
+		x3 = _mm_aesdec_si128(x3, sk[6]);
+		x0 = _mm_aesdec_si128(x0, sk[7]);
+		x1 = _mm_aesdec_si128(x1, sk[7]);
+		x2 = _mm_aesdec_si128(x2, sk[7]);
+		x3 = _mm_aesdec_si128(x3, sk[7]);
+		x0 = _mm_aesdec_si128(x0, sk[8]);
+		x1 = _mm_aesdec_si128(x1, sk[8]);
+		x2 = _mm_aesdec_si128(x2, sk[8]);
+		x3 = _mm_aesdec_si128(x3, sk[8]);
+		x0 = _mm_aesdec_si128(x0, sk[9]);
+		x1 = _mm_aesdec_si128(x1, sk[9]);
+		x2 = _mm_aesdec_si128(x2, sk[9]);
+		x3 = _mm_aesdec_si128(x3, sk[9]);
+		if (num_rounds == 10) { /* 10 rounds: sk[10] is the final subkey */
+			x0 = _mm_aesdeclast_si128(x0, sk[10]);
+			x1 = _mm_aesdeclast_si128(x1, sk[10]);
+			x2 = _mm_aesdeclast_si128(x2, sk[10]);
+			x3 = _mm_aesdeclast_si128(x3, sk[10]);
+		} else if (num_rounds == 12) { /* 12 rounds: two more rounds, sk[12] is final */
+			x0 = _mm_aesdec_si128(x0, sk[10]);
+			x1 = _mm_aesdec_si128(x1, sk[10]);
+			x2 = _mm_aesdec_si128(x2, sk[10]);
+			x3 = _mm_aesdec_si128(x3, sk[10]);
+			x0 = _mm_aesdec_si128(x0, sk[11]);
+			x1 = _mm_aesdec_si128(x1, sk[11]);
+			x2 = _mm_aesdec_si128(x2, sk[11]);
+			x3 = _mm_aesdec_si128(x3, sk[11]);
+			x0 = _mm_aesdeclast_si128(x0, sk[12]);
+			x1 = _mm_aesdeclast_si128(x1, sk[12]);
+			x2 = _mm_aesdeclast_si128(x2, sk[12]);
+			x3 = _mm_aesdeclast_si128(x3, sk[12]);
+		} else { /* any other round count is treated as 14 rounds: sk[14] is final */
+			x0 = _mm_aesdec_si128(x0, sk[10]);
+			x1 = _mm_aesdec_si128(x1, sk[10]);
+			x2 = _mm_aesdec_si128(x2, sk[10]);
+			x3 = _mm_aesdec_si128(x3, sk[10]);
+			x0 = _mm_aesdec_si128(x0, sk[11]);
+			x1 = _mm_aesdec_si128(x1, sk[11]);
+			x2 = _mm_aesdec_si128(x2, sk[11]);
+			x3 = _mm_aesdec_si128(x3, sk[11]);
+			x0 = _mm_aesdec_si128(x0, sk[12]);
+			x1 = _mm_aesdec_si128(x1, sk[12]);
+			x2 = _mm_aesdec_si128(x2, sk[12]);
+			x3 = _mm_aesdec_si128(x3, sk[12]);
+			x0 = _mm_aesdec_si128(x0, sk[13]);
+			x1 = _mm_aesdec_si128(x1, sk[13]);
+			x2 = _mm_aesdec_si128(x2, sk[13]);
+			x3 = _mm_aesdec_si128(x3, sk[13]);
+			x0 = _mm_aesdeclast_si128(x0, sk[14]);
+			x1 = _mm_aesdeclast_si128(x1, sk[14]);
+			x2 = _mm_aesdeclast_si128(x2, sk[14]);
+			x3 = _mm_aesdeclast_si128(x3, sk[14]);
+		}
+		x0 = _mm_xor_si128(x0, ivx); /* CBC unchaining: XOR each lane with the preceding ciphertext block */
+		x1 = _mm_xor_si128(x1, e0);
+		x2 = _mm_xor_si128(x2, e1);
+		x3 = _mm_xor_si128(x3, e2);
+		ivx = e3; /* lane 3 is the last real ciphertext block in every case above */
+		_mm_storeu_si128((void *)(buf +  0), x0);
+		if (len >= 64) {
+			_mm_storeu_si128((void *)(buf + 16), x1);
+			_mm_storeu_si128((void *)(buf + 32), x2);
+			_mm_storeu_si128((void *)(buf + 48), x3);
+			buf += 64;
+			len -= 64;
+		} else { /* tail: store only the lanes that hold real blocks, then stop */
+			if (len >= 32) {
+				_mm_storeu_si128((void *)(buf + 16), x1);
+				if (len >= 48) {
+					_mm_storeu_si128(
+						(void *)(buf + 32), x2);
+				}
+			}
+			break;
+		}
+	}
+	_mm_storeu_si128(iv, ivx); /* hand the chaining value back to the caller */
+}
+
+BR_TARGETS_X86_DOWN
+
+/* see bearssl_block.h: dispatch table for the AES-NI CBC-decrypt implementation */
+const br_block_cbcdec_class br_aes_x86ni_cbcdec_vtable = {
+	sizeof(br_aes_x86ni_cbcdec_keys), /* size of the context structure */
+	16, /* presumably the block size in bytes -- confirm against br_block_cbcdec_class */
+	4, /* presumably log2 of the block size -- confirm against br_block_cbcdec_class */
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))
+		&br_aes_x86ni_cbcdec_init, /* cast from the concrete-context signature to the generic one */
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+		&br_aes_x86ni_cbcdec_run
+};
+
+#else
+
+/* see bearssl_block.h: fallback when BR_AES_X86NI is not enabled -- no AES-NI implementation is available */
+const br_block_cbcdec_class *
+br_aes_x86ni_cbcdec_get_vtable(void)
+{
+	return NULL;
+}
+
+#endif
--- a/third_party/bearssl/src/aes_x86ni_cbcenc.c
+++ b/third_party/bearssl/src/aes_x86ni_cbcenc.c
@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+#if BR_AES_X86NI
+
+/* see bearssl_block.h: return the AES-NI CBC-encrypt vtable, or NULL when the CPU check fails */
+const br_block_cbcenc_class *
+br_aes_x86ni_cbcenc_get_vtable(void)
+{
+	return br_aes_x86ni_supported() ? &br_aes_x86ni_cbcenc_vtable : NULL; /* support is probed on every call */
+}
+
+/* see bearssl_block.h: set up an AES-NI CBC-encrypt context from a raw key */
+void
+br_aes_x86ni_cbcenc_init(br_aes_x86ni_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_x86ni_cbcenc_vtable;
+	ctx->num_rounds = br_aes_x86ni_keysched_enc(ctx->skey.skni, key, len); /* encryption-order subkeys are stored as bytes in ctx->skey.skni */
+}
+
+BR_TARGETS_X86_UP
+
+/* see bearssl_block.h: CBC-encrypt data in place, one 16-byte block at a time; iv is updated on exit */
+BR_TARGET("sse2,aes")
+void
+br_aes_x86ni_cbcenc_run(const br_aes_x86ni_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned num_rounds;
+	__m128i sk[15], ivx; /* local copy of the subkeys; ivx is the running chaining value */
+	unsigned u;
+
+	buf = data;
+	ivx = _mm_loadu_si128(iv);
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) { /* load num_rounds+1 subkeys of 16 bytes each */
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+	while (len > 0) { /* len is reduced by 16 unconditionally, so it must be a multiple of 16 or the size_t wraps */
+		__m128i x;
+
+		x = _mm_xor_si128(_mm_loadu_si128((void *)buf), ivx); /* CBC chaining: plaintext XOR previous ciphertext (or iv) */
+		x = _mm_xor_si128(x, sk[0]); /* initial subkey XOR */
+		x = _mm_aesenc_si128(x, sk[1]); /* rounds 1..9 are common to every key size */
+		x = _mm_aesenc_si128(x, sk[2]);
+		x = _mm_aesenc_si128(x, sk[3]);
+		x = _mm_aesenc_si128(x, sk[4]);
+		x = _mm_aesenc_si128(x, sk[5]);
+		x = _mm_aesenc_si128(x, sk[6]);
+		x = _mm_aesenc_si128(x, sk[7]);
+		x = _mm_aesenc_si128(x, sk[8]);
+		x = _mm_aesenc_si128(x, sk[9]);
+		if (num_rounds == 10) { /* 10 rounds: sk[10] is the final subkey */
+			x = _mm_aesenclast_si128(x, sk[10]);
+		} else if (num_rounds == 12) { /* 12 rounds: sk[12] is final */
+			x = _mm_aesenc_si128(x, sk[10]);
+			x = _mm_aesenc_si128(x, sk[11]);
+			x = _mm_aesenclast_si128(x, sk[12]);
+		} else { /* any other round count is treated as 14 rounds: sk[14] is final */
+			x = _mm_aesenc_si128(x, sk[10]);
+			x = _mm_aesenc_si128(x, sk[11]);
+			x = _mm_aesenc_si128(x, sk[12]);
+			x = _mm_aesenc_si128(x, sk[13]);
+			x = _mm_aesenclast_si128(x, sk[14]);
+		}
+		ivx = x; /* this ciphertext block chains into the next one */
+		_mm_storeu_si128((void *)buf, x);
+		buf += 16;
+		len -= 16;
+	}
+	_mm_storeu_si128(iv, ivx); /* hand the chaining value back to the caller */
+}
+
+BR_TARGETS_X86_DOWN
+
+/* see bearssl_block.h: dispatch table for the AES-NI CBC-encrypt implementation */
+const br_block_cbcenc_class br_aes_x86ni_cbcenc_vtable = {
+	sizeof(br_aes_x86ni_cbcenc_keys), /* size of the context structure */
+	16, /* presumably the block size in bytes -- confirm against br_block_cbcenc_class */
+	4, /* presumably log2 of the block size -- confirm against br_block_cbcenc_class */
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
+		&br_aes_x86ni_cbcenc_init, /* cast from the concrete-context signature to the generic one */
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+		&br_aes_x86ni_cbcenc_run
+};
+
+#else
+
+/* see bearssl_block.h: fallback when BR_AES_X86NI is not enabled -- no AES-NI implementation is available */
+const br_block_cbcenc_class *
+br_aes_x86ni_cbcenc_get_vtable(void)
+{
+	return NULL;
+}
+
+#endif
--- a/third_party/bearssl/src/aes_x86ni_ctr.c
+++ b/third_party/bearssl/src/aes_x86ni_ctr.c
@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+#if BR_AES_X86NI
+
+/*
+ * see bearssl_block.h
+ *
+ * Returns the AES-NI CTR vtable when br_aes_x86ni_supported() reports
+ * a nonzero value, and NULL otherwise.
+ */
+const br_block_ctr_class *
+br_aes_x86ni_ctr_get_vtable(void)
+{
+	if (!br_aes_x86ni_supported()) {
+		return NULL;
+	}
+	return &br_aes_x86ni_ctr_vtable;
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Initialises a CTR key context: installs the AES-NI CTR vtable, runs
+ * the encryption key schedule into ctx->skey.skni and records the
+ * value it returns as the round count used by the run function.
+ */
+void
+br_aes_x86ni_ctr_init(br_aes_x86ni_ctr_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_x86ni_ctr_vtable;
+	ctx->num_rounds = br_aes_x86ni_keysched_enc(ctx->skey.skni, key, len);
+}
+
+BR_TARGETS_X86_UP
+
+/*
+ * see bearssl_block.h
+ *
+ * AES-CTR: data[] is XORed in place with the keystream, four counter
+ * blocks being encrypted per loop iteration. Each counter block is the
+ * 12-byte iv followed by the 32-bit counter, which is byte-swapped with
+ * br_bswap32() and inserted as 32-bit lane 3 (bytes 12..15).
+ *
+ * Returned value is the updated counter: cc is advanced by 4 for every
+ * full 64-byte group, and by len/16 (rounded down) for a final group
+ * shorter than 64 bytes. Round counts other than 10 or 12 take the
+ * 14-round path.
+ */
+BR_TARGET("sse2,sse4.1,aes")
+uint32_t
+br_aes_x86ni_ctr_run(const br_aes_x86ni_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned char ivbuf[16];
+	unsigned num_rounds;
+	__m128i sk[15];
+	__m128i ivx;
+	unsigned u;
+
+	buf = data;
+	memcpy(ivbuf, iv, 12);	/* bytes 12..15 are not set; lane 3 is replaced below */
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) {	/* num_rounds + 1 round keys */
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+	ivx = _mm_loadu_si128((void *)ivbuf);
+	while (len > 0) {
+		__m128i x0, x1, x2, x3;
+
+		x0 = _mm_insert_epi32(ivx, br_bswap32(cc + 0), 3);
+		x1 = _mm_insert_epi32(ivx, br_bswap32(cc + 1), 3);
+		x2 = _mm_insert_epi32(ivx, br_bswap32(cc + 2), 3);
+		x3 = _mm_insert_epi32(ivx, br_bswap32(cc + 3), 3);
+		x0 = _mm_xor_si128(x0, sk[0]);
+		x1 = _mm_xor_si128(x1, sk[0]);
+		x2 = _mm_xor_si128(x2, sk[0]);
+		x3 = _mm_xor_si128(x3, sk[0]);
+		x0 = _mm_aesenc_si128(x0, sk[1]);
+		x1 = _mm_aesenc_si128(x1, sk[1]);
+		x2 = _mm_aesenc_si128(x2, sk[1]);
+		x3 = _mm_aesenc_si128(x3, sk[1]);
+		x0 = _mm_aesenc_si128(x0, sk[2]);
+		x1 = _mm_aesenc_si128(x1, sk[2]);
+		x2 = _mm_aesenc_si128(x2, sk[2]);
+		x3 = _mm_aesenc_si128(x3, sk[2]);
+		x0 = _mm_aesenc_si128(x0, sk[3]);
+		x1 = _mm_aesenc_si128(x1, sk[3]);
+		x2 = _mm_aesenc_si128(x2, sk[3]);
+		x3 = _mm_aesenc_si128(x3, sk[3]);
+		x0 = _mm_aesenc_si128(x0, sk[4]);
+		x1 = _mm_aesenc_si128(x1, sk[4]);
+		x2 = _mm_aesenc_si128(x2, sk[4]);
+		x3 = _mm_aesenc_si128(x3, sk[4]);
+		x0 = _mm_aesenc_si128(x0, sk[5]);
+		x1 = _mm_aesenc_si128(x1, sk[5]);
+		x2 = _mm_aesenc_si128(x2, sk[5]);
+		x3 = _mm_aesenc_si128(x3, sk[5]);
+		x0 = _mm_aesenc_si128(x0, sk[6]);
+		x1 = _mm_aesenc_si128(x1, sk[6]);
+		x2 = _mm_aesenc_si128(x2, sk[6]);
+		x3 = _mm_aesenc_si128(x3, sk[6]);
+		x0 = _mm_aesenc_si128(x0, sk[7]);
+		x1 = _mm_aesenc_si128(x1, sk[7]);
+		x2 = _mm_aesenc_si128(x2, sk[7]);
+		x3 = _mm_aesenc_si128(x3, sk[7]);
+		x0 = _mm_aesenc_si128(x0, sk[8]);
+		x1 = _mm_aesenc_si128(x1, sk[8]);
+		x2 = _mm_aesenc_si128(x2, sk[8]);
+		x3 = _mm_aesenc_si128(x3, sk[8]);
+		x0 = _mm_aesenc_si128(x0, sk[9]);
+		x1 = _mm_aesenc_si128(x1, sk[9]);
+		x2 = _mm_aesenc_si128(x2, sk[9]);
+		x3 = _mm_aesenc_si128(x3, sk[9]);
+		if (num_rounds == 10) {
+			x0 = _mm_aesenclast_si128(x0, sk[10]);
+			x1 = _mm_aesenclast_si128(x1, sk[10]);
+			x2 = _mm_aesenclast_si128(x2, sk[10]);
+			x3 = _mm_aesenclast_si128(x3, sk[10]);
+		} else if (num_rounds == 12) {
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x2 = _mm_aesenc_si128(x2, sk[10]);
+			x3 = _mm_aesenc_si128(x3, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x2 = _mm_aesenc_si128(x2, sk[11]);
+			x3 = _mm_aesenc_si128(x3, sk[11]);
+			x0 = _mm_aesenclast_si128(x0, sk[12]);
+			x1 = _mm_aesenclast_si128(x1, sk[12]);
+			x2 = _mm_aesenclast_si128(x2, sk[12]);
+			x3 = _mm_aesenclast_si128(x3, sk[12]);
+		} else {
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x2 = _mm_aesenc_si128(x2, sk[10]);
+			x3 = _mm_aesenc_si128(x3, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x2 = _mm_aesenc_si128(x2, sk[11]);
+			x3 = _mm_aesenc_si128(x3, sk[11]);
+			x0 = _mm_aesenc_si128(x0, sk[12]);
+			x1 = _mm_aesenc_si128(x1, sk[12]);
+			x2 = _mm_aesenc_si128(x2, sk[12]);
+			x3 = _mm_aesenc_si128(x3, sk[12]);
+			x0 = _mm_aesenc_si128(x0, sk[13]);
+			x1 = _mm_aesenc_si128(x1, sk[13]);
+			x2 = _mm_aesenc_si128(x2, sk[13]);
+			x3 = _mm_aesenc_si128(x3, sk[13]);
+			x0 = _mm_aesenclast_si128(x0, sk[14]);
+			x1 = _mm_aesenclast_si128(x1, sk[14]);
+			x2 = _mm_aesenclast_si128(x2, sk[14]);
+			x3 = _mm_aesenclast_si128(x3, sk[14]);
+		}
+		if (len >= 64) {	/* full group: XOR directly in the buffer */
+			x0 = _mm_xor_si128(x0,
+				_mm_loadu_si128((void *)(buf +  0)));
+			x1 = _mm_xor_si128(x1,
+				_mm_loadu_si128((void *)(buf + 16)));
+			x2 = _mm_xor_si128(x2,
+				_mm_loadu_si128((void *)(buf + 32)));
+			x3 = _mm_xor_si128(x3,
+				_mm_loadu_si128((void *)(buf + 48)));
+			_mm_storeu_si128((void *)(buf +  0), x0);
+			_mm_storeu_si128((void *)(buf + 16), x1);
+			_mm_storeu_si128((void *)(buf + 32), x2);
+			_mm_storeu_si128((void *)(buf + 48), x3);
+			buf += 64;
+			len -= 64;
+			cc += 4;
+		} else {	/* short tail: XOR byte by byte from a scratch copy */
+			unsigned char tmp[64];
+
+			_mm_storeu_si128((void *)(tmp +  0), x0);
+			_mm_storeu_si128((void *)(tmp + 16), x1);
+			_mm_storeu_si128((void *)(tmp + 32), x2);
+			_mm_storeu_si128((void *)(tmp + 48), x3);
+			for (u = 0; u < len; u ++) {
+				buf[u] ^= tmp[u];
+			}
+			cc += (uint32_t)len >> 4;	/* only complete 16-byte blocks are counted */
+			break;
+		}
+	}
+	return cc;
+}
+
+BR_TARGETS_X86_DOWN
+
+/*
+ * see bearssl_block.h
+ *
+ * Method table for the AES-NI CTR implementation. The function
+ * pointers are cast because the concrete functions take the typed key
+ * structure instead of the generic class pointer.
+ */
+const br_block_ctr_class br_aes_x86ni_ctr_vtable = {
+	sizeof(br_aes_x86ni_ctr_keys),	/* size of the key structure */
+	16,	/* presumably the block size in bytes -- confirm in bearssl_block.h */
+	4,	/* presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_ctr_class **, const void *, size_t))
+		&br_aes_x86ni_ctr_init,
+	(uint32_t (*)(const br_block_ctr_class *const *,
+		const void *, uint32_t, void *, size_t))
+		&br_aes_x86ni_ctr_run
+};
+
+#else
+
+/*
+ * see bearssl_block.h
+ *
+ * Variant compiled when BR_AES_X86NI is false: the AES-NI code above
+ * is not built, so no vtable is available and NULL is always returned.
+ */
+const br_block_ctr_class *
+br_aes_x86ni_ctr_get_vtable(void)
+{
+	return NULL;
+}
+
+#endif
--- a/third_party/bearssl/src/aes_x86ni_ctrcbc.c
+++ b/third_party/bearssl/src/aes_x86ni_ctrcbc.c
@ -0,0 +1,596 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+#if BR_AES_X86NI
+
+/*
+ * see bearssl_block.h
+ *
+ * Returns the AES-NI CTR + CBC-MAC vtable when
+ * br_aes_x86ni_supported() reports a nonzero value, and NULL
+ * otherwise.
+ */
+const br_block_ctrcbc_class *
+br_aes_x86ni_ctrcbc_get_vtable(void)
+{
+	if (!br_aes_x86ni_supported()) {
+		return NULL;
+	}
+	return &br_aes_x86ni_ctrcbc_vtable;
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Initialises a CTR + CBC-MAC key context: installs the AES-NI vtable,
+ * runs the encryption key schedule into ctx->skey.skni and records the
+ * value it returns as the round count used by the processing functions.
+ */
+void
+br_aes_x86ni_ctrcbc_init(br_aes_x86ni_ctrcbc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_x86ni_ctrcbc_vtable;
+	ctx->num_rounds = br_aes_x86ni_keysched_enc(ctx->skey.skni, key, len);
+}
+
+BR_TARGETS_X86_UP
+
+/*
+ * see bearssl_block.h
+ *
+ * CTR-only processing with a full 128-bit counter: data[] is XORed in
+ * place with the keystream, four counter blocks being encrypted per
+ * loop iteration. The counter is read from ctr as a 16-byte big-endian
+ * value and the updated counter is written back to ctr on exit.
+ *
+ * Internally the counter is kept byte-reversed so that its low 64 bits
+ * sit in the low lane; the carry into the high lane is emulated with a
+ * compare-to-zero mask moved up by 8 bytes and subtracted.
+ *
+ * For a final group shorter than 64 bytes the stored counter only
+ * advances when len is exactly 16, 32 or 48; any other residual length
+ * leaves it at the first counter value of that group.
+ */
+BR_TARGET("sse2,sse4.1,aes")
+void
+br_aes_x86ni_ctrcbc_ctr(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned num_rounds;
+	__m128i sk[15];
+	__m128i ivx0, ivx1, ivx2, ivx3;
+	__m128i erev, zero, one, four, notthree;
+	unsigned u;
+
+	buf = data;
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) {	/* num_rounds + 1 round keys */
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+
+	/*
+	 * Some SSE2 constants.
+	 *
+	 * erev is the byte-reversal mask used with _mm_shuffle_epi8();
+	 * one and four hold their value in the low 64-bit lane only;
+	 * notthree is 0 - 4 in the low lane (all bits set except the
+	 * two lowest) and zero in the high lane.
+	 */
+	erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
+		8, 9, 10, 11, 12, 13, 14, 15);
+	zero = _mm_setzero_si128();
+	one = _mm_set_epi64x(0, 1);
+	four = _mm_set_epi64x(0, 4);
+	notthree = _mm_sub_epi64(zero, four);
+
+	/*
+	 * Decode the counter in big-endian and pre-increment the other
+	 * three counters.
+	 *
+	 * After each +1 on the low lane, a low lane equal to zero means
+	 * it wrapped: the all-ones compare result is shifted into the
+	 * high lane and subtracted, which adds 1 there.
+	 */
+	ivx0 = _mm_shuffle_epi8(_mm_loadu_si128((void *)ctr), erev);
+	ivx1 = _mm_add_epi64(ivx0, one);
+	ivx1 = _mm_sub_epi64(ivx1,
+		_mm_slli_si128(_mm_cmpeq_epi64(ivx1, zero), 8));
+	ivx2 = _mm_add_epi64(ivx1, one);
+	ivx2 = _mm_sub_epi64(ivx2,
+		_mm_slli_si128(_mm_cmpeq_epi64(ivx2, zero), 8));
+	ivx3 = _mm_add_epi64(ivx2, one);
+	ivx3 = _mm_sub_epi64(ivx3,
+		_mm_slli_si128(_mm_cmpeq_epi64(ivx3, zero), 8));
+	while (len > 0) {
+		__m128i x0, x1, x2, x3;
+
+		/*
+		 * Load counter values; we need to byteswap them because
+		 * the specification says that they use big-endian.
+		 */
+		x0 = _mm_shuffle_epi8(ivx0, erev);
+		x1 = _mm_shuffle_epi8(ivx1, erev);
+		x2 = _mm_shuffle_epi8(ivx2, erev);
+		x3 = _mm_shuffle_epi8(ivx3, erev);
+
+		x0 = _mm_xor_si128(x0, sk[0]);
+		x1 = _mm_xor_si128(x1, sk[0]);
+		x2 = _mm_xor_si128(x2, sk[0]);
+		x3 = _mm_xor_si128(x3, sk[0]);
+		x0 = _mm_aesenc_si128(x0, sk[1]);
+		x1 = _mm_aesenc_si128(x1, sk[1]);
+		x2 = _mm_aesenc_si128(x2, sk[1]);
+		x3 = _mm_aesenc_si128(x3, sk[1]);
+		x0 = _mm_aesenc_si128(x0, sk[2]);
+		x1 = _mm_aesenc_si128(x1, sk[2]);
+		x2 = _mm_aesenc_si128(x2, sk[2]);
+		x3 = _mm_aesenc_si128(x3, sk[2]);
+		x0 = _mm_aesenc_si128(x0, sk[3]);
+		x1 = _mm_aesenc_si128(x1, sk[3]);
+		x2 = _mm_aesenc_si128(x2, sk[3]);
+		x3 = _mm_aesenc_si128(x3, sk[3]);
+		x0 = _mm_aesenc_si128(x0, sk[4]);
+		x1 = _mm_aesenc_si128(x1, sk[4]);
+		x2 = _mm_aesenc_si128(x2, sk[4]);
+		x3 = _mm_aesenc_si128(x3, sk[4]);
+		x0 = _mm_aesenc_si128(x0, sk[5]);
+		x1 = _mm_aesenc_si128(x1, sk[5]);
+		x2 = _mm_aesenc_si128(x2, sk[5]);
+		x3 = _mm_aesenc_si128(x3, sk[5]);
+		x0 = _mm_aesenc_si128(x0, sk[6]);
+		x1 = _mm_aesenc_si128(x1, sk[6]);
+		x2 = _mm_aesenc_si128(x2, sk[6]);
+		x3 = _mm_aesenc_si128(x3, sk[6]);
+		x0 = _mm_aesenc_si128(x0, sk[7]);
+		x1 = _mm_aesenc_si128(x1, sk[7]);
+		x2 = _mm_aesenc_si128(x2, sk[7]);
+		x3 = _mm_aesenc_si128(x3, sk[7]);
+		x0 = _mm_aesenc_si128(x0, sk[8]);
+		x1 = _mm_aesenc_si128(x1, sk[8]);
+		x2 = _mm_aesenc_si128(x2, sk[8]);
+		x3 = _mm_aesenc_si128(x3, sk[8]);
+		x0 = _mm_aesenc_si128(x0, sk[9]);
+		x1 = _mm_aesenc_si128(x1, sk[9]);
+		x2 = _mm_aesenc_si128(x2, sk[9]);
+		x3 = _mm_aesenc_si128(x3, sk[9]);
+		if (num_rounds == 10) {
+			x0 = _mm_aesenclast_si128(x0, sk[10]);
+			x1 = _mm_aesenclast_si128(x1, sk[10]);
+			x2 = _mm_aesenclast_si128(x2, sk[10]);
+			x3 = _mm_aesenclast_si128(x3, sk[10]);
+		} else if (num_rounds == 12) {
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x2 = _mm_aesenc_si128(x2, sk[10]);
+			x3 = _mm_aesenc_si128(x3, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x2 = _mm_aesenc_si128(x2, sk[11]);
+			x3 = _mm_aesenc_si128(x3, sk[11]);
+			x0 = _mm_aesenclast_si128(x0, sk[12]);
+			x1 = _mm_aesenclast_si128(x1, sk[12]);
+			x2 = _mm_aesenclast_si128(x2, sk[12]);
+			x3 = _mm_aesenclast_si128(x3, sk[12]);
+		} else {
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x2 = _mm_aesenc_si128(x2, sk[10]);
+			x3 = _mm_aesenc_si128(x3, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x2 = _mm_aesenc_si128(x2, sk[11]);
+			x3 = _mm_aesenc_si128(x3, sk[11]);
+			x0 = _mm_aesenc_si128(x0, sk[12]);
+			x1 = _mm_aesenc_si128(x1, sk[12]);
+			x2 = _mm_aesenc_si128(x2, sk[12]);
+			x3 = _mm_aesenc_si128(x3, sk[12]);
+			x0 = _mm_aesenc_si128(x0, sk[13]);
+			x1 = _mm_aesenc_si128(x1, sk[13]);
+			x2 = _mm_aesenc_si128(x2, sk[13]);
+			x3 = _mm_aesenc_si128(x3, sk[13]);
+			x0 = _mm_aesenclast_si128(x0, sk[14]);
+			x1 = _mm_aesenclast_si128(x1, sk[14]);
+			x2 = _mm_aesenclast_si128(x2, sk[14]);
+			x3 = _mm_aesenclast_si128(x3, sk[14]);
+		}
+		if (len >= 64) {
+			x0 = _mm_xor_si128(x0,
+				_mm_loadu_si128((void *)(buf +  0)));
+			x1 = _mm_xor_si128(x1,
+				_mm_loadu_si128((void *)(buf + 16)));
+			x2 = _mm_xor_si128(x2,
+				_mm_loadu_si128((void *)(buf + 32)));
+			x3 = _mm_xor_si128(x3,
+				_mm_loadu_si128((void *)(buf + 48)));
+			_mm_storeu_si128((void *)(buf +  0), x0);
+			_mm_storeu_si128((void *)(buf + 16), x1);
+			_mm_storeu_si128((void *)(buf + 32), x2);
+			_mm_storeu_si128((void *)(buf + 48), x3);
+			buf += 64;
+			len -= 64;
+		} else {
+			unsigned char tmp[64];
+
+			_mm_storeu_si128((void *)(tmp +  0), x0);
+			_mm_storeu_si128((void *)(tmp + 16), x1);
+			_mm_storeu_si128((void *)(tmp + 32), x2);
+			_mm_storeu_si128((void *)(tmp + 48), x3);
+			for (u = 0; u < len; u ++) {
+				buf[u] ^= tmp[u];
+			}
+			switch (len) {	/* pick the counter that follows the last whole block */
+			case 16:
+				ivx0 = ivx1;
+				break;
+			case 32:
+				ivx0 = ivx2;
+				break;
+			case 48:
+				ivx0 = ivx3;
+				break;
+			}
+			break;
+		}
+
+		/*
+		 * Add 4 to each counter value. For carry propagation
+		 * into the upper 64-bit words, we would need to compare
+		 * the results with 4, but SSE2+ has only _signed_
+		 * comparisons. Instead, we mask out the low two bits,
+		 * and check whether the remaining bits are zero.
+		 */
+		ivx0 = _mm_add_epi64(ivx0, four);
+		ivx1 = _mm_add_epi64(ivx1, four);
+		ivx2 = _mm_add_epi64(ivx2, four);
+		ivx3 = _mm_add_epi64(ivx3, four);
+		ivx0 = _mm_sub_epi64(ivx0,
+			_mm_slli_si128(_mm_cmpeq_epi64(
+				_mm_and_si128(ivx0, notthree), zero), 8));
+		ivx1 = _mm_sub_epi64(ivx1,
+			_mm_slli_si128(_mm_cmpeq_epi64(
+				_mm_and_si128(ivx1, notthree), zero), 8));
+		ivx2 = _mm_sub_epi64(ivx2,
+			_mm_slli_si128(_mm_cmpeq_epi64(
+				_mm_and_si128(ivx2, notthree), zero), 8));
+		ivx3 = _mm_sub_epi64(ivx3,
+			_mm_slli_si128(_mm_cmpeq_epi64(
+				_mm_and_si128(ivx3, notthree), zero), 8));
+	}
+
+	/*
+	 * Write back new counter value. The loop took care to put the
+	 * right counter value in ivx0.
+	 */
+	_mm_storeu_si128((void *)ctr, _mm_shuffle_epi8(ivx0, erev));
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CBC-MAC update: for every 16-byte block of data[], the running MAC
+ * value (read from cbcmac on entry) is XORed with the block and
+ * encrypted. data[] is only read; the final MAC value is stored back
+ * into cbcmac.
+ *
+ * The loop consumes exactly 16 bytes per iteration and only stops when
+ * len reaches zero, so len has to be a multiple of 16. Round counts
+ * other than 10 or 12 take the 14-round path.
+ */
+BR_TARGET("sse2,sse4.1,aes")
+void
+br_aes_x86ni_ctrcbc_mac(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	const unsigned char *buf;
+	unsigned num_rounds;
+	__m128i sk[15], ivx;
+	unsigned u;
+
+	buf = data;
+	ivx = _mm_loadu_si128(cbcmac);	/* running MAC value */
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) {	/* num_rounds + 1 round keys */
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+	while (len > 0) {
+		__m128i x;
+
+		x = _mm_xor_si128(_mm_loadu_si128((void *)buf), ivx);
+		x = _mm_xor_si128(x, sk[0]);
+		x = _mm_aesenc_si128(x, sk[1]);
+		x = _mm_aesenc_si128(x, sk[2]);
+		x = _mm_aesenc_si128(x, sk[3]);
+		x = _mm_aesenc_si128(x, sk[4]);
+		x = _mm_aesenc_si128(x, sk[5]);
+		x = _mm_aesenc_si128(x, sk[6]);
+		x = _mm_aesenc_si128(x, sk[7]);
+		x = _mm_aesenc_si128(x, sk[8]);
+		x = _mm_aesenc_si128(x, sk[9]);
+		if (num_rounds == 10) {
+			x = _mm_aesenclast_si128(x, sk[10]);
+		} else if (num_rounds == 12) {
+			x = _mm_aesenc_si128(x, sk[10]);
+			x = _mm_aesenc_si128(x, sk[11]);
+			x = _mm_aesenclast_si128(x, sk[12]);
+		} else {
+			x = _mm_aesenc_si128(x, sk[10]);
+			x = _mm_aesenc_si128(x, sk[11]);
+			x = _mm_aesenc_si128(x, sk[12]);
+			x = _mm_aesenc_si128(x, sk[13]);
+			x = _mm_aesenclast_si128(x, sk[14]);
+		}
+		ivx = x;
+		buf += 16;
+		len -= 16;
+	}
+	_mm_storeu_si128(cbcmac, ivx);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Combined CTR encryption and CBC-MAC. data[] is encrypted in place
+ * with the 128-bit big-endian counter from ctr, and the MAC in cbcmac
+ * is updated over the resulting (encrypted) blocks. Both ctr and
+ * cbcmac are written back on exit.
+ *
+ * The MAC input for a block is only known once that block has been
+ * encrypted, so the MAC's AES call for block i-1 is interleaved with
+ * the CTR AES call for block i. On the first block there is no pending
+ * MAC input yet (first_iter discards x1), and one extra AES call after
+ * the last block completes the MAC.
+ *
+ * The loop consumes exactly 16 bytes per iteration and only stops when
+ * len reaches zero, so len has to be a multiple of 16.
+ */
+BR_TARGET("sse2,sse4.1,aes")
+void
+br_aes_x86ni_ctrcbc_encrypt(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned num_rounds;
+	__m128i sk[15];
+	__m128i ivx, cmx;
+	__m128i erev, zero, one;
+	unsigned u;
+	int first_iter;
+
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) {	/* num_rounds + 1 round keys */
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+
+	/*
+	 * Some SSE2 constants.
+	 */
+	erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
+		8, 9, 10, 11, 12, 13, 14, 15);
+	zero = _mm_setzero_si128();
+	one = _mm_set_epi64x(0, 1);
+
+	/*
+	 * Decode the counter in big-endian.
+	 */
+	ivx = _mm_shuffle_epi8(_mm_loadu_si128(ctr), erev);
+	cmx = _mm_loadu_si128(cbcmac);
+
+	buf = data;
+	first_iter = 1;
+	while (len > 0) {
+		__m128i dx, x0, x1;
+
+		/*
+		 * Load initial values:
+		 *   dx   block of input data (plaintext to encrypt)
+		 *   x0   counter (for CTR encryption)
+		 *   x1   input for CBC-MAC
+		 */
+		dx = _mm_loadu_si128((void *)buf);
+		x0 = _mm_shuffle_epi8(ivx, erev);
+		x1 = cmx;
+
+		x0 = _mm_xor_si128(x0, sk[0]);
+		x1 = _mm_xor_si128(x1, sk[0]);
+		x0 = _mm_aesenc_si128(x0, sk[1]);
+		x1 = _mm_aesenc_si128(x1, sk[1]);
+		x0 = _mm_aesenc_si128(x0, sk[2]);
+		x1 = _mm_aesenc_si128(x1, sk[2]);
+		x0 = _mm_aesenc_si128(x0, sk[3]);
+		x1 = _mm_aesenc_si128(x1, sk[3]);
+		x0 = _mm_aesenc_si128(x0, sk[4]);
+		x1 = _mm_aesenc_si128(x1, sk[4]);
+		x0 = _mm_aesenc_si128(x0, sk[5]);
+		x1 = _mm_aesenc_si128(x1, sk[5]);
+		x0 = _mm_aesenc_si128(x0, sk[6]);
+		x1 = _mm_aesenc_si128(x1, sk[6]);
+		x0 = _mm_aesenc_si128(x0, sk[7]);
+		x1 = _mm_aesenc_si128(x1, sk[7]);
+		x0 = _mm_aesenc_si128(x0, sk[8]);
+		x1 = _mm_aesenc_si128(x1, sk[8]);
+		x0 = _mm_aesenc_si128(x0, sk[9]);
+		x1 = _mm_aesenc_si128(x1, sk[9]);
+		if (num_rounds == 10) {
+			x0 = _mm_aesenclast_si128(x0, sk[10]);
+			x1 = _mm_aesenclast_si128(x1, sk[10]);
+		} else if (num_rounds == 12) {
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x0 = _mm_aesenclast_si128(x0, sk[12]);
+			x1 = _mm_aesenclast_si128(x1, sk[12]);
+		} else {
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x0 = _mm_aesenc_si128(x0, sk[12]);
+			x1 = _mm_aesenc_si128(x1, sk[12]);
+			x0 = _mm_aesenc_si128(x0, sk[13]);
+			x1 = _mm_aesenc_si128(x1, sk[13]);
+			x0 = _mm_aesenclast_si128(x0, sk[14]);
+			x1 = _mm_aesenclast_si128(x1, sk[14]);
+		}
+
+		x0 = _mm_xor_si128(x0, dx);	/* x0 is now the encrypted block */
+		if (first_iter) {
+			cmx = _mm_xor_si128(cmx, x0);	/* x1 is not used on the first block */
+			first_iter = 0;
+		} else {
+			cmx = _mm_xor_si128(x1, x0);
+		}
+		_mm_storeu_si128((void *)buf, x0);
+
+		buf += 16;
+		len -= 16;
+
+		/*
+		 * Increment the counter value.
+		 */
+		ivx = _mm_add_epi64(ivx, one);
+		ivx = _mm_sub_epi64(ivx,
+			_mm_slli_si128(_mm_cmpeq_epi64(ivx, zero), 8));
+
+		/*
+		 * If this was the last iteration, then compute the
+		 * extra block encryption to complete CBC-MAC.
+		 */
+		if (len == 0) {
+			cmx = _mm_xor_si128(cmx, sk[0]);
+			cmx = _mm_aesenc_si128(cmx, sk[1]);
+			cmx = _mm_aesenc_si128(cmx, sk[2]);
+			cmx = _mm_aesenc_si128(cmx, sk[3]);
+			cmx = _mm_aesenc_si128(cmx, sk[4]);
+			cmx = _mm_aesenc_si128(cmx, sk[5]);
+			cmx = _mm_aesenc_si128(cmx, sk[6]);
+			cmx = _mm_aesenc_si128(cmx, sk[7]);
+			cmx = _mm_aesenc_si128(cmx, sk[8]);
+			cmx = _mm_aesenc_si128(cmx, sk[9]);
+			if (num_rounds == 10) {
+				cmx = _mm_aesenclast_si128(cmx, sk[10]);
+			} else if (num_rounds == 12) {
+				cmx = _mm_aesenc_si128(cmx, sk[10]);
+				cmx = _mm_aesenc_si128(cmx, sk[11]);
+				cmx = _mm_aesenclast_si128(cmx, sk[12]);
+			} else {
+				cmx = _mm_aesenc_si128(cmx, sk[10]);
+				cmx = _mm_aesenc_si128(cmx, sk[11]);
+				cmx = _mm_aesenc_si128(cmx, sk[12]);
+				cmx = _mm_aesenc_si128(cmx, sk[13]);
+				cmx = _mm_aesenclast_si128(cmx, sk[14]);
+			}
+			break;
+		}
+	}
+
+	/*
+	 * Write back new counter value and CBC-MAC value.
+	 */
+	_mm_storeu_si128(ctr, _mm_shuffle_epi8(ivx, erev));
+	_mm_storeu_si128(cbcmac, cmx);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Combined CTR decryption and CBC-MAC. For every 16-byte block, the
+ * MAC in cbcmac is updated over the input (still encrypted) block and
+ * the block is then XORed in place with the keystream derived from the
+ * 128-bit big-endian counter in ctr. Since the MAC input is available
+ * up front, both AES computations of a block run interleaved. Both ctr
+ * and cbcmac are written back on exit.
+ *
+ * The loop consumes exactly 16 bytes per iteration and only stops when
+ * len reaches zero, so len has to be a multiple of 16.
+ */
+BR_TARGET("sse2,sse4.1,aes")
+void
+br_aes_x86ni_ctrcbc_decrypt(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned num_rounds;
+	__m128i sk[15];
+	__m128i ivx, cmx;
+	__m128i erev, zero, one;
+	unsigned u;
+
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) {	/* num_rounds + 1 round keys */
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+
+	/*
+	 * Some SSE2 constants.
+	 */
+	erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
+		8, 9, 10, 11, 12, 13, 14, 15);
+	zero = _mm_setzero_si128();
+	one = _mm_set_epi64x(0, 1);
+
+	/*
+	 * Decode the counter in big-endian.
+	 */
+	ivx = _mm_shuffle_epi8(_mm_loadu_si128(ctr), erev);
+	cmx = _mm_loadu_si128(cbcmac);
+
+	buf = data;
+	while (len > 0) {
+		__m128i dx, x0, x1;
+
+		/*
+		 * Load initial values:
+		 *   dx   encrypted block of data
+		 *   x0   counter (for CTR encryption)
+		 *   x1   input for CBC-MAC
+		 */
+		dx = _mm_loadu_si128((void *)buf);
+		x0 = _mm_shuffle_epi8(ivx, erev);
+		x1 = _mm_xor_si128(cmx, dx);
+
+		x0 = _mm_xor_si128(x0, sk[0]);
+		x1 = _mm_xor_si128(x1, sk[0]);
+		x0 = _mm_aesenc_si128(x0, sk[1]);
+		x1 = _mm_aesenc_si128(x1, sk[1]);
+		x0 = _mm_aesenc_si128(x0, sk[2]);
+		x1 = _mm_aesenc_si128(x1, sk[2]);
+		x0 = _mm_aesenc_si128(x0, sk[3]);
+		x1 = _mm_aesenc_si128(x1, sk[3]);
+		x0 = _mm_aesenc_si128(x0, sk[4]);
+		x1 = _mm_aesenc_si128(x1, sk[4]);
+		x0 = _mm_aesenc_si128(x0, sk[5]);
+		x1 = _mm_aesenc_si128(x1, sk[5]);
+		x0 = _mm_aesenc_si128(x0, sk[6]);
+		x1 = _mm_aesenc_si128(x1, sk[6]);
+		x0 = _mm_aesenc_si128(x0, sk[7]);
+		x1 = _mm_aesenc_si128(x1, sk[7]);
+		x0 = _mm_aesenc_si128(x0, sk[8]);
+		x1 = _mm_aesenc_si128(x1, sk[8]);
+		x0 = _mm_aesenc_si128(x0, sk[9]);
+		x1 = _mm_aesenc_si128(x1, sk[9]);
+		if (num_rounds == 10) {
+			x0 = _mm_aesenclast_si128(x0, sk[10]);
+			x1 = _mm_aesenclast_si128(x1, sk[10]);
+		} else if (num_rounds == 12) {
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x0 = _mm_aesenclast_si128(x0, sk[12]);
+			x1 = _mm_aesenclast_si128(x1, sk[12]);
+		} else {
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x0 = _mm_aesenc_si128(x0, sk[12]);
+			x1 = _mm_aesenc_si128(x1, sk[12]);
+			x0 = _mm_aesenc_si128(x0, sk[13]);
+			x1 = _mm_aesenc_si128(x1, sk[13]);
+			x0 = _mm_aesenclast_si128(x0, sk[14]);
+			x1 = _mm_aesenclast_si128(x1, sk[14]);
+		}
+		x0 = _mm_xor_si128(x0, dx);	/* x0 is now the decrypted block */
+		cmx = x1;	/* updated MAC value */
+		_mm_storeu_si128((void *)buf, x0);
+
+		buf += 16;
+		len -= 16;
+
+		/*
+		 * Increment the counter value.
+		 */
+		ivx = _mm_add_epi64(ivx, one);
+		ivx = _mm_sub_epi64(ivx,
+			_mm_slli_si128(_mm_cmpeq_epi64(ivx, zero), 8));
+	}
+
+	/*
+	 * Write back new counter value and CBC-MAC value.
+	 */
+	_mm_storeu_si128(ctr, _mm_shuffle_epi8(ivx, erev));
+	_mm_storeu_si128(cbcmac, cmx);
+}
+
+BR_TARGETS_X86_DOWN
+
+/*
+ * see bearssl_block.h
+ *
+ * Method table for the AES-NI CTR + CBC-MAC implementation. After the
+ * three leading values, the entries are, in order: init, encrypt,
+ * decrypt, ctr and mac. The function pointers are cast because the
+ * concrete functions take the typed key structure instead of the
+ * generic class pointer.
+ */
+const br_block_ctrcbc_class br_aes_x86ni_ctrcbc_vtable = {
+	sizeof(br_aes_x86ni_ctrcbc_keys),	/* size of the key structure */
+	16,	/* presumably the block size in bytes -- confirm in bearssl_block.h */
+	4,	/* presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+		&br_aes_x86ni_ctrcbc_init,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_x86ni_ctrcbc_encrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_x86ni_ctrcbc_decrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, size_t))
+		&br_aes_x86ni_ctrcbc_ctr,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, const void *, size_t))
+		&br_aes_x86ni_ctrcbc_mac
+};
+
+#else
+
+/*
+ * see bearssl_block.h
+ *
+ * Variant compiled when BR_AES_X86NI is false: the AES-NI code above
+ * is not built, so no vtable is available and NULL is always returned.
+ */
+const br_block_ctrcbc_class *
+br_aes_x86ni_ctrcbc_get_vtable(void)
+{
+	return NULL;
+}
+
+#endif
--- a/third_party/bearssl/src/aesctr_drbg.c
+++ b/third_party/bearssl/src/aesctr_drbg.c
@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_rand.h
+ *
+ * Sets up the DRBG context: installs the DRBG vtable, keys the supplied
+ * AES-CTR implementation with a 16-byte all-zero key, clears the block
+ * counter and then mixes in the caller's seed through
+ * br_aesctr_drbg_update().
+ */
+void
+br_aesctr_drbg_init(br_aesctr_drbg_context *ctx,
+	const br_block_ctr_class *aesctr,
+	const void *seed, size_t len)
+{
+	unsigned char zero_key[16] = { 0 };
+
+	ctx->vtable = &br_aesctr_drbg_vtable;
+	aesctr->init(&ctx->sk.vtable, zero_key, sizeof zero_key);
+	ctx->cc = 0;
+	br_aesctr_drbg_update(ctx, seed, len);
+}
+
+/*
+ * see bearssl_rand.h
+ *
+ * Fills out[0..len-1] with pseudorandom bytes. The destination is
+ * zeroed and then run through AES-CTR with an all-zero 12-byte IV and
+ * the running block counter ctx->cc, so it ends up holding the raw
+ * keystream. Whenever the counter reaches 32768 blocks, the key is
+ * refreshed through br_aesctr_drbg_update() with no additional seed.
+ */
+void
+br_aesctr_drbg_generate(br_aesctr_drbg_context *ctx, void *out, size_t len)
+{
+	unsigned char nonce[12];
+	unsigned char *dst;
+
+	memset(nonce, 0, sizeof nonce);
+	dst = out;
+	while (len != 0) {
+		size_t chunk;
+
+		/*
+		 * Cap each request at 65280 bytes: that keeps the
+		 * counter overflow test below unambiguous and still
+		 * fits a 16-bit size_t.
+		 */
+		chunk = (len > 65280) ? 65280 : len;
+
+		/*
+		 * Shrink the request so that the block counter does
+		 * not go past the 32768-block limit.
+		 */
+		if ((uint32_t)(ctx->cc + ((chunk + 15) >> 4)) > 32768) {
+			chunk = (32768 - ctx->cc) << 4;
+			if (chunk > len) {
+				chunk = len;
+			}
+		}
+
+		/*
+		 * CTR over zeros yields the keystream itself.
+		 */
+		memset(dst, 0, chunk);
+		ctx->cc = ctx->sk.vtable->run(&ctx->sk.vtable,
+			nonce, ctx->cc, dst, chunk);
+		dst += chunk;
+		len -= chunk;
+
+		/*
+		 * Limit reached: force a state update (this also
+		 * resets the counter).
+		 */
+		if (ctx->cc >= 32768) {
+			br_aesctr_drbg_update(ctx, NULL, 0);
+		}
+	}
+}
+
+/*
+ * see bearssl_rand.h
+ *
+ * Re-keys the DRBG. A state-dependent block s is first produced with
+ * the current key; s followed by the seed bytes (if any) is then run
+ * through the hash construction described below. The H half of the
+ * result becomes the new 128-bit AES key and the block counter is
+ * reset to 0.
+ *
+ * With len == 0 only s is hashed. A final seed chunk shorter than 16
+ * bytes is padded with zeros.
+ */
+void
+br_aesctr_drbg_update(br_aesctr_drbg_context *ctx, const void *seed, size_t len)
+{
+	/*
+	 * We use a Hirose construction on AES-256 to make a hash function.
+	 * Function definition:
+	 *  - running state consists in two 16-byte blocks G and H
+	 *  - initial values of G and H are conventional
+	 *  - there is a fixed block-sized constant C
+	 *  - for next data block m:
+	 *      set AES key to H||m
+	 *      G' = E(G) xor G
+	 *      H' = E(G xor C) xor G xor C
+	 *      G <- G', H <- H'
+	 *  - once all blocks have been processed, output is H||G
+	 *
+	 * Constants:
+	 *   G_init = B6 B6 ... B6
+	 *   H_init = 5A 5A ... 5A
+	 *   C      = 01 00 ... 00
+	 *
+	 * With this hash function h(), we compute the new state as
+	 * follows:
+	 *  - produce a state-dependent value s as encryption of an
+	 *    all-one block with AES and the current key
+	 *  - compute the new key as the first 128 bits of h(s||seed)
+	 *
+	 * Original Hirose article:
+	 *    https://www.iacr.org/archive/fse2006/40470213/40470213.pdf
+	 */
+
+	unsigned char s[16], iv[12];
+	unsigned char G[16], H[16];
+	int first;
+
+	/*
+	 * Use an all-one IV to get a fresh output block that depends on the
+	 * current seed.
+	 */
+	memset(iv, 0xFF, sizeof iv);
+	memset(s, 0, 16);
+	ctx->sk.vtable->run(&ctx->sk.vtable, iv, 0xFFFFFFFF, s, 16);
+
+	/*
+	 * Set G[] and H[] to conventional start values.
+	 */
+	memset(G, 0xB6, sizeof G);
+	memset(H, 0x5A, sizeof H);
+
+	/*
+	 * Process the concatenation of the current state and the seed
+	 * with the custom hash function.
+	 */
+	first = 1;
+	for (;;) {
+		unsigned char tmp[32];
+		unsigned char newG[16];
+
+		/*
+		 * Assemble new key H||m into tmp[].
+		 */
+		memcpy(tmp, H, 16);
+		if (first) {
+			memcpy(tmp + 16, s, 16);
+			first = 0;
+		} else {
+			size_t clen;
+
+			if (len == 0) {
+				break;
+			}
+			clen = len < 16 ? len : 16;
+			memcpy(tmp + 16, seed, clen);
+			memset(tmp + 16 + clen, 0, 16 - clen);
+			seed = (const unsigned char *)seed + clen;
+			len -= clen;
+		}
+		ctx->sk.vtable->init(&ctx->sk.vtable, tmp, 32);
+
+		/*
+		 * Compute new G and H values.
+		 *
+		 * Running CTR over a buffer that already contains the
+		 * counter block (first 12 bytes as IV, last 4 bytes as
+		 * the big-endian counter) yields E(block) xor block.
+		 */
+		memcpy(iv, G, 12);
+		memcpy(newG, G, 16);
+		ctx->sk.vtable->run(&ctx->sk.vtable, iv,
+			br_dec32be(G + 12), newG, 16);
+		iv[0] ^= 0x01;	/* counter block becomes G xor C */
+		memcpy(H, G, 16);
+		H[0] ^= 0x01;	/* buffer becomes G xor C as well */
+		ctx->sk.vtable->run(&ctx->sk.vtable, iv,
+			br_dec32be(G + 12), H, 16);
+		memcpy(G, newG, 16);
+	}
+
+	/*
+	 * Output hash value is H||G. We truncate it to its first 128 bits,
+	 * i.e. H; that's our new AES key.
+	 */
+	ctx->sk.vtable->init(&ctx->sk.vtable, H, 16);
+	ctx->cc = 0;
+}
+
+/*
+ * see bearssl_rand.h
+ *
+ * Method table for the AES-CTR DRBG: context size, then the init,
+ * generate and update functions. The function pointers are cast
+ * because the concrete functions take the typed context instead of
+ * the generic class pointer.
+ */
+const br_prng_class br_aesctr_drbg_vtable = {
+	sizeof(br_aesctr_drbg_context),	/* size of the DRBG context */
+	(void (*)(const br_prng_class **, const void *, const void *, size_t))
+		&br_aesctr_drbg_init,
+	(void (*)(const br_prng_class **, void *, size_t))
+		&br_aesctr_drbg_generate,
+	(void (*)(const br_prng_class **, const void *, size_t))
+		&br_aesctr_drbg_update
+};
--- a/third_party/bearssl/src/asn1enc.c
+++ b/third_party/bearssl/src/asn1enc.c
@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h */
+/*
+ * Describe an unsigned big-endian integer for later encoding.
+ *
+ * Leading zero bytes of xdata[] are skipped. The returned structure
+ * points at the first remaining byte ('data'), holds the remaining
+ * byte count ('len'), and holds the encoded value length ('asn1len'),
+ * which is one more than 'len' when the stripped value is empty or
+ * when its first byte is 0x80 or greater.
+ */
+br_asn1_uint
+br_asn1_uint_prepare(const void *xdata, size_t xlen)
+{
+	const unsigned char *p;
+	size_t n;
+	br_asn1_uint r;
+
+	/*
+	 * Skip the leading zero bytes.
+	 */
+	p = xdata;
+	for (n = xlen; n > 0 && p[0] == 0; n --) {
+		p ++;
+	}
+
+	r.data = p;
+	r.len = n;
+
+	/*
+	 * One extra byte is accounted for if nothing remains, or if the
+	 * top bit of the first byte is set. The emptiness test comes
+	 * first so that p[0] is not read when n is zero.
+	 */
+	r.asn1len = n + ((n == 0 || p[0] >= 0x80) ? 1 : 0);
+	return r;
+}
+
+/* see inner.h */
+/*
+ * Encode a length value.
+ *
+ * Values below 0x80 use a single byte. Larger values use a first byte
+ * equal to 0x80 plus the number of following bytes, then the value in
+ * big-endian order with no leading zero byte.
+ *
+ * If dest is NULL, nothing is written. In all cases the number of
+ * bytes of the encoding is returned.
+ */
+size_t
+br_asn1_encode_length(void *dest, size_t len)
+{
+	unsigned char *out;
+	size_t rest;
+	int nbytes, k;
+
+	out = dest;
+
+	/*
+	 * Short form: the length fits in a single byte.
+	 */
+	if (len < 0x80) {
+		if (out != NULL) {
+			out[0] = len;
+		}
+		return 1;
+	}
+
+	/*
+	 * Long form: count the significant bytes of the value. The
+	 * value is at least 0x80 here, hence non-zero.
+	 */
+	nbytes = 0;
+	rest = len;
+	do {
+		nbytes ++;
+		rest >>= 8;
+	} while (rest != 0);
+
+	if (out == NULL) {
+		return nbytes + 1;
+	}
+
+	/*
+	 * Header byte, then the value bytes, most significant first.
+	 */
+	out[0] = 0x80 + nbytes;
+	for (k = 1; k <= nbytes; k ++) {
+		out[k] = len >> ((nbytes - k) << 3);
+	}
+	return nbytes + 1;
+}
+
+/* see inner.h */
+/*
+ * Encode a prepared unsigned integer: a 0x02 tag byte, the encoded
+ * length of pp.asn1len, then pp.asn1len value bytes. The first value
+ * byte is set to 0x00 and the pp.len bytes of pp.data are copied at
+ * the end of the value area.
+ *
+ * If dest is NULL, nothing is written. The total encoded size is
+ * returned in all cases.
+ */
+size_t
+br_asn1_encode_uint(void *dest, br_asn1_uint pp)
+{
+	unsigned char *out, *val;
+	size_t hlen;
+
+	/*
+	 * Size query only.
+	 */
+	if (dest == NULL) {
+		hlen = br_asn1_encode_length(NULL, pp.asn1len);
+		return 1 + hlen + pp.asn1len;
+	}
+
+	out = dest;
+	out[0] = 0x02;
+	hlen = br_asn1_encode_length(out + 1, pp.asn1len);
+
+	/*
+	 * The value area starts right after the tag and length. Its
+	 * first byte is cleared; it is overwritten by the copy below
+	 * whenever no extra leading byte is needed.
+	 */
+	val = out + 1 + hlen;
+	val[0] = 0x00;
+	memcpy(val + (pp.asn1len - pp.len), pp.data, pp.len);
+	return 1 + hlen + pp.asn1len;
+}
--- a/third_party/bearssl/src/ccm.c
+++ b/third_party/bearssl/src/ccm.c
@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Implementation Notes
+ * ====================
+ *
+ * The combined CTR + CBC-MAC functions can only handle full blocks,
+ * so some buffering is necessary.
+ *
+ *  - 'ptr' contains a value from 0 to 15, which is the number of bytes
+ *    accumulated in buf[] that still needs to be processed with the
+ *    current CBC-MAC computation.
+ *
+ *  - When processing the message itself, CTR encryption/decryption is
+ *    also done at the same time. The first 'ptr' bytes of buf[] then
+ *    contains the plaintext bytes, while the last '16 - ptr' bytes of
+ *    buf[] are the remnants of the stream block, to be used against
+ *    the next input bytes, when available. When 'ptr' is 0, the
+ *    contents of buf[] are to be ignored.
+ *
+ *  - The current counter and running CBC-MAC values are kept in 'ctr'
+ *    and 'cbcmac', respectively.
+ */
+
+/* see bearssl_block.h */
+/*
+ * Bind a CCM context to a block cipher implementation: the pointer to
+ * the CTR + CBC-MAC implementation is recorded in the context. No other
+ * context field is set here; br_ccm_reset() fills in the per-message
+ * state.
+ */
+void
+br_ccm_init(br_ccm_context *ctx, const br_block_ctrcbc_class **bctx)
+{
+	ctx->bctx = bctx;
+}
+
+/* see bearssl_block.h */
+/*
+ * Start a new CCM computation.
+ *
+ *   nonce, nonce_len   nonce; its length must be 7 to 13 bytes
+ *   aad_len            total length of the additional authenticated data
+ *   data_len           total length of the message; must be encodable
+ *                      over 15 - nonce_len bytes
+ *   tag_len            tag length; must be even and in the 4..16 range
+ *
+ * Returned value is 1 on success, 0 if one of the length parameters is
+ * rejected. Note that ctx->tag_len is written before data_len is
+ * checked, so the context may be partially modified on failure.
+ *
+ * On success, the CBC-MAC has been started over the first block, the
+ * AAD length header (0, 2, 6 or 10 bytes) is waiting in ctx->buf[] with
+ * ctx->ptr set to its length, the counter block is initialised, and
+ * the tag mask has been computed.
+ */
+int
+br_ccm_reset(br_ccm_context *ctx, const void *nonce, size_t nonce_len,
+	uint64_t aad_len, uint64_t data_len, size_t tag_len)
+{
+	unsigned char tmp[16];
+	unsigned u, q;
+
+	if (nonce_len < 7 || nonce_len > 13) {
+		return 0;
+	}
+	if (tag_len < 4 || tag_len > 16 || (tag_len & 1) != 0) {
+		return 0;
+	}
+	/*
+	 * q is the number of bytes over which the message length is
+	 * encoded; the nonce and that length share the 15 bytes that
+	 * follow the flags byte.
+	 */
+	q = 15 - (unsigned)nonce_len;
+	ctx->tag_len = tag_len;
+
+	/*
+	 * Block B0, to start CBC-MAC.
+	 */
+	/*
+	 * Flags byte: 0x40 is set when there is some AAD; since tag_len
+	 * is even, (tag_len - 2) << 2 is (tag_len - 2) / 2 placed in
+	 * bits 3 to 5; the low bits contain q - 1.
+	 */
+	tmp[0] = (aad_len > 0 ? 0x40 : 0x00)
+		| (((unsigned)tag_len - 2) << 2)
+		| (q - 1);
+	memcpy(tmp + 1, nonce, nonce_len);
+	/*
+	 * Message length, big-endian, over the last q bytes.
+	 */
+	for (u = 0; u < q; u ++) {
+		tmp[15 - u] = (unsigned char)data_len;
+		data_len >>= 8;
+	}
+	if (data_len != 0) {
+		/*
+		 * If the data length was not entirely consumed in the
+		 * loop above, then it exceeds the maximum limit of
+		 * q bytes (when encoded).
+		 */
+		return 0;
+	}
+
+	/*
+	 * Start CBC-MAC.
+	 */
+	memset(ctx->cbcmac, 0, sizeof ctx->cbcmac);
+	(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, tmp, sizeof tmp);
+
+	/*
+	 * Assemble AAD length header.
+	 */
+	/*
+	 * The header is only buffered here; it gets processed along
+	 * with the AAD bytes themselves. Lengths below 0xFF00 use two
+	 * bytes; lengths that fit in 32 bits use 0xFF 0xFE followed by
+	 * four bytes; larger lengths use 0xFF 0xFF followed by eight
+	 * bytes.
+	 */
+	if ((aad_len >> 32) != 0) {
+		ctx->buf[0] = 0xFF;
+		ctx->buf[1] = 0xFF;
+		br_enc64be(ctx->buf + 2, aad_len);
+		ctx->ptr = 10;
+	} else if (aad_len >= 0xFF00) {
+		ctx->buf[0] = 0xFF;
+		ctx->buf[1] = 0xFE;
+		br_enc32be(ctx->buf + 2, (uint32_t)aad_len);
+		ctx->ptr = 6;
+	} else if (aad_len > 0) {
+		br_enc16be(ctx->buf, (unsigned)aad_len);
+		ctx->ptr = 2;
+	} else {
+		ctx->ptr = 0;
+	}
+
+	/*
+	 * Make initial counter value and compute tag mask.
+	 */
+	/*
+	 * The counter block is q - 1, then the nonce, then q zero
+	 * bytes. The tag mask is obtained by applying the CTR function
+	 * on an all-zero block.
+	 */
+	ctx->ctr[0] = q - 1;
+	memcpy(ctx->ctr + 1, nonce, nonce_len);
+	memset(ctx->ctr + 1 + nonce_len, 0, q);
+	memset(ctx->tagmask, 0, sizeof ctx->tagmask);
+	(*ctx->bctx)->ctr(ctx->bctx, ctx->ctr,
+		ctx->tagmask, sizeof ctx->tagmask);
+
+	return 1;
+}
+
+/* see bearssl_block.h */
+/*
+ * Inject some additional authenticated data. The bytes are appended
+ * to whatever is already buffered in ctx->buf[]; every completed
+ * 16-byte block goes through the CBC-MAC, and the trailing partial
+ * block (if any) stays in ctx->buf[], with ctx->ptr set to its length.
+ */
+void
+br_ccm_aad_inject(br_ccm_context *ctx, const void *data, size_t len)
+{
+	const unsigned char *src;
+	size_t held, tail;
+
+	src = data;
+	held = ctx->ptr;
+
+	/*
+	 * Some bytes are already buffered: try to fill up that block.
+	 */
+	if (held != 0) {
+		size_t room;
+
+		room = (sizeof ctx->buf) - held;
+		if (len < room) {
+			/*
+			 * Not enough input to complete the block; keep
+			 * accumulating.
+			 */
+			memcpy(ctx->buf + held, src, len);
+			ctx->ptr = held + len;
+			return;
+		}
+		memcpy(ctx->buf + held, src, room);
+		src += room;
+		len -= room;
+		(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac,
+			ctx->buf, sizeof ctx->buf);
+	}
+
+	/*
+	 * MAC all the full blocks directly from the source, then keep
+	 * the leftover bytes (fewer than 16) for the next call.
+	 */
+	tail = len & 15;
+	(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, src, len - tail);
+	memcpy(ctx->buf, src + (len - tail), tail);
+	ctx->ptr = tail;
+}
+
+/* see bearssl_block.h */
+/*
+ * Terminate the AAD phase: if a partial block is buffered, it is padded
+ * with zeros and processed through the CBC-MAC, and the buffer is
+ * marked empty. The counter needs no work here since br_ccm_reset()
+ * already set it.
+ */
+void
+br_ccm_flip(br_ccm_context *ctx)
+{
+	size_t pending;
+
+	pending = ctx->ptr;
+	if (pending == 0) {
+		/*
+		 * No partial AAD block; nothing to do.
+		 */
+		return;
+	}
+
+	/*
+	 * Zero-pad the partial block and run it through CBC-MAC.
+	 */
+	memset(ctx->buf + pending, 0, (sizeof ctx->buf) - pending);
+	(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac,
+		ctx->buf, sizeof ctx->buf);
+	ctx->ptr = 0;
+}
+
+/* see bearssl_block.h */
+/*
+ * Encrypt or decrypt some data, in place.
+ *
+ *   encrypt   non-zero if data[] contains plaintext to encrypt, zero if
+ *             it contains ciphertext to decrypt
+ *   data      buffer, modified in place
+ *   len       buffer length (in bytes)
+ *
+ * In both directions the CBC-MAC is computed over the plaintext bytes.
+ * Data may be provided in chunks of arbitrary length; a trailing
+ * partial block is kept in ctx->buf[] (plaintext in the first ctx->ptr
+ * bytes, unused CTR stream bytes after them) until completed by a
+ * later call or by the tag computation.
+ */
+void
+br_ccm_run(br_ccm_context *ctx, int encrypt, void *data, size_t len)
+{
+	unsigned char *dbuf;
+	size_t ptr;
+
+	dbuf = data;
+
+	/*
+	 * Complete a partial block, if any: ctx->buf[] contains
+	 * ctx->ptr plaintext bytes (already reported), and the other
+	 * bytes are CTR stream output.
+	 */
+	ptr = ctx->ptr;
+	if (ptr != 0) {
+		size_t clen;
+		size_t u;
+
+		clen = (sizeof ctx->buf) - ptr;
+		if (clen > len) {
+			clen = len;
+		}
+		if (encrypt) {
+			/*
+			 * Each stream byte in buf[] is replaced with the
+			 * plaintext byte (for the MAC), and the data byte
+			 * becomes the ciphertext byte.
+			 */
+			for (u = 0; u < clen; u ++) {
+				unsigned w, x;
+
+				w = ctx->buf[ptr + u];
+				x = dbuf[u];
+				ctx->buf[ptr + u] = x;
+				dbuf[u] = w ^ x;
+			}
+		} else {
+			/*
+			 * The recovered plaintext byte goes both to the
+			 * data buffer and to buf[] (for the MAC).
+			 */
+			for (u = 0; u < clen; u ++) {
+				unsigned w;
+
+				w = ctx->buf[ptr + u] ^ dbuf[u];
+				dbuf[u] = w;
+				ctx->buf[ptr + u] = w;
+			}
+		}
+		dbuf += clen;
+		len -= clen;
+		ptr += clen;
+		if (ptr < sizeof ctx->buf) {
+			/*
+			 * Block still incomplete: all input was consumed.
+			 */
+			ctx->ptr = ptr;
+			return;
+		}
+		(*ctx->bctx)->mac(ctx->bctx,
+			ctx->cbcmac, ctx->buf, sizeof ctx->buf);
+	}
+
+	/*
+	 * Process all complete blocks. Note that the ctrcbc API is for
+	 * encrypt-then-MAC (CBC-MAC is computed over the encrypted
+	 * blocks) while CCM uses MAC-and-encrypt (CBC-MAC is computed
+	 * over the plaintext blocks). Therefore, we need to use the
+	 * _decryption_ function for encryption, and the encryption
+	 * function for decryption (this works because CTR encryption
+	 * and decryption are identical, so the choice really is about
+	 * computing the CBC-MAC before or after XORing with the CTR
+	 * stream).
+	 */
+	ptr = len & 15;
+	len -= ptr;
+	if (encrypt) {
+		(*ctx->bctx)->decrypt(ctx->bctx, ctx->ctr, ctx->cbcmac,
+			dbuf, len);
+	} else {
+		(*ctx->bctx)->encrypt(ctx->bctx, ctx->ctr, ctx->cbcmac,
+			dbuf, len);
+	}
+	dbuf += len;
+
+	/*
+	 * If there is some remaining data, then we need to compute an
+	 * extra block of CTR stream.
+	 */
+	if (ptr != 0) {
+		size_t u;
+
+		/*
+		 * Applying the CTR function on zeros yields the raw
+		 * stream block in buf[].
+		 */
+		memset(ctx->buf, 0, sizeof ctx->buf);
+		(*ctx->bctx)->ctr(ctx->bctx, ctx->ctr,
+			ctx->buf, sizeof ctx->buf);
+		if (encrypt) {
+			for (u = 0; u < ptr; u ++) {
+				unsigned w, x;
+
+				w = ctx->buf[u];
+				x = dbuf[u];
+				ctx->buf[u] = x;
+				dbuf[u] = w ^ x;
+			}
+		} else {
+			for (u = 0; u < ptr; u ++) {
+				unsigned w;
+
+				w = ctx->buf[u] ^ dbuf[u];
+				dbuf[u] = w;
+				ctx->buf[u] = w;
+			}
+		}
+	}
+	ctx->ptr = ptr;
+}
+
+/* see bearssl_block.h */
+/*
+ * Compute the authentication tag and write it into tag[]; the tag
+ * length (as configured with br_ccm_reset()) is returned.
+ *
+ * Any buffered partial block is zero-padded and processed first. The
+ * tag mask is XORed into ctx->cbcmac[] itself, so the context is
+ * modified by this call.
+ */
+size_t
+br_ccm_get_tag(br_ccm_context *ctx, void *tag)
+{
+	unsigned char *out;
+	size_t fill, tlen, k;
+
+	/*
+	 * Finish CBC-MAC over the last (padded) partial block, if any.
+	 */
+	fill = ctx->ptr;
+	if (fill != 0) {
+		memset(ctx->buf + fill, 0, (sizeof ctx->buf) - fill);
+		(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac,
+			ctx->buf, sizeof ctx->buf);
+	}
+
+	/*
+	 * Mask the CBC-MAC value (in place) and emit the first tlen
+	 * bytes as the tag.
+	 */
+	tlen = ctx->tag_len;
+	out = tag;
+	for (k = 0; k < tlen; k ++) {
+		ctx->cbcmac[k] ^= ctx->tagmask[k];
+		out[k] = ctx->cbcmac[k];
+	}
+	return tlen;
+}
+
+/* see bearssl_block.h */
+/*
+ * Compute the tag with br_ccm_get_tag() and compare it with the
+ * provided value. All tag bytes are always compared: differences are
+ * accumulated with OR, and the result is EQ0() of the accumulator
+ * (presumably 1 when the tags match and 0 otherwise; see the EQ0
+ * definition in inner.h).
+ */
+uint32_t
+br_ccm_check_tag(br_ccm_context *ctx, const void *tag)
+{
+	unsigned char computed[16];
+	const unsigned char *expected;
+	size_t k, n;
+	uint32_t diff;
+
+	n = br_ccm_get_tag(ctx, computed);
+	expected = tag;
+	diff = 0;
+	for (k = 0; k < n; k ++) {
+		diff |= computed[k] ^ expected[k];
+	}
+	return EQ0(diff);
+}
--- a/third_party/bearssl/src/ccopy.c
+++ b/third_party/bearssl/src/ccopy.c
@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h */
+/*
+ * Conditional copy. For each of the 'len' bytes, both the source and
+ * the destination bytes are read, and the destination byte is
+ * rewritten with MUX(ctl, source, destination). Every byte is thus
+ * read and written regardless of ctl. (MUX is defined in inner.h; it
+ * presumably selects the source value when ctl is 1 and the
+ * destination value when ctl is 0.)
+ */
+void
+br_ccopy(uint32_t ctl, void *dst, const void *src, size_t len)
+{
+	unsigned char *out;
+	const unsigned char *in;
+	size_t k;
+
+	out = dst;
+	in = src;
+	for (k = 0; k < len; k ++) {
+		uint32_t from_src, from_dst;
+
+		from_src = in[k];
+		from_dst = out[k];
+		out[k] = MUX(ctl, from_src, from_dst);
+	}
+}
--- a/third_party/bearssl/src/chacha20_ct.c
+++ b/third_party/bearssl/src/chacha20_ct.c
@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h */
+/*
+ * ChaCha20 encryption/decryption, portable implementation.
+ *
+ *   key    key; 32 bytes are read (eight little-endian 32-bit words)
+ *   iv     IV; 12 bytes are read (three little-endian 32-bit words)
+ *   cc     initial value of the 32-bit block counter
+ *   data   buffer to encrypt or decrypt; it is XORed in place with
+ *          the key stream
+ *   len    buffer length (in bytes)
+ *
+ * One 64-byte key stream block is produced per loop iteration, and the
+ * counter is incremented after each block (including a final partial
+ * block). The updated counter value is returned.
+ */
+uint32_t
+br_chacha20_ct_run(const void *key,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char *buf;
+	uint32_t kw[8], ivw[3];
+	size_t u;
+
+	/*
+	 * Constant words for the first state row.
+	 */
+	static const uint32_t CW[] = {
+		0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
+	};
+
+	buf = data;
+	for (u = 0; u < 8; u ++) {
+		kw[u] = br_dec32le((const unsigned char *)key + (u << 2));
+	}
+	for (u = 0; u < 3; u ++) {
+		ivw[u] = br_dec32le((const unsigned char *)iv + (u << 2));
+	}
+	while (len > 0) {
+		uint32_t state[16];
+		int i;
+		size_t clen;
+		unsigned char tmp[64];
+
+		/*
+		 * Initial state: constants (words 0..3), key (4..11),
+		 * block counter (12), IV (13..15).
+		 */
+		memcpy(&state[0], CW, sizeof CW);
+		memcpy(&state[4], kw, sizeof kw);
+		state[12] = cc;
+		memcpy(&state[13], ivw, sizeof ivw);
+		/*
+		 * Ten iterations; each one applies the quarter-round on
+		 * the four columns, then on the four diagonals.
+		 */
+		for (i = 0; i < 10; i ++) {
+
+#define QROUND(a, b, c, d)   do { \
+		state[a] += state[b]; \
+		state[d] ^= state[a]; \
+		state[d] = (state[d] << 16) | (state[d] >> 16); \
+		state[c] += state[d]; \
+		state[b] ^= state[c]; \
+		state[b] = (state[b] << 12) | (state[b] >> 20); \
+		state[a] += state[b]; \
+		state[d] ^= state[a]; \
+		state[d] = (state[d] <<  8) | (state[d] >> 24); \
+		state[c] += state[d]; \
+		state[b] ^= state[c]; \
+		state[b] = (state[b] <<  7) | (state[b] >> 25); \
+	} while (0)
+
+			QROUND( 0,  4,  8, 12);
+			QROUND( 1,  5,  9, 13);
+			QROUND( 2,  6, 10, 14);
+			QROUND( 3,  7, 11, 15);
+			QROUND( 0,  5, 10, 15);
+			QROUND( 1,  6, 11, 12);
+			QROUND( 2,  7,  8, 13);
+			QROUND( 3,  4,  9, 14);
+
+#undef QROUND
+
+		}
+		/*
+		 * Add the initial state words to the final state, and
+		 * encode the result (little-endian) as the key stream
+		 * block.
+		 */
+		for (u = 0; u < 4; u ++) {
+			br_enc32le(&tmp[u << 2], state[u] + CW[u]);
+		}
+		for (u = 4; u < 12; u ++) {
+			br_enc32le(&tmp[u << 2], state[u] + kw[u - 4]);
+		}
+		br_enc32le(&tmp[48], state[12] + cc);
+		for (u = 13; u < 16; u ++) {
+			br_enc32le(&tmp[u << 2], state[u] + ivw[u - 13]);
+		}
+
+		/*
+		 * XOR the key stream into the data; the last chunk may
+		 * be shorter than 64 bytes.
+		 */
+		clen = len < 64 ? len : 64;
+		for (u = 0; u < clen; u ++) {
+			buf[u] ^= tmp[u];
+		}
+		buf += clen;
+		len -= clen;
+		cc ++;
+	}
+	return cc;
+}
--- a/third_party/bearssl/src/chacha20_sse2.c
+++ b/third_party/bearssl/src/chacha20_sse2.c
@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+#if BR_SSE2
+
+/*
+ * This file contains a ChaCha20 implementation that leverages SSE2
+ * opcodes for better performance.
+ */
+
+/* see bearssl_block.h */
+/*
+ * Get the SSE2 ChaCha20 implementation, if usable. When BR_amd64 is
+ * set, a pointer to br_chacha20_sse2_run() is returned unconditionally.
+ * Otherwise, that pointer is returned only if the br_cpuid() test
+ * succeeds, and 0 is returned if it fails.
+ */
+br_chacha20_run
+br_chacha20_sse2_get(void)
+{
+	/*
+	 * If using 64-bit mode, then SSE2 opcodes should be automatically
+	 * available, since they are part of the ABI.
+	 *
+	 * In 32-bit mode, we use CPUID to detect the SSE2 feature.
+	 */
+
+#if BR_amd64
+	return &br_chacha20_sse2_run;
+#else
+
+	/*
+	 * SSE2 support is indicated by bit 26 in EDX.
+	 */
+	if (br_cpuid(0, 0, 0, 0x04000000)) {
+		return &br_chacha20_sse2_run;
+	} else {
+		return 0;
+	}
+#endif
+}
+
+BR_TARGETS_X86_UP
+
+/* see bearssl_block.h */
+/*
+ * ChaCha20 encryption/decryption with SSE2 intrinsics.
+ *
+ *   key    key; 32 bytes are read (two unaligned 128-bit loads)
+ *   iv     IV; 12 bytes are read
+ *   cc     initial value of the 32-bit block counter
+ *   data   buffer to encrypt or decrypt; it is XORed in place with
+ *          the key stream
+ *   len    buffer length (in bytes)
+ *
+ * Each of the four state rows is held in one 128-bit value. The block
+ * counter is the low 32-bit lane of 'iw'; it is incremented after each
+ * generated block, and its final value is returned.
+ */
+BR_TARGET("sse2")
+uint32_t
+br_chacha20_sse2_run(const void *key,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char *buf;
+	uint32_t ivtmp[4];
+	__m128i kw0, kw1;
+	__m128i iw, cw;
+	__m128i one;
+
+	/*
+	 * Constant words for the first state row.
+	 */
+	static const uint32_t CW[] = {
+		0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
+	};
+
+	buf = data;
+	kw0 = _mm_loadu_si128(key);
+	kw1 = _mm_loadu_si128((const void *)((const unsigned char *)key + 16));
+	/*
+	 * The last state row is the block counter followed by the
+	 * 12-byte IV; it is assembled in ivtmp[] then loaded.
+	 */
+	ivtmp[0] = cc;
+	memcpy(ivtmp + 1, iv, 12);
+	iw = _mm_loadu_si128((const void *)ivtmp);
+	cw = _mm_loadu_si128((const void *)CW);
+	/*
+	 * 'one' has value 1 in its low lane only, so that adding it
+	 * increments the counter and leaves the IV words untouched.
+	 */
+	one = _mm_set_epi32(0, 0, 0, 1);
+
+	while (len > 0) {
+		/*
+		 * sj contains state words 4*j to 4*j+3.
+		 */
+		__m128i s0, s1, s2, s3;
+		int i;
+
+		s0 = cw;
+		s1 = kw0;
+		s2 = kw1;
+		s3 = iw;
+		for (i = 0; i < 10; i ++) {
+			/*
+			 * Even round is straightforward application on
+			 * the state words.
+			 */
+			s0 = _mm_add_epi32(s0, s1);
+			s3 = _mm_xor_si128(s3, s0);
+			s3 = _mm_or_si128(
+				_mm_slli_epi32(s3, 16),
+				_mm_srli_epi32(s3, 16));
+
+			s2 = _mm_add_epi32(s2, s3);
+			s1 = _mm_xor_si128(s1, s2);
+			s1 = _mm_or_si128(
+				_mm_slli_epi32(s1, 12),
+				_mm_srli_epi32(s1, 20));
+
+			s0 = _mm_add_epi32(s0, s1);
+			s3 = _mm_xor_si128(s3, s0);
+			s3 = _mm_or_si128(
+				_mm_slli_epi32(s3, 8),
+				_mm_srli_epi32(s3, 24));
+
+			s2 = _mm_add_epi32(s2, s3);
+			s1 = _mm_xor_si128(s1, s2);
+			s1 = _mm_or_si128(
+				_mm_slli_epi32(s1, 7),
+				_mm_srli_epi32(s1, 25));
+
+			/*
+			 * For the odd round, we must rotate some state
+			 * words so that the computations apply on the
+			 * right combinations of words.
+			 */
+			s1 = _mm_shuffle_epi32(s1, 0x39);
+			s2 = _mm_shuffle_epi32(s2, 0x4E);
+			s3 = _mm_shuffle_epi32(s3, 0x93);
+
+			s0 = _mm_add_epi32(s0, s1);
+			s3 = _mm_xor_si128(s3, s0);
+			s3 = _mm_or_si128(
+				_mm_slli_epi32(s3, 16),
+				_mm_srli_epi32(s3, 16));
+
+			s2 = _mm_add_epi32(s2, s3);
+			s1 = _mm_xor_si128(s1, s2);
+			s1 = _mm_or_si128(
+				_mm_slli_epi32(s1, 12),
+				_mm_srli_epi32(s1, 20));
+
+			s0 = _mm_add_epi32(s0, s1);
+			s3 = _mm_xor_si128(s3, s0);
+			s3 = _mm_or_si128(
+				_mm_slli_epi32(s3, 8),
+				_mm_srli_epi32(s3, 24));
+
+			s2 = _mm_add_epi32(s2, s3);
+			s1 = _mm_xor_si128(s1, s2);
+			s1 = _mm_or_si128(
+				_mm_slli_epi32(s1, 7),
+				_mm_srli_epi32(s1, 25));
+
+			/*
+			 * After the odd round, we rotate back the values
+			 * to undo the rotate at the start of the odd round.
+			 */
+			s1 = _mm_shuffle_epi32(s1, 0x93);
+			s2 = _mm_shuffle_epi32(s2, 0x4E);
+			s3 = _mm_shuffle_epi32(s3, 0x39);
+		}
+
+		/*
+		 * Addition with the initial state.
+		 */
+		s0 = _mm_add_epi32(s0, cw);
+		s1 = _mm_add_epi32(s1, kw0);
+		s2 = _mm_add_epi32(s2, kw1);
+		s3 = _mm_add_epi32(s3, iw);
+
+		/*
+		 * Increment block counter.
+		 */
+		iw = _mm_add_epi32(iw, one);
+
+		/*
+		 * XOR final state with the data.
+		 */
+		if (len < 64) {
+			/*
+			 * Final partial block: the key stream goes
+			 * through a temporary buffer, and only 'len'
+			 * bytes are XORed. The counter was already
+			 * incremented for this block.
+			 */
+			unsigned char tmp[64];
+			size_t u;
+
+			_mm_storeu_si128((void *)(tmp +  0), s0);
+			_mm_storeu_si128((void *)(tmp + 16), s1);
+			_mm_storeu_si128((void *)(tmp + 32), s2);
+			_mm_storeu_si128((void *)(tmp + 48), s3);
+			for (u = 0; u < len; u ++) {
+				buf[u] ^= tmp[u];
+			}
+			break;
+		} else {
+			/*
+			 * Full block: XOR directly, with unaligned
+			 * loads and stores on the data buffer.
+			 */
+			__m128i b0, b1, b2, b3;
+
+			b0 = _mm_loadu_si128((const void *)(buf +  0));
+			b1 = _mm_loadu_si128((const void *)(buf + 16));
+			b2 = _mm_loadu_si128((const void *)(buf + 32));
+			b3 = _mm_loadu_si128((const void *)(buf + 48));
+			b0 = _mm_xor_si128(b0, s0);
+			b1 = _mm_xor_si128(b1, s1);
+			b2 = _mm_xor_si128(b2, s2);
+			b3 = _mm_xor_si128(b3, s3);
+			_mm_storeu_si128((void *)(buf +  0), b0);
+			_mm_storeu_si128((void *)(buf + 16), b1);
+			_mm_storeu_si128((void *)(buf + 32), b2);
+			_mm_storeu_si128((void *)(buf + 48), b3);
+			buf += 64;
+			len -= 64;
+		}
+	}
+
+	/*
+	 * _mm_extract_epi32() requires SSE4.1. We prefer to stick to
+	 * raw SSE2, thus we use _mm_extract_epi16().
+	 */
+	return (uint32_t)_mm_extract_epi16(iw, 0)
+		| ((uint32_t)_mm_extract_epi16(iw, 1) << 16);
+}
+
+BR_TARGETS_X86_DOWN
+
+#else
+
+/* see bearssl_block.h */
+/*
+ * Fallback used when BR_SSE2 is not enabled: no SSE2 implementation is
+ * compiled, so 0 is always returned.
+ */
+br_chacha20_run
+br_chacha20_sse2_get(void)
+{
+	return 0;
+}
+
+#endif
--- a/third_party/bearssl/src/config.h
+++ b/third_party/bearssl/src/config.h
@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef CONFIG_H__
+#define CONFIG_H__
+
+/*
+ * This file contains compile-time flags that can override the
+ * autodetection performed in relevant files. Each flag is a macro; it
+ * deactivates the feature if defined to 0, activates it if defined to a
+ * non-zero integer (normally 1). If the macro is not defined, then
+ * autodetection applies.
+ */
+
+/*
+ * When BR_64 is enabled, 64-bit integer types are assumed to be
+ * efficient (i.e. the architecture has 64-bit registers and can
+ * do 64-bit operations as fast as 32-bit operations).
+ *
+#define BR_64   1
+ */
+
+/*
+ * When BR_LOMUL is enabled, then multiplications of 32-bit values whose
+ * results are truncated to the low 32 bits are assumed to be
+ * substantially more efficient than 32-bit multiplications that yield
+ * 64-bit results. This is typically the case on low-end ARM Cortex M
+ * systems (M0, M0+, M1, and arguably M3 and M4 as well).
+ *
+#define BR_LOMUL   1
+ */
+
+/*
+ * When BR_SLOW_MUL is enabled, multiplications are assumed to be
+ * substantially slower than other integer operations, thus making it
+ * worthwhile to perform more operations for a given task if that
+ * allows using fewer multiplications.
+ *
+#define BR_SLOW_MUL   1
+ */
+
+/*
+ * When BR_SLOW_MUL15 is enabled, short multiplications (on 15-bit words)
+ * are assumed to be substantially slower than other integer operations,
+ * thus making it worthwhile to perform more integer operations if that
+ * allows using fewer multiplications.
+ *
+#define BR_SLOW_MUL15   1
+ */
+
+/*
+ * When BR_CT_MUL31 is enabled, multiplications of 31-bit values (used
+ * in the "i31" big integer implementation) use an alternate implementation
+ * which is slower and larger than the normal multiplication, but should
+ * ensure constant-time multiplications even on architectures where the
+ * multiplication opcode takes a variable number of cycles to complete.
+ *
+#define BR_CT_MUL31   1
+ */
+
+/*
+ * When BR_CT_MUL15 is enabled, multiplications of 15-bit values (held
+ * in 32-bit words) use an alternate implementation which is slower and
+ * larger than the normal multiplication, but should ensure
+ * constant-time multiplications on most/all architectures where the
+ * basic multiplication is not constant-time.
+#define BR_CT_MUL15   1
+ */
+
+/*
+ * When BR_NO_ARITH_SHIFT is enabled, arithmetic right shifts (with sign
+ * extension) are performed with a sequence of operations which is bigger
+ * and slower than a simple right shift on a signed value. This avoids
+ * relying on an implementation-defined behaviour. However, most if not
+ * all C compilers use sign extension for right shifts on signed values,
+ * so this alternate macro is disabled by default.
+#define BR_NO_ARITH_SHIFT   1
+ */
+
+/*
+ * When BR_RDRAND is enabled, the SSL engine will use the RDRAND opcode
+ * to automatically obtain quality randomness for seeding its internal
+ * PRNG. Since that opcode is present only in recent x86 CPUs, its
+ * support is dynamically tested; if the current CPU does not support
+ * it, then another random source will be used, such as /dev/urandom or
+ * CryptGenRandom().
+ *
+#define BR_RDRAND   1
+ */
+
+/*
+ * When BR_USE_GETENTROPY is enabled, the SSL engine will use the
+ * getentropy() function to obtain quality randomness for seeding its
+ * internal PRNG. On Linux and FreeBSD, getentropy() is implemented by
+ * the standard library with the system call getrandom(); on OpenBSD,
+ * getentropy() is the system call, and there is no getrandom() wrapper,
+ * hence the use of the getentropy() function for maximum portability.
+ *
+ * If the getentropy() call fails, and BR_USE_URANDOM is not explicitly
+ * disabled, then /dev/urandom will be used as a fallback mechanism. On
+ * FreeBSD and OpenBSD, this does not change much, since /dev/urandom
+ * will block if not enough entropy has been obtained since last boot.
+ * On Linux, /dev/urandom might not block, which can be troublesome in
+ * early boot stages, which is why getentropy() is preferred.
+ *
+#define BR_USE_GETENTROPY   1
+ */
+
+/*
+ * When BR_USE_URANDOM is enabled, the SSL engine will use /dev/urandom
+ * to automatically obtain quality randomness for seeding its internal
+ * PRNG.
+ *
+#define BR_USE_URANDOM   1
+ */
+
+/*
+ * When BR_USE_WIN32_RAND is enabled, the SSL engine will use the Win32
+ * (CryptoAPI) functions (CryptAcquireContext(), CryptGenRandom()...) to
+ * automatically obtain quality randomness for seeding its internal PRNG.
+ *
+ * Note: if both BR_USE_URANDOM and BR_USE_WIN32_RAND are defined, the
+ * former takes precedence.
+ *
+#define BR_USE_WIN32_RAND   1
+ */
+
+/*
+ * When BR_USE_UNIX_TIME is enabled, the X.509 validation engine obtains
+ * the current time from the OS by calling time(), and assuming that the
+ * returned value (a 'time_t') is an integer that counts time in seconds
+ * since the Unix Epoch (Jan 1st, 1970, 00:00 UTC).
+ *
+#define BR_USE_UNIX_TIME   1
+ */
+
+/*
+ * When BR_USE_WIN32_TIME is enabled, the X.509 validation engine obtains
+ * the current time from the OS by calling the Win32 function
+ * GetSystemTimeAsFileTime().
+ *
+ * Note: if both BR_USE_UNIX_TIME and BR_USE_WIN32_TIME are defined, the
+ * former takes precedence.
+ *
+#define BR_USE_WIN32_TIME   1
+ */
+
+/*
+ * When BR_ARMEL_CORTEXM_GCC is enabled, some operations are replaced with
+ * inline assembly which is shorter and/or faster. This should be used
+ * only when all of the following are true:
+ *   - target architecture is ARM in Thumb mode
+ *   - target endianness is little-endian
+ *   - compiler is GCC (or GCC-compatible for inline assembly syntax)
+ *
+ * This is meant for the low-end cores (Cortex M0, M0+, M1, M3).
+ * Note: if BR_LOMUL is not explicitly enabled or disabled, then
+ * enabling BR_ARMEL_CORTEXM_GCC also enables BR_LOMUL.
+ *
+#define BR_ARMEL_CORTEXM_GCC   1
+ */
+
+/*
+ * When BR_AES_X86NI is enabled, the AES implementation using the x86 "NI"
+ * instructions (dedicated AES opcodes) will be compiled. If this is not
+ * enabled explicitly, then that AES implementation will be compiled only
+ * if a compatible compiler is detected. If set explicitly to 0, the
+ * implementation will not be compiled at all.
+ *
+#define BR_AES_X86NI   1
+ */
+
+/*
+ * When BR_SSE2 is enabled, SSE2 intrinsics will be used for some
+ * algorithm implementations that use them (e.g. chacha20_sse2). If this
+ * is not enabled explicitly, then support for SSE2 intrinsics will be
+ * automatically detected. If set explicitly to 0, then SSE2 code will
+ * not be compiled at all.
+ *
+#define BR_SSE2   1
+ */
+
+/*
+ * When BR_POWER8 is enabled, the AES implementation using the POWER ISA
+ * 2.07 opcodes (available on POWER8 processors and later) is compiled.
+ * If this is not enabled explicitly, then that implementation will be
+ * compiled only if a compatible compiler is detected, _and_ the target
+ * architecture is POWER8 or later.
+ *
+#define BR_POWER8   1
+ */
+
+/*
+ * When BR_INT128 is enabled, then code using the 'unsigned __int64'
+ * and 'unsigned __int128' types will be used to leverage 64x64->128
+ * unsigned multiplications. This should work with GCC and compatible
+ * compilers on 64-bit architectures.
+ *
+#define BR_INT128   1
+ */
+
+/*
+ * When BR_UMUL128 is enabled, then code using the '_umul128()' and
+ * '_addcarry_u64()' intrinsics will be used to implement 64x64->128
+ * unsigned multiplications. This should work on Visual C on x64 systems.
+ *
+#define BR_UMUL128   1
+ */
+
+/*
+ * When BR_LE_UNALIGNED is enabled, then the current architecture is
+ * assumed to use little-endian encoding for integers, and to tolerate
+ * unaligned accesses with no or minimal time penalty.
+ *
+#define BR_LE_UNALIGNED   1
+ */
+
+/*
+ * When BR_BE_UNALIGNED is enabled, then the current architecture is
+ * assumed to use big-endian encoding for integers, and to tolerate
+ * unaligned accesses with no or minimal time penalty.
+ *
+#define BR_BE_UNALIGNED   1
+ */
+
+#endif
--- a/third_party/bearssl/src/dec16be.c
+++ b/third_party/bearssl/src/dec16be.c
@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Decode 'num' consecutive 16-bit values from 'src' into the array 'v'
+ * (see inner.h). Element number u is obtained by applying br_dec16be()
+ * to the two bytes that start at offset 2*u in 'src'.
+ */
+void
+br_range_dec16be(uint16_t *v, size_t num, const void *src)
+{
+	const unsigned char *in;
+	size_t u;
+
+	in = src;
+	for (u = 0; u < num; u ++) {
+		v[u] = br_dec16be(in + (u << 1));
+	}
+}
--- a/third_party/bearssl/src/dec16le.c
+++ b/third_party/bearssl/src/dec16le.c
@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Decode 'num' consecutive 16-bit values from 'src' into the array 'v'
+ * (see inner.h). Element number u is obtained by applying br_dec16le()
+ * to the two bytes that start at offset 2*u in 'src'.
+ */
+void
+br_range_dec16le(uint16_t *v, size_t num, const void *src)
+{
+	const unsigned char *in;
+	size_t u;
+
+	in = src;
+	for (u = 0; u < num; u ++) {
+		v[u] = br_dec16le(in + (u << 1));
+	}
+}
--- a/third_party/bearssl/src/dec32be.c
+++ b/third_party/bearssl/src/dec32be.c
@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Decode 'num' consecutive 32-bit values from 'src' into the array 'v'
+ * (see inner.h). Element number u is obtained by applying br_dec32be()
+ * to the four bytes that start at offset 4*u in 'src'.
+ */
+void
+br_range_dec32be(uint32_t *v, size_t num, const void *src)
+{
+	const unsigned char *in;
+	size_t u;
+
+	in = src;
+	for (u = 0; u < num; u ++) {
+		v[u] = br_dec32be(in + (u << 2));
+	}
+}
--- a/third_party/bearssl/src/dec32le.c
+++ b/third_party/bearssl/src/dec32le.c
@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Decode 'num' consecutive 32-bit values from 'src' into the array 'v'
+ * (see inner.h). Element number u is obtained by applying br_dec32le()
+ * to the four bytes that start at offset 4*u in 'src'.
+ */
+void
+br_range_dec32le(uint32_t *v, size_t num, const void *src)
+{
+	const unsigned char *in;
+	size_t u;
+
+	in = src;
+	for (u = 0; u < num; u ++) {
+		v[u] = br_dec32le(in + (u << 2));
+	}
+}
--- a/third_party/bearssl/src/dec64be.c
+++ b/third_party/bearssl/src/dec64be.c
@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Decode 'num' consecutive 64-bit values from 'src' into the array 'v'
+ * (see inner.h). Element number u is obtained by applying br_dec64be()
+ * to the eight bytes that start at offset 8*u in 'src'.
+ */
+void
+br_range_dec64be(uint64_t *v, size_t num, const void *src)
+{
+	const unsigned char *in;
+	size_t u;
+
+	in = src;
+	for (u = 0; u < num; u ++) {
+		v[u] = br_dec64be(in + (u << 3));
+	}
+}
--- a/third_party/bearssl/src/dec64le.c
+++ b/third_party/bearssl/src/dec64le.c
@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Decode 'num' consecutive 64-bit values from 'src' into the array 'v'
+ * (see inner.h). Element number u is obtained by applying br_dec64le()
+ * to the eight bytes that start at offset 8*u in 'src'.
+ */
+void
+br_range_dec64le(uint64_t *v, size_t num, const void *src)
+{
+	const unsigned char *in;
+	size_t u;
+
+	in = src;
+	for (u = 0; u < num; u ++) {
+		v[u] = br_dec64le(in + (u << 3));
+	}
+}
--- a/third_party/bearssl/src/des_ct.c
+++ b/third_party/bearssl/src/des_ct.c
@ -0,0 +1,411 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * During key schedule, we need to apply bit extraction PC-2 then permute
+ * things into our bitslice representation. PC-2 extracts 48 bits out
+ * of two 28-bit words (kl and kr), and we store these bits into two
+ * 32-bit words sk0 and sk1.
+ *
+ *  -- bit 31-x of sk0 comes from bit QL0[x] of kl
+ *  -- bit 15-x of sk0 comes from bit QR0[x] of kr
+ *  -- bit 31-x of sk1 comes from bit QL1[x] of kl
+ *  -- bit 15-x of sk1 comes from bit QR1[x] of kr
+ */
+
+/*
+ * Source bit positions in kl for the upper 16 bits of sk0. Entry j is
+ * consumed at iteration j of the PC-2 loop in keysched_unit(); since
+ * the word is shifted left before each insertion, entry 0 ends up in
+ * the most significant position.
+ */
+static const unsigned char QL0[] = {
+	17,  4, 27, 23, 13, 22,  7, 18,
+	16, 24,  2, 20,  1,  8, 15, 26
+};
+
+/*
+ * Source bit positions in kr for the lower 16 bits of sk0 (same
+ * ordering convention as QL0).
+ */
+static const unsigned char QR0[] = {
+	25, 19,  9,  1,  5, 11, 23,  8,
+	17,  0, 22,  3,  6, 20, 27, 24
+};
+
+/*
+ * Source bit positions in kl for the upper 16 bits of sk1. kl holds
+ * only 28 bits (bits 0 to 27), so an entry of 28 always extracts a
+ * zero bit: only eight of the sixteen positions carry key material.
+ */
+static const unsigned char QL1[] = {
+	28, 28, 14, 11, 28, 28, 25,  0,
+	28, 28,  5,  9, 28, 28, 12, 21
+};
+
+/*
+ * Source bit positions in kr for the lower 16 bits of sk1. As in QL1,
+ * an entry of 28 designates a bit of kr that is always zero.
+ */
+static const unsigned char QR1[] = {
+	28, 28, 15,  4, 28, 28, 26, 16,
+	28, 28, 12,  7, 28, 28, 10, 14
+};
+
+/*
+ * 32-bit left rotation of 'x' by 'n' bits. The C compiler is supposed
+ * to recognize it as a rotation and use the local architecture rotation
+ * opcode (if available).
+ *
+ * The rotation count is reduced modulo 32, and the complementary right
+ * shift count is masked as well, so that a count of 0 (or 32) returns
+ * 'x' unchanged instead of shifting a 32-bit value by 32 bits, which
+ * is undefined behaviour in C. Counts from 1 to 31 yield the same
+ * result as the plain (x << n) | (x >> (32 - n)) expression.
+ */
+static inline uint32_t
+rotl(uint32_t x, int n)
+{
+	unsigned s;
+
+	s = (unsigned)n & 31u;
+	return (x << s) | (x >> ((32u - s) & 31u));
+}
+
+/*
+ * Compute key schedule for 8 key bytes (produces 32 subkey words).
+ *
+ * 'key' points to the 8 key bytes. 'skey' receives 16 pairs of words
+ * (one pair per round): br_des_keysched_unit() first fills it with the
+ * rotated 28-bit key words (kl, kr) of each round, and each pair is
+ * then replaced in place with the PC-2 extracted, bitslice-ordered
+ * words (sk0, sk1).
+ */
+static void
+keysched_unit(uint32_t *skey, const void *key)
+{
+	int i;
+
+	br_des_keysched_unit(skey, key);
+
+	/*
+	 * Apply PC-2 + bitslicing.
+	 */
+	for (i = 0; i < 16; i ++) {
+		uint32_t kl, kr, sk0, sk1;
+		int j;
+
+		kl = skey[(i << 1) + 0];
+		kr = skey[(i << 1) + 1];
+		sk0 = 0;
+		sk1 = 0;
+		/*
+		 * Both output words are shifted left by one bit before
+		 * the new bits are inserted at positions 16 and 0; table
+		 * entry j thus ends up at bit 31-j (QL* tables) or bit
+		 * 15-j (QR* tables) of the final word. kl and kr hold 28
+		 * bits only, so a table entry of 28 extracts a zero bit.
+		 */
+		for (j = 0; j < 16; j ++) {
+			sk0 <<= 1;
+			sk1 <<= 1;
+			sk0 |= ((kl >> QL0[j]) & (uint32_t)1) << 16;
+			sk0 |= (kr >> QR0[j]) & (uint32_t)1;
+			sk1 |= ((kl >> QL1[j]) & (uint32_t)1) << 16;
+			sk1 |= (kr >> QR1[j]) & (uint32_t)1;
+		}
+
+		skey[(i << 1) + 0] = sk0;
+		skey[(i << 1) + 1] = sk1;
+	}
+
+#if 0
+		/*
+		 * Speed-optimized version for PC-2 + bitslicing.
+		 * (Unused. Kept for reference only.)
+		 */
+		sk0 = kl & (uint32_t)0x00100000;
+		sk0 |= (kl & (uint32_t)0x08008000) << 2;
+		sk0 |= (kl & (uint32_t)0x00400000) << 4;
+		sk0 |= (kl & (uint32_t)0x00800000) << 5;
+		sk0 |= (kl & (uint32_t)0x00040000) << 6;
+		sk0 |= (kl & (uint32_t)0x00010000) << 7;
+		sk0 |= (kl & (uint32_t)0x00000100) << 10;
+		sk0 |= (kl & (uint32_t)0x00022000) << 14;
+		sk0 |= (kl & (uint32_t)0x00000082) << 18;
+		sk0 |= (kl & (uint32_t)0x00000004) << 19;
+		sk0 |= (kl & (uint32_t)0x04000000) >> 10;
+		sk0 |= (kl & (uint32_t)0x00000010) << 26;
+		sk0 |= (kl & (uint32_t)0x01000000) >> 2;
+
+		sk0 |= kr & (uint32_t)0x00000100;
+		sk0 |= (kr & (uint32_t)0x00000008) << 1;
+		sk0 |= (kr & (uint32_t)0x00000200) << 4;
+		sk0 |= rotl(kr & (uint32_t)0x08000021, 6);
+		sk0 |= (kr & (uint32_t)0x01000000) >> 24;
+		sk0 |= (kr & (uint32_t)0x00000002) << 11;
+		sk0 |= (kr & (uint32_t)0x00100000) >> 18;
+		sk0 |= (kr & (uint32_t)0x00400000) >> 17;
+		sk0 |= (kr & (uint32_t)0x00800000) >> 14;
+		sk0 |= (kr & (uint32_t)0x02020000) >> 10;
+		sk0 |= (kr & (uint32_t)0x00080000) >> 5;
+		sk0 |= (kr & (uint32_t)0x00000040) >> 3;
+		sk0 |= (kr & (uint32_t)0x00000800) >> 1;
+
+		sk1 = kl & (uint32_t)0x02000000;
+		sk1 |= (kl & (uint32_t)0x00001000) << 5;
+		sk1 |= (kl & (uint32_t)0x00000200) << 11;
+		sk1 |= (kl & (uint32_t)0x00004000) << 15;
+		sk1 |= (kl & (uint32_t)0x00000020) << 16;
+		sk1 |= (kl & (uint32_t)0x00000800) << 17;
+		sk1 |= (kl & (uint32_t)0x00000001) << 24;
+		sk1 |= (kl & (uint32_t)0x00200000) >> 5;
+
+		sk1 |= (kr & (uint32_t)0x00000010) << 8;
+		sk1 |= (kr & (uint32_t)0x04000000) >> 17;
+		sk1 |= (kr & (uint32_t)0x00004000) >> 14;
+		sk1 |= (kr & (uint32_t)0x00000400) >> 9;
+		sk1 |= (kr & (uint32_t)0x00010000) >> 8;
+		sk1 |= (kr & (uint32_t)0x00001000) >> 7;
+		sk1 |= (kr & (uint32_t)0x00000080) >> 3;
+		sk1 |= (kr & (uint32_t)0x00008000) >> 2;
+#endif
+}
+
+/*
+ * Key schedule for DES and 3DES (see inner.h).
+ *
+ * 'skey' receives 32 words per DES instance. The returned value is the
+ * number of DES instances to chain: 1 for an 8-byte key, 3 otherwise.
+ * With a 16-byte key, the third schedule is a copy of the first one.
+ * Any length other than 8 or 16 is processed as a 24-byte key. In the
+ * multi-key cases the second schedule is passed through
+ * br_des_rev_skey(), i.e. it is stored in reverse round order.
+ */
+unsigned
+br_des_ct_keysched(uint32_t *skey, const void *key, size_t key_len)
+{
+	const unsigned char *kb;
+
+	kb = key;
+	keysched_unit(skey, kb);
+	if (key_len == 8) {
+		return 1;
+	}
+
+	keysched_unit(skey + 32, kb + 8);
+	br_des_rev_skey(skey + 32);
+	if (key_len == 16) {
+		memcpy(skey + 64, skey, 32 * sizeof *skey);
+	} else {
+		keysched_unit(skey + 64, kb + 16);
+	}
+	return 3;
+}
+
+/*
+ * DES confusion function. This function performs expansion E (32 to
+ * 48 bits), XOR with subkey, S-boxes, and permutation P.
+ *
+ * 'r0' is the 32-bit half-block fed to the function; 'sk' points to
+ * the six expanded subkey words of the current round (sk[0] to sk[5]
+ * are read). The returned value is the 32-bit word obtained after the
+ * P permutation.
+ */
+static inline uint32_t
+Fconf(uint32_t r0, const uint32_t *sk)
+{
+	/*
+	 * Each 6->4 S-box is virtually turned into four 6->1 boxes; we
+	 * thus end up with 32 boxes that we call "T-boxes" here. We will
+	 * evaluate them with bitslice code.
+	 *
+	 * Each T-box is a circuit of multiplexers (sort of) and thus
+	 * takes 70 inputs: the 6 actual T-box inputs, and 64 constants
+	 * that describe the T-box output for all combinations of the
+	 * 6 inputs. With this model, all T-boxes are identical (with
+	 * distinct inputs) and thus can be executed in parallel with
+	 * bitslice code.
+	 *
+	 * T-boxes are numbered from 0 to 31, in least-to-most
+	 * significant order. Thus, S-box S1 corresponds to T-boxes 31,
+	 * 30, 29 and 28, in that order. T-box 'n' is computed with the
+	 * bits at rank 'n' in the 32-bit words.
+	 *
+	 * Words x0 to x5 contain the T-box inputs 0 to 5.
+	 */
+	uint32_t x0, x1, x2, x3, x4, x5, z0;
+	uint32_t y0, y1, y2, y3, y4, y5, y6, y7, y8, y9;
+	uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
+	uint32_t y20, y21, y22, y23, y24, y25, y26, y27, y28, y29;
+	uint32_t y30;
+
+	/*
+	 * Spread input bits over the 6 input words x*.
+	 */
+	x1 = r0 & (uint32_t)0x11111111;
+	x2 = (r0 >> 1) & (uint32_t)0x11111111;
+	x3 = (r0 >> 2) & (uint32_t)0x11111111;
+	x4 = (r0 >> 3) & (uint32_t)0x11111111;
+	/*
+	 * At this point each word has at most the lowest bit of each
+	 * nibble set; computing (x << 4) - x, i.e. 15*x, replicates
+	 * that bit over the whole nibble.
+	 */
+	x1 = (x1 << 4) - x1;
+	x2 = (x2 << 4) - x2;
+	x3 = (x3 << 4) - x3;
+	x4 = (x4 << 4) - x4;
+	/*
+	 * x0 is x4 rotated left by 4 bits; x5 is x1 rotated right by
+	 * 4 bits.
+	 */
+	x0 = (x4 << 4) | (x4 >> 28);
+	x5 = (x1 >> 4) | (x1 << 28);
+
+	/*
+	 * XOR with the subkey for this round.
+	 */
+	x0 ^= sk[0];
+	x1 ^= sk[1];
+	x2 ^= sk[2];
+	x3 ^= sk[3];
+	x4 ^= sk[4];
+	x5 ^= sk[5];
+
+	/*
+	 * The T-boxes are done in parallel, since they all use a
+	 * "tree of multiplexer". We use "fake multiplexers":
+	 *
+	 *   y = a ^ (x & b)
+	 *
+	 * computes y as either 'a' (if x == 0) or 'a ^ b' (if x == 1).
+	 */
+	/*
+	 * First level: selection on x0 among constants. The words y15
+	 * and y31 would be all-zero, hence they are not computed.
+	 */
+	y0 = (uint32_t)0xEFA72C4D ^ (x0 & (uint32_t)0xEC7AC69C);
+	y1 = (uint32_t)0xAEAAEDFF ^ (x0 & (uint32_t)0x500FB821);
+	y2 = (uint32_t)0x37396665 ^ (x0 & (uint32_t)0x40EFA809);
+	y3 = (uint32_t)0x68D7B833 ^ (x0 & (uint32_t)0xA5EC0B28);
+	y4 = (uint32_t)0xC9C755BB ^ (x0 & (uint32_t)0x252CF820);
+	y5 = (uint32_t)0x73FC3606 ^ (x0 & (uint32_t)0x40205801);
+	y6 = (uint32_t)0xA2A0A918 ^ (x0 & (uint32_t)0xE220F929);
+	y7 = (uint32_t)0x8222BD90 ^ (x0 & (uint32_t)0x44A3F9E1);
+	y8 = (uint32_t)0xD6B6AC77 ^ (x0 & (uint32_t)0x794F104A);
+	y9 = (uint32_t)0x3069300C ^ (x0 & (uint32_t)0x026F320B);
+	y10 = (uint32_t)0x6CE0D5CC ^ (x0 & (uint32_t)0x7640B01A);
+	y11 = (uint32_t)0x59A9A22D ^ (x0 & (uint32_t)0x238F1572);
+	y12 = (uint32_t)0xAC6D0BD4 ^ (x0 & (uint32_t)0x7A63C083);
+	y13 = (uint32_t)0x21C83200 ^ (x0 & (uint32_t)0x11CCA000);
+	y14 = (uint32_t)0xA0E62188 ^ (x0 & (uint32_t)0x202F69AA);
+	/* y15 = (uint32_t)0x00000000 ^ (x0 & (uint32_t)0x00000000); */
+	y16 = (uint32_t)0xAF7D655A ^ (x0 & (uint32_t)0x51B33BE9);
+	y17 = (uint32_t)0xF0168AA3 ^ (x0 & (uint32_t)0x3B0FE8AE);
+	y18 = (uint32_t)0x90AA30C6 ^ (x0 & (uint32_t)0x90BF8816);
+	y19 = (uint32_t)0x5AB2750A ^ (x0 & (uint32_t)0x09E34F9B);
+	y20 = (uint32_t)0x5391BE65 ^ (x0 & (uint32_t)0x0103BE88);
+	y21 = (uint32_t)0x93372BAF ^ (x0 & (uint32_t)0x49AC8E25);
+	y22 = (uint32_t)0xF288210C ^ (x0 & (uint32_t)0x922C313D);
+	y23 = (uint32_t)0x920AF5C0 ^ (x0 & (uint32_t)0x70EF31B0);
+	y24 = (uint32_t)0x63D312C0 ^ (x0 & (uint32_t)0x6A707100);
+	y25 = (uint32_t)0x537B3006 ^ (x0 & (uint32_t)0xB97C9011);
+	y26 = (uint32_t)0xA2EFB0A5 ^ (x0 & (uint32_t)0xA320C959);
+	y27 = (uint32_t)0xBC8F96A5 ^ (x0 & (uint32_t)0x6EA0AB4A);
+	y28 = (uint32_t)0xFAD176A5 ^ (x0 & (uint32_t)0x6953DDF8);
+	y29 = (uint32_t)0x665A14A3 ^ (x0 & (uint32_t)0xF74F3E2B);
+	y30 = (uint32_t)0xF2EFF0CC ^ (x0 & (uint32_t)0xF0306CAD);
+	/* y31 = (uint32_t)0x00000000 ^ (x0 & (uint32_t)0x00000000); */
+
+	/*
+	 * Selection on x1: 32 candidate words are reduced to 16. The
+	 * y* variables are reused to hold the results of each level.
+	 */
+	y0 = y0 ^ (x1 & y1);
+	y1 = y2 ^ (x1 & y3);
+	y2 = y4 ^ (x1 & y5);
+	y3 = y6 ^ (x1 & y7);
+	y4 = y8 ^ (x1 & y9);
+	y5 = y10 ^ (x1 & y11);
+	y6 = y12 ^ (x1 & y13);
+	y7 = y14; /* was: y14 ^ (x1 & y15) */
+	y8 = y16 ^ (x1 & y17);
+	y9 = y18 ^ (x1 & y19);
+	y10 = y20 ^ (x1 & y21);
+	y11 = y22 ^ (x1 & y23);
+	y12 = y24 ^ (x1 & y25);
+	y13 = y26 ^ (x1 & y27);
+	y14 = y28 ^ (x1 & y29);
+	y15 = y30; /* was: y30 ^ (x1 & y31) */
+
+	/* Selection on x2: 16 words down to 8. */
+	y0 = y0 ^ (x2 & y1);
+	y1 = y2 ^ (x2 & y3);
+	y2 = y4 ^ (x2 & y5);
+	y3 = y6 ^ (x2 & y7);
+	y4 = y8 ^ (x2 & y9);
+	y5 = y10 ^ (x2 & y11);
+	y6 = y12 ^ (x2 & y13);
+	y7 = y14 ^ (x2 & y15);
+
+	/* Selection on x3: 8 words down to 4. */
+	y0 = y0 ^ (x3 & y1);
+	y1 = y2 ^ (x3 & y3);
+	y2 = y4 ^ (x3 & y5);
+	y3 = y6 ^ (x3 & y7);
+
+	/* Selection on x4: 4 words down to 2. */
+	y0 = y0 ^ (x4 & y1);
+	y1 = y2 ^ (x4 & y3);
+
+	/* Selection on x5: y0 now holds the 32 T-box outputs. */
+	y0 = y0 ^ (x5 & y1);
+
+	/*
+	 * The P permutation:
+	 * -- Each bit move is converted into a mask + left rotation.
+	 * -- Rotations that use the same movement are coalesced together.
+	 * -- Left and right shifts are used as alternatives to a rotation
+	 * where appropriate (this will help architectures that do not have
+	 * a rotation opcode).
+	 */
+	z0 = (y0 & (uint32_t)0x00000004) << 3;
+	z0 |= (y0 & (uint32_t)0x00004000) << 4;
+	z0 |= rotl(y0 & 0x12020120, 5);
+	z0 |= (y0 & (uint32_t)0x00100000) << 6;
+	z0 |= (y0 & (uint32_t)0x00008000) << 9;
+	z0 |= (y0 & (uint32_t)0x04000000) >> 22;
+	z0 |= (y0 & (uint32_t)0x00000001) << 11;
+	z0 |= rotl(y0 & 0x20000200, 12);
+	z0 |= (y0 & (uint32_t)0x00200000) >> 19;
+	z0 |= (y0 & (uint32_t)0x00000040) << 14;
+	z0 |= (y0 & (uint32_t)0x00010000) << 15;
+	z0 |= (y0 & (uint32_t)0x00000002) << 16;
+	z0 |= rotl(y0 & 0x40801800, 17);
+	z0 |= (y0 & (uint32_t)0x00080000) >> 13;
+	z0 |= (y0 & (uint32_t)0x00000010) << 21;
+	z0 |= (y0 & (uint32_t)0x01000000) >> 10;
+	z0 |= rotl(y0 & 0x88000008, 24);
+	z0 |= (y0 & (uint32_t)0x00000480) >> 7;
+	z0 |= (y0 & (uint32_t)0x00442000) >> 6;
+	return z0;
+}
+
+/*
+ * Run one DES instance (16 rounds) over the two half-blocks *pl and
+ * *pr. Each round consumes six words of the expanded subkey array
+ * 'sk_exp' (96 words in total). The two halves are written back
+ * crossed, which amounts to omitting the swap of the final round.
+ */
+static void
+process_block_unit(uint32_t *pl, uint32_t *pr, const uint32_t *sk_exp)
+{
+	const uint32_t *rk, *rk_end;
+	uint32_t left, right;
+
+	left = *pl;
+	right = *pr;
+	rk_end = sk_exp + 96;
+	for (rk = sk_exp; rk != rk_end; rk += 6) {
+		uint32_t next;
+
+		next = left ^ Fconf(right, rk);
+		left = right;
+		right = next;
+	}
+	*pl = right;
+	*pr = left;
+}
+
+/*
+ * Process one 8-byte block in place (see inner.h).
+ *
+ * The block is decoded as two big-endian 32-bit words, run through IP,
+ * then through 'num_rounds' successive DES instances, each of which
+ * uses its own group of 96 words from 'sk_exp', and finally through
+ * the inverse of IP before being encoded back into 'block'.
+ */
+void
+br_des_ct_process_block(unsigned num_rounds,
+	const uint32_t *sk_exp, void *block)
+{
+	unsigned char *bytes;
+	uint32_t hi, lo;
+	unsigned u;
+
+	bytes = block;
+	hi = br_dec32be(bytes);
+	lo = br_dec32be(bytes + 4);
+	br_des_do_IP(&hi, &lo);
+	for (u = 0; u < num_rounds; u ++) {
+		process_block_unit(&hi, &lo, sk_exp + (size_t)96 * u);
+	}
+	br_des_do_invIP(&hi, &lo);
+	br_enc32be(bytes, hi);
+	br_enc32be(bytes + 4, lo);
+}
+
+/*
+ * Expand subkeys for the bitsliced round function (see inner.h).
+ *
+ * For each of the 16 * num_rounds rounds, two words are read from
+ * 'skey' and six words are written to 'sk_exp': four words derived
+ * from the first input word (bits of rank 0, 1, 2 and 3 of each
+ * nibble) and two words derived from the second one (bits of rank 0
+ * and 1 of each nibble). In each output word, the selected bit is
+ * replicated over its whole nibble (multiplication by 15).
+ */
+void
+br_des_ct_skey_expand(uint32_t *sk_exp,
+	unsigned num_rounds, const uint32_t *skey)
+{
+	unsigned total, u;
+
+	total = num_rounds << 4;
+	for (u = 0; u < total; u ++) {
+		uint32_t word, bits;
+		int k;
+
+		word = *skey ++;
+		for (k = 0; k < 4; k ++) {
+			bits = (word >> k) & 0x11111111;
+			*sk_exp ++ = (bits << 4) - bits;
+		}
+		word = *skey ++;
+		for (k = 0; k < 2; k ++) {
+			bits = (word >> k) & 0x11111111;
+			*sk_exp ++ = (bits << 4) - bits;
+		}
+	}
+}
--- a/third_party/bearssl/src/des_ct_cbcdec.c
+++ b/third_party/bearssl/src/des_ct_cbcdec.c
@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Initialise a CBC decryption context (see bearssl_block.h).
+ *
+ * The key schedule is computed with br_des_ct_keysched(), then the
+ * round order is reversed: with an 8-byte key, the 16 subkey pairs of
+ * the single schedule are reversed with br_des_rev_skey(); otherwise
+ * the complete 96-word schedule (48 pairs) is reversed pair by pair.
+ */
+void
+br_des_ct_cbcdec_init(br_des_ct_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	int head, tail;
+
+	ctx->vtable = &br_des_ct_cbcdec_vtable;
+	ctx->num_rounds = br_des_ct_keysched(ctx->skey, key, len);
+	if (len == 8) {
+		br_des_rev_skey(ctx->skey);
+		return;
+	}
+	for (head = 0, tail = 94; head < tail; head += 2, tail -= 2) {
+		uint32_t w;
+
+		w = ctx->skey[head];
+		ctx->skey[head] = ctx->skey[tail];
+		ctx->skey[tail] = w;
+		w = ctx->skey[head + 1];
+		ctx->skey[head + 1] = ctx->skey[tail + 1];
+		ctx->skey[tail + 1] = w;
+	}
+}
+
+/*
+ * CBC decryption, in place (see bearssl_block.h).
+ *
+ * 'data' is processed by chunks of 8 bytes; 'len' is decreased by 8
+ * for each chunk until it reaches 0, so it is expected to be a
+ * multiple of 8. 'iv' points to the 8-byte chaining value: it is XORed
+ * into each decrypted block and then replaced with the corresponding
+ * ciphertext block, so that on exit it holds the last ciphertext
+ * block. The subkeys are expanded into a local array on each call.
+ */
+void
+br_des_ct_cbcdec_run(const br_des_ct_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *cur, *chain;
+	uint32_t sk_exp[288];
+
+	br_des_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	chain = iv;
+	cur = data;
+	for (; len > 0; len -= 8, cur += 8) {
+		unsigned char saved[8];
+		int k;
+
+		/* The ciphertext block is needed as next chaining value. */
+		memcpy(saved, cur, sizeof saved);
+		br_des_ct_process_block(ctx->num_rounds, sk_exp, cur);
+		for (k = 0; k < 8; k ++) {
+			cur[k] ^= chain[k];
+		}
+		memcpy(chain, saved, sizeof saved);
+	}
+}
+
+/*
+ * Vtable for CBC decryption with the 'des_ct' implementation (see
+ * bearssl_block.h). The init and run functions take a pointer to the
+ * concrete br_des_ct_cbcdec_keys structure, hence they are cast to the
+ * generic function pointer types used by br_block_cbcdec_class.
+ */
+const br_block_cbcdec_class br_des_ct_cbcdec_vtable = {
+	/* Size (in bytes) of the context structure. */
+	sizeof(br_des_ct_cbcdec_keys),
+	/*
+	 * NOTE(review): presumably the block size in bytes (8) and its
+	 * base-2 logarithm (3) -- confirm against the definition of
+	 * br_block_cbcdec_class in bearssl_block.h.
+	 */
+	8,
+	3,
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))
+		&br_des_ct_cbcdec_init,
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+		&br_des_ct_cbcdec_run
+};
--- a/third_party/bearssl/src/des_ct_cbcenc.c
+++ b/third_party/bearssl/src/des_ct_cbcenc.c
@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Initialise a CBC encryption context (see bearssl_block.h): the key
+ * schedule is computed into ctx->skey, the number of DES instances
+ * returned by br_des_ct_keysched() is recorded, and the vtable pointer
+ * is set.
+ */
+void
+br_des_ct_cbcenc_init(br_des_ct_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	unsigned rounds;
+
+	rounds = br_des_ct_keysched(ctx->skey, key, len);
+	ctx->num_rounds = rounds;
+	ctx->vtable = &br_des_ct_cbcenc_vtable;
+}
+
+/*
+ * CBC encryption, in place (see bearssl_block.h).
+ *
+ * 'data' is processed by chunks of 8 bytes; 'len' is decreased by 8
+ * for each chunk until it reaches 0, so it is expected to be a
+ * multiple of 8. 'iv' points to the 8-byte chaining value: it is XORed
+ * into each block before encryption and then replaced with the
+ * resulting ciphertext block. The subkeys are expanded into a local
+ * array on each call.
+ */
+void
+br_des_ct_cbcenc_run(const br_des_ct_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *cur, *chain;
+	uint32_t sk_exp[288];
+
+	br_des_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	chain = iv;
+	cur = data;
+	for (; len > 0; len -= 8, cur += 8) {
+		int k;
+
+		for (k = 0; k < 8; k ++) {
+			cur[k] ^= chain[k];
+		}
+		br_des_ct_process_block(ctx->num_rounds, sk_exp, cur);
+		memcpy(chain, cur, 8);
+	}
+}
+
+/*
+ * Vtable for CBC encryption with the 'des_ct' implementation (see
+ * bearssl_block.h). The init and run functions take a pointer to the
+ * concrete br_des_ct_cbcenc_keys structure, hence they are cast to the
+ * generic function pointer types used by br_block_cbcenc_class.
+ */
+const br_block_cbcenc_class br_des_ct_cbcenc_vtable = {
+	/* Size (in bytes) of the context structure. */
+	sizeof(br_des_ct_cbcenc_keys),
+	/*
+	 * NOTE(review): presumably the block size in bytes (8) and its
+	 * base-2 logarithm (3) -- confirm against the definition of
+	 * br_block_cbcenc_class in bearssl_block.h.
+	 */
+	8,
+	3,
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
+		&br_des_ct_cbcenc_init,
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+		&br_des_ct_cbcenc_run
+};
--- a/third_party/bearssl/src/des_support.c
+++ b/third_party/bearssl/src/des_support.c
@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Apply the IP permutation, in place, to the block held as the two
+ * 32-bit words *xl and *xr (see inner.h).
+ */
+void
+br_des_do_IP(uint32_t *xl, uint32_t *xr)
+{
+	/*
+	 * Permutation algorithm is initially from Richard Outerbridge;
+	 * implementation here is adapted from Crypto++ "des.cpp" file
+	 * (which is in public domain).
+	 *
+	 * Each group of three statements exchanges the bits selected by
+	 * the mask between one word and a right-shifted view of the
+	 * other word.
+	 */
+	uint32_t hi, lo, m;
+
+	hi = *xl;
+	lo = *xr;
+
+	m = (lo ^ (hi >> 4)) & (uint32_t)0x0F0F0F0F;
+	hi ^= m << 4;
+	lo ^= m;
+
+	m = (lo ^ (hi >> 16)) & (uint32_t)0x0000FFFF;
+	hi ^= m << 16;
+	lo ^= m;
+
+	m = (hi ^ (lo >> 2)) & (uint32_t)0x33333333;
+	lo ^= m << 2;
+	hi ^= m;
+
+	m = (hi ^ (lo >> 8)) & (uint32_t)0x00FF00FF;
+	lo ^= m << 8;
+	hi ^= m;
+
+	m = (lo ^ (hi >> 1)) & (uint32_t)0x55555555;
+	hi ^= m << 1;
+	lo ^= m;
+
+	*xl = hi;
+	*xr = lo;
+}
+
+/*
+ * Apply the inverse of the IP permutation, in place, to the block held
+ * as the two 32-bit words *xl and *xr (see inner.h).
+ */
+void
+br_des_do_invIP(uint32_t *xl, uint32_t *xr)
+{
+	/*
+	 * Same bit exchanges as in br_des_do_IP(), applied in the
+	 * reverse order.
+	 */
+	uint32_t hi, lo, m;
+
+	hi = *xl;
+	lo = *xr;
+
+	m = (lo ^ (hi >> 1)) & (uint32_t)0x55555555;
+	hi ^= m << 1;
+	lo ^= m;
+
+	m = (hi ^ (lo >> 8)) & (uint32_t)0x00FF00FF;
+	lo ^= m << 8;
+	hi ^= m;
+
+	m = (hi ^ (lo >> 2)) & (uint32_t)0x33333333;
+	lo ^= m << 2;
+	hi ^= m;
+
+	m = (lo ^ (hi >> 16)) & (uint32_t)0x0000FFFF;
+	hi ^= m << 16;
+	lo ^= m;
+
+	m = (lo ^ (hi >> 4)) & (uint32_t)0x0F0F0F0F;
+	hi ^= m << 4;
+	lo ^= m;
+
+	*xl = hi;
+	*xr = lo;
+}
+
+/*
+ * First part of the DES key schedule for one 8-byte key (see inner.h).
+ *
+ * The key is read from 'key' as two big-endian 32-bit words, PC-1 is
+ * applied to obtain the two 28-bit words kl and kr, and for each of
+ * the 16 rounds the rotated values are stored in 'skey' (kl at even
+ * indices, kr at odd indices; 32 words in total). PC-2 is not applied
+ * here.
+ */
+void
+br_des_keysched_unit(uint32_t *skey, const void *key)
+{
+	uint32_t xl, xr, kl, kr;
+	int i;
+
+	xl = br_dec32be(key);
+	xr = br_dec32be((const unsigned char *)key + 4);
+
+	/*
+	 * Permutation PC-1 is quite similar to the IP permutation.
+	 * Definition of IP (in FIPS 46-3 notations) is:
+	 *   58 50 42 34 26 18 10 2
+	 *   60 52 44 36 28 20 12 4
+	 *   62 54 46 38 30 22 14 6
+	 *   64 56 48 40 32 24 16 8
+	 *   57 49 41 33 25 17  9 1
+	 *   59 51 43 35 27 19 11 3
+	 *   61 53 45 37 29 21 13 5
+	 *   63 55 47 39 31 23 15 7
+	 *
+	 * Definition of PC-1 is:
+	 *   57 49 41 33 25 17  9 1
+	 *   58 50 42 34 26 18 10 2
+	 *   59 51 43 35 27 19 11 3
+	 *   60 52 44 36
+	 *   63 55 47 39 31 23 15 7
+	 *   62 54 46 38 30 22 14 6
+	 *   61 53 45 37 29 21 13 5
+	 *   28 20 12  4
+	 */
+	br_des_do_IP(&xl, &xr);
+	/*
+	 * Gather bytes of the IP output into the two 28-bit words; each
+	 * word is assembled from three full bytes and one 4-bit piece.
+	 */
+	kl = ((xr & (uint32_t)0xFF000000) >> 4)
+		| ((xl & (uint32_t)0xFF000000) >> 12)
+		| ((xr & (uint32_t)0x00FF0000) >> 12)
+		| ((xl & (uint32_t)0x00FF0000) >> 20);
+	kr = ((xr & (uint32_t)0x000000FF) << 20)
+		| ((xl & (uint32_t)0x0000FF00) << 4)
+		| ((xr & (uint32_t)0x0000FF00) >> 4)
+		| ((xl & (uint32_t)0x000F0000) >> 16);
+
+	/*
+	 * For each round, rotate the two 28-bit words kl and kr.
+	 * The extraction of the 48-bit subkey (PC-2) is not done yet.
+	 */
+	for (i = 0; i < 16; i ++) {
+		/*
+		 * 0x8103 has bits 0, 1, 8 and 15 set: for these values
+		 * of i the rotation is by one bit, otherwise by two bits.
+		 */
+		if ((1 << i) & 0x8103) {
+			kl = (kl << 1) | (kl >> 27);
+			kr = (kr << 1) | (kr >> 27);
+		} else {
+			kl = (kl << 2) | (kl >> 26);
+			kr = (kr << 2) | (kr >> 26);
+		}
+		/* Drop the bits that were shifted beyond the 28-bit width. */
+		kl &= (uint32_t)0x0FFFFFFF;
+		kr &= (uint32_t)0x0FFFFFFF;
+		skey[(i << 1) + 0] = kl;
+		skey[(i << 1) + 1] = kr;
+	}
+}
+
+/*
+ * Reverse, in place, the order of the 16 subkey pairs stored in the
+ * 32-word array 'skey' (see inner.h). The two words of each pair keep
+ * their relative order: pair number k is exchanged with pair number
+ * 15-k.
+ */
+void
+br_des_rev_skey(uint32_t *skey)
+{
+	int head, tail;
+
+	for (head = 0, tail = 30; head < tail; head += 2, tail -= 2) {
+		uint32_t w;
+
+		w = skey[head];
+		skey[head] = skey[tail];
+		skey[tail] = w;
+		w = skey[head + 1];
+		skey[head + 1] = skey[tail + 1];
+		skey[tail + 1] = w;
+	}
+}
--- a/third_party/bearssl/src/des_tab.c
+++ b/third_party/bearssl/src/des_tab.c
@ -0,0 +1,310 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * PC2left[x] tells where bit x goes when applying PC-2. 'x' is a bit
+ * position in the left rotated key word. Both positions are in normal
+ * order (rightmost bit is 0).
+ *
+ * Several entries share the value 24. Fconf() only reads bits 0 to 23
+ * of a subkey word (6-bit fields at shifts 0, 6, 12 and 18), so a
+ * source bit that is sent to position 24 is effectively dropped.
+ */
+static const unsigned char PC2left[] = {
+	16,  3,  7, 24, 20, 11, 24,
+	13,  2, 10, 24, 22,  5, 15,
+	23,  1,  9, 21, 12, 24,  6,
+	 4, 14, 18,  8, 17,  0, 19
+};
+
+/*
+ * Similar to PC2left[x], for the right rotated key word: PC2right[x]
+ * is the destination position of bit x (rightmost bit is 0). As in
+ * PC2left[], the value 24 marks a source bit that lands outside the
+ * 24 bits read by Fconf() and is therefore ignored.
+ */
+static const unsigned char PC2right[] = {
+	 8, 18, 24,  6, 22, 15,  3,
+	10, 12, 19,  5, 14, 11, 24,
+	 4, 23, 16,  9, 24, 20,  2,
+	24,  7, 13,  0, 21, 17,  1
+};
+
+/*
+ * S-boxes merged with a bit permutation: each table has 64 entries
+ * (one per 6-bit input) and every entry is already spread over its
+ * final bit positions in the 32-bit round output, so Fconf() can
+ * combine the eight lookups with a plain OR.
+ *
+ * NOTE(review): the original comment here read "S-boxes and PC-1
+ * merged". PC-1 belongs to the key schedule; the permutation folded
+ * into these tables looks like the round function's P permutation --
+ * confirm against FIPS 46-3.
+ *
+ * All S1 entries are subsets of the mask 0x00808202.
+ */
+static const uint32_t S1[] = {
+	0x00808200, 0x00000000, 0x00008000, 0x00808202,
+	0x00808002, 0x00008202, 0x00000002, 0x00008000,
+	0x00000200, 0x00808200, 0x00808202, 0x00000200,
+	0x00800202, 0x00808002, 0x00800000, 0x00000002,
+	0x00000202, 0x00800200, 0x00800200, 0x00008200,
+	0x00008200, 0x00808000, 0x00808000, 0x00800202,
+	0x00008002, 0x00800002, 0x00800002, 0x00008002,
+	0x00000000, 0x00000202, 0x00008202, 0x00800000,
+	0x00008000, 0x00808202, 0x00000002, 0x00808000,
+	0x00808200, 0x00800000, 0x00800000, 0x00000200,
+	0x00808002, 0x00008000, 0x00008200, 0x00800002,
+	0x00000200, 0x00000002, 0x00800202, 0x00008202,
+	0x00808202, 0x00008002, 0x00808000, 0x00800202,
+	0x00800002, 0x00000202, 0x00008202, 0x00808200,
+	0x00000202, 0x00800200, 0x00800200, 0x00000000,
+	0x00008002, 0x00008200, 0x00000000, 0x00808002
+};
+
+/*
+ * 64-entry lookup table for the second 6-bit field processed by
+ * Fconf(). All entries are subsets of the mask 0x40084010.
+ */
+static const uint32_t S2[] = {
+	0x40084010, 0x40004000, 0x00004000, 0x00084010,
+	0x00080000, 0x00000010, 0x40080010, 0x40004010,
+	0x40000010, 0x40084010, 0x40084000, 0x40000000,
+	0x40004000, 0x00080000, 0x00000010, 0x40080010,
+	0x00084000, 0x00080010, 0x40004010, 0x00000000,
+	0x40000000, 0x00004000, 0x00084010, 0x40080000,
+	0x00080010, 0x40000010, 0x00000000, 0x00084000,
+	0x00004010, 0x40084000, 0x40080000, 0x00004010,
+	0x00000000, 0x00084010, 0x40080010, 0x00080000,
+	0x40004010, 0x40080000, 0x40084000, 0x00004000,
+	0x40080000, 0x40004000, 0x00000010, 0x40084010,
+	0x00084010, 0x00000010, 0x00004000, 0x40000000,
+	0x00004010, 0x40084000, 0x00080000, 0x40000010,
+	0x00080010, 0x40004010, 0x40000010, 0x00080010,
+	0x00084000, 0x00000000, 0x40004000, 0x00004010,
+	0x40000000, 0x40080010, 0x40084010, 0x00084000
+};
+
+/*
+ * 64-entry lookup table for the third 6-bit field processed by
+ * Fconf(). All entries are subsets of the mask 0x04010104.
+ */
+static const uint32_t S3[] = {
+	0x00000104, 0x04010100, 0x00000000, 0x04010004,
+	0x04000100, 0x00000000, 0x00010104, 0x04000100,
+	0x00010004, 0x04000004, 0x04000004, 0x00010000,
+	0x04010104, 0x00010004, 0x04010000, 0x00000104,
+	0x04000000, 0x00000004, 0x04010100, 0x00000100,
+	0x00010100, 0x04010000, 0x04010004, 0x00010104,
+	0x04000104, 0x00010100, 0x00010000, 0x04000104,
+	0x00000004, 0x04010104, 0x00000100, 0x04000000,
+	0x04010100, 0x04000000, 0x00010004, 0x00000104,
+	0x00010000, 0x04010100, 0x04000100, 0x00000000,
+	0x00000100, 0x00010004, 0x04010104, 0x04000100,
+	0x04000004, 0x00000100, 0x00000000, 0x04010004,
+	0x04000104, 0x00010000, 0x04000000, 0x04010104,
+	0x00000004, 0x00010104, 0x00010100, 0x04000004,
+	0x04010000, 0x04000104, 0x00000104, 0x04010000,
+	0x00010104, 0x00000004, 0x04010004, 0x00010100
+};
+
+/*
+ * 64-entry lookup table for the fourth 6-bit field processed by
+ * Fconf(). All entries are subsets of the mask 0x80401040.
+ */
+static const uint32_t S4[] = {
+	0x80401000, 0x80001040, 0x80001040, 0x00000040,
+	0x00401040, 0x80400040, 0x80400000, 0x80001000,
+	0x00000000, 0x00401000, 0x00401000, 0x80401040,
+	0x80000040, 0x00000000, 0x00400040, 0x80400000,
+	0x80000000, 0x00001000, 0x00400000, 0x80401000,
+	0x00000040, 0x00400000, 0x80001000, 0x00001040,
+	0x80400040, 0x80000000, 0x00001040, 0x00400040,
+	0x00001000, 0x00401040, 0x80401040, 0x80000040,
+	0x00400040, 0x80400000, 0x00401000, 0x80401040,
+	0x80000040, 0x00000000, 0x00000000, 0x00401000,
+	0x00001040, 0x00400040, 0x80400040, 0x80000000,
+	0x80401000, 0x80001040, 0x80001040, 0x00000040,
+	0x80401040, 0x80000040, 0x80000000, 0x00001000,
+	0x80400000, 0x80001000, 0x00401040, 0x80400040,
+	0x80001000, 0x00001040, 0x00400000, 0x80401000,
+	0x00000040, 0x00400000, 0x00001000, 0x00401040
+};
+
+/*
+ * 64-entry lookup table for the fifth 6-bit field processed by
+ * Fconf(). All entries are subsets of the mask 0x21040080.
+ */
+static const uint32_t S5[] = {
+	0x00000080, 0x01040080, 0x01040000, 0x21000080,
+	0x00040000, 0x00000080, 0x20000000, 0x01040000,
+	0x20040080, 0x00040000, 0x01000080, 0x20040080,
+	0x21000080, 0x21040000, 0x00040080, 0x20000000,
+	0x01000000, 0x20040000, 0x20040000, 0x00000000,
+	0x20000080, 0x21040080, 0x21040080, 0x01000080,
+	0x21040000, 0x20000080, 0x00000000, 0x21000000,
+	0x01040080, 0x01000000, 0x21000000, 0x00040080,
+	0x00040000, 0x21000080, 0x00000080, 0x01000000,
+	0x20000000, 0x01040000, 0x21000080, 0x20040080,
+	0x01000080, 0x20000000, 0x21040000, 0x01040080,
+	0x20040080, 0x00000080, 0x01000000, 0x21040000,
+	0x21040080, 0x00040080, 0x21000000, 0x21040080,
+	0x01040000, 0x00000000, 0x20040000, 0x21000000,
+	0x00040080, 0x01000080, 0x20000080, 0x00040000,
+	0x00000000, 0x20040000, 0x01040080, 0x20000080
+};
+
+/*
+ * 64-entry lookup table for the sixth 6-bit field processed by
+ * Fconf(). All entries are subsets of the mask 0x10202008.
+ */
+static const uint32_t S6[] = {
+	0x10000008, 0x10200000, 0x00002000, 0x10202008,
+	0x10200000, 0x00000008, 0x10202008, 0x00200000,
+	0x10002000, 0x00202008, 0x00200000, 0x10000008,
+	0x00200008, 0x10002000, 0x10000000, 0x00002008,
+	0x00000000, 0x00200008, 0x10002008, 0x00002000,
+	0x00202000, 0x10002008, 0x00000008, 0x10200008,
+	0x10200008, 0x00000000, 0x00202008, 0x10202000,
+	0x00002008, 0x00202000, 0x10202000, 0x10000000,
+	0x10002000, 0x00000008, 0x10200008, 0x00202000,
+	0x10202008, 0x00200000, 0x00002008, 0x10000008,
+	0x00200000, 0x10002000, 0x10000000, 0x00002008,
+	0x10000008, 0x10202008, 0x00202000, 0x10200000,
+	0x00202008, 0x10202000, 0x00000000, 0x10200008,
+	0x00000008, 0x00002000, 0x10200000, 0x00202008,
+	0x00002000, 0x00200008, 0x10002008, 0x00000000,
+	0x10202000, 0x10000000, 0x00200008, 0x10002008
+};
+
+/*
+ * 64-entry lookup table for the seventh 6-bit field processed by
+ * Fconf(). All entries are subsets of the mask 0x02100401.
+ */
+static const uint32_t S7[] = {
+	0x00100000, 0x02100001, 0x02000401, 0x00000000,
+	0x00000400, 0x02000401, 0x00100401, 0x02100400,
+	0x02100401, 0x00100000, 0x00000000, 0x02000001,
+	0x00000001, 0x02000000, 0x02100001, 0x00000401,
+	0x02000400, 0x00100401, 0x00100001, 0x02000400,
+	0x02000001, 0x02100000, 0x02100400, 0x00100001,
+	0x02100000, 0x00000400, 0x00000401, 0x02100401,
+	0x00100400, 0x00000001, 0x02000000, 0x00100400,
+	0x02000000, 0x00100400, 0x00100000, 0x02000401,
+	0x02000401, 0x02100001, 0x02100001, 0x00000001,
+	0x00100001, 0x02000000, 0x02000400, 0x00100000,
+	0x02100400, 0x00000401, 0x00100401, 0x02100400,
+	0x00000401, 0x02000001, 0x02100401, 0x02100000,
+	0x00100400, 0x00000000, 0x00000001, 0x02100401,
+	0x00000000, 0x00100401, 0x02100000, 0x00000400,
+	0x02000001, 0x02000400, 0x00000400, 0x00100001
+};
+
+/*
+ * 64-entry lookup table for the eighth 6-bit field processed by
+ * Fconf(). All entries are subsets of the mask 0x08020820.
+ */
+static const uint32_t S8[] = {
+	0x08000820, 0x00000800, 0x00020000, 0x08020820,
+	0x08000000, 0x08000820, 0x00000020, 0x08000000,
+	0x00020020, 0x08020000, 0x08020820, 0x00020800,
+	0x08020800, 0x00020820, 0x00000800, 0x00000020,
+	0x08020000, 0x08000020, 0x08000800, 0x00000820,
+	0x00020800, 0x00020020, 0x08020020, 0x08020800,
+	0x00000820, 0x00000000, 0x00000000, 0x08020020,
+	0x08000020, 0x08000800, 0x00020820, 0x00020000,
+	0x00020820, 0x00020000, 0x08020800, 0x00000800,
+	0x00000020, 0x08020020, 0x00000800, 0x00020820,
+	0x08000800, 0x00000020, 0x08000020, 0x08020000,
+	0x08020020, 0x08000000, 0x00020000, 0x08000820,
+	0x00000000, 0x08020820, 0x00020020, 0x08000020,
+	0x08020000, 0x08000800, 0x08000820, 0x00000000,
+	0x08020820, 0x00020800, 0x00020800, 0x00000820,
+	0x00000820, 0x00020020, 0x08000000, 0x08020800
+};
+
+/*
+ * Round function: combine the 32-bit half-block r0 with the two 24-bit
+ * subkey words (skl, skr) and return the 32-bit result. Eight 6-bit
+ * indices are formed by XORing shifted views of the input with 6-bit
+ * slices of the subkey; each index selects one entry in S1..S8 and the
+ * eight entries are ORed together.
+ */
+static inline uint32_t
+Fconf(uint32_t r0, uint32_t skl, uint32_t skr)
+{
+	uint32_t swapped, acc;
+
+	/*
+	 * Half-swapped copy of the input, used for the first and last
+	 * lookups whose 6-bit windows wrap around the word boundary.
+	 */
+	swapped = (r0 >> 16) | (r0 << 16);
+
+	acc  = S1[((swapped >> 11) ^ (skl >> 18)) & 0x3F];
+	acc |= S2[((r0 >> 23) ^ (skl >> 12)) & 0x3F];
+	acc |= S3[((r0 >> 19) ^ (skl >>  6)) & 0x3F];
+	acc |= S4[((r0 >> 15) ^ skl) & 0x3F];
+	acc |= S5[((r0 >> 11) ^ (skr >> 18)) & 0x3F];
+	acc |= S6[((r0 >>  7) ^ (skr >> 12)) & 0x3F];
+	acc |= S7[((r0 >>  3) ^ (skr >>  6)) & 0x3F];
+	acc |= S8[((swapped >> 15) ^ skr) & 0x3F];
+	return acc;
+}
+
+/*
+ * Run 16 Feistel rounds over the half-blocks *pl and *pr, using the 32
+ * subkey words at skey (two words per round). The two halves are
+ * written back exchanged, as the original implementation does.
+ */
+static void
+process_block_unit(uint32_t *pl, uint32_t *pr, const uint32_t *skey)
+{
+	const uint32_t *sk;
+	uint32_t left, right;
+	int remaining;
+
+	left = *pl;
+	right = *pr;
+	sk = skey;
+	for (remaining = 16; remaining > 0; remaining --) {
+		uint32_t mixed;
+
+		mixed = left ^ Fconf(right, sk[0], sk[1]);
+		left = right;
+		right = mixed;
+		sk += 2;
+	}
+	*pl = right;
+	*pr = left;
+}
+
+/*
+ * see inner.h
+ *
+ * Process one 8-byte block in place. The block is decoded as two
+ * big-endian words, run through the initial permutation, then through
+ * 'num_rounds' successive 16-round units (each consuming 32 words of
+ * skey), then through the inverse permutation, and encoded back.
+ */
+void
+br_des_tab_process_block(unsigned num_rounds, const uint32_t *skey, void *block)
+{
+	unsigned char *bytes;
+	const uint32_t *unit_key;
+	uint32_t hi, lo;
+	unsigned pass;
+
+	bytes = block;
+	hi = br_dec32be(bytes);
+	lo = br_dec32be(bytes + 4);
+	br_des_do_IP(&hi, &lo);
+	unit_key = skey;
+	for (pass = 0; pass < num_rounds; pass ++) {
+		process_block_unit(&hi, &lo, unit_key);
+		unit_key += 32;
+	}
+	br_des_do_invIP(&hi, &lo);
+	br_enc32be(bytes, hi);
+	br_enc32be(bytes + 4, lo);
+}
+
+/*
+ * Full key schedule for one 8-byte key: br_des_keysched_unit() fills
+ * skey[0..31] with the rotated 28-bit halves, then each word is
+ * replaced by its PC-2 image, using PC2left[] for even-indexed words
+ * and PC2right[] for odd-indexed ones.
+ */
+static void
+keysched_unit(uint32_t *skey, const void *key)
+{
+	uint32_t *pair;
+
+	br_des_keysched_unit(skey, key);
+
+	for (pair = skey; pair != skey + 32; pair += 2) {
+		uint32_t left, right;
+		int bit;
+
+		left = 0;
+		right = 0;
+		for (bit = 0; bit < 28; bit ++) {
+			/* Move source bit 'bit' to its PC-2 position. */
+			left |= ((pair[0] >> bit) & 1) << PC2left[bit];
+			right |= ((pair[1] >> bit) & 1) << PC2right[bit];
+		}
+		pair[0] = left;
+		pair[1] = right;
+	}
+}
+
+/*
+ * see inner.h
+ *
+ * Build the subkeys for DES or 3DES and return the number of 16-round
+ * units to run per block:
+ *   key_len == 8   one unit (skey[0..31]); returns 1.
+ *   key_len == 16  three units; the third reuses the first unit's
+ *                  subkeys; returns 3.
+ *   otherwise      handled as a 24-byte key, three independent units;
+ *                  returns 3.
+ * In the three-unit cases the middle unit's subkeys are stored in
+ * reversed round order.
+ */
+unsigned
+br_des_tab_keysched(uint32_t *skey, const void *key, size_t key_len)
+{
+	const unsigned char *kb;
+
+	kb = key;
+	keysched_unit(skey, kb);
+	if (key_len == 8) {
+		return 1;
+	}
+
+	keysched_unit(skey + 32, kb + 8);
+	br_des_rev_skey(skey + 32);
+	if (key_len == 16) {
+		memcpy(skey + 64, skey, 32 * sizeof *skey);
+	} else {
+		keysched_unit(skey + 64, kb + 16);
+	}
+	return 3;
+}
--- a/third_party/bearssl/src/des_tab_cbcdec.c
+++ b/third_party/bearssl/src/des_tab_cbcdec.c
@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_block.h
+ *
+ * Initialise a CBC decryption context: install the vtable, compute the
+ * subkeys with br_des_tab_keysched(), then reverse the order of the
+ * two-word round entries (16 entries for an 8-byte key, all 48 entries
+ * otherwise) so that the shared block routine walks them backwards.
+ */
+void
+br_des_tab_cbcdec_init(br_des_tab_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	int lo, hi;
+
+	ctx->vtable = &br_des_tab_cbcdec_vtable;
+	ctx->num_rounds = br_des_tab_keysched(ctx->skey, key, len);
+	if (len == 8) {
+		br_des_rev_skey(ctx->skey);
+		return;
+	}
+
+	for (lo = 0, hi = 94; lo < 48; lo += 2, hi -= 2) {
+		uint32_t first, second;
+
+		first = ctx->skey[lo];
+		ctx->skey[lo] = ctx->skey[hi];
+		ctx->skey[hi] = first;
+		second = ctx->skey[lo + 1];
+		ctx->skey[lo + 1] = ctx->skey[hi + 1];
+		ctx->skey[hi + 1] = second;
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CBC-decrypt 'len' bytes of 'data' in place. For each 8-byte block the
+ * ciphertext is saved, the block is processed, XORed with the current
+ * IV, and the saved ciphertext becomes the IV for the next block; 'iv'
+ * is therefore updated on return. 'len' is consumed 8 bytes at a time,
+ * so it has to be a multiple of 8.
+ */
+void
+br_des_tab_cbcdec_run(const br_des_tab_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *chain, *blk;
+
+	chain = iv;
+	for (blk = data; len > 0; blk += 8, len -= 8) {
+		unsigned char saved[8];
+		size_t k;
+
+		memcpy(saved, blk, sizeof saved);
+		br_des_tab_process_block(ctx->num_rounds, ctx->skey, blk);
+		for (k = 0; k < sizeof saved; k ++) {
+			blk[k] ^= chain[k];
+		}
+		memcpy(chain, saved, sizeof saved);
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Method table for the table-based DES CBC decryption implementation.
+ * The function pointers are cast from the concrete context type to
+ * the generic class signatures.
+ * NOTE(review): the two integer fields are presumably the block size
+ * in bytes (8) and its base-2 logarithm (3) -- confirm against the
+ * br_block_cbcdec_class declaration.
+ */
+const br_block_cbcdec_class br_des_tab_cbcdec_vtable = {
+	sizeof(br_des_tab_cbcdec_keys),
+	8,
+	3,
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))
+		&br_des_tab_cbcdec_init,
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+		&br_des_tab_cbcdec_run
+};
--- a/third_party/bearssl/src/des_tab_cbcenc.c
+++ b/third_party/bearssl/src/des_tab_cbcenc.c
@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_block.h
+ *
+ * Initialise a CBC encryption context: compute the subkeys into
+ * ctx->skey, record the number of 16-round units returned by the key
+ * schedule, and install the vtable.
+ */
+void
+br_des_tab_cbcenc_init(br_des_tab_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_des_tab_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_des_tab_cbcenc_vtable;
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * CBC-encrypt 'len' bytes of 'data' in place. Each 8-byte block is
+ * XORed with the current IV, processed, and the resulting ciphertext
+ * is copied into 'iv' for the next block; 'iv' is therefore updated on
+ * return. 'len' is consumed 8 bytes at a time, so it has to be a
+ * multiple of 8.
+ */
+void
+br_des_tab_cbcenc_run(const br_des_tab_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *chain, *blk;
+
+	chain = iv;
+	for (blk = data; len > 0; blk += 8, len -= 8) {
+		size_t k;
+
+		for (k = 0; k < 8; k ++) {
+			blk[k] ^= chain[k];
+		}
+		br_des_tab_process_block(ctx->num_rounds, ctx->skey, blk);
+		memcpy(chain, blk, 8);
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Method table for the table-based DES CBC encryption implementation.
+ * The function pointers are cast from the concrete context type to
+ * the generic class signatures.
+ * NOTE(review): the two integer fields are presumably the block size
+ * in bytes (8) and its base-2 logarithm (3) -- confirm against the
+ * br_block_cbcenc_class declaration.
+ */
+const br_block_cbcenc_class br_des_tab_cbcenc_vtable = {
+	sizeof(br_des_tab_cbcenc_keys),
+	8,
+	3,
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
+		&br_des_tab_cbcenc_init,
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+		&br_des_tab_cbcenc_run
+};
--- a/third_party/bearssl/src/dig_oid.c
+++ b/third_party/bearssl/src/dig_oid.c
@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * This file contains the encoded OID for the standard hash functions.
+ * Such OID appear in, for instance, the PKCS#1 v1.5 padding for RSA
+ * signatures.
+ */
+
+/* MD5: 1.2.840.113549.2.5 (encoded OID value bytes, no tag/length). */
+static const unsigned char md5_OID[] = {
+	0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x02, 0x05
+};
+
+/* SHA-1: 1.3.14.3.2.26 */
+static const unsigned char sha1_OID[] = {
+	0x2B, 0x0E, 0x03, 0x02, 0x1A
+};
+
+/* SHA-224: 2.16.840.1.101.3.4.2.4 */
+static const unsigned char sha224_OID[] = {
+	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04
+};
+
+/* SHA-256: 2.16.840.1.101.3.4.2.1 */
+static const unsigned char sha256_OID[] = {
+	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01
+};
+
+/* SHA-384: 2.16.840.1.101.3.4.2.2 */
+static const unsigned char sha384_OID[] = {
+	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02
+};
+
+/* SHA-512: 2.16.840.1.101.3.4.2.3 */
+static const unsigned char sha512_OID[] = {
+	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03
+};
+
+/*
+ * see inner.h
+ *
+ * Return a pointer to the encoded OID for the hash function designated
+ * by 'digest_id' and store its length in *len. For an identifier that
+ * is not one of MD5, SHA-1, SHA-224, SHA-256, SHA-384 or SHA-512, the
+ * function returns NULL and sets *len to 0.
+ */
+const unsigned char *
+br_digest_OID(int digest_id, size_t *len)
+{
+	const unsigned char *oid;
+	size_t oid_len;
+
+	oid = NULL;
+	oid_len = 0;
+	if (digest_id == br_md5_ID) {
+		oid = md5_OID;
+		oid_len = sizeof md5_OID;
+	} else if (digest_id == br_sha1_ID) {
+		oid = sha1_OID;
+		oid_len = sizeof sha1_OID;
+	} else if (digest_id == br_sha224_ID) {
+		oid = sha224_OID;
+		oid_len = sizeof sha224_OID;
+	} else if (digest_id == br_sha256_ID) {
+		oid = sha256_OID;
+		oid_len = sizeof sha256_OID;
+	} else if (digest_id == br_sha384_ID) {
+		oid = sha384_OID;
+		oid_len = sizeof sha384_OID;
+	} else if (digest_id == br_sha512_ID) {
+		oid = sha512_OID;
+		oid_len = sizeof sha512_OID;
+	}
+	*len = oid_len;
+	return oid;
+}
--- a/third_party/bearssl/src/dig_size.c
+++ b/third_party/bearssl/src/dig_size.c
@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see inner.h
+ *
+ * Return the output size, in bytes, of the hash function designated by
+ * 'digest_id'. The combined MD5+SHA-1 identifier yields the sum of both
+ * sizes. An unrecognised identifier yields 0.
+ */
+size_t
+br_digest_size_by_ID(int digest_id)
+{
+	size_t size;
+
+	size = 0;
+	switch (digest_id) {
+	case br_md5sha1_ID:
+		size = br_md5_SIZE + br_sha1_SIZE;
+		break;
+	case br_md5_ID:
+		size = br_md5_SIZE;
+		break;
+	case br_sha1_ID:
+		size = br_sha1_SIZE;
+		break;
+	case br_sha224_ID:
+		size = br_sha224_SIZE;
+		break;
+	case br_sha256_ID:
+		size = br_sha256_SIZE;
+		break;
+	case br_sha384_ID:
+		size = br_sha384_SIZE;
+		break;
+	case br_sha512_ID:
+		size = br_sha512_SIZE;
+		break;
+	default:
+		/* Unknown identifier: report 0 rather than aborting. */
+		break;
+	}
+	return size;
+}
--- a/third_party/bearssl/src/eax.c
+++ b/third_party/bearssl/src/eax.c
@ -0,0 +1,525 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Implementation Notes
+ * ====================
+ *
+ * The combined CTR + CBC-MAC functions can only handle full blocks,
+ * so some buffering is necessary. Moreover, EAX has a special padding
+ * rule for CBC-MAC, which implies that we cannot compute the MAC over
+ * the last received full block until we know whether we are at the
+ * end of the data or not.
+ *
+ *  - 'ptr' contains a value from 1 to 16, which is the number of bytes
+ *    accumulated in buf[] that still needs to be processed with the
+ *    current OMAC computation. Beware that this can go to 16: a
+ *    complete block cannot be processed until it is known whether it
+ *    is the last block or not. However, it can never be 0, because
+ *    OMAC^t works on an input that is at least one-block long.
+ *
+ *  - When processing the message itself, CTR encryption/decryption is
+ *    also done at the same time. The first 'ptr' bytes of buf[] then
+ *    contains the encrypted bytes, while the last '16 - ptr' bytes of
+ *    buf[] are the remnants of the stream block, to be used against
+ *    the next input bytes, when available.
+ *
+ *  - The current counter and running CBC-MAC values are kept in 'ctr'
+ *    and 'cbcmac', respectively.
+ *
+ *  - The derived keys for padding are kept in L2 and L4 (double and
+ *    quadruple of Enc_K(0^n), in GF(2^128), respectively).
+ */
+
+/*
+ * Begin a new OMAC computation. The running CBC-MAC value is cleared
+ * and the first block, the big-endian encoding of 'val' (which must
+ * fit on one byte), is placed in buf[]. That block is left pending
+ * (ptr = 16) instead of being processed at once, because it may also
+ * turn out to be the last one.
+ */
+static void
+omac_start(br_eax_context *ctx, unsigned val)
+{
+	memset(ctx->buf, 0, sizeof ctx->buf);
+	ctx->buf[15] = (unsigned char)val;
+	ctx->ptr = 16;
+	memset(ctx->cbcmac, 0, sizeof ctx->cbcmac);
+}
+
+/*
+ * Multiply by X in GF(2^128) (modulus X^128+X^7+X^2+X+1): the 16-byte
+ * big-endian value at 'src' is shifted left by one bit and, when the
+ * bit shifted out was set, 0x87 is XORed into the low byte. The result
+ * is written to 'dst'. The reduction is selected with a mask rather
+ * than a branch.
+ */
+static void
+double_gf128(unsigned char *dst, const unsigned char *src)
+{
+	unsigned carry;
+	size_t k;
+
+	/* 0x87 if the top bit of the input is set, 0 otherwise. */
+	carry = -((unsigned)src[0] >> 7) & 0x87;
+	for (k = 16; k -- > 0;) {
+		unsigned wide;
+
+		wide = ((unsigned)src[k] << 1) ^ carry;
+		dst[k] = (unsigned char)wide;
+		carry = wide >> 8;
+	}
+}
+
+/*
+ * Finalize the current OMAC computation. The pending data is the first
+ * ctx->ptr bytes of ctx->buf. A full block (ptr == 16) is masked with
+ * L2; a shorter one is first extended with a 0x80 byte followed by
+ * zeros, then masked with L4. The masked block is fed to CBC-MAC, so
+ * the result ends up in ctx->cbcmac.
+ */
+static void
+do_pad(br_eax_context *ctx)
+{
+	const unsigned char *mask;
+	size_t used, k;
+
+	used = ctx->ptr;
+	mask = ctx->L2;
+	if (used != 16) {
+		ctx->buf[used] = 0x80;
+		used ++;
+		memset(ctx->buf + used, 0x00, 16 - used);
+		mask = ctx->L4;
+	}
+	for (k = 0; k < sizeof ctx->buf; k ++) {
+		ctx->buf[k] ^= mask[k];
+	}
+	(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, ctx->buf, sizeof ctx->buf);
+}
+
+/*
+ * Apply CBC-MAC on the provided data, with buffering management.
+ *
+ * On entry ctx->ptr must be either 0 (nothing pending in ctx->buf) or
+ * 16 (a full, not yet processed block is pending in ctx->buf).
+ *
+ * With len == 0 the context is left untouched. Otherwise the pending
+ * block (if any) and all input except its trailing 1 to 16 bytes are
+ * run through CBC-MAC; those trailing bytes are stored in ctx->buf and
+ * their count in ctx->ptr, which is thus non-zero on exit.
+ */
+static void
+do_cbcmac_chunk(br_eax_context *ctx, const void *data, size_t len)
+{
+	const unsigned char *src;
+	size_t tail, bulk;
+
+	if (len == 0) {
+		return;
+	}
+
+	/*
+	 * Always hold back between 1 and 16 bytes: the last block cannot
+	 * be processed until it is known to be the last.
+	 */
+	tail = len & (size_t)15;
+	if (tail == 0) {
+		tail = 16;
+	}
+	bulk = len - tail;
+	src = data;
+
+	if (ctx->ptr == 16) {
+		(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac,
+			ctx->buf, sizeof ctx->buf);
+	}
+	(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, data, bulk);
+	memcpy(ctx->buf, src + bulk, tail);
+	ctx->ptr = tail;
+}
+
+/*
+ * see bearssl_aead.h
+ *
+ * Initialise an EAX context over the block cipher context 'bctx' and
+ * precompute the two padding masks L2 and L4.
+ */
+void
+br_eax_init(br_eax_context *ctx, const br_block_ctrcbc_class **bctx)
+{
+	unsigned char zero_iv[16], block[16];
+
+	/*
+	 * Running CTR with an all-zero counter over an all-zero block
+	 * leaves the encryption of the zero block in 'block'.
+	 */
+	memset(zero_iv, 0, sizeof zero_iv);
+	memset(block, 0, sizeof block);
+
+	ctx->vtable = &br_eax_vtable;
+	ctx->bctx = bctx;
+	(*bctx)->ctr(bctx, zero_iv, block, sizeof block);
+
+	/* L2 is the double of that value, L4 the double of L2. */
+	double_gf128(ctx->L2, block);
+	double_gf128(ctx->L4, ctx->L2);
+}
+
+/*
+ * see bearssl_aead.h
+ *
+ * Fill 'st' with the three OMAC^0, OMAC^1 and OMAC^2 states as they
+ * are _after_ processing their initial block (this assumes that the
+ * nonce, the AAD and the message are all non-empty). st->st[i] is the
+ * CBC-MAC, starting from zero, of the 16-byte block whose last byte
+ * is i and whose other bytes are zero.
+ */
+void
+br_eax_capture(const br_eax_context *ctx, br_eax_state *st)
+{
+	unsigned char first[16];
+	int which;
+
+	memset(st->st, 0, sizeof st->st);
+	memset(first, 0, sizeof first);
+	for (which = 0; which < 3; which ++) {
+		first[15] = (unsigned char)which;
+		(*ctx->bctx)->mac(ctx->bctx, st->st[which],
+			first, sizeof first);
+	}
+}
+
+/*
+ * see bearssl_aead.h
+ *
+ * Start a new EAX computation with the given nonce ('len' bytes). The
+ * nonce is processed with OMAC^0 and the result saved in ctx->nonce;
+ * the context is then set up to receive the AAD (OMAC^1).
+ */
+void
+br_eax_reset(br_eax_context *ctx, const void *nonce, size_t len)
+{
+	/*
+	 * Process nonce with OMAC^0.
+	 */
+	omac_start(ctx, 0);
+	do_cbcmac_chunk(ctx, nonce, len);
+	do_pad(ctx);
+	memcpy(ctx->nonce, ctx->cbcmac, sizeof ctx->cbcmac);
+
+	/*
+	 * Start OMAC^1 for the AAD ("header" in the EAX specification).
+	 */
+	omac_start(ctx, 1);
+
+	/*
+	 * We use ctx->head[0] as temporary flag to mark that we are
+	 * using a "normal" reset(). br_eax_flip() reads it back.
+	 */
+	ctx->head[0] = 0;
+}
+
+/*
+ * see bearssl_aead.h
+ *
+ * Start a new EAX computation from a captured state 'st', before any
+ * AAD has been injected. The nonce is processed with OMAC^0, resuming
+ * from st->st[0] when the nonce is non-empty; the AAD MAC resumes from
+ * st->st[1]; st->st[2] is parked in ctx->ctr until br_eax_flip().
+ */
+void
+br_eax_reset_pre_aad(br_eax_context *ctx, const br_eax_state *st,
+	const void *nonce, size_t len)
+{
+	if (len == 0) {
+		/*
+		 * Empty nonce: the initial block is also the last one,
+		 * so the captured state cannot be used.
+		 */
+		omac_start(ctx, 0);
+	} else {
+		memcpy(ctx->cbcmac, st->st[0], sizeof ctx->cbcmac);
+		ctx->ptr = 0;
+		do_cbcmac_chunk(ctx, nonce, len);
+	}
+	do_pad(ctx);
+	memcpy(ctx->nonce, ctx->cbcmac, sizeof ctx->cbcmac);
+
+	/* Resume OMAC^1 (AAD) with no pending data in buf[]. */
+	memcpy(ctx->cbcmac, st->st[1], sizeof ctx->cbcmac);
+	ctx->ptr = 0;
+
+	memcpy(ctx->ctr, st->st[2], sizeof ctx->ctr);
+
+	/*
+	 * We use ctx->head[0] as a flag to indicate that we use a
+	 * recorded state, with ctx->ctr containing the preprocessed
+	 * first block for OMAC^2.
+	 */
+	ctx->head[0] = 1;
+}
+
+/*
+ * see bearssl_aead.h
+ *
+ * Start a new EAX computation from a captured state 'st', skipping
+ * the AAD phase. The nonce is processed with OMAC^0 (resuming from
+ * st->st[0] when it is non-empty) and also becomes the initial CTR
+ * value; ctx->head is loaded from st->st[1] and the running CBC-MAC
+ * from st->st[2], with no pending data in buf[].
+ * NOTE(review): st->st[1] is used here directly as the AAD value, so
+ * it presumably has to hold the completed AAD MAC at this point --
+ * confirm against bearssl_aead.h.
+ */
+void
+br_eax_reset_post_aad(br_eax_context *ctx, const br_eax_state *st,
+	const void *nonce, size_t len)
+{
+	if (len == 0) {
+		omac_start(ctx, 0);
+	} else {
+		memcpy(ctx->cbcmac, st->st[0], sizeof ctx->cbcmac);
+		ctx->ptr = 0;
+		do_cbcmac_chunk(ctx, nonce, len);
+	}
+	do_pad(ctx);
+	memcpy(ctx->nonce, ctx->cbcmac, sizeof ctx->cbcmac);
+	memcpy(ctx->ctr, ctx->nonce, sizeof ctx->nonce);
+
+	memcpy(ctx->head, st->st[1], sizeof ctx->head);
+
+	memcpy(ctx->cbcmac, st->st[2], sizeof ctx->cbcmac);
+	ctx->ptr = 0;
+}
+
+/*
+ * see bearssl_aead.h
+ *
+ * Inject 'len' bytes of additional authenticated data. May be called
+ * repeatedly with chunks of arbitrary length. Data is accumulated in
+ * ctx->buf; the most recent block always stays pending there (it may
+ * be the last one), and everything before it is run through CBC-MAC.
+ */
+void
+br_eax_aad_inject(br_eax_context *ctx, const void *data, size_t len)
+{
+	size_t ptr;
+
+	/*
+	 * Nothing to do for an empty chunk; returning early also avoids
+	 * handing a possibly NULL 'data' pointer to memcpy().
+	 */
+	if (len == 0) {
+		return;
+	}
+
+	ptr = ctx->ptr;
+
+	/*
+	 * If there is a partial block, first complete it.
+	 */
+	if (ptr < 16) {
+		size_t clen;
+
+		clen = 16 - ptr;
+		if (len <= clen) {
+			memcpy(ctx->buf + ptr, data, len);
+			ctx->ptr = ptr + len;
+			return;
+		}
+		memcpy(ctx->buf + ptr, data, clen);
+		data = (const unsigned char *)data + clen;
+		len -= clen;
+
+		/*
+		 * buf[] now holds a complete block. Record that in the
+		 * context: do_cbcmac_chunk() only runs the pending block
+		 * through CBC-MAC when ctx->ptr == 16, so without this
+		 * update the block would be overwritten unprocessed.
+		 */
+		ctx->ptr = 16;
+	}
+
+	/*
+	 * We now have a full block in buf[], and this is not the last
+	 * block.
+	 */
+	do_cbcmac_chunk(ctx, data, len);
+}
+
+/*
+ * see bearssl_aead.h
+ *
+ * End the AAD phase and switch to message processing: the OMAC over
+ * the AAD is finalized and saved in ctx->head, OMAC^2 is started (or
+ * resumed from a captured state), and the CTR counter is loaded with
+ * the processed nonce.
+ */
+void
+br_eax_flip(br_eax_context *ctx)
+{
+	int from_capture;
+
+	/*
+	 * ctx->head[0] may be non-zero if the context was reset with
+	 * a pre-AAD captured state. In that case, ctx->ctr[] contains
+	 * the state for OMAC^2 _after_ processing the first block.
+	 * The flag must be read now: ctx->head is overwritten below.
+	 */
+	from_capture = ctx->head[0];
+
+	/*
+	 * Complete the OMAC computation on the AAD.
+	 */
+	do_pad(ctx);
+	memcpy(ctx->head, ctx->cbcmac, sizeof ctx->cbcmac);
+
+	/*
+	 * Start OMAC^2 for the encrypted data.
+	 * If the context was initialized from a captured state, then
+	 * the OMAC^2 value is in the ctr[] array.
+	 */
+	if (from_capture) {
+		memcpy(ctx->cbcmac, ctx->ctr, sizeof ctx->cbcmac);
+		ctx->ptr = 0;
+	} else {
+		omac_start(ctx, 2);
+	}
+
+	/*
+	 * Initial counter value for CTR is the processed nonce.
+	 */
+	memcpy(ctx->ctr, ctx->nonce, sizeof ctx->nonce);
+}
+
+/*
+ * see bearssl_aead.h
+ *
+ * Encrypt (encrypt != 0) or decrypt (encrypt == 0) 'len' bytes of
+ * 'data' in place, and feed the ciphertext to the running OMAC^2.
+ * May be called repeatedly with chunks of arbitrary length.
+ *
+ * Between calls, the first ctx->ptr bytes of ctx->buf hold ciphertext
+ * that has not been MACed yet, and the remaining bytes hold unused
+ * key stream for the next input bytes.
+ */
+void
+br_eax_run(br_eax_context *ctx, int encrypt, void *data, size_t len)
+{
+	unsigned char *dbuf;
+	size_t ptr;
+
+	/*
+	 * Ensure that there is actual data to process.
+	 */
+	if (len == 0) {
+		return;
+	}
+
+	dbuf = data;
+	ptr = ctx->ptr;
+
+	/*
+	 * We may have ptr == 0 here if we initialized from a captured
+	 * state. In that case, there is no partially consumed block
+	 * or unprocessed data.
+	 */
+	if (ptr != 0 && ptr != 16) {
+		/*
+		 * We have a partially consumed block.
+		 */
+		size_t u, clen;
+
+		clen = 16 - ptr;
+		if (len <= clen) {
+			clen = len;
+		}
+		if (encrypt) {
+			/* buf[] holds key stream; turn it into ciphertext. */
+			for (u = 0; u < clen; u ++) {
+				ctx->buf[ptr + u] ^= dbuf[u];
+			}
+			memcpy(dbuf, ctx->buf + ptr, clen);
+		} else {
+			/*
+			 * Keep the incoming ciphertext in buf[] (for the
+			 * MAC) and write the plaintext to the caller.
+			 */
+			for (u = 0; u < clen; u ++) {
+				unsigned dx, sx;
+
+				sx = ctx->buf[ptr + u];
+				dx = dbuf[u];
+				ctx->buf[ptr + u] = dx;
+				dbuf[u] = sx ^ dx;
+			}
+		}
+
+		if (len <= clen) {
+			ctx->ptr = ptr + clen;
+			return;
+		}
+		dbuf += clen;
+		len -= clen;
+	}
+
+	/*
+	 * We now have a complete encrypted block in buf[] that must still
+	 * be processed with OMAC, and this is not the final buf.
+	 * Exception: when ptr == 0, no block has been produced yet.
+	 */
+	if (ptr != 0) {
+		(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac,
+			ctx->buf, sizeof ctx->buf);
+	}
+
+	/*
+	 * Do CTR encryption or decryption and CBC-MAC for all full blocks
+	 * except the last.
+	 */
+	/* 'ptr' becomes the size (1 to 16) of the trailing held-back part. */
+	ptr = len & (size_t)15;
+	if (ptr == 0) {
+		len -= 16;
+		ptr = 16;
+	} else {
+		len -= ptr;
+	}
+	if (encrypt) {
+		(*ctx->bctx)->encrypt(ctx->bctx, ctx->ctr, ctx->cbcmac,
+			dbuf, len);
+	} else {
+		(*ctx->bctx)->decrypt(ctx->bctx, ctx->ctr, ctx->cbcmac,
+			dbuf, len);
+	}
+	dbuf += len;
+
+	/*
+	 * Compute next block of CTR stream, and use it to finish
+	 * encrypting or decrypting the data.
+	 */
+	memset(ctx->buf, 0, sizeof ctx->buf);
+	(*ctx->bctx)->ctr(ctx->bctx, ctx->ctr, ctx->buf, sizeof ctx->buf);
+	if (encrypt) {
+		size_t u;
+
+		for (u = 0; u < ptr; u ++) {
+			ctx->buf[u] ^= dbuf[u];
+		}
+		memcpy(dbuf, ctx->buf, ptr);
+	} else {
+		size_t u;
+
+		for (u = 0; u < ptr; u ++) {
+			unsigned dx, sx;
+
+			sx = ctx->buf[u];
+			dx = dbuf[u];
+			ctx->buf[u] = dx;
+			dbuf[u] = sx ^ dx;
+		}
+	}
+	ctx->ptr = ptr;
+}
+
+/*
+ * Complete the tag computation: the OMAC over the encrypted data is
+ * finalized, then combined by XOR with the OMAC outputs for the nonce
+ * (ctx->nonce) and for the AAD (ctx->head). The final tag is left in
+ * ctx->cbcmac.
+ */
+static void
+do_final(br_eax_context *ctx)
+{
+	unsigned char *tag;
+	size_t k;
+
+	do_pad(ctx);
+
+	tag = ctx->cbcmac;
+	for (k = 0; k < 16; k ++) {
+		tag[k] = tag[k] ^ ctx->nonce[k] ^ ctx->head[k];
+	}
+}
+
+/*
+ * see bearssl_aead.h
+ *
+ * Finalize the computation and write the full authentication tag
+ * (sizeof ctx->cbcmac bytes) to 'tag'.
+ */
+void
+br_eax_get_tag(br_eax_context *ctx, void *tag)
+{
+	const unsigned char *mac;
+
+	do_final(ctx);
+	mac = ctx->cbcmac;
+	memcpy(tag, mac, sizeof ctx->cbcmac);
+}
+
+/*
+ * see bearssl_aead.h
+ *
+ * Finalize the computation and write the first 'len' bytes of the
+ * authentication tag to 'tag'. No bound check is made on 'len'.
+ */
+void
+br_eax_get_tag_trunc(br_eax_context *ctx, void *tag, size_t len)
+{
+	const unsigned char *mac;
+
+	do_final(ctx);
+	mac = ctx->cbcmac;
+	memcpy(tag, mac, len);
+}
+
+/*
+ * see bearssl_aead.h
+ *
+ * Finalize the computation and compare the first 'len' bytes of the
+ * computed tag with 'tag'. All 'len' bytes are always examined and the
+ * differences accumulated with OR; the result is EQ0() of that
+ * accumulator. No bound check is made on 'len' against the 16-byte
+ * local tag buffer.
+ */
+uint32_t
+br_eax_check_tag_trunc(br_eax_context *ctx, const void *tag, size_t len)
+{
+	unsigned char computed[16];
+	const unsigned char *expected;
+	size_t k;
+	int diff;
+
+	br_eax_get_tag(ctx, computed);
+	expected = tag;
+	diff = 0;
+	for (k = 0; k < len; k ++) {
+		diff |= computed[k] ^ expected[k];
+	}
+	return EQ0(diff);
+}
+
+/*
+ * see bearssl_aead.h
+ *
+ * Finalize the computation and compare the full 16-byte tag with
+ * 'tag'; returns what br_eax_check_tag_trunc() returns for 16 bytes.
+ */
+uint32_t
+br_eax_check_tag(br_eax_context *ctx, const void *tag)
+{
+	size_t full_len;
+
+	full_len = 16;
+	return br_eax_check_tag_trunc(ctx, tag, full_len);
+}
+
+/*
+ * see bearssl_aead.h
+ *
+ * Method table exposing the EAX implementation through the generic
+ * AEAD class. The function pointers are cast from the concrete
+ * br_eax_context signatures to the generic ones; they appear in the
+ * order reset, aad_inject, flip, run, get_tag, check_tag,
+ * get_tag_trunc, check_tag_trunc.
+ * NOTE(review): the leading 16 is presumably the tag size in bytes --
+ * confirm against the br_aead_class declaration.
+ */
+const br_aead_class br_eax_vtable = {
+	16,
+	(void (*)(const br_aead_class **, const void *, size_t))
+		&br_eax_reset,
+	(void (*)(const br_aead_class **, const void *, size_t))
+		&br_eax_aad_inject,
+	(void (*)(const br_aead_class **))
+		&br_eax_flip,
+	(void (*)(const br_aead_class **, int, void *, size_t))
+		&br_eax_run,
+	(void (*)(const br_aead_class **, void *))
+		&br_eax_get_tag,
+	(uint32_t (*)(const br_aead_class **, const void *))
+		&br_eax_check_tag,
+	(void (*)(const br_aead_class **, void *, size_t))
+		&br_eax_get_tag_trunc,
+	(uint32_t (*)(const br_aead_class **, const void *, size_t))
+		&br_eax_check_tag_trunc
+};
--- a/third_party/bearssl/src/ec_all_m15.c
+++ b/third_party/bearssl/src/ec_all_m15.c
@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+/*
+ * Dispatch the 'generator' call on the curve identifier:
+ * BR_EC_secp256r1 goes to br_ec_p256_m15, BR_EC_curve25519 goes to
+ * br_ec_c25519_m15, and every other value goes to br_ec_prime_i15.
+ * Both arguments are forwarded unchanged and the callee's result is
+ * returned as is.
+ */
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	switch (curve) {
+	case BR_EC_secp256r1:
+		return br_ec_p256_m15.generator(curve, len);
+	case BR_EC_curve25519:
+		return br_ec_c25519_m15.generator(curve, len);
+	default:
+		return br_ec_prime_i15.generator(curve, len);
+	}
+}
+/*
+ * Dispatch the 'order' call on the curve identifier:
+ * BR_EC_secp256r1 goes to br_ec_p256_m15, BR_EC_curve25519 goes to
+ * br_ec_c25519_m15, and every other value goes to br_ec_prime_i15.
+ * Both arguments are forwarded unchanged and the callee's result is
+ * returned as is.
+ */
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	switch (curve) {
+	case BR_EC_secp256r1:
+		return br_ec_p256_m15.order(curve, len);
+	case BR_EC_curve25519:
+		return br_ec_c25519_m15.order(curve, len);
+	default:
+		return br_ec_prime_i15.order(curve, len);
+	}
+}
+/*
+ * Dispatch the 'xoff' call on the curve identifier:
+ * BR_EC_secp256r1 goes to br_ec_p256_m15, BR_EC_curve25519 goes to
+ * br_ec_c25519_m15, and every other value goes to br_ec_prime_i15.
+ * Both arguments are forwarded unchanged and the callee's result is
+ * returned as is.
+ */
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	switch (curve) {
+	case BR_EC_secp256r1:
+		return br_ec_p256_m15.xoff(curve, len);
+	case BR_EC_curve25519:
+		return br_ec_c25519_m15.xoff(curve, len);
+	default:
+		return br_ec_prime_i15.xoff(curve, len);
+	}
+}
+/*
+ * Dispatch the 'mul' call on the curve identifier:
+ * BR_EC_secp256r1 goes to br_ec_p256_m15, BR_EC_curve25519 goes to
+ * br_ec_c25519_m15, and every other value goes to br_ec_prime_i15.
+ * All arguments are forwarded unchanged and the callee's result is
+ * returned as is.
+ */
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *kb, size_t kblen, int curve)
+{
+	switch (curve) {
+	case BR_EC_secp256r1:
+		return br_ec_p256_m15.mul(G, Glen, kb, kblen, curve);
+	case BR_EC_curve25519:
+		return br_ec_c25519_m15.mul(G, Glen, kb, kblen, curve);
+	default:
+		return br_ec_prime_i15.mul(G, Glen, kb, kblen, curve);
+	}
+}
+/*
+ * Dispatch the 'mulgen' call on the curve identifier:
+ * BR_EC_secp256r1 goes to br_ec_p256_m15, BR_EC_curve25519 goes to
+ * br_ec_c25519_m15, and every other value goes to br_ec_prime_i15.
+ * All arguments are forwarded unchanged and the callee's result is
+ * returned as is.
+ */
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	switch (curve) {
+	case BR_EC_secp256r1:
+		return br_ec_p256_m15.mulgen(R, x, xlen, curve);
+	case BR_EC_curve25519:
+		return br_ec_c25519_m15.mulgen(R, x, xlen, curve);
+	default:
+		return br_ec_prime_i15.mulgen(R, x, xlen, curve);
+	}
+}
+/*
+ * Dispatch the 'muladd' call on the curve identifier:
+ * BR_EC_secp256r1 goes to br_ec_p256_m15, BR_EC_curve25519 goes to
+ * br_ec_c25519_m15, and every other value goes to br_ec_prime_i15.
+ * All arguments are forwarded unchanged and the callee's result is
+ * returned as is.
+ */
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	switch (curve) {
+	case BR_EC_secp256r1:
+		return br_ec_p256_m15.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+	case BR_EC_curve25519:
+		return br_ec_c25519_m15.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+	default:
+		return br_ec_prime_i15.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+	}
+}
+
+/*
+ * see bearssl_ec.h
+ *
+ * Aggregate implementation: every function entry points at one of
+ * the api_* dispatchers of this file, which forward to
+ * br_ec_p256_m15, br_ec_c25519_m15 or br_ec_prime_i15 depending on
+ * the curve. NOTE(review): the first field (0x23800000) is presumably
+ * the bit mask of supported curve identifiers -- confirm against the
+ * br_ec_impl declaration.
+ */
+const br_ec_impl br_ec_all_m15 = {
+	(uint32_t)0x23800000,
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
--- a/third_party/bearssl/src/ec_all_m31.c
+++ b/third_party/bearssl/src/ec_all_m31.c
@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+/*
+ * Dispatch the 'generator' call on the curve identifier. For
+ * BR_EC_secp256r1 and BR_EC_curve25519 the callee is chosen at
+ * compile time: the *_m64 implementation when BR_INT128 or
+ * BR_UMUL128 is nonzero, the *_m31 implementation otherwise. Every
+ * other curve value goes to br_ec_prime_i31. Both arguments are
+ * forwarded unchanged and the callee's result is returned as is.
+ */
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	switch (curve) {
+	case BR_EC_secp256r1:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_p256_m64.generator(curve, len);
+#else
+		return br_ec_p256_m31.generator(curve, len);
+#endif
+	case BR_EC_curve25519:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_c25519_m64.generator(curve, len);
+#else
+		return br_ec_c25519_m31.generator(curve, len);
+#endif
+	default:
+		return br_ec_prime_i31.generator(curve, len);
+	}
+}
+/*
+ * Dispatch the 'order' call on the curve identifier. For
+ * BR_EC_secp256r1 and BR_EC_curve25519 the callee is chosen at
+ * compile time: the *_m64 implementation when BR_INT128 or
+ * BR_UMUL128 is nonzero, the *_m31 implementation otherwise. Every
+ * other curve value goes to br_ec_prime_i31. Both arguments are
+ * forwarded unchanged and the callee's result is returned as is.
+ */
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	switch (curve) {
+	case BR_EC_secp256r1:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_p256_m64.order(curve, len);
+#else
+		return br_ec_p256_m31.order(curve, len);
+#endif
+	case BR_EC_curve25519:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_c25519_m64.order(curve, len);
+#else
+		return br_ec_c25519_m31.order(curve, len);
+#endif
+	default:
+		return br_ec_prime_i31.order(curve, len);
+	}
+}
+/*
+ * Dispatch the 'xoff' call on the curve identifier. For
+ * BR_EC_secp256r1 and BR_EC_curve25519 the callee is chosen at
+ * compile time: the *_m64 implementation when BR_INT128 or
+ * BR_UMUL128 is nonzero, the *_m31 implementation otherwise. Every
+ * other curve value goes to br_ec_prime_i31. Both arguments are
+ * forwarded unchanged and the callee's result is returned as is.
+ */
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	switch (curve) {
+	case BR_EC_secp256r1:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_p256_m64.xoff(curve, len);
+#else
+		return br_ec_p256_m31.xoff(curve, len);
+#endif
+	case BR_EC_curve25519:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_c25519_m64.xoff(curve, len);
+#else
+		return br_ec_c25519_m31.xoff(curve, len);
+#endif
+	default:
+		return br_ec_prime_i31.xoff(curve, len);
+	}
+}
+/*
+ * Dispatch the 'mul' call on the curve identifier. For
+ * BR_EC_secp256r1 and BR_EC_curve25519 the callee is chosen at
+ * compile time: the *_m64 implementation when BR_INT128 or
+ * BR_UMUL128 is nonzero, the *_m31 implementation otherwise. Every
+ * other curve value goes to br_ec_prime_i31. All arguments are
+ * forwarded unchanged and the callee's result is returned as is.
+ */
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *kb, size_t kblen, int curve)
+{
+	switch (curve) {
+	case BR_EC_secp256r1:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_p256_m64.mul(G, Glen, kb, kblen, curve);
+#else
+		return br_ec_p256_m31.mul(G, Glen, kb, kblen, curve);
+#endif
+	case BR_EC_curve25519:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_c25519_m64.mul(G, Glen, kb, kblen, curve);
+#else
+		return br_ec_c25519_m31.mul(G, Glen, kb, kblen, curve);
+#endif
+	default:
+		return br_ec_prime_i31.mul(G, Glen, kb, kblen, curve);
+	}
+}
+/*
+ * Dispatch the 'mulgen' call on the curve identifier. For
+ * BR_EC_secp256r1 and BR_EC_curve25519 the callee is chosen at
+ * compile time: the *_m64 implementation when BR_INT128 or
+ * BR_UMUL128 is nonzero, the *_m31 implementation otherwise. Every
+ * other curve value goes to br_ec_prime_i31. All arguments are
+ * forwarded unchanged and the callee's result is returned as is.
+ */
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	switch (curve) {
+	case BR_EC_secp256r1:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_p256_m64.mulgen(R, x, xlen, curve);
+#else
+		return br_ec_p256_m31.mulgen(R, x, xlen, curve);
+#endif
+	case BR_EC_curve25519:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_c25519_m64.mulgen(R, x, xlen, curve);
+#else
+		return br_ec_c25519_m31.mulgen(R, x, xlen, curve);
+#endif
+	default:
+		return br_ec_prime_i31.mulgen(R, x, xlen, curve);
+	}
+}
+/*
+ * Dispatch the 'muladd' call on the curve identifier. For
+ * BR_EC_secp256r1 and BR_EC_curve25519 the callee is chosen at
+ * compile time: the *_m64 implementation when BR_INT128 or
+ * BR_UMUL128 is nonzero, the *_m31 implementation otherwise. Every
+ * other curve value goes to br_ec_prime_i31. All arguments are
+ * forwarded unchanged and the callee's result is returned as is.
+ */
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	switch (curve) {
+	case BR_EC_secp256r1:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_p256_m64.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+#else
+		return br_ec_p256_m31.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+#endif
+	case BR_EC_curve25519:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_c25519_m64.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+#else
+		return br_ec_c25519_m31.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+#endif
+	default:
+		return br_ec_prime_i31.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+	}
+}
+
+/*
+ * see bearssl_ec.h
+ *
+ * Aggregate implementation: every function entry points at one of
+ * the api_* dispatchers of this file, which forward to the P-256 and
+ * Curve25519 implementations (m64 or m31, chosen at compile time) or
+ * to br_ec_prime_i31 for other curves. NOTE(review): the first field
+ * (0x23800000) is presumably the bit mask of supported curve
+ * identifiers -- confirm against the br_ec_impl declaration.
+ */
+const br_ec_impl br_ec_all_m31 = {
+	(uint32_t)0x23800000,
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
--- a/third_party/bearssl/src/ec_c25519_i15.c
+++ b/third_party/bearssl/src/ec_c25519_i15.c
@ -0,0 +1,398 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Parameters for the field:
+ *   - field modulus p = 2^255-19
+ *   - R^2 mod p (R = 2^(15k) for the smallest k such that R >= p)
+ *
+ * Each array holds a header word (0x0110) followed by 17 value words
+ * of 15 bits, least significant word first: C255_P has 0x7FED in its
+ * first value word and 0x7FFF in the 16 others, which is 2^255-19.
+ * NOTE(review): the header word is presumably the encoded bit length
+ * expected by the br_i15_* functions -- confirm against inner.h.
+ *
+ * With 17 words of 15 bits, R = 2^255, hence R mod p = 19 and
+ * R^2 mod p = 361 = 0x0169, the only nonzero value word of C255_R2.
+ *
+ * P0I is the last argument given to br_i15_montymul() along with
+ * C255_P; numerically, 0x7FED * 0x4A1B = -1 mod 2^15.
+ */
+
+static const uint16_t C255_P[] = {
+	0x0110,
+	0x7FED, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF
+};
+
+#define P0I   0x4A1B
+
+static const uint16_t C255_R2[] = {
+	0x0110,
+	0x0169, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000
+};
+
+/* obsolete
+#include <stdio.h>
+#include <stdlib.h>
+static void
+print_int_mont(const char *name, const uint16_t *x)
+{
+	uint16_t y[18];
+	unsigned char tmp[32];
+	size_t u;
+
+	printf("%s = ", name);
+	memcpy(y, x, sizeof y);
+	br_i15_from_monty(y, C255_P, P0I);
+	br_i15_encode(tmp, sizeof tmp, y);
+	for (u = 0; u < sizeof tmp; u ++) {
+		printf("%02X", tmp[u]);
+	}
+	printf("\n");
+}
+*/
+/*
+ * Ladder constant, same word layout as C255_P. Its value is
+ * 0x0046 * 2^15 + 0x45D3 = 2311635 = 121665 * 19, i.e. 121665 times
+ * R mod p (19), the form expected by c255_mul(). api_mul() multiplies
+ * it with 'e' in each ladder step. NOTE(review): 121665 is presumably
+ * the (A-2)/4 curve constant of RFC 7748 -- confirm.
+ */
+static const uint16_t C255_A24[] = {
+	0x0110,
+	0x45D3, 0x0046, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000
+};
+/*
+ * Bytes returned by api_generator(): 0x09 followed by 31 zero bytes.
+ * api_mul() reads point encodings as little-endian, so this encodes
+ * the coordinate u = 9.
+ */
+static const unsigned char GEN[] = {
+	0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+/*
+ * Bytes returned by api_order(): 0x7F followed by 31 bytes 0xFF,
+ * which is 2^255-1 when read as a big-endian integer.
+ * NOTE(review): this is not the prime subgroup order of Curve25519;
+ * presumably callers only use it as an upper bound when producing
+ * private keys -- confirm against bearssl_ec.h.
+ */
+static const unsigned char ORDER[] = {
+	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+/*
+ * Return the GEN[] array and set *len to 32. The curve argument is
+ * ignored.
+ */
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	(void)curve;
+	*len = 32;
+	return GEN;
+}
+/*
+ * Return the ORDER[] array and set *len to 32. The curve argument is
+ * ignored.
+ */
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	(void)curve;
+	*len = 32;
+	return ORDER;
+}
+/*
+ * Return 0 and set *len to 32. The curve argument is ignored.
+ * NOTE(review): presumably this means the coordinate starts at
+ * offset 0 of an encoded point and spans 32 bytes -- confirm against
+ * bearssl_ec.h.
+ */
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	(void)curve;
+	*len = 32;
+	return 0;
+}
+/*
+ * Conditionally exchange the 18-word arrays a[] and b[].
+ *
+ * ctl is negated as an unsigned 32-bit value, so ctl == 1 gives an
+ * all-ones mask (the words are exchanged) and ctl == 0 gives a zero
+ * mask (both arrays are rewritten with their own contents). There is
+ * no branch on ctl: the same loads, XORs and stores run either way.
+ * api_mul() only passes 0 or 1.
+ */
+static void
+cswap(uint16_t *a, uint16_t *b, uint32_t ctl)
+{
+	int i;
+
+	ctl = -ctl;
+	for (i = 0; i < 18; i ++) {
+		uint32_t aw, bw, tw;
+
+		aw = a[i];
+		bw = b[i];
+		tw = ctl & (aw ^ bw);
+		a[i] = aw ^ tw;
+		b[i] = bw ^ tw;
+	}
+}
+/*
+ * Field addition: d = a + b, followed by a conditional subtraction
+ * of C255_P.
+ *
+ * The work is done on a local copy t[] of a[], and d[] is written
+ * last, so d may be the same array as a or b. ctl combines the value
+ * returned by br_i15_add() with the negated result of a
+ * br_i15_sub() call made with a control of 0; the second
+ * br_i15_sub() call receives ctl as its control.
+ * NOTE(review): presumably the br_i15_* return values are
+ * carry/borrow flags and a control of 0 means "compute the flag but
+ * leave t unchanged", which makes ctl equal to "t >= p" -- confirm
+ * against inner.h.
+ */
+static void
+c255_add(uint16_t *d, const uint16_t *a, const uint16_t *b)
+{
+	uint32_t ctl;
+	uint16_t t[18];
+
+	memcpy(t, a, sizeof t);
+	ctl = br_i15_add(t, b, 1);
+	ctl |= NOT(br_i15_sub(t, C255_P, 0));
+	br_i15_sub(t, C255_P, ctl);
+	memcpy(d, t, sizeof t);
+}
+/*
+ * Field subtraction: d = a - b, followed by a conditional addition
+ * of C255_P.
+ *
+ * The work is done on a local copy t[] of a[], and d[] is written
+ * last, so d may be the same array as a or b. The value returned by
+ * br_i15_sub() is used directly as the control of br_i15_add().
+ * NOTE(review): presumably that value is the borrow, so p is added
+ * back exactly when a < b -- confirm against inner.h.
+ */
+static void
+c255_sub(uint16_t *d, const uint16_t *a, const uint16_t *b)
+{
+	uint16_t t[18];
+
+	memcpy(t, a, sizeof t);
+	br_i15_add(t, C255_P, br_i15_sub(t, b, 1));
+	memcpy(d, t, sizeof t);
+}
+/*
+ * Field multiplication: calls br_i15_montymul() on a and b with
+ * modulus C255_P and P0I, writing into a local t[], then copies t[]
+ * into d[]. Because of the temporary, d may be the same array as a
+ * or b (api_mul() relies on this, e.g. c255_mul(a, a, a)).
+ */
+static void
+c255_mul(uint16_t *d, const uint16_t *a, const uint16_t *b)
+{
+	uint16_t t[18];
+
+	br_i15_montymul(t, a, b, C255_P, P0I);
+	memcpy(d, t, sizeof t);
+}
+/*
+ * Reverse the order of the 32 bytes of G[] in place, by exchanging
+ * G[i] with G[31 - i] for i = 0..15.
+ */
+static void
+byteswap(unsigned char *G)
+{
+	int i;
+
+	for (i = 0; i < 16; i ++) {
+		unsigned char t;
+
+		t = G[i];
+		G[i] = G[31 - i];
+		G[31 - i] = t;
+	}
+}
+/*
+ * Multiply the point encoded in G[] by the multiplier kb[] with a
+ * Montgomery ladder, and write the encoded result back into G[].
+ *
+ *   G, Glen    point ('u' coordinate, 32 bytes, little-endian);
+ *              overwritten with the result
+ *   kb, kblen  multiplier, at most 32 bytes; it is right-aligned
+ *              into a 32-byte buffer whose last byte holds the
+ *              least significant bits
+ *   curve      ignored
+ *
+ * Returns 0, without touching G[], if Glen != 32 or kblen > 32;
+ * returns 1 otherwise. The local copy of the multiplier is modified
+ * before use: its three lowest bits and its top bit are cleared, and
+ * bit 254 is set.
+ */
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *kb, size_t kblen, int curve)
+{
+#define ILEN   (18 * sizeof(uint16_t))
+
+	/*
+	 * The a[] and b[] arrays have an extra word to allow for
+	 * decoding without using br_i15_decode_reduce().
+	 */
+	uint16_t x1[18], x2[18], x3[18], z2[18], z3[18];
+	uint16_t a[19], aa[18], b[19], bb[18];
+	uint16_t c[18], d[18], e[18], da[18], cb[18];
+	unsigned char k[32];
+	uint32_t swap;
+	int i;
+
+	(void)curve;
+
+	/*
+	 * Points are encoded over exactly 32 bytes. Multipliers must fit
+	 * in 32 bytes as well.
+	 * RFC 7748 mandates that the high bit of the last point byte must
+	 * be ignored/cleared.
+	 */
+	if (Glen != 32 || kblen > 32) {
+		return 0;
+	}
+	G[31] &= 0x7F;
+
+	/*
+	 * Byteswap the point encoding, because it uses little-endian, and
+	 * the generic decoding routine uses big-endian.
+	 */
+	byteswap(G);
+
+	/*
+	 * Decode the point ('u' coordinate). This should be reduced
+	 * modulo p, but we prefer to avoid the dependency on
+	 * br_i15_decode_reduce(). Instead, we use br_i15_decode_mod()
+	 * with a synthetic modulus of value 2^255 (this must work
+	 * since G was truncated to 255 bits), then use a conditional
+	 * subtraction. We use br_i15_decode_mod() and not
+	 * br_i15_decode(), because the ec_prime_i15 implementation uses
+	 * the former but not the latter.
+	 *    br_i15_decode_reduce(a, G, 32, C255_P);
+	 *
+	 * The synthetic modulus is built in b[]: all value words are
+	 * zeroed, then the 18th value word (b[18], the extra word) is
+	 * set to 1. After decoding, the header word of a[] is reset to
+	 * that of C255_P (0x110).
+	 */
+	br_i15_zero(b, 0x111);
+	b[18] = 1;
+	br_i15_decode_mod(a, G, 32, b);
+	a[0] = 0x110;
+	br_i15_sub(a, C255_P, NOT(br_i15_sub(a, C255_P, 0)));
+
+	/*
+	 * Initialise variables x1, x2, z2, x3 and z3. We set all of them
+	 * into Montgomery representation.
+	 *
+	 * x1 = x3 = decoded point, z2 = 0, and x2 = z3 = 19, which is
+	 * R mod p, i.e. the Montgomery representation of 1.
+	 */
+	br_i15_montymul(x1, a, C255_R2, C255_P, P0I);
+	memcpy(x3, x1, ILEN);
+	br_i15_zero(z2, C255_P[0]);
+	memcpy(x2, z2, ILEN);
+	x2[1] = 19;
+	memcpy(z3, x2, ILEN);
+
+	memset(k, 0, (sizeof k) - kblen);	/* left-pad with zeros */
+	memcpy(k + (sizeof k) - kblen, kb, kblen);
+	k[31] &= 0xF8;	/* clear the three lowest bits */
+	k[0] &= 0x7F;	/* clear bit 255 */
+	k[0] |= 0x40;	/* set bit 254 */
+
+	/* obsolete
+	print_int_mont("x1", x1);
+	*/
+
+	swap = 0;
+	for (i = 254; i >= 0; i --) {
+		uint32_t kt;
+
+		kt = (k[31 - (i >> 3)] >> (i & 7)) & 1;	/* bit i of k */
+		swap ^= kt;
+		cswap(x2, x3, swap);
+		cswap(z2, z3, swap);
+		swap = kt;
+
+		/* obsolete
+		print_int_mont("x2", x2);
+		print_int_mont("z2", z2);
+		print_int_mont("x3", x3);
+		print_int_mont("z3", z3);
+		*/
+
+		c255_add(a, x2, z2);
+		c255_mul(aa, a, a);
+		c255_sub(b, x2, z2);
+		c255_mul(bb, b, b);
+		c255_sub(e, aa, bb);
+		c255_add(c, x3, z3);
+		c255_sub(d, x3, z3);
+		c255_mul(da, d, a);
+		c255_mul(cb, c, b);
+
+		/* obsolete
+		print_int_mont("a ", a);
+		print_int_mont("aa", aa);
+		print_int_mont("b ", b);
+		print_int_mont("bb", bb);
+		print_int_mont("e ", e);
+		print_int_mont("c ", c);
+		print_int_mont("d ", d);
+		print_int_mont("da", da);
+		print_int_mont("cb", cb);
+		*/
+
+		c255_add(x3, da, cb);
+		c255_mul(x3, x3, x3);
+		c255_sub(z3, da, cb);
+		c255_mul(z3, z3, z3);
+		c255_mul(z3, z3, x1);
+		c255_mul(x2, aa, bb);
+		c255_mul(z2, C255_A24, e);
+		c255_add(z2, z2, aa);
+		c255_mul(z2, e, z2);
+
+		/* obsolete
+		print_int_mont("x2", x2);
+		print_int_mont("z2", z2);
+		print_int_mont("x3", x3);
+		print_int_mont("z3", z3);
+		*/
+	}
+	cswap(x2, x3, swap);
+	cswap(z2, z3, swap);
+
+	/*
+	 * Inverse z2 with a modular exponentiation. This is a simple
+	 * square-and-multiply algorithm; we mutualise most non-squarings
+	 * since the exponent contains almost only ones.
+	 *
+	 * The first loop yields a = z2^(2^16-1); the second one yields
+	 * b = z2^(2^240-1); the third one appends the 15 low bits of
+	 * 0xFFEB, for a total exponent of
+	 * (2^240-1)*2^15 + 0x7FEB = 2^255-21 = p-2. The final
+	 * multiplication by x2 gives x2/z2.
+	 */
+	memcpy(a, z2, ILEN);
+	for (i = 0; i < 15; i ++) {
+		c255_mul(a, a, a);
+		c255_mul(a, a, z2);
+	}
+	memcpy(b, a, ILEN);
+	for (i = 0; i < 14; i ++) {
+		int j;
+
+		for (j = 0; j < 16; j ++) {
+			c255_mul(b, b, b);
+		}
+		c255_mul(b, b, a);
+	}
+	for (i = 14; i >= 0; i --) {
+		c255_mul(b, b, b);
+		if ((0xFFEB >> i) & 1) {
+			c255_mul(b, z2, b);
+		}
+	}
+	c255_mul(b, x2, b);
+
+	/*
+	 * To avoid a dependency on br_i15_from_monty(), we use a
+	 * Montgomery multiplication with 1.
+	 *    memcpy(x2, b, ILEN);
+	 *    br_i15_from_monty(x2, C255_P, P0I);
+	 */
+	br_i15_zero(a, C255_P[0]);
+	a[1] = 1;
+	br_i15_montymul(x2, a, b, C255_P, P0I);
+
+	br_i15_encode(G, 32, x2);
+	byteswap(G);	/* back to little-endian */
+	return 1;
+
+#undef ILEN
+}
+/*
+ * Multiply the generator by x[]: copies the bytes returned by
+ * api_generator() into R[], runs api_mul() on R[] in place, and
+ * returns the generator length (32).
+ *
+ * The value returned by api_mul() is ignored. Since the length
+ * passed is always 32, api_mul() can only refuse the call when
+ * xlen > 32; in that case R[] still holds the unmodified generator
+ * and 32 is returned anyway.
+ */
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	const unsigned char *G;
+	size_t Glen;
+
+	G = api_generator(curve, &Glen);
+	memcpy(R, G, Glen);
+	api_mul(R, Glen, x, xlen, curve);
+	return Glen;
+}
+/*
+ * Unimplemented for this curve: all parameters are ignored, no
+ * buffer is touched, and the function always returns 0.
+ */
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	/*
+	 * We don't implement this method, since it is used for ECDSA
+	 * only, and there is no ECDSA over Curve25519 (which instead
+	 * uses EdDSA).
+	 */
+	(void)A;
+	(void)B;
+	(void)len;
+	(void)x;
+	(void)xlen;
+	(void)y;
+	(void)ylen;
+	(void)curve;
+	return 0;
+}
+
+/*
+ * see bearssl_ec.h
+ *
+ * Curve25519 implementation built on the br_i15_* functions; the
+ * function entries are the static api_* functions of this file.
+ * NOTE(review): the first field (0x20000000) is presumably the bit
+ * mask of supported curve identifiers -- confirm against the
+ * br_ec_impl declaration.
+ */
+const br_ec_impl br_ec_c25519_i15 = {
+	(uint32_t)0x20000000,
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
--- a/third_party/bearssl/src/ec_c25519_i31.c
+++ b/third_party/bearssl/src/ec_c25519_i31.c
@ -0,0 +1,390 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Parameters for the field:
+ *   - field modulus p = 2^255-19
+ *   - R^2 mod p (R = 2^(31k) for the smallest k such that R >= p)
+ *
+ * Each array holds a header word (0x00000107) followed by 9 value
+ * words of 31 bits, least significant word first: C255_P has
+ * 0x7FFFFFED, then seven words 0x7FFFFFFF, then 0x7F, which is
+ * 2^255-19. NOTE(review): the header word is presumably the encoded
+ * bit length expected by the br_i31_* functions -- confirm against
+ * inner.h.
+ *
+ * With 9 words of 31 bits, R = 2^279, hence R mod p = 19 * 2^24 and
+ * R^2 mod p = 361 * 2^48, stored as 0x02D20000 in the second value
+ * word of C255_R2.
+ *
+ * P0I is the last argument given to br_i31_montymul() along with
+ * C255_P; numerically, 0x7FFFFFED * 0x286BCA1B = -1 mod 2^31.
+ */
+
+static const uint32_t C255_P[] = {
+	0x00000107,
+	0x7FFFFFED, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
+	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0000007F
+};
+
+#define P0I   0x286BCA1B
+
+static const uint32_t C255_R2[] = {
+	0x00000107,
+	0x00000000, 0x02D20000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000
+};
+/*
+ * Ladder constant, same word layout as C255_P. Its value is
+ * 0x468B * 2^31 + 0x53000000 = 121665 * 19 * 2^24, i.e. 121665 times
+ * R mod p, the form expected by c255_mul(). api_mul() multiplies it
+ * with 'e' in each ladder step. NOTE(review): 121665 is presumably
+ * the (A-2)/4 curve constant of RFC 7748 -- confirm.
+ */
+static const uint32_t C255_A24[] = {
+	0x00000107,
+	0x53000000, 0x0000468B, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000
+};
+
+/* obsolete
+#include <stdio.h>
+#include <stdlib.h>
+static void
+print_int_mont(const char *name, const uint32_t *x)
+{
+	uint32_t y[10];
+	unsigned char tmp[32];
+	size_t u;
+
+	printf("%s = ", name);
+	memcpy(y, x, sizeof y);
+	br_i31_from_monty(y, C255_P, P0I);
+	br_i31_encode(tmp, sizeof tmp, y);
+	for (u = 0; u < sizeof tmp; u ++) {
+		printf("%02X", tmp[u]);
+	}
+	printf("\n");
+}
+*/
+/*
+ * Bytes returned by api_generator(): 0x09 followed by 31 zero bytes.
+ * api_mul() reads point encodings as little-endian, so this encodes
+ * the coordinate u = 9.
+ */
+static const unsigned char GEN[] = {
+	0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+/*
+ * Bytes returned by api_order(): 0x7F followed by 31 bytes 0xFF,
+ * which is 2^255-1 when read as a big-endian integer.
+ * NOTE(review): this is not the prime subgroup order of Curve25519;
+ * presumably callers only use it as an upper bound when producing
+ * private keys -- confirm against bearssl_ec.h.
+ */
+static const unsigned char ORDER[] = {
+	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+/*
+ * Return the GEN[] array and set *len to 32. The curve argument is
+ * ignored.
+ */
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	(void)curve;
+	*len = 32;
+	return GEN;
+}
+/*
+ * Return the ORDER[] array and set *len to 32. The curve argument is
+ * ignored.
+ */
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	(void)curve;
+	*len = 32;
+	return ORDER;
+}
+/*
+ * Return 0 and set *len to 32. The curve argument is ignored.
+ * NOTE(review): presumably this means the coordinate starts at
+ * offset 0 of an encoded point and spans 32 bytes -- confirm against
+ * bearssl_ec.h.
+ */
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	(void)curve;
+	*len = 32;
+	return 0;
+}
+/*
+ * Conditionally exchange the 10-word arrays a[] and b[].
+ *
+ * ctl is negated as an unsigned 32-bit value, so ctl == 1 gives an
+ * all-ones mask (the words are exchanged) and ctl == 0 gives a zero
+ * mask (both arrays are rewritten with their own contents). There is
+ * no branch on ctl: the same loads, XORs and stores run either way.
+ * api_mul() only passes 0 or 1.
+ */
+static void
+cswap(uint32_t *a, uint32_t *b, uint32_t ctl)
+{
+	int i;
+
+	ctl = -ctl;
+	for (i = 0; i < 10; i ++) {
+		uint32_t aw, bw, tw;
+
+		aw = a[i];
+		bw = b[i];
+		tw = ctl & (aw ^ bw);
+		a[i] = aw ^ tw;
+		b[i] = bw ^ tw;
+	}
+}
+/*
+ * Field addition: d = a + b, followed by a conditional subtraction
+ * of C255_P.
+ *
+ * The work is done on a local copy t[] of a[], and d[] is written
+ * last, so d may be the same array as a or b. ctl combines the value
+ * returned by br_i31_add() with the negated result of a
+ * br_i31_sub() call made with a control of 0; the second
+ * br_i31_sub() call receives ctl as its control.
+ * NOTE(review): presumably the br_i31_* return values are
+ * carry/borrow flags and a control of 0 means "compute the flag but
+ * leave t unchanged", which makes ctl equal to "t >= p" -- confirm
+ * against inner.h.
+ */
+static void
+c255_add(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t ctl;
+	uint32_t t[10];
+
+	memcpy(t, a, sizeof t);
+	ctl = br_i31_add(t, b, 1);
+	ctl |= NOT(br_i31_sub(t, C255_P, 0));
+	br_i31_sub(t, C255_P, ctl);
+	memcpy(d, t, sizeof t);
+}
+/*
+ * Field subtraction: d = a - b, followed by a conditional addition
+ * of C255_P.
+ *
+ * The work is done on a local copy t[] of a[], and d[] is written
+ * last, so d may be the same array as a or b. The value returned by
+ * br_i31_sub() is used directly as the control of br_i31_add().
+ * NOTE(review): presumably that value is the borrow, so p is added
+ * back exactly when a < b -- confirm against inner.h.
+ */
+static void
+c255_sub(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t t[10];
+
+	memcpy(t, a, sizeof t);
+	br_i31_add(t, C255_P, br_i31_sub(t, b, 1));
+	memcpy(d, t, sizeof t);
+}
+/*
+ * Field multiplication: calls br_i31_montymul() on a and b with
+ * modulus C255_P and P0I, writing into a local t[], then copies t[]
+ * into d[]. Because of the temporary, d may be the same array as a
+ * or b (api_mul() relies on this, e.g. c255_mul(a, a, a)).
+ */
+static void
+c255_mul(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t t[10];
+
+	br_i31_montymul(t, a, b, C255_P, P0I);
+	memcpy(d, t, sizeof t);
+}
+/*
+ * Reverse the order of the 32 bytes of G[] in place, by exchanging
+ * G[i] with G[31 - i] for i = 0..15.
+ */
+static void
+byteswap(unsigned char *G)
+{
+	int i;
+
+	for (i = 0; i < 16; i ++) {
+		unsigned char t;
+
+		t = G[i];
+		G[i] = G[31 - i];
+		G[31 - i] = t;
+	}
+}
+/*
+ * Multiply the point encoded in G[] by the multiplier kb[] with a
+ * Montgomery ladder, and write the encoded result back into G[].
+ *
+ *   G, Glen    point ('u' coordinate, 32 bytes, little-endian);
+ *              overwritten with the result
+ *   kb, kblen  multiplier, big-endian, at most 32 bytes
+ *   curve      ignored
+ *
+ * Returns 0, without touching G[], if Glen != 32 or kblen > 32;
+ * returns 1 otherwise. The local copy of the multiplier is modified
+ * before use: its three lowest bits and its top bit are cleared, and
+ * bit 254 is set.
+ */
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *kb, size_t kblen, int curve)
+{
+	uint32_t x1[10], x2[10], x3[10], z2[10], z3[10];
+	uint32_t a[10], aa[10], b[10], bb[10];
+	uint32_t c[10], d[10], e[10], da[10], cb[10];
+	unsigned char k[32];
+	uint32_t swap;
+	int i;
+
+	(void)curve;
+
+	/*
+	 * Points are encoded over exactly 32 bytes. Multipliers must fit
+	 * in 32 bytes as well.
+	 * RFC 7748 mandates that the high bit of the last point byte must
+	 * be ignored/cleared.
+	 */
+	if (Glen != 32 || kblen > 32) {
+		return 0;
+	}
+	G[31] &= 0x7F;
+
+	/*
+	 * Byteswap the point encoding, because it uses little-endian, and
+	 * the generic decoding routine uses big-endian.
+	 */
+	byteswap(G);
+
+	/*
+	 * Decode the point ('u' coordinate). This should be reduced
+	 * modulo p, but we prefer to avoid the dependency on
+	 * br_i31_decode_reduce(). Instead, we use br_i31_decode_mod()
+	 * with a synthetic modulus of value 2^255 (this must work
+	 * since G was truncated to 255 bits), then use a conditional
+	 * subtraction. We use br_i31_decode_mod() and not
+	 * br_i31_decode(), because the ec_prime_i31 implementation uses
+	 * the former but not the latter.
+	 *    br_i31_decode_reduce(a, G, 32, C255_P);
+	 *
+	 * The synthetic modulus is built in b[]: all value words are
+	 * zeroed, then bit 7 of the 9th value word is set
+	 * (8 * 31 + 7 = 255). After decoding, the header word of a[]
+	 * is reset to that of C255_P (0x107).
+	 */
+	br_i31_zero(b, 0x108);
+	b[9] = 0x0080;
+	br_i31_decode_mod(a, G, 32, b);
+	a[0] = 0x107;
+	br_i31_sub(a, C255_P, NOT(br_i31_sub(a, C255_P, 0)));
+
+	/*
+	 * Initialise variables x1, x2, z2, x3 and z3. We set all of them
+	 * into Montgomery representation.
+	 *
+	 * x1 = x3 = decoded point, z2 = 0, and x2 = z3 = 0x13000000,
+	 * which is 19 * 2^24 = R mod p, i.e. the Montgomery
+	 * representation of 1.
+	 */
+	br_i31_montymul(x1, a, C255_R2, C255_P, P0I);
+	memcpy(x3, x1, sizeof x1);
+	br_i31_zero(z2, C255_P[0]);
+	memcpy(x2, z2, sizeof z2);
+	x2[1] = 0x13000000;
+	memcpy(z3, x2, sizeof x2);
+
+	/*
+	 * kb[] is in big-endian notation, but possibly shorter than k[].
+	 * It is right-aligned into k[] with leading zeros; then the
+	 * three lowest bits and bit 255 are cleared, and bit 254 is set.
+	 */
+	memset(k, 0, (sizeof k) - kblen);
+	memcpy(k + (sizeof k) - kblen, kb, kblen);
+	k[31] &= 0xF8;
+	k[0] &= 0x7F;
+	k[0] |= 0x40;
+
+	/* obsolete
+	print_int_mont("x1", x1);
+	*/
+
+	swap = 0;
+	for (i = 254; i >= 0; i --) {
+		uint32_t kt;
+
+		kt = (k[31 - (i >> 3)] >> (i & 7)) & 1;	/* bit i of k */
+		swap ^= kt;
+		cswap(x2, x3, swap);
+		cswap(z2, z3, swap);
+		swap = kt;
+
+		/* obsolete
+		print_int_mont("x2", x2);
+		print_int_mont("z2", z2);
+		print_int_mont("x3", x3);
+		print_int_mont("z3", z3);
+		*/
+
+		c255_add(a, x2, z2);
+		c255_mul(aa, a, a);
+		c255_sub(b, x2, z2);
+		c255_mul(bb, b, b);
+		c255_sub(e, aa, bb);
+		c255_add(c, x3, z3);
+		c255_sub(d, x3, z3);
+		c255_mul(da, d, a);
+		c255_mul(cb, c, b);
+
+		/* obsolete
+		print_int_mont("a ", a);
+		print_int_mont("aa", aa);
+		print_int_mont("b ", b);
+		print_int_mont("bb", bb);
+		print_int_mont("e ", e);
+		print_int_mont("c ", c);
+		print_int_mont("d ", d);
+		print_int_mont("da", da);
+		print_int_mont("cb", cb);
+		*/
+
+		c255_add(x3, da, cb);
+		c255_mul(x3, x3, x3);
+		c255_sub(z3, da, cb);
+		c255_mul(z3, z3, z3);
+		c255_mul(z3, z3, x1);
+		c255_mul(x2, aa, bb);
+		c255_mul(z2, C255_A24, e);
+		c255_add(z2, z2, aa);
+		c255_mul(z2, e, z2);
+
+		/* obsolete
+		print_int_mont("x2", x2);
+		print_int_mont("z2", z2);
+		print_int_mont("x3", x3);
+		print_int_mont("z3", z3);
+		*/
+	}
+	cswap(x2, x3, swap);
+	cswap(z2, z3, swap);
+
+	/*
+	 * Inverse z2 with a modular exponentiation. This is a simple
+	 * square-and-multiply algorithm; we mutualise most non-squarings
+	 * since the exponent contains almost only ones.
+	 *
+	 * The first loop yields a = z2^(2^16-1); the second one yields
+	 * b = z2^(2^240-1); the third one appends the 15 low bits of
+	 * 0xFFEB, for a total exponent of
+	 * (2^240-1)*2^15 + 0x7FEB = 2^255-21 = p-2. The final
+	 * multiplication by x2 gives x2/z2.
+	 */
+	memcpy(a, z2, sizeof z2);
+	for (i = 0; i < 15; i ++) {
+		c255_mul(a, a, a);
+		c255_mul(a, a, z2);
+	}
+	memcpy(b, a, sizeof a);
+	for (i = 0; i < 14; i ++) {
+		int j;
+
+		for (j = 0; j < 16; j ++) {
+			c255_mul(b, b, b);
+		}
+		c255_mul(b, b, a);
+	}
+	for (i = 14; i >= 0; i --) {
+		c255_mul(b, b, b);
+		if ((0xFFEB >> i) & 1) {
+			c255_mul(b, z2, b);
+		}
+	}
+	c255_mul(b, x2, b);
+
+	/*
+	 * To avoid a dependency on br_i31_from_monty(), we use
+	 * a Montgomery multiplication with 1.
+	 *    memcpy(x2, b, sizeof b);
+	 *    br_i31_from_monty(x2, C255_P, P0I);
+	 */
+	br_i31_zero(a, C255_P[0]);
+	a[1] = 1;
+	br_i31_montymul(x2, a, b, C255_P, P0I);
+
+	br_i31_encode(G, 32, x2);
+	byteswap(G);	/* back to little-endian */
+	return 1;
+}
+/*
+ * Multiply the generator by x[]: copies the bytes returned by
+ * api_generator() into R[], runs api_mul() on R[] in place, and
+ * returns the generator length (32).
+ *
+ * The value returned by api_mul() is ignored. Since the length
+ * passed is always 32, api_mul() can only refuse the call when
+ * xlen > 32; in that case R[] still holds the unmodified generator
+ * and 32 is returned anyway.
+ */
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	const unsigned char *G;
+	size_t Glen;
+
+	G = api_generator(curve, &Glen);
+	memcpy(R, G, Glen);
+	api_mul(R, Glen, x, xlen, curve);
+	return Glen;
+}
+/*
+ * Unimplemented for this curve: all parameters are ignored, no
+ * buffer is touched, and the function always returns 0.
+ */
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	/*
+	 * We don't implement this method, since it is used for ECDSA
+	 * only, and there is no ECDSA over Curve25519 (which instead
+	 * uses EdDSA).
+	 */
+	(void)A;
+	(void)B;
+	(void)len;
+	(void)x;
+	(void)xlen;
+	(void)y;
+	(void)ylen;
+	(void)curve;
+	return 0;
+}
+
+/*
+ * see bearssl_ec.h
+ *
+ * Curve25519 implementation built on the br_i31_* functions; the
+ * function entries are the static api_* functions of this file.
+ * NOTE(review): the first field (0x20000000) is presumably the bit
+ * mask of supported curve identifiers -- confirm against the
+ * br_ec_impl declaration.
+ */
+const br_ec_impl br_ec_c25519_i31 = {
+	(uint32_t)0x20000000,
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
--- a/third_party/bearssl/src/ec_c25519_m15.c
+++ b/third_party/bearssl/src/ec_c25519_m15.c
--- a/third_party/bearssl/src/ec_c25519_m31.c
+++ b/third_party/bearssl/src/ec_c25519_m31.c
@ -0,0 +1,800 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* obsolete
+#include <stdio.h>
+#include <stdlib.h>
+static void
+print_int(const char *name, const uint32_t *x)
+{
+	size_t u;
+	unsigned char tmp[40];
+
+	printf("%s = ", name);
+	for (u = 0; u < 9; u ++) {
+		if (x[u] > 0x3FFFFFFF) {
+			printf("INVALID:");
+			for (u = 0; u < 9; u ++) {
+				printf(" %08X", x[u]);
+			}
+			printf("\n");
+			return;
+		}
+	}
+	memset(tmp, 0, sizeof tmp);
+	for (u = 0; u < 9; u ++) {
+		uint64_t w;
+		int j, k;
+
+		w = x[u];
+		j = 30 * (int)u;
+		k = j & 7;
+		if (k != 0) {
+			w <<= k;
+			j -= k;
+		}
+		k = j >> 3;
+		for (j = 0; j < 8; j ++) {
+			tmp[39 - k - j] |= (unsigned char)w;
+			w >>= 8;
+		}
+	}
+	for (u = 8; u < 40; u ++) {
+		printf("%02X", tmp[u]);
+	}
+	printf("\n");
+}
+*/
+
+/*
+ * ARSH(x, n): arithmetic (sign-propagating) right shift of the 32-bit
+ * value x by n bits.
+ *
+ * If BR_NO_ARITH_SHIFT is undefined, or defined to 0, then we _assume_
+ * that right-shifting a signed negative integer copies the sign bit
+ * (arithmetic right-shift). This is "implementation-defined behaviour",
+ * i.e. it is not undefined, but it may differ between compilers. Each
+ * compiler is supposed to document its behaviour in that respect. GCC
+ * explicitly defines that an arithmetic right shift is used. We expect
+ * all other compilers to do the same, because underlying CPU offer an
+ * arithmetic right shift opcode that could not be used otherwise.
+ *
+ * Usage constraints that follow from the two definitions below:
+ *  - the default form takes the address of x and reads it as an
+ *    int32_t, so x must be an lvalue of a 32-bit integer type;
+ *  - the BR_NO_ARITH_SHIFT form evaluates x twice and shifts by
+ *    (32 - n), so x must have no side effect and n must be in the
+ *    1..31 range.
+ */
+#if BR_NO_ARITH_SHIFT
+#define ARSH(x, n)   (((uint32_t)(x) >> (n)) \
+                    | ((-((uint32_t)(x) >> 31)) << (32 - (n))))
+#else
+#define ARSH(x, n)   ((*(int32_t *)&(x)) >> (n))
+#endif
+
+/*
+ * Decode len bytes (unsigned little-endian) from src[] into 30-bit
+ * words, least significant word first. Every complete 30-bit word is
+ * stored in dst[]; the bits left over at the end (the final "partial"
+ * word) are not stored but returned to the caller.
+ */
+static uint32_t
+le8_to_le30(uint32_t *dst, const unsigned char *src, size_t len)
+{
+	uint32_t pending = 0;
+	int pending_bits = 0;
+	size_t u;
+
+	for (u = 0; u < len; u ++) {
+		uint32_t octet = src[u];
+
+		if (pending_bits >= 22) {
+			/*
+			 * This byte completes a 30-bit word; its upper
+			 * bits start the next one.
+			 */
+			*dst ++ = (pending | (octet << pending_bits))
+				& 0x3FFFFFFF;
+			pending = octet >> (30 - pending_bits);
+			pending_bits -= 22;
+		} else {
+			pending |= octet << pending_bits;
+			pending_bits += 8;
+		}
+	}
+	return pending;
+}
+
+/*
+ * Encode an integer held as 30-bit words (least significant word
+ * first) into exactly len bytes of unsigned little-endian output.
+ * Words are fetched from src[] only when fewer than 8 bits are
+ * pending, so the number of words read depends on len.
+ */
+static void
+le30_to_le8(unsigned char *dst, size_t len, const uint32_t *src)
+{
+	uint32_t pending = 0;
+	int pending_bits = 0;
+	size_t u;
+
+	for (u = 0; u < len; u ++) {
+		if (pending_bits >= 8) {
+			dst[u] = (unsigned char)pending;
+			pending >>= 8;
+			pending_bits -= 8;
+		} else {
+			uint32_t word = *src ++;
+
+			/*
+			 * Complete the current byte with the low bits
+			 * of the next word.
+			 */
+			dst[u] = (unsigned char)(pending
+				| (word << pending_bits));
+			pending = word >> (8 - pending_bits);
+			pending_bits += 22;
+		}
+	}
+}
+
+/*
+ * Multiply two integers. Source integers are represented as arrays of
+ * nine 30-bit words, for values up to 2^270-1. Result is encoded over
+ * 18 words of 30 bits each.
+ *
+ *   d   destination, 18 words (d[0] to d[17] are all written)
+ *   a   first operand, 9 words, least significant word first
+ *   b   second operand, 9 words, least significant word first
+ *
+ * All products are accumulated in the local array t[] before anything
+ * is written to d[]. Each t[k] is the sum of the products a[i]*b[j]
+ * such that i+j = k (one column of the schoolbook multiplication).
+ * MUL31() is defined outside this file; it is used here as a
+ * multiplication whose result is accumulated on 64 bits.
+ */
+static void
+mul9(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	/*
+	 * Maximum intermediate result is no more than
+	 * 10376293531797946367, which fits in 64 bits. Reason:
+	 *
+	 *   10376293531797946367 = 9 * (2^30-1)^2 + 9663676406
+	 *   10376293531797946367 < 9663676407 * 2^30
+	 *
+	 * Thus, adding together 9 products of 30-bit integers, with
+	 * a carry of at most 9663676406, yields an integer that fits
+	 * on 64 bits and generates a carry of at most 9663676406.
+	 */
+	uint64_t t[17];
+	uint64_t cc;
+	int i;
+
+	t[ 0] = MUL31(a[0], b[0]);
+	t[ 1] = MUL31(a[0], b[1])
+		+ MUL31(a[1], b[0]);
+	t[ 2] = MUL31(a[0], b[2])
+		+ MUL31(a[1], b[1])
+		+ MUL31(a[2], b[0]);
+	t[ 3] = MUL31(a[0], b[3])
+		+ MUL31(a[1], b[2])
+		+ MUL31(a[2], b[1])
+		+ MUL31(a[3], b[0]);
+	t[ 4] = MUL31(a[0], b[4])
+		+ MUL31(a[1], b[3])
+		+ MUL31(a[2], b[2])
+		+ MUL31(a[3], b[1])
+		+ MUL31(a[4], b[0]);
+	t[ 5] = MUL31(a[0], b[5])
+		+ MUL31(a[1], b[4])
+		+ MUL31(a[2], b[3])
+		+ MUL31(a[3], b[2])
+		+ MUL31(a[4], b[1])
+		+ MUL31(a[5], b[0]);
+	t[ 6] = MUL31(a[0], b[6])
+		+ MUL31(a[1], b[5])
+		+ MUL31(a[2], b[4])
+		+ MUL31(a[3], b[3])
+		+ MUL31(a[4], b[2])
+		+ MUL31(a[5], b[1])
+		+ MUL31(a[6], b[0]);
+	t[ 7] = MUL31(a[0], b[7])
+		+ MUL31(a[1], b[6])
+		+ MUL31(a[2], b[5])
+		+ MUL31(a[3], b[4])
+		+ MUL31(a[4], b[3])
+		+ MUL31(a[5], b[2])
+		+ MUL31(a[6], b[1])
+		+ MUL31(a[7], b[0]);
+	t[ 8] = MUL31(a[0], b[8])
+		+ MUL31(a[1], b[7])
+		+ MUL31(a[2], b[6])
+		+ MUL31(a[3], b[5])
+		+ MUL31(a[4], b[4])
+		+ MUL31(a[5], b[3])
+		+ MUL31(a[6], b[2])
+		+ MUL31(a[7], b[1])
+		+ MUL31(a[8], b[0]);
+	t[ 9] = MUL31(a[1], b[8])
+		+ MUL31(a[2], b[7])
+		+ MUL31(a[3], b[6])
+		+ MUL31(a[4], b[5])
+		+ MUL31(a[5], b[4])
+		+ MUL31(a[6], b[3])
+		+ MUL31(a[7], b[2])
+		+ MUL31(a[8], b[1]);
+	t[10] = MUL31(a[2], b[8])
+		+ MUL31(a[3], b[7])
+		+ MUL31(a[4], b[6])
+		+ MUL31(a[5], b[5])
+		+ MUL31(a[6], b[4])
+		+ MUL31(a[7], b[3])
+		+ MUL31(a[8], b[2]);
+	t[11] = MUL31(a[3], b[8])
+		+ MUL31(a[4], b[7])
+		+ MUL31(a[5], b[6])
+		+ MUL31(a[6], b[5])
+		+ MUL31(a[7], b[4])
+		+ MUL31(a[8], b[3]);
+	t[12] = MUL31(a[4], b[8])
+		+ MUL31(a[5], b[7])
+		+ MUL31(a[6], b[6])
+		+ MUL31(a[7], b[5])
+		+ MUL31(a[8], b[4]);
+	t[13] = MUL31(a[5], b[8])
+		+ MUL31(a[6], b[7])
+		+ MUL31(a[7], b[6])
+		+ MUL31(a[8], b[5]);
+	t[14] = MUL31(a[6], b[8])
+		+ MUL31(a[7], b[7])
+		+ MUL31(a[8], b[6]);
+	t[15] = MUL31(a[7], b[8])
+		+ MUL31(a[8], b[7]);
+	t[16] = MUL31(a[8], b[8]);
+
+	/*
+	 * Propagate carries. Each output word keeps the low 30 bits of
+	 * its column; the excess bits are added to the next column. The
+	 * carry out of column 16 becomes the top word d[17].
+	 */
+	cc = 0;
+	for (i = 0; i < 17; i ++) {
+		uint64_t w;
+
+		w = t[i] + cc;
+		d[i] = (uint32_t)w & 0x3FFFFFFF;
+		cc = w >> 30;
+	}
+	d[17] = (uint32_t)cc;
+}
+
+/*
+ * Square a 270-bit integer, represented as an array of nine 30-bit words.
+ * Result uses 18 words of 30 bits each.
+ *
+ *   d   destination, 18 words (d[0] to d[17] are all written)
+ *   a   operand, 9 words, least significant word first
+ *
+ * Each t[k] gathers the products a[i]*a[j] with i+j = k. A cross
+ * product (i != j) is computed only once and doubled with a left
+ * shift, while a diagonal product a[i]*a[i] is added a single time.
+ * All of t[] is computed before anything is written to d[].
+ */
+static void
+square9(uint32_t *d, const uint32_t *a)
+{
+	uint64_t t[17];
+	uint64_t cc;
+	int i;
+
+	t[ 0] = MUL31(a[0], a[0]);
+	t[ 1] = ((MUL31(a[0], a[1])) << 1);
+	t[ 2] = MUL31(a[1], a[1])
+		+ ((MUL31(a[0], a[2])) << 1);
+	t[ 3] = ((MUL31(a[0], a[3])
+		+ MUL31(a[1], a[2])) << 1);
+	t[ 4] = MUL31(a[2], a[2])
+		+ ((MUL31(a[0], a[4])
+		+ MUL31(a[1], a[3])) << 1);
+	t[ 5] = ((MUL31(a[0], a[5])
+		+ MUL31(a[1], a[4])
+		+ MUL31(a[2], a[3])) << 1);
+	t[ 6] = MUL31(a[3], a[3])
+		+ ((MUL31(a[0], a[6])
+		+ MUL31(a[1], a[5])
+		+ MUL31(a[2], a[4])) << 1);
+	t[ 7] = ((MUL31(a[0], a[7])
+		+ MUL31(a[1], a[6])
+		+ MUL31(a[2], a[5])
+		+ MUL31(a[3], a[4])) << 1);
+	t[ 8] = MUL31(a[4], a[4])
+		+ ((MUL31(a[0], a[8])
+		+ MUL31(a[1], a[7])
+		+ MUL31(a[2], a[6])
+		+ MUL31(a[3], a[5])) << 1);
+	t[ 9] = ((MUL31(a[1], a[8])
+		+ MUL31(a[2], a[7])
+		+ MUL31(a[3], a[6])
+		+ MUL31(a[4], a[5])) << 1);
+	t[10] = MUL31(a[5], a[5])
+		+ ((MUL31(a[2], a[8])
+		+ MUL31(a[3], a[7])
+		+ MUL31(a[4], a[6])) << 1);
+	t[11] = ((MUL31(a[3], a[8])
+		+ MUL31(a[4], a[7])
+		+ MUL31(a[5], a[6])) << 1);
+	t[12] = MUL31(a[6], a[6])
+		+ ((MUL31(a[4], a[8])
+		+ MUL31(a[5], a[7])) << 1);
+	t[13] = ((MUL31(a[5], a[8])
+		+ MUL31(a[6], a[7])) << 1);
+	t[14] = MUL31(a[7], a[7])
+		+ ((MUL31(a[6], a[8])) << 1);
+	t[15] = ((MUL31(a[7], a[8])) << 1);
+	t[16] = MUL31(a[8], a[8]);
+
+	/*
+	 * Propagate carries. Each output word keeps the low 30 bits of
+	 * its column; the excess bits are added to the next column. The
+	 * carry out of column 16 becomes the top word d[17].
+	 */
+	cc = 0;
+	for (i = 0; i < 17; i ++) {
+		uint64_t w;
+
+		w = t[i] + cc;
+		d[i] = (uint32_t)w & 0x3FFFFFFF;
+		cc = w >> 30;
+	}
+	d[17] = (uint32_t)cc;
+}
+
+/*
+ * Perform a "final reduction" in field F255 (field for Curve25519).
+ * The source value must be less than twice the modulus. If the value
+ * is not lower than the modulus, then the modulus is subtracted and
+ * this function returns 1; otherwise, it leaves it untouched and it
+ * returns 0.
+ *
+ *   d   value to reduce, 9 words of 30 bits, modified in place
+ *
+ * Method: t = d + 19 is computed in a local copy. Given the
+ * precondition above, bit 255 of t (bit 15 of t[8]) is set exactly
+ * when d >= 2^255-19; that bit is extracted into cc and cleared from
+ * t, which turns t into d - (2^255-19). CCOPY() (defined outside this
+ * file) then receives cc as its control argument; presumably it copies
+ * t into d only when cc is 1, without branching -- confirm against
+ * its definition.
+ */
+static uint32_t
+reduce_final_f255(uint32_t *d)
+{
+	uint32_t t[9];
+	uint32_t cc;
+	int i;
+
+	memcpy(t, d, sizeof t);
+	cc = 19;
+	for (i = 0; i < 9; i ++) {
+		uint32_t w;
+
+		w = t[i] + cc;
+		cc = w >> 30;
+		t[i] = w & 0x3FFFFFFF;
+	}
+	cc = t[8] >> 15;
+	t[8] &= 0x7FFF;
+	CCOPY(cc, d, t, sizeof t);
+	return cc;
+}
+
+/*
+ * Perform a multiplication of two integers modulo 2^255-19.
+ * Operands are arrays of 9 words, each containing 30 bits of data, in
+ * little-endian order. Input value may be up to 2^256-1; on output, value
+ * fits on 256 bits and is lower than twice the modulus.
+ *
+ *   d   destination, 9 words
+ *   a   first operand, 9 words
+ *   b   second operand, 9 words
+ *
+ * d may be the same array as a and/or b: the full product is computed
+ * into the local array t[] by mul9() before d[] is written.
+ */
+static void
+f255_mul(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t t[18], cc;
+	int i;
+
+	/*
+	 * Compute raw multiplication. All result words fit in 30 bits
+	 * each; upper word (t[17]) must fit on 2 bits, since the product
+	 * of two 256-bit integers must fit on 512 bits.
+	 */
+	mul9(t, a, b);
+
+	/*
+	 * Modular reduction: each high word is added where necessary.
+	 * Since the modulus is 2^255-19 and word 9 corresponds to
+	 * offset 9*30 = 270, word 9+k must be added to word k with
+	 * a factor of 19*2^15 = 622592. The extra bits in word 8 are also
+	 * added that way.
+	 *
+	 * Keeping the carry on 32 bits helps with 32-bit architectures,
+	 * and does not noticeably impact performance on 64-bit systems.
+	 */
+	cc = MUL15(t[8] >> 15, 19);  /* at most 19*(2^15-1) = 622573 */
+	t[8] &= 0x7FFF;
+	for (i = 0; i < 9; i ++) {
+		uint64_t w;
+
+		w = (uint64_t)t[i] + (uint64_t)cc + MUL31(t[i + 9], 622592);
+		t[i] = (uint32_t)w & 0x3FFFFFFF;
+		cc = (uint32_t)(w >> 30);  /* at most 622592 */
+	}
+
+	/*
+	 * Original product was up to (2^256-1)^2, i.e. a 512-bit integer.
+	 * This was split into two parts (upper of 257 bits, lower of 255
+	 * bits), and the upper was added to the lower with a factor 19,
+	 * which means that the intermediate value is less than 77*2^255
+	 * (19*2^257 + 2^255). Therefore, the extra bits "t[8] >> 15" are
+	 * less than 77, and the initial carry cc is at most 76*19 = 1444.
+	 */
+	cc = MUL15(t[8] >> 15, 19);
+	t[8] &= 0x7FFF;
+	for (i = 0; i < 9; i ++) {
+		uint32_t z;
+
+		z = t[i] + cc;
+		d[i] = z & 0x3FFFFFFF;
+		cc = z >> 30;
+	}
+
+	/*
+	 * Final result is at most 2^255 + 1443. In particular, the last
+	 * carry is necessarily 0, since t[8] was truncated to 15 bits.
+	 */
+}
+
+/*
+ * Perform a squaring of an integer modulo 2^255-19.
+ * Operands are arrays of 9 words, each containing 30 bits of data, in
+ * little-endian order. Input value may be up to 2^256-1; on output, value
+ * fits on 256 bits and is lower than twice the modulus.
+ *
+ *   d   destination, 9 words
+ *   a   operand, 9 words
+ *
+ * d may be the same array as a: the full square is computed into the
+ * local array t[] by square9() before d[] is written.
+ */
+static void
+f255_square(uint32_t *d, const uint32_t *a)
+{
+	uint32_t t[18], cc;
+	int i;
+
+	/*
+	 * Compute raw squaring. All result words fit in 30 bits
+	 * each; upper word (t[17]) must fit on 2 bits, since the square
+	 * of a 256-bit integers must fit on 512 bits.
+	 */
+	square9(t, a);
+
+	/*
+	 * Modular reduction: each high word is added where necessary.
+	 * See f255_mul() for details on the reduction and carry limits.
+	 */
+	cc = MUL15(t[8] >> 15, 19);
+	t[8] &= 0x7FFF;
+	for (i = 0; i < 9; i ++) {
+		uint64_t w;
+
+		w = (uint64_t)t[i] + (uint64_t)cc + MUL31(t[i + 9], 622592);
+		t[i] = (uint32_t)w & 0x3FFFFFFF;
+		cc = (uint32_t)(w >> 30);
+	}
+	cc = MUL15(t[8] >> 15, 19);
+	t[8] &= 0x7FFF;
+	for (i = 0; i < 9; i ++) {
+		uint32_t z;
+
+		z = t[i] + cc;
+		d[i] = z & 0x3FFFFFFF;
+		cc = z >> 30;
+	}
+}
+
+/*
+ * Add two values in F255. Partial reduction is performed (down to less
+ * than twice the modulus).
+ *
+ *   d   destination, 9 words of 30 bits
+ *   a   first operand, 9 words
+ *   b   second operand, 9 words
+ *
+ * d may be the same array as a and/or b: word i of the operands is
+ * read before word i of d is written, and the second pass only
+ * touches d.
+ *
+ * First pass: word-by-word addition with carry. When it ends, w still
+ * holds the full (unmasked) sum for word 8; its bits 15 and above are
+ * the part of the result at or beyond 2^255. Second pass: these bits
+ * are cleared from d[8] and added back into the low words with a
+ * factor 19 (since 2^255 = 19 modulo 2^255-19).
+ */
+static void
+f255_add(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	/*
+	 * Since operand words fit on 30 bits, we can use 32-bit
+	 * variables throughout.
+	 */
+	int i;
+	uint32_t cc, w;
+
+	cc = 0;
+	for (i = 0; i < 9; i ++) {
+		w = a[i] + b[i] + cc;
+		d[i] = w & 0x3FFFFFFF;
+		cc = w >> 30;
+	}
+	cc = MUL15(w >> 15, 19);
+	d[8] &= 0x7FFF;
+	for (i = 0; i < 9; i ++) {
+		w = d[i] + cc;
+		d[i] = w & 0x3FFFFFFF;
+		cc = w >> 30;
+	}
+}
+
+/*
+ * Subtract one value from another in F255. Partial reduction is
+ * performed (down to less than twice the modulus).
+ *
+ *   d   destination, 9 words of 30 bits
+ *   a   value to subtract from, 9 words
+ *   b   value to subtract, 9 words
+ *
+ * d may be the same array as a and/or b: word i of the operands is
+ * read before word i of d is written, and the second pass only
+ * touches d.
+ */
+static void
+f255_sub(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	/*
+	 * We actually compute a - b + 2*p, so that the final value is
+	 * necessarily positive.
+	 *
+	 * 2*p is 2^256 - 38. The -38 part enters as the initial carry.
+	 * The 2^256 part is added as 0x10000 to the unmasked top word w
+	 * (word 8 starts at bit 240, so 0x10000 there is 2^256), just
+	 * before the bits at or above 2^255 are folded back into the
+	 * low words with a factor 19. During the first pass the running
+	 * carry may be negative, hence the sign-preserving ARSH().
+	 */
+	int i;
+	uint32_t cc, w;
+
+	cc = (uint32_t)-38;
+	for (i = 0; i < 9; i ++) {
+		w = a[i] - b[i] + cc;
+		d[i] = w & 0x3FFFFFFF;
+		cc = ARSH(w, 30);
+	}
+	cc = MUL15((w + 0x10000) >> 15, 19);
+	d[8] &= 0x7FFF;
+	for (i = 0; i < 9; i ++) {
+		w = d[i] + cc;
+		d[i] = w & 0x3FFFFFFF;
+		cc = w >> 30;
+	}
+}
+
+/*
+ * Multiply an integer by the 'A24' constant (121665). Partial reduction
+ * is performed (down to less than twice the modulus).
+ *
+ *   d   destination, 9 words of 30 bits
+ *   a   operand, 9 words
+ *
+ * d may be the same array as a: a[i] is read before d[i] is written,
+ * and the final pass only touches d.
+ */
+static void
+f255_mul_a24(uint32_t *d, const uint32_t *a)
+{
+	int i;
+	uint64_t w;
+	uint32_t cc;
+
+	/*
+	 * a[] is over 256 bits, thus a[8] has length at most 16 bits.
+	 * We single out the processing of the last word: intermediate
+	 * value w is up to 121665*2^16, yielding a carry for the next
+	 * loop of at most 19*(121665*2^16/2^15) = 4623289.
+	 */
+	cc = 0;
+	for (i = 0; i < 8; i ++) {
+		w = MUL31(a[i], 121665) + (uint64_t)cc;
+		d[i] = (uint32_t)w & 0x3FFFFFFF;
+		cc = (uint32_t)(w >> 30);
+	}
+	w = MUL31(a[8], 121665) + (uint64_t)cc;
+	d[8] = (uint32_t)w & 0x7FFF;
+	cc = MUL15((uint32_t)(w >> 15), 19);
+
+	for (i = 0; i < 9; i ++) {
+		uint32_t z;
+
+		z = d[i] + cc;
+		d[i] = z & 0x3FFFFFFF;
+		cc = z >> 30;
+	}
+}
+
+/*
+ * Encoded generator, as returned by api_generator(): a single 0x09
+ * byte followed by 31 zero bytes (32 bytes in total).
+ */
+static const unsigned char GEN[] = {
+	0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+/*
+ * Value returned by api_order(): one 0x7F byte followed by 31 bytes
+ * of value 0xFF (32 bytes in total).
+ */
+static const unsigned char ORDER[] = {
+	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+
+/*
+ * Return a pointer to the encoded generator (the GEN[] table) and
+ * store its length, 32 bytes, in *len. The curve identifier is not
+ * used.
+ */
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	(void)curve;
+	*len = sizeof GEN;
+	return &GEN[0];
+}
+
+/*
+ * Return a pointer to the ORDER[] table and store its length, 32
+ * bytes, in *len. The curve identifier is not used.
+ */
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	(void)curve;
+	*len = sizeof ORDER;
+	return &ORDER[0];
+}
+
+/*
+ * Store 32 in *len and return 0 (presumably the length and offset of
+ * the X coordinate within an encoded point -- see bearssl_ec.h). The
+ * curve identifier is not used.
+ */
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	const size_t coord_len = 32;
+
+	(void)curve;
+	*len = coord_len;
+	return 0;
+}
+
+/*
+ * Conditionally exchange two 9-word values: the contents of a[] and
+ * b[] are swapped if ctl is 1, and left unchanged if ctl is 0. The
+ * same sequence of operations is performed in both cases; the choice
+ * is applied through a mask (all-ones or all-zeros).
+ */
+static void
+cswap(uint32_t *a, uint32_t *b, uint32_t ctl)
+{
+	uint32_t mask;
+	int i;
+
+	mask = -ctl;
+	for (i = 0; i < 9; i ++) {
+		uint32_t diff;
+
+		diff = mask & (a[i] ^ b[i]);
+		a[i] ^= diff;
+		b[i] ^= diff;
+	}
+}
+
+/*
+ * Point multiplication (Montgomery ladder on the x coordinate only).
+ *
+ *   G       in/out: 32-byte point encoding, little-endian; overwritten
+ *           with the result
+ *   Glen    length of G, must be exactly 32
+ *   kb      multiplier, big-endian, at most 32 bytes
+ *   kblen   length of kb in bytes
+ *   curve   ignored
+ *
+ * Returned value is 1 on success, or 0 if Glen or kblen is out of
+ * range (G is then left unmodified).
+ *
+ * The multiplier is copied right-aligned into a 32-byte buffer and
+ * adjusted before use: the three low bits are cleared, bit 255 is
+ * cleared and bit 254 is set.
+ */
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *kb, size_t kblen, int curve)
+{
+	uint32_t x1[9], x2[9], x3[9], z2[9], z3[9];
+	uint32_t a[9], aa[9], b[9], bb[9];
+	uint32_t c[9], d[9], e[9], da[9], cb[9];
+	unsigned char k[32];
+	uint32_t swap;
+	int i;
+
+	(void)curve;
+
+	/*
+	 * Points are encoded over exactly 32 bytes. Multipliers must fit
+	 * in 32 bytes as well.
+	 * RFC 7748 mandates that the high bit of the last point byte must
+	 * be ignored/cleared.
+	 */
+	if (Glen != 32 || kblen > 32) {
+		return 0;
+	}
+	G[31] &= 0x7F;
+
+	/*
+	 * Initialise variables x1, x2, z2, x3 and z3: x1 and x3 receive
+	 * the decoded point, x2 and z3 are set to 1, and z2 to 0. Values
+	 * are plain integers over nine 30-bit words (no Montgomery
+	 * representation is used in this file).
+	 */
+	x1[8] = le8_to_le30(x1, G, 32);
+	memcpy(x3, x1, sizeof x1);
+	memset(z2, 0, sizeof z2);
+	memset(x2, 0, sizeof x2);
+	x2[0] = 1;
+	memset(z3, 0, sizeof z3);
+	z3[0] = 1;
+
+	memset(k, 0, (sizeof k) - kblen);
+	memcpy(k + (sizeof k) - kblen, kb, kblen);
+	k[31] &= 0xF8;
+	k[0] &= 0x7F;
+	k[0] |= 0x40;
+
+	/* obsolete
+	print_int("x1", x1);
+	*/
+
+	swap = 0;
+	for (i = 254; i >= 0; i --) {
+		uint32_t kt;
+
+		kt = (k[31 - (i >> 3)] >> (i & 7)) & 1;
+		swap ^= kt;
+		cswap(x2, x3, swap);
+		cswap(z2, z3, swap);
+		swap = kt;
+
+		/* obsolete
+		print_int("x2", x2);
+		print_int("z2", z2);
+		print_int("x3", x3);
+		print_int("z3", z3);
+		*/
+
+		f255_add(a, x2, z2);
+		f255_square(aa, a);
+		f255_sub(b, x2, z2);
+		f255_square(bb, b);
+		f255_sub(e, aa, bb);
+		f255_add(c, x3, z3);
+		f255_sub(d, x3, z3);
+		f255_mul(da, d, a);
+		f255_mul(cb, c, b);
+
+		/* obsolete
+		print_int("a ", a);
+		print_int("aa", aa);
+		print_int("b ", b);
+		print_int("bb", bb);
+		print_int("e ", e);
+		print_int("c ", c);
+		print_int("d ", d);
+		print_int("da", da);
+		print_int("cb", cb);
+		*/
+
+		f255_add(x3, da, cb);
+		f255_square(x3, x3);
+		f255_sub(z3, da, cb);
+		f255_square(z3, z3);
+		f255_mul(z3, z3, x1);
+		f255_mul(x2, aa, bb);
+		f255_mul_a24(z2, e);
+		f255_add(z2, z2, aa);
+		f255_mul(z2, e, z2);
+
+		/* obsolete
+		print_int("x2", x2);
+		print_int("z2", z2);
+		print_int("x3", x3);
+		print_int("z3", z3);
+		*/
+	}
+	cswap(x2, x3, swap);
+	cswap(z2, z3, swap);
+
+	/*
+	 * Inverse z2 with a modular exponentiation. This is a simple
+	 * square-and-multiply algorithm; we mutualise most non-squarings
+	 * since the exponent contains almost only ones.
+	 */
+	memcpy(a, z2, sizeof z2);
+	for (i = 0; i < 15; i ++) {
+		f255_square(a, a);
+		f255_mul(a, a, z2);
+	}
+	memcpy(b, a, sizeof a);
+	for (i = 0; i < 14; i ++) {
+		int j;
+
+		for (j = 0; j < 16; j ++) {
+			f255_square(b, b);
+		}
+		f255_mul(b, b, a);
+	}
+	for (i = 14; i >= 0; i --) {
+		f255_square(b, b);
+		if ((0xFFEB >> i) & 1) {
+			f255_mul(b, z2, b);
+		}
+	}
+	f255_mul(x2, x2, b);
+	reduce_final_f255(x2);
+	le30_to_le8(G, 32, x2);
+	return 1;
+}
+
+/*
+ * Multiply the generator by the scalar x[] (xlen bytes). The generator
+ * encoding obtained from api_generator() is copied into R[], and
+ * api_mul() then works on R[] in place. The returned value is the
+ * generator length; the status returned by api_mul() is not inspected.
+ */
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	size_t gen_len;
+	const unsigned char *gen;
+
+	gen = api_generator(curve, &gen_len);
+	memcpy(R, gen, gen_len);
+	(void)api_mul(R, gen_len, x, xlen, curve);
+	return gen_len;
+}
+
+/*
+ * Placeholder for the "muladd" entry of the implementation table. The
+ * operation is used for ECDSA only, and there is no ECDSA over
+ * Curve25519 (which uses EdDSA instead), so nothing is computed: all
+ * arguments are ignored and 0 is always returned.
+ */
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	/* Explicitly discard every parameter. */
+	(void)curve;
+	(void)ylen;
+	(void)y;
+	(void)xlen;
+	(void)x;
+	(void)len;
+	(void)B;
+	(void)A;
+
+	return 0;
+}
+
+/*
+ * see bearssl_ec.h
+ *
+ * Implementation table for this file: a 32-bit constant with only
+ * bit 29 set (presumably the supported-curves mask -- confirm against
+ * bearssl_ec.h), followed by the six entry points of this file.
+ */
+const br_ec_impl br_ec_c25519_m31 = {
+	(uint32_t)1 << 29,
+	api_generator,
+	api_order,
+	api_xoff,
+	api_mul,
+	api_mulgen,
+	api_muladd
+};
--- a/third_party/bearssl/src/ec_c25519_m62.c
+++ b/third_party/bearssl/src/ec_c25519_m62.c
@ -0,0 +1,605 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+#if BR_UMUL128
+#include <intrin.h>
+#endif
+
+/*
+ * Encoded generator, as returned by api_generator(): a single 0x09
+ * byte followed by 31 zero bytes (32 bytes in total).
+ */
+static const unsigned char GEN[] = {
+	0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+/*
+ * Value returned by api_order(): one 0x7F byte followed by 31 bytes
+ * of value 0xFF (32 bytes in total).
+ */
+static const unsigned char ORDER[] = {
+	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+
+/*
+ * Return a pointer to the encoded generator (the GEN[] table) and
+ * store its length, 32 bytes, in *len. The curve identifier is not
+ * used.
+ */
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	(void)curve;
+	*len = sizeof GEN;
+	return &GEN[0];
+}
+
+/*
+ * Return a pointer to the ORDER[] table and store its length, 32
+ * bytes, in *len. The curve identifier is not used.
+ */
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	(void)curve;
+	*len = sizeof ORDER;
+	return &ORDER[0];
+}
+
+/*
+ * Store 32 in *len and return 0 (presumably the length and offset of
+ * the X coordinate within an encoded point -- see bearssl_ec.h). The
+ * curve identifier is not used.
+ */
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	const size_t coord_len = 32;
+
+	(void)curve;
+	*len = coord_len;
+	return 0;
+}
+
+/*
+ * A field element is encoded as five 64-bit integers, in basis 2^51.
+ * Limbs may be occasionally larger than 2^51, to save on carry
+ * propagation costs.
+ *
+ * MASK51 is 2^51-1, i.e. the mask that keeps the low 51 bits of a
+ * limb.
+ */
+
+#define MASK51   (((uint64_t)1 << 51) - (uint64_t)1)
+
+/*
+ * Conditionally exchange two field elements (five limbs each): a[] and
+ * b[] are swapped if ctl is 1, and left unchanged if ctl is 0. The
+ * choice is applied through a 64-bit mask (all-ones or all-zeros), so
+ * the same operations are performed in both cases.
+ */
+static inline void
+f255_cswap(uint64_t *a, uint64_t *b, uint32_t ctl)
+{
+	uint64_t mask;
+	int i;
+
+	mask = -(uint64_t)ctl;
+	for (i = 0; i < 5; i ++) {
+		uint64_t diff;
+
+		diff = mask & (a[i] ^ b[i]);
+		a[i] ^= diff;
+		b[i] ^= diff;
+	}
+}
+
+/*
+ * Limb-wise addition of two field elements (five limbs each), without
+ * any carry propagation or reduction: each output limb is simply the
+ * sum of the two matching input limbs, so limbs may grow by one bit.
+ */
+static inline void
+f255_add(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+	int i;
+
+	for (i = 0; i < 5; i ++) {
+		d[i] = a[i] + b[i];
+	}
+}
+
+/*
+ * Subtraction.
+ * On input, limbs must fit on 60 bits each. On output, result is
+ * partially reduced, with max value 2^255+19456; moreover, all
+ * limbs will fit on 51 bits, except the low limb, which may have
+ * value up to 2^51+19455.
+ *
+ *   d   destination, 5 limbs
+ *   a   value to subtract from, 5 limbs
+ *   b   value to subtract, 5 limbs
+ *
+ * d may be the same array as a and/or b: limb i of the operands is
+ * read before limb i of d is written, and the final folding step only
+ * touches d.
+ */
+static inline void
+f255_sub(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+	uint64_t cc, w;
+
+	/*
+	 * We compute d = (2^255-19)*1024 + a - b. Since the limbs
+	 * fit on 60 bits, the maximum value of operands are slightly
+	 * more than 2^264, but much less than 2^265-19456. This
+	 * ensures that the result is positive.
+	 */
+
+	/*
+	 * Initial carry is 19456, since we add 2^265-19456. Each
+	 * individual subtraction may yield a carry up to 513.
+	 *
+	 * After each limb, cc is the amount to subtract from the next
+	 * limb; the 2^265 part is added on the top limb as 2^61
+	 * (limb 4 starts at bit 204).
+	 */
+	w = a[0] - b[0] - 19456;
+	d[0] = w & MASK51;
+	cc = -(w >> 51) & 0x3FF;
+	w = a[1] - b[1] - cc;
+	d[1] = w & MASK51;
+	cc = -(w >> 51) & 0x3FF;
+	w = a[2] - b[2] - cc;
+	d[2] = w & MASK51;
+	cc = -(w >> 51) & 0x3FF;
+	w = a[3] - b[3] - cc;
+	d[3] = w & MASK51;
+	cc = -(w >> 51) & 0x3FF;
+	d[4] = ((uint64_t)1 << 61) + a[4] - b[4] - cc;
+
+	/*
+	 * Partial reduction. The intermediate result may be up to
+	 * slightly above 2^265, but less than 2^265+2^255. When we
+	 * truncate to 255 bits, the upper bits will be at most 1024.
+	 */
+	d[0] += 19 * (d[4] >> 51);
+	d[4] &= MASK51;
+}
+
+/*
+ * UMUL51(hi, lo, x, y) computes:
+ *
+ *   hi = floor((x * y) / (2^51))
+ *   lo = x * y mod 2^51
+ *
+ * Note that lo < 2^51, but "hi" may be larger, if the input operands are
+ * larger.
+ *
+ * hi and lo are assigned to, so they must be modifiable 64-bit
+ * lvalues (hi receives the quotient truncated to 64 bits); x and y
+ * are each evaluated once. Two definitions follow: one based on the
+ * unsigned __int128 type (BR_INT128), the other on the _umul128()
+ * intrinsic (BR_UMUL128).
+ */
+#if BR_INT128
+
+#define UMUL51(hi, lo, x, y)   do { \
+		unsigned __int128 umul_tmp; \
+		umul_tmp = (unsigned __int128)(x) * (unsigned __int128)(y); \
+		(hi) = (uint64_t)(umul_tmp >> 51); \
+		(lo) = (uint64_t)umul_tmp & MASK51; \
+	} while (0)
+
+#elif BR_UMUL128
+
+#define UMUL51(hi, lo, x, y)   do { \
+		uint64_t umul_hi, umul_lo; \
+		umul_lo = _umul128((x), (y), &umul_hi); \
+		(hi) = (umul_hi << 13) | (umul_lo >> 51); \
+		(lo) = umul_lo & MASK51; \
+	} while (0)
+
+#endif
+
+/*
+ * Multiplication.
+ * On input, limbs must fit on 54 bits each.
+ * On output, limb 0 is at most 2^51 + 155647, and other limbs fit
+ * on 51 bits each.
+ *
+ *   d   destination, 5 limbs
+ *   a   first operand, 5 limbs
+ *   b   second operand, 5 limbs
+ *
+ * d may be the same array as a and/or b, and a and b may be the same
+ * array (squaring): all partial products are accumulated in the local
+ * array t[] before the first write to d[].
+ */
+static inline void
+f255_mul(uint64_t *d, uint64_t *a, uint64_t *b)
+{
+	uint64_t t[10], hi, lo, w, cc;
+
+	/*
+	 * Perform cross products, accumulating values without carry
+	 * propagation.
+	 *
+	 * Since input limbs fit on 54 bits each, each individual
+	 * UMUL51 will produce a "hi" of less than 2^57. The maximum
+	 * sum will be at most 5*(2^57-1) + 4*(2^51-1) (for t[5]),
+	 * i.e. less than 324*2^51.
+	 */
+
+	UMUL51(t[1], t[0], a[0], b[0]);
+
+	UMUL51(t[2], lo, a[1], b[0]); t[1] += lo;
+	UMUL51(hi, lo, a[0], b[1]); t[1] += lo; t[2] += hi;
+
+	UMUL51(t[3], lo, a[2], b[0]); t[2] += lo;
+	UMUL51(hi, lo, a[1], b[1]); t[2] += lo; t[3] += hi;
+	UMUL51(hi, lo, a[0], b[2]); t[2] += lo; t[3] += hi;
+
+	UMUL51(t[4], lo, a[3], b[0]); t[3] += lo;
+	UMUL51(hi, lo, a[2], b[1]); t[3] += lo; t[4] += hi;
+	UMUL51(hi, lo, a[1], b[2]); t[3] += lo; t[4] += hi;
+	UMUL51(hi, lo, a[0], b[3]); t[3] += lo; t[4] += hi;
+
+	UMUL51(t[5], lo, a[4], b[0]); t[4] += lo;
+	UMUL51(hi, lo, a[3], b[1]); t[4] += lo; t[5] += hi;
+	UMUL51(hi, lo, a[2], b[2]); t[4] += lo; t[5] += hi;
+	UMUL51(hi, lo, a[1], b[3]); t[4] += lo; t[5] += hi;
+	UMUL51(hi, lo, a[0], b[4]); t[4] += lo; t[5] += hi;
+
+	UMUL51(t[6], lo, a[4], b[1]); t[5] += lo;
+	UMUL51(hi, lo, a[3], b[2]); t[5] += lo; t[6] += hi;
+	UMUL51(hi, lo, a[2], b[3]); t[5] += lo; t[6] += hi;
+	UMUL51(hi, lo, a[1], b[4]); t[5] += lo; t[6] += hi;
+
+	UMUL51(t[7], lo, a[4], b[2]); t[6] += lo;
+	UMUL51(hi, lo, a[3], b[3]); t[6] += lo; t[7] += hi;
+	UMUL51(hi, lo, a[2], b[4]); t[6] += lo; t[7] += hi;
+
+	UMUL51(t[8], lo, a[4], b[3]); t[7] += lo;
+	UMUL51(hi, lo, a[3], b[4]); t[7] += lo; t[8] += hi;
+
+	UMUL51(t[9], lo, a[4], b[4]); t[8] += lo;
+
+	/*
+	 * The upper words t[5]..t[9] are folded back into the lower
+	 * words, using the rule that 2^255 = 19 in the field.
+	 *
+	 * Since each t[i] is less than 324*2^51, the additions below
+	 * will yield less than 6480*2^51 in each limb; this fits in
+	 * 64 bits (6480*2^51 < 8192*2^51 = 2^64), hence there is
+	 * no overflow.
+	 */
+	t[0] += 19 * t[5];
+	t[1] += 19 * t[6];
+	t[2] += 19 * t[7];
+	t[3] += 19 * t[8];
+	t[4] += 19 * t[9];
+
+	/*
+	 * Propagate carries.
+	 */
+	w = t[0];
+	d[0] = w & MASK51;
+	cc = w >> 51;
+	w = t[1] + cc;
+	d[1] = w & MASK51;
+	cc = w >> 51;
+	w = t[2] + cc;
+	d[2] = w & MASK51;
+	cc = w >> 51;
+	w = t[3] + cc;
+	d[3] = w & MASK51;
+	cc = w >> 51;
+	w = t[4] + cc;
+	d[4] = w & MASK51;
+	cc = w >> 51;
+
+	/*
+	 * Since the limbs were 64-bit values, the top carry is at
+	 * most 8192 (in practice, that cannot be reached). We simply
+	 * performed a partial reduction.
+	 */
+	d[0] += 19 * cc;
+}
+
+/*
+ * Multiplication by A24 = 121665.
+ * Input must have limbs of 60 bits at most.
+ *
+ *   d   destination, 5 limbs
+ *   a   operand, 5 limbs
+ *
+ * d may be the same array as a: the first pass reads a[] and writes
+ * only the local array t[]; the second pass reads t[] and writes d[].
+ */
+static inline void
+f255_mul_a24(uint64_t *d, const uint64_t *a)
+{
+	uint64_t t[5], cc, w;
+
+	/*
+	 * 121665 = 15 * 8111. We first multiply by 15, with carry
+	 * propagation and partial reduction.
+	 */
+	w = a[0] * 15;
+	t[0] = w & MASK51;
+	cc = w >> 51;
+	w = a[1] * 15 + cc;
+	t[1] = w & MASK51;
+	cc = w >> 51;
+	w = a[2] * 15 + cc;
+	t[2] = w & MASK51;
+	cc = w >> 51;
+	w = a[3] * 15 + cc;
+	t[3] = w & MASK51;
+	cc = w >> 51;
+	w = a[4] * 15 + cc;
+	t[4] = w & MASK51;
+	t[0] += 19 * (w >> 51);
+
+	/*
+	 * Then multiplication by 8111. At that point, we know that
+	 * t[0] is less than 2^51 + 19*8192, and other limbs are less
+	 * than 2^51; thus, there will be no overflow.
+	 */
+	w = t[0] * 8111;
+	d[0] = w & MASK51;
+	cc = w >> 51;
+	w = t[1] * 8111 + cc;
+	d[1] = w & MASK51;
+	cc = w >> 51;
+	w = t[2] * 8111 + cc;
+	d[2] = w & MASK51;
+	cc = w >> 51;
+	w = t[3] * 8111 + cc;
+	d[3] = w & MASK51;
+	cc = w >> 51;
+	w = t[4] * 8111 + cc;
+	d[4] = w & MASK51;
+	d[0] += 19 * (w >> 51);
+}
+
+/*
+ * Finalize reduction.
+ * On input, limbs must fit on 51 bits, except possibly the low limb,
+ * which may be slightly above 2^51.
+ *
+ *   a   value to reduce, 5 limbs, modified in place
+ *
+ * The choice between the original and the reduced value is applied
+ * through a mask (cc = -cc), not through a conditional branch.
+ */
+static inline void
+f255_final_reduce(uint64_t *a)
+{
+	uint64_t t[5], cc, w;
+
+	/*
+	 * We add 19. If the result (in t[]) is below 2^255, then a[]
+	 * is already less than 2^255-19, thus already reduced.
+	 * Otherwise, we subtract 2^255 from t[], in which case we
+	 * have t = a - (2^255-19), and that's our result.
+	 */
+	w = a[0] + 19;
+	t[0] = w & MASK51;
+	cc = w >> 51;
+	w = a[1] + cc;
+	t[1] = w & MASK51;
+	cc = w >> 51;
+	w = a[2] + cc;
+	t[2] = w & MASK51;
+	cc = w >> 51;
+	w = a[3] + cc;
+	t[3] = w & MASK51;
+	cc = w >> 51;
+	w = a[4] + cc;
+	t[4] = w & MASK51;
+	cc = w >> 51;
+
+	/*
+	 * The bit 255 of t is in cc. If that bit is 0, then a[] must
+	 * be unchanged; otherwise, it must be replaced with t[].
+	 */
+	cc = -cc;
+	a[0] ^= cc & (a[0] ^ t[0]);
+	a[1] ^= cc & (a[1] ^ t[1]);
+	a[2] ^= cc & (a[2] ^ t[2]);
+	a[3] ^= cc & (a[3] ^ t[3]);
+	a[4] ^= cc & (a[4] ^ t[4]);
+}
+
+/*
+ * Point multiplication (Montgomery ladder on the x coordinate only).
+ *
+ *   G       in/out: 32-byte point encoding, little-endian; overwritten
+ *           with the result
+ *   Glen    length of G, must be exactly 32
+ *   kb      multiplier, big-endian, at most 32 bytes
+ *   kblen   length of kb in bytes
+ *   curve   ignored
+ *
+ * Returned value is 1 on success, or 0 if Glen or kblen is out of
+ * range (G is then left unmodified).
+ *
+ * The multiplier is copied right-aligned into a 32-byte buffer and
+ * adjusted before use: the three low bits are cleared, bit 255 is
+ * cleared and bit 254 is set.
+ */
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *kb, size_t kblen, int curve)
+{
+	unsigned char k[32];
+	uint64_t x1[5], x2[5], z2[5], x3[5], z3[5];
+	uint32_t swap;
+	int i;
+
+	(void)curve;
+
+	/*
+	 * Points are encoded over exactly 32 bytes. Multipliers must fit
+	 * in 32 bytes as well.
+	 */
+	if (Glen != 32 || kblen > 32) {
+		return 0;
+	}
+
+	/*
+	 * RFC 7748 mandates that the high bit of the last point byte must
+	 * be ignored/cleared; the "& MASK51" in the initialization for
+	 * x1[4] clears that bit.
+	 */
+	x1[0] = br_dec64le(&G[0]) & MASK51;
+	x1[1] = (br_dec64le(&G[6]) >> 3) & MASK51;
+	x1[2] = (br_dec64le(&G[12]) >> 6) & MASK51;
+	x1[3] = (br_dec64le(&G[19]) >> 1) & MASK51;
+	x1[4] = (br_dec64le(&G[24]) >> 12) & MASK51;
+
+	/*
+	 * We can use memset() to clear values, because exact-width types
+	 * like uint64_t are guaranteed to have no padding bits or
+	 * trap representations.
+	 */
+	memset(x2, 0, sizeof x2);
+	x2[0] = 1;
+	memset(z2, 0, sizeof z2);
+	memcpy(x3, x1, sizeof x1);
+	memcpy(z3, x2, sizeof x2);
+
+	/*
+	 * The multiplier is provided in big-endian notation, and
+	 * possibly shorter than 32 bytes.
+	 */
+	memset(k, 0, (sizeof k) - kblen);
+	memcpy(k + (sizeof k) - kblen, kb, kblen);
+	k[31] &= 0xF8;
+	k[0] &= 0x7F;
+	k[0] |= 0x40;
+
+	swap = 0;
+
+	for (i = 254; i >= 0; i --) {
+		uint64_t a[5], aa[5], b[5], bb[5], e[5];
+		uint64_t c[5], d[5], da[5], cb[5];
+		uint32_t kt;
+
+		kt = (k[31 - (i >> 3)] >> (i & 7)) & 1;
+		swap ^= kt;
+		f255_cswap(x2, x3, swap);
+		f255_cswap(z2, z3, swap);
+		swap = kt;
+
+		/*
+		 * At that point, limbs of x_2 and z_2 are assumed to fit
+		 * on at most 52 bits each.
+		 *
+		 * Each f255_add() adds one bit to the maximum range of
+		 * the values, but f255_sub() and f255_mul() bring back
+		 * the limbs into 52 bits. All f255_add() outputs are
+		 * used only as inputs for f255_mul(), which ensures
+		 * that limbs remain in the proper range.
+		 */
+
+		/* A = x_2 + z_2   -- limbs fit on 53 bits each */
+		f255_add(a, x2, z2);
+
+		/* AA = A^2 */
+		f255_mul(aa, a, a);
+
+		/* B = x_2 - z_2 */
+		f255_sub(b, x2, z2);
+
+		/* BB = B^2 */
+		f255_mul(bb, b, b);
+
+		/* E = AA - BB */
+		f255_sub(e, aa, bb);
+
+		/* C = x_3 + z_3   -- limbs fit on 53 bits each */
+		f255_add(c, x3, z3);
+
+		/* D = x_3 - z_3 */
+		f255_sub(d, x3, z3);
+
+		/* DA = D * A */
+		f255_mul(da, d, a);
+
+		/* CB = C * B */
+		f255_mul(cb, c, b);
+
+		/* x_3 = (DA + CB)^2 */
+		f255_add(x3, da, cb);
+		f255_mul(x3, x3, x3);
+
+		/* z_3 = x_1 * (DA - CB)^2 */
+		f255_sub(z3, da, cb);
+		f255_mul(z3, z3, z3);
+		f255_mul(z3, x1, z3);
+
+		/* x_2 = AA * BB */
+		f255_mul(x2, aa, bb);
+
+		/* z_2 = E * (AA + a24 * E) */
+		f255_mul_a24(z2, e);
+		f255_add(z2, aa, z2);
+		f255_mul(z2, e, z2);
+	}
+
+	f255_cswap(x2, x3, swap);
+	f255_cswap(z2, z3, swap);
+
+	/*
+	 * Compute 1/z2 = z2^(p-2). Since p = 2^255-19, we can mutualize
+	 * most non-squarings. We use x1 and x3, now useless, as temporaries.
+	 */
+	memcpy(x1, z2, sizeof z2);
+	for (i = 0; i < 15; i ++) {
+		f255_mul(x1, x1, x1);
+		f255_mul(x1, x1, z2);
+	}
+	memcpy(x3, x1, sizeof x1);
+	for (i = 0; i < 14; i ++) {
+		int j;
+
+		for (j = 0; j < 16; j ++) {
+			f255_mul(x3, x3, x3);
+		}
+		f255_mul(x3, x3, x1);
+	}
+	for (i = 14; i >= 0; i --) {
+		f255_mul(x3, x3, x3);
+		if ((0xFFEB >> i) & 1) {
+			f255_mul(x3, z2, x3);
+		}
+	}
+
+	/*
+	 * Compute x2/z2. We have 1/z2 in x3.
+	 */
+	f255_mul(x2, x2, x3);
+	f255_final_reduce(x2);
+
+	/*
+	 * Encode the final x2 value in little-endian. We first assemble
+	 * the limbs into 64-bit values.
+	 */
+	x2[0] |= x2[1] << 51;
+	x2[1] = (x2[1] >> 13) | (x2[2] << 38);
+	x2[2] = (x2[2] >> 26) | (x2[3] << 25);
+	x2[3] = (x2[3] >> 39) | (x2[4] << 12);
+	br_enc64le(G, x2[0]);
+	br_enc64le(G + 8, x2[1]);
+	br_enc64le(G + 16, x2[2]);
+	br_enc64le(G + 24, x2[3]);
+	return 1;
+}
+
+/*
+ * Multiply the generator by the scalar x[] (xlen bytes). The generator
+ * encoding obtained from api_generator() is copied into R[], and
+ * api_mul() then works on R[] in place. The returned value is the
+ * generator length; the status returned by api_mul() is not inspected.
+ */
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	size_t gen_len;
+	const unsigned char *gen;
+
+	gen = api_generator(curve, &gen_len);
+	memcpy(R, gen, gen_len);
+	(void)api_mul(R, gen_len, x, xlen, curve);
+	return gen_len;
+}
+
+/*
+ * Placeholder for the "muladd" entry of the implementation table. The
+ * operation is used for ECDSA only, and there is no ECDSA over
+ * Curve25519 (which uses EdDSA instead), so nothing is computed: all
+ * arguments are ignored and 0 is always returned.
+ */
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	/* Explicitly discard every parameter. */
+	(void)curve;
+	(void)ylen;
+	(void)y;
+	(void)xlen;
+	(void)x;
+	(void)len;
+	(void)B;
+	(void)A;
+
+	return 0;
+}
+
+/*
+ * see bearssl_ec.h
+ *
+ * Implementation table for this file: a 32-bit constant with only
+ * bit 29 set (presumably the supported-curves mask -- confirm against
+ * bearssl_ec.h), followed by the six entry points of this file.
+ */
+const br_ec_impl br_ec_c25519_m62 = {
+	(uint32_t)1 << 29,
+	api_generator,
+	api_order,
+	api_xoff,
+	api_mul,
+	api_mulgen,
+	api_muladd
+};
+
+/*
+ * see bearssl_ec.h
+ *
+ * This definition is compiled when BR_INT128 or BR_UMUL128 is set; it
+ * returns the address of the br_ec_c25519_m62 table defined above.
+ */
+const br_ec_impl *
+br_ec_c25519_m62_get(void)
+{
+	return &br_ec_c25519_m62;
+}
+
+#else
+
+/*
+ * see bearssl_ec.h
+ *
+ * Fallback compiled when neither BR_INT128 nor BR_UMUL128 is set: the
+ * m62 code above is then excluded, and a null pointer (0) is returned.
+ */
+const br_ec_impl *
+br_ec_c25519_m62_get(void)
+{
+	return 0;
+}
+
+#endif
--- a/third_party/bearssl/src/ec_c25519_m64.c
+++ b/third_party/bearssl/src/ec_c25519_m64.c
@ -0,0 +1,831 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+#if BR_UMUL128
+#include <intrin.h>
+#endif
+
+static const unsigned char GEN[] = {	/* encoded generator, handed out by api_generator() */
+	0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* value 9 when decoded little-endian, as api_mul() does */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+static const unsigned char ORDER[] = {	/* handed out by api_order() */
+	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	/* 0x7F then 31 bytes of 0xFF: 2^255-1 when read most significant byte first */
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	/* NOTE(review): looks like an upper bound for scalars rather than the */
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	/* exact subgroup order -- confirm against bearssl_ec.h */
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+
+/*
+ * Return the encoded generator (the GEN table) and store its length,
+ * 32 bytes, in *len. The curve identifier is ignored.
+ */
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	(void)curve;
+	*len = sizeof GEN;
+	return GEN;
+}
+
+/*
+ * Return the ORDER table and store its length, 32 bytes, in *len.
+ * The curve identifier is ignored.
+ */
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	(void)curve;
+	*len = sizeof ORDER;
+	return ORDER;
+}
+
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	(void)curve;	/* the curve identifier is not used */
+	*len = 32;	/* presumably the X coordinate length in bytes (see bearssl_ec.h) */
+	return 0;	/* presumably the X coordinate offset inside an encoded point */
+}
+
+/*
+ * A field element is encoded as four 64-bit integers, in basis 2^64
+ * (least significant limb first). Operations return partially reduced
+ * values, which may range up to 2^255+37.
+ */
+
+#define MASK63   (((uint64_t)1 << 63) - (uint64_t)1)	/* 2^63-1: keeps the low 63 bits of a limb */
+
+/*
+ * Branch-free conditional exchange of two field elements (four limbs
+ * each). With ctl == 1 the contents of a and b are swapped; with
+ * ctl == 0 both are left unchanged. ctl is expected to be 0 or 1: it
+ * is expanded into an all-ones or all-zeros mask that selects which
+ * bits get flipped.
+ */
+static inline void
+f255_cswap(uint64_t *a, uint64_t *b, uint32_t ctl)
+{
+	uint64_t mask;
+	int i;
+
+	mask = -(uint64_t)ctl;
+	for (i = 0; i < 4; i ++) {
+		uint64_t diff;
+
+		diff = mask & (a[i] ^ b[i]);
+		a[i] ^= diff;
+		b[i] ^= diff;
+	}
+}
+
+/*
+ * Addition in the field.
+ *
+ * Writes a + b into d. Operands and result are four 64-bit limbs,
+ * least significant limb first. The result is only partially reduced
+ * (at most 2^255+37, per the bound derived in the body), assuming both
+ * operands are themselves at most 2^255+37.
+ *
+ * All input limbs are read before d[] is written, so d may be the
+ * same array as a and/or b.
+ *
+ * Two equivalent code paths exist: BR_INT128 propagates carries
+ * through unsigned __int128 sums, BR_UMUL128 uses the _addcarry_u64()
+ * intrinsic.
+ */
+static inline void
+f255_add(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+#if BR_INT128
+
+	uint64_t t0, t1, t2, t3, cc;
+	unsigned __int128 z;
+
+	z = (unsigned __int128)a[0] + (unsigned __int128)b[0];
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)a[1] + (unsigned __int128)b[1] + (z >> 64);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)a[2] + (unsigned __int128)b[2] + (z >> 64);
+	t2 = (uint64_t)z;
+	z = (unsigned __int128)a[3] + (unsigned __int128)b[3] + (z >> 64);
+	t3 = (uint64_t)z & MASK63;	/* keep bits 192..254 of the sum */
+	cc = (uint64_t)(z >> 63);	/* bits 255 and above of the sum */
+
+	/*
+	 * Since operands are at most 2^255+37, the sum is at most
+	 * 2^256+74; thus, the carry cc is equal to 0, 1 or 2.
+	 *
+	 * We use: 2^255 = 19 mod p.
+	 * Since we add 0, 19 or 38 to a value that fits on 255 bits,
+	 * the result is at most 2^255+37.
+	 */
+	z = (unsigned __int128)t0 + (unsigned __int128)(19 * cc);
+	d[0] = (uint64_t)z;
+	z = (unsigned __int128)t1 + (z >> 64);
+	d[1] = (uint64_t)z;
+	z = (unsigned __int128)t2 + (z >> 64);
+	d[2] = (uint64_t)z;
+	d[3] = t3 + (uint64_t)(z >> 64);
+
+#elif BR_UMUL128
+
+	uint64_t t0, t1, t2, t3, cc;
+	unsigned char k;
+
+	k = _addcarry_u64(0, a[0], b[0], &t0);
+	k = _addcarry_u64(k, a[1], b[1], &t1);
+	k = _addcarry_u64(k, a[2], b[2], &t2);
+	k = _addcarry_u64(k, a[3], b[3], &t3);
+	cc = (k << 1) + (t3 >> 63);	/* bits 256 (carry out) and 255 of the sum */
+	t3 &= MASK63;
+
+	/*
+	 * Since operands are at most 2^255+37, the sum is at most
+	 * 2^256+74; thus, the carry cc is equal to 0, 1 or 2.
+	 *
+	 * We use: 2^255 = 19 mod p.
+	 * Since we add 0, 19 or 38 to a value that fits on 255 bits,
+	 * the result is at most 2^255+37.
+	 */
+	k = _addcarry_u64(0, t0, 19 * cc, &d[0]);
+	k = _addcarry_u64(k, t1, 0, &d[1]);
+	k = _addcarry_u64(k, t2, 0, &d[2]);
+	(void)_addcarry_u64(k, t3, 0, &d[3]);
+
+#endif
+}
+
+/*
+ * Subtraction.
+ *
+ * Writes a - b (modulo p = 2^255-19) into d. Operands and result are
+ * four 64-bit limbs, least significant limb first. The result is only
+ * partially reduced (at most 2^255+37), assuming both operands are at
+ * most 2^255+37.
+ *
+ * All input limbs are read before d[] is written, so d may be the
+ * same array as a and/or b.
+ *
+ * Two equivalent code paths exist: BR_INT128 tracks borrows through
+ * unsigned __int128 differences, BR_UMUL128 uses the _subborrow_u64()
+ * and _addcarry_u64() intrinsics.
+ */
+static inline void
+f255_sub(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+#if BR_INT128
+
+	/*
+	 * We compute t = 2^256 - 38 + a - b, which is necessarily
+	 * positive but lower than 2^256 + 2^255, since a <= 2^255 + 37
+	 * and b <= 2^255 + 37. We then subtract 0, p or 2*p, depending
+	 * on the two upper bits of t (bits 255 and 256).
+	 */
+
+	uint64_t t0, t1, t2, t3, t4, cc;
+	unsigned __int128 z;
+
+	z = (unsigned __int128)a[0] - (unsigned __int128)b[0] - 38;
+	t0 = (uint64_t)z;
+	cc = -(uint64_t)(z >> 64);	/* 1 if this limb borrowed, 0 otherwise */
+	z = (unsigned __int128)a[1] - (unsigned __int128)b[1]
+		- (unsigned __int128)cc;
+	t1 = (uint64_t)z;
+	cc = -(uint64_t)(z >> 64);
+	z = (unsigned __int128)a[2] - (unsigned __int128)b[2]
+		- (unsigned __int128)cc;
+	t2 = (uint64_t)z;
+	cc = -(uint64_t)(z >> 64);
+	z = (unsigned __int128)a[3] - (unsigned __int128)b[3]
+		- (unsigned __int128)cc;
+	t3 = (uint64_t)z;
+	t4 = 1 + (uint64_t)(z >> 64);	/* the 2^256 term minus the final borrow: 1 or 0 */
+
+	/*
+	 * We have a 257-bit result. The two top bits can be 00, 01 or 10,
+	 * but not 11 (value t <= 2^256 - 38 + 2^255 + 37 = 2^256 + 2^255 - 1).
+	 * Therefore, we can truncate to 255 bits, and add 0, 19 or 38.
+	 * This guarantees that the result is at most 2^255+37.
+	 */
+	cc = (38 & -t4) + (19 & -(t3 >> 63));	/* 38 if bit 256 is set, plus 19 if bit 255 is set */
+	t3 &= MASK63;
+	z = (unsigned __int128)t0 + (unsigned __int128)cc;
+	d[0] = (uint64_t)z;
+	z = (unsigned __int128)t1 + (z >> 64);
+	d[1] = (uint64_t)z;
+	z = (unsigned __int128)t2 + (z >> 64);
+	d[2] = (uint64_t)z;
+	d[3] = t3 + (uint64_t)(z >> 64);
+
+#elif BR_UMUL128
+
+	/*
+	 * We compute t = 2^256 - 38 + a - b, which is necessarily
+	 * positive but lower than 2^256 + 2^255, since a <= 2^255 + 37
+	 * and b <= 2^255 + 37. We then subtract 0, p or 2*p, depending
+	 * on the two upper bits of t (bits 255 and 256).
+	 */
+
+	uint64_t t0, t1, t2, t3, t4;
+	unsigned char k;
+
+	k = _subborrow_u64(0, a[0], b[0], &t0);
+	k = _subborrow_u64(k, a[1], b[1], &t1);
+	k = _subborrow_u64(k, a[2], b[2], &t2);
+	k = _subborrow_u64(k, a[3], b[3], &t3);
+	(void)_subborrow_u64(k, 1, 0, &t4);	/* t4 = 1 - borrow: the 2^256 term */
+
+	k = _subborrow_u64(0, t0, 38, &t0);	/* second pass: subtract the constant 38 */
+	k = _subborrow_u64(k, t1, 0, &t1);
+	k = _subborrow_u64(k, t2, 0, &t2);
+	k = _subborrow_u64(k, t3, 0, &t3);
+	(void)_subborrow_u64(k, t4, 0, &t4);
+
+	/*
+	 * We have a 257-bit result. The two top bits can be 00, 01 or 10,
+	 * but not 11 (value t <= 2^256 - 38 + 2^255 + 37 = 2^256 + 2^255 - 1).
+	 * Therefore, we can truncate to 255 bits, and add 0, 19 or 38.
+	 * This guarantees that the result is at most 2^255+37.
+	 */
+	t4 = (38 & -t4) + (19 & -(t3 >> 63));
+	t3 &= MASK63;
+	k = _addcarry_u64(0, t0, t4, &d[0]);
+	k = _addcarry_u64(k, t1, 0, &d[1]);
+	k = _addcarry_u64(k, t2, 0, &d[2]);
+	(void)_addcarry_u64(k, t3, 0, &d[3]);
+
+#endif
+}
+
+/*
+ * Multiplication.
+ *
+ * Writes a * b (modulo p = 2^255-19) into d. Operands and result are
+ * four 64-bit limbs, least significant limb first. The result is only
+ * partially reduced (at most 2^255+37), assuming both operands are at
+ * most 2^255+37.
+ *
+ * The full 512-bit product is accumulated in t0..t7 before d[] is
+ * written, so d may be the same array as a and/or b; squaring is done
+ * by passing the same array for a and b. The operands are only read.
+ *
+ * Two equivalent code paths exist: BR_INT128 uses unsigned __int128
+ * products, BR_UMUL128 uses the _umul128() and _addcarry_u64()
+ * intrinsics.
+ */
+static inline void
+f255_mul(uint64_t *d, uint64_t *a, uint64_t *b)
+{
+#if BR_INT128
+
+	unsigned __int128 z;
+	uint64_t t0, t1, t2, t3, t4, t5, t6, t7, th;
+
+	/*
+	 * Compute the product a*b over plain integers.
+	 * Each group of statements below adds one row a[i]*b[0..3]
+	 * into the running total t0..t7.
+	 */
+	z = (unsigned __int128)a[0] * (unsigned __int128)b[0];
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)a[0] * (unsigned __int128)b[1] + (z >> 64);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)a[0] * (unsigned __int128)b[2] + (z >> 64);
+	t2 = (uint64_t)z;
+	z = (unsigned __int128)a[0] * (unsigned __int128)b[3] + (z >> 64);
+	t3 = (uint64_t)z;
+	t4 = (uint64_t)(z >> 64);
+
+	z = (unsigned __int128)a[1] * (unsigned __int128)b[0]
+		+ (unsigned __int128)t1;
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)a[1] * (unsigned __int128)b[1]
+		+ (unsigned __int128)t2 + (z >> 64);
+	t2 = (uint64_t)z;
+	z = (unsigned __int128)a[1] * (unsigned __int128)b[2]
+		+ (unsigned __int128)t3 + (z >> 64);
+	t3 = (uint64_t)z;
+	z = (unsigned __int128)a[1] * (unsigned __int128)b[3]
+		+ (unsigned __int128)t4 + (z >> 64);
+	t4 = (uint64_t)z;
+	t5 = (uint64_t)(z >> 64);
+
+	z = (unsigned __int128)a[2] * (unsigned __int128)b[0]
+		+ (unsigned __int128)t2;
+	t2 = (uint64_t)z;
+	z = (unsigned __int128)a[2] * (unsigned __int128)b[1]
+		+ (unsigned __int128)t3 + (z >> 64);
+	t3 = (uint64_t)z;
+	z = (unsigned __int128)a[2] * (unsigned __int128)b[2]
+		+ (unsigned __int128)t4 + (z >> 64);
+	t4 = (uint64_t)z;
+	z = (unsigned __int128)a[2] * (unsigned __int128)b[3]
+		+ (unsigned __int128)t5 + (z >> 64);
+	t5 = (uint64_t)z;
+	t6 = (uint64_t)(z >> 64);
+
+	z = (unsigned __int128)a[3] * (unsigned __int128)b[0]
+		+ (unsigned __int128)t3;
+	t3 = (uint64_t)z;
+	z = (unsigned __int128)a[3] * (unsigned __int128)b[1]
+		+ (unsigned __int128)t4 + (z >> 64);
+	t4 = (uint64_t)z;
+	z = (unsigned __int128)a[3] * (unsigned __int128)b[2]
+		+ (unsigned __int128)t5 + (z >> 64);
+	t5 = (uint64_t)z;
+	z = (unsigned __int128)a[3] * (unsigned __int128)b[3]
+		+ (unsigned __int128)t6 + (z >> 64);
+	t6 = (uint64_t)z;
+	t7 = (uint64_t)(z >> 64);
+
+	/*
+	 * Modulo p, we have:
+	 *
+	 *   2^255 = 19
+	 *   2^510 = 19*19 = 361
+	 *
+	 * We split the intermediate t into three parts, in basis
+	 * 2^255. The low one will be in t0..t3; the middle one in t4..t7.
+	 * The upper one can only be a single bit (th), since the
+	 * multiplication operands are at most 2^255+37 each.
+	 *
+	 * The split is done by shifting t4..t7 left by one bit, pulling
+	 * in the top bit of the limb below each time.
+	 */
+	th = t7 >> 62;
+	t7 = ((t7 << 1) | (t6 >> 63)) & MASK63;
+	t6 = (t6 << 1) | (t5 >> 63);
+	t5 = (t5 << 1) | (t4 >> 63);
+	t4 = (t4 << 1) | (t3 >> 63);
+	t3 &= MASK63;
+
+	/*
+	 * Multiply the middle part (t4..t7) by 19. We truncate it to
+	 * 255 bits; the extra bits will go along with th.
+	 */
+	z = (unsigned __int128)t4 * 19;
+	t4 = (uint64_t)z;
+	z = (unsigned __int128)t5 * 19 + (z >> 64);
+	t5 = (uint64_t)z;
+	z = (unsigned __int128)t6 * 19 + (z >> 64);
+	t6 = (uint64_t)z;
+	z = (unsigned __int128)t7 * 19 + (z >> 64);
+	t7 = (uint64_t)z & MASK63;
+
+	th = (361 & -th) + (19 * (uint64_t)(z >> 63));	/* 361 for the 2^510 bit, 19 per unit of overflow above bit 254 */
+
+	/*
+	 * Add elements together.
+	 * At this point:
+	 *   t0..t3 fits on 255 bits.
+	 *   t4..t7 fits on 255 bits.
+	 *   th <= 361 + 342 = 703.
+	 */
+	z = (unsigned __int128)t0 + (unsigned __int128)t4
+		+ (unsigned __int128)th;
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)t1 + (unsigned __int128)t5 + (z >> 64);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)t2 + (unsigned __int128)t6 + (z >> 64);
+	t2 = (uint64_t)z;
+	z = (unsigned __int128)t3 + (unsigned __int128)t7 + (z >> 64);
+	t3 = (uint64_t)z & MASK63;
+	th = (uint64_t)(z >> 63);
+
+	/*
+	 * Since the sum is at most 2^256 + 703, the two upper bits, in th,
+	 * can only have value 0, 1 or 2. We just add th*19, which
+	 * guarantees a result of at most 2^255+37.
+	 */
+	z = (unsigned __int128)t0 + (19 * th);
+	d[0] = (uint64_t)z;
+	z = (unsigned __int128)t1 + (z >> 64);
+	d[1] = (uint64_t)z;
+	z = (unsigned __int128)t2 + (z >> 64);
+	d[2] = (uint64_t)z;
+	d[3] = t3 + (uint64_t)(z >> 64);
+
+#elif BR_UMUL128
+
+	uint64_t t0, t1, t2, t3, t4, t5, t6, t7, th;
+	uint64_t h0, h1, h2, h3;
+	unsigned char k;
+
+	/*
+	 * Compute the product a*b over plain integers.
+	 * For each row a[i]*b[0..3], the low halves of the four
+	 * products are added first, then the high halves (h0..h3) are
+	 * added one limb higher.
+	 */
+	t0 = _umul128(a[0], b[0], &h0);
+	t1 = _umul128(a[0], b[1], &h1);
+	k = _addcarry_u64(0, t1, h0, &t1);
+	t2 = _umul128(a[0], b[2], &h2);
+	k = _addcarry_u64(k, t2, h1, &t2);
+	t3 = _umul128(a[0], b[3], &h3);
+	k = _addcarry_u64(k, t3, h2, &t3);
+	(void)_addcarry_u64(k, h3, 0, &t4);
+
+	k = _addcarry_u64(0, _umul128(a[1], b[0], &h0), t1, &t1);
+	k = _addcarry_u64(k, _umul128(a[1], b[1], &h1), t2, &t2);
+	k = _addcarry_u64(k, _umul128(a[1], b[2], &h2), t3, &t3);
+	k = _addcarry_u64(k, _umul128(a[1], b[3], &h3), t4, &t4);
+	t5 = k;
+	k = _addcarry_u64(0, t2, h0, &t2);
+	k = _addcarry_u64(k, t3, h1, &t3);
+	k = _addcarry_u64(k, t4, h2, &t4);
+	(void)_addcarry_u64(k, t5, h3, &t5);
+
+	k = _addcarry_u64(0, _umul128(a[2], b[0], &h0), t2, &t2);
+	k = _addcarry_u64(k, _umul128(a[2], b[1], &h1), t3, &t3);
+	k = _addcarry_u64(k, _umul128(a[2], b[2], &h2), t4, &t4);
+	k = _addcarry_u64(k, _umul128(a[2], b[3], &h3), t5, &t5);
+	t6 = k;
+	k = _addcarry_u64(0, t3, h0, &t3);
+	k = _addcarry_u64(k, t4, h1, &t4);
+	k = _addcarry_u64(k, t5, h2, &t5);
+	(void)_addcarry_u64(k, t6, h3, &t6);
+
+	k = _addcarry_u64(0, _umul128(a[3], b[0], &h0), t3, &t3);
+	k = _addcarry_u64(k, _umul128(a[3], b[1], &h1), t4, &t4);
+	k = _addcarry_u64(k, _umul128(a[3], b[2], &h2), t5, &t5);
+	k = _addcarry_u64(k, _umul128(a[3], b[3], &h3), t6, &t6);
+	t7 = k;
+	k = _addcarry_u64(0, t4, h0, &t4);
+	k = _addcarry_u64(k, t5, h1, &t5);
+	k = _addcarry_u64(k, t6, h2, &t6);
+	(void)_addcarry_u64(k, t7, h3, &t7);
+
+	/*
+	 * Modulo p, we have:
+	 *
+	 *   2^255 = 19
+	 *   2^510 = 19*19 = 361
+	 *
+	 * We split the intermediate t into three parts, in basis
+	 * 2^255. The low one will be in t0..t3; the middle one in t4..t7.
+	 * The upper one can only be a single bit (th), since the
+	 * multiplication operands are at most 2^255+37 each.
+	 */
+	th = t7 >> 62;
+	t7 = ((t7 << 1) | (t6 >> 63)) & MASK63;
+	t6 = (t6 << 1) | (t5 >> 63);
+	t5 = (t5 << 1) | (t4 >> 63);
+	t4 = (t4 << 1) | (t3 >> 63);
+	t3 &= MASK63;
+
+	/*
+	 * Multiply the middle part (t4..t7) by 19. We truncate it to
+	 * 255 bits; the extra bits will go along with th.
+	 */
+	t4 = _umul128(t4, 19, &h0);
+	t5 = _umul128(t5, 19, &h1);
+	t6 = _umul128(t6, 19, &h2);
+	t7 = _umul128(t7, 19, &h3);
+	k = _addcarry_u64(0, t5, h0, &t5);
+	k = _addcarry_u64(k, t6, h1, &t6);
+	k = _addcarry_u64(k, t7, h2, &t7);
+	(void)_addcarry_u64(k, h3, 0, &h3);
+	th = (361 & -th) + (19 * ((h3 << 1) + (t7 >> 63)));
+	t7 &= MASK63;
+
+	/*
+	 * Add elements together.
+	 * At this point:
+	 *   t0..t3 fits on 255 bits.
+	 *   t4..t7 fits on 255 bits.
+	 *   th <= 361 + 342 = 703.
+	 */
+	k = _addcarry_u64(0, t0, t4, &t0);
+	k = _addcarry_u64(k, t1, t5, &t1);
+	k = _addcarry_u64(k, t2, t6, &t2);
+	k = _addcarry_u64(k, t3, t7, &t3);
+	t4 = k;
+	k = _addcarry_u64(0, t0, th, &t0);
+	k = _addcarry_u64(k, t1, 0, &t1);
+	k = _addcarry_u64(k, t2, 0, &t2);
+	k = _addcarry_u64(k, t3, 0, &t3);
+	(void)_addcarry_u64(k, t4, 0, &t4);
+
+	th = (t4 << 1) + (t3 >> 63);
+	t3 &= MASK63;
+
+	/*
+	 * Since the sum is at most 2^256 + 703, the two upper bits, in th,
+	 * can only have value 0, 1 or 2. We just add th*19, which
+	 * guarantees a result of at most 2^255+37.
+	 */
+	k = _addcarry_u64(0, t0, 19 * th, &d[0]);
+	k = _addcarry_u64(k, t1, 0, &d[1]);
+	k = _addcarry_u64(k, t2, 0, &d[2]);
+	(void)_addcarry_u64(k, t3, 0, &d[3]);
+
+#endif
+}
+
+/*
+ * Multiplication by A24 = 121665.
+ *
+ * Writes 121665 * a (modulo p = 2^255-19) into d; operand and result
+ * are four 64-bit limbs, least significant limb first. The bits of the
+ * product at position 255 and above are folded back in, multiplied by
+ * 19 (since 2^255 = 19 mod p); a second fold then handles a bit 255
+ * that the first fold may have produced.
+ *
+ * All limbs of a are read before d[] is written, so d may be the same
+ * array as a.
+ */
+static inline void
+f255_mul_a24(uint64_t *d, const uint64_t *a)
+{
+#if BR_INT128
+
+	uint64_t t0, t1, t2, t3;
+	unsigned __int128 z;
+
+	z = (unsigned __int128)a[0] * 121665;
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)a[1] * 121665 + (z >> 64);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)a[2] * 121665 + (z >> 64);
+	t2 = (uint64_t)z;
+	z = (unsigned __int128)a[3] * 121665 + (z >> 64);
+	t3 = (uint64_t)z & MASK63;
+
+	z = (unsigned __int128)t0 + (19 * (uint64_t)(z >> 63));	/* first fold: bits 255 and above, times 19 */
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)t1 + (z >> 64);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)t2 + (z >> 64);
+	t2 = (uint64_t)z;
+	t3 = t3 + (uint64_t)(z >> 64);
+
+	z = (unsigned __int128)t0 + (19 & -(t3 >> 63));	/* second fold: add 19 if bit 255 is set */
+	d[0] = (uint64_t)z;
+	z = (unsigned __int128)t1 + (z >> 64);
+	d[1] = (uint64_t)z;
+	z = (unsigned __int128)t2 + (z >> 64);
+	d[2] = (uint64_t)z;
+	d[3] = (t3 & MASK63) + (uint64_t)(z >> 64);
+
+#elif BR_UMUL128
+
+	uint64_t t0, t1, t2, t3, t4, h0, h1, h2, h3;
+	unsigned char k;
+
+	t0 = _umul128(a[0], 121665, &h0);
+	t1 = _umul128(a[1], 121665, &h1);
+	k = _addcarry_u64(0, t1, h0, &t1);
+	t2 = _umul128(a[2], 121665, &h2);
+	k = _addcarry_u64(k, t2, h1, &t2);
+	t3 = _umul128(a[3], 121665, &h3);
+	k = _addcarry_u64(k, t3, h2, &t3);
+	(void)_addcarry_u64(k, h3, 0, &t4);
+
+	t4 = (t4 << 1) + (t3 >> 63);	/* bits 255 and above of the product */
+	t3 &= MASK63;
+	k = _addcarry_u64(0, t0, 19 * t4, &t0);	/* first fold */
+	k = _addcarry_u64(k, t1, 0, &t1);
+	k = _addcarry_u64(k, t2, 0, &t2);
+	(void)_addcarry_u64(k, t3, 0, &t3);
+
+	t4 = 19 & -(t3 >> 63);	/* second fold: 19 if bit 255 is set, else 0 */
+	t3 &= MASK63;
+	k = _addcarry_u64(0, t0, t4, &d[0]);
+	k = _addcarry_u64(k, t1, 0, &d[1]);
+	k = _addcarry_u64(k, t2, 0, &d[2]);
+	(void)_addcarry_u64(k, t3, 0, &d[3]);
+
+#endif
+}
+
+/*
+ * Finalize reduction.
+ *
+ * Replaces a[] (four 64-bit limbs, least significant first) in place
+ * with its fully reduced value modulo p = 2^255-19. The choice between
+ * keeping a[] and taking a - p is made with a bit mask (m) applied to
+ * every limb, not with a conditional branch.
+ */
+static inline void
+f255_final_reduce(uint64_t *a)
+{
+#if BR_INT128
+
+	uint64_t t0, t1, t2, t3, m;
+	unsigned __int128 z;
+
+	/*
+	 * We add 19. If the result (in t) is below 2^255, then a[]
+	 * is already less than 2^255-19, thus already reduced.
+	 * Otherwise, we subtract 2^255 from t[], in which case we
+	 * have t = a - (2^255-19), and that's our result.
+	 */
+	z = (unsigned __int128)a[0] + 19;
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)a[1] + (z >> 64);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)a[2] + (z >> 64);
+	t2 = (uint64_t)z;
+	t3 = a[3] + (uint64_t)(z >> 64);
+
+	m = -(t3 >> 63);	/* all-ones if a + 19 reached 2^255, zero otherwise */
+	t3 &= MASK63;	/* subtract 2^255 by dropping bit 255 */
+	a[0] ^= m & (a[0] ^ t0);	/* a[i] becomes t[i] when m is all-ones, else stays a[i] */
+	a[1] ^= m & (a[1] ^ t1);
+	a[2] ^= m & (a[2] ^ t2);
+	a[3] ^= m & (a[3] ^ t3);
+
+#elif BR_UMUL128
+
+	uint64_t t0, t1, t2, t3, m;
+	unsigned char k;
+
+	/*
+	 * We add 19. If the result (in t) is below 2^255, then a[]
+	 * is already less than 2^255-19, thus already reduced.
+	 * Otherwise, we subtract 2^255 from t[], in which case we
+	 * have t = a - (2^255-19), and that's our result.
+	 */
+	k = _addcarry_u64(0, a[0], 19, &t0);
+	k = _addcarry_u64(k, a[1], 0, &t1);
+	k = _addcarry_u64(k, a[2], 0, &t2);
+	(void)_addcarry_u64(k, a[3], 0, &t3);
+
+	m = -(t3 >> 63);	/* all-ones if a + 19 reached 2^255, zero otherwise */
+	t3 &= MASK63;	/* subtract 2^255 by dropping bit 255 */
+	a[0] ^= m & (a[0] ^ t0);
+	a[1] ^= m & (a[1] ^ t1);
+	a[2] ^= m & (a[2] ^ t2);
+	a[3] ^= m & (a[3] ^ t3);
+
+#endif
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *kb, size_t kblen, int curve)
+{
+	unsigned char k[32];
+	uint64_t x1[4], x2[4], z2[4], x3[4], z3[4];
+	uint32_t swap;
+	int i;
+
+	(void)curve;
+
+	/*
+	 * Overview: multiplies the point encoded in G[] (32 bytes,
+	 * little-endian) by the scalar kb[] (big-endian, kblen bytes)
+	 * and writes the encoded result back over G[]. Returns 1 on
+	 * success and 0 when the lengths are rejected below; in that
+	 * case G[] is left untouched. The curve identifier is ignored.
+	 *
+	 * Points are encoded over exactly 32 bytes. Multipliers must fit
+	 * in 32 bytes as well.
+	 */
+	if (Glen != 32 || kblen > 32) {
+		return 0;
+	}
+
+	/*
+	 * RFC 7748 mandates that the high bit of the last point byte must
+	 * be ignored/cleared.
+	 */
+	x1[0] = br_dec64le(&G[ 0]);
+	x1[1] = br_dec64le(&G[ 8]);
+	x1[2] = br_dec64le(&G[16]);
+	x1[3] = br_dec64le(&G[24]) & MASK63;
+
+	/*
+	 * We can use memset() to clear values, because exact-width types
+	 * like uint64_t are guaranteed to have no padding bits or
+	 * trap representations.
+	 *
+	 * Initial ladder state: x2 = 1, z2 = 0, x3 = x1, z3 = 1.
+	 */
+	memset(x2, 0, sizeof x2);
+	x2[0] = 1;
+	memset(z2, 0, sizeof z2);
+	memcpy(x3, x1, sizeof x1);
+	memcpy(z3, x2, sizeof x2);
+
+	/*
+	 * The multiplier is provided in big-endian notation, and
+	 * possibly shorter than 32 bytes.
+	 *
+	 * It is right-aligned in k[] with zero padding on the left, then
+	 * adjusted: the three low bits are cleared, bit 255 is cleared
+	 * and bit 254 is set.
+	 */
+	memset(k, 0, (sizeof k) - kblen);
+	memcpy(k + (sizeof k) - kblen, kb, kblen);
+	k[31] &= 0xF8;
+	k[0] &= 0x7F;
+	k[0] |= 0x40;
+
+	swap = 0;
+
+	for (i = 254; i >= 0; i --) {
+		uint64_t a[4], aa[4], b[4], bb[4], e[4];
+		uint64_t c[4], d[4], da[4], cb[4];
+		uint32_t kt;
+
+		kt = (k[31 - (i >> 3)] >> (i & 7)) & 1;	/* bit i of the multiplier */
+		swap ^= kt;	/* 1 only when this bit differs from the previous one */
+		f255_cswap(x2, x3, swap);
+		f255_cswap(z2, z3, swap);
+		swap = kt;
+
+		/* A = x_2 + z_2 */
+		f255_add(a, x2, z2);
+
+		/* AA = A^2 */
+		f255_mul(aa, a, a);
+
+		/* B = x_2 - z_2 */
+		f255_sub(b, x2, z2);
+
+		/* BB = B^2 */
+		f255_mul(bb, b, b);
+
+		/* E = AA - BB */
+		f255_sub(e, aa, bb);
+
+		/* C = x_3 + z_3 */
+		f255_add(c, x3, z3);
+
+		/* D = x_3 - z_3 */
+		f255_sub(d, x3, z3);
+
+		/* DA = D * A */
+		f255_mul(da, d, a);
+
+		/* CB = C * B */
+		f255_mul(cb, c, b);
+
+		/* x_3 = (DA + CB)^2 */
+		f255_add(x3, da, cb);
+		f255_mul(x3, x3, x3);
+
+		/* z_3 = x_1 * (DA - CB)^2 */
+		f255_sub(z3, da, cb);
+		f255_mul(z3, z3, z3);
+		f255_mul(z3, x1, z3);
+
+		/* x_2 = AA * BB */
+		f255_mul(x2, aa, bb);
+
+		/* z_2 = E * (AA + a24 * E) */
+		f255_mul_a24(z2, e);
+		f255_add(z2, aa, z2);
+		f255_mul(z2, e, z2);
+	}
+
+	f255_cswap(x2, x3, swap);	/* apply the swap still pending from the last bit */
+	f255_cswap(z2, z3, swap);
+
+	/*
+	 * Compute 1/z2 = z2^(p-2). Since p = 2^255-19, we can mutualize
+	 * most non-squarings. We use x1 and x3, now useless, as temporaries.
+	 *
+	 * Exponent bookkeeping: the first loop leaves x1 = z2^(2^16-1);
+	 * the second leaves x3 = z2^(2^240-1); the last loop appends the
+	 * 15 low exponent bits taken from bits 14..0 of 0xFFEB, for a
+	 * total exponent of 2^255-21 = p-2.
+	 */
+	memcpy(x1, z2, sizeof z2);
+	for (i = 0; i < 15; i ++) {
+		f255_mul(x1, x1, x1);
+		f255_mul(x1, x1, z2);
+	}
+	memcpy(x3, x1, sizeof x1);
+	for (i = 0; i < 14; i ++) {
+		int j;
+
+		for (j = 0; j < 16; j ++) {
+			f255_mul(x3, x3, x3);
+		}
+		f255_mul(x3, x3, x1);
+	}
+	for (i = 14; i >= 0; i --) {
+		f255_mul(x3, x3, x3);
+		if ((0xFFEB >> i) & 1) {
+			f255_mul(x3, z2, x3);
+		}
+	}
+
+	/*
+	 * Compute x2/z2. We have 1/z2 in x3.
+	 */
+	f255_mul(x2, x2, x3);
+	f255_final_reduce(x2);
+
+	/*
+	 * Encode the final x2 value in little-endian.
+	 */
+	br_enc64le(G,      x2[0]);
+	br_enc64le(G +  8, x2[1]);
+	br_enc64le(G + 16, x2[2]);
+	br_enc64le(G + 24, x2[3]);
+	return 1;
+}
+
+/*
+ * Multiply the curve generator by the scalar x (xlen bytes). The
+ * encoded generator obtained from api_generator() is copied into R,
+ * then api_mul() replaces it in place with the product. The value
+ * returned is the generator length (32); the status returned by
+ * api_mul() is not inspected.
+ */
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	size_t point_len;
+	const unsigned char *gen = api_generator(curve, &point_len);
+
+	memcpy(R, gen, point_len);
+	(void)api_mul(R, point_len, x, xlen, curve);
+	return point_len;
+}
+
+/*
+ * Placeholder for the "muladd" slot of the implementation table.
+ * Every argument is ignored and the function always returns 0.
+ */
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	/*
+	 * Not implemented on purpose: this method is used for ECDSA
+	 * only, and there is no ECDSA over Curve25519 (signatures over
+	 * this curve use EdDSA instead).
+	 */
+	(void)A; (void)B; (void)len;
+	(void)x; (void)xlen;
+	(void)y; (void)ylen;
+	(void)curve;
+	return 0;
+}
+
+/*
+ * see bearssl_ec.h
+ *
+ * Implementation table for this file: one 32-bit constant followed by
+ * the addresses of the static api_* functions defined above, listed in
+ * the order generator, order, xoff, mul, mulgen, muladd.
+ */
+const br_ec_impl br_ec_c25519_m64 = {
+	(uint32_t)0x20000000,	/* only bit 29 set; presumably the supported-curves mask (confirm in bearssl_ec.h) */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd	/* stub: always returns 0 */
+};
+
+/*
+ * see bearssl_ec.h
+ *
+ * Returns the address of the br_ec_c25519_m64 implementation table.
+ * This definition is compiled when BR_INT128 or BR_UMUL128 is set; the
+ * #else branch supplies a variant that returns a null pointer instead.
+ */
+const br_ec_impl *
+br_ec_c25519_m64_get(void)
+{
+	return &br_ec_c25519_m64;
+}
+
+#else
+
+/*
+ * see bearssl_ec.h
+ *
+ * Fallback definition, compiled when neither BR_INT128 nor BR_UMUL128
+ * is set: the implementation table is not available, so a null pointer
+ * is returned.
+ */
+const br_ec_impl *
+br_ec_c25519_m64_get(void)
+{
+	return NULL;
+}
+
+#endif
--- a/third_party/bearssl/src/ec_curve25519.c
+++ b/third_party/bearssl/src/ec_curve25519.c
@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static const unsigned char GEN[] = {	/* 32 bytes, referenced by br_curve25519 below */
+	0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* 0x09 followed by 31 zero bytes */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+static const unsigned char ORDER[] = {	/* 32 bytes, referenced by br_curve25519 below */
+	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,	/* 0x7F followed by 31 bytes of 0xFF */
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+
+/*
+ * see inner.h
+ *
+ * Curve definition record built from the tables above: the curve
+ * identifier, then ORDER with its size, then GEN with its size.
+ */
+const br_ec_curve_def br_curve25519 = {
+	BR_EC_curve25519,
+	ORDER, sizeof ORDER,
+	GEN, sizeof GEN
+};
--- a/third_party/bearssl/src/ec_default.c
+++ b/third_party/bearssl/src/ec_default.c
@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_ec.h
+ *
+ * Returns the default EC implementation, selected at compile time:
+ * the address of br_ec_all_m15 when BR_LOMUL is nonzero, and the
+ * address of br_ec_all_m31 otherwise.
+ */
+const br_ec_impl *
+br_ec_get_default(void)
+{
+#if BR_LOMUL
+	return &br_ec_all_m15;
+#else
+	return &br_ec_all_m31;
+#endif
+}
--- a/third_party/bearssl/src/ec_keygen.c
+++ b/third_party/bearssl/src/ec_keygen.c
@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_ec.h
+ *
+ * Generates a private key for the given curve.
+ *
+ *   rng_ctx   random source; its generate() method fills the buffer
+ *   impl      EC implementation (supplies supported_curves and order())
+ *   sk        optional; if not NULL, receives curve, x and xlen
+ *   kbuf      output buffer for the key bytes; may be NULL
+ *   curve     curve identifier, valid range 0..31
+ *
+ * Returns 0 if the curve identifier is out of range or not flagged in
+ * impl->supported_curves. Otherwise returns the key length, which is
+ * the length of the curve order once leading zero bytes are skipped.
+ * When kbuf is NULL (or that length is 0), the length is returned
+ * without generating anything and sk is not touched.
+ *
+ * sk->x is set to point into kbuf; the key bytes are not copied.
+ */
+size_t
+br_ec_keygen(const br_prng_class **rng_ctx,
+	const br_ec_impl *impl, br_ec_private_key *sk,
+	void *kbuf, int curve)
+{
+	const unsigned char *order;
+	unsigned char *buf;
+	size_t len;
+	unsigned mask;
+
+	if (curve < 0 || curve >= 32
+		|| ((impl->supported_curves >> curve) & 1) == 0)
+	{
+		return 0;
+	}
+	order = impl->order(curve, &len);
+	while (len > 0 && *order == 0) {	/* skip leading zero bytes of the order */
+		order ++;
+		len --;
+	}
+	if (kbuf == NULL || len == 0) {
+		return len;
+	}
+	mask = order[0];	/* smear the top set bit downwards: mask covers every bit of order[0] */
+	mask |= (mask >> 1);
+	mask |= (mask >> 2);
+	mask |= (mask >> 4);
+
+	/*
+	 * We generate sequences of random bits of the right size, until
+	 * the value is strictly lower than the curve order (we also
+	 * check for all-zero values, which are invalid).
+	 *
+	 * The comparison is a byte-wise subtraction buf - order, walked
+	 * from the last byte to the first with cc as the running
+	 * borrow: a final borrow of 1 means buf < order. zz collects
+	 * the OR of all bytes, so it is nonzero unless buf is all-zero.
+	 */
+	buf = kbuf;
+	for (;;) {
+		size_t u;
+		unsigned cc, zz;
+
+		(*rng_ctx)->generate(rng_ctx, buf, len);
+		buf[0] &= mask;	/* trim the top byte to the bit length of the order */
+		cc = 0;
+		u = len;
+		zz = 0;
+		while (u -- > 0) {
+			cc = ((unsigned)(buf[u] - order[u] - cc) >> 8) & 1;
+			zz |= buf[u];
+		}
+		if (cc != 0 && zz != 0) {
+			break;
+		}
+	}
+
+	if (sk != NULL) {
+		sk->curve = curve;
+		sk->x = buf;
+		sk->xlen = len;
+	}
+	return len;
+}
--- a/third_party/bearssl/src/ec_p256_m15.c
+++ b/third_party/bearssl/src/ec_p256_m15.c
--- a/third_party/bearssl/src/ec_p256_m31.c
+++ b/third_party/bearssl/src/ec_p256_m31.c
--- a/third_party/bearssl/src/ec_p256_m62.c
+++ b/third_party/bearssl/src/ec_p256_m62.c
--- a/third_party/bearssl/src/ec_p256_m64.c
+++ b/third_party/bearssl/src/ec_p256_m64.c
--- a/third_party/bearssl/src/ec_prime_i15.c
+++ b/third_party/bearssl/src/ec_prime_i15.c
@ -0,0 +1,824 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Parameters for supported curves:
+ *   - field modulus p
+ *   - R^2 mod p (R = 2^(15k) for the smallest k such that R >= p)
+ *   - b*R mod p (b is the second curve equation parameter)
+ */
+
+/*
+ * secp256r1 constants. Each array is one integer: a header word followed
+ * by 15-bit limbs, least significant limb first. The header 0x0111 means
+ * 17 full limbs (0x0111 >> 4) plus 1 extra bit (0x0111 & 15), i.e. a
+ * 256-bit value.
+ */
+
+/* Field modulus p = 2^256 - 2^224 + 2^192 + 2^96 - 1. */
+static const uint16_t P256_P[] = {
+	0x0111,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x003F, 0x0000,
+	0x0000, 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x4000, 0x7FFF,
+	0x7FFF, 0x0001
+};
+
+/* R^2 mod p, with R = 2^(15*18) (18 limbs are needed to reach R >= p). */
+static const uint16_t P256_R2[] = {
+	0x0111,
+	0x0000, 0x6000, 0x0000, 0x0000, 0x0000, 0x0000, 0x7FFC, 0x7FFF,
+	0x7FBF, 0x7FFF, 0x7FBF, 0x7FFF, 0x7FFF, 0x7FFF, 0x77FF, 0x7FFF,
+	0x4FFF, 0x0000
+};
+
+/* b*R mod p: the curve equation constant b, in Montgomery representation. */
+static const uint16_t P256_B[] = {
+	0x0111,
+	0x770C, 0x5EEF, 0x29C4, 0x3EC4, 0x6273, 0x0486, 0x4543, 0x3993,
+	0x3C01, 0x6B56, 0x212E, 0x57EE, 0x4882, 0x204B, 0x7483, 0x3C16,
+	0x0187, 0x0000
+};
+
+/*
+ * secp384r1 constants (header word, then 15-bit limbs, least significant
+ * first). The header 0x0199 means 25 full limbs plus 9 extra bits, i.e.
+ * a 384-bit value.
+ */
+
+/* Field modulus p = 2^384 - 2^128 - 2^96 + 2^32 - 1. */
+static const uint16_t P384_P[] = {
+	0x0199,
+	0x7FFF, 0x7FFF, 0x0003, 0x0000, 0x0000, 0x0000, 0x7FC0, 0x7FFF,
+	0x7EFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x01FF
+};
+
+/* R^2 mod p, with R = 2^(15*26) (26 limbs are needed to reach R >= p). */
+static const uint16_t P384_R2[] = {
+	0x0199,
+	0x1000, 0x0000, 0x0000, 0x7FFF, 0x7FFF, 0x0001, 0x0000, 0x0010,
+	0x0000, 0x0000, 0x0000, 0x7F00, 0x7FFF, 0x01FF, 0x0000, 0x1000,
+	0x0000, 0x2000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000
+};
+
+/* b*R mod p: the curve equation constant b, in Montgomery representation. */
+static const uint16_t P384_B[] = {
+	0x0199,
+	0x7333, 0x2096, 0x70D1, 0x2310, 0x3020, 0x6197, 0x1464, 0x35BB,
+	0x70CA, 0x0117, 0x1920, 0x4136, 0x5FC8, 0x5713, 0x4938, 0x7DD2,
+	0x4DD2, 0x4A71, 0x0220, 0x683E, 0x2C87, 0x4DB1, 0x7BFF, 0x6C09,
+	0x0452, 0x0084
+};
+
+/*
+ * secp521r1 constants (header word, then 15-bit limbs, least significant
+ * first). The header 0x022B means 34 full limbs plus 11 extra bits, i.e.
+ * a 521-bit value.
+ */
+
+/* Field modulus p = 2^521 - 1 (every bit set). */
+static const uint16_t P521_P[] = {
+	0x022B,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x7FFF, 0x07FF
+};
+
+/*
+ * R^2 mod p, with R = 2^(15*35) = 2^525. Since 2^521 = 1 mod p, R is
+ * 2^4 mod p and R^2 is 2^8, hence the single non-zero limb 0x0100.
+ */
+static const uint16_t P521_R2[] = {
+	0x022B,
+	0x0100, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0000
+};
+
+/* b*R mod p: the curve equation constant b, in Montgomery representation. */
+static const uint16_t P521_B[] = {
+	0x022B,
+	0x7002, 0x6A07, 0x751A, 0x228F, 0x71EF, 0x5869, 0x20F4, 0x1EFC,
+	0x7357, 0x37E0, 0x4EEC, 0x605E, 0x1652, 0x26F6, 0x31FA, 0x4A8F,
+	0x6193, 0x3C2A, 0x3C42, 0x48C7, 0x3489, 0x6771, 0x4C57, 0x5CCD,
+	0x2725, 0x545B, 0x503B, 0x5B42, 0x21A0, 0x2534, 0x687E, 0x70E4,
+	0x1618, 0x27D7, 0x0465
+};
+
+/*
+ * Per-curve constants used by the interpreter and the point codecs.
+ */
+typedef struct {
+	/* Field modulus (header word + 15-bit limbs). */
+	const uint16_t *p;
+	/* Curve constant b, as b*R mod p; loaded as P2y for code_check. */
+	const uint16_t *b;
+	/* R^2 mod p; multiplying by it converts a value to Montgomery form. */
+	const uint16_t *R2;
+	/*
+	 * Passed as the 'p0i' argument of br_i15_montymul()/br_i15_modpow().
+	 * NOTE(review): presumably -1/p mod 2^15 -- confirm against inner.h.
+	 */
+	uint16_t p0i;
+	/* Size in bytes of an encoded point; compared to the caller's length. */
+	size_t point_len;
+} curve_params;
+
+/*
+ * Return the parameter set for a curve identifier.
+ *
+ * The table is indexed with (curve - BR_EC_secp256r1) and there is no
+ * range check: the caller must only pass one of the three curves listed
+ * below.
+ * NOTE(review): this relies on the three identifiers being consecutive
+ * values -- confirm against bearssl_ec.h.
+ */
+static inline const curve_params *
+id_to_curve(int curve)
+{
+	static const curve_params params_by_curve[] = {
+		/* modulus, b*R,    R^2,     p0i,    encoded point length */
+		{ P256_P, P256_B, P256_R2, 0x0001,  65 },
+		{ P384_P, P384_B, P384_R2, 0x0001,  97 },
+		{ P521_P, P521_B, P521_R2, 0x0001, 133 }
+	};
+	int idx;
+
+	idx = curve - BR_EC_secp256r1;
+	return params_by_curve + idx;
+}
+
+/*
+ * Number of 16-bit words reserved for one field element:
+ * (B + 29) / 15 == 1 + ceil(B / 15), i.e. one header word plus enough
+ * 15-bit limbs for B bits, with B = BR_MAX_EC_SIZE.
+ * NOTE(review): assumes BR_MAX_EC_SIZE is a size in bits -- confirm.
+ */
+#define I15_LEN   ((BR_MAX_EC_SIZE + 29) / 15)
+
+/*
+ * Type for a point in Jacobian coordinates:
+ * -- three values, x, y and z, in Montgomery representation
+ * -- affine coordinates are X = x / z^2 and Y = y / z^3
+ * -- for the point at infinity, z = 0
+ *
+ * c[0], c[1] and c[2] hold x, y and z respectively. run_code() copies
+ * the whole array into three consecutive interpreter registers with a
+ * single memcpy(), so the layout must stay exactly c[3][I15_LEN].
+ */
+typedef struct {
+	uint16_t c[3][I15_LEN];
+} jacobian;
+
+/*
+ * We use a custom interpreter that uses thirteen registers (the t[]
+ * array of run_code()), and only six operations:
+ *    MSET(d, a)       copy a into d
+ *    MADD(d, a)       d = d+a (modular)
+ *    MSUB(d, a)       d = d-a (modular)
+ *    MMUL(d, a, b)    d = a*b (Montgomery multiplication)
+ *    MINV(d, a, b)    invert d modulo p; a and b are used as scratch registers
+ *    MTZ(d)           clear return value if d = 0
+ * Destination of MMUL (d) must be distinct from operands (a and b).
+ * There is no such constraint for MSUB and MADD.
+ *
+ * Registers include the operand coordinates, and temporaries.
+ *
+ * Encoding of one 16-bit instruction word:
+ *    bits 12..15   operation (0 = MSET ... 5 = MTZ)
+ *    bits  8..11   destination register d
+ *    bits  4..7    first source register a
+ *    bits  0..3    second source register b
+ *
+ * A word equal to 0 ends the program (ENDCODE). MSET(0, 0) also encodes
+ * to 0, so that particular self-copy cannot be used as an instruction.
+ * run_code() handles MTZ in its 'default' branch, so any operation value
+ * of 5 or more is executed as MTZ.
+ */
+#define MSET(d, a)      (0x0000 + ((d) << 8) + ((a) << 4))
+#define MADD(d, a)      (0x1000 + ((d) << 8) + ((a) << 4))
+#define MSUB(d, a)      (0x2000 + ((d) << 8) + ((a) << 4))
+#define MMUL(d, a, b)   (0x3000 + ((d) << 8) + ((a) << 4) + (b))
+#define MINV(d, a, b)   (0x4000 + ((d) << 8) + ((a) << 4) + (b))
+#define MTZ(d)          (0x5000 + ((d) << 8))
+#define ENDCODE         0
+
+/*
+ * Registers for the input operands. run_code() loads the first operand
+ * into registers 0..2 and the second one into registers 3..5; only
+ * registers 0..2 are copied back to the caller.
+ */
+#define P1x    0
+#define P1y    1
+#define P1z    2
+#define P2x    3
+#define P2y    4
+#define P2z    5
+
+/*
+ * Alternate names for the first input operand.
+ */
+#define Px     0
+#define Py     1
+#define Pz     2
+
+/*
+ * Temporaries. Their content is not initialised when a program starts.
+ */
+#define t1     6
+#define t2     7
+#define t3     8
+#define t4     9
+#define t5    10
+#define t6    11
+#define t7    12
+
+/*
+ * Extra scratch registers available when there is no second operand (e.g.
+ * for "double" and "affine"). These are the same register numbers as
+ * P2x, P2y and P2z. None of the programs in this file currently uses
+ * them.
+ */
+#define t8     3
+#define t9     4
+#define t10    5
+
+/*
+ * Doubling formulas are:
+ *
+ *   s = 4*x*y^2
+ *   m = 3*(x + z^2)*(x - z^2)
+ *   x' = m^2 - 2*s
+ *   y' = m*(s - x') - 8*y^4
+ *   z' = 2*y*z
+ *
+ * If y = 0 (P has order 2) then this yields infinity (z' = 0), as it
+ * should. This case should not happen anyway, because our curves have
+ * prime order, and thus do not contain any point of order 2.
+ *
+ * If P is infinity (z = 0), then again the formulas yield infinity,
+ * which is correct. Thus, this code works for all points.
+ *
+ * Cost: 8 multiplications
+ */
+/*
+ * Interpreter program for point doubling; operates in place on the
+ * first operand (Px, Py, Pz) and uses t1..t4 as temporaries. It contains
+ * no MTZ instruction, so run_code() always returns 1 for it.
+ */
+static const uint16_t code_double[] = {
+	/*
+	 * Compute z^2 (in t1).
+	 */
+	MMUL(t1, Pz, Pz),
+
+	/*
+	 * Compute x-z^2 (in t2) and then x+z^2 (in t1).
+	 */
+	MSET(t2, Px),
+	MSUB(t2, t1),
+	MADD(t1, Px),
+
+	/*
+	 * Compute m = 3*(x+z^2)*(x-z^2) (in t1).
+	 * The product goes to t3 first (MMUL destination must differ
+	 * from its operands), then t1 = t3 + t3 + t3.
+	 */
+	MMUL(t3, t1, t2),
+	MSET(t1, t3),
+	MADD(t1, t3),
+	MADD(t1, t3),
+
+	/*
+	 * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
+	 * t2 = x * (2*y^2), then doubled.
+	 */
+	MMUL(t3, Py, Py),
+	MADD(t3, t3),
+	MMUL(t2, Px, t3),
+	MADD(t2, t2),
+
+	/*
+	 * Compute x' = m^2 - 2*s.
+	 * From here on Px holds x'; the original x is no longer needed.
+	 */
+	MMUL(Px, t1, t1),
+	MSUB(Px, t2),
+	MSUB(Px, t2),
+
+	/*
+	 * Compute z' = 2*y*z.
+	 * This must come before Py is overwritten below.
+	 */
+	MMUL(t4, Py, Pz),
+	MSET(Pz, t4),
+	MADD(Pz, t4),
+
+	/*
+	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
+	 * 2*y^2 in t3.
+	 * t4 = (2*y^2)^2 = 4*y^4, subtracted twice to get 8*y^4.
+	 */
+	MSUB(t2, Px),
+	MMUL(Py, t1, t2),
+	MMUL(t4, t3, t3),
+	MSUB(Py, t4),
+	MSUB(Py, t4),
+
+	ENDCODE
+};
+
+/*
+ * Addition formulas are:
+ *
+ *   u1 = x1 * z2^2
+ *   u2 = x2 * z1^2
+ *   s1 = y1 * z2^3
+ *   s2 = y2 * z1^3
+ *   h = u2 - u1
+ *   r = s2 - s1
+ *   x3 = r^2 - h^3 - 2 * u1 * h^2
+ *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+ *   z3 = h * z1 * z2
+ *
+ * If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that
+ * z3 == 0, so the result is correct.
+ * If either of P1 or P2 is infinity, but not both, then z3 == 0, which is
+ * not correct.
+ * h == 0 only if u1 == u2; this happens in two cases:
+ * -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2
+ * -- if s1 != s2 then P1 + P2 == infinity (but neither P1 or P2 is infinity)
+ *
+ * Thus, the following situations are not handled correctly:
+ * -- P1 = 0 and P2 != 0
+ * -- P1 != 0 and P2 = 0
+ * -- P1 = P2
+ * All other cases are properly computed. However, even in "incorrect"
+ * situations, the three coordinates still are properly formed field
+ * elements.
+ *
+ * The returned flag is cleared if r == 0. This happens in the following
+ * cases:
+ * -- Both points are on the same horizontal line (same Y coordinate).
+ * -- Both points are infinity.
+ * -- One point is infinity and the other is on line Y = 0.
+ * The third case cannot happen with our curves (there is no valid point
+ * on line Y = 0 since that would be a point of order 2). If the two
+ * source points are non-infinity, then remains only the case where the
+ * two points are on the same horizontal line.
+ *
+ * This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and
+ * P2 != 0:
+ * -- If the returned value is not the point at infinity, then it was properly
+ * computed.
+ * -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result
+ * is indeed the point at infinity.
+ * -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should
+ * use the 'double' code.
+ *
+ * Cost: 16 multiplications
+ */
+/*
+ * Interpreter program for point addition: P1 <- P1 + P2, using t1..t7.
+ * The flag returned by run_code() is 0 when r = s2 - s1 is zero and 1
+ * otherwise.
+ */
+static const uint16_t code_add[] = {
+	/*
+	 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
+	 * t3 first holds z2^2 and t4 holds z2^3; t3 is then reused for s1.
+	 */
+	MMUL(t3, P2z, P2z),
+	MMUL(t1, P1x, t3),
+	MMUL(t4, P2z, t3),
+	MMUL(t3, P1y, t4),
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 * t4 first holds z1^2 and t5 holds z1^3; t4 is then reused for s2.
+	 */
+	MMUL(t4, P1z, P1z),
+	MMUL(t2, P2x, t4),
+	MMUL(t5, P1z, t4),
+	MMUL(t4, P2y, t5),
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
+	 */
+	MSUB(t2, t1),
+	MSUB(t4, t3),
+
+	/*
+	 * Report cases where r = 0 through the returned flag.
+	 * MTZ only updates the flag; no register is modified.
+	 */
+	MTZ(t4),
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5).
+	 */
+	MMUL(t7, t2, t2),
+	MMUL(t6, t1, t7),
+	MMUL(t5, t7, t2),
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 * t1 and t7 can be used as scratch registers.
+	 * P1x is overwritten here: x1 was already consumed into u1.
+	 */
+	MMUL(P1x, t4, t4),
+	MSUB(P1x, t5),
+	MSUB(P1x, t6),
+	MSUB(P1x, t6),
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 * t6 becomes u1*h^2 - x3; t1 is recycled to hold s1*h^3.
+	 */
+	MSUB(t6, P1x),
+	MMUL(P1y, t4, t6),
+	MMUL(t1, t5, t3),
+	MSUB(P1y, t1),
+
+	/*
+	 * Compute z3 = h*z1*z2.
+	 * z1*z2 goes through t1 because an MMUL destination may not be
+	 * one of its own operands.
+	 */
+	MMUL(t1, P1z, P2z),
+	MMUL(P1z, t1, t2),
+
+	ENDCODE
+};
+
+/*
+ * Check that the point is on the curve. This code snippet assumes the
+ * following conventions:
+ * -- Coordinates x and y have been freshly decoded in P1 (but not
+ * converted to Montgomery coordinates yet).
+ * -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1.
+ */
+/*
+ * Interpreter program that converts freshly decoded affine coordinates
+ * to Montgomery representation and tests the curve equation
+ * y^2 = x^3 - 3*x + b.
+ *
+ * Beware of the sense of the flag: MTZ clears it when its register is
+ * zero, and here the register is zero exactly when the equation HOLDS.
+ * run_code() therefore returns 0 for a point on the curve and 1 for a
+ * point off the curve; point_decode() inverts that value.
+ */
+static const uint16_t code_check[] = {
+
+	/* Convert x and y to Montgomery representation. */
+	MMUL(t1, P1x, P2x),
+	MMUL(t2, P1y, P2x),
+	MSET(P1x, t1),
+	MSET(P1y, t2),
+
+	/* Compute x^3 in t1. */
+	MMUL(t2, P1x, P1x),
+	MMUL(t1, P1x, t2),
+
+	/* Subtract 3*x from t1. */
+	MSUB(t1, P1x),
+	MSUB(t1, P1x),
+	MSUB(t1, P1x),
+
+	/* Add b. */
+	MADD(t1, P2y),
+
+	/* Compute y^2 in t2. */
+	MMUL(t2, P1y, P1y),
+
+	/* Compare y^2 with x^3 - 3*x + b; they must match. */
+	MSUB(t1, t2),
+	MTZ(t1),
+
+	/*
+	 * Set z to 1 (in Montgomery representation): Montgomery product
+	 * of R^2 (P2x) with plain 1 (P2z).
+	 */
+	MMUL(P1z, P2x, P2z),
+
+	ENDCODE
+};
+
+/*
+ * Conversion back to affine coordinates. This code snippet assumes that
+ * the z coordinate of P2 is set to 1 (not in Montgomery representation).
+ */
+/*
+ * Interpreter program that replaces P1x and P1y with the affine
+ * coordinates x/z^2 and y/z^3, taken out of Montgomery representation.
+ * P1z is left unchanged. Only P2z of the second operand is read.
+ *
+ * The trailing comments track each value assuming that MMUL(d, a, b)
+ * yields a*b/R (NOTE(review): confirm against br_i15_montymul()).
+ */
+static const uint16_t code_affine[] = {
+
+	/* Save z*R in t1. */
+	MSET(t1, P1z),
+
+	/* Compute z^3 in t2. */
+	MMUL(t2, P1z, P1z),    /* t2 = z^2 * R */
+	MMUL(t3, P1z, t2),     /* t3 = z^3 * R */
+	MMUL(t2, t3, P2z),     /* t2 = z^3 (multiplied by plain 1) */
+
+	/*
+	 * Invert to (1/z^3) in t2. run_code() implements MINV as a modular
+	 * exponentiation with exponent p-2; t3 and t4 are scratch space.
+	 */
+	MINV(t2, t3, t4),
+
+	/* Compute y. */
+	MSET(t3, P1y),
+	MMUL(P1y, t2, t3),     /* (1/z^3) * (y*R) / R = y/z^3 */
+
+	/* Compute (1/z^2) in t3. */
+	MMUL(t3, t2, t1),      /* (1/z^3) * (z*R) / R = 1/z^2 */
+
+	/* Compute x. */
+	MSET(t2, P1x),
+	MMUL(P1x, t2, t3),     /* (x*R) * (1/z^2) / R = x/z^2 */
+
+	ENDCODE
+};
+
+/*
+ * Execute an interpreter program on two points.
+ *
+ *   P1     first operand; also receives the result (registers 0..2)
+ *   P2     second operand (registers 3..5); read only. It may be the
+ *          same object as P1, since both are copied into the register
+ *          file before anything is written back.
+ *   cc     curve parameters (modulus and p0i)
+ *   code   instruction words, terminated by a zero word (ENDCODE)
+ *
+ * Returns 1, unless at least one MTZ instruction found its register
+ * equal to zero, in which case 0 is returned.
+ */
+static uint32_t
+run_code(jacobian *P1, const jacobian *P2,
+	const curve_params *cc, const uint16_t *code)
+{
+	uint32_t r;
+	/* Register file: 0..2 = P1, 3..5 = P2, 6..12 = t1..t7. */
+	uint16_t t[13][I15_LEN];
+	size_t u;
+
+	r = 1;
+
+	/*
+	 * Copy the two operands in the dedicated registers.
+	 * Each memcpy() fills three consecutive registers at once; the
+	 * temporaries t1..t7 are left uninitialised.
+	 */
+	memcpy(t[P1x], P1->c, 3 * I15_LEN * sizeof(uint16_t));
+	memcpy(t[P2x], P2->c, 3 * I15_LEN * sizeof(uint16_t));
+
+	/*
+	 * Run formulas.
+	 */
+	for (u = 0;; u ++) {
+		unsigned op, d, a, b;
+
+		op = code[u];
+		if (op == 0) {
+			break;
+		}
+		/* Split the word into operation and register numbers. */
+		d = (op >> 8) & 0x0F;
+		a = (op >> 4) & 0x0F;
+		b = op & 0x0F;
+		op >>= 12;
+		switch (op) {
+			/* Scratch variables shared by the cases below. */
+			uint32_t ctl;
+			size_t plen;
+			unsigned char tp[(BR_MAX_EC_SIZE + 7) >> 3];
+
+		case 0:
+			/* MSET: d = a */
+			memcpy(t[d], t[a], I15_LEN * sizeof(uint16_t));
+			break;
+		case 1:
+			/*
+			 * MADD: d = d + a mod p. One p is subtracted when
+			 * the raw addition produced a carry, or when the
+			 * trial subtraction reports no borrow (sum >= p).
+			 * NOTE(review): assumes br_i15_sub() with ctl = 0
+			 * returns the borrow without modifying t[d] --
+			 * confirm against inner.h.
+			 */
+			ctl = br_i15_add(t[d], t[a], 1);
+			ctl |= NOT(br_i15_sub(t[d], cc->p, 0));
+			br_i15_sub(t[d], cc->p, ctl);
+			break;
+		case 2:
+			/*
+			 * MSUB: d = d - a mod p. The value returned by the
+			 * subtraction controls whether p is added back.
+			 */
+			br_i15_add(t[d], cc->p, br_i15_sub(t[d], t[a], 1));
+			break;
+		case 3:
+			/* MMUL: d = a * b (Montgomery multiplication) */
+			br_i15_montymul(t[d], t[a], t[b], cc->p, cc->p0i);
+			break;
+		case 4:
+			/*
+			 * MINV: raise d to the power p-2 modulo p. The
+			 * exponent is built by encoding p into plen bytes
+			 * and subtracting 2 from its last byte; this
+			 * cannot borrow because every modulus in this file
+			 * ends with byte 0xFF. plen is the byte length of
+			 * p, derived from the header word.
+			 */
+			plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
+			br_i15_encode(tp, plen, cc->p);
+			tp[plen - 1] -= 2;
+			br_i15_modpow(t[d], tp, plen,
+				cc->p, cc->p0i, t[a], t[b]);
+			break;
+		default:
+			/* MTZ: clear the returned flag if d is zero. */
+			r &= ~br_i15_iszero(t[d]);
+			break;
+		}
+	}
+
+	/*
+	 * Copy back result.
+	 * Only registers 0..2 (the first operand) are returned.
+	 */
+	memcpy(P1->c, t[P1x], 3 * I15_LEN * sizeof(uint16_t));
+	return r;
+}
+
+/*
+ * Set x to the integer 1 (plain value, not Montgomery), using the same
+ * header word as the modulus p.
+ *
+ * (p[0] + 31) >> 4 is the total word count of the value, header
+ * included; all of these words are cleared before the header and the
+ * lowest limb are written.
+ */
+static void
+set_one(uint16_t *x, const uint16_t *p)
+{
+	size_t nwords;
+
+	nwords = (p[0] + 31) >> 4;
+	memset(x, 0, nwords * sizeof(uint16_t));
+	x[0] = p[0];
+	x[1] = 0x0001;
+}
+
+/*
+ * Set P to x = y = z = 0, each coordinate carrying the header word of
+ * the curve modulus. Since z = 0, this is the point at infinity.
+ */
+static void
+point_zero(jacobian *P, const curve_params *cc)
+{
+	size_t k;
+
+	memset(P, 0, sizeof *P);
+	for (k = 0; k < 3; k ++) {
+		P->c[k][0] = cc->p[0];
+	}
+}
+
+/*
+ * Replace P with 2*P.
+ *
+ * P is passed as both operands of run_code(); this is safe because
+ * run_code() copies its inputs before writing the result. The doubling
+ * program contains no MTZ instruction, so the returned flag carries no
+ * information and is dropped.
+ */
+static inline void
+point_double(jacobian *P, const curve_params *cc)
+{
+	(void)run_code(P, P, cc, code_double);
+}
+
+/*
+ * Replace P1 with P1 + P2 using the generic addition program.
+ *
+ * Returns the interpreter flag: 0 if r = s2 - s1 was zero, 1 otherwise.
+ * See the comment above code_add for the cases that are not computed
+ * correctly and for how callers use the flag to detect them.
+ */
+static inline uint32_t
+point_add(jacobian *P1, const jacobian *P2, const curve_params *cc)
+{
+	uint32_t r_nonzero;
+
+	r_nonzero = run_code(P1, P2, cc, code_add);
+	return r_nonzero;
+}
+
+/*
+ * Replace P with x*P.
+ *
+ *   P      point to multiply, overwritten with the result
+ *   x      multiplier, unsigned big-endian
+ *   xlen   length of x in bytes
+ *   cc     curve parameters
+ *
+ * The sequence of point operations depends only on xlen: every 2-bit
+ * window costs two doublings and one addition, and the values to keep
+ * are chosen with CCOPY() rather than with branches.
+ */
+static void
+point_mul(jacobian *P, const unsigned char *x, size_t xlen,
+	const curve_params *cc)
+{
+	/*
+	 * We do a simple double-and-add ladder with a 2-bit window
+	 * to make only one add every two doublings. We thus first
+	 * precompute 2P and 3P in some local buffers.
+	 *
+	 * We always perform two doublings and one addition; the
+	 * addition is with P, 2P and 3P and is done in a temporary
+	 * array.
+	 *
+	 * The addition code cannot handle cases where one of the
+	 * operands is infinity, which is the case at the start of the
+	 * ladder. We therefore need to maintain a flag that controls
+	 * this situation.
+	 */
+	uint32_t qz;
+	jacobian P2, P3, Q, T, U;
+
+	/* P2 = 2*P and P3 = P + 2*P (the flag of that addition is unused). */
+	memcpy(&P2, P, sizeof P2);
+	point_double(&P2, cc);
+	memcpy(&P3, P, sizeof P3);
+	point_add(&P3, &P2, cc);
+
+	/* Q is the accumulator; qz = 1 for as long as Q is still infinity. */
+	point_zero(&Q, cc);
+	qz = 1;
+	while (xlen -- > 0) {
+		int k;
+
+		/* Windows of the current byte, most significant first. */
+		for (k = 6; k >= 0; k -= 2) {
+			uint32_t bits;
+			uint32_t bnz;
+
+			point_double(&Q, cc);
+			point_double(&Q, cc);
+
+			/* T = bits*P, defaulting to P when bits is 0 or 1. */
+			memcpy(&T, P, sizeof T);
+			memcpy(&U, &Q, sizeof U);
+			bits = (*x >> k) & (uint32_t)3;
+			bnz = NEQ(bits, 0);
+			CCOPY(EQ(bits, 2), &T, &P2, sizeof T);
+			CCOPY(EQ(bits, 3), &T, &P3, sizeof T);
+
+			/* U = Q + T, computed even when it gets discarded. */
+			point_add(&U, &T, cc);
+
+			/*
+			 * bits == 0: Q is kept as is.
+			 * bits != 0 and Q was infinity: Q = T.
+			 * bits != 0 and Q was not infinity: Q = U.
+			 */
+			CCOPY(bnz & qz, &Q, &T, sizeof Q);
+			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);
+			qz &= ~bnz;
+		}
+		x ++;
+	}
+	memcpy(P, &Q, sizeof Q);
+}
+
+/*
+ * Decode point into Jacobian coordinates. This function does not support
+ * the point at infinity. If the point is invalid then this returns 0, but
+ * the coordinates are still set to properly formed field elements.
+ */
+/*
+ *   P     receives the decoded point (x and y in Montgomery form, z = 1)
+ *   src   encoded point
+ *   len   length of src in bytes; must be 1 + 2*plen
+ *   cc    curve parameters
+ *
+ * Returns 1 on success, 0 otherwise. When the length is wrong the
+ * function returns early with P set to all-zero coordinates; all other
+ * checks are accumulated into the result without branching.
+ */
+static uint32_t
+point_decode(jacobian *P, const void *src, size_t len, const curve_params *cc)
+{
+	/*
+	 * Points must use uncompressed format:
+	 * -- first byte is 0x04;
+	 * -- coordinates X and Y use unsigned big-endian, with the same
+	 *    length as the field modulus.
+	 *
+	 * We don't support hybrid format (uncompressed, but first byte
+	 * has value 0x06 or 0x07, depending on the least significant bit
+	 * of Y) because it is rather useless, and explicitly forbidden
+	 * by PKIX (RFC 5480, section 2.2).
+	 *
+	 * We don't support compressed format either, because it is not
+	 * much used in practice (there are or were patent-related
+	 * concerns about point compression, which explains the lack of
+	 * generalised support). Also, point compression support would
+	 * need a bit more code.
+	 */
+	const unsigned char *buf;
+	size_t plen, zlen;
+	uint32_t r;
+	jacobian Q;
+
+	buf = src;
+	point_zero(P, cc);
+	/* plen: byte length of the modulus, derived from its header word. */
+	plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
+	if (len != 1 + (plen << 1)) {
+		return 0;
+	}
+	/* Decode X and Y; both results are ANDed into r. */
+	r = br_i15_decode_mod(P->c[0], buf + 1, plen, cc->p);
+	r &= br_i15_decode_mod(P->c[1], buf + 1 + plen, plen, cc->p);
+
+	/*
+	 * Check first byte.
+	 */
+	r &= EQ(buf[0], 0x04);
+	/* Former check that also accepted the hybrid format (0x06/0x07);
+	   kept for reference only:
+	r &= EQ(buf[0], 0x04) | (EQ(buf[0] & 0xFE, 0x06)
+		& ~(uint32_t)(buf[0] ^ buf[plen << 1]));
+	*/
+
+	/*
+	 * Convert coordinates and check that the point is valid.
+	 * Q is set up as code_check expects: R^2, b*R and plain 1. zlen is
+	 * the size in bytes of one value, header word included.
+	 */
+	zlen = ((cc->p[0] + 31) >> 4) * sizeof(uint16_t);
+	memcpy(Q.c[0], cc->R2, zlen);
+	memcpy(Q.c[1], cc->b, zlen);
+	set_one(Q.c[2], cc->p);
+	/*
+	 * With code_check, run_code() returns 0 when the curve equation
+	 * holds and 1 when it does not, hence the complement.
+	 */
+	r &= ~run_code(P, &Q, cc, code_check);
+	return r;
+}
+
+/*
+ * Encode a point. This method assumes that the point is correct and is
+ * not the point at infinity. Encoded size is always 1+2*plen, where
+ * plen is the field modulus length, in bytes.
+ */
+/*
+ * Write the uncompressed encoding of P into dst: byte 0x04 followed by
+ * the affine X and Y coordinates, each on flen bytes. P itself is not
+ * modified; the conversion to affine coordinates is done on a copy.
+ */
+static void
+point_encode(void *dst, const jacobian *P, const curve_params *cc)
+{
+	unsigned char *out;
+	size_t flen;
+	jacobian aff, one;
+
+	out = dst;
+	/* Byte length of the modulus, derived from its header word. */
+	flen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
+	out[0] = 0x04;
+
+	/*
+	 * code_affine only reads the z coordinate of its second operand,
+	 * so the x and y coordinates of 'one' are left unset.
+	 */
+	memcpy(&aff, P, sizeof aff);
+	set_one(one.c[2], cc->p);
+	run_code(&aff, &one, cc, code_affine);
+
+	br_i15_encode(out + 1, flen, aff.c[0]);
+	br_i15_encode(out + 1 + flen, flen, aff.c[1]);
+}
+
+/*
+ * Return the public curve definition (generator, order) for a curve
+ * identifier, or NULL if the curve is not one of the three handled here.
+ */
+static const br_ec_curve_def *
+id_to_curve_def(int curve)
+{
+	if (curve == BR_EC_secp256r1) {
+		return &br_secp256r1;
+	}
+	if (curve == BR_EC_secp384r1) {
+		return &br_secp384r1;
+	}
+	if (curve == BR_EC_secp521r1) {
+		return &br_secp521r1;
+	}
+	return NULL;
+}
+
+/*
+ * Return the encoded generator of the curve and store its length in
+ * *len. The curve definition is dereferenced without a NULL check, so
+ * the curve must be one that id_to_curve_def() knows.
+ */
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	const br_ec_curve_def *def = id_to_curve_def(curve);
+
+	*len = def->generator_len;
+	return def->generator;
+}
+
+/*
+ * Return the encoded order of the curve and store its length in *len.
+ * The curve definition is dereferenced without a NULL check, so the
+ * curve must be one that id_to_curve_def() knows.
+ */
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	const br_ec_curve_def *def = id_to_curve_def(curve);
+
+	*len = def->order_len;
+	return def->order;
+}
+
+/*
+ * Locate the X coordinate inside an encoded point: it starts at offset
+ * 1 (the returned value) and its length, stored in *len, is half the
+ * length of the encoded generator, rounded down.
+ */
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	size_t glen;
+
+	(void)api_generator(curve, &glen);
+	*len = glen >> 1;
+	return 1;
+}
+
+/*
+ * Multiply the encoded point G by the big-endian integer x, in place.
+ *
+ * Returns 0 immediately (G untouched) if Glen is not the encoded point
+ * length of the curve. Otherwise returns the result of point_decode():
+ * 1 if G was a valid point, 0 if not. In that second case the
+ * multiplication and the encoding are still carried out and G is
+ * overwritten, so the return value must be checked before G is used.
+ */
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	const curve_params *params = id_to_curve(curve);
+	jacobian pt;
+	uint32_t ok;
+
+	if (params->point_len != Glen) {
+		return 0;
+	}
+	ok = point_decode(&pt, G, Glen, params);
+	point_mul(&pt, x, xlen, params);
+	point_encode(G, &pt, params);
+	return ok;
+}
+
+/*
+ * Compute x*G for the curve generator G and write the encoded result
+ * into R. Returns the number of bytes written, which is the length of
+ * the encoded generator. The status returned by api_mul() is not
+ * reported to the caller.
+ */
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	size_t gen_len;
+	const unsigned char *gen = api_generator(curve, &gen_len);
+
+	memcpy(R, gen, gen_len);
+	(void)api_mul(R, gen_len, x, xlen, curve);
+	return gen_len;
+}
+
+/*
+ * Compute x*A + y*B and write the encoded result over A.
+ *
+ *   A      first point (encoded); receives the result
+ *   B      second point (encoded), or NULL to use the curve generator
+ *   len    length of A (and of B) in bytes
+ *   x, y   multipliers, unsigned big-endian, of xlen and ylen bytes
+ *
+ * Returns 0 immediately (A untouched) if len is not the encoded point
+ * length of the curve. Otherwise returns 1 on success, and 0 if a point
+ * failed to decode or if the sum is the point at infinity; A is
+ * overwritten in all of these cases.
+ */
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	uint32_t r, t, z;
+	const curve_params *cc;
+	jacobian P, Q;
+
+	/*
+	 * TODO: see about merging the two ladders. Right now, we do
+	 * two independent point multiplications, which is a bit
+	 * wasteful of CPU resources (but yields short code).
+	 */
+
+	cc = id_to_curve(curve);
+	if (len != cc->point_len) {
+		return 0;
+	}
+	r = point_decode(&P, A, len, cc);
+	if (B == NULL) {
+		size_t Glen;
+
+		/*
+		 * Glen is not used afterwards: the generator is decoded
+		 * with 'len', already checked against cc->point_len.
+		 */
+		B = api_generator(curve, &Glen);
+	}
+	r &= point_decode(&Q, B, len, cc);
+	point_mul(&P, x, xlen, cc);
+	point_mul(&Q, y, ylen, cc);
+
+	/*
+	 * We want to compute P+Q. Since the base points A and B are distinct
+	 * from infinity, and the multipliers are non-zero and lower than the
+	 * curve order, then we know that P and Q are non-infinity. This
+	 * leaves two special situations to test for:
+	 * -- If P = Q then we must use point_double().
+	 * -- If P+Q = 0 then we must report an error.
+	 *
+	 * Both the addition and the doubling are always computed; the
+	 * value to keep is selected afterwards with CCOPY().
+	 */
+	t = point_add(&P, &Q, cc);
+	point_double(&Q, cc);
+	z = br_i15_iszero(P.c[2]);
+
+	/*
+	 * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
+	 * have the following:
+	 *
+	 *   z = 0, t = 0   return P (normal addition)
+	 *   z = 0, t = 1   return P (normal addition)
+	 *   z = 1, t = 0   return Q (a 'double' case)
+	 *   z = 1, t = 1   report an error (P+Q = 0)
+	 */
+	CCOPY(z & ~t, &P, &Q, sizeof Q);
+	point_encode(A, &P, cc);
+	r &= ~(z & t);
+
+	return r;
+}
+
+/* see bearssl_ec.h */
+/*
+ * Public descriptor of this implementation: a bit mask followed by the
+ * six API entry points defined above.
+ */
+const br_ec_impl br_ec_prime_i15 = {
+	/*
+	 * Bits 23, 24 and 25 are set.
+	 * NOTE(review): presumably the supported-curves mask, indexed by
+	 * curve identifier (secp256r1, secp384r1, secp521r1) -- confirm
+	 * against bearssl_ec.h.
+	 */
+	(uint32_t)0x03800000,
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
--- a/third_party/bearssl/src/ec_prime_i31.c
+++ b/third_party/bearssl/src/ec_prime_i31.c
@ -0,0 +1,826 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Parameters for supported curves (field modulus, and 'b' equation
+ * parameter; both values use the 'i31' format, and 'b' is in Montgomery
+ * representation).
+ */
+
+/*
+ * secp256r1 constants. Each array is one integer: a header word followed
+ * by 31-bit limbs, least significant limb first. The header 0x00000108
+ * means 8 full limbs (header >> 5) plus 8 extra bits (header & 31), i.e.
+ * a 256-bit value.
+ */
+
+/* Field modulus p = 2^256 - 2^224 + 2^192 + 2^96 - 1. */
+static const uint32_t P256_P[] = {
+	0x00000108,
+	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x00000007,
+	0x00000000, 0x00000000, 0x00000040, 0x7FFFFF80,
+	0x000000FF
+};
+
+/* NOTE(review): presumably R^2 mod p with R = 2^(31*9) -- confirm. */
+static const uint32_t P256_R2[] = {
+	0x00000108,
+	0x00014000, 0x00018000, 0x00000000, 0x7FF40000,
+	0x7FEFFFFF, 0x7FF7FFFF, 0x7FAFFFFF, 0x005FFFFF,
+	0x00000000
+};
+
+/* Curve equation constant b, in Montgomery representation. */
+static const uint32_t P256_B[] = {
+	0x00000108,
+	0x6FEE1803, 0x6229C4BD, 0x21B139BE, 0x327150AA,
+	0x3567802E, 0x3F7212ED, 0x012E4355, 0x782DD38D,
+	0x0000000E
+};
+
+/*
+ * secp384r1 constants (header word, then 31-bit limbs, least significant
+ * first). The header 0x0000018C means 12 full limbs plus 12 extra bits,
+ * i.e. a 384-bit value.
+ */
+
+/* Field modulus p = 2^384 - 2^128 - 2^96 + 2^32 - 1. */
+static const uint32_t P384_P[] = {
+	0x0000018C,
+	0x7FFFFFFF, 0x00000001, 0x00000000, 0x7FFFFFF8,
+	0x7FFFFFEF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
+	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
+	0x00000FFF
+};
+
+/* NOTE(review): presumably R^2 mod p with R = 2^(31*13) -- confirm. */
+static const uint32_t P384_R2[] = {
+	0x0000018C,
+	0x00000000, 0x00000080, 0x7FFFFE00, 0x000001FF,
+	0x00000800, 0x00000000, 0x7FFFE000, 0x00001FFF,
+	0x00008000, 0x00008000, 0x00000000, 0x00000000,
+	0x00000000
+};
+
+/* Curve equation constant b, in Montgomery representation. */
+static const uint32_t P384_B[] = {
+	0x0000018C,
+	0x6E666840, 0x070D0392, 0x5D810231, 0x7651D50C,
+	0x17E218D6, 0x1B192002, 0x44EFE441, 0x3A524E2B,
+	0x2719BA5F, 0x41F02209, 0x36C5643E, 0x5813EFFE,
+	0x000008A5
+};
+
+/*
+ * secp521r1 constants (header word, then 31-bit limbs, least significant
+ * first). The header 0x00000219 means 16 full limbs plus 25 extra bits,
+ * i.e. a 521-bit value.
+ */
+
+/* Field modulus p = 2^521 - 1 (every bit set). */
+static const uint32_t P521_P[] = {
+	0x00000219,
+	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
+	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
+	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
+	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
+	0x01FFFFFF
+};
+
+/*
+ * The value 2^12. This equals R^2 mod p for R = 2^(31*17) = 2^527,
+ * because 2^521 = 1 mod p makes R equal to 2^6 mod p.
+ */
+static const uint32_t P521_R2[] = {
+	0x00000219,
+	0x00001000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000
+};
+
+/* Curve equation constant b, in Montgomery representation. */
+static const uint32_t P521_B[] = {
+	0x00000219,
+	0x540FC00A, 0x228FEA35, 0x2C34F1EF, 0x67BF107A,
+	0x46FC1CD5, 0x1605E9DD, 0x6937B165, 0x272A3D8F,
+	0x42785586, 0x44C8C778, 0x15F3B8B4, 0x64B73366,
+	0x03BA8B69, 0x0D05B42A, 0x21F929A2, 0x2C31C393,
+	0x00654FAE
+};
+
+/*
+ * Per-curve constants used by the interpreter.
+ */
+typedef struct {
+	/* Field modulus (header word + 31-bit limbs). */
+	const uint32_t *p;
+	/* Curve constant b, in Montgomery representation. */
+	const uint32_t *b;
+	/* NOTE(review): presumably R^2 mod p, used to enter Montgomery form. */
+	const uint32_t *R2;
+	/*
+	 * Passed as the 'p0i' argument of br_i31_montymul()/br_i31_modpow().
+	 * NOTE(review): presumably -1/p mod 2^31 -- confirm against inner.h.
+	 */
+	uint32_t p0i;
+	/* NOTE(review): presumably the encoded point size in bytes -- confirm. */
+	size_t point_len;
+} curve_params;
+
+/*
+ * Return the parameter set for a curve identifier.
+ *
+ * The table is indexed with (curve - BR_EC_secp256r1) and there is no
+ * range check: the caller must only pass one of the three curves listed
+ * below.
+ * NOTE(review): this relies on the three identifiers being consecutive
+ * values -- confirm against bearssl_ec.h.
+ */
+static inline const curve_params *
+id_to_curve(int curve)
+{
+	static const curve_params params_by_curve[] = {
+		/* modulus, b,      R2,      p0i,        point_len */
+		{ P256_P, P256_B, P256_R2, 0x00000001,  65 },
+		{ P384_P, P384_B, P384_R2, 0x00000001,  97 },
+		{ P521_P, P521_B, P521_R2, 0x00000001, 133 }
+	};
+	int idx;
+
+	idx = curve - BR_EC_secp256r1;
+	return params_by_curve + idx;
+}
+
+/*
+ * Number of 32-bit words reserved for one field element:
+ * (B + 61) / 31 == 1 + ceil(B / 31), i.e. one header word plus enough
+ * 31-bit limbs for B bits, with B = BR_MAX_EC_SIZE.
+ * NOTE(review): assumes BR_MAX_EC_SIZE is a size in bits -- confirm.
+ */
+#define I31_LEN   ((BR_MAX_EC_SIZE + 61) / 31)
+
+/*
+ * Type for a point in Jacobian coordinates:
+ * -- three values, x, y and z, in Montgomery representation
+ * -- affine coordinates are X = x / z^2 and Y = y / z^3
+ * -- for the point at infinity, z = 0
+ *
+ * c[0], c[1] and c[2] hold x, y and z respectively. run_code() copies
+ * the whole array into three consecutive interpreter registers with a
+ * single memcpy(), so the layout must stay exactly c[3][I31_LEN].
+ */
+typedef struct {
+	uint32_t c[3][I31_LEN];
+} jacobian;
+
+/*
+ * We use a custom interpreter that uses thirteen registers (the t[]
+ * array of run_code()), and only six operations:
+ *    MSET(d, a)       copy a into d
+ *    MADD(d, a)       d = d+a (modular)
+ *    MSUB(d, a)       d = d-a (modular)
+ *    MMUL(d, a, b)    d = a*b (Montgomery multiplication)
+ *    MINV(d, a, b)    invert d modulo p; a and b are used as scratch registers
+ *    MTZ(d)           clear return value if d = 0
+ * Destination of MMUL (d) must be distinct from operands (a and b).
+ * There is no such constraint for MSUB and MADD.
+ *
+ * Registers include the operand coordinates, and temporaries.
+ *
+ * Encoding of one 16-bit instruction word:
+ *    bits 12..15   operation (0 = MSET ... 5 = MTZ)
+ *    bits  8..11   destination register d
+ *    bits  4..7    first source register a
+ *    bits  0..3    second source register b
+ *
+ * A word equal to 0 ends the program (ENDCODE). MSET(0, 0) also encodes
+ * to 0, so that particular self-copy cannot be used as an instruction.
+ * run_code() handles MTZ in its 'default' branch, so any operation value
+ * of 5 or more is executed as MTZ.
+ */
+#define MSET(d, a)      (0x0000 + ((d) << 8) + ((a) << 4))
+#define MADD(d, a)      (0x1000 + ((d) << 8) + ((a) << 4))
+#define MSUB(d, a)      (0x2000 + ((d) << 8) + ((a) << 4))
+#define MMUL(d, a, b)   (0x3000 + ((d) << 8) + ((a) << 4) + (b))
+#define MINV(d, a, b)   (0x4000 + ((d) << 8) + ((a) << 4) + (b))
+#define MTZ(d)          (0x5000 + ((d) << 8))
+#define ENDCODE         0
+
+/*
+ * Registers for the input operands. run_code() loads the first operand
+ * into registers 0..2 and the second one into registers 3..5; only
+ * registers 0..2 are copied back to the caller.
+ */
+#define P1x    0
+#define P1y    1
+#define P1z    2
+#define P2x    3
+#define P2y    4
+#define P2z    5
+
+/*
+ * Alternate names for the first input operand.
+ */
+#define Px     0
+#define Py     1
+#define Pz     2
+
+/*
+ * Temporaries. Their content is not initialised when a program starts.
+ */
+#define t1     6
+#define t2     7
+#define t3     8
+#define t4     9
+#define t5    10
+#define t6    11
+#define t7    12
+
+/*
+ * Extra scratch registers available when there is no second operand (e.g.
+ * for "double" and "affine"). These are the same register numbers as
+ * P2x, P2y and P2z.
+ */
+#define t8     3
+#define t9     4
+#define t10    5
+
+/*
+ * Doubling formulas are:
+ *
+ *   s = 4*x*y^2
+ *   m = 3*(x + z^2)*(x - z^2)
+ *   x' = m^2 - 2*s
+ *   y' = m*(s - x') - 8*y^4
+ *   z' = 2*y*z
+ *
+ * If y = 0 (P has order 2) then this yields infinity (z' = 0), as it
+ * should. This case should not happen anyway, because our curves have
+ * prime order, and thus do not contain any point of order 2.
+ *
+ * If P is infinity (z = 0), then again the formulas yield infinity,
+ * which is correct. Thus, this code works for all points.
+ *
+ * Cost: 8 multiplications
+ */
+/*
+ * Interpreter program for point doubling; operates in place on the
+ * first operand (Px, Py, Pz) and uses t1..t4 as temporaries. It contains
+ * no MTZ instruction, so run_code() always returns 1 for it.
+ */
+static const uint16_t code_double[] = {
+	/*
+	 * Compute z^2 (in t1).
+	 */
+	MMUL(t1, Pz, Pz),
+
+	/*
+	 * Compute x-z^2 (in t2) and then x+z^2 (in t1).
+	 */
+	MSET(t2, Px),
+	MSUB(t2, t1),
+	MADD(t1, Px),
+
+	/*
+	 * Compute m = 3*(x+z^2)*(x-z^2) (in t1).
+	 * The product goes to t3 first (MMUL destination must differ
+	 * from its operands), then t1 = t3 + t3 + t3.
+	 */
+	MMUL(t3, t1, t2),
+	MSET(t1, t3),
+	MADD(t1, t3),
+	MADD(t1, t3),
+
+	/*
+	 * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
+	 * t2 = x * (2*y^2), then doubled.
+	 */
+	MMUL(t3, Py, Py),
+	MADD(t3, t3),
+	MMUL(t2, Px, t3),
+	MADD(t2, t2),
+
+	/*
+	 * Compute x' = m^2 - 2*s.
+	 * From here on Px holds x'; the original x is no longer needed.
+	 */
+	MMUL(Px, t1, t1),
+	MSUB(Px, t2),
+	MSUB(Px, t2),
+
+	/*
+	 * Compute z' = 2*y*z.
+	 * This must come before Py is overwritten below.
+	 */
+	MMUL(t4, Py, Pz),
+	MSET(Pz, t4),
+	MADD(Pz, t4),
+
+	/*
+	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
+	 * 2*y^2 in t3.
+	 * t4 = (2*y^2)^2 = 4*y^4, subtracted twice to get 8*y^4.
+	 */
+	MSUB(t2, Px),
+	MMUL(Py, t1, t2),
+	MMUL(t4, t3, t3),
+	MSUB(Py, t4),
+	MSUB(Py, t4),
+
+	ENDCODE
+};
+
+/*
+ * Addition formulas are:
+ *
+ *   u1 = x1 * z2^2
+ *   u2 = x2 * z1^2
+ *   s1 = y1 * z2^3
+ *   s2 = y2 * z1^3
+ *   h = u2 - u1
+ *   r = s2 - s1
+ *   x3 = r^2 - h^3 - 2 * u1 * h^2
+ *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+ *   z3 = h * z1 * z2
+ *
+ * If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that
+ * z3 == 0, so the result is correct.
+ * If either of P1 or P2 is infinity, but not both, then z3 == 0, which is
+ * not correct.
+ * h == 0 only if u1 == u2; this happens in two cases:
+ * -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2
+ * -- if s1 != s2 then P1 + P2 == infinity (but neither P1 or P2 is infinity)
+ *
+ * Thus, the following situations are not handled correctly:
+ * -- P1 = 0 and P2 != 0
+ * -- P1 != 0 and P2 = 0
+ * -- P1 = P2
+ * All other cases are properly computed. However, even in "incorrect"
+ * situations, the three coordinates still are properly formed field
+ * elements.
+ *
+ * The returned flag is cleared if r == 0. This happens in the following
+ * cases:
+ * -- Both points are on the same horizontal line (same Y coordinate).
+ * -- Both points are infinity.
+ * -- One point is infinity and the other is on line Y = 0.
+ * The third case cannot happen with our curves (there is no valid point
+ * on line Y = 0 since that would be a point of order 2). If the two
+ * source points are non-infinity, then remains only the case where the
+ * two points are on the same horizontal line.
+ *
+ * This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and
+ * P2 != 0:
+ * -- If the returned value is not the point at infinity, then it was properly
+ * computed.
+ * -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result
+ * is indeed the point at infinity.
+ * -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should
+ * use the 'double' code.
+ *
+ * Cost: 16 multiplications
+ */
+/*
+ * Interpreter program for point addition: P1 <- P1 + P2, using t1..t7.
+ * The flag returned by run_code() is 0 when r = s2 - s1 is zero and 1
+ * otherwise.
+ */
+static const uint16_t code_add[] = {
+	/*
+	 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
+	 * t3 first holds z2^2 and t4 holds z2^3; t3 is then reused for s1.
+	 */
+	MMUL(t3, P2z, P2z),
+	MMUL(t1, P1x, t3),
+	MMUL(t4, P2z, t3),
+	MMUL(t3, P1y, t4),
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 * t4 first holds z1^2 and t5 holds z1^3; t4 is then reused for s2.
+	 */
+	MMUL(t4, P1z, P1z),
+	MMUL(t2, P2x, t4),
+	MMUL(t5, P1z, t4),
+	MMUL(t4, P2y, t5),
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
+	 */
+	MSUB(t2, t1),
+	MSUB(t4, t3),
+
+	/*
+	 * Report cases where r = 0 through the returned flag.
+	 * MTZ only updates the flag; no register is modified.
+	 */
+	MTZ(t4),
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5).
+	 */
+	MMUL(t7, t2, t2),
+	MMUL(t6, t1, t7),
+	MMUL(t5, t7, t2),
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 * t1 and t7 can be used as scratch registers.
+	 * P1x is overwritten here: x1 was already consumed into u1.
+	 */
+	MMUL(P1x, t4, t4),
+	MSUB(P1x, t5),
+	MSUB(P1x, t6),
+	MSUB(P1x, t6),
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 * t6 becomes u1*h^2 - x3; t1 is recycled to hold s1*h^3.
+	 */
+	MSUB(t6, P1x),
+	MMUL(P1y, t4, t6),
+	MMUL(t1, t5, t3),
+	MSUB(P1y, t1),
+
+	/*
+	 * Compute z3 = h*z1*z2.
+	 * z1*z2 goes through t1 because an MMUL destination may not be
+	 * one of its own operands.
+	 */
+	MMUL(t1, P1z, P2z),
+	MMUL(P1z, t1, t2),
+
+	ENDCODE
+};
+
+/*
+ * Check that the point is on the curve. This code snippet assumes the
+ * following conventions:
+ * -- Coordinates x and y have been freshly decoded in P1 (but not
+ * converted to Montgomery coordinates yet).
+ * -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1.
+ */
+/*
+ * Interpreter program that converts freshly decoded affine coordinates
+ * to Montgomery representation and tests the curve equation
+ * y^2 = x^3 - 3*x + b.
+ *
+ * Beware of the sense of the flag: MTZ clears it when its register is
+ * zero, and here the register is zero exactly when the equation HOLDS.
+ * run_code() therefore returns 0 for a point on the curve and 1 for a
+ * point off the curve.
+ */
+static const uint16_t code_check[] = {
+
+	/* Convert x and y to Montgomery representation. */
+	MMUL(t1, P1x, P2x),
+	MMUL(t2, P1y, P2x),
+	MSET(P1x, t1),
+	MSET(P1y, t2),
+
+	/* Compute x^3 in t1. */
+	MMUL(t2, P1x, P1x),
+	MMUL(t1, P1x, t2),
+
+	/* Subtract 3*x from t1. */
+	MSUB(t1, P1x),
+	MSUB(t1, P1x),
+	MSUB(t1, P1x),
+
+	/* Add b. */
+	MADD(t1, P2y),
+
+	/* Compute y^2 in t2. */
+	MMUL(t2, P1y, P1y),
+
+	/* Compare y^2 with x^3 - 3*x + b; they must match. */
+	MSUB(t1, t2),
+	MTZ(t1),
+
+	/*
+	 * Set z to 1 (in Montgomery representation): Montgomery product
+	 * of R^2 (P2x) with plain 1 (P2z).
+	 */
+	MMUL(P1z, P2x, P2z),
+
+	ENDCODE
+};
+
+/*
+ * Conversion back to affine coordinates. This code snippet assumes that
+ * the z coordinate of P2 is set to 1 (not in Montgomery representation).
+ *
+ * point_encode() is the caller that runs this snippet; it initialises
+ * only the z coordinate of P2, and P2z is indeed the only P2 register
+ * read here. On exit P1x and P1y hold the values that point_encode()
+ * serialises with br_i31_encode(). Registers t1 to t4 are clobbered.
+ */
+static const uint16_t code_affine[] = {
+
+	/* Save z*R in t1. */
+	MSET(t1, P1z),
+
+	/* Compute z^3 in t2. */
+	MMUL(t2, P1z, P1z),
+	MMUL(t3, P1z, t2),
+	MMUL(t2, t3, P2z),
+
+	/* Invert to (1/z^3) in t2. */
+	MINV(t2, t3, t4),
+
+	/* Compute y. */
+	MSET(t3, P1y),
+	MMUL(P1y, t2, t3),
+
+	/* Compute (1/z^2) in t3. */
+	MMUL(t3, t2, t1),
+
+	/* Compute x. */
+	MSET(t2, P1x),
+	MMUL(P1x, t2, t3),
+
+	ENDCODE
+};
+
+/*
+ * Interpreter for the code_* snippets. It works on a bank of 13
+ * field-element registers: the three coordinates of P1 are loaded
+ * starting at register P1x, the three coordinates of P2 starting at
+ * register P2x, and on exit the three registers starting at P1x are
+ * copied back into P1. Both operands are copied before any instruction
+ * runs, so P1 and P2 may be the same object (point_double() does that).
+ *
+ * Each 16-bit instruction holds the opcode in bits 12..15 and the
+ * operands d, a and b in bits 8..11, 4..7 and 0..3; a zero word ends
+ * the program.
+ *
+ * The returned value starts at 1 and is ANDed with the complement of
+ * br_i31_iszero(t[d]) for every instruction whose opcode is above 4.
+ */
+static uint32_t
+run_code(jacobian *P1, const jacobian *P2,
+	const curve_params *cc, const uint16_t *code)
+{
+	uint32_t r;
+	uint32_t t[13][I31_LEN];
+	size_t u;
+
+	r = 1;
+
+	/*
+	 * Copy the two operands in the dedicated registers.
+	 */
+	memcpy(t[P1x], P1->c, 3 * I31_LEN * sizeof(uint32_t));
+	memcpy(t[P2x], P2->c, 3 * I31_LEN * sizeof(uint32_t));
+
+	/*
+	 * Run formulas.
+	 */
+	for (u = 0;; u ++) {
+		unsigned op, d, a, b;
+
+		op = code[u];
+		if (op == 0) {
+			break;
+		}
+		d = (op >> 8) & 0x0F;
+		a = (op >> 4) & 0x0F;
+		b = op & 0x0F;
+		op >>= 12;
+		switch (op) {
+			/*
+			 * These declarations sit before the first case
+			 * label: they are in scope for every case and
+			 * have no initialiser.
+			 */
+			uint32_t ctl;
+			size_t plen;
+			unsigned char tp[(BR_MAX_EC_SIZE + 7) >> 3];
+
+		case 0:
+			/* Copy register a into register d. */
+			memcpy(t[d], t[a], I31_LEN * sizeof(uint32_t));
+			break;
+		case 1:
+			/*
+			 * t[d] += t[a], followed by one subtraction of p
+			 * controlled by ctl. NOTE(review): presumably ctl
+			 * ends up set when the addition carried or when
+			 * the sum is not lower than p, which makes this a
+			 * modular addition -- confirm against br_i31_add()
+			 * and br_i31_sub().
+			 */
+			ctl = br_i31_add(t[d], t[a], 1);
+			ctl |= NOT(br_i31_sub(t[d], cc->p, 0));
+			br_i31_sub(t[d], cc->p, ctl);
+			break;
+		case 2:
+			/*
+			 * t[d] -= t[a]; the value returned by br_i31_sub()
+			 * (presumably the borrow -- confirm) controls
+			 * whether p is added back.
+			 */
+			br_i31_add(t[d], cc->p, br_i31_sub(t[d], t[a], 1));
+			break;
+		case 3:
+			/* t[d] = Montgomery product of t[a] and t[b]. */
+			br_i31_montymul(t[d], t[a], t[b], cc->p, cc->p0i);
+			break;
+		case 4:
+			/*
+			 * plen uses the same modulus byte length formula
+			 * as point_decode(). p is encoded into tp and 2 is
+			 * subtracted from its last byte, so the exponent
+			 * given to br_i31_modpow() is presumably p - 2
+			 * (assumes big-endian output and a low byte of p
+			 * of at least 2 -- confirm); code_affine labels
+			 * this instruction as an inversion. t[a] and t[b]
+			 * are passed as the two trailing arguments,
+			 * presumably as scratch space.
+			 */
+			plen = (cc->p[0] - (cc->p[0] >> 5) + 7) >> 3;
+			br_i31_encode(tp, plen, cc->p);
+			tp[plen - 1] -= 2;
+			br_i31_modpow(t[d], tp, plen,
+				cc->p, cc->p0i, t[a], t[b]);
+			break;
+		default:
+			/* Fold the zero test of t[d] into the flag. */
+			r &= ~br_i31_iszero(t[d]);
+			break;
+		}
+	}
+
+	/*
+	 * Copy back result.
+	 */
+	memcpy(P1->c, t[P1x], 3 * I31_LEN * sizeof(uint32_t));
+	return r;
+}
+
+/*
+ * Set x to the value 1, using the modulus p as a template: the first
+ * (p[0] + 63) >> 5 words of x are cleared, then p[0] is copied into
+ * x[0] and x[1] is set to 1.
+ * NOTE(review): presumably word 0 is the encoded bit length of the
+ * "i31" integer format and word 1 its least significant limb --
+ * confirm against inner.h.
+ */
+static void
+set_one(uint32_t *x, const uint32_t *p)
+{
+	size_t plen;
+
+	plen = (p[0] + 63) >> 5;
+	memset(x, 0, plen * sizeof *x);
+	x[0] = p[0];
+	x[1] = 0x00000001;
+}
+
+/*
+ * Reset P: the whole structure is cleared, then word 0 of each of the
+ * three coordinates receives cc->p[0], so that every coordinate is a
+ * zero value carrying the same header word as the field modulus.
+ * point_mul() uses this as the starting value of its accumulator.
+ */
+static void
+point_zero(jacobian *P, const curve_params *cc)
+{
+	memset(P, 0, sizeof *P);
+	P->c[0][0] = P->c[1][0] = P->c[2][0] = cc->p[0];
+}
+
+/*
+ * Run the code_double snippet on P, in place. P is passed as both
+ * operands of run_code(); the flag returned by run_code() is ignored.
+ */
+static inline void
+point_double(jacobian *P, const curve_params *cc)
+{
+	run_code(P, P, cc, code_double);
+}
+
+/*
+ * Run the code_add snippet: P1 is replaced with the result of adding
+ * P2 to it. The value returned is the run_code() flag, which code_add
+ * uses to report the case r = 0 (r = s2 - s1 in its formulas); as
+ * described in api_muladd(), the flag is 0 when both operands are the
+ * same point.
+ */
+static inline uint32_t
+point_add(jacobian *P1, const jacobian *P2, const curve_params *cc)
+{
+	return run_code(P1, P2, cc, code_add);
+}
+
+/*
+ * Multiply P by the integer x and store the result back into P. The
+ * xlen bytes of x are consumed in order and, within each byte, the bit
+ * pairs are taken from the top (shift 6) down to the bottom (shift 0).
+ * If every bit pair of x is zero, P ends up as the value produced by
+ * point_zero() doubled repeatedly.
+ */
+static void
+point_mul(jacobian *P, const unsigned char *x, size_t xlen,
+	const curve_params *cc)
+{
+	/*
+	 * We do a simple double-and-add ladder with a 2-bit window
+	 * to make only one add every two doublings. We thus first
+	 * precompute 2P and 3P in some local buffers.
+	 *
+	 * We always perform two doublings and one addition; the
+	 * addition is with P, 2P and 3P and is done in a temporary
+	 * array.
+	 *
+	 * The addition code cannot handle cases where one of the
+	 * operands is infinity, which is the case at the start of the
+	 * ladder. We therefore need to maintain a flag that controls
+	 * this situation.
+	 */
+	uint32_t qz;
+	jacobian P2, P3, Q, T, U;
+
+	memcpy(&P2, P, sizeof P2);
+	point_double(&P2, cc);
+	memcpy(&P3, P, sizeof P3);
+	point_add(&P3, &P2, cc);
+
+	point_zero(&Q, cc);
+	qz = 1;	/* stays 1 until a non-zero window has been seen */
+	while (xlen -- > 0) {
+		int k;
+
+		for (k = 6; k >= 0; k -= 2) {
+			uint32_t bits;
+			uint32_t bnz;
+
+			point_double(&Q, cc);
+			point_double(&Q, cc);
+			/*
+			 * T starts as P and is replaced with 2P or 3P
+			 * for windows 2 and 3 (assuming CCOPY copies
+			 * when its first argument is 1); U is a copy
+			 * of Q that receives the sum.
+			 */
+			memcpy(&T, P, sizeof T);
+			memcpy(&U, &Q, sizeof U);
+			bits = (*x >> k) & (uint32_t)3;
+			bnz = NEQ(bits, 0);
+			CCOPY(EQ(bits, 2), &T, &P2, sizeof T);
+			CCOPY(EQ(bits, 3), &T, &P3, sizeof T);
+			/* The flag returned by point_add() is unused. */
+			point_add(&U, &T, cc);
+			/*
+			 * First non-zero window: Q takes the table
+			 * entry itself. Later non-zero windows: Q takes
+			 * the sum. Zero window: Q is left unchanged.
+			 */
+			CCOPY(bnz & qz, &Q, &T, sizeof Q);
+			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);
+			qz &= ~bnz;
+		}
+		x ++;
+	}
+	memcpy(P, &Q, sizeof Q);
+}
+
+/*
+ * Decode point into Jacobian coordinates. This function does not support
+ * the point at infinity. If the point is invalid then this returns 0, but
+ * the coordinates are still set to properly formed field elements.
+ *
+ * src/len is the encoded point. len must be equal to 1 + 2*plen, where
+ * plen is the byte length of the field modulus; otherwise 0 is returned
+ * right after P has been reset with point_zero(). The returned flag
+ * combines (with &) the decoding of both coordinates, the test on the
+ * first byte and the on-curve check performed by code_check.
+ */
+static uint32_t
+point_decode(jacobian *P, const void *src, size_t len, const curve_params *cc)
+{
+	/*
+	 * Points must use uncompressed format:
+	 * -- first byte is 0x04;
+	 * -- coordinates X and Y use unsigned big-endian, with the same
+	 *    length as the field modulus.
+	 *
+	 * We don't support hybrid format (uncompressed, but first byte
+	 * has value 0x06 or 0x07, depending on the least significant bit
+	 * of Y) because it is rather useless, and explicitly forbidden
+	 * by PKIX (RFC 5480, section 2.2).
+	 *
+	 * We don't support compressed format either, because it is not
+	 * much used in practice (there are or were patent-related
+	 * concerns about point compression, which explains the lack of
+	 * generalised support). Also, point compression support would
+	 * need a bit more code.
+	 */
+	const unsigned char *buf;
+	size_t plen, zlen;
+	uint32_t r;
+	jacobian Q;
+
+	buf = src;
+	point_zero(P, cc);
+	plen = (cc->p[0] - (cc->p[0] >> 5) + 7) >> 3;
+	if (len != 1 + (plen << 1)) {
+		return 0;
+	}
+	r = br_i31_decode_mod(P->c[0], buf + 1, plen, cc->p);
+	r &= br_i31_decode_mod(P->c[1], buf + 1 + plen, plen, cc->p);
+
+	/*
+	 * Check first byte.
+	 */
+	r &= EQ(buf[0], 0x04);
+	/* obsolete
+	r &= EQ(buf[0], 0x04) | (EQ(buf[0] & 0xFE, 0x06)
+		& ~(uint32_t)(buf[0] ^ buf[plen << 1]));
+	*/
+
+	/*
+	 * Convert coordinates and check that the point is valid.
+	 * Q is the second operand expected by code_check: R^2, b*R and 1.
+	 * run_code() returns 0 when the tested value is zero, hence the
+	 * complement below.
+	 */
+	zlen = ((cc->p[0] + 63) >> 5) * sizeof(uint32_t);
+	memcpy(Q.c[0], cc->R2, zlen);
+	memcpy(Q.c[1], cc->b, zlen);
+	set_one(Q.c[2], cc->p);
+	r &= ~run_code(P, &Q, cc, code_check);
+	return r;
+}
+
+/*
+ * Encode a point. This method assumes that the point is correct and is
+ * not the point at infinity. Encoded size is always 1+2*plen, where
+ * plen is the field modulus length, in bytes.
+ *
+ * dst receives the byte 0x04 followed by the two affine coordinates.
+ * P itself is not modified: the conversion runs on a local copy.
+ */
+static void
+point_encode(void *dst, const jacobian *P, const curve_params *cc)
+{
+	unsigned char *buf;
+	uint32_t xbl;
+	size_t plen;
+	jacobian Q, T;
+
+	buf = dst;
+	xbl = cc->p[0];
+	xbl -= (xbl >> 5);
+	plen = (xbl + 7) >> 3;
+	buf[0] = 0x04;
+	memcpy(&Q, P, sizeof *P);
+	/*
+	 * Only the z coordinate of T is initialised; run_code() still
+	 * copies all three coordinates of T into its registers, but
+	 * code_affine reads none of them except P2z.
+	 */
+	set_one(T.c[2], cc->p);
+	run_code(&Q, &T, cc, code_affine);
+	br_i31_encode(buf + 1, plen, Q.c[0]);
+	br_i31_encode(buf + 1 + plen, plen, Q.c[1]);
+}
+
+/*
+ * Map a curve identifier to its static curve definition. Only
+ * BR_EC_secp256r1, BR_EC_secp384r1 and BR_EC_secp521r1 are handled;
+ * NULL is returned for any other value.
+ */
+static const br_ec_curve_def *
+id_to_curve_def(int curve)
+{
+	switch (curve) {
+	case BR_EC_secp256r1:
+		return &br_secp256r1;
+	case BR_EC_secp384r1:
+		return &br_secp384r1;
+	case BR_EC_secp521r1:
+		return &br_secp521r1;
+	}
+	return NULL;
+}
+
+/*
+ * Return the encoded generator of the given curve and store its length
+ * (in bytes) in *len.
+ * NOTE(review): id_to_curve_def() returns NULL for unhandled curve
+ * identifiers and the result is dereferenced without a check, so the
+ * caller must only pass one of the three supported curves.
+ */
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	const br_ec_curve_def *cd;
+
+	cd = id_to_curve_def(curve);
+	*len = cd->generator_len;
+	return cd->generator;
+}
+
+/*
+ * Return the encoded order of the given curve and store its length (in
+ * bytes) in *len.
+ * NOTE(review): id_to_curve_def() returns NULL for unhandled curve
+ * identifiers and the result is dereferenced without a check, so the
+ * caller must only pass one of the three supported curves.
+ */
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	const br_ec_curve_def *cd;
+
+	cd = id_to_curve_def(curve);
+	*len = cd->order_len;
+	return cd->order;
+}
+
+/*
+ * Store in *len half of the generator length (rounded down) and return
+ * the constant 1.
+ * NOTE(review): presumably these are the length and offset of the X
+ * coordinate inside an encoded point (one format byte, then X, then
+ * Y) -- confirm against bearssl_ec.h.
+ */
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	api_generator(curve, len);
+	*len >>= 1;
+	return 1;
+}
+
+/*
+ * Multiply the encoded point G (Glen bytes) by the integer x (xlen
+ * bytes) and write the encoded result over G. Returns 0 without
+ * touching G if Glen differs from cc->point_len; otherwise returns the
+ * flag produced by point_decode().
+ */
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	uint32_t r;
+	const curve_params *cc;
+	jacobian P;
+
+	cc = id_to_curve(curve);
+	if (Glen != cc->point_len) {
+		return 0;
+	}
+	/*
+	 * The multiplication and the encoding are carried out even when
+	 * decoding reported an invalid point; only r tells the caller.
+	 */
+	r = point_decode(&P, G, Glen, cc);
+	point_mul(&P, x, xlen, cc);
+	point_encode(G, &P, cc);
+	return r;
+}
+
+/*
+ * Multiply the curve generator by the integer x (xlen bytes): the
+ * encoded generator is copied into R, then multiplied in place with
+ * api_mul(). R must have room for the encoded generator. The value
+ * returned is the generator length; the flag returned by api_mul() is
+ * ignored.
+ */
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	const unsigned char *G;
+	size_t Glen;
+
+	G = api_generator(curve, &Glen);
+	memcpy(R, G, Glen);
+	api_mul(R, Glen, x, xlen, curve);
+	return Glen;
+}
+
+/*
+ * Compute x*A + y*B and write the encoded result over A. A and B are
+ * encoded points of len bytes each; B == NULL selects the curve
+ * generator. Returns 0 without touching A if len differs from
+ * cc->point_len. Otherwise the returned flag combines both point
+ * decodings and is cleared when the sum is detected to be the point at
+ * infinity (see the table below).
+ */
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	uint32_t r, t, z;
+	const curve_params *cc;
+	jacobian P, Q;
+
+	/*
+	 * TODO: see about merging the two ladders. Right now, we do
+	 * two independent point multiplications, which is a bit
+	 * wasteful of CPU resources (but yields short code).
+	 */
+
+	cc = id_to_curve(curve);
+	if (len != cc->point_len) {
+		return 0;
+	}
+	r = point_decode(&P, A, len, cc);
+	if (B == NULL) {
+		/*
+		 * Glen receives the generator length but is not read
+		 * afterwards; len is used for the decoding below.
+		 */
+		size_t Glen;
+
+		B = api_generator(curve, &Glen);
+	}
+	r &= point_decode(&Q, B, len, cc);
+	point_mul(&P, x, xlen, cc);
+	point_mul(&Q, y, ylen, cc);
+
+	/*
+	 * We want to compute P+Q. Since the base points A and B are distinct
+	 * from infinity, and the multipliers are non-zero and lower than the
+	 * curve order, then we know that P and Q are non-infinity. This
+	 * leaves two special situations to test for:
+	 * -- If P = Q then we must use point_double().
+	 * -- If P+Q = 0 then we must report an error.
+	 */
+	t = point_add(&P, &Q, cc);
+	point_double(&Q, cc);
+	z = br_i31_iszero(P.c[2]);
+
+	/*
+	 * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
+	 * have the following:
+	 *
+	 *   z = 0, t = 0   return P (normal addition)
+	 *   z = 0, t = 1   return P (normal addition)
+	 *   z = 1, t = 0   return Q (a 'double' case)
+	 *   z = 1, t = 1   report an error (P+Q = 0)
+	 */
+	CCOPY(z & ~t, &P, &Q, sizeof Q);
+	point_encode(A, &P, cc);
+	r &= ~(z & t);
+
+	return r;
+}
+
+/*
+ * see bearssl_ec.h
+ *
+ * Implementation descriptor gathering the api_* functions of this file.
+ * The first member is 0x03800000, i.e. bits 23, 24 and 25 set.
+ * NOTE(review): presumably this is the supported_curves bit mask, the
+ * three bits standing for secp256r1, secp384r1 and secp521r1 (the
+ * curves handled by id_to_curve_def()) -- confirm against the BR_EC_*
+ * values in bearssl_ec.h.
+ */
+const br_ec_impl br_ec_prime_i31 = {
+	(uint32_t)0x03800000,
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
--- a/third_party/bearssl/src/ec_pubkey.c
+++ b/third_party/bearssl/src/ec_pubkey.c
@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Length in bytes of an encoded public point, indexed by curve ID
+ * (31 entries, for IDs 0 to 30). br_ec_compute_pub() returns the entry
+ * of the key's curve when it is called without an output buffer.
+ */
+static const unsigned char POINT_LEN[] = {
+	  0,   /* 0: not a valid curve ID */
+	 43,   /* sect163k1 */
+	 43,   /* sect163r1 */
+	 43,   /* sect163r2 */
+	 51,   /* sect193r1 */
+	 51,   /* sect193r2 */
+	 61,   /* sect233k1 */
+	 61,   /* sect233r1 */
+	 61,   /* sect239k1 */
+	 73,   /* sect283k1 */
+	 73,   /* sect283r1 */
+	105,   /* sect409k1 */
+	105,   /* sect409r1 */
+	145,   /* sect571k1 */
+	145,   /* sect571r1 */
+	 41,   /* secp160k1 */
+	 41,   /* secp160r1 */
+	 41,   /* secp160r2 */
+	 49,   /* secp192k1 */
+	 49,   /* secp192r1 */
+	 57,   /* secp224k1 */
+	 57,   /* secp224r1 */
+	 65,   /* secp256k1 */
+	 65,   /* secp256r1 */
+	 97,   /* secp384r1 */
+	133,   /* secp521r1 */
+	 65,   /* brainpoolP256r1 */
+	 97,   /* brainpoolP384r1 */
+	129,   /* brainpoolP512r1 */
+	 32,   /* curve25519 */
+	 56,   /* curve448 */
+};
+
+/*
+ * see bearssl_ec.h
+ *
+ * Compute the public key matching the private key sk, using the
+ * mulgen() function of impl.
+ * -- Returns 0 if sk->curve is negative, not lower than 32, outside of
+ *    POINT_LEN[], or not flagged in impl->supported_curves.
+ * -- With kbuf == NULL nothing is computed and POINT_LEN[curve] is
+ *    returned.
+ * -- Otherwise the public point is written into kbuf and the length
+ *    reported by mulgen() is returned; if pk is not NULL it is filled
+ *    in and its q field points to kbuf (no copy is made).
+ */
+size_t
+br_ec_compute_pub(const br_ec_impl *impl, br_ec_public_key *pk,
+	void *kbuf, const br_ec_private_key *sk)
+{
+	int curve;
+	size_t len;
+
+	curve = sk->curve;
+	/*
+	 * The range tests come first so that both the shift count and
+	 * the POINT_LEN[] index below are known to be in range.
+	 */
+	if (curve < 0 || curve >= 32 || curve >= (int)(sizeof POINT_LEN)
+		|| ((impl->supported_curves >> curve) & 1) == 0)
+	{
+		return 0;
+	}
+	if (kbuf == NULL) {
+		return POINT_LEN[curve];
+	}
+	len = impl->mulgen(kbuf, sk->x, sk->xlen, curve);
+	if (pk != NULL) {
+		pk->curve = curve;
+		pk->q = kbuf;
+		pk->qlen = len;
+	}
+	return len;
+}
--- a/third_party/bearssl/src/ec_secp256r1.c
+++ b/third_party/bearssl/src/ec_secp256r1.c
@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * 32 bytes. NOTE(review): presumably the order of the secp256r1
+ * subgroup, in unsigned big-endian -- confirm against the member order
+ * of br_ec_curve_def in inner.h.
+ */
+static const unsigned char P256_N[] = {
+	0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xBC, 0xE6, 0xFA, 0xAD, 0xA7, 0x17, 0x9E, 0x84,
+	0xF3, 0xB9, 0xCA, 0xC2, 0xFC, 0x63, 0x25, 0x51
+};
+
+/*
+ * 65 bytes: a leading 0x04 followed by two 32-byte values.
+ * NOTE(review): presumably the secp256r1 generator in uncompressed
+ * format (X then Y) -- confirm against inner.h.
+ */
+static const unsigned char P256_G[] = {
+	0x04, 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42,
+	0x47, 0xF8, 0xBC, 0xE6, 0xE5, 0x63, 0xA4, 0x40,
+	0xF2, 0x77, 0x03, 0x7D, 0x81, 0x2D, 0xEB, 0x33,
+	0xA0, 0xF4, 0xA1, 0x39, 0x45, 0xD8, 0x98, 0xC2,
+	0x96, 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F,
+	0x9B, 0x8E, 0xE7, 0xEB, 0x4A, 0x7C, 0x0F, 0x9E,
+	0x16, 0x2B, 0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E,
+	0xCE, 0xCB, 0xB6, 0x40, 0x68, 0x37, 0xBF, 0x51,
+	0xF5
+};
+
+/*
+ * see inner.h
+ *
+ * Curve definition: the curve identifier, then each of the two tables
+ * above together with its size.
+ */
+const br_ec_curve_def br_secp256r1 = {
+	BR_EC_secp256r1,
+	P256_N, sizeof P256_N,
+	P256_G, sizeof P256_G
+};
--- a/third_party/bearssl/src/ec_secp384r1.c
+++ b/third_party/bearssl/src/ec_secp384r1.c
@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * 48 bytes. NOTE(review): presumably the order of the secp384r1
+ * subgroup, in unsigned big-endian -- confirm against the member order
+ * of br_ec_curve_def in inner.h.
+ */
+static const unsigned char P384_N[] = {
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
+	0xC7, 0x63, 0x4D, 0x81, 0xF4, 0x37, 0x2D, 0xDF, 
+	0x58, 0x1A, 0x0D, 0xB2, 0x48, 0xB0, 0xA7, 0x7A, 
+	0xEC, 0xEC, 0x19, 0x6A, 0xCC, 0xC5, 0x29, 0x73
+};
+
+/*
+ * 97 bytes: a leading 0x04 followed by two 48-byte values.
+ * NOTE(review): presumably the secp384r1 generator in uncompressed
+ * format (X then Y) -- confirm against inner.h.
+ */
+static const unsigned char P384_G[] = {
+	0x04, 0xAA, 0x87, 0xCA, 0x22, 0xBE, 0x8B, 0x05,
+	0x37, 0x8E, 0xB1, 0xC7, 0x1E, 0xF3, 0x20, 0xAD,
+	0x74, 0x6E, 0x1D, 0x3B, 0x62, 0x8B, 0xA7, 0x9B,
+	0x98, 0x59, 0xF7, 0x41, 0xE0, 0x82, 0x54, 0x2A,
+	0x38, 0x55, 0x02, 0xF2, 0x5D, 0xBF, 0x55, 0x29,
+	0x6C, 0x3A, 0x54, 0x5E, 0x38, 0x72, 0x76, 0x0A,
+	0xB7, 0x36, 0x17, 0xDE, 0x4A, 0x96, 0x26, 0x2C,
+	0x6F, 0x5D, 0x9E, 0x98, 0xBF, 0x92, 0x92, 0xDC,
+	0x29, 0xF8, 0xF4, 0x1D, 0xBD, 0x28, 0x9A, 0x14,
+	0x7C, 0xE9, 0xDA, 0x31, 0x13, 0xB5, 0xF0, 0xB8,
+	0xC0, 0x0A, 0x60, 0xB1, 0xCE, 0x1D, 0x7E, 0x81,
+	0x9D, 0x7A, 0x43, 0x1D, 0x7C, 0x90, 0xEA, 0x0E,
+	0x5F
+};
+
+/*
+ * see inner.h
+ *
+ * Curve definition: the curve identifier, then each of the two tables
+ * above together with its size.
+ */
+const br_ec_curve_def br_secp384r1 = {
+	BR_EC_secp384r1,
+	P384_N, sizeof P384_N,
+	P384_G, sizeof P384_G
+};
--- a/third_party/bearssl/src/ec_secp521r1.c
+++ b/third_party/bearssl/src/ec_secp521r1.c
@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * 66 bytes. NOTE(review): presumably the order of the secp521r1
+ * subgroup, in unsigned big-endian -- confirm against the member order
+ * of br_ec_curve_def in inner.h.
+ */
+static const unsigned char P521_N[] = {
+	0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFA, 0x51, 0x86, 0x87, 0x83, 0xBF, 0x2F,
+	0x96, 0x6B, 0x7F, 0xCC, 0x01, 0x48, 0xF7, 0x09,
+	0xA5, 0xD0, 0x3B, 0xB5, 0xC9, 0xB8, 0x89, 0x9C,
+	0x47, 0xAE, 0xBB, 0x6F, 0xB7, 0x1E, 0x91, 0x38,
+	0x64, 0x09
+};
+
+/*
+ * 133 bytes: a leading 0x04 followed by two 66-byte values.
+ * NOTE(review): presumably the secp521r1 generator in uncompressed
+ * format (X then Y) -- confirm against inner.h.
+ */
+static const unsigned char P521_G[] = {
+	0x04, 0x00, 0xC6, 0x85, 0x8E, 0x06, 0xB7, 0x04,
+	0x04, 0xE9, 0xCD, 0x9E, 0x3E, 0xCB, 0x66, 0x23,
+	0x95, 0xB4, 0x42, 0x9C, 0x64, 0x81, 0x39, 0x05,
+	0x3F, 0xB5, 0x21, 0xF8, 0x28, 0xAF, 0x60, 0x6B,
+	0x4D, 0x3D, 0xBA, 0xA1, 0x4B, 0x5E, 0x77, 0xEF,
+	0xE7, 0x59, 0x28, 0xFE, 0x1D, 0xC1, 0x27, 0xA2,
+	0xFF, 0xA8, 0xDE, 0x33, 0x48, 0xB3, 0xC1, 0x85,
+	0x6A, 0x42, 0x9B, 0xF9, 0x7E, 0x7E, 0x31, 0xC2,
+	0xE5, 0xBD, 0x66, 0x01, 0x18, 0x39, 0x29, 0x6A,
+	0x78, 0x9A, 0x3B, 0xC0, 0x04, 0x5C, 0x8A, 0x5F,
+	0xB4, 0x2C, 0x7D, 0x1B, 0xD9, 0x98, 0xF5, 0x44,
+	0x49, 0x57, 0x9B, 0x44, 0x68, 0x17, 0xAF, 0xBD,
+	0x17, 0x27, 0x3E, 0x66, 0x2C, 0x97, 0xEE, 0x72,
+	0x99, 0x5E, 0xF4, 0x26, 0x40, 0xC5, 0x50, 0xB9,
+	0x01, 0x3F, 0xAD, 0x07, 0x61, 0x35, 0x3C, 0x70,
+	0x86, 0xA2, 0x72, 0xC2, 0x40, 0x88, 0xBE, 0x94,
+	0x76, 0x9F, 0xD1, 0x66, 0x50
+};
+
+/*
+ * see inner.h
+ *
+ * Curve definition: the curve identifier, then each of the two tables
+ * above together with its size.
+ */
+const br_ec_curve_def br_secp521r1 = {
+	BR_EC_secp521r1,
+	P521_N, sizeof P521_N,
+	P521_G, sizeof P521_G
+};
--- a/third_party/bearssl/src/ecdsa_atr.c
+++ b/third_party/bearssl/src/ecdsa_atr.c
@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_ec.h
+ *
+ * Convert, in place, the ASN.1 signature held in the first sig_len
+ * bytes of sig into the concatenation of r and s, both left-padded
+ * with zeros to a common length. Returns the new length in bytes, or 0
+ * when the input is rejected (sig is then left untouched).
+ *
+ * The new length is twice the length of the longer of r and s once
+ * their leading zeros are removed. When r and s have very different
+ * lengths this can exceed the sig_len received, so the buffer behind
+ * sig must have room for it (at most 254 bytes). If r and s are both
+ * zero the computed length is 0, which cannot be told apart from a
+ * rejection.
+ */
+size_t
+br_ecdsa_asn1_to_raw(void *sig, size_t sig_len)
+{
+	/*
+	 * Note: this code is a bit lenient in that it accepts a few
+	 * deviations to DER with regards to minimality of encoding of
+	 * lengths and integer values. These deviations are still
+	 * unambiguous.
+	 *
+	 * Signature format is a SEQUENCE of two INTEGER values. We
+	 * support only integers of less than 127 bytes each (signed
+	 * encoding) so the resulting raw signature will have length
+	 * at most 254 bytes.
+	 */
+
+	unsigned char *buf, *r, *s;
+	size_t zlen, rlen, slen, off;
+	unsigned char tmp[254];
+
+	buf = sig;
+	if (sig_len < 8) {
+		return 0;
+	}
+
+	/*
+	 * First byte is SEQUENCE tag.
+	 */
+	if (buf[0] != 0x30) {
+		return 0;
+	}
+
+	/*
+	 * The SEQUENCE length will be encoded over one or two bytes. We
+	 * limit the total SEQUENCE contents to 255 bytes, because it
+	 * makes things simpler; this is enough for subgroup orders up
+	 * to 999 bits.
+	 *
+	 * A first length byte of exactly 0x80 takes the one-byte branch
+	 * and is read as a length of 128. In both branches the declared
+	 * length must account for all remaining input bytes.
+	 */
+	zlen = buf[1];
+	if (zlen > 0x80) {
+		if (zlen != 0x81) {
+			return 0;
+		}
+		zlen = buf[2];
+		if (zlen != sig_len - 3) {
+			return 0;
+		}
+		off = 3;
+	} else {
+		if (zlen != sig_len - 2) {
+			return 0;
+		}
+		off = 2;
+	}
+
+	/*
+	 * First INTEGER (r). The two reads below stay within the input
+	 * because sig_len is at least 8 and off is at most 3 here.
+	 */
+	if (buf[off ++] != 0x02) {
+		return 0;
+	}
+	rlen = buf[off ++];
+	if (rlen >= 0x80) {
+		return 0;
+	}
+	r = buf + off;
+	off += rlen;
+
+	/*
+	 * Second INTEGER (s). Its tag and length bytes must still fit in
+	 * the input, and its contents must end exactly at sig_len.
+	 */
+	if (off + 2 > sig_len) {
+		return 0;
+	}
+	if (buf[off ++] != 0x02) {
+		return 0;
+	}
+	slen = buf[off ++];
+	if (slen >= 0x80 || slen != sig_len - off) {
+		return 0;
+	}
+	s = buf + off;
+
+	/*
+	 * Removing leading zeros from r and s.
+	 */
+	while (rlen > 0 && *r == 0) {
+		rlen --;
+		r ++;
+	}
+	while (slen > 0 && *s == 0) {
+		slen --;
+		s ++;
+	}
+
+	/*
+	 * Compute common length for the two integers, then copy integers
+	 * into the temporary buffer, and finally copy it back over the
+	 * signature buffer.
+	 * From here on sig_len is the output length; it is at most 254
+	 * since rlen and slen are both lower than 128.
+	 */
+	zlen = rlen > slen ? rlen : slen;
+	sig_len = zlen << 1;
+	memset(tmp, 0, sig_len);
+	memcpy(tmp + zlen - rlen, r, rlen);
+	memcpy(tmp + sig_len - slen, s, slen);
+	memcpy(sig, tmp, sig_len);
+	return sig_len;
+}
--- a/third_party/bearssl/src/ecdsa_default_sign_asn1.c
+++ b/third_party/bearssl/src/ecdsa_default_sign_asn1.c
@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * see bearssl_ec.h
+ *
+ * Return the default ECDSA signing function (ASN.1 signature output).
+ * The choice is made at compile time: br_ecdsa_i15_sign_asn1 when
+ * BR_LOMUL is non-zero, br_ecdsa_i31_sign_asn1 otherwise.
+ */
+br_ecdsa_sign
+br_ecdsa_sign_asn1_get_default(void)
+{
+#if BR_LOMUL
+	return &br_ecdsa_i15_sign_asn1;
+#else
+	return &br_ecdsa_i31_sign_asn1;
+#endif
+}
--- a/Show More
+++ b/Show More