mirror of https://github.com/ClassiCube/ClassiCube
WIP on adding BearSSL SSL backend
This commit is contained in:
parent
08e93f7562
commit
797f75140f
27
license.txt
27
license.txt
|
|
@ -150,6 +150,33 @@ SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
|
|||
==============================================================================
|
||||
|
||||
|
||||
BearSSL license
|
||||
==============================================================================
|
||||
Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
==============================================================================
|
||||
|
||||
|
||||
|
||||
FreeType license
|
||||
==================
|
||||
The FreeType Project LICENSE
|
||||
|
|
|
|||
168
src/SSL.c
168
src/SSL.c
|
|
@ -417,6 +417,174 @@ cc_result SSL_Free(void* ctx_) {
|
|||
Mem_Free(ctx);
|
||||
return 0;
|
||||
}
|
||||
#elif defined CC_BUILD_BEARSSL
|
||||
#include "bearssl.h"
|
||||
#define CERT_ATTRIBUTES
|
||||
#include "../misc/RootCerts.h"
|
||||
#include "String.h"
|
||||
// https://github.com/unkaktus/bearssl/blob/master/samples/client_basic.c#L283
|
||||
|
||||
// Per-connection state of the BearSSL backend.
// One instance is allocated by SSL_Init and released by SSL_Free.
typedef struct SSLContext {
|
||||
// TLS client context; its embedded engine (sc.eng) is what the I/O wrapper drives
br_ssl_client_context sc;
|
||||
// X.509 validation context, set up together with sc by br_ssl_client_init_full
br_x509_minimal_context xc;
|
||||
// Record buffer handed to the engine via br_ssl_engine_set_buffer (bidirectional size)
unsigned char iobuf[BR_SSL_BUFSIZE_BIDI];
|
||||
// Simplified I/O wrapper, bound to the socket callbacks by br_sslio_init
br_sslio_context ioc;
|
||||
} SSLContext;
|
||||
|
||||
// Last value passed to SSLBackend_Init
static cc_bool _verifyCerts;
|
||||
|
||||
|
||||
// Initialises the backend: only records the requested certificate verification flag.
//   verifyCerts - value stored into _verifyCerts
void SSLBackend_Init(cc_bool verifyCerts) {
|
||||
_verifyCerts = verifyCerts; // TODO support
|
||||
}
|
||||
// Stub: no backend specific error descriptions are produced.
// Always returns false and never reads res or writes to dst.
cc_bool SSLBackend_DescribeError(cc_result res, cc_string* dst) { return false; }
|
||||
|
||||
static int sock_read(void *ctx, unsigned char *buf, size_t len) {
|
||||
cc_uint32 read;
|
||||
cc_result res = Socket_Read((int)ctx, buf, len, &read);
|
||||
|
||||
if (res) return -1;
|
||||
return read;
|
||||
}
|
||||
static int sock_write(void *ctx, const unsigned char *buf, size_t len) {
|
||||
cc_uint32 wrote;
|
||||
cc_result res = Socket_Write((int)ctx, buf, len, &wrote);
|
||||
|
||||
if (res) return -1;
|
||||
return wrote;
|
||||
}
|
||||
/*
|
||||
* The hardcoded trust anchors. These are the two DN + public key that
|
||||
* correspond to the self-signed certificates cert-root-rsa.pem and
|
||||
* cert-root-ec.pem.
|
||||
*
|
||||
* C code for hardcoded trust anchors can be generated with the "brssl"
|
||||
* command-line tool (with the "ta" command).
|
||||
*/
|
||||
/* DER encoded distinguished name of trust anchor 0 (30 bytes).
 * The bytes decode to countryName "CA" and commonName "Root". */
static const unsigned char TA0_DN[] = {
|
||||
0x30, 0x1C, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13,
|
||||
0x02, 0x43, 0x41, 0x31, 0x0D, 0x30, 0x0B, 0x06, 0x03, 0x55, 0x04, 0x03,
|
||||
0x13, 0x04, 0x52, 0x6F, 0x6F, 0x74
|
||||
};
|
||||
/* RSA public modulus of trust anchor 0: 256 bytes, i.e. a 2048-bit key.
 * Referenced by the first entry of the TAs table. */
static const unsigned char TA0_RSA_N[] = {
|
||||
0xB6, 0xD9, 0x34, 0xD4, 0x50, 0xFD, 0xB3, 0xAF, 0x7A, 0x73, 0xF1, 0xCE,
|
||||
0x38, 0xBF, 0x5D, 0x6F, 0x45, 0xE1, 0xFD, 0x4E, 0xB1, 0x98, 0xC6, 0x60,
|
||||
0x83, 0x26, 0xD2, 0x17, 0xD1, 0xC5, 0xB7, 0x9A, 0xA3, 0xC1, 0xDE, 0x63,
|
||||
0x39, 0x97, 0x9C, 0xF0, 0x5E, 0x5C, 0xC8, 0x1C, 0x17, 0xB9, 0x88, 0x19,
|
||||
0x6D, 0xF0, 0xB6, 0x2E, 0x30, 0x50, 0xA1, 0x54, 0x6E, 0x93, 0xC0, 0xDB,
|
||||
0xCF, 0x30, 0xCB, 0x9F, 0x1E, 0x27, 0x79, 0xF1, 0xC3, 0x99, 0x52, 0x35,
|
||||
0xAA, 0x3D, 0xB6, 0xDF, 0xB0, 0xAD, 0x7C, 0xCB, 0x49, 0xCD, 0xC0, 0xED,
|
||||
0xE7, 0x66, 0x10, 0x2A, 0xE9, 0xCE, 0x28, 0x1F, 0x21, 0x50, 0xFA, 0x77,
|
||||
0x4C, 0x2D, 0xDA, 0xEF, 0x3C, 0x58, 0xEB, 0x4E, 0xBF, 0xCE, 0xE9, 0xFB,
|
||||
0x1A, 0xDA, 0xA3, 0x83, 0xA3, 0xCD, 0xA3, 0xCA, 0x93, 0x80, 0xDC, 0xDA,
|
||||
0xF3, 0x17, 0xCC, 0x7A, 0xAB, 0x33, 0x80, 0x9C, 0xB2, 0xD4, 0x7F, 0x46,
|
||||
0x3F, 0xC5, 0x3C, 0xDC, 0x61, 0x94, 0xB7, 0x27, 0x29, 0x6E, 0x2A, 0xBC,
|
||||
0x5B, 0x09, 0x36, 0xD4, 0xC6, 0x3B, 0x0D, 0xEB, 0xBE, 0xCE, 0xDB, 0x1D,
|
||||
0x1C, 0xBC, 0x10, 0x6A, 0x71, 0x71, 0xB3, 0xF2, 0xCA, 0x28, 0x9A, 0x77,
|
||||
0xF2, 0x8A, 0xEC, 0x42, 0xEF, 0xB1, 0x4A, 0x8E, 0xE2, 0xF2, 0x1A, 0x32,
|
||||
0x2A, 0xCD, 0xC0, 0xA6, 0x46, 0x2C, 0x9A, 0xC2, 0x85, 0x37, 0x91, 0x7F,
|
||||
0x46, 0xA1, 0x93, 0x81, 0xA1, 0x74, 0x66, 0xDF, 0xBA, 0xB3, 0x39, 0x20,
|
||||
0x91, 0x93, 0xFA, 0x1D, 0xA1, 0xA8, 0x85, 0xE7, 0xE4, 0xF9, 0x07, 0xF6,
|
||||
0x10, 0xF6, 0xA8, 0x27, 0x01, 0xB6, 0x7F, 0x12, 0xC3, 0x40, 0xC3, 0xC9,
|
||||
0xE2, 0xB0, 0xAB, 0x49, 0x18, 0x3A, 0x64, 0xB6, 0x59, 0xB7, 0x95, 0xB5,
|
||||
0x96, 0x36, 0xDF, 0x22, 0x69, 0xAA, 0x72, 0x6A, 0x54, 0x4E, 0x27, 0x29,
|
||||
0xA3, 0x0E, 0x97, 0x15
|
||||
};
|
||||
/* RSA public exponent of trust anchor 0: bytes 01 00 01, i.e. 65537. */
static const unsigned char TA0_RSA_E[] = {
|
||||
0x01, 0x00, 0x01
|
||||
};
|
||||
/* DER encoded distinguished name of trust anchor 1 (30 bytes).
 * Byte-for-byte the same name as TA0_DN: countryName "CA", commonName "Root". */
static const unsigned char TA1_DN[] = {
|
||||
0x30, 0x1C, 0x31, 0x0B, 0x30, 0x09, 0x06, 0x03, 0x55, 0x04, 0x06, 0x13,
|
||||
0x02, 0x43, 0x41, 0x31, 0x0D, 0x30, 0x0B, 0x06, 0x03, 0x55, 0x04, 0x03,
|
||||
0x13, 0x04, 0x52, 0x6F, 0x6F, 0x74
|
||||
};
|
||||
/* EC public point of trust anchor 1: 65 bytes with a leading 0x04
 * (uncompressed point format). The TAs table pairs it with BR_EC_secp256r1. */
static const unsigned char TA1_EC_Q[] = {
|
||||
0x04, 0x71, 0x74, 0xBA, 0xAB, 0xB9, 0x30, 0x2E, 0x81, 0xD5, 0xE5, 0x57,
|
||||
0xF9, 0xF3, 0x20, 0x68, 0x0C, 0x9C, 0xF9, 0x64, 0xDB, 0xB4, 0x20, 0x0D,
|
||||
0x6D, 0xEA, 0x40, 0xD0, 0x4A, 0x6E, 0x42, 0xFD, 0xB6, 0x9A, 0x68, 0x25,
|
||||
0x44, 0xF6, 0xDF, 0x7B, 0xC4, 0xFC, 0xDE, 0xDD, 0x7B, 0xBB, 0xC5, 0xDB,
|
||||
0x7C, 0x76, 0x3F, 0x41, 0x66, 0x40, 0x6E, 0xDB, 0xA7, 0x87, 0xC2, 0xE5,
|
||||
0xD8, 0xC5, 0xF3, 0x7F, 0x8D
|
||||
};
|
||||
static const br_x509_trust_anchor TAs[2] = {
|
||||
{
|
||||
{ (unsigned char *)TA0_DN, sizeof TA0_DN },
|
||||
BR_X509_TA_CA,
|
||||
{
|
||||
BR_KEYTYPE_RSA,
|
||||
{ .rsa = {
|
||||
(unsigned char *)TA0_RSA_N, sizeof TA0_RSA_N,
|
||||
(unsigned char *)TA0_RSA_E, sizeof TA0_RSA_E,
|
||||
} }
|
||||
}
|
||||
},
|
||||
{
|
||||
{ (unsigned char *)TA1_DN, sizeof TA1_DN },
|
||||
BR_X509_TA_CA,
|
||||
{
|
||||
BR_KEYTYPE_EC,
|
||||
{ .ec = {
|
||||
BR_EC_secp256r1,
|
||||
(unsigned char *)TA1_EC_Q, sizeof TA1_EC_Q,
|
||||
} }
|
||||
}
|
||||
}
|
||||
};
|
||||
#define TAs_NUM 2
|
||||
|
||||
/* Creates the BearSSL client state for a socket.
 *   socket  - socket that sock_read / sock_write will operate on
 *   host_   - server name, UTF-8 encoded and passed to br_ssl_client_reset
 *   out_ctx - receives the newly allocated SSLContext (release with SSL_Free)
 * Returns 0 on success, or ERR_OUT_OF_MEMORY when the context cannot be allocated. */
cc_result SSL_Init(cc_socket socket, const cc_string* host_, void** out_ctx) {
	SSLContext* ctx;
	char host[NATIVE_STR_LEN];
	String_EncodeUtf8(host, host_);

	ctx = Mem_TryAlloc(1, sizeof(SSLContext));
	if (!ctx) return ERR_OUT_OF_MEMORY;
	*out_ctx = (void*)ctx;

	br_ssl_client_init_full(&ctx->sc, &ctx->xc, TAs, TAs_NUM);
	/* NOTE(review): this branch only installs signature verification routines;
	 * it does not look like it relaxes certificate validation, so disabling
	 * verification still appears to be unimplemented (see TODO in SSLBackend_Init) */
	if (!_verifyCerts) {
		br_x509_minimal_set_rsa(&ctx->xc,   &br_rsa_i31_pkcs1_vrfy);
		br_x509_minimal_set_ecdsa(&ctx->xc, &br_ec_prime_i31, &br_ecdsa_i31_vrfy_asn1);
	}
	br_ssl_engine_set_buffer(&ctx->sc.eng, ctx->iobuf, sizeof(ctx->iobuf), 1);
	/* NOTE(review): the result of br_ssl_client_reset is not checked */
	br_ssl_client_reset(&ctx->sc, host, 0);

	/* The socket handle travels to the callbacks inside the opaque context pointer */
	br_sslio_init(&ctx->ioc, &ctx->sc.eng,
			sock_read,  (void*)(uintptr_t)socket,
			sock_write, (void*)(uintptr_t)socket);

	return 0;
}
|
||||
|
||||
/* Reads decrypted application data from the TLS connection.
 *   ctx_  - context created by SSL_Init
 *   data  - destination buffer
 *   count - capacity of data in bytes
 *   read  - receives the number of bytes stored in data (0 on any failure)
 * Returns 0 on success, otherwise the BearSSL engine error code.
 *
 * br_sslio_read also fails once the connection has been closed, in which
 * case the engine error may be 0. *read is therefore cleared up front so
 * that such a close is seen by the caller as "0 bytes read" rather than
 * an uninitialised count. */
cc_result SSL_Read(void* ctx_, cc_uint8* data, cc_uint32 count, cc_uint32* read) {
	SSLContext* ctx = (SSLContext*)ctx_;
	int res;

	*read = 0;
	res   = br_sslio_read(&ctx->ioc, data, count);
	if (res < 0) return br_ssl_engine_last_error(&ctx->sc.eng);

	// TODO: is flushing the write path after a read actually required ??
	br_sslio_flush(&ctx->ioc);
	*read = res;
	return 0;
}
|
||||
|
||||
/* Encrypts and sends application data over the TLS connection.
 *   ctx_  - context created by SSL_Init
 *   data  - bytes to send
 *   count - number of bytes in data
 *   wrote - receives the number of bytes sent: count on success, 0 on failure
 * Returns 0 on success, otherwise the BearSSL engine error code.
 *
 * br_sslio_write_all returns 0 (not a byte count) on success, so the
 * amount written is reported as count once the data has been flushed.
 * NOTE(review): if the connection was closed without an engine error,
 * this returns 0 with *wrote == 0 - callers should treat that as a close. */
cc_result SSL_Write(void* ctx_, const cc_uint8* data, cc_uint32 count, cc_uint32* wrote) {
	SSLContext* ctx = (SSLContext*)ctx_;
	int res;

	*wrote = 0;
	// TODO: just br_sslio_write ??
	res = br_sslio_write_all(&ctx->ioc, data, count);
	if (res < 0) return br_ssl_engine_last_error(&ctx->sc.eng);

	/* Data only counts as written once the buffered records reached the socket */
	res = br_sslio_flush(&ctx->ioc);
	if (res < 0) return br_ssl_engine_last_error(&ctx->sc.eng);

	*wrote = count;
	return 0;
}
|
||||
|
||||
cc_result SSL_Free(void* ctx_) {
|
||||
SSLContext* ctx = (SSLContext*)ctx_;
|
||||
if (ctx) br_sslio_close(&ctx->ioc);
|
||||
|
||||
Mem_Free(ctx_);
|
||||
return 0;
|
||||
}
|
||||
#elif defined CC_BUILD_3DS
|
||||
#include <3ds.h>
|
||||
#include "String.h"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,21 @@
|
|||
Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
|
@ -0,0 +1,136 @@
|
|||
# Documentation
|
||||
|
||||
The most up-to-date documentation is supposed to be available on the
|
||||
[BearSSL Web site](https://www.bearssl.org/).
|
||||
|
||||
# Disclaimer
|
||||
|
||||
BearSSL is considered beta-level software. Most planned functionalities
|
||||
are implemented; new evolution may still break both source and binary
|
||||
compatibility.
|
||||
|
||||
Using BearSSL for production purposes would be a relatively bold but not
|
||||
utterly crazy move. BearSSL is free, open-source software, provided
|
||||
without any guarantee of fitness or reliability. That being said, it
|
||||
appears to behave properly, and only minor issues have been found (and
|
||||
fixed) so far. You are encouraged to inspect its API and code for
|
||||
learning, testing and possibly contributing.
|
||||
|
||||
The usage license is spelled out in the `LICENSE.txt` file. This is the
|
||||
"MIT license". It can be summarised in the following way:
|
||||
|
||||
- You can use and reuse the library as you wish, and modify it, and
|
||||
integrate it in your own code, and distribute it as is or in any
|
||||
modified form, and so on.
|
||||
|
||||
- The only obligation that the license terms put upon you is that you
|
||||
acknowledge and make it clear that if anything breaks, it is not my
|
||||
fault, and I am not liable for anything, regardless of the type and
|
||||
amount of collateral damage. The license terms say that the copyright
|
||||
notice "shall be included in all copies or substantial portions of
|
||||
the Software": this is how the disclaimer is "made explicit".
|
||||
Basically, I have put it in every source file, so just keep it there.
|
||||
|
||||
# Installation
|
||||
|
||||
Right now, BearSSL is a simple library, along with a few test and debug
|
||||
command-line tools. There is no installer yet. The library _can_ be
|
||||
compiled as a shared library on some systems, but since the binary API
|
||||
is not fully stabilised, it is not a very good idea to do that right
|
||||
now.
|
||||
|
||||
To compile the code, just type `make`. This will try to use sane
|
||||
"default" values. On a Windows system with Visual Studio, run a console
|
||||
with the environment initialised for a specific version of the C compiler,
|
||||
and type `nmake`.
|
||||
|
||||
To override the default settings, create a custom configuration file in
|
||||
the `conf` directory, and invoke `make` (or `nmake`) with an explicit
|
||||
`CONF=` parameter. For instance, to use the provided `samd20.mk`
|
||||
configuration file (that targets cross-compilation for an Atmel board
|
||||
that features a Cortex-M0+ CPU), type:
|
||||
|
||||
make CONF=samd20
|
||||
|
||||
The `conf/samd20.mk` file includes the `Unix.mk` file and then overrides
|
||||
some of the parameters, including the destination directory. Any custom
|
||||
configuration can be made the same way.
|
||||
|
||||
Some compile-time options can be set through macros, either on the
|
||||
compiler command-line, or in the `src/config.h` file. See the comments
|
||||
in that file. Some settings are autodetected but they can still be
|
||||
explicitly overridden.
|
||||
|
||||
When compilation is done, the library (static and DLL, when appropriate)
|
||||
and the command-line tools can be found in the designated build
|
||||
directory (by default named `build`). The public headers (to be used
|
||||
by applications linked against BearSSL) are in the `inc/` directory.
|
||||
|
||||
To run the tests:
|
||||
|
||||
- `testcrypto all` runs the cryptographic tests (test vectors on all
|
||||
implemented cryptographic functions). It can be slow. You can also
|
||||
run a selection of the tests by providing their names (run
|
||||
`testcrypto` without any parameter to see the available names).
|
||||
|
||||
- `testspeed all` runs a number of performance benchmarks, there again
|
||||
on cryptographic functions. It gives a taste of how things go on the
|
||||
current platform. As for `testcrypto`, specific named benchmarks can
|
||||
be executed.
|
||||
|
||||
- `testx509` runs X.509 validation tests. The test certificates are
|
||||
all in `test/x509/`.
|
||||
|
||||
The `brssl` command-line tool produced in the build directory is a
|
||||
stand-alone binary. It can exercise some of the functionalities of
|
||||
BearSSL, in particular running a test SSL client or server. It is not
|
||||
meant for production purposes (e.g. the SSL client has a mode where it
|
||||
disregards the inability to validate the server's certificate, which is
|
||||
inherently unsafe, but convenient for debug).
|
||||
|
||||
**Using the library** means writing some application code that invokes
|
||||
it, and linking with the static library. The header files are all in the
|
||||
`inc` directory; copy them wherever makes sense (e.g. in the
|
||||
`/usr/local/include` directory). The library itself (`libbearssl.a`) is
|
||||
what you link against.
|
||||
|
||||
Alternatively, you may want to copy the source files directly into your
|
||||
own application code. This will make integrating ulterior versions of
|
||||
BearSSL more difficult. If you still want to go down that road, then
|
||||
simply copy all the `*.h` and `*.c` files from the `src` and `inc`
|
||||
directories into your application source code. In the BearSSL source
|
||||
archive, the source files are segregated into various sub-directories,
|
||||
but this is for my convenience only. There is no technical requirement
|
||||
for that, and all files can be dumped together in a simple directory.
|
||||
|
||||
Dependencies are simple and systematic:
|
||||
|
||||
- Each `*.c` file includes `inner.h`
|
||||
- `inner.h` includes `config.h` and `bearssl.h`
|
||||
- `bearssl.h` includes the other `bearssl_*.h`
|
||||
|
||||
# Versioning
|
||||
|
||||
I follow this simple version numbering scheme:
|
||||
|
||||
- Version numbers are `x.y` or `x.y.z` where `x`, `y` and `z` are
|
||||
decimal integers (possibly greater than 10). When the `.z` part is
|
||||
missing, it is equivalent to `.0`.
|
||||
|
||||
- Backward compatibility is maintained, at both source and binary levels,
|
||||
for each major version: this means that if some application code was
|
||||
designed for version `x.y`, then it should compile, link and run
|
||||
properly with any version `x.y'` for any `y'` greater than `y`.
|
||||
|
||||
The major version `0` is an exception. You shall not expect that any
|
||||
version that starts with `0.` offers any kind of compatibility,
|
||||
either source or binary, with any other `0.` version. (Of course I
|
||||
will try to maintain some decent level of source compatibility, but I
|
||||
make no promise in that respect. Since the API uses caller-allocated
|
||||
context structures, I already know that binary compatibility _will_
|
||||
be broken.)
|
||||
|
||||
- Sub-versions (the `y` part) are about added functionality. That is,
|
||||
it can be expected that `1.3` will contain some extra functions when
|
||||
compared to `1.2`. The next version level (the `z` part) is for
|
||||
bugfixes that do not add any functionality.
|
||||
|
|
@ -0,0 +1,183 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BR_BEARSSL_H__
|
||||
#define BR_BEARSSL_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/** \mainpage BearSSL API
|
||||
*
|
||||
* # API Layout
|
||||
*
|
||||
* The functions and structures defined by the BearSSL API are located
|
||||
* in various header files:
|
||||
*
|
||||
* | Header file | Elements |
|
||||
* | :-------------- | :------------------------------------------------ |
|
||||
* | bearssl_hash.h | Hash functions |
|
||||
* | bearssl_hmac.h | HMAC |
|
||||
* | bearssl_kdf.h | Key Derivation Functions |
|
||||
* | bearssl_rand.h | Pseudorandom byte generators |
|
||||
* | bearssl_prf.h | PRF implementations (for SSL/TLS) |
|
||||
* | bearssl_block.h | Symmetric encryption |
|
||||
* | bearssl_aead.h | AEAD algorithms (combined encryption + MAC) |
|
||||
* | bearssl_rsa.h | RSA encryption and signatures |
|
||||
* | bearssl_ec.h | Elliptic curves support (including ECDSA) |
|
||||
* | bearssl_ssl.h | SSL/TLS engine interface |
|
||||
* | bearssl_x509.h | X.509 certificate decoding and validation |
|
||||
* | bearssl_pem.h | Base64/PEM decoding support functions |
|
||||
*
|
||||
* Applications using BearSSL are supposed to simply include `bearssl.h`
|
||||
* as follows:
|
||||
*
|
||||
* #include <bearssl.h>
|
||||
*
|
||||
* The `bearssl.h` file itself includes all the other header files. It is
|
||||
* possible to include specific header files, but it has no practical
|
||||
* advantage for the application. The API is separated into separate
|
||||
* header files only for documentation convenience.
|
||||
*
|
||||
*
|
||||
* # Conventions
|
||||
*
|
||||
* ## MUST and SHALL
|
||||
*
|
||||
* In all descriptions, the usual "MUST", "SHALL", "MAY",... terminology
|
||||
* is used. Failure to meet requirements expressed with a "MUST" or
|
||||
* "SHALL" implies undefined behaviour, which means that segmentation
|
||||
* faults, buffer overflows, and other similar adverse events, may occur.
|
||||
*
|
||||
* In general, BearSSL is not very forgiving of programming errors, and
|
||||
* does not include many failsafes or much error reporting when the problem
|
||||
* does not arise from external transient conditions, and can be fixed
|
||||
* only in the application code. This is done so in order to make the
|
||||
* total code footprint lighter.
|
||||
*
|
||||
*
|
||||
* ## `NULL` values
|
||||
*
|
||||
* Function parameters with a pointer type shall not be `NULL` unless
|
||||
* explicitly authorised by the documentation. As an exception, when
|
||||
* the pointer aims at a sequence of bytes and is accompanied with
|
||||
* a length parameter, and the length is zero (meaning that there is
|
||||
* no byte at all to retrieve), then the pointer may be `NULL` even if
|
||||
* not explicitly allowed.
|
||||
*
|
||||
*
|
||||
* ## Memory Allocation
|
||||
*
|
||||
* BearSSL does not perform dynamic memory allocation. This implies that
|
||||
* for any functionality that requires a non-transient state, the caller
|
||||
* is responsible for allocating the relevant context structure. Such
|
||||
* allocation can be done in any appropriate area, including static data
|
||||
* segments, the heap, and the stack, provided that proper alignment is
|
||||
* respected. The header files define these context structures
|
||||
* (including size and contents), so the C compiler should handle
|
||||
* alignment automatically.
|
||||
*
|
||||
* Since there is no dynamic resource allocation, there is also nothing to
|
||||
* release. When the calling code is done with a BearSSL feature, it
|
||||
* may simply release the context structures it allocated itself, with
|
||||
* no "close function" to call. If the context structures were allocated
|
||||
* on the stack (as local variables), then even that release operation is
|
||||
* implicit.
|
||||
*
|
||||
*
|
||||
* ## Structure Contents
|
||||
*
|
||||
* Except when explicitly indicated, structure contents are opaque: they
|
||||
* are included in the header files so that calling code may know the
|
||||
* structure sizes and alignment requirements, but callers SHALL NOT
|
||||
* access individual fields directly. For fields that are supposed to
|
||||
* be read from or written to, the API defines accessor functions (the
|
||||
* simplest of these accessor functions are defined as `static inline`
|
||||
* functions, and the C compiler will optimise them away).
|
||||
*
|
||||
*
|
||||
* # API Usage
|
||||
*
|
||||
* BearSSL usage for running a SSL/TLS client or server is described
|
||||
* on the [BearSSL Web site](https://www.bearssl.org/api1.html). The
|
||||
* BearSSL source archive also comes with sample code.
|
||||
*/
|
||||
|
||||
#include "bearssl_hash.h"
|
||||
#include "bearssl_hmac.h"
|
||||
#include "bearssl_kdf.h"
|
||||
#include "bearssl_rand.h"
|
||||
#include "bearssl_prf.h"
|
||||
#include "bearssl_block.h"
|
||||
#include "bearssl_aead.h"
|
||||
#include "bearssl_rsa.h"
|
||||
#include "bearssl_ec.h"
|
||||
#include "bearssl_ssl.h"
|
||||
#include "bearssl_x509.h"
|
||||
#include "bearssl_pem.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** \brief Type for a configuration option.
|
||||
*
|
||||
* A "configuration option" is a value that is selected when the BearSSL
|
||||
* library itself is compiled. Most options are boolean; their value is
|
||||
* then either 1 (option is enabled) or 0 (option is disabled). Some
|
||||
* values have other integer values. Option names correspond to macro
|
||||
* names. Some of the options can be explicitly set in the internal
|
||||
* `"config.h"` file.
|
||||
*/
|
||||
typedef struct {
|
||||
/** \brief Configurable option name. */
|
||||
const char *name;
|
||||
/** \brief Configurable option value. */
|
||||
long value;
|
||||
} br_config_option;
|
||||
|
||||
/** \brief Get configuration report.
|
||||
*
|
||||
* This function returns compiled configuration options, each as a
|
||||
* 'long' value. Names match internal macro names, in particular those
|
||||
* that can be set in the `"config.h"` inner file. For boolean options,
|
||||
* the numerical value is 1 if enabled, 0 if disabled. For maximum
|
||||
* key sizes, values are expressed in bits.
|
||||
*
|
||||
* The returned array is terminated by an entry whose `name` is `NULL`.
|
||||
*
|
||||
* \return the configuration report.
|
||||
*/
|
||||
const br_config_option *br_get_config(void);
|
||||
|
||||
/* ======================================================================= */
|
||||
|
||||
/** \brief Version feature: support for time callback. */
|
||||
#define BR_FEATURE_X509_TIME_CALLBACK 1
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,967 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BR_BEARSSL_EC_H__
|
||||
#define BR_BEARSSL_EC_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "bearssl_rand.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** \file bearssl_ec.h
|
||||
*
|
||||
* # Elliptic Curves
|
||||
*
|
||||
* This file documents the EC implementations provided with BearSSL, and
|
||||
* ECDSA.
|
||||
*
|
||||
* ## Elliptic Curve API
|
||||
*
|
||||
* Only "named curves" are supported. Each EC implementation supports
|
||||
* one or several named curves, identified by symbolic identifiers.
|
||||
* These identifiers are small integers, that correspond to the values
|
||||
* registered by the
|
||||
* [IANA](http://www.iana.org/assignments/tls-parameters/tls-parameters.xhtml#tls-parameters-8).
|
||||
*
|
||||
* Since all currently defined elliptic curve identifiers are in the 0..31
|
||||
* range, it is convenient to encode support of some curves in a 32-bit
|
||||
* word, such that bit x corresponds to curve of identifier x.
|
||||
*
|
||||
* An EC implementation is incarnated by a `br_ec_impl` instance, that
|
||||
* offers the following fields:
|
||||
*
|
||||
* - `supported_curves`
|
||||
*
|
||||
* A 32-bit word that documents the identifiers of the curves supported
|
||||
* by this implementation.
|
||||
*
|
||||
* - `generator()`
|
||||
*
|
||||
* Callback method that returns a pointer to the conventional generator
|
||||
* point for that curve.
|
||||
*
|
||||
* - `order()`
|
||||
*
|
||||
* Callback method that returns a pointer to the subgroup order for
|
||||
* that curve. That value uses unsigned big-endian encoding.
|
||||
*
|
||||
* - `xoff()`
|
||||
*
|
||||
* Callback method that returns the offset and length of the X
|
||||
* coordinate in an encoded point.
|
||||
*
|
||||
* - `mul()`
|
||||
*
|
||||
* Multiply a curve point with an integer.
|
||||
*
|
||||
* - `mulgen()`
|
||||
*
|
||||
* Multiply the curve generator with an integer. This may be faster
|
||||
* than the generic `mul()`.
|
||||
*
|
||||
* - `muladd()`
|
||||
*
|
||||
* Multiply two curve points by two integers, and return the sum of
|
||||
* the two products.
|
||||
*
|
||||
* All curve points are represented in uncompressed format. The `mul()`
|
||||
* and `muladd()` methods take care to validate that the provided points
|
||||
* are really part of the relevant curve subgroup.
|
||||
*
|
||||
* For all point multiplication functions, the following holds:
|
||||
*
|
||||
* - Functions validate that the provided points are valid members
|
||||
* of the relevant curve subgroup. An error is reported if that is
|
||||
* not the case.
|
||||
*
|
||||
* - Processing is constant-time, even if the point operands are not
|
||||
* valid. This holds for both the source and resulting points, and
|
||||
* the multipliers (integers). Only the byte length of the provided
|
||||
* multiplier arrays (not their actual value length in bits) may
|
||||
* leak through timing-based side channels.
|
||||
*
|
||||
* - The multipliers (integers) MUST be lower than the subgroup order.
|
||||
* If this property is not met, then the result is indeterminate,
|
||||
* but an error value is not necessarily returned.
|
||||
*
|
||||
*
|
||||
* ## ECDSA
|
||||
*
|
||||
* ECDSA signatures have two standard formats, called "raw" and "asn1".
|
||||
* Internally, such a signature is a pair of modular integers `(r,s)`.
|
||||
* The "raw" format is the concatenation of the unsigned big-endian
|
||||
* encodings of these two integers, possibly left-padded with zeros so
|
||||
* that they have the same encoded length. The "asn1" format is the
|
||||
* DER encoding of an ASN.1 structure that contains the two integer
|
||||
* values:
|
||||
*
|
||||
* ECDSASignature ::= SEQUENCE {
|
||||
* r INTEGER,
|
||||
* s INTEGER
|
||||
* }
|
||||
*
|
||||
* In general, in all of X.509 and SSL/TLS, the "asn1" format is used.
|
||||
* BearSSL offers ECDSA implementations for both formats; conversion
|
||||
* functions between the two formats are also provided. Conversion of a
|
||||
* "raw" format signature into "asn1" may enlarge a signature by no more
|
||||
* than 9 bytes for all supported curves; conversely, conversion of an
|
||||
* "asn1" signature to "raw" may expand the signature but the "raw"
|
||||
* length will never be more than twice the length of the "asn1" length
|
||||
* (and usually it will be shorter).
|
||||
*
|
||||
* Note that for a given signature, the "raw" format is not fully
|
||||
* deterministic, in that it does not enforce a minimal common length.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Standard curve IDs. These IDs are equal to the numerical
|
||||
* identifiers assigned to these curves for TLS:
|
||||
* http://www.iana.org/assignments/tls-parameters/tls-parameters.xhtml#tls-parameters-8
|
||||
*/
|
||||
|
||||
/** \brief Identifier for named curve sect163k1. */
|
||||
#define BR_EC_sect163k1 1
|
||||
|
||||
/** \brief Identifier for named curve sect163r1. */
|
||||
#define BR_EC_sect163r1 2
|
||||
|
||||
/** \brief Identifier for named curve sect163r2. */
|
||||
#define BR_EC_sect163r2 3
|
||||
|
||||
/** \brief Identifier for named curve sect193r1. */
|
||||
#define BR_EC_sect193r1 4
|
||||
|
||||
/** \brief Identifier for named curve sect193r2. */
|
||||
#define BR_EC_sect193r2 5
|
||||
|
||||
/** \brief Identifier for named curve sect233k1. */
|
||||
#define BR_EC_sect233k1 6
|
||||
|
||||
/** \brief Identifier for named curve sect233r1. */
|
||||
#define BR_EC_sect233r1 7
|
||||
|
||||
/** \brief Identifier for named curve sect239k1. */
|
||||
#define BR_EC_sect239k1 8
|
||||
|
||||
/** \brief Identifier for named curve sect283k1. */
|
||||
#define BR_EC_sect283k1 9
|
||||
|
||||
/** \brief Identifier for named curve sect283r1. */
|
||||
#define BR_EC_sect283r1 10
|
||||
|
||||
/** \brief Identifier for named curve sect409k1. */
|
||||
#define BR_EC_sect409k1 11
|
||||
|
||||
/** \brief Identifier for named curve sect409r1. */
|
||||
#define BR_EC_sect409r1 12
|
||||
|
||||
/** \brief Identifier for named curve sect571k1. */
|
||||
#define BR_EC_sect571k1 13
|
||||
|
||||
/** \brief Identifier for named curve sect571r1. */
|
||||
#define BR_EC_sect571r1 14
|
||||
|
||||
/** \brief Identifier for named curve secp160k1. */
|
||||
#define BR_EC_secp160k1 15
|
||||
|
||||
/** \brief Identifier for named curve secp160r1. */
|
||||
#define BR_EC_secp160r1 16
|
||||
|
||||
/** \brief Identifier for named curve secp160r2. */
|
||||
#define BR_EC_secp160r2 17
|
||||
|
||||
/** \brief Identifier for named curve secp192k1. */
|
||||
#define BR_EC_secp192k1 18
|
||||
|
||||
/** \brief Identifier for named curve secp192r1. */
|
||||
#define BR_EC_secp192r1 19
|
||||
|
||||
/** \brief Identifier for named curve secp224k1. */
|
||||
#define BR_EC_secp224k1 20
|
||||
|
||||
/** \brief Identifier for named curve secp224r1. */
|
||||
#define BR_EC_secp224r1 21
|
||||
|
||||
/** \brief Identifier for named curve secp256k1. */
|
||||
#define BR_EC_secp256k1 22
|
||||
|
||||
/** \brief Identifier for named curve secp256r1. */
|
||||
#define BR_EC_secp256r1 23
|
||||
|
||||
/** \brief Identifier for named curve secp384r1. */
|
||||
#define BR_EC_secp384r1 24
|
||||
|
||||
/** \brief Identifier for named curve secp521r1. */
|
||||
#define BR_EC_secp521r1 25
|
||||
|
||||
/** \brief Identifier for named curve brainpoolP256r1. */
|
||||
#define BR_EC_brainpoolP256r1 26
|
||||
|
||||
/** \brief Identifier for named curve brainpoolP384r1. */
|
||||
#define BR_EC_brainpoolP384r1 27
|
||||
|
||||
/** \brief Identifier for named curve brainpoolP512r1. */
|
||||
#define BR_EC_brainpoolP512r1 28
|
||||
|
||||
/** \brief Identifier for named curve Curve25519. */
|
||||
#define BR_EC_curve25519 29
|
||||
|
||||
/** \brief Identifier for named curve Curve448. */
|
||||
#define BR_EC_curve448 30
|
||||
|
||||
/**
|
||||
* \brief Structure for an EC public key.
|
||||
*/
|
||||
typedef struct {
|
||||
/** \brief Identifier for the curve used by this key. */
|
||||
int curve;
|
||||
/** \brief Public curve point (uncompressed format). */
|
||||
unsigned char *q;
|
||||
/** \brief Length of public curve point (in bytes). */
|
||||
size_t qlen;
|
||||
} br_ec_public_key;
|
||||
|
||||
/**
|
||||
* \brief Structure for an EC private key.
|
||||
*
|
||||
* The private key is an integer modulo the curve subgroup order. The
|
||||
* encoding below tolerates extra leading zeros. In general, it is
|
||||
* recommended that the private key has the same length as the curve
|
||||
* subgroup order.
|
||||
*/
|
||||
typedef struct {
|
||||
/** \brief Identifier for the curve used by this key. */
|
||||
int curve;
|
||||
/** \brief Private key (integer, unsigned big-endian encoding). */
|
||||
unsigned char *x;
|
||||
/** \brief Private key length (in bytes). */
|
||||
size_t xlen;
|
||||
} br_ec_private_key;
|
||||
|
||||
/**
|
||||
* \brief Type for an EC implementation.
|
||||
*/
|
||||
typedef struct {
|
||||
/**
|
||||
* \brief Supported curves.
|
||||
*
|
||||
* This word is a bitfield: bit `x` is set if the curve of ID `x`
|
||||
* is supported. E.g. an implementation supporting both NIST P-256
|
||||
* (secp256r1, ID 23) and NIST P-384 (secp384r1, ID 24) will have
|
||||
* value `0x01800000` in this field.
|
||||
*/
|
||||
uint32_t supported_curves;
|
||||
|
||||
/**
|
||||
* \brief Get the conventional generator.
|
||||
*
|
||||
* This function returns the conventional generator (encoded
|
||||
* curve point) for the specified curve. This function MUST NOT
|
||||
* be called if the curve is not supported.
|
||||
*
|
||||
* \param curve curve identifier.
|
||||
* \param len receiver for the encoded generator length (in bytes).
|
||||
* \return the encoded generator.
|
||||
*/
|
||||
const unsigned char *(*generator)(int curve, size_t *len);
|
||||
|
||||
/**
|
||||
* \brief Get the subgroup order.
|
||||
*
|
||||
* This function returns the order of the subgroup generated by
|
||||
* the conventional generator, for the specified curve. Unsigned
|
||||
* big-endian encoding is used. This function MUST NOT be called
|
||||
* if the curve is not supported.
|
||||
*
|
||||
* \param curve curve identifier.
|
||||
* \param len receiver for the encoded order length (in bytes).
|
||||
* \return the encoded order.
|
||||
*/
|
||||
const unsigned char *(*order)(int curve, size_t *len);
|
||||
|
||||
/**
|
||||
* \brief Get the offset and length for the X coordinate.
|
||||
*
|
||||
* This function returns the offset and length (in bytes) of
|
||||
* the X coordinate in an encoded non-zero point.
|
||||
*
|
||||
* \param curve curve identifier.
|
||||
* \param len receiver for the X coordinate length (in bytes).
|
||||
* \return the offset for the X coordinate (in bytes).
|
||||
*/
|
||||
size_t (*xoff)(int curve, size_t *len);
|
||||
|
||||
/**
|
||||
* \brief Multiply a curve point by an integer.
|
||||
*
|
||||
* The source point is provided in array `G` (of size `Glen` bytes);
|
||||
* the multiplication result is written over it. The multiplier
|
||||
* `x` (of size `xlen` bytes) uses unsigned big-endian encoding.
|
||||
*
|
||||
* Rules:
|
||||
*
|
||||
* - The specified curve MUST be supported.
|
||||
*
|
||||
* - The source point must be a valid point on the relevant curve
|
||||
* subgroup (and not the "point at infinity" either). If this is
|
||||
* not the case, then this function returns an error (0).
|
||||
*
|
||||
* - The multiplier integer MUST be non-zero and less than the
|
||||
* curve subgroup order. If this property does not hold, then
|
||||
* the result is indeterminate and an error code is not
|
||||
* guaranteed.
|
||||
*
|
||||
* Returned value is 1 on success, 0 on error. On error, the
|
||||
* contents of `G` are indeterminate.
|
||||
*
|
||||
* \param G point to multiply.
|
||||
* \param Glen length of the encoded point (in bytes).
|
||||
* \param x multiplier (unsigned big-endian).
|
||||
* \param xlen multiplier length (in bytes).
|
||||
* \param curve curve identifier.
|
||||
* \return 1 on success, 0 on error.
|
||||
*/
|
||||
uint32_t (*mul)(unsigned char *G, size_t Glen,
|
||||
const unsigned char *x, size_t xlen, int curve);
|
||||
|
||||
/**
|
||||
* \brief Multiply the generator by an integer.
|
||||
*
|
||||
* The multiplier MUST be non-zero and less than the curve
|
||||
* subgroup order. Results are indeterminate if this property
|
||||
* does not hold.
|
||||
*
|
||||
* \param R output buffer for the point.
|
||||
* \param x multiplier (unsigned big-endian).
|
||||
* \param xlen multiplier length (in bytes).
|
||||
* \param curve curve identifier.
|
||||
* \return encoded result point length (in bytes).
|
||||
*/
|
||||
size_t (*mulgen)(unsigned char *R,
|
||||
const unsigned char *x, size_t xlen, int curve);
|
||||
|
||||
/**
|
||||
* \brief Multiply two points by two integers and add the
|
||||
* results.
|
||||
*
|
||||
* The point `x*A + y*B` is computed and written back in the `A`
|
||||
* array.
|
||||
*
|
||||
* Rules:
|
||||
*
|
||||
* - The specified curve MUST be supported.
|
||||
*
|
||||
* - The source points (`A` and `B`) must be valid points on
|
||||
* the relevant curve subgroup (and not the "point at
|
||||
* infinity" either). If this is not the case, then this
|
||||
* function returns an error (0).
|
||||
*
|
||||
* - If the `B` pointer is `NULL`, then the conventional
|
||||
* subgroup generator is used. With some implementations,
|
||||
* this may be faster than providing a pointer to the
|
||||
* generator.
|
||||
*
|
||||
* - The multiplier integers (`x` and `y`) MUST be non-zero
|
||||
* and less than the curve subgroup order. If either integer
|
||||
* is zero, then an error is reported, but if one of them is
|
||||
* not lower than the subgroup order, then the result is
|
||||
* indeterminate and an error code is not guaranteed.
|
||||
*
|
||||
* - If the final result is the point at infinity, then an
|
||||
* error is returned.
|
||||
*
|
||||
* Returned value is 1 on success, 0 on error. On error, the
|
||||
* contents of `A` are indeterminate.
|
||||
*
|
||||
* \param A first point to multiply.
|
||||
* \param B second point to multiply (`NULL` for the generator).
|
||||
* \param len common length of the encoded points (in bytes).
|
||||
* \param x multiplier for `A` (unsigned big-endian).
|
||||
* \param xlen length of multiplier for `A` (in bytes).
|
||||
* \param y multiplier for `B` (unsigned big-endian).
|
||||
* \param ylen length of multiplier for `B` (in bytes).
|
||||
* \param curve curve identifier.
|
||||
* \return 1 on success, 0 on error.
|
||||
*/
|
||||
uint32_t (*muladd)(unsigned char *A, const unsigned char *B, size_t len,
|
||||
const unsigned char *x, size_t xlen,
|
||||
const unsigned char *y, size_t ylen, int curve);
|
||||
} br_ec_impl;
|
||||
|
||||
/**
|
||||
* \brief EC implementation "i31".
|
||||
*
|
||||
* This implementation internally uses generic code for modular integers,
|
||||
* with a representation as sequences of 31-bit words. It supports secp256r1,
|
||||
* secp384r1 and secp521r1 (aka NIST curves P-256, P-384 and P-521).
|
||||
*/
|
||||
extern const br_ec_impl br_ec_prime_i31;
|
||||
|
||||
/**
|
||||
* \brief EC implementation "i15".
|
||||
*
|
||||
* This implementation internally uses generic code for modular integers,
|
||||
* with a representation as sequences of 15-bit words. It supports secp256r1,
|
||||
* secp384r1 and secp521r1 (aka NIST curves P-256, P-384 and P-521).
|
||||
*/
|
||||
extern const br_ec_impl br_ec_prime_i15;
|
||||
|
||||
/**
|
||||
* \brief EC implementation "m15" for P-256.
|
||||
*
|
||||
* This implementation uses specialised code for curve secp256r1 (also
|
||||
* known as NIST P-256), with optional Karatsuba decomposition, and fast
|
||||
* modular reduction thanks to the field modulus special format. Only
|
||||
* 32-bit multiplications are used (with 32-bit results, not 64-bit).
|
||||
*/
|
||||
extern const br_ec_impl br_ec_p256_m15;
|
||||
|
||||
/**
|
||||
* \brief EC implementation "m31" for P-256.
|
||||
*
|
||||
* This implementation uses specialised code for curve secp256r1 (also
|
||||
* known as NIST P-256), relying on multiplications of 31-bit values
|
||||
* (MUL31).
|
||||
*/
|
||||
extern const br_ec_impl br_ec_p256_m31;
|
||||
|
||||
/**
|
||||
* \brief EC implementation "m62" (specialised code) for P-256.
|
||||
*
|
||||
* This implementation uses custom code relying on multiplication of
|
||||
* integers up to 64 bits, with a 128-bit result. This implementation is
|
||||
* defined only on platforms that offer the 64x64->128 multiplication
|
||||
* support; use `br_ec_p256_m62_get()` to dynamically obtain a pointer
|
||||
* to that implementation.
|
||||
*/
|
||||
extern const br_ec_impl br_ec_p256_m62;
|
||||
|
||||
/**
|
||||
* \brief Get the "m62" implementation of P-256, if available.
|
||||
*
|
||||
* \return the implementation, or 0.
|
||||
*/
|
||||
const br_ec_impl *br_ec_p256_m62_get(void);
|
||||
|
||||
/**
|
||||
* \brief EC implementation "m64" (specialised code) for P-256.
|
||||
*
|
||||
* This implementation uses custom code relying on multiplication of
|
||||
* integers up to 64 bits, with a 128-bit result. This implementation is
|
||||
* defined only on platforms that offer the 64x64->128 multiplication
|
||||
* support; use `br_ec_p256_m64_get()` to dynamically obtain a pointer
|
||||
* to that implementation.
|
||||
*/
|
||||
extern const br_ec_impl br_ec_p256_m64;
|
||||
|
||||
/**
|
||||
* \brief Get the "m64" implementation of P-256, if available.
|
||||
*
|
||||
* \return the implementation, or 0.
|
||||
*/
|
||||
const br_ec_impl *br_ec_p256_m64_get(void);
|
||||
|
||||
/**
|
||||
* \brief EC implementation "i15" (generic code) for Curve25519.
|
||||
*
|
||||
* This implementation uses the generic code for modular integers (with
|
||||
* 15-bit words) to support Curve25519. Due to the specificities of the
|
||||
* curve definition, the following applies:
|
||||
*
|
||||
* - `muladd()` is not implemented (the function returns 0 systematically).
|
||||
* - `order()` returns 2^255-1, since the point multiplication algorithm
|
||||
* accepts any 32-byte integer as input (it clears the top bit and low
|
||||
* three bits systematically).
|
||||
*/
|
||||
extern const br_ec_impl br_ec_c25519_i15;
|
||||
|
||||
/**
|
||||
* \brief EC implementation "i31" (generic code) for Curve25519.
|
||||
*
|
||||
* This implementation uses the generic code for modular integers (with
|
||||
* 31-bit words) to support Curve25519. Due to the specificities of the
|
||||
* curve definition, the following applies:
|
||||
*
|
||||
* - `muladd()` is not implemented (the function returns 0 systematically).
|
||||
* - `order()` returns 2^255-1, since the point multiplication algorithm
|
||||
* accepts any 32-byte integer as input (it clears the top bit and low
|
||||
* three bits systematically).
|
||||
*/
|
||||
extern const br_ec_impl br_ec_c25519_i31;
|
||||
|
||||
/**
|
||||
* \brief EC implementation "m15" (specialised code) for Curve25519.
|
||||
*
|
||||
* This implementation uses custom code relying on multiplication of
|
||||
* integers up to 15 bits. Due to the specificities of the curve
|
||||
* definition, the following applies:
|
||||
*
|
||||
* - `muladd()` is not implemented (the function returns 0 systematically).
|
||||
* - `order()` returns 2^255-1, since the point multiplication algorithm
|
||||
* accepts any 32-byte integer as input (it clears the top bit and low
|
||||
* three bits systematically).
|
||||
*/
|
||||
extern const br_ec_impl br_ec_c25519_m15;
|
||||
|
||||
/**
|
||||
* \brief EC implementation "m31" (specialised code) for Curve25519.
|
||||
*
|
||||
* This implementation uses custom code relying on multiplication of
|
||||
* integers up to 31 bits. Due to the specificities of the curve
|
||||
* definition, the following applies:
|
||||
*
|
||||
* - `muladd()` is not implemented (the function returns 0 systematically).
|
||||
* - `order()` returns 2^255-1, since the point multiplication algorithm
|
||||
* accepts any 32-byte integer as input (it clears the top bit and low
|
||||
* three bits systematically).
|
||||
*/
|
||||
extern const br_ec_impl br_ec_c25519_m31;
|
||||
|
||||
/**
|
||||
* \brief EC implementation "m62" (specialised code) for Curve25519.
|
||||
*
|
||||
* This implementation uses custom code relying on multiplication of
|
||||
* integers up to 62 bits, with a 124-bit result. This implementation is
|
||||
* defined only on platforms that offer the 64x64->128 multiplication
|
||||
* support; use `br_ec_c25519_m62_get()` to dynamically obtain a pointer
|
||||
* to that implementation. Due to the specificities of the curve
|
||||
* definition, the following applies:
|
||||
*
|
||||
* - `muladd()` is not implemented (the function returns 0 systematically).
|
||||
* - `order()` returns 2^255-1, since the point multiplication algorithm
|
||||
* accepts any 32-byte integer as input (it clears the top bit and low
|
||||
* three bits systematically).
|
||||
*/
|
||||
extern const br_ec_impl br_ec_c25519_m62;
|
||||
|
||||
/**
|
||||
* \brief Get the "m62" implementation of Curve25519, if available.
|
||||
*
|
||||
* \return the implementation, or 0.
|
||||
*/
|
||||
const br_ec_impl *br_ec_c25519_m62_get(void);
|
||||
|
||||
/**
|
||||
* \brief EC implementation "m64" (specialised code) for Curve25519.
|
||||
*
|
||||
* This implementation uses custom code relying on multiplication of
|
||||
* integers up to 64 bits, with a 128-bit result. This implementation is
|
||||
* defined only on platforms that offer the 64x64->128 multiplication
|
||||
* support; use `br_ec_c25519_m64_get()` to dynamically obtain a pointer
|
||||
* to that implementation. Due to the specificities of the curve
|
||||
* definition, the following applies:
|
||||
*
|
||||
* - `muladd()` is not implemented (the function returns 0 systematically).
|
||||
* - `order()` returns 2^255-1, since the point multiplication algorithm
|
||||
* accepts any 32-byte integer as input (it clears the top bit and low
|
||||
* three bits systematically).
|
||||
*/
|
||||
extern const br_ec_impl br_ec_c25519_m64;
|
||||
|
||||
/**
|
||||
* \brief Get the "m64" implementation of Curve25519, if available.
|
||||
*
|
||||
* \return the implementation, or 0.
|
||||
*/
|
||||
const br_ec_impl *br_ec_c25519_m64_get(void);
|
||||
|
||||
/**
|
||||
* \brief Aggregate EC implementation "m15".
|
||||
*
|
||||
* This implementation is a wrapper for:
|
||||
*
|
||||
* - `br_ec_c25519_m15` for Curve25519
|
||||
* - `br_ec_p256_m15` for NIST P-256
|
||||
* - `br_ec_prime_i15` for other curves (NIST P-384 and NIST P-521)
|
||||
*/
|
||||
extern const br_ec_impl br_ec_all_m15;
|
||||
|
||||
/**
|
||||
* \brief Aggregate EC implementation "m31".
|
||||
*
|
||||
* This implementation is a wrapper for:
|
||||
*
|
||||
* - `br_ec_c25519_m31` for Curve25519
|
||||
* - `br_ec_p256_m31` for NIST P-256
|
||||
* - `br_ec_prime_i31` for other curves (NIST P-384 and NIST P-521)
|
||||
*/
|
||||
extern const br_ec_impl br_ec_all_m31;
|
||||
|
||||
/**
|
||||
* \brief Get the "default" EC implementation for the current system.
|
||||
*
|
||||
* This returns a pointer to the preferred implementation on the
|
||||
* current system.
|
||||
*
|
||||
* \return the default EC implementation.
|
||||
*/
|
||||
const br_ec_impl *br_ec_get_default(void);
|
||||
|
||||
/**
|
||||
* \brief Convert a signature from "raw" to "asn1".
|
||||
*
|
||||
* Conversion is done "in place" and the new length is returned.
|
||||
* Conversion may enlarge the signature, but by 9 bytes at
|
||||
* most. On error, 0 is returned (error conditions include an odd raw
|
||||
* signature length, or an oversized integer).
|
||||
*
|
||||
* \param sig signature to convert.
|
||||
* \param sig_len signature length (in bytes).
|
||||
* \return the new signature length, or 0 on error.
|
||||
*/
|
||||
size_t br_ecdsa_raw_to_asn1(void *sig, size_t sig_len);
|
||||
|
||||
/**
|
||||
* \brief Convert a signature from "asn1" to "raw".
|
||||
*
|
||||
* Conversion is done "in place" and the new length is returned.
|
||||
* Conversion may enlarge the signature, but the new signature length
|
||||
* will be at most twice the source length. On error, 0 is
|
||||
* returned (error conditions include an invalid ASN.1 structure or an
|
||||
* oversized integer).
|
||||
*
|
||||
* \param sig signature to convert.
|
||||
* \param sig_len signature length (in bytes).
|
||||
* \return the new signature length, or 0 on error.
|
||||
*/
|
||||
size_t br_ecdsa_asn1_to_raw(void *sig, size_t sig_len);
|
||||
|
||||
/**
|
||||
* \brief Type for an ECDSA signer function.
|
||||
*
|
||||
* A pointer to the EC implementation is provided. The hash value is
|
||||
* assumed to have the length inferred from the designated hash function
|
||||
* class.
|
||||
*
|
||||
* Signature is written in the buffer pointed to by `sig`, and the length
|
||||
* (in bytes) is returned. On error, nothing is written in the buffer,
|
||||
* and 0 is returned. This function returns 0 if the specified curve is
|
||||
* not supported by the provided EC implementation.
|
||||
*
|
||||
* The signature format is either "raw" or "asn1", depending on the
|
||||
* implementation; maximum length is predictable from the implemented
|
||||
* curve:
|
||||
*
|
||||
* | curve | raw | asn1 |
|
||||
* | :--------- | --: | ---: |
|
||||
* | NIST P-256 | 64 | 72 |
|
||||
* | NIST P-384 | 96 | 104 |
|
||||
* | NIST P-521 | 132 | 139 |
|
||||
*
|
||||
* \param impl EC implementation to use.
|
||||
* \param hf hash function used to process the data.
|
||||
* \param hash_value signed data (hashed).
|
||||
* \param sk EC private key.
|
||||
* \param sig destination buffer.
|
||||
* \return the signature length (in bytes), or 0 on error.
|
||||
*/
|
||||
typedef size_t (*br_ecdsa_sign)(const br_ec_impl *impl,
|
||||
const br_hash_class *hf, const void *hash_value,
|
||||
const br_ec_private_key *sk, void *sig);
|
||||
|
||||
/**
|
||||
* \brief Type for an ECDSA signature verification function.
|
||||
*
|
||||
* A pointer to the EC implementation is provided. The hashed value,
|
||||
* computed over the purportedly signed data, is also provided with
|
||||
* its length.
|
||||
*
|
||||
* The signature format is either "raw" or "asn1", depending on the
|
||||
* implementation.
|
||||
*
|
||||
* Returned value is 1 on success (valid signature), 0 on error. This
|
||||
* function returns 0 if the specified curve is not supported by the
|
||||
* provided EC implementation.
|
||||
*
|
||||
* \param impl EC implementation to use.
|
||||
* \param hash signed data (hashed).
|
||||
* \param hash_len hash value length (in bytes).
|
||||
* \param pk EC public key.
|
||||
* \param sig signature.
|
||||
* \param sig_len signature length (in bytes).
|
||||
* \return 1 on success, 0 on error.
|
||||
*/
|
||||
typedef uint32_t (*br_ecdsa_vrfy)(const br_ec_impl *impl,
|
||||
const void *hash, size_t hash_len,
|
||||
const br_ec_public_key *pk, const void *sig, size_t sig_len);
|
||||
|
||||
/**
|
||||
* \brief ECDSA signature generator, "i31" implementation, "asn1" format.
|
||||
*
|
||||
* \see br_ecdsa_sign()
|
||||
*
|
||||
* \param impl EC implementation to use.
|
||||
* \param hf hash function used to process the data.
|
||||
* \param hash_value signed data (hashed).
|
||||
* \param sk EC private key.
|
||||
* \param sig destination buffer.
|
||||
* \return the signature length (in bytes), or 0 on error.
|
||||
*/
|
||||
size_t br_ecdsa_i31_sign_asn1(const br_ec_impl *impl,
|
||||
const br_hash_class *hf, const void *hash_value,
|
||||
const br_ec_private_key *sk, void *sig);
|
||||
|
||||
/**
|
||||
* \brief ECDSA signature generator, "i31" implementation, "raw" format.
|
||||
*
|
||||
* \see br_ecdsa_sign()
|
||||
*
|
||||
* \param impl EC implementation to use.
|
||||
* \param hf hash function used to process the data.
|
||||
* \param hash_value signed data (hashed).
|
||||
* \param sk EC private key.
|
||||
* \param sig destination buffer.
|
||||
* \return the signature length (in bytes), or 0 on error.
|
||||
*/
|
||||
size_t br_ecdsa_i31_sign_raw(const br_ec_impl *impl,
|
||||
const br_hash_class *hf, const void *hash_value,
|
||||
const br_ec_private_key *sk, void *sig);
|
||||
|
||||
/**
|
||||
* \brief ECDSA signature verifier, "i31" implementation, "asn1" format.
|
||||
*
|
||||
* \see br_ecdsa_vrfy()
|
||||
*
|
||||
* \param impl EC implementation to use.
|
||||
* \param hash signed data (hashed).
|
||||
* \param hash_len hash value length (in bytes).
|
||||
* \param pk EC public key.
|
||||
* \param sig signature.
|
||||
* \param sig_len signature length (in bytes).
|
||||
* \return 1 on success, 0 on error.
|
||||
*/
|
||||
uint32_t br_ecdsa_i31_vrfy_asn1(const br_ec_impl *impl,
|
||||
const void *hash, size_t hash_len,
|
||||
const br_ec_public_key *pk, const void *sig, size_t sig_len);
|
||||
|
||||
/**
|
||||
* \brief ECDSA signature verifier, "i31" implementation, "raw" format.
|
||||
*
|
||||
* \see br_ecdsa_vrfy()
|
||||
*
|
||||
* \param impl EC implementation to use.
|
||||
* \param hash signed data (hashed).
|
||||
* \param hash_len hash value length (in bytes).
|
||||
* \param pk EC public key.
|
||||
* \param sig signature.
|
||||
* \param sig_len signature length (in bytes).
|
||||
* \return 1 on success, 0 on error.
|
||||
*/
|
||||
uint32_t br_ecdsa_i31_vrfy_raw(const br_ec_impl *impl,
|
||||
const void *hash, size_t hash_len,
|
||||
const br_ec_public_key *pk, const void *sig, size_t sig_len);
|
||||
|
||||
/**
|
||||
* \brief ECDSA signature generator, "i15" implementation, "asn1" format.
|
||||
*
|
||||
* \see br_ecdsa_sign()
|
||||
*
|
||||
* \param impl EC implementation to use.
|
||||
* \param hf hash function used to process the data.
|
||||
* \param hash_value signed data (hashed).
|
||||
* \param sk EC private key.
|
||||
* \param sig destination buffer.
|
||||
* \return the signature length (in bytes), or 0 on error.
|
||||
*/
|
||||
size_t br_ecdsa_i15_sign_asn1(const br_ec_impl *impl,
|
||||
const br_hash_class *hf, const void *hash_value,
|
||||
const br_ec_private_key *sk, void *sig);
|
||||
|
||||
/**
|
||||
* \brief ECDSA signature generator, "i15" implementation, "raw" format.
|
||||
*
|
||||
* \see br_ecdsa_sign()
|
||||
*
|
||||
* \param impl EC implementation to use.
|
||||
* \param hf hash function used to process the data.
|
||||
* \param hash_value signed data (hashed).
|
||||
* \param sk EC private key.
|
||||
* \param sig destination buffer.
|
||||
* \return the signature length (in bytes), or 0 on error.
|
||||
*/
|
||||
size_t br_ecdsa_i15_sign_raw(const br_ec_impl *impl,
|
||||
const br_hash_class *hf, const void *hash_value,
|
||||
const br_ec_private_key *sk, void *sig);
|
||||
|
||||
/**
|
||||
* \brief ECDSA signature verifier, "i15" implementation, "asn1" format.
|
||||
*
|
||||
* \see br_ecdsa_vrfy()
|
||||
*
|
||||
* \param impl EC implementation to use.
|
||||
* \param hash signed data (hashed).
|
||||
* \param hash_len hash value length (in bytes).
|
||||
* \param pk EC public key.
|
||||
* \param sig signature.
|
||||
* \param sig_len signature length (in bytes).
|
||||
* \return 1 on success, 0 on error.
|
||||
*/
|
||||
uint32_t br_ecdsa_i15_vrfy_asn1(const br_ec_impl *impl,
|
||||
const void *hash, size_t hash_len,
|
||||
const br_ec_public_key *pk, const void *sig, size_t sig_len);
|
||||
|
||||
/**
|
||||
* \brief ECDSA signature verifier, "i15" implementation, "raw" format.
|
||||
*
|
||||
* \see br_ecdsa_vrfy()
|
||||
*
|
||||
* \param impl EC implementation to use.
|
||||
* \param hash signed data (hashed).
|
||||
* \param hash_len hash value length (in bytes).
|
||||
* \param pk EC public key.
|
||||
* \param sig signature.
|
||||
* \param sig_len signature length (in bytes).
|
||||
* \return 1 on success, 0 on error.
|
||||
*/
|
||||
uint32_t br_ecdsa_i15_vrfy_raw(const br_ec_impl *impl,
|
||||
const void *hash, size_t hash_len,
|
||||
const br_ec_public_key *pk, const void *sig, size_t sig_len);
|
||||
|
||||
/**
|
||||
* \brief Get "default" ECDSA implementation (signer, asn1 format).
|
||||
*
|
||||
* This returns the preferred implementation of ECDSA signature generation
|
||||
* ("asn1" output format) on the current system.
|
||||
*
|
||||
* \return the default implementation.
|
||||
*/
|
||||
br_ecdsa_sign br_ecdsa_sign_asn1_get_default(void);
|
||||
|
||||
/**
|
||||
* \brief Get "default" ECDSA implementation (signer, raw format).
|
||||
*
|
||||
* This returns the preferred implementation of ECDSA signature generation
|
||||
* ("raw" output format) on the current system.
|
||||
*
|
||||
* \return the default implementation.
|
||||
*/
|
||||
br_ecdsa_sign br_ecdsa_sign_raw_get_default(void);
|
||||
|
||||
/**
|
||||
* \brief Get "default" ECDSA implementation (verifier, asn1 format).
|
||||
*
|
||||
* This returns the preferred implementation of ECDSA signature verification
|
||||
* ("asn1" signature format) on the current system.
|
||||
*
|
||||
* \return the default implementation.
|
||||
*/
|
||||
br_ecdsa_vrfy br_ecdsa_vrfy_asn1_get_default(void);
|
||||
|
||||
/**
|
||||
* \brief Get "default" ECDSA implementation (verifier, raw format).
|
||||
*
|
||||
* This returns the preferred implementation of ECDSA signature verification
|
||||
* ("raw" signature format) on the current system.
|
||||
*
|
||||
* \return the default implementation.
|
||||
*/
|
||||
br_ecdsa_vrfy br_ecdsa_vrfy_raw_get_default(void);
|
||||
|
||||
/**
|
||||
* \brief Maximum size for EC private key element buffer.
|
||||
*
|
||||
* This is the largest number of bytes that `br_ec_keygen()` may need or
|
||||
* ever return.
|
||||
*/
|
||||
#define BR_EC_KBUF_PRIV_MAX_SIZE 72
|
||||
|
||||
/**
|
||||
* \brief Maximum size for EC public key element buffer.
|
||||
*
|
||||
* This is the largest number of bytes that `br_ec_compute_pub()` may
|
||||
* need or ever return.
|
||||
*/
|
||||
#define BR_EC_KBUF_PUB_MAX_SIZE 145
|
||||
|
||||
/**
|
||||
* \brief Generate a new EC private key.
|
||||
*
|
||||
* If the specified `curve` is not supported by the elliptic curve
|
||||
* implementation (`impl`), then this function returns zero.
|
||||
*
|
||||
* The `sk` structure fields are set to the new private key data. In
|
||||
* particular, `sk.x` is made to point to the provided key buffer (`kbuf`),
|
||||
* in which the actual private key data is written. That buffer is assumed
|
||||
* to be large enough. The `BR_EC_KBUF_PRIV_MAX_SIZE` defines the maximum
|
||||
* size for all supported curves.
|
||||
*
|
||||
* The number of bytes used in `kbuf` is returned. If `kbuf` is `NULL`, then
|
||||
* the private key is not actually generated, and `sk` may also be `NULL`;
|
||||
* the minimum length for `kbuf` is still computed and returned.
|
||||
*
|
||||
* If `sk` is `NULL` but `kbuf` is not `NULL`, then the private key is
|
||||
* still generated and stored in `kbuf`.
|
||||
*
|
||||
* \param rng_ctx source PRNG context (already initialized).
|
||||
* \param impl the elliptic curve implementation.
|
||||
* \param sk the private key structure to fill, or `NULL`.
|
||||
* \param kbuf the key element buffer, or `NULL`.
|
||||
* \param curve the curve identifier.
|
||||
* \return the key data length (in bytes), or zero.
|
||||
*/
|
||||
size_t br_ec_keygen(const br_prng_class **rng_ctx,
|
||||
const br_ec_impl *impl, br_ec_private_key *sk,
|
||||
void *kbuf, int curve);
|
||||
|
||||
/**
|
||||
* \brief Compute EC public key from EC private key.
|
||||
*
|
||||
* This function uses the provided elliptic curve implementation (`impl`)
|
||||
* to compute the public key corresponding to the private key held in `sk`.
|
||||
* The public key point is written into `kbuf`, which is then linked from
|
||||
* the `*pk` structure. The size of the public key point, i.e. the number
|
||||
* of bytes used in `kbuf`, is returned.
|
||||
*
|
||||
* If `kbuf` is `NULL`, then the public key point is NOT computed, and
|
||||
* the public key structure `*pk` is unmodified (`pk` may be `NULL` in
|
||||
* that case). The size of the public key point is still returned.
|
||||
*
|
||||
* If `pk` is `NULL` but `kbuf` is not `NULL`, then the public key
|
||||
* point is computed and stored in `kbuf`, and its size is returned.
|
||||
*
|
||||
* If the curve used by the private key is not supported by the curve
|
||||
* implementation, then this function returns zero.
|
||||
*
|
||||
* The private key MUST be valid. An off-range private key value is not
|
||||
* necessarily detected, and leads to unpredictable results.
|
||||
*
|
||||
* \param impl the elliptic curve implementation.
|
||||
* \param pk the public key structure to fill (or `NULL`).
|
||||
* \param kbuf the public key point buffer (or `NULL`).
|
||||
* \param sk the source private key.
|
||||
* \return the public key point length (in bytes), or zero.
|
||||
*/
|
||||
size_t br_ec_compute_pub(const br_ec_impl *impl, br_ec_public_key *pk,
|
||||
void *kbuf, const br_ec_private_key *sk);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,241 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BR_BEARSSL_HMAC_H__
|
||||
#define BR_BEARSSL_HMAC_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "bearssl_hash.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** \file bearssl_hmac.h
|
||||
*
|
||||
* # HMAC
|
||||
*
|
||||
* HMAC is initialized with a key and an underlying hash function; it
|
||||
* then fills a "key context". That context contains the processed
|
||||
* key.
|
||||
*
|
||||
* With the key context, a HMAC context can be initialized to process
|
||||
* the input bytes and obtain the MAC output. The key context is not
|
||||
* modified during that process, and can be reused.
|
||||
*
|
||||
* IMPORTANT: HMAC shall be used only with functions that have the
|
||||
* following properties:
|
||||
*
|
||||
* - hash output size does not exceed 64 bytes;
|
||||
* - hash internal state size does not exceed 64 bytes;
|
||||
* - internal block length is a power of 2 between 16 and 256 bytes.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \brief HMAC key context.
|
||||
*
|
||||
* The HMAC key context is initialised with a hash function implementation
|
||||
* and a secret key. Contents are opaque (callers should not access them
|
||||
* directly). The caller is responsible for allocating the context where
|
||||
* appropriate. Context initialisation and usage incurs no dynamic
|
||||
* allocation, so there is no release function.
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef BR_DOXYGEN_IGNORE
|
||||
const br_hash_class *dig_vtable;
|
||||
unsigned char ksi[64], kso[64];
|
||||
#endif
|
||||
} br_hmac_key_context;
|
||||
|
||||
/**
|
||||
* \brief HMAC key context initialisation.
|
||||
*
|
||||
* Initialise the key context with the provided key, using the hash function
|
||||
* identified by `digest_vtable`. This supports arbitrary key lengths.
|
||||
*
|
||||
* \param kc HMAC key context to initialise.
|
||||
* \param digest_vtable pointer to the hash function implementation vtable.
|
||||
* \param key pointer to the HMAC secret key.
|
||||
* \param key_len HMAC secret key length (in bytes).
|
||||
*/
|
||||
void br_hmac_key_init(br_hmac_key_context *kc,
|
||||
const br_hash_class *digest_vtable, const void *key, size_t key_len);
|
||||
|
||||
/*
|
||||
* \brief Get the underlying hash function.
|
||||
*
|
||||
* This function returns a pointer to the implementation vtable of the
|
||||
* hash function used for this HMAC key context.
|
||||
*
|
||||
* \param kc HMAC key context.
|
||||
* \return the hash function implementation.
|
||||
*/
|
||||
static inline const br_hash_class *br_hmac_key_get_digest(
|
||||
const br_hmac_key_context *kc)
|
||||
{
|
||||
return kc->dig_vtable;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief HMAC computation context.
|
||||
*
|
||||
* The HMAC computation context maintains the state for a single HMAC
|
||||
* computation. It is modified as input bytes are injected. The context
|
||||
* is caller-allocated and has no release function since it does not
|
||||
* dynamically allocate external resources. Its contents are opaque.
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef BR_DOXYGEN_IGNORE
|
||||
br_hash_compat_context dig;
|
||||
unsigned char kso[64];
|
||||
size_t out_len;
|
||||
#endif
|
||||
} br_hmac_context;
|
||||
|
||||
/**
|
||||
* \brief HMAC computation initialisation.
|
||||
*
|
||||
* Initialise a HMAC context with a key context. The key context is
|
||||
* unmodified. Relevant data from the key context is immediately copied;
|
||||
* the key context can thus be independently reused, modified or released
|
||||
* without impacting this HMAC computation.
|
||||
*
|
||||
* An explicit output length can be specified; the actual output length
|
||||
* will be the minimum of that value and the natural HMAC output length.
|
||||
* If `out_len` is 0, then the natural HMAC output length is selected. The
|
||||
* "natural output length" is the output length of the underlying hash
|
||||
* function.
|
||||
*
|
||||
* \param ctx HMAC context to initialise.
|
||||
* \param kc HMAC key context (already initialised with the key).
|
||||
* \param out_len HMAC output length (0 to select "natural length").
|
||||
*/
|
||||
void br_hmac_init(br_hmac_context *ctx,
|
||||
const br_hmac_key_context *kc, size_t out_len);
|
||||
|
||||
/**
|
||||
* \brief Get the HMAC output size.
|
||||
*
|
||||
* The HMAC output size is the number of bytes that will actually be
|
||||
* produced with `br_hmac_out()` with the provided context. This function
|
||||
* MUST NOT be called on a non-initialised HMAC computation context.
|
||||
* The returned value is the minimum of the HMAC natural length (output
|
||||
* size of the underlying hash function) and the `out_len` parameter which
|
||||
* was used with the last `br_hmac_init()` call on that context (if the
|
||||
* initialisation `out_len` parameter was 0, then this function will
|
||||
* return the HMAC natural length).
|
||||
*
|
||||
* \param ctx the (already initialised) HMAC computation context.
|
||||
* \return the HMAC actual output size.
|
||||
*/
|
||||
static inline size_t
|
||||
br_hmac_size(br_hmac_context *ctx)
|
||||
{
|
||||
return ctx->out_len;
|
||||
}
|
||||
|
||||
/*
|
||||
* \brief Get the underlying hash function.
|
||||
*
|
||||
* This function returns a pointer to the implementation vtable of the
|
||||
* hash function used for this HMAC context.
|
||||
*
|
||||
* \param hc HMAC context.
|
||||
* \return the hash function implementation.
|
||||
*/
|
||||
static inline const br_hash_class *br_hmac_get_digest(
|
||||
const br_hmac_context *hc)
|
||||
{
|
||||
return hc->dig.vtable;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Inject some bytes in HMAC.
|
||||
*
|
||||
* The provided `len` bytes are injected as extra input in the HMAC
|
||||
* computation incarnated by the `ctx` HMAC context. It is acceptable
|
||||
* that `len` is zero, in which case `data` is ignored (and may be
|
||||
* `NULL`) and this function does nothing.
|
||||
*/
|
||||
void br_hmac_update(br_hmac_context *ctx, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* \brief Compute the HMAC output.
|
||||
*
|
||||
* The destination buffer MUST be large enough to accommodate the result;
|
||||
* its length is at most the "natural length" of HMAC (i.e. the output
|
||||
* length of the underlying hash function). The context is NOT modified;
|
||||
* further bytes may be processed. Thus, "partial HMAC" values can be
|
||||
* efficiently obtained.
|
||||
*
|
||||
* Returned value is the output length (in bytes).
|
||||
*
|
||||
* \param ctx HMAC computation context.
|
||||
* \param out destination buffer for the HMAC output.
|
||||
* \return the produced value length (in bytes).
|
||||
*/
|
||||
size_t br_hmac_out(const br_hmac_context *ctx, void *out);
|
||||
|
||||
/**
|
||||
* \brief Constant-time HMAC computation.
|
||||
*
|
||||
* This function computes the HMAC output in constant time. Some extra
|
||||
* input bytes are processed, then the output is computed. The extra
|
||||
* input consists in the `len` bytes pointed to by `data`. The `len`
|
||||
* parameter must lie between `min_len` and `max_len` (inclusive);
|
||||
* `max_len` bytes are actually read from `data`. Computing time (and
|
||||
* memory access pattern) will not depend upon the data byte contents or
|
||||
* the value of `len`.
|
||||
*
|
||||
* The output is written in the `out` buffer, that MUST be large enough
|
||||
* to receive it.
|
||||
*
|
||||
* The difference `max_len - min_len` MUST be less than 2<sup>30</sup>
|
||||
* (i.e. about one gigabyte).
|
||||
*
|
||||
* This function computes the output properly only if the underlying
|
||||
* hash function uses MD padding (i.e. MD5, SHA-1, SHA-224, SHA-256,
|
||||
* SHA-384 or SHA-512).
|
||||
*
|
||||
* The provided context is NOT modified.
|
||||
*
|
||||
* \param ctx the (already initialised) HMAC computation context.
|
||||
* \param data the extra input bytes.
|
||||
* \param len the extra input length (in bytes).
|
||||
* \param min_len minimum extra input length (in bytes).
|
||||
* \param max_len maximum extra input length (in bytes).
|
||||
* \param out destination buffer for the HMAC output.
|
||||
* \return the produced value length (in bytes).
|
||||
*/
|
||||
size_t br_hmac_outCT(const br_hmac_context *ctx,
|
||||
const void *data, size_t len, size_t min_len, size_t max_len,
|
||||
void *out);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,284 @@
|
|||
/*
|
||||
* Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BR_BEARSSL_KDF_H__
|
||||
#define BR_BEARSSL_KDF_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "bearssl_hash.h"
|
||||
#include "bearssl_hmac.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** \file bearssl_kdf.h
|
||||
*
|
||||
* # Key Derivation Functions
|
||||
*
|
||||
* KDFs are functions that take a variable length input, and provide a
|
||||
* variable length output, meant to be used to derive subkeys from a
|
||||
* master key.
|
||||
*
|
||||
* ## HKDF
|
||||
*
|
||||
* HKDF is a KDF defined by [RFC 5869](https://tools.ietf.org/html/rfc5869).
|
||||
* It is based on HMAC, itself using an underlying hash function. Any
|
||||
* hash function can be used, as long as it is compatible with the rules
|
||||
* for the HMAC implementation (i.e. output size is 64 bytes or less, hash
|
||||
* internal state size is 64 bytes or less, and the internal block length is
|
||||
* a power of 2 between 16 and 256 bytes). HKDF has two phases:
|
||||
*
|
||||
* - HKDF-Extract: the input data is ingested, along with a "salt" value.
|
||||
*
|
||||
* - HKDF-Expand: the output is produced, from the result of processing
|
||||
* the input and salt, and using an extra non-secret parameter called
|
||||
* "info".
|
||||
*
|
||||
* The "salt" and "info" strings are non-secret and can be empty. Their role
|
||||
* is normally to bind the input and output, respectively, to conventional
|
||||
* identifiers that qualify them within the used protocol or application.
|
||||
*
|
||||
* The implementation defined in this file uses the following functions:
|
||||
*
|
||||
* - `br_hkdf_init()`: initialize an HKDF context, with a hash function,
|
||||
* and the salt. This starts the HKDF-Extract process.
|
||||
*
|
||||
* - `br_hkdf_inject()`: inject more input bytes. This function may be
|
||||
* called repeatedly if the input data is provided by chunks.
|
||||
*
|
||||
* - `br_hkdf_flip()`: end the HKDF-Extract process, and start the
|
||||
* HKDF-Expand process.
|
||||
*
|
||||
* - `br_hkdf_produce()`: get the next bytes of output. This function
|
||||
* may be called several times to obtain the full output by chunks.
|
||||
* For correct HKDF processing, the same "info" string must be
|
||||
* provided for each call.
|
||||
*
|
||||
* Note that the HKDF total output size (the number of bytes that
|
||||
* HKDF-Expand is willing to produce) is limited: if the hash output size
|
||||
* is _n_ bytes, then the maximum output size is _255*n_.
|
||||
*
|
||||
* ## SHAKE
|
||||
*
|
||||
* SHAKE is defined in
|
||||
* [FIPS 202](https://csrc.nist.gov/publications/detail/fips/202/final)
|
||||
* under two versions: SHAKE128 and SHAKE256, offering an alleged
|
||||
* "security level" of 128 and 256 bits, respectively (SHAKE128 is
|
||||
* about 20 to 25% faster than SHAKE256). SHAKE internally relies on
|
||||
* the Keccak family of sponge functions, not on any externally provided
|
||||
* hash function. Contrary to HKDF, SHAKE does not have a concept of
|
||||
* either a "salt" or an "info" string. The API consists in four
|
||||
* functions:
|
||||
*
|
||||
* - `br_shake_init()`: initialize a SHAKE context for a given
|
||||
* security level.
|
||||
*
|
||||
* - `br_shake_inject()`: inject more input bytes. This function may be
|
||||
* called repeatedly if the input data is provided by chunks.
|
||||
*
|
||||
* - `br_shake_flip()`: end the data injection process, and start the
|
||||
* data production process.
|
||||
*
|
||||
* - `br_shake_produce()`: get the next bytes of output. This function
|
||||
* may be called several times to obtain the full output by chunks.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \brief HKDF context.
|
||||
*
|
||||
* The HKDF context is initialized with a hash function implementation
|
||||
* and a salt value. Contents are opaque (callers should not access them
|
||||
* directly). The caller is responsible for allocating the context where
|
||||
* appropriate. Context initialisation and usage incurs no dynamic
|
||||
* allocation, so there is no release function.
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef BR_DOXYGEN_IGNORE
|
||||
union {
|
||||
br_hmac_context hmac_ctx;
|
||||
br_hmac_key_context prk_ctx;
|
||||
} u;
|
||||
unsigned char buf[64];
|
||||
size_t ptr;
|
||||
size_t dig_len;
|
||||
unsigned chunk_num;
|
||||
#endif
|
||||
} br_hkdf_context;
|
||||
|
||||
/**
|
||||
* \brief HKDF context initialization.
|
||||
*
|
||||
* The underlying hash function and salt value are provided. Arbitrary
|
||||
* salt lengths can be used.
|
||||
*
|
||||
* HKDF makes a difference between a salt of length zero, and an
|
||||
* absent salt (the latter being equivalent to a salt consisting of
|
||||
* bytes of value zero, of the same length as the hash function output).
|
||||
* If `salt_len` is zero, then this function assumes that the salt is
|
||||
* present but of length zero. To specify an _absent_ salt, use
|
||||
* `BR_HKDF_NO_SALT` as `salt` parameter (`salt_len` is then ignored).
|
||||
*
|
||||
* \param hc HKDF context to initialise.
|
||||
* \param digest_vtable pointer to the hash function implementation vtable.
|
||||
* \param salt HKDF-Extract salt.
|
||||
* \param salt_len HKDF-Extract salt length (in bytes).
|
||||
*/
|
||||
void br_hkdf_init(br_hkdf_context *hc, const br_hash_class *digest_vtable,
|
||||
const void *salt, size_t salt_len);
|
||||
|
||||
/**
|
||||
* \brief The special "absent salt" value for HKDF.
|
||||
*/
|
||||
#define BR_HKDF_NO_SALT (&br_hkdf_no_salt)
|
||||
|
||||
#ifndef BR_DOXYGEN_IGNORE
|
||||
extern const unsigned char br_hkdf_no_salt;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \brief HKDF input injection (HKDF-Extract).
|
||||
*
|
||||
* This function injects some more input bytes ("key material") into
|
||||
* HKDF. This function may be called several times, after `br_hkdf_init()`
|
||||
* but before `br_hkdf_flip()`.
|
||||
*
|
||||
* \param hc HKDF context.
|
||||
* \param ikm extra input bytes.
|
||||
* \param ikm_len number of extra input bytes.
|
||||
*/
|
||||
void br_hkdf_inject(br_hkdf_context *hc, const void *ikm, size_t ikm_len);
|
||||
|
||||
/**
|
||||
* \brief HKDF switch to the HKDF-Expand phase.
|
||||
*
|
||||
* This call terminates the HKDF-Extract process (input injection), and
|
||||
* starts the HKDF-Expand process (output production).
|
||||
*
|
||||
* \param hc HKDF context.
|
||||
*/
|
||||
void br_hkdf_flip(br_hkdf_context *hc);
|
||||
|
||||
/**
|
||||
* \brief HKDF output production (HKDF-Expand).
|
||||
*
|
||||
* Produce more output bytes from the current state. This function may be
|
||||
* called several times, but only after `br_hkdf_flip()`.
|
||||
*
|
||||
* Returned value is the number of actually produced bytes. The total
|
||||
* output length is limited to 255 times the output length of the
|
||||
* underlying hash function.
|
||||
*
|
||||
* \param hc HKDF context.
|
||||
* \param info application specific information string.
|
||||
* \param info_len application specific information string length (in bytes).
|
||||
* \param out destination buffer for the HKDF output.
|
||||
* \param out_len the length of the requested output (in bytes).
|
||||
* \return the produced output length (in bytes).
|
||||
*/
|
||||
size_t br_hkdf_produce(br_hkdf_context *hc,
|
||||
const void *info, size_t info_len, void *out, size_t out_len);
|
||||
|
||||
/**
|
||||
* \brief SHAKE context.
|
||||
*
|
||||
* The SHAKE context is initialized with a "security level". The internal
|
||||
* notion is called "capacity"; the capacity is twice the security level
|
||||
* (for instance, SHAKE128 has capacity 256).
|
||||
*
|
||||
* The caller is responsible for allocating the context where
|
||||
* appropriate. Context initialisation and usage incurs no dynamic
|
||||
* allocation, so there is no release function.
|
||||
*/
|
||||
typedef struct {
#ifndef BR_DOXYGEN_IGNORE
	/* Data buffer and current position within it (exact usage is not
	   visible from this header). */
	unsigned char dbuf[200];
	size_t dptr;

	/* NOTE(review): presumably the sponge rate in bytes, derived from
	   the security level given to br_shake_init() -- confirm. */
	size_t rate;

	/* 25 64-bit words; presumably the Keccak state -- confirm. */
	uint64_t A[25];
#endif
} br_shake_context;
|
||||
|
||||
/**
|
||||
* \brief SHAKE context initialization.
|
||||
*
|
||||
* The context is initialized for the provided "security level".
|
||||
* Internally, this sets the "capacity" to twice the security level;
|
||||
* thus, for SHAKE128, the `security_level` parameter should be 128,
|
||||
* which corresponds to a 256-bit capacity.
|
||||
*
|
||||
* Allowed security levels are all multiples of 32, from 32 to 768,
|
||||
* inclusive. Larger security levels imply lower performance; levels
|
||||
* beyond 256 bits don't make much sense. Standard levels are 128
|
||||
* and 256 bits (for SHAKE128 and SHAKE256, respectively).
|
||||
*
|
||||
* \param sc SHAKE context to initialise.
|
||||
* \param security_level security level (in bits).
|
||||
*/
|
||||
void br_shake_init(br_shake_context *sc, int security_level);
|
||||
|
||||
/**
|
||||
* \brief SHAKE input injection.
|
||||
*
|
||||
* This function injects some more input bytes ("key material") into
|
||||
* SHAKE. This function may be called several times, after `br_shake_init()`
|
||||
* but before `br_shake_flip()`.
|
||||
*
|
||||
* \param sc SHAKE context.
|
||||
* \param data extra input bytes.
|
||||
* \param len number of extra input bytes.
|
||||
*/
|
||||
void br_shake_inject(br_shake_context *sc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* \brief SHAKE switch to production phase.
|
||||
*
|
||||
* This call terminates the input injection process, and starts the
|
||||
* output production process.
|
||||
*
|
||||
* \param sc SHAKE context.
|
||||
*/
|
||||
void br_shake_flip(br_shake_context *hc);
|
||||
|
||||
/**
|
||||
* \brief SHAKE output production.
|
||||
*
|
||||
* Produce more output bytes from the current state. This function may be
|
||||
* called several times, but only after `br_shake_flip()`.
|
||||
*
|
||||
* There is no practical limit to the number of bytes that may be produced.
|
||||
*
|
||||
* \param sc SHAKE context.
|
||||
* \param out destination buffer for the SHAKE output.
|
||||
* \param len the length of the requested output (in bytes).
|
||||
*/
|
||||
void br_shake_produce(br_shake_context *sc, void *out, size_t len);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,294 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BR_BEARSSL_PEM_H__
|
||||
#define BR_BEARSSL_PEM_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** \file bearssl_pem.h
|
||||
*
|
||||
* # PEM Support
|
||||
*
|
||||
* PEM is a traditional encoding layer used to store binary objects (in
|
||||
* particular X.509 certificates, and private keys) in text files. While
|
||||
* the acronym comes from an old, defunct standard ("Privacy Enhanced
|
||||
* Mail"), the format has been reused, with some variations, by many
|
||||
* systems, and is a _de facto_ standard, even though it is not, actually,
|
||||
* specified in all clarity anywhere.
|
||||
*
|
||||
* ## Format Details
|
||||
*
|
||||
* BearSSL contains a generic, streamed PEM decoder, which handles the
|
||||
* following format:
|
||||
*
|
||||
* - The input source (a sequence of bytes) is assumed to be the
|
||||
* encoding of a text file in an ASCII-compatible charset. This
|
||||
* includes ISO-8859-1, Windows-1252, and UTF-8 encodings. Each
|
||||
* line ends on a newline character (U+000A LINE FEED). The
|
||||
* U+000D CARRIAGE RETURN characters are ignored, so the code
|
||||
* accepts both Windows-style and Unix-style line endings.
|
||||
*
|
||||
* - Each object begins with a banner that occurs at the start of
|
||||
* a line; the first banner characters are "`-----BEGIN `" (five
|
||||
* dashes, the word "BEGIN", and a space). The banner matching is
|
||||
* not case-sensitive.
|
||||
*
|
||||
* - The _object name_ consists in the characters that follow the
|
||||
* banner start sequence, up to the end of the line, but without
|
||||
* trailing dashes (in "normal" PEM, there are five trailing
|
||||
* dashes, but this implementation is not picky about these dashes).
|
||||
* The BearSSL decoder normalises the name characters to uppercase
|
||||
* (for ASCII letters only) and accepts names up to 127 characters.
|
||||
*
|
||||
* - The object ends with a banner that again occurs at the start of
|
||||
* a line, and starts with "`-----END `" (again case-insensitive).
|
||||
*
|
||||
* - Between that start and end banner, only Base64 data shall occur.
|
||||
* Base64 converts each sequence of three bytes into four
|
||||
* characters; the four characters are ASCII letters, digits, "`+`"
|
||||
* or "`/`" signs, and one or two "`=`" signs may occur in the last
|
||||
* quartet. Whitespace is ignored (whitespace is any ASCII character
|
||||
* of code 32 or less, so control characters are whitespace) and
|
||||
* lines may have arbitrary length; the only restriction is that the
|
||||
* four characters of a quartet must appear on the same line (no
|
||||
* line break inside a quartet).
|
||||
*
|
||||
* - A single file may contain more than one PEM object. Bytes that
|
||||
* occur between objects are ignored.
|
||||
*
|
||||
*
|
||||
* ## PEM Decoder API
|
||||
*
|
||||
* The PEM decoder offers a state-machine API. The caller allocates a
|
||||
* decoder context, then injects source bytes. Source bytes are pushed
|
||||
* with `br_pem_decoder_push()`. The decoder stops accepting bytes when
|
||||
* it reaches an "event", which is either the start of an object, the
|
||||
* end of an object, or a decoding error within an object.
|
||||
*
|
||||
* The `br_pem_decoder_event()` function is used to obtain the current
|
||||
* event; it also clears it, thus allowing the decoder to accept more
|
||||
* bytes. When an object start event is raised, the decoder context
|
||||
* offers the found object name (normalised to ASCII uppercase).
|
||||
*
|
||||
* When an object is reached, the caller must set an appropriate callback
|
||||
* function, which will receive (by chunks) the decoded object data.
|
||||
*
|
||||
* Since the decoder context makes no dynamic allocation, it requires
|
||||
* no explicit deallocation.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \brief PEM decoder context.
|
||||
*
|
||||
* Contents are opaque (they should not be accessed directly).
|
||||
*/
|
||||
typedef struct {
#ifndef BR_DOXYGEN_IGNORE
	/* CPU for the T0 virtual machine. */
	struct {
		uint32_t *dp;
		uint32_t *rp;
		const unsigned char *ip;
	} cpu;
	uint32_t dp_stack[32];
	uint32_t rp_stack[32];
	int err;

	/* Presumably the pending source bytes handed to
	   br_pem_decoder_push() and their count -- TODO confirm. */
	const unsigned char *hbuf;
	size_t hlen;

	/* Receiver for decoded object data and its opaque context pointer,
	   as installed by br_pem_decoder_setdest(). */
	void (*dest)(void *dest_ctx, const void *src, size_t len);
	void *dest_ctx;

	/* NOTE(review): looks like the pending event reported by
	   br_pem_decoder_event() -- confirm against the implementation. */
	unsigned char event;

	/* Name of the current object, as returned by
	   br_pem_decoder_name(). */
	char name[128];

	/* Internal working buffer and position within it (exact usage is
	   not visible from this header). */
	unsigned char buf[255];
	size_t ptr;
#endif
} br_pem_decoder_context;
|
||||
|
||||
/**
|
||||
* \brief Initialise a PEM decoder structure.
|
||||
*
|
||||
* \param ctx decoder context to initialise.
|
||||
*/
|
||||
void br_pem_decoder_init(br_pem_decoder_context *ctx);
|
||||
|
||||
/**
|
||||
* \brief Push some bytes into the decoder.
|
||||
*
|
||||
* Returned value is the number of bytes actually consumed; this may be
|
||||
* less than the number of provided bytes if an event is raised. When an
|
||||
* event is raised, it must be read (with `br_pem_decoder_event()`);
|
||||
* until the event is read, this function will return 0.
|
||||
*
|
||||
* \param ctx decoder context.
|
||||
* \param data new data bytes.
|
||||
* \param len number of new data bytes.
|
||||
* \return the number of bytes actually received (may be less than `len`).
|
||||
*/
|
||||
size_t br_pem_decoder_push(br_pem_decoder_context *ctx,
|
||||
const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* \brief Set the receiver for decoded data.
|
||||
*
|
||||
* When an object is entered, the provided function (with opaque context
|
||||
* pointer) will be called repeatedly with successive chunks of decoded
|
||||
* data for that object. If `dest` is set to 0, then decoded data is
|
||||
* simply ignored. The receiver can be set at any time, but, in practice,
|
||||
* it should be called immediately after receiving a "start of object"
|
||||
* event.
|
||||
*
|
||||
* \param ctx decoder context.
|
||||
* \param dest callback for receiving decoded data.
|
||||
* \param dest_ctx opaque context pointer for the `dest` callback.
|
||||
*/
|
||||
static inline void
|
||||
br_pem_decoder_setdest(br_pem_decoder_context *ctx,
|
||||
void (*dest)(void *dest_ctx, const void *src, size_t len),
|
||||
void *dest_ctx)
|
||||
{
|
||||
ctx->dest = dest;
|
||||
ctx->dest_ctx = dest_ctx;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Get the last event.
|
||||
*
|
||||
* If an event was raised, then this function returns the event value, and
|
||||
* also clears it, thereby allowing the decoder to proceed. If no event
|
||||
* was raised since the last call to `br_pem_decoder_event()`, then this
|
||||
* function returns 0.
|
||||
*
|
||||
* \param ctx decoder context.
|
||||
* \return the raised event, or 0.
|
||||
*/
|
||||
int br_pem_decoder_event(br_pem_decoder_context *ctx);
|
||||
|
||||
/**
|
||||
* \brief Event: start of object.
|
||||
*
|
||||
* This event is raised when the start of a new object has been detected.
|
||||
* The object name (normalised to uppercase) can be accessed with
|
||||
* `br_pem_decoder_name()`.
|
||||
*/
|
||||
#define BR_PEM_BEGIN_OBJ 1
|
||||
|
||||
/**
|
||||
* \brief Event: end of object.
|
||||
*
|
||||
* This event is raised when the end of the current object is reached
|
||||
* (normally, i.e. with no decoding error).
|
||||
*/
|
||||
#define BR_PEM_END_OBJ 2
|
||||
|
||||
/**
|
||||
* \brief Event: decoding error.
|
||||
*
|
||||
* This event is raised when decoding fails within an object.
|
||||
* This formally closes the current object and brings the decoder back
|
||||
* to the "out of any object" state. The offending line in the source
|
||||
* is consumed.
|
||||
*/
|
||||
#define BR_PEM_ERROR 3
|
||||
|
||||
/**
|
||||
* \brief Get the name of the encountered object.
|
||||
*
|
||||
* The encountered object name is defined only when the "start of object"
|
||||
* event is raised. That name is normalised to uppercase (for ASCII letters
|
||||
* only) and does not include trailing dashes.
|
||||
*
|
||||
* \param ctx decoder context.
|
||||
* \return the current object name.
|
||||
*/
|
||||
static inline const char *
|
||||
br_pem_decoder_name(br_pem_decoder_context *ctx)
|
||||
{
|
||||
return ctx->name;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Encode an object in PEM.
|
||||
*
|
||||
* This function encodes the provided binary object (`data`, of length `len`
|
||||
* bytes) into PEM. The `banner` text will be included in the header and
|
||||
* footer (e.g. use `"CERTIFICATE"` to get a `"BEGIN CERTIFICATE"` header).
|
||||
*
|
||||
* The length (in characters) of the PEM output is returned; that length
|
||||
* does NOT include the terminating zero, that this function nevertheless
|
||||
* adds. If using the returned value for allocation purposes, the allocated
|
||||
* buffer size MUST be at least one byte larger than the returned size.
|
||||
*
|
||||
* If `dest` is `NULL`, then the encoding does not happen; however, the
|
||||
* length of the encoded object is still computed and returned.
|
||||
*
|
||||
* The `data` pointer may be `NULL` only if `len` is zero (when encoding
|
||||
* an object of length zero, which is not very useful), or when `dest`
|
||||
* is `NULL` (in that case, source data bytes are ignored).
|
||||
*
|
||||
* Some `flags` can be specified to alter the encoding behaviour:
|
||||
*
|
||||
* - If `BR_PEM_LINE64` is set, then line-breaking will occur after
|
||||
* every 64 characters of output, instead of the default of 76.
|
||||
*
|
||||
* - If `BR_PEM_CRLF` is set, then end-of-line sequence will use
|
||||
* CR+LF instead of a single LF.
|
||||
*
|
||||
* The `data` and `dest` buffers may overlap, in which case the source
|
||||
* binary data is destroyed in the process. Note that the PEM-encoded output
|
||||
* is always larger than the source binary.
|
||||
*
|
||||
* \param dest the destination buffer (or `NULL`).
|
||||
* \param data the source buffer (can be `NULL` in some cases).
|
||||
* \param len the source length (in bytes).
|
||||
* \param banner the PEM banner expression.
|
||||
* \param flags the behavioural flags.
|
||||
* \return the PEM object length (in characters), EXCLUDING the final zero.
|
||||
*/
|
||||
size_t br_pem_encode(void *dest, const void *data, size_t len,
|
||||
const char *banner, unsigned flags);
|
||||
|
||||
/**
|
||||
* \brief PEM encoding flag: split lines at 64 characters.
|
||||
*/
|
||||
#define BR_PEM_LINE64 0x0001
|
||||
|
||||
/**
|
||||
* \brief PEM encoding flag: use CR+LF line endings.
|
||||
*/
|
||||
#define BR_PEM_CRLF 0x0002
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,150 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BR_BEARSSL_PRF_H__
|
||||
#define BR_BEARSSL_PRF_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** \file bearssl_prf.h
|
||||
*
|
||||
* # The TLS PRF
|
||||
*
|
||||
* The "PRF" is the pseudorandom function used internally during the
|
||||
* SSL/TLS handshake, notably to expand negotiated shared secrets into
|
||||
* the symmetric encryption keys that will be used to process the
|
||||
* application data.
|
||||
*
|
||||
* TLS 1.0 and 1.1 define a PRF that is based on both MD5 and SHA-1. This
|
||||
* is implemented by the `br_tls10_prf()` function.
|
||||
*
|
||||
* TLS 1.2 redefines the PRF, using an explicit hash function. The
|
||||
* `br_tls12_sha256_prf()` and `br_tls12_sha384_prf()` functions apply that
|
||||
* PRF with, respectively, SHA-256 and SHA-384. Most standard cipher suites
|
||||
* rely on the SHA-256 based PRF, but some use SHA-384.
|
||||
*
|
||||
* The PRF always uses as input three parameters: a "secret" (some
|
||||
* bytes), a "label" (ASCII string), and a "seed" (again some bytes). An
|
||||
* arbitrary output length can be produced. The "seed" is provided as an
|
||||
* arbitrary number of binary chunks, that get internally concatenated.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \brief Type for a seed chunk.
|
||||
*
|
||||
* Each chunk may have an arbitrary length, and may be empty (no byte at
|
||||
* all). If the chunk length is zero, then the pointer to the chunk data
|
||||
* may be `NULL`.
|
||||
*/
|
||||
typedef struct {
	/** \brief Address of the first byte of this chunk. */
	const void *data;

	/** \brief Number of bytes in this chunk. */
	size_t len;
} br_tls_prf_seed_chunk;
|
||||
|
||||
/**
|
||||
* \brief PRF implementation for TLS 1.0 and 1.1.
|
||||
*
|
||||
* This PRF is the one specified by TLS 1.0 and 1.1. It internally uses
|
||||
* MD5 and SHA-1.
|
||||
*
|
||||
* \param dst destination buffer.
|
||||
* \param len output length (in bytes).
|
||||
* \param secret secret value (key) for this computation.
|
||||
* \param secret_len length of "secret" (in bytes).
|
||||
* \param label PRF label (zero-terminated ASCII string).
|
||||
* \param seed_num number of seed chunks.
|
||||
* \param seed seed chunks for this computation (usually non-secret).
|
||||
*/
|
||||
void br_tls10_prf(void *dst, size_t len,
|
||||
const void *secret, size_t secret_len, const char *label,
|
||||
size_t seed_num, const br_tls_prf_seed_chunk *seed);
|
||||
|
||||
/**
|
||||
* \brief PRF implementation for TLS 1.2, with SHA-256.
|
||||
*
|
||||
* This PRF is the one specified by TLS 1.2, when the underlying hash
|
||||
* function is SHA-256.
|
||||
*
|
||||
* \param dst destination buffer.
|
||||
* \param len output length (in bytes).
|
||||
* \param secret secret value (key) for this computation.
|
||||
* \param secret_len length of "secret" (in bytes).
|
||||
* \param label PRF label (zero-terminated ASCII string).
|
||||
* \param seed_num number of seed chunks.
|
||||
* \param seed seed chunks for this computation (usually non-secret).
|
||||
*/
|
||||
void br_tls12_sha256_prf(void *dst, size_t len,
|
||||
const void *secret, size_t secret_len, const char *label,
|
||||
size_t seed_num, const br_tls_prf_seed_chunk *seed);
|
||||
|
||||
/**
|
||||
* \brief PRF implementation for TLS 1.2, with SHA-384.
|
||||
*
|
||||
* This PRF is the one specified by TLS 1.2, when the underlying hash
|
||||
* function is SHA-384.
|
||||
*
|
||||
* \param dst destination buffer.
|
||||
* \param len output length (in bytes).
|
||||
* \param secret secret value (key) for this computation.
|
||||
* \param secret_len length of "secret" (in bytes).
|
||||
* \param label PRF label (zero-terminated ASCII string).
|
||||
* \param seed_num number of seed chunks.
|
||||
* \param seed seed chunks for this computation (usually non-secret).
|
||||
*/
|
||||
void br_tls12_sha384_prf(void *dst, size_t len,
|
||||
const void *secret, size_t secret_len, const char *label,
|
||||
size_t seed_num, const br_tls_prf_seed_chunk *seed);
|
||||
|
||||
/**
|
||||
* \brief A convenient type name for a PRF implementation.
|
||||
*
|
||||
* \param dst destination buffer.
|
||||
* \param len output length (in bytes).
|
||||
* \param secret secret value (key) for this computation.
|
||||
* \param secret_len length of "secret" (in bytes).
|
||||
* \param label PRF label (zero-terminated ASCII string).
|
||||
* \param seed_num number of seed chunks.
|
||||
* \param seed seed chunks for this computation (usually non-secret).
|
||||
*/
|
||||
typedef void (*br_tls_prf_impl)(void *dst, size_t len,
|
||||
const void *secret, size_t secret_len, const char *label,
|
||||
size_t seed_num, const br_tls_prf_seed_chunk *seed);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,397 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BR_BEARSSL_RAND_H__
|
||||
#define BR_BEARSSL_RAND_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "bearssl_block.h"
|
||||
#include "bearssl_hash.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** \file bearssl_rand.h
|
||||
*
|
||||
* # Pseudo-Random Generators
|
||||
*
|
||||
* A PRNG is a state-based engine that outputs pseudo-random bytes on
|
||||
* demand. It is initialized with an initial seed, and additional seed
|
||||
* bytes can be added afterwards. Bytes produced depend on the seeds and
|
||||
* also on the exact sequence of calls (including sizes requested for
|
||||
* each call).
|
||||
*
|
||||
*
|
||||
* ## Procedural and OOP API
|
||||
*
|
||||
* For the PRNG of name "`xxx`", two APIs are provided. The _procedural_
|
||||
* API defines a context structure `br_xxx_context` and three functions:
|
||||
*
|
||||
* - `br_xxx_init()`
|
||||
*
|
||||
* Initialise the context with an initial seed.
|
||||
*
|
||||
* - `br_xxx_generate()`
|
||||
*
|
||||
* Produce some pseudo-random bytes.
|
||||
*
|
||||
* - `br_xxx_update()`
|
||||
*
|
||||
* Inject some additional seed.
|
||||
*
|
||||
* The initialisation function sets the first context field (`vtable`)
|
||||
* to a pointer to the vtable that supports the OOP API. The OOP API
|
||||
* provides access to the same functions through function pointers,
|
||||
* named `init()`, `generate()` and `update()`.
|
||||
*
|
||||
* Note that the context initialisation method may accept additional
|
||||
* parameters, provided as a 'const void *' pointer at API level. These
|
||||
* additional parameters depend on the implemented PRNG.
|
||||
*
|
||||
*
|
||||
* ## HMAC_DRBG
|
||||
*
|
||||
* HMAC_DRBG is defined in [NIST SP 800-90A Revision
|
||||
* 1](http://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-90Ar1.pdf).
|
||||
* It uses HMAC repeatedly, over some configurable underlying hash
|
||||
* function. In BearSSL, it is implemented under the "`hmac_drbg`" name.
|
||||
* The "extra parameters" pointer for context initialisation should be
|
||||
* set to a pointer to the vtable for the underlying hash function (e.g.
|
||||
* pointer to `br_sha256_vtable` to use HMAC_DRBG with SHA-256).
|
||||
*
|
||||
* According to the NIST standard, each request shall produce up to
|
||||
* 2<sup>19</sup> bits (i.e. 64 kB of data); moreover, the context shall
|
||||
* be reseeded at least once every 2<sup>48</sup> requests. This
|
||||
* implementation does not maintain the reseed counter (the threshold is
|
||||
* too high to be reached in practice) and does not object to producing
|
||||
* more than 64 kB in a single request; thus, the code cannot fail,
|
||||
* which corresponds to the fact that the API has no room for error
|
||||
* codes. However, this implies that requesting more than 64 kB in one
|
||||
* `generate()` request, or making more than 2<sup>48</sup> requests
|
||||
* without reseeding, is formally out of NIST specification. There is
|
||||
* no currently known security penalty for exceeding the NIST limits,
|
||||
* and, in any case, HMAC_DRBG usage in implementing SSL/TLS always
|
||||
* stays much below these thresholds.
|
||||
*
|
||||
*
|
||||
* ## AESCTR_DRBG
|
||||
*
|
||||
* AESCTR_DRBG is a custom PRNG based on AES-128 in CTR mode. This is
|
||||
* meant to be used only in situations where you are desperate for
|
||||
* speed, and have a hardware-optimized AES/CTR implementation. Whether
|
||||
* this will yield perceptible improvements depends on what you use the
|
||||
* pseudorandom bytes for, and how many you want; for instance, RSA key
|
||||
* pair generation uses a substantial amount of randomness, and using
|
||||
* AESCTR_DRBG instead of HMAC_DRBG yields a 15 to 20% increase in key
|
||||
* generation speed on a recent x86 CPU (Intel Core i7-6567U at 3.30 GHz).
|
||||
*
|
||||
* Internally, it uses CTR mode with successive counter values, starting
|
||||
* at zero (counter value expressed over 128 bits, big-endian convention).
|
||||
* The counter is not allowed to reach 32768; thus, every 32768*16 bytes
|
||||
* at most, the `update()` function is run (on an empty seed, if none is
|
||||
* provided). The `update()` function computes the new AES-128 key by
|
||||
* applying a custom hash function to the concatenation of a state-dependent
|
||||
* word (encryption of an all-one block with the current key) and the new
|
||||
* seed. The custom hash function uses Hirose's construction over AES-256;
|
||||
* see the comments in `aesctr_drbg.c` for details.
|
||||
*
|
||||
* This DRBG does not follow an existing standard, and thus should be
|
||||
* considered as inadequate for production use until it has been properly
|
||||
* analysed.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \brief Class type for PRNG implementations.
|
||||
*
|
||||
* A `br_prng_class` instance references the methods implementing a PRNG.
|
||||
* Constant instances of this structure are defined for each implemented
|
||||
* PRNG. Such instances are also called "vtables".
|
||||
*/
|
||||
typedef struct br_prng_class_ br_prng_class;
struct br_prng_class_ {
	/**
	 * \brief Number of bytes needed for a context structure that can
	 * run this PRNG.
	 */
	size_t context_size;

	/**
	 * \brief Initialisation method.
	 *
	 * `ctx` points to the first field of the context (its vtable
	 * pointer); the method stores a pointer to the vtable in that
	 * field.
	 *
	 * Whether extra parameters are expected, and of what kind, is
	 * defined by each implementation. Requirements on the initial
	 * seed likewise depend on the implemented PRNG.
	 *
	 * \param ctx        PRNG context to initialise.
	 * \param params     extra parameters for the PRNG.
	 * \param seed       initial seed.
	 * \param seed_len   initial seed length (in bytes).
	 */
	void (*init)(const br_prng_class **ctx, const void *params,
		const void *seed, size_t seed_len);

	/**
	 * \brief Random bytes generation.
	 *
	 * Writes `len` pseudorandom bytes into `out` and updates the
	 * context accordingly.
	 *
	 * \param ctx   PRNG context.
	 * \param out   output buffer.
	 * \param len   number of pseudorandom bytes to produce.
	 */
	void (*generate)(const br_prng_class **ctx, void *out, size_t len);

	/**
	 * \brief Inject additional seed bytes.
	 *
	 * The supplied bytes are added to the internal entropy pool of
	 * the PRNG.
	 *
	 * \param ctx        PRNG context.
	 * \param seed       additional seed.
	 * \param seed_len   additional seed length (in bytes).
	 */
	void (*update)(const br_prng_class **ctx,
		const void *seed, size_t seed_len);
};
|
||||
|
||||
/**
|
||||
* \brief Context for HMAC_DRBG.
|
||||
*
|
||||
* The context contents are opaque, except the first field, which
|
||||
* supports OOP.
|
||||
*/
|
||||
typedef struct {
|
||||
/**
|
||||
* \brief Pointer to the vtable.
|
||||
*
|
||||
* This field is set with the initialisation method/function.
|
||||
*/
|
||||
const br_prng_class *vtable;
|
||||
#ifndef BR_DOXYGEN_IGNORE
|
||||
unsigned char K[64];
|
||||
unsigned char V[64];
|
||||
const br_hash_class *digest_class;
|
||||
#endif
|
||||
} br_hmac_drbg_context;
|
||||
|
||||
/**
|
||||
* \brief Statically allocated, constant vtable for HMAC_DRBG.
|
||||
*/
|
||||
extern const br_prng_class br_hmac_drbg_vtable;
|
||||
|
||||
/**
|
||||
* \brief HMAC_DRBG initialisation.
|
||||
*
|
||||
* The context to initialise is provided as a pointer to its first field
|
||||
* (the vtable pointer); this function sets that first field to a
|
||||
* pointer to the vtable.
|
||||
*
|
||||
* The `seed` value is what is called, in NIST terminology, the
|
||||
* concatenation of the "seed", "nonce" and "personalization string", in
|
||||
* that order.
|
||||
*
|
||||
* The `digest_class` parameter defines the underlying hash function.
|
||||
* Formally, the NIST standard specifies that the hash function shall
|
||||
* be only SHA-1 or one of the SHA-2 functions. This implementation also
|
||||
* works with any other implemented hash function (such as MD5), but
|
||||
* this is non-standard and therefore not recommended.
|
||||
*
|
||||
* \param ctx HMAC_DRBG context to initialise.
|
||||
* \param digest_class vtable for the underlying hash function.
|
||||
* \param seed initial seed.
|
||||
* \param seed_len initial seed length (in bytes).
|
||||
*/
|
||||
void br_hmac_drbg_init(br_hmac_drbg_context *ctx,
|
||||
const br_hash_class *digest_class, const void *seed, size_t seed_len);
|
||||
|
||||
/**
|
||||
* \brief Random bytes generation with HMAC_DRBG.
|
||||
*
|
||||
* This method produces `len` pseudorandom bytes, in the `out`
|
||||
* buffer. The context is updated accordingly. Formally, requesting
|
||||
* more than 65536 bytes in one request falls out of specification
|
||||
* limits (but it won't fail).
|
||||
*
|
||||
* \param ctx HMAC_DRBG context.
|
||||
* \param out output buffer.
|
||||
* \param len number of pseudorandom bytes to produce.
|
||||
*/
|
||||
void br_hmac_drbg_generate(br_hmac_drbg_context *ctx, void *out, size_t len);
|
||||
|
||||
/**
|
||||
* \brief Inject additional seed bytes in HMAC_DRBG.
|
||||
*
|
||||
* The provided seed bytes are added into the HMAC_DRBG internal
|
||||
* entropy pool. The process does not _replace_ existing entropy,
|
||||
* thus pushing non-random bytes (i.e. bytes which are known to the
|
||||
* attackers) does not degrade the overall quality of generated bytes.
|
||||
*
|
||||
* \param ctx HMAC_DRBG context.
|
||||
* \param seed additional seed.
|
||||
* \param seed_len additional seed length (in bytes).
|
||||
*/
|
||||
void br_hmac_drbg_update(br_hmac_drbg_context *ctx,
|
||||
const void *seed, size_t seed_len);
|
||||
|
||||
/**
|
||||
* \brief Get the hash function implementation used by a given instance of
|
||||
* HMAC_DRBG.
|
||||
*
|
||||
* This call MUST NOT be performed on a context which was not
|
||||
* previously initialised.
|
||||
*
|
||||
* \param ctx HMAC_DRBG context.
|
||||
* \return the hash function vtable.
|
||||
*/
|
||||
static inline const br_hash_class *
|
||||
br_hmac_drbg_get_hash(const br_hmac_drbg_context *ctx)
|
||||
{
|
||||
return ctx->digest_class;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Type for a provider of entropy seeds.
|
||||
*
|
||||
* A "seeder" is a function that is able to obtain random values from
|
||||
* some source and inject them as entropy seed in a PRNG. A seeder
|
||||
* shall guarantee that the total entropy of the injected seed is large
|
||||
* enough to seed a PRNG for purposes of cryptographic key generation
|
||||
* (i.e. at least 128 bits).
|
||||
*
|
||||
* A seeder may report a failure to obtain adequate entropy. Seeders
|
||||
* shall endeavour to fix transient errors themselves by trying again;
|
||||
* thus, callers may consider reported errors as permanent.
|
||||
*
|
||||
* \param ctx PRNG context to seed.
|
||||
* \return 1 on success, 0 on error.
|
||||
*/
|
||||
typedef int (*br_prng_seeder)(const br_prng_class **ctx);
|
||||
|
||||
/**
|
||||
* \brief Get a seeder backed by the operating system or hardware.
|
||||
*
|
||||
* Get a seeder that feeds on RNG facilities provided by the current
|
||||
* operating system or hardware. If no such facility is known, then 0
|
||||
* is returned.
|
||||
*
|
||||
* If `name` is not `NULL`, then `*name` is set to a symbolic string
|
||||
* that identifies the seeder implementation. If no seeder is returned
|
||||
* and `name` is not `NULL`, then `*name` is set to a pointer to the
|
||||
* constant string `"none"`.
|
||||
*
|
||||
* \param name receiver for seeder name, or `NULL`.
|
||||
* \return the system seeder, if available, or 0.
|
||||
*/
|
||||
br_prng_seeder br_prng_seeder_system(const char **name);
|
||||
|
||||
/**
|
||||
* \brief Context for AESCTR_DRBG.
|
||||
*
|
||||
* The context contents are opaque, except the first field, which
|
||||
* supports OOP.
|
||||
*/
|
||||
typedef struct {
|
||||
/**
|
||||
* \brief Pointer to the vtable.
|
||||
*
|
||||
* This field is set with the initialisation method/function.
|
||||
*/
|
||||
const br_prng_class *vtable;
|
||||
#ifndef BR_DOXYGEN_IGNORE
|
||||
br_aes_gen_ctr_keys sk;
|
||||
uint32_t cc;
|
||||
#endif
|
||||
} br_aesctr_drbg_context;
|
||||
|
||||
/**
|
||||
* \brief Statically allocated, constant vtable for AESCTR_DRBG.
|
||||
*/
|
||||
extern const br_prng_class br_aesctr_drbg_vtable;
|
||||
|
||||
/**
|
||||
* \brief AESCTR_DRBG initialisation.
|
||||
*
|
||||
* The context to initialise is provided as a pointer to its first field
|
||||
* (the vtable pointer); this function sets that first field to a
|
||||
* pointer to the vtable.
|
||||
*
|
||||
* The internal AES key is first set to the all-zero key; then, the
|
||||
* `br_aesctr_drbg_update()` function is called with the provided `seed`.
|
||||
* The call is performed even if the seed length (`seed_len`) is zero.
|
||||
*
|
||||
* The `aesctr` parameter defines the underlying AES/CTR implementation.
|
||||
*
|
||||
* \param ctx AESCTR_DRBG context to initialise.
|
||||
* \param aesctr vtable for the AES/CTR implementation.
|
||||
* \param seed initial seed (can be `NULL` if `seed_len` is zero).
|
||||
* \param seed_len initial seed length (in bytes).
|
||||
*/
|
||||
void br_aesctr_drbg_init(br_aesctr_drbg_context *ctx,
|
||||
const br_block_ctr_class *aesctr, const void *seed, size_t seed_len);
|
||||
|
||||
/**
|
||||
* \brief Random bytes generation with AESCTR_DRBG.
|
||||
*
|
||||
* This method produces `len` pseudorandom bytes, in the `out`
|
||||
* buffer. The context is updated accordingly.
|
||||
*
|
||||
* \param ctx AESCTR_DRBG context.
|
||||
* \param out output buffer.
|
||||
* \param len number of pseudorandom bytes to produce.
|
||||
*/
|
||||
void br_aesctr_drbg_generate(br_aesctr_drbg_context *ctx,
|
||||
void *out, size_t len);
|
||||
|
||||
/**
|
||||
* \brief Inject additional seed bytes in AESCTR_DRBG.
|
||||
*
|
||||
* The provided seed bytes are added into the AESCTR_DRBG internal
|
||||
* entropy pool. The process does not _replace_ existing entropy,
|
||||
* thus pushing non-random bytes (i.e. bytes which are known to the
|
||||
* attackers) does not degrade the overall quality of generated bytes.
|
||||
*
|
||||
* \param ctx AESCTR_DRBG context.
|
||||
* \param seed additional seed.
|
||||
* \param seed_len additional seed length (in bytes).
|
||||
*/
|
||||
void br_aesctr_drbg_update(br_aesctr_drbg_context *ctx,
|
||||
const void *seed, size_t seed_len);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,69 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_big_cbcdec_init(br_aes_big_cbcdec_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_big_cbcdec_vtable;
|
||||
ctx->num_rounds = br_aes_big_keysched_inv(ctx->skey, key, len);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_big_cbcdec_run(const br_aes_big_cbcdec_keys *ctx,
|
||||
void *iv, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf, *ivbuf;
|
||||
|
||||
ivbuf = iv;
|
||||
buf = data;
|
||||
while (len > 0) {
|
||||
unsigned char tmp[16];
|
||||
int i;
|
||||
|
||||
memcpy(tmp, buf, 16);
|
||||
br_aes_big_decrypt(ctx->num_rounds, ctx->skey, buf);
|
||||
for (i = 0; i < 16; i ++) {
|
||||
buf[i] ^= ivbuf[i];
|
||||
}
|
||||
memcpy(ivbuf, tmp, 16);
|
||||
buf += 16;
|
||||
len -= 16;
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_cbcdec_class br_aes_big_cbcdec_vtable = {
|
||||
sizeof(br_aes_big_cbcdec_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_cbcdec_class **, const void *, size_t))
|
||||
&br_aes_big_cbcdec_init,
|
||||
(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
|
||||
&br_aes_big_cbcdec_run
|
||||
};
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_big_cbcenc_init(br_aes_big_cbcenc_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_big_cbcenc_vtable;
|
||||
ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_big_cbcenc_run(const br_aes_big_cbcenc_keys *ctx,
|
||||
void *iv, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf, *ivbuf;
|
||||
|
||||
ivbuf = iv;
|
||||
buf = data;
|
||||
while (len > 0) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i ++) {
|
||||
buf[i] ^= ivbuf[i];
|
||||
}
|
||||
br_aes_big_encrypt(ctx->num_rounds, ctx->skey, buf);
|
||||
memcpy(ivbuf, buf, 16);
|
||||
buf += 16;
|
||||
len -= 16;
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_cbcenc_class br_aes_big_cbcenc_vtable = {
|
||||
sizeof(br_aes_big_cbcenc_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_cbcenc_class **, const void *, size_t))
|
||||
&br_aes_big_cbcenc_init,
|
||||
(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
|
||||
&br_aes_big_cbcenc_run
|
||||
};
|
||||
|
|
@ -0,0 +1,84 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_big_ctr_init(br_aes_big_ctr_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_big_ctr_vtable;
|
||||
ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
|
||||
}
|
||||
|
||||
/*
 * XOR the first len bytes of src into dst, byte by byte
 * (dst[k] ^= src[k]). A len of zero leaves dst untouched.
 */
static void
xorbuf(void *dst, const void *src, size_t len)
{
	unsigned char *out = dst;
	const unsigned char *in = src;
	size_t k;

	for (k = 0; k < len; k ++) {
		out[k] ^= in[k];
	}
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
uint32_t
|
||||
br_aes_big_ctr_run(const br_aes_big_ctr_keys *ctx,
|
||||
const void *iv, uint32_t cc, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
|
||||
buf = data;
|
||||
while (len > 0) {
|
||||
unsigned char tmp[16];
|
||||
|
||||
memcpy(tmp, iv, 12);
|
||||
br_enc32be(tmp + 12, cc ++);
|
||||
br_aes_big_encrypt(ctx->num_rounds, ctx->skey, tmp);
|
||||
if (len <= 16) {
|
||||
xorbuf(buf, tmp, len);
|
||||
break;
|
||||
}
|
||||
xorbuf(buf, tmp, 16);
|
||||
buf += 16;
|
||||
len -= 16;
|
||||
}
|
||||
return cc;
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
/*
 * Method table for the table-based ("big") AES CTR code; this is the
 * object br_aes_big_ctr_init() stores in ctx->vtable.
 *
 * The initializer is positional: the size of the key context, the
 * constants 16 and 4 (presumably block size in bytes and its base-2
 * logarithm -- confirm against the br_block_ctr_class declaration), then
 * the init and run entry points. The casts present the concrete
 * functions, which take a br_aes_big_ctr_keys pointer, under the generic
 * signatures that take a class pointer instead.
 */
const br_block_ctr_class br_aes_big_ctr_vtable = {
|
||||
sizeof(br_aes_big_ctr_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_ctr_class **, const void *, size_t))
|
||||
&br_aes_big_ctr_init,
|
||||
(uint32_t (*)(const br_block_ctr_class *const *,
|
||||
const void *, uint32_t, void *, size_t))
|
||||
&br_aes_big_ctr_run
|
||||
};
|
||||
|
|
@ -0,0 +1,142 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_big_ctrcbc_init(br_aes_big_ctrcbc_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_big_ctrcbc_vtable;
|
||||
ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
|
||||
}
|
||||
|
||||
/*
 * In-place XOR: for each of the first len bytes, dst[i] ^= src[i].
 * Nothing is touched when len is zero.
 */
static void
xorbuf(void *dst, const void *src, size_t len)
{
	unsigned char *out = dst;
	const unsigned char *in = src;
	const unsigned char *stop = in + len;

	while (in != stop) {
		*out ^= *in;
		out ++;
		in ++;
	}
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_big_ctrcbc_ctr(const br_aes_big_ctrcbc_keys *ctx,
|
||||
void *ctr, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf, *bctr;
|
||||
uint32_t cc0, cc1, cc2, cc3;
|
||||
|
||||
buf = data;
|
||||
bctr = ctr;
|
||||
cc3 = br_dec32be(bctr + 0);
|
||||
cc2 = br_dec32be(bctr + 4);
|
||||
cc1 = br_dec32be(bctr + 8);
|
||||
cc0 = br_dec32be(bctr + 12);
|
||||
while (len > 0) {
|
||||
unsigned char tmp[16];
|
||||
uint32_t carry;
|
||||
|
||||
br_enc32be(tmp + 0, cc3);
|
||||
br_enc32be(tmp + 4, cc2);
|
||||
br_enc32be(tmp + 8, cc1);
|
||||
br_enc32be(tmp + 12, cc0);
|
||||
br_aes_big_encrypt(ctx->num_rounds, ctx->skey, tmp);
|
||||
xorbuf(buf, tmp, 16);
|
||||
buf += 16;
|
||||
len -= 16;
|
||||
cc0 ++;
|
||||
carry = (~(cc0 | -cc0)) >> 31;
|
||||
cc1 += carry;
|
||||
carry &= (~(cc1 | -cc1)) >> 31;
|
||||
cc2 += carry;
|
||||
carry &= (~(cc2 | -cc2)) >> 31;
|
||||
cc3 += carry;
|
||||
}
|
||||
br_enc32be(bctr + 0, cc3);
|
||||
br_enc32be(bctr + 4, cc2);
|
||||
br_enc32be(bctr + 8, cc1);
|
||||
br_enc32be(bctr + 12, cc0);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_big_ctrcbc_mac(const br_aes_big_ctrcbc_keys *ctx,
|
||||
void *cbcmac, const void *data, size_t len)
|
||||
{
|
||||
const unsigned char *buf;
|
||||
|
||||
buf = data;
|
||||
while (len > 0) {
|
||||
xorbuf(cbcmac, buf, 16);
|
||||
br_aes_big_encrypt(ctx->num_rounds, ctx->skey, cbcmac);
|
||||
buf += 16;
|
||||
len -= 16;
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_big_ctrcbc_encrypt(const br_aes_big_ctrcbc_keys *ctx,
|
||||
void *ctr, void *cbcmac, void *data, size_t len)
|
||||
{
|
||||
br_aes_big_ctrcbc_ctr(ctx, ctr, data, len);
|
||||
br_aes_big_ctrcbc_mac(ctx, cbcmac, data, len);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_big_ctrcbc_decrypt(const br_aes_big_ctrcbc_keys *ctx,
|
||||
void *ctr, void *cbcmac, void *data, size_t len)
|
||||
{
|
||||
br_aes_big_ctrcbc_mac(ctx, cbcmac, data, len);
|
||||
br_aes_big_ctrcbc_ctr(ctx, ctr, data, len);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
/*
 * Method table for the table-based ("big") AES CTR + CBC-MAC code; this
 * is the object br_aes_big_ctrcbc_init() stores in ctx->vtable.
 *
 * The initializer is positional: the size of the key context, the
 * constants 16 and 4 (presumably block size in bytes and its base-2
 * logarithm -- confirm against the br_block_ctrcbc_class declaration),
 * then, in this order, the init, encrypt, decrypt, ctr and mac entry
 * points. The casts present the concrete functions, which take a
 * br_aes_big_ctrcbc_keys pointer, under the generic signatures that take
 * a class pointer instead.
 */
const br_block_ctrcbc_class br_aes_big_ctrcbc_vtable = {
|
||||
sizeof(br_aes_big_ctrcbc_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
|
||||
&br_aes_big_ctrcbc_init,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, void *, void *, size_t))
|
||||
&br_aes_big_ctrcbc_encrypt,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, void *, void *, size_t))
|
||||
&br_aes_big_ctrcbc_decrypt,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, void *, size_t))
|
||||
&br_aes_big_ctrcbc_ctr,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, const void *, size_t))
|
||||
&br_aes_big_ctrcbc_mac
|
||||
};
|
||||
|
|
@ -0,0 +1,254 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * Inverse S-box, indexed by byte value (256 entries).
 *
 * It is read by the final round of br_aes_big_decrypt(), which needs the
 * plain inverse substitution; the inner rounds use iSsm0 instead, and
 * br_aes_big_keysched_inv() does not read this table.
 */
|
||||
static const unsigned char iS[] = {
|
||||
0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E,
|
||||
0x81, 0xF3, 0xD7, 0xFB, 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
|
||||
0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB, 0x54, 0x7B, 0x94, 0x32,
|
||||
0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
|
||||
0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49,
|
||||
0x6D, 0x8B, 0xD1, 0x25, 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
|
||||
0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, 0x6C, 0x70, 0x48, 0x50,
|
||||
0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
|
||||
0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05,
|
||||
0xB8, 0xB3, 0x45, 0x06, 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
|
||||
0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, 0x3A, 0x91, 0x11, 0x41,
|
||||
0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
|
||||
0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8,
|
||||
0x1C, 0x75, 0xDF, 0x6E, 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
|
||||
0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B, 0xFC, 0x56, 0x3E, 0x4B,
|
||||
0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
|
||||
0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59,
|
||||
0x27, 0x80, 0xEC, 0x5F, 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
|
||||
0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, 0xA0, 0xE0, 0x3B, 0x4D,
|
||||
0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
|
||||
0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63,
|
||||
0x55, 0x21, 0x0C, 0x7D
|
||||
};
|
||||
|
||||
/*
 * Combined "inverse S-box + inverse column mixing" lookup table used by
 * the inner rounds of br_aes_big_decrypt() (256 entries, one per byte).
 *
 * For a byte x with v = iS[x], the entry packs four GF(2^8) products of
 * v, from most to least significant byte: 0x0E*v, 0x09*v, 0x0D*v,
 * 0x0B*v. Example: iS[0] = 0x52 gives 0x51F4A750.
 *
 * Only this table is stored; the iSboxExt1..3 macros derive the tables
 * for the other byte positions by rotating these words.
 */
static const uint32_t iSsm0[] = {
|
||||
0x51F4A750, 0x7E416553, 0x1A17A4C3, 0x3A275E96, 0x3BAB6BCB, 0x1F9D45F1,
|
||||
0xACFA58AB, 0x4BE30393, 0x2030FA55, 0xAD766DF6, 0x88CC7691, 0xF5024C25,
|
||||
0x4FE5D7FC, 0xC52ACBD7, 0x26354480, 0xB562A38F, 0xDEB15A49, 0x25BA1B67,
|
||||
0x45EA0E98, 0x5DFEC0E1, 0xC32F7502, 0x814CF012, 0x8D4697A3, 0x6BD3F9C6,
|
||||
0x038F5FE7, 0x15929C95, 0xBF6D7AEB, 0x955259DA, 0xD4BE832D, 0x587421D3,
|
||||
0x49E06929, 0x8EC9C844, 0x75C2896A, 0xF48E7978, 0x99583E6B, 0x27B971DD,
|
||||
0xBEE14FB6, 0xF088AD17, 0xC920AC66, 0x7DCE3AB4, 0x63DF4A18, 0xE51A3182,
|
||||
0x97513360, 0x62537F45, 0xB16477E0, 0xBB6BAE84, 0xFE81A01C, 0xF9082B94,
|
||||
0x70486858, 0x8F45FD19, 0x94DE6C87, 0x527BF8B7, 0xAB73D323, 0x724B02E2,
|
||||
0xE31F8F57, 0x6655AB2A, 0xB2EB2807, 0x2FB5C203, 0x86C57B9A, 0xD33708A5,
|
||||
0x302887F2, 0x23BFA5B2, 0x02036ABA, 0xED16825C, 0x8ACF1C2B, 0xA779B492,
|
||||
0xF307F2F0, 0x4E69E2A1, 0x65DAF4CD, 0x0605BED5, 0xD134621F, 0xC4A6FE8A,
|
||||
0x342E539D, 0xA2F355A0, 0x058AE132, 0xA4F6EB75, 0x0B83EC39, 0x4060EFAA,
|
||||
0x5E719F06, 0xBD6E1051, 0x3E218AF9, 0x96DD063D, 0xDD3E05AE, 0x4DE6BD46,
|
||||
0x91548DB5, 0x71C45D05, 0x0406D46F, 0x605015FF, 0x1998FB24, 0xD6BDE997,
|
||||
0x894043CC, 0x67D99E77, 0xB0E842BD, 0x07898B88, 0xE7195B38, 0x79C8EEDB,
|
||||
0xA17C0A47, 0x7C420FE9, 0xF8841EC9, 0x00000000, 0x09808683, 0x322BED48,
|
||||
0x1E1170AC, 0x6C5A724E, 0xFD0EFFFB, 0x0F853856, 0x3DAED51E, 0x362D3927,
|
||||
0x0A0FD964, 0x685CA621, 0x9B5B54D1, 0x24362E3A, 0x0C0A67B1, 0x9357E70F,
|
||||
0xB4EE96D2, 0x1B9B919E, 0x80C0C54F, 0x61DC20A2, 0x5A774B69, 0x1C121A16,
|
||||
0xE293BA0A, 0xC0A02AE5, 0x3C22E043, 0x121B171D, 0x0E090D0B, 0xF28BC7AD,
|
||||
0x2DB6A8B9, 0x141EA9C8, 0x57F11985, 0xAF75074C, 0xEE99DDBB, 0xA37F60FD,
|
||||
0xF701269F, 0x5C72F5BC, 0x44663BC5, 0x5BFB7E34, 0x8B432976, 0xCB23C6DC,
|
||||
0xB6EDFC68, 0xB8E4F163, 0xD731DCCA, 0x42638510, 0x13972240, 0x84C61120,
|
||||
0x854A247D, 0xD2BB3DF8, 0xAEF93211, 0xC729A16D, 0x1D9E2F4B, 0xDCB230F3,
|
||||
0x0D8652EC, 0x77C1E3D0, 0x2BB3166C, 0xA970B999, 0x119448FA, 0x47E96422,
|
||||
0xA8FC8CC4, 0xA0F03F1A, 0x567D2CD8, 0x223390EF, 0x87494EC7, 0xD938D1C1,
|
||||
0x8CCAA2FE, 0x98D40B36, 0xA6F581CF, 0xA57ADE28, 0xDAB78E26, 0x3FADBFA4,
|
||||
0x2C3A9DE4, 0x5078920D, 0x6A5FCC9B, 0x547E4662, 0xF68D13C2, 0x90D8B8E8,
|
||||
0x2E39F75E, 0x82C3AFF5, 0x9F5D80BE, 0x69D0937C, 0x6FD52DA9, 0xCF2512B3,
|
||||
0xC8AC993B, 0x10187DA7, 0xE89C636E, 0xDB3BBB7B, 0xCD267809, 0x6E5918F4,
|
||||
0xEC9AB701, 0x834F9AA8, 0xE6956E65, 0xAAFFE67E, 0x21BCCF08, 0xEF15E8E6,
|
||||
0xBAE79BD9, 0x4A6F36CE, 0xEA9F09D4, 0x29B07CD6, 0x31A4B2AF, 0x2A3F2331,
|
||||
0xC6A59430, 0x35A266C0, 0x744EBC37, 0xFC82CAA6, 0xE090D0B0, 0x33A7D815,
|
||||
0xF104984A, 0x41ECDAF7, 0x7FCD500E, 0x1791F62F, 0x764DD68D, 0x43EFB04D,
|
||||
0xCCAA4D54, 0xE49604DF, 0x9ED1B5E3, 0x4C6A881B, 0xC12C1FB8, 0x4665517F,
|
||||
0x9D5EEA04, 0x018C355D, 0xFA877473, 0xFB0B412E, 0xB3671D5A, 0x92DBD252,
|
||||
0xE9105633, 0x6DD64713, 0x9AD7618C, 0x37A10C7A, 0x59F8148E, 0xEB133C89,
|
||||
0xCEA927EE, 0xB761C935, 0xE11CE5ED, 0x7A47B13C, 0x9CD2DF59, 0x55F2733F,
|
||||
0x1814CE79, 0x73C737BF, 0x53F7CDEA, 0x5FFDAA5B, 0xDF3D6F14, 0x7844DB86,
|
||||
0xCAAFF381, 0xB968C43E, 0x3824342C, 0xC2A3405F, 0x161DC372, 0xBCE2250C,
|
||||
0x283C498B, 0xFF0D9541, 0x39A80171, 0x080CB3DE, 0xD8B4E49C, 0x6456C190,
|
||||
0x7BCB8461, 0xD532B670, 0x486C5C74, 0xD0B85742
|
||||
};
|
||||
|
||||
/*
 * Multiply a byte value by 2 in GF(2^8): shift left by one bit and, when
 * bit 8 of the shifted value is set, XOR with 0x11B to bring the result
 * back into 8 bits. The reduction is applied through a mask built from
 * that bit rather than through a conditional branch.
 */
static unsigned
mul2(unsigned x)
{
	unsigned dbl, mask;

	dbl = x << 1;
	mask = (unsigned)(-(int)(dbl >> 8));
	return dbl ^ (mask & 0x11B);
}
|
||||
|
||||
/*
 * Multiply a byte value by 9 in GF(2^8), computed as 8*x ^ x with three
 * chained mul2() doublings.
 */
static unsigned
mul9(unsigned x)
{
	unsigned x8;

	x8 = mul2(mul2(mul2(x)));
	return x8 ^ x;
}
|
||||
|
||||
/*
 * Multiply a byte value by 0x0B in GF(2^8), computed as 8*x ^ 2*x ^ x.
 */
static unsigned
mulb(unsigned x)
{
	unsigned x2, x8;

	x2 = mul2(x);
	x8 = mul2(mul2(x2));
	return x8 ^ x2 ^ x;
}
|
||||
|
||||
/*
 * Multiply a byte value by 0x0D in GF(2^8), computed as 8*x ^ 4*x ^ x.
 */
static unsigned
muld(unsigned x)
{
	unsigned x4, x8;

	x4 = mul2(mul2(x));
	x8 = mul2(x4);
	return x8 ^ x4 ^ x;
}
|
||||
|
||||
/*
 * Multiply a byte value by 0x0E in GF(2^8), computed as 8*x ^ 4*x ^ 2*x.
 */
static unsigned
mule(unsigned x)
{
	unsigned x2, x4, x8;

	x2 = mul2(x);
	x4 = mul2(x2);
	x8 = mul2(x4);
	return x8 ^ x4 ^ x2;
}
|
||||
|
||||
/* see inner.h */
|
||||
/*
 * Build the key schedule used for decryption.
 *
 * The regular schedule is first computed into skey by br_aes_keysched().
 * Every word from index 4 up to (but excluding) 4*num_rounds, i.e. the
 * sub-keys of the inner rounds, is then replaced by its image under the
 * inverse column mixing (coefficients 0x0E, 0x0B, 0x0D, 0x09), so that
 * decryption does not have to apply that step to the keys at run time.
 * The first and last sub-keys are left as br_aes_keysched() produced
 * them.
 *
 * Returns the round count reported by br_aes_keysched(); when that is 0
 * no word is transformed.
 */
unsigned
br_aes_big_keysched_inv(uint32_t *skey, const void *key, size_t key_len)
{
	unsigned num_rounds, idx, limit;

	num_rounds = br_aes_keysched(skey, key, key_len);
	limit = num_rounds << 2;
	for (idx = 4; idx < limit; idx ++) {
		uint32_t w, r0, r1, r2, r3;
		unsigned a, b, c, d;

		/* Split the word into its four bytes, most significant first. */
		w = skey[idx];
		a = w >> 24;
		b = (w >> 16) & 0xFF;
		c = (w >> 8) & 0xFF;
		d = w & 0xFF;

		/* Each output byte is one row of the inverse mixing matrix. */
		r0 = mule(a) ^ mulb(b) ^ muld(c) ^ mul9(d);
		r1 = mul9(a) ^ mule(b) ^ mulb(c) ^ muld(d);
		r2 = muld(a) ^ mul9(b) ^ mule(c) ^ mulb(d);
		r3 = mulb(a) ^ muld(b) ^ mul9(c) ^ mule(d);
		skey[idx] = (r0 << 24) | (r1 << 16) | (r2 << 8) | r3;
	}
	return num_rounds;
}
|
||||
|
||||
/*
 * Rotate the 32-bit value x right by n bits.
 *
 * The rotation count is reduced modulo 32 and both shift counts are kept
 * in the 0..31 range. The earlier form, (x << (32 - n)) | (x >> n),
 * shifted by 32 when n was 0, which is undefined behavior in C. Results
 * for the counts 8, 16 and 24 used by the lookup macros are unchanged.
 */
static inline uint32_t
rotr(uint32_t x, int n)
{
	unsigned r;

	r = (unsigned)n & 31u;
	return (x >> r) | (x << ((32u - r) & 31u));
}
|
||||
|
||||
/*
 * Lookup helpers for the inner decryption rounds, one per byte position.
 * Only iSsm0 is stored: the values for positions 1..3 are obtained by
 * rotating the iSsm0 entry right by 8, 16 or 24 bits. The argument is
 * used directly as a table index, so callers pass a value already
 * reduced to a byte (a ">> 24" or "& 0xFF" expression).
 */
#define iSboxExt0(x) (iSsm0[x])
|
||||
#define iSboxExt1(x) (rotr(iSsm0[x], 8))
|
||||
#define iSboxExt2(x) (rotr(iSsm0[x], 16))
|
||||
#define iSboxExt3(x) (rotr(iSsm0[x], 24))
|
||||
|
||||
/* see bearssl.h */
|
||||
void
|
||||
br_aes_big_decrypt(unsigned num_rounds, const uint32_t *skey, void *data)
|
||||
{
|
||||
unsigned char *buf;
|
||||
uint32_t s0, s1, s2, s3;
|
||||
uint32_t t0, t1, t2, t3;
|
||||
unsigned u;
|
||||
|
||||
buf = data;
|
||||
s0 = br_dec32be(buf);
|
||||
s1 = br_dec32be(buf + 4);
|
||||
s2 = br_dec32be(buf + 8);
|
||||
s3 = br_dec32be(buf + 12);
|
||||
s0 ^= skey[(num_rounds << 2) + 0];
|
||||
s1 ^= skey[(num_rounds << 2) + 1];
|
||||
s2 ^= skey[(num_rounds << 2) + 2];
|
||||
s3 ^= skey[(num_rounds << 2) + 3];
|
||||
for (u = num_rounds - 1; u > 0; u --) {
|
||||
uint32_t v0 = iSboxExt0(s0 >> 24)
|
||||
^ iSboxExt1((s3 >> 16) & 0xFF)
|
||||
^ iSboxExt2((s2 >> 8) & 0xFF)
|
||||
^ iSboxExt3(s1 & 0xFF);
|
||||
uint32_t v1 = iSboxExt0(s1 >> 24)
|
||||
^ iSboxExt1((s0 >> 16) & 0xFF)
|
||||
^ iSboxExt2((s3 >> 8) & 0xFF)
|
||||
^ iSboxExt3(s2 & 0xFF);
|
||||
uint32_t v2 = iSboxExt0(s2 >> 24)
|
||||
^ iSboxExt1((s1 >> 16) & 0xFF)
|
||||
^ iSboxExt2((s0 >> 8) & 0xFF)
|
||||
^ iSboxExt3(s3 & 0xFF);
|
||||
uint32_t v3 = iSboxExt0(s3 >> 24)
|
||||
^ iSboxExt1((s2 >> 16) & 0xFF)
|
||||
^ iSboxExt2((s1 >> 8) & 0xFF)
|
||||
^ iSboxExt3(s0 & 0xFF);
|
||||
s0 = v0;
|
||||
s1 = v1;
|
||||
s2 = v2;
|
||||
s3 = v3;
|
||||
s0 ^= skey[u << 2];
|
||||
s1 ^= skey[(u << 2) + 1];
|
||||
s2 ^= skey[(u << 2) + 2];
|
||||
s3 ^= skey[(u << 2) + 3];
|
||||
}
|
||||
t0 = ((uint32_t)iS[s0 >> 24] << 24)
|
||||
| ((uint32_t)iS[(s3 >> 16) & 0xFF] << 16)
|
||||
| ((uint32_t)iS[(s2 >> 8) & 0xFF] << 8)
|
||||
| (uint32_t)iS[s1 & 0xFF];
|
||||
t1 = ((uint32_t)iS[s1 >> 24] << 24)
|
||||
| ((uint32_t)iS[(s0 >> 16) & 0xFF] << 16)
|
||||
| ((uint32_t)iS[(s3 >> 8) & 0xFF] << 8)
|
||||
| (uint32_t)iS[s2 & 0xFF];
|
||||
t2 = ((uint32_t)iS[s2 >> 24] << 24)
|
||||
| ((uint32_t)iS[(s1 >> 16) & 0xFF] << 16)
|
||||
| ((uint32_t)iS[(s0 >> 8) & 0xFF] << 8)
|
||||
| (uint32_t)iS[s3 & 0xFF];
|
||||
t3 = ((uint32_t)iS[s3 >> 24] << 24)
|
||||
| ((uint32_t)iS[(s2 >> 16) & 0xFF] << 16)
|
||||
| ((uint32_t)iS[(s1 >> 8) & 0xFF] << 8)
|
||||
| (uint32_t)iS[s0 & 0xFF];
|
||||
s0 = t0 ^ skey[0];
|
||||
s1 = t1 ^ skey[1];
|
||||
s2 = t2 ^ skey[2];
|
||||
s3 = t3 ^ skey[3];
|
||||
br_enc32be(buf, s0);
|
||||
br_enc32be(buf + 4, s1);
|
||||
br_enc32be(buf + 8, s2);
|
||||
br_enc32be(buf + 12, s3);
|
||||
}
|
||||
|
|
@ -0,0 +1,157 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* Short local alias for the shared S-box table br_aes_S. */
#define S br_aes_S
|
||||
|
||||
/*
 * Combined "S-box + column mixing" lookup table used by the inner rounds
 * of br_aes_big_encrypt() (256 entries, one per byte value).
 *
 * For a byte x with v = br_aes_S[x], the entry packs, from most to least
 * significant byte: 2*v, v, v, 3*v (products in GF(2^8)). Example:
 * br_aes_S[0] = 0x63 gives 0xC66363A5.
 *
 * Only this table is stored; the SboxExt1..3 macros derive the tables
 * for the other byte positions by rotating these words.
 */
static const uint32_t Ssm0[] = {
|
||||
0xC66363A5, 0xF87C7C84, 0xEE777799, 0xF67B7B8D, 0xFFF2F20D, 0xD66B6BBD,
|
||||
0xDE6F6FB1, 0x91C5C554, 0x60303050, 0x02010103, 0xCE6767A9, 0x562B2B7D,
|
||||
0xE7FEFE19, 0xB5D7D762, 0x4DABABE6, 0xEC76769A, 0x8FCACA45, 0x1F82829D,
|
||||
0x89C9C940, 0xFA7D7D87, 0xEFFAFA15, 0xB25959EB, 0x8E4747C9, 0xFBF0F00B,
|
||||
0x41ADADEC, 0xB3D4D467, 0x5FA2A2FD, 0x45AFAFEA, 0x239C9CBF, 0x53A4A4F7,
|
||||
0xE4727296, 0x9BC0C05B, 0x75B7B7C2, 0xE1FDFD1C, 0x3D9393AE, 0x4C26266A,
|
||||
0x6C36365A, 0x7E3F3F41, 0xF5F7F702, 0x83CCCC4F, 0x6834345C, 0x51A5A5F4,
|
||||
0xD1E5E534, 0xF9F1F108, 0xE2717193, 0xABD8D873, 0x62313153, 0x2A15153F,
|
||||
0x0804040C, 0x95C7C752, 0x46232365, 0x9DC3C35E, 0x30181828, 0x379696A1,
|
||||
0x0A05050F, 0x2F9A9AB5, 0x0E070709, 0x24121236, 0x1B80809B, 0xDFE2E23D,
|
||||
0xCDEBEB26, 0x4E272769, 0x7FB2B2CD, 0xEA75759F, 0x1209091B, 0x1D83839E,
|
||||
0x582C2C74, 0x341A1A2E, 0x361B1B2D, 0xDC6E6EB2, 0xB45A5AEE, 0x5BA0A0FB,
|
||||
0xA45252F6, 0x763B3B4D, 0xB7D6D661, 0x7DB3B3CE, 0x5229297B, 0xDDE3E33E,
|
||||
0x5E2F2F71, 0x13848497, 0xA65353F5, 0xB9D1D168, 0x00000000, 0xC1EDED2C,
|
||||
0x40202060, 0xE3FCFC1F, 0x79B1B1C8, 0xB65B5BED, 0xD46A6ABE, 0x8DCBCB46,
|
||||
0x67BEBED9, 0x7239394B, 0x944A4ADE, 0x984C4CD4, 0xB05858E8, 0x85CFCF4A,
|
||||
0xBBD0D06B, 0xC5EFEF2A, 0x4FAAAAE5, 0xEDFBFB16, 0x864343C5, 0x9A4D4DD7,
|
||||
0x66333355, 0x11858594, 0x8A4545CF, 0xE9F9F910, 0x04020206, 0xFE7F7F81,
|
||||
0xA05050F0, 0x783C3C44, 0x259F9FBA, 0x4BA8A8E3, 0xA25151F3, 0x5DA3A3FE,
|
||||
0x804040C0, 0x058F8F8A, 0x3F9292AD, 0x219D9DBC, 0x70383848, 0xF1F5F504,
|
||||
0x63BCBCDF, 0x77B6B6C1, 0xAFDADA75, 0x42212163, 0x20101030, 0xE5FFFF1A,
|
||||
0xFDF3F30E, 0xBFD2D26D, 0x81CDCD4C, 0x180C0C14, 0x26131335, 0xC3ECEC2F,
|
||||
0xBE5F5FE1, 0x359797A2, 0x884444CC, 0x2E171739, 0x93C4C457, 0x55A7A7F2,
|
||||
0xFC7E7E82, 0x7A3D3D47, 0xC86464AC, 0xBA5D5DE7, 0x3219192B, 0xE6737395,
|
||||
0xC06060A0, 0x19818198, 0x9E4F4FD1, 0xA3DCDC7F, 0x44222266, 0x542A2A7E,
|
||||
0x3B9090AB, 0x0B888883, 0x8C4646CA, 0xC7EEEE29, 0x6BB8B8D3, 0x2814143C,
|
||||
0xA7DEDE79, 0xBC5E5EE2, 0x160B0B1D, 0xADDBDB76, 0xDBE0E03B, 0x64323256,
|
||||
0x743A3A4E, 0x140A0A1E, 0x924949DB, 0x0C06060A, 0x4824246C, 0xB85C5CE4,
|
||||
0x9FC2C25D, 0xBDD3D36E, 0x43ACACEF, 0xC46262A6, 0x399191A8, 0x319595A4,
|
||||
0xD3E4E437, 0xF279798B, 0xD5E7E732, 0x8BC8C843, 0x6E373759, 0xDA6D6DB7,
|
||||
0x018D8D8C, 0xB1D5D564, 0x9C4E4ED2, 0x49A9A9E0, 0xD86C6CB4, 0xAC5656FA,
|
||||
0xF3F4F407, 0xCFEAEA25, 0xCA6565AF, 0xF47A7A8E, 0x47AEAEE9, 0x10080818,
|
||||
0x6FBABAD5, 0xF0787888, 0x4A25256F, 0x5C2E2E72, 0x381C1C24, 0x57A6A6F1,
|
||||
0x73B4B4C7, 0x97C6C651, 0xCBE8E823, 0xA1DDDD7C, 0xE874749C, 0x3E1F1F21,
|
||||
0x964B4BDD, 0x61BDBDDC, 0x0D8B8B86, 0x0F8A8A85, 0xE0707090, 0x7C3E3E42,
|
||||
0x71B5B5C4, 0xCC6666AA, 0x904848D8, 0x06030305, 0xF7F6F601, 0x1C0E0E12,
|
||||
0xC26161A3, 0x6A35355F, 0xAE5757F9, 0x69B9B9D0, 0x17868691, 0x99C1C158,
|
||||
0x3A1D1D27, 0x279E9EB9, 0xD9E1E138, 0xEBF8F813, 0x2B9898B3, 0x22111133,
|
||||
0xD26969BB, 0xA9D9D970, 0x078E8E89, 0x339494A7, 0x2D9B9BB6, 0x3C1E1E22,
|
||||
0x15878792, 0xC9E9E920, 0x87CECE49, 0xAA5555FF, 0x50282878, 0xA5DFDF7A,
|
||||
0x038C8C8F, 0x59A1A1F8, 0x09898980, 0x1A0D0D17, 0x65BFBFDA, 0xD7E6E631,
|
||||
0x844242C6, 0xD06868B8, 0x824141C3, 0x299999B0, 0x5A2D2D77, 0x1E0F0F11,
|
||||
0x7BB0B0CB, 0xA85454FC, 0x6DBBBBD6, 0x2C16163A
|
||||
};
|
||||
|
||||
/*
 * Rotate the 32-bit value x right by n bits.
 *
 * The rotation count is reduced modulo 32 and both shift counts are kept
 * in the 0..31 range. The earlier form, (x << (32 - n)) | (x >> n),
 * shifted by 32 when n was 0, which is undefined behavior in C. Results
 * for the counts 8, 16 and 24 used by the lookup macros are unchanged.
 */
static inline uint32_t
rotr(uint32_t x, int n)
{
	unsigned r;

	r = (unsigned)n & 31u;
	return (x >> r) | (x << ((32u - r) & 31u));
}
|
||||
|
||||
/*
 * Lookup helpers for the inner encryption rounds, one per byte position.
 * Only Ssm0 is stored: the values for positions 1..3 are obtained by
 * rotating the Ssm0 entry right by 8, 16 or 24 bits. The argument is
 * used directly as a table index, so callers pass a value already
 * reduced to a byte (a ">> 24" or "& 0xFF" expression).
 */
#define SboxExt0(x) (Ssm0[x])
|
||||
#define SboxExt1(x) (rotr(Ssm0[x], 8))
|
||||
#define SboxExt2(x) (rotr(Ssm0[x], 16))
|
||||
#define SboxExt3(x) (rotr(Ssm0[x], 24))
|
||||
|
||||
|
||||
/* see bearssl.h */
|
||||
void
|
||||
br_aes_big_encrypt(unsigned num_rounds, const uint32_t *skey, void *data)
|
||||
{
|
||||
unsigned char *buf;
|
||||
uint32_t s0, s1, s2, s3;
|
||||
uint32_t t0, t1, t2, t3;
|
||||
unsigned u;
|
||||
|
||||
buf = data;
|
||||
s0 = br_dec32be(buf);
|
||||
s1 = br_dec32be(buf + 4);
|
||||
s2 = br_dec32be(buf + 8);
|
||||
s3 = br_dec32be(buf + 12);
|
||||
s0 ^= skey[0];
|
||||
s1 ^= skey[1];
|
||||
s2 ^= skey[2];
|
||||
s3 ^= skey[3];
|
||||
for (u = 1; u < num_rounds; u ++) {
|
||||
uint32_t v0, v1, v2, v3;
|
||||
|
||||
v0 = SboxExt0(s0 >> 24)
|
||||
^ SboxExt1((s1 >> 16) & 0xFF)
|
||||
^ SboxExt2((s2 >> 8) & 0xFF)
|
||||
^ SboxExt3(s3 & 0xFF);
|
||||
v1 = SboxExt0(s1 >> 24)
|
||||
^ SboxExt1((s2 >> 16) & 0xFF)
|
||||
^ SboxExt2((s3 >> 8) & 0xFF)
|
||||
^ SboxExt3(s0 & 0xFF);
|
||||
v2 = SboxExt0(s2 >> 24)
|
||||
^ SboxExt1((s3 >> 16) & 0xFF)
|
||||
^ SboxExt2((s0 >> 8) & 0xFF)
|
||||
^ SboxExt3(s1 & 0xFF);
|
||||
v3 = SboxExt0(s3 >> 24)
|
||||
^ SboxExt1((s0 >> 16) & 0xFF)
|
||||
^ SboxExt2((s1 >> 8) & 0xFF)
|
||||
^ SboxExt3(s2 & 0xFF);
|
||||
s0 = v0;
|
||||
s1 = v1;
|
||||
s2 = v2;
|
||||
s3 = v3;
|
||||
s0 ^= skey[u << 2];
|
||||
s1 ^= skey[(u << 2) + 1];
|
||||
s2 ^= skey[(u << 2) + 2];
|
||||
s3 ^= skey[(u << 2) + 3];
|
||||
}
|
||||
t0 = ((uint32_t)S[s0 >> 24] << 24)
|
||||
| ((uint32_t)S[(s1 >> 16) & 0xFF] << 16)
|
||||
| ((uint32_t)S[(s2 >> 8) & 0xFF] << 8)
|
||||
| (uint32_t)S[s3 & 0xFF];
|
||||
t1 = ((uint32_t)S[s1 >> 24] << 24)
|
||||
| ((uint32_t)S[(s2 >> 16) & 0xFF] << 16)
|
||||
| ((uint32_t)S[(s3 >> 8) & 0xFF] << 8)
|
||||
| (uint32_t)S[s0 & 0xFF];
|
||||
t2 = ((uint32_t)S[s2 >> 24] << 24)
|
||||
| ((uint32_t)S[(s3 >> 16) & 0xFF] << 16)
|
||||
| ((uint32_t)S[(s0 >> 8) & 0xFF] << 8)
|
||||
| (uint32_t)S[s1 & 0xFF];
|
||||
t3 = ((uint32_t)S[s3 >> 24] << 24)
|
||||
| ((uint32_t)S[(s0 >> 16) & 0xFF] << 16)
|
||||
| ((uint32_t)S[(s1 >> 8) & 0xFF] << 8)
|
||||
| (uint32_t)S[s2 & 0xFF];
|
||||
s0 = t0 ^ skey[num_rounds << 2];
|
||||
s1 = t1 ^ skey[(num_rounds << 2) + 1];
|
||||
s2 = t2 ^ skey[(num_rounds << 2) + 2];
|
||||
s3 = t3 ^ skey[(num_rounds << 2) + 3];
|
||||
br_enc32be(buf, s0);
|
||||
br_enc32be(buf + 4, s1);
|
||||
br_enc32be(buf + 8, s2);
|
||||
br_enc32be(buf + 12, s3);
|
||||
}
|
||||
|
|
@ -0,0 +1,112 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * Round constants for the key schedule, stored in the most significant
 * byte of each word. br_aes_keysched() XORs Rcon[k] into the first word
 * of each group of key words, k counting the groups generated so far.
 */
static const uint32_t Rcon[] = {
|
||||
0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000,
|
||||
0x40000000, 0x80000000, 0x1B000000, 0x36000000
|
||||
};
|
||||
|
||||
/* Short local alias for the S-box table br_aes_S defined below. */
#define S br_aes_S
|
||||
|
||||
/* see inner.h */
|
||||
/*
 * AES S-box, indexed by byte value (256 entries). It has external
 * linkage so that other AES code can share it; here it is used by
 * SubWord() for the key schedule.
 */
const unsigned char br_aes_S[] = {
|
||||
0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B,
|
||||
0xFE, 0xD7, 0xAB, 0x76, 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0,
|
||||
0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, 0xB7, 0xFD, 0x93, 0x26,
|
||||
0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
|
||||
0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2,
|
||||
0xEB, 0x27, 0xB2, 0x75, 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0,
|
||||
0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, 0x53, 0xD1, 0x00, 0xED,
|
||||
0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
|
||||
0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F,
|
||||
0x50, 0x3C, 0x9F, 0xA8, 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5,
|
||||
0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, 0xCD, 0x0C, 0x13, 0xEC,
|
||||
0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
|
||||
0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14,
|
||||
0xDE, 0x5E, 0x0B, 0xDB, 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C,
|
||||
0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, 0xE7, 0xC8, 0x37, 0x6D,
|
||||
0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
|
||||
0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F,
|
||||
0x4B, 0xBD, 0x8B, 0x8A, 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E,
|
||||
0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, 0xE1, 0xF8, 0x98, 0x11,
|
||||
0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
|
||||
0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F,
|
||||
0xB0, 0x54, 0xBB, 0x16
|
||||
};
|
||||
|
||||
static uint32_t
|
||||
SubWord(uint32_t x)
|
||||
{
|
||||
return ((uint32_t)S[x >> 24] << 24)
|
||||
| ((uint32_t)S[(x >> 16) & 0xFF] << 16)
|
||||
| ((uint32_t)S[(x >> 8) & 0xFF] << 8)
|
||||
| (uint32_t)S[x & 0xFF];
|
||||
}
|
||||
|
||||
/* see inner.h */
|
||||
unsigned
|
||||
br_aes_keysched(uint32_t *skey, const void *key, size_t key_len)
|
||||
{
|
||||
unsigned num_rounds;
|
||||
int i, j, k, nk, nkf;
|
||||
|
||||
switch (key_len) {
|
||||
case 16:
|
||||
num_rounds = 10;
|
||||
break;
|
||||
case 24:
|
||||
num_rounds = 12;
|
||||
break;
|
||||
case 32:
|
||||
num_rounds = 14;
|
||||
break;
|
||||
default:
|
||||
/* abort(); */
|
||||
return 0;
|
||||
}
|
||||
nk = (int)(key_len >> 2);
|
||||
nkf = (int)((num_rounds + 1) << 2);
|
||||
for (i = 0; i < nk; i ++) {
|
||||
skey[i] = br_dec32be((const unsigned char *)key + (i << 2));
|
||||
}
|
||||
for (i = nk, j = 0, k = 0; i < nkf; i ++) {
|
||||
uint32_t tmp;
|
||||
|
||||
tmp = skey[i - 1];
|
||||
if (j == 0) {
|
||||
tmp = (tmp << 8) | (tmp >> 24);
|
||||
tmp = SubWord(tmp) ^ Rcon[k];
|
||||
} else if (nk > 6 && j == 4) {
|
||||
tmp = SubWord(tmp);
|
||||
}
|
||||
skey[i] = skey[i - nk] ^ tmp;
|
||||
if (++ j == nk) {
|
||||
j = 0;
|
||||
k ++;
|
||||
}
|
||||
}
|
||||
return num_rounds;
|
||||
}
|
||||
|
|
@ -0,0 +1,328 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * Bitsliced AES S-box over eight 32-bit words (see inner.h).
 *
 * The eight words of q are read, pushed through the circuit below and
 * overwritten with the result. q[0] is the low bit and q[7] the high
 * bit of the bitsliced value.
 *
 * The gate list is a straightforward translation of the circuit
 * described by Boyar and Peralta in "A new combinational logic
 * minimization technique with applications to cryptology"
 * (https://eprint.iacr.org/2009/191.pdf). Variables x* (input) and
 * s* (output) are numbered in "reverse" order: x0/s0 is the high bit,
 * x7/s7 is the low bit.
 */
void
br_aes_ct_bitslice_Sbox(uint32_t *q)
{
	/* Inputs, copied out first so that q[] can be overwritten freely. */
	uint32_t x0 = q[7], x1 = q[6], x2 = q[5], x3 = q[4];
	uint32_t x4 = q[3], x5 = q[2], x6 = q[1], x7 = q[0];
	/* Outputs of the top linear layer. */
	uint32_t y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11;
	uint32_t y12, y13, y14, y15, y16, y17, y18, y19, y20, y21;
	/* AND-gate outputs feeding the bottom linear layer. */
	uint32_t z0, z1, z2, z3, z4, z5, z6, z7, z8;
	uint32_t z9, z10, z11, z12, z13, z14, z15, z16, z17;
	/* Intermediate wires. */
	uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11;
	uint32_t t12, t13, t14, t15, t16, t17, t18, t19, t20, t21, t22, t23;
	uint32_t t24, t25, t26, t27, t28, t29, t30, t31, t32, t33, t34, t35;
	uint32_t t36, t37, t38, t39, t40, t41, t42, t43, t44, t45, t46, t47;
	uint32_t t48, t49, t50, t51, t52, t53, t54, t55, t56, t57, t58, t59;
	uint32_t t60, t61, t62, t63, t64, t65, t66, t67;
	/* s3 is the only output that is also an input of another gate. */
	uint32_t s3;

	/*
	 * Top linear transformation.
	 */
	y14 = x3 ^ x5;
	y13 = x0 ^ x6;
	y9 = x0 ^ x3;
	y8 = x0 ^ x5;
	t0 = x1 ^ x2;
	y1 = t0 ^ x7;
	y4 = y1 ^ x3;
	y12 = y13 ^ y14;
	y2 = y1 ^ x0;
	y5 = y1 ^ x6;
	y3 = y5 ^ y8;
	t1 = x4 ^ y12;
	y15 = t1 ^ x5;
	y20 = t1 ^ x1;
	y6 = y15 ^ x7;
	y10 = y15 ^ t0;
	y11 = y20 ^ y9;
	y7 = x7 ^ y11;
	y17 = y10 ^ y11;
	y19 = y10 ^ y8;
	y16 = t0 ^ y11;
	y21 = y13 ^ y16;
	y18 = x0 ^ y16;

	/*
	 * Non-linear section, first part.
	 */
	t2 = y12 & y15;
	t3 = y3 & y6;
	t4 = t3 ^ t2;
	t5 = y4 & x7;
	t6 = t5 ^ t2;
	t7 = y13 & y16;
	t8 = y5 & y1;
	t9 = t8 ^ t7;
	t10 = y2 & y7;
	t11 = t10 ^ t7;
	t12 = y9 & y11;
	t13 = y14 & y17;
	t14 = t13 ^ t12;
	t15 = y8 & y10;
	t16 = t15 ^ t12;
	t17 = t4 ^ t14;
	t18 = t6 ^ t16;
	t19 = t9 ^ t14;
	t20 = t11 ^ t16;
	t21 = t17 ^ y20;
	t22 = t18 ^ y19;
	t23 = t19 ^ y21;
	t24 = t20 ^ y18;

	/*
	 * Non-linear section, second part.
	 */
	t25 = t21 ^ t22;
	t26 = t21 & t23;
	t27 = t24 ^ t26;
	t28 = t25 & t27;
	t29 = t28 ^ t22;
	t30 = t23 ^ t24;
	t31 = t22 ^ t26;
	t32 = t31 & t30;
	t33 = t32 ^ t24;
	t34 = t23 ^ t33;
	t35 = t27 ^ t33;
	t36 = t24 & t35;
	t37 = t36 ^ t34;
	t38 = t27 ^ t36;
	t39 = t29 & t38;
	t40 = t25 ^ t39;

	/*
	 * Non-linear section, third part.
	 */
	t41 = t40 ^ t37;
	t42 = t29 ^ t33;
	t43 = t29 ^ t40;
	t44 = t33 ^ t37;
	t45 = t42 ^ t41;
	z0 = t44 & y15;
	z1 = t37 & y6;
	z2 = t33 & x7;
	z3 = t43 & y16;
	z4 = t40 & y1;
	z5 = t29 & y7;
	z6 = t42 & y11;
	z7 = t45 & y17;
	z8 = t41 & y10;
	z9 = t44 & y12;
	z10 = t37 & y3;
	z11 = t33 & y4;
	z12 = t43 & y13;
	z13 = t40 & y5;
	z14 = t29 & y2;
	z15 = t42 & y9;
	z16 = t45 & y14;
	z17 = t41 & y8;

	/*
	 * Bottom linear transformation.
	 */
	t46 = z15 ^ z16;
	t47 = z10 ^ z11;
	t48 = z5 ^ z13;
	t49 = z9 ^ z10;
	t50 = z2 ^ z12;
	t51 = z2 ^ z5;
	t52 = z7 ^ z8;
	t53 = z0 ^ z3;
	t54 = z6 ^ z7;
	t55 = z16 ^ z17;
	t56 = z12 ^ t48;
	t57 = t50 ^ t53;
	t58 = z4 ^ t46;
	t59 = z3 ^ t54;
	t60 = t46 ^ t57;
	t61 = z14 ^ t57;
	t62 = t52 ^ t58;
	t63 = t49 ^ t58;
	t64 = z4 ^ t59;
	t65 = t61 ^ t62;
	t66 = z1 ^ t63;
	t67 = t64 ^ t65;
	s3 = t53 ^ t66;

	/*
	 * Outputs, stored from the high bit (s0) down to the low bit (s7).
	 */
	q[7] = t59 ^ t63;	/* s0 */
	q[6] = t64 ^ ~s3;	/* s1 */
	q[5] = t55 ^ ~t67;	/* s2 */
	q[4] = s3;		/* s3 */
	q[3] = t51 ^ t66;	/* s4 */
	q[2] = t47 ^ t65;	/* s5 */
	q[1] = t56 ^ ~t62;	/* s6 */
	q[0] = t48 ^ ~t60;	/* s7 */
}
|
||||
|
||||
/*
 * Orthogonalise the eight-word state in place (see inner.h).
 *
 * Three passes are run with strides 1, 2 and 4. In each pass, q[i] is
 * paired with q[i + stride]: the lower word keeps its low-mask bits
 * and receives the partner's low-mask bits shifted up by the stride;
 * the upper word keeps its high-mask bits and receives the lower
 * word's high-mask bits shifted down. The net effect is that bit
 * (8k + i) of q[j] and bit (8k + j) of q[i] trade places.
 */
void
br_aes_ct_ortho(uint32_t *q)
{
	static const uint32_t low_mask[3] = {
		0x55555555, 0x33333333, 0x0F0F0F0F
	};
	int pass, i;

	for (pass = 0; pass < 3; pass ++) {
		int stride = 1 << pass;
		uint32_t cl = low_mask[pass];
		uint32_t ch = ~cl;

		for (i = 0; i < 8; i ++) {
			uint32_t a, b;

			/* Only the lower index of each pair drives a swap. */
			if (i & stride) {
				continue;
			}
			a = q[i];
			b = q[i + stride];
			q[i] = (a & cl) | ((b & cl) << stride);
			q[i + stride] = ((a & ch) >> stride) | (b & ch);
		}
	}
}
|
||||
|
||||
/* Per-round constants XORed into the key schedule by br_aes_ct_keysched(). */
static const unsigned char Rcon[10] = {
	0x01, 0x02, 0x04, 0x08, 0x10,
	0x20, 0x40, 0x80, 0x1B, 0x36
};
|
||||
|
||||
/*
 * Run one 32-bit word through the bitsliced S-box: the word is copied
 * into all eight state words, the state is orthogonalised, substituted
 * and orthogonalised back, and the first word is returned.
 */
static uint32_t
sub_word(uint32_t x)
{
	uint32_t state[8];
	int n;

	n = 8;
	while (n -- > 0) {
		state[n] = x;
	}
	br_aes_ct_ortho(state);
	br_aes_ct_bitslice_Sbox(state);
	br_aes_ct_ortho(state);
	return state[0];
}
|
||||
|
||||
/* see inner.h */
|
||||
unsigned
|
||||
br_aes_ct_keysched(uint32_t *comp_skey, const void *key, size_t key_len)
|
||||
{
|
||||
unsigned num_rounds;
|
||||
int i, j, k, nk, nkf;
|
||||
uint32_t tmp;
|
||||
uint32_t skey[120];
|
||||
|
||||
switch (key_len) {
|
||||
case 16:
|
||||
num_rounds = 10;
|
||||
break;
|
||||
case 24:
|
||||
num_rounds = 12;
|
||||
break;
|
||||
case 32:
|
||||
num_rounds = 14;
|
||||
break;
|
||||
default:
|
||||
/* abort(); */
|
||||
return 0;
|
||||
}
|
||||
nk = (int)(key_len >> 2);
|
||||
nkf = (int)((num_rounds + 1) << 2);
|
||||
tmp = 0;
|
||||
for (i = 0; i < nk; i ++) {
|
||||
tmp = br_dec32le((const unsigned char *)key + (i << 2));
|
||||
skey[(i << 1) + 0] = tmp;
|
||||
skey[(i << 1) + 1] = tmp;
|
||||
}
|
||||
for (i = nk, j = 0, k = 0; i < nkf; i ++) {
|
||||
if (j == 0) {
|
||||
tmp = (tmp << 24) | (tmp >> 8);
|
||||
tmp = sub_word(tmp) ^ Rcon[k];
|
||||
} else if (nk > 6 && j == 4) {
|
||||
tmp = sub_word(tmp);
|
||||
}
|
||||
tmp ^= skey[(i - nk) << 1];
|
||||
skey[(i << 1) + 0] = tmp;
|
||||
skey[(i << 1) + 1] = tmp;
|
||||
if (++ j == nk) {
|
||||
j = 0;
|
||||
k ++;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < nkf; i += 4) {
|
||||
br_aes_ct_ortho(skey + (i << 1));
|
||||
}
|
||||
for (i = 0, j = 0; i < nkf; i ++, j += 2) {
|
||||
comp_skey[i] = (skey[j + 0] & 0x55555555)
|
||||
| (skey[j + 1] & 0xAAAAAAAA);
|
||||
}
|
||||
return num_rounds;
|
||||
}
|
||||
|
||||
/*
 * Expand a compact key schedule into its working form (see inner.h).
 *
 * For each of the 4 * (num_rounds + 1) words of comp_skey, two words
 * are written to skey: the even bits duplicated into their upper
 * neighbours, then the odd bits duplicated into their lower
 * neighbours.
 */
void
br_aes_ct_skey_expand(uint32_t *skey,
	unsigned num_rounds, const uint32_t *comp_skey)
{
	unsigned count, u;

	count = (num_rounds + 1) << 2;
	for (u = 0; u < count; u ++) {
		uint32_t even = comp_skey[u] & 0x55555555;
		uint32_t odd = comp_skey[u] & 0xAAAAAAAA;

		skey[(u << 1) + 0] = even | (even << 1);
		skey[(u << 1) + 1] = odd | (odd >> 1);
	}
}
|
||||
|
|
@ -0,0 +1,398 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * Bitsliced AES S-box over eight 64-bit words (see inner.h).
 *
 * The eight words of q are read, pushed through the circuit below and
 * overwritten with the result. q[0] is the low bit and q[7] the high
 * bit of the bitsliced value.
 *
 * The gate list is a straightforward translation of the circuit
 * described by Boyar and Peralta in "A new combinational logic
 * minimization technique with applications to cryptology"
 * (https://eprint.iacr.org/2009/191.pdf). Variables x* (input) and
 * s* (output) are numbered in "reverse" order: x0/s0 is the high bit,
 * x7/s7 is the low bit.
 */
void
br_aes_ct64_bitslice_Sbox(uint64_t *q)
{
	/* Inputs, copied out first so that q[] can be overwritten freely. */
	uint64_t x0 = q[7], x1 = q[6], x2 = q[5], x3 = q[4];
	uint64_t x4 = q[3], x5 = q[2], x6 = q[1], x7 = q[0];
	/* Outputs of the top linear layer. */
	uint64_t y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11;
	uint64_t y12, y13, y14, y15, y16, y17, y18, y19, y20, y21;
	/* AND-gate outputs feeding the bottom linear layer. */
	uint64_t z0, z1, z2, z3, z4, z5, z6, z7, z8;
	uint64_t z9, z10, z11, z12, z13, z14, z15, z16, z17;
	/* Intermediate wires. */
	uint64_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11;
	uint64_t t12, t13, t14, t15, t16, t17, t18, t19, t20, t21, t22, t23;
	uint64_t t24, t25, t26, t27, t28, t29, t30, t31, t32, t33, t34, t35;
	uint64_t t36, t37, t38, t39, t40, t41, t42, t43, t44, t45, t46, t47;
	uint64_t t48, t49, t50, t51, t52, t53, t54, t55, t56, t57, t58, t59;
	uint64_t t60, t61, t62, t63, t64, t65, t66, t67;
	/* s3 is the only output that is also an input of another gate. */
	uint64_t s3;

	/*
	 * Top linear transformation.
	 */
	y14 = x3 ^ x5;
	y13 = x0 ^ x6;
	y9 = x0 ^ x3;
	y8 = x0 ^ x5;
	t0 = x1 ^ x2;
	y1 = t0 ^ x7;
	y4 = y1 ^ x3;
	y12 = y13 ^ y14;
	y2 = y1 ^ x0;
	y5 = y1 ^ x6;
	y3 = y5 ^ y8;
	t1 = x4 ^ y12;
	y15 = t1 ^ x5;
	y20 = t1 ^ x1;
	y6 = y15 ^ x7;
	y10 = y15 ^ t0;
	y11 = y20 ^ y9;
	y7 = x7 ^ y11;
	y17 = y10 ^ y11;
	y19 = y10 ^ y8;
	y16 = t0 ^ y11;
	y21 = y13 ^ y16;
	y18 = x0 ^ y16;

	/*
	 * Non-linear section, first part.
	 */
	t2 = y12 & y15;
	t3 = y3 & y6;
	t4 = t3 ^ t2;
	t5 = y4 & x7;
	t6 = t5 ^ t2;
	t7 = y13 & y16;
	t8 = y5 & y1;
	t9 = t8 ^ t7;
	t10 = y2 & y7;
	t11 = t10 ^ t7;
	t12 = y9 & y11;
	t13 = y14 & y17;
	t14 = t13 ^ t12;
	t15 = y8 & y10;
	t16 = t15 ^ t12;
	t17 = t4 ^ t14;
	t18 = t6 ^ t16;
	t19 = t9 ^ t14;
	t20 = t11 ^ t16;
	t21 = t17 ^ y20;
	t22 = t18 ^ y19;
	t23 = t19 ^ y21;
	t24 = t20 ^ y18;

	/*
	 * Non-linear section, second part.
	 */
	t25 = t21 ^ t22;
	t26 = t21 & t23;
	t27 = t24 ^ t26;
	t28 = t25 & t27;
	t29 = t28 ^ t22;
	t30 = t23 ^ t24;
	t31 = t22 ^ t26;
	t32 = t31 & t30;
	t33 = t32 ^ t24;
	t34 = t23 ^ t33;
	t35 = t27 ^ t33;
	t36 = t24 & t35;
	t37 = t36 ^ t34;
	t38 = t27 ^ t36;
	t39 = t29 & t38;
	t40 = t25 ^ t39;

	/*
	 * Non-linear section, third part.
	 */
	t41 = t40 ^ t37;
	t42 = t29 ^ t33;
	t43 = t29 ^ t40;
	t44 = t33 ^ t37;
	t45 = t42 ^ t41;
	z0 = t44 & y15;
	z1 = t37 & y6;
	z2 = t33 & x7;
	z3 = t43 & y16;
	z4 = t40 & y1;
	z5 = t29 & y7;
	z6 = t42 & y11;
	z7 = t45 & y17;
	z8 = t41 & y10;
	z9 = t44 & y12;
	z10 = t37 & y3;
	z11 = t33 & y4;
	z12 = t43 & y13;
	z13 = t40 & y5;
	z14 = t29 & y2;
	z15 = t42 & y9;
	z16 = t45 & y14;
	z17 = t41 & y8;

	/*
	 * Bottom linear transformation.
	 */
	t46 = z15 ^ z16;
	t47 = z10 ^ z11;
	t48 = z5 ^ z13;
	t49 = z9 ^ z10;
	t50 = z2 ^ z12;
	t51 = z2 ^ z5;
	t52 = z7 ^ z8;
	t53 = z0 ^ z3;
	t54 = z6 ^ z7;
	t55 = z16 ^ z17;
	t56 = z12 ^ t48;
	t57 = t50 ^ t53;
	t58 = z4 ^ t46;
	t59 = z3 ^ t54;
	t60 = t46 ^ t57;
	t61 = z14 ^ t57;
	t62 = t52 ^ t58;
	t63 = t49 ^ t58;
	t64 = z4 ^ t59;
	t65 = t61 ^ t62;
	t66 = z1 ^ t63;
	t67 = t64 ^ t65;
	s3 = t53 ^ t66;

	/*
	 * Outputs, stored from the high bit (s0) down to the low bit (s7).
	 */
	q[7] = t59 ^ t63;	/* s0 */
	q[6] = t64 ^ ~s3;	/* s1 */
	q[5] = t55 ^ ~t67;	/* s2 */
	q[4] = s3;		/* s3 */
	q[3] = t51 ^ t66;	/* s4 */
	q[2] = t47 ^ t65;	/* s5 */
	q[1] = t56 ^ ~t62;	/* s6 */
	q[0] = t48 ^ ~t60;	/* s7 */
}
|
||||
|
||||
/*
 * Orthogonalise the eight-word 64-bit state in place (see inner.h).
 *
 * Three passes are run with strides 1, 2 and 4. In each pass, q[i] is
 * paired with q[i + stride]: the lower word keeps its low-mask bits
 * and receives the partner's low-mask bits shifted up by the stride;
 * the upper word keeps its high-mask bits and receives the lower
 * word's high-mask bits shifted down. The net effect is that bit
 * (8k + i) of q[j] and bit (8k + j) of q[i] trade places.
 */
void
br_aes_ct64_ortho(uint64_t *q)
{
	static const uint64_t low_mask[3] = {
		(uint64_t)0x5555555555555555,
		(uint64_t)0x3333333333333333,
		(uint64_t)0x0F0F0F0F0F0F0F0F
	};
	int pass, i;

	for (pass = 0; pass < 3; pass ++) {
		int stride = 1 << pass;
		uint64_t cl = low_mask[pass];
		uint64_t ch = ~cl;

		for (i = 0; i < 8; i ++) {
			uint64_t a, b;

			/* Only the lower index of each pair drives a swap. */
			if (i & stride) {
				continue;
			}
			a = q[i];
			b = q[i + stride];
			q[i] = (a & cl) | ((b & cl) << stride);
			q[i + stride] = ((a & ch) >> stride) | (b & ch);
		}
	}
}
|
||||
|
||||
/*
 * Pack four 32-bit words into two 64-bit words (see inner.h).
 *
 * Each input word has its four bytes spread to the even byte positions
 * of a 64-bit value. *q0 then holds the spread w[0] with the spread
 * w[2] in the odd byte positions; *q1 does the same for w[1] and w[3].
 */
void
br_aes_ct64_interleave_in(uint64_t *q0, uint64_t *q1, const uint32_t *w)
{
	uint64_t spread[4];
	int i;

	for (i = 0; i < 4; i ++) {
		uint64_t v = w[i];

		/* 16-bit halves to bits 0..15 and 32..47. */
		v = (v | (v << 16)) & (uint64_t)0x0000FFFF0000FFFF;
		/* Bytes to bits 0..7, 16..23, 32..39 and 48..55. */
		v = (v | (v << 8)) & (uint64_t)0x00FF00FF00FF00FF;
		spread[i] = v;
	}
	*q0 = spread[0] | (spread[2] << 8);
	*q1 = spread[1] | (spread[3] << 8);
}
|
||||
|
||||
/*
 * Unpack two 64-bit words into four 32-bit words (see inner.h).
 *
 * The even bytes of q0 and q1 are gathered into w[0] and w[1], and
 * their odd bytes into w[2] and w[3].
 */
void
br_aes_ct64_interleave_out(uint32_t *w, uint64_t q0, uint64_t q1)
{
	uint64_t lane[4];
	int i;

	lane[0] = q0;
	lane[1] = q1;
	lane[2] = q0 >> 8;
	lane[3] = q1 >> 8;
	for (i = 0; i < 4; i ++) {
		uint64_t v = lane[i] & (uint64_t)0x00FF00FF00FF00FF;

		/* Squeeze byte pairs together, then the two 16-bit halves. */
		v = (v | (v >> 8)) & (uint64_t)0x0000FFFF0000FFFF;
		w[i] = (uint32_t)v | (uint32_t)(v >> 16);
	}
}
|
||||
|
||||
/* Per-round constants XORed into the key schedule by br_aes_ct64_keysched(). */
static const unsigned char Rcon[10] = {
	0x01, 0x02, 0x04, 0x08, 0x10,
	0x20, 0x40, 0x80, 0x1B, 0x36
};
|
||||
|
||||
/*
 * Run one 32-bit word through the 64-bit bitsliced S-box: the word is
 * placed in the first state word (all others zero), the state is
 * orthogonalised, substituted and orthogonalised back, and the low
 * 32 bits of the first word are returned.
 */
static uint32_t
sub_word(uint32_t x)
{
	uint64_t state[8] = { 0 };

	state[0] = x;
	br_aes_ct64_ortho(state);
	br_aes_ct64_bitslice_Sbox(state);
	br_aes_ct64_ortho(state);
	return (uint32_t)state[0];
}
|
||||
|
||||
/* see inner.h */
|
||||
unsigned
|
||||
br_aes_ct64_keysched(uint64_t *comp_skey, const void *key, size_t key_len)
|
||||
{
|
||||
unsigned num_rounds;
|
||||
int i, j, k, nk, nkf;
|
||||
uint32_t tmp;
|
||||
uint32_t skey[60];
|
||||
|
||||
switch (key_len) {
|
||||
case 16:
|
||||
num_rounds = 10;
|
||||
break;
|
||||
case 24:
|
||||
num_rounds = 12;
|
||||
break;
|
||||
case 32:
|
||||
num_rounds = 14;
|
||||
break;
|
||||
default:
|
||||
/* abort(); */
|
||||
return 0;
|
||||
}
|
||||
nk = (int)(key_len >> 2);
|
||||
nkf = (int)((num_rounds + 1) << 2);
|
||||
br_range_dec32le(skey, (key_len >> 2), key);
|
||||
tmp = skey[(key_len >> 2) - 1];
|
||||
for (i = nk, j = 0, k = 0; i < nkf; i ++) {
|
||||
if (j == 0) {
|
||||
tmp = (tmp << 24) | (tmp >> 8);
|
||||
tmp = sub_word(tmp) ^ Rcon[k];
|
||||
} else if (nk > 6 && j == 4) {
|
||||
tmp = sub_word(tmp);
|
||||
}
|
||||
tmp ^= skey[i - nk];
|
||||
skey[i] = tmp;
|
||||
if (++ j == nk) {
|
||||
j = 0;
|
||||
k ++;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0, j = 0; i < nkf; i += 4, j += 2) {
|
||||
uint64_t q[8];
|
||||
|
||||
br_aes_ct64_interleave_in(&q[0], &q[4], skey + i);
|
||||
q[1] = q[0];
|
||||
q[2] = q[0];
|
||||
q[3] = q[0];
|
||||
q[5] = q[4];
|
||||
q[6] = q[4];
|
||||
q[7] = q[4];
|
||||
br_aes_ct64_ortho(q);
|
||||
comp_skey[j + 0] =
|
||||
(q[0] & (uint64_t)0x1111111111111111)
|
||||
| (q[1] & (uint64_t)0x2222222222222222)
|
||||
| (q[2] & (uint64_t)0x4444444444444444)
|
||||
| (q[3] & (uint64_t)0x8888888888888888);
|
||||
comp_skey[j + 1] =
|
||||
(q[4] & (uint64_t)0x1111111111111111)
|
||||
| (q[5] & (uint64_t)0x2222222222222222)
|
||||
| (q[6] & (uint64_t)0x4444444444444444)
|
||||
| (q[7] & (uint64_t)0x8888888888888888);
|
||||
}
|
||||
return num_rounds;
|
||||
}
|
||||
|
||||
/*
 * Expand a compact 64-bit key schedule into its working form
 * (see inner.h).
 *
 * Each of the 2 * (num_rounds + 1) compact words yields four output
 * words; output k takes bit k of every 4-bit group of the compact
 * word and widens it to fill the whole group.
 */
void
br_aes_ct64_skey_expand(uint64_t *skey,
	unsigned num_rounds, const uint64_t *comp_skey)
{
	unsigned u, n;

	n = (num_rounds + 1) << 1;
	for (u = 0; u < n; u ++) {
		uint64_t packed = comp_skey[u];
		int k;

		for (k = 0; k < 4; k ++) {
			uint64_t bits;

			bits = (packed >> k) & (uint64_t)0x1111111111111111;
			/* (bits << 4) - bits turns every set bit into 0xF. */
			skey[(u << 2) + k] = (bits << 4) - bits;
		}
	}
}
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_ct64_cbcdec_init(br_aes_ct64_cbcdec_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_ct64_cbcdec_vtable;
|
||||
ctx->num_rounds = br_aes_ct64_keysched(ctx->skey, key, len);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_ct64_cbcdec_run(const br_aes_ct64_cbcdec_keys *ctx,
|
||||
void *iv, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
uint64_t sk_exp[120];
|
||||
uint32_t ivw[4];
|
||||
|
||||
br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
|
||||
br_range_dec32le(ivw, 4, iv);
|
||||
buf = data;
|
||||
while (len > 0) {
|
||||
uint64_t q[8];
|
||||
uint32_t w1[16], w2[16];
|
||||
int i;
|
||||
|
||||
if (len >= 64) {
|
||||
br_range_dec32le(w1, 16, buf);
|
||||
} else {
|
||||
br_range_dec32le(w1, len >> 2, buf);
|
||||
}
|
||||
for (i = 0; i < 4; i ++) {
|
||||
br_aes_ct64_interleave_in(
|
||||
&q[i], &q[i + 4], w1 + (i << 2));
|
||||
}
|
||||
br_aes_ct64_ortho(q);
|
||||
br_aes_ct64_bitslice_decrypt(ctx->num_rounds, sk_exp, q);
|
||||
br_aes_ct64_ortho(q);
|
||||
for (i = 0; i < 4; i ++) {
|
||||
br_aes_ct64_interleave_out(
|
||||
w2 + (i << 2), q[i], q[i + 4]);
|
||||
}
|
||||
for (i = 0; i < 4; i ++) {
|
||||
w2[i] ^= ivw[i];
|
||||
}
|
||||
if (len >= 64) {
|
||||
for (i = 4; i < 16; i ++) {
|
||||
w2[i] ^= w1[i - 4];
|
||||
}
|
||||
memcpy(ivw, w1 + 12, sizeof ivw);
|
||||
br_range_enc32le(buf, w2, 16);
|
||||
} else {
|
||||
int j;
|
||||
|
||||
j = (int)(len >> 2);
|
||||
for (i = 4; i < j; i ++) {
|
||||
w2[i] ^= w1[i - 4];
|
||||
}
|
||||
memcpy(ivw, w1 + j - 4, sizeof ivw);
|
||||
br_range_enc32le(buf, w2, j);
|
||||
break;
|
||||
}
|
||||
buf += 64;
|
||||
len -= 64;
|
||||
}
|
||||
br_range_enc32le(iv, ivw, 4);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_cbcdec_class br_aes_ct64_cbcdec_vtable = {
|
||||
sizeof(br_aes_ct64_cbcdec_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_cbcdec_class **, const void *, size_t))
|
||||
&br_aes_ct64_cbcdec_init,
|
||||
(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
|
||||
&br_aes_ct64_cbcdec_run
|
||||
};
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_ct64_cbcenc_init(br_aes_ct64_cbcenc_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_ct64_cbcenc_vtable;
|
||||
ctx->num_rounds = br_aes_ct64_keysched(ctx->skey, key, len);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_ct64_cbcenc_run(const br_aes_ct64_cbcenc_keys *ctx,
|
||||
void *iv, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
uint64_t sk_exp[120];
|
||||
uint32_t ivw[4];
|
||||
|
||||
br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
|
||||
br_range_dec32le(ivw, 4, iv);
|
||||
buf = data;
|
||||
while (len > 0) {
|
||||
uint32_t w[4];
|
||||
uint64_t q[8];
|
||||
|
||||
w[0] = ivw[0] ^ br_dec32le(buf);
|
||||
w[1] = ivw[1] ^ br_dec32le(buf + 4);
|
||||
w[2] = ivw[2] ^ br_dec32le(buf + 8);
|
||||
w[3] = ivw[3] ^ br_dec32le(buf + 12);
|
||||
br_aes_ct64_interleave_in(&q[0], &q[4], w);
|
||||
br_aes_ct64_ortho(q);
|
||||
br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
|
||||
br_aes_ct64_ortho(q);
|
||||
br_aes_ct64_interleave_out(w, q[0], q[4]);
|
||||
memcpy(ivw, w, sizeof w);
|
||||
br_enc32le(buf, w[0]);
|
||||
br_enc32le(buf + 4, w[1]);
|
||||
br_enc32le(buf + 8, w[2]);
|
||||
br_enc32le(buf + 12, w[3]);
|
||||
buf += 16;
|
||||
len -= 16;
|
||||
}
|
||||
br_range_enc32le(iv, ivw, 4);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_cbcenc_class br_aes_ct64_cbcenc_vtable = {
|
||||
sizeof(br_aes_ct64_cbcenc_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_cbcenc_class **, const void *, size_t))
|
||||
&br_aes_ct64_cbcenc_init,
|
||||
(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
|
||||
&br_aes_ct64_cbcenc_run
|
||||
};
|
||||
|
|
@ -0,0 +1,114 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_ct64_ctr_init(br_aes_ct64_ctr_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_ct64_ctr_vtable;
|
||||
ctx->num_rounds = br_aes_ct64_keysched(ctx->skey, key, len);
|
||||
}
|
||||
|
||||
/*
 * XOR the first len bytes of src into dst (dst[i] ^= src[i]).
 */
static void
xorbuf(void *dst, const void *src, size_t len)
{
	unsigned char *out;
	const unsigned char *in;
	size_t u;

	out = dst;
	in = src;
	for (u = 0; u < len; u ++) {
		out[u] ^= in[u];
	}
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
uint32_t
|
||||
br_aes_ct64_ctr_run(const br_aes_ct64_ctr_keys *ctx,
|
||||
const void *iv, uint32_t cc, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
uint32_t ivw[16];
|
||||
uint64_t sk_exp[120];
|
||||
|
||||
br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
|
||||
br_range_dec32le(ivw, 3, iv);
|
||||
memcpy(ivw + 4, ivw, 3 * sizeof(uint32_t));
|
||||
memcpy(ivw + 8, ivw, 3 * sizeof(uint32_t));
|
||||
memcpy(ivw + 12, ivw, 3 * sizeof(uint32_t));
|
||||
buf = data;
|
||||
while (len > 0) {
|
||||
uint64_t q[8];
|
||||
uint32_t w[16];
|
||||
unsigned char tmp[64];
|
||||
int i;
|
||||
|
||||
/*
|
||||
* TODO: see if we can save on the first br_aes_ct64_ortho()
|
||||
* call, since iv0/iv1/iv2 are constant for the whole run.
|
||||
*/
|
||||
memcpy(w, ivw, sizeof ivw);
|
||||
w[3] = br_swap32(cc);
|
||||
w[7] = br_swap32(cc + 1);
|
||||
w[11] = br_swap32(cc + 2);
|
||||
w[15] = br_swap32(cc + 3);
|
||||
for (i = 0; i < 4; i ++) {
|
||||
br_aes_ct64_interleave_in(
|
||||
&q[i], &q[i + 4], w + (i << 2));
|
||||
}
|
||||
br_aes_ct64_ortho(q);
|
||||
br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
|
||||
br_aes_ct64_ortho(q);
|
||||
for (i = 0; i < 4; i ++) {
|
||||
br_aes_ct64_interleave_out(
|
||||
w + (i << 2), q[i], q[i + 4]);
|
||||
}
|
||||
br_range_enc32le(tmp, w, 16);
|
||||
if (len <= 64) {
|
||||
xorbuf(buf, tmp, len);
|
||||
cc += (uint32_t)len >> 4;
|
||||
break;
|
||||
}
|
||||
xorbuf(buf, tmp, 64);
|
||||
buf += 64;
|
||||
len -= 64;
|
||||
cc += 4;
|
||||
}
|
||||
return cc;
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_ctr_class br_aes_ct64_ctr_vtable = {
|
||||
sizeof(br_aes_ct64_ctr_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_ctr_class **, const void *, size_t))
|
||||
&br_aes_ct64_ctr_init,
|
||||
(uint32_t (*)(const br_block_ctr_class *const *,
|
||||
const void *, uint32_t, void *, size_t))
|
||||
&br_aes_ct64_ctr_run
|
||||
};
|
||||
|
|
@ -0,0 +1,433 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_ct64_ctrcbc_init(br_aes_ct64_ctrcbc_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_ct64_ctrcbc_vtable;
|
||||
ctx->num_rounds = br_aes_ct64_keysched(ctx->skey, key, len);
|
||||
}
|
||||
|
||||
/*
 * XOR the first len bytes of src into dst (dst[i] ^= src[i]).
 */
static void
xorbuf(void *dst, const void *src, size_t len)
{
	unsigned char *out;
	const unsigned char *in, *stop;

	out = dst;
	in = src;
	for (stop = in + len; in != stop; in ++, out ++) {
		*out ^= *in;
	}
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_ct64_ctrcbc_ctr(const br_aes_ct64_ctrcbc_keys *ctx,
|
||||
void *ctr, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
unsigned char *ivbuf;
|
||||
uint32_t iv0, iv1, iv2, iv3;
|
||||
uint64_t sk_exp[120];
|
||||
|
||||
br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
|
||||
|
||||
/*
|
||||
* We keep the counter as four 32-bit values, with big-endian
|
||||
* convention, because that's what is expected for purposes of
|
||||
* incrementing the counter value.
|
||||
*/
|
||||
ivbuf = ctr;
|
||||
iv0 = br_dec32be(ivbuf + 0);
|
||||
iv1 = br_dec32be(ivbuf + 4);
|
||||
iv2 = br_dec32be(ivbuf + 8);
|
||||
iv3 = br_dec32be(ivbuf + 12);
|
||||
|
||||
buf = data;
|
||||
while (len > 0) {
|
||||
uint64_t q[8];
|
||||
uint32_t w[16];
|
||||
unsigned char tmp[64];
|
||||
int i, j;
|
||||
|
||||
/*
|
||||
* The bitslice implementation expects values in
|
||||
* little-endian convention, so we have to byteswap them.
|
||||
*/
|
||||
j = (len >= 64) ? 16 : (int)(len >> 2);
|
||||
for (i = 0; i < j; i += 4) {
|
||||
uint32_t carry;
|
||||
|
||||
w[i + 0] = br_swap32(iv0);
|
||||
w[i + 1] = br_swap32(iv1);
|
||||
w[i + 2] = br_swap32(iv2);
|
||||
w[i + 3] = br_swap32(iv3);
|
||||
iv3 ++;
|
||||
carry = ~(iv3 | -iv3) >> 31;
|
||||
iv2 += carry;
|
||||
carry &= -(~(iv2 | -iv2) >> 31);
|
||||
iv1 += carry;
|
||||
carry &= -(~(iv1 | -iv1) >> 31);
|
||||
iv0 += carry;
|
||||
}
|
||||
memset(w + i, 0, (16 - i) * sizeof(uint32_t));
|
||||
|
||||
for (i = 0; i < 4; i ++) {
|
||||
br_aes_ct64_interleave_in(
|
||||
&q[i], &q[i + 4], w + (i << 2));
|
||||
}
|
||||
br_aes_ct64_ortho(q);
|
||||
br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
|
||||
br_aes_ct64_ortho(q);
|
||||
for (i = 0; i < 4; i ++) {
|
||||
br_aes_ct64_interleave_out(
|
||||
w + (i << 2), q[i], q[i + 4]);
|
||||
}
|
||||
|
||||
br_range_enc32le(tmp, w, 16);
|
||||
if (len <= 64) {
|
||||
xorbuf(buf, tmp, len);
|
||||
break;
|
||||
}
|
||||
xorbuf(buf, tmp, 64);
|
||||
buf += 64;
|
||||
len -= 64;
|
||||
}
|
||||
br_enc32be(ivbuf + 0, iv0);
|
||||
br_enc32be(ivbuf + 4, iv1);
|
||||
br_enc32be(ivbuf + 8, iv2);
|
||||
br_enc32be(ivbuf + 12, iv3);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_ct64_ctrcbc_mac(const br_aes_ct64_ctrcbc_keys *ctx,
|
||||
void *cbcmac, const void *data, size_t len)
|
||||
{
|
||||
const unsigned char *buf;
|
||||
uint32_t cm0, cm1, cm2, cm3;
|
||||
uint64_t q[8];
|
||||
uint64_t sk_exp[120];
|
||||
|
||||
br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
|
||||
|
||||
cm0 = br_dec32le((unsigned char *)cbcmac + 0);
|
||||
cm1 = br_dec32le((unsigned char *)cbcmac + 4);
|
||||
cm2 = br_dec32le((unsigned char *)cbcmac + 8);
|
||||
cm3 = br_dec32le((unsigned char *)cbcmac + 12);
|
||||
|
||||
buf = data;
|
||||
memset(q, 0, sizeof q);
|
||||
while (len > 0) {
|
||||
uint32_t w[4];
|
||||
|
||||
w[0] = cm0 ^ br_dec32le(buf + 0);
|
||||
w[1] = cm1 ^ br_dec32le(buf + 4);
|
||||
w[2] = cm2 ^ br_dec32le(buf + 8);
|
||||
w[3] = cm3 ^ br_dec32le(buf + 12);
|
||||
|
||||
br_aes_ct64_interleave_in(&q[0], &q[4], w);
|
||||
br_aes_ct64_ortho(q);
|
||||
br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
|
||||
br_aes_ct64_ortho(q);
|
||||
br_aes_ct64_interleave_out(w, q[0], q[4]);
|
||||
|
||||
cm0 = w[0];
|
||||
cm1 = w[1];
|
||||
cm2 = w[2];
|
||||
cm3 = w[3];
|
||||
buf += 16;
|
||||
len -= 16;
|
||||
}
|
||||
|
||||
br_enc32le((unsigned char *)cbcmac + 0, cm0);
|
||||
br_enc32le((unsigned char *)cbcmac + 4, cm1);
|
||||
br_enc32le((unsigned char *)cbcmac + 8, cm2);
|
||||
br_enc32le((unsigned char *)cbcmac + 12, cm3);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_ct64_ctrcbc_encrypt(const br_aes_ct64_ctrcbc_keys *ctx,
|
||||
void *ctr, void *cbcmac, void *data, size_t len)
|
||||
{
|
||||
/*
|
||||
* When encrypting, the CBC-MAC processing must be lagging by
|
||||
* one block, since it operates on the encrypted values, so
|
||||
* it must wait for that encryption to complete.
|
||||
*/
|
||||
|
||||
unsigned char *buf;
|
||||
unsigned char *ivbuf;
|
||||
uint32_t iv0, iv1, iv2, iv3;
|
||||
uint32_t cm0, cm1, cm2, cm3;
|
||||
uint64_t sk_exp[120];
|
||||
uint64_t q[8];
|
||||
int first_iter;
|
||||
|
||||
br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
|
||||
|
||||
/*
|
||||
* We keep the counter as four 32-bit values, with big-endian
|
||||
* convention, because that's what is expected for purposes of
|
||||
* incrementing the counter value.
|
||||
*/
|
||||
ivbuf = ctr;
|
||||
iv0 = br_dec32be(ivbuf + 0);
|
||||
iv1 = br_dec32be(ivbuf + 4);
|
||||
iv2 = br_dec32be(ivbuf + 8);
|
||||
iv3 = br_dec32be(ivbuf + 12);
|
||||
|
||||
/*
|
||||
* The current CBC-MAC value is kept in little-endian convention.
|
||||
*/
|
||||
cm0 = br_dec32le((unsigned char *)cbcmac + 0);
|
||||
cm1 = br_dec32le((unsigned char *)cbcmac + 4);
|
||||
cm2 = br_dec32le((unsigned char *)cbcmac + 8);
|
||||
cm3 = br_dec32le((unsigned char *)cbcmac + 12);
|
||||
|
||||
buf = data;
|
||||
first_iter = 1;
|
||||
memset(q, 0, sizeof q);
|
||||
while (len > 0) {
|
||||
uint32_t w[8], carry;
|
||||
|
||||
/*
|
||||
* The bitslice implementation expects values in
|
||||
* little-endian convention, so we have to byteswap them.
|
||||
*/
|
||||
w[0] = br_swap32(iv0);
|
||||
w[1] = br_swap32(iv1);
|
||||
w[2] = br_swap32(iv2);
|
||||
w[3] = br_swap32(iv3);
|
||||
iv3 ++;
|
||||
carry = ~(iv3 | -iv3) >> 31;
|
||||
iv2 += carry;
|
||||
carry &= -(~(iv2 | -iv2) >> 31);
|
||||
iv1 += carry;
|
||||
carry &= -(~(iv1 | -iv1) >> 31);
|
||||
iv0 += carry;
|
||||
|
||||
/*
|
||||
* The block for CBC-MAC.
|
||||
*/
|
||||
w[4] = cm0;
|
||||
w[5] = cm1;
|
||||
w[6] = cm2;
|
||||
w[7] = cm3;
|
||||
|
||||
br_aes_ct64_interleave_in(&q[0], &q[4], w);
|
||||
br_aes_ct64_interleave_in(&q[1], &q[5], w + 4);
|
||||
br_aes_ct64_ortho(q);
|
||||
br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
|
||||
br_aes_ct64_ortho(q);
|
||||
br_aes_ct64_interleave_out(w, q[0], q[4]);
|
||||
br_aes_ct64_interleave_out(w + 4, q[1], q[5]);
|
||||
|
||||
/*
|
||||
* We do the XOR with the plaintext in 32-bit registers,
|
||||
* so that the value are available for CBC-MAC processing
|
||||
* as well.
|
||||
*/
|
||||
w[0] ^= br_dec32le(buf + 0);
|
||||
w[1] ^= br_dec32le(buf + 4);
|
||||
w[2] ^= br_dec32le(buf + 8);
|
||||
w[3] ^= br_dec32le(buf + 12);
|
||||
br_enc32le(buf + 0, w[0]);
|
||||
br_enc32le(buf + 4, w[1]);
|
||||
br_enc32le(buf + 8, w[2]);
|
||||
br_enc32le(buf + 12, w[3]);
|
||||
|
||||
buf += 16;
|
||||
len -= 16;
|
||||
|
||||
/*
|
||||
* We set the cm* values to the block to encrypt in the
|
||||
* next iteration.
|
||||
*/
|
||||
if (first_iter) {
|
||||
first_iter = 0;
|
||||
cm0 ^= w[0];
|
||||
cm1 ^= w[1];
|
||||
cm2 ^= w[2];
|
||||
cm3 ^= w[3];
|
||||
} else {
|
||||
cm0 = w[0] ^ w[4];
|
||||
cm1 = w[1] ^ w[5];
|
||||
cm2 = w[2] ^ w[6];
|
||||
cm3 = w[3] ^ w[7];
|
||||
}
|
||||
|
||||
/*
|
||||
* If this was the last iteration, then compute the
|
||||
* extra block encryption to complete CBC-MAC.
|
||||
*/
|
||||
if (len == 0) {
|
||||
w[0] = cm0;
|
||||
w[1] = cm1;
|
||||
w[2] = cm2;
|
||||
w[3] = cm3;
|
||||
br_aes_ct64_interleave_in(&q[0], &q[4], w);
|
||||
br_aes_ct64_ortho(q);
|
||||
br_aes_ct64_bitslice_encrypt(
|
||||
ctx->num_rounds, sk_exp, q);
|
||||
br_aes_ct64_ortho(q);
|
||||
br_aes_ct64_interleave_out(w, q[0], q[4]);
|
||||
cm0 = w[0];
|
||||
cm1 = w[1];
|
||||
cm2 = w[2];
|
||||
cm3 = w[3];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
br_enc32be(ivbuf + 0, iv0);
|
||||
br_enc32be(ivbuf + 4, iv1);
|
||||
br_enc32be(ivbuf + 8, iv2);
|
||||
br_enc32be(ivbuf + 12, iv3);
|
||||
br_enc32le((unsigned char *)cbcmac + 0, cm0);
|
||||
br_enc32le((unsigned char *)cbcmac + 4, cm1);
|
||||
br_enc32le((unsigned char *)cbcmac + 8, cm2);
|
||||
br_enc32le((unsigned char *)cbcmac + 12, cm3);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_ct64_ctrcbc_decrypt(const br_aes_ct64_ctrcbc_keys *ctx,
|
||||
void *ctr, void *cbcmac, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
unsigned char *ivbuf;
|
||||
uint32_t iv0, iv1, iv2, iv3;
|
||||
uint32_t cm0, cm1, cm2, cm3;
|
||||
uint64_t sk_exp[120];
|
||||
uint64_t q[8];
|
||||
|
||||
br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
|
||||
|
||||
/*
|
||||
* We keep the counter as four 32-bit values, with big-endian
|
||||
* convention, because that's what is expected for purposes of
|
||||
* incrementing the counter value.
|
||||
*/
|
||||
ivbuf = ctr;
|
||||
iv0 = br_dec32be(ivbuf + 0);
|
||||
iv1 = br_dec32be(ivbuf + 4);
|
||||
iv2 = br_dec32be(ivbuf + 8);
|
||||
iv3 = br_dec32be(ivbuf + 12);
|
||||
|
||||
/*
|
||||
* The current CBC-MAC value is kept in little-endian convention.
|
||||
*/
|
||||
cm0 = br_dec32le((unsigned char *)cbcmac + 0);
|
||||
cm1 = br_dec32le((unsigned char *)cbcmac + 4);
|
||||
cm2 = br_dec32le((unsigned char *)cbcmac + 8);
|
||||
cm3 = br_dec32le((unsigned char *)cbcmac + 12);
|
||||
|
||||
buf = data;
|
||||
memset(q, 0, sizeof q);
|
||||
while (len > 0) {
|
||||
uint32_t w[8], carry;
|
||||
unsigned char tmp[16];
|
||||
|
||||
/*
|
||||
* The bitslice implementation expects values in
|
||||
* little-endian convention, so we have to byteswap them.
|
||||
*/
|
||||
w[0] = br_swap32(iv0);
|
||||
w[1] = br_swap32(iv1);
|
||||
w[2] = br_swap32(iv2);
|
||||
w[3] = br_swap32(iv3);
|
||||
iv3 ++;
|
||||
carry = ~(iv3 | -iv3) >> 31;
|
||||
iv2 += carry;
|
||||
carry &= -(~(iv2 | -iv2) >> 31);
|
||||
iv1 += carry;
|
||||
carry &= -(~(iv1 | -iv1) >> 31);
|
||||
iv0 += carry;
|
||||
|
||||
/*
|
||||
* The block for CBC-MAC.
|
||||
*/
|
||||
w[4] = cm0 ^ br_dec32le(buf + 0);
|
||||
w[5] = cm1 ^ br_dec32le(buf + 4);
|
||||
w[6] = cm2 ^ br_dec32le(buf + 8);
|
||||
w[7] = cm3 ^ br_dec32le(buf + 12);
|
||||
|
||||
br_aes_ct64_interleave_in(&q[0], &q[4], w);
|
||||
br_aes_ct64_interleave_in(&q[1], &q[5], w + 4);
|
||||
br_aes_ct64_ortho(q);
|
||||
br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
|
||||
br_aes_ct64_ortho(q);
|
||||
br_aes_ct64_interleave_out(w, q[0], q[4]);
|
||||
br_aes_ct64_interleave_out(w + 4, q[1], q[5]);
|
||||
|
||||
br_enc32le(tmp + 0, w[0]);
|
||||
br_enc32le(tmp + 4, w[1]);
|
||||
br_enc32le(tmp + 8, w[2]);
|
||||
br_enc32le(tmp + 12, w[3]);
|
||||
xorbuf(buf, tmp, 16);
|
||||
cm0 = w[4];
|
||||
cm1 = w[5];
|
||||
cm2 = w[6];
|
||||
cm3 = w[7];
|
||||
buf += 16;
|
||||
len -= 16;
|
||||
}
|
||||
|
||||
br_enc32be(ivbuf + 0, iv0);
|
||||
br_enc32be(ivbuf + 4, iv1);
|
||||
br_enc32be(ivbuf + 8, iv2);
|
||||
br_enc32be(ivbuf + 12, iv3);
|
||||
br_enc32le((unsigned char *)cbcmac + 0, cm0);
|
||||
br_enc32le((unsigned char *)cbcmac + 4, cm1);
|
||||
br_enc32le((unsigned char *)cbcmac + 8, cm2);
|
||||
br_enc32le((unsigned char *)cbcmac + 12, cm3);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_ctrcbc_class br_aes_ct64_ctrcbc_vtable = {
|
||||
sizeof(br_aes_ct64_ctrcbc_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
|
||||
&br_aes_ct64_ctrcbc_init,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, void *, void *, size_t))
|
||||
&br_aes_ct64_ctrcbc_encrypt,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, void *, void *, size_t))
|
||||
&br_aes_ct64_ctrcbc_decrypt,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, void *, size_t))
|
||||
&br_aes_ct64_ctrcbc_ctr,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, const void *, size_t))
|
||||
&br_aes_ct64_ctrcbc_mac
|
||||
};
|
||||
|
|
@ -0,0 +1,159 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * Linear layer applied before and after the forward S-box in
 * br_aes_ct64_bitslice_invSbox(): planes 0, 1, 5 and 6 are
 * complemented, then every output plane i is the XOR of the input
 * planes (i+2), (i+5) and (i+7), indices taken modulo 8.
 */
static void
invsbox_linear64(uint64_t *q)
{
	uint64_t t[8];
	int i;

	for (i = 0; i < 8; i ++) {
		t[i] = q[i];
	}
	t[0] = ~t[0];
	t[1] = ~t[1];
	t[5] = ~t[5];
	t[6] = ~t[6];
	for (i = 0; i < 8; i ++) {
		q[i] = t[(i + 2) & 7] ^ t[(i + 5) & 7] ^ t[(i + 7) & 7];
	}
}

/*
 * see inner.h
 *
 * Inverse AES S-box on bitsliced state (eight 64-bit planes in q[]),
 * built from the forward S-box: linear layer, forward S-box, same
 * linear layer again. See br_aes_ct_bitslice_invSbox(); this is the
 * natural extension to 64-bit registers.
 */
void
br_aes_ct64_bitslice_invSbox(uint64_t *q)
{
	invsbox_linear64(q);
	br_aes_ct64_bitslice_Sbox(q);
	invsbox_linear64(q);
}
|
||||
|
||||
/*
 * XOR one bitsliced round key (eight 64-bit words at 'sk') into the
 * eight state words at 'q'.
 */
static void
add_round_key(uint64_t *q, const uint64_t *sk)
{
	q[0] ^= sk[0];
	q[1] ^= sk[1];
	q[2] ^= sk[2];
	q[3] ^= sk[3];
	q[4] ^= sk[4];
	q[5] ^= sk[5];
	q[6] ^= sk[6];
	q[7] ^= sk[7];
}
|
||||
|
||||
/*
 * Inverse ShiftRows on bitsliced state. The same bit permutation is
 * applied to each of the eight 64-bit words: the low 16 bits are kept,
 * and bits are rotated within each of the three higher 16-bit groups
 * by 4, 8 and 12 positions respectively.
 */
static void
inv_shift_rows(uint64_t *q)
{
	uint64_t *p;

	for (p = q; p != q + 8; p ++) {
		uint64_t v, r;

		v = *p;
		/* bits 0..15: unchanged */
		r = v & (uint64_t)0x000000000000FFFF;
		/* bits 16..31: rotate left by 4 within the group */
		r |= (v & (uint64_t)0x000000000FFF0000) << 4;
		r |= (v & (uint64_t)0x00000000F0000000) >> 12;
		/* bits 32..47: swap the two bytes */
		r |= (v & (uint64_t)0x000000FF00000000) << 8;
		r |= (v & (uint64_t)0x0000FF0000000000) >> 8;
		/* bits 48..63: rotate left by 12 within the group */
		r |= (v & (uint64_t)0x000F000000000000) << 12;
		r |= (v & (uint64_t)0xFFF0000000000000) >> 4;
		*p = r;
	}
}
|
||||
|
||||
/*
 * Rotate a 64-bit word by 32 bits, i.e. exchange its two halves.
 */
static inline uint64_t
rotr32(uint64_t x)
{
	uint64_t upper = x >> 32;
	uint64_t lower = x << 32;

	return lower | upper;
}
|
||||
|
||||
/*
 * Inverse MixColumns on bitsliced state (eight 64-bit planes).
 *
 * a[i] is input plane i and b[i] the same plane rotated right by 16
 * bits. Each output plane is the XOR of a "plain" combination of those
 * values and a second combination whose two 32-bit halves are swapped.
 */
static void
inv_mix_columns(uint64_t *q)
{
	uint64_t a[8], b[8];
	uint64_t plain[8], swapped[8];
	int i;

	for (i = 0; i < 8; i ++) {
		a[i] = q[i];
		b[i] = (a[i] >> 16) | (a[i] << 48);
	}

	plain[0] = a[5] ^ a[6] ^ a[7] ^ b[0] ^ b[5] ^ b[7];
	swapped[0] = a[0] ^ a[5] ^ a[6] ^ b[0] ^ b[5];

	plain[1] = a[0] ^ a[5] ^ b[0] ^ b[1] ^ b[5] ^ b[6] ^ b[7];
	swapped[1] = a[1] ^ a[5] ^ a[7] ^ b[1] ^ b[5] ^ b[6];

	plain[2] = a[0] ^ a[1] ^ a[6] ^ b[1] ^ b[2] ^ b[6] ^ b[7];
	swapped[2] = a[0] ^ a[2] ^ a[6] ^ b[2] ^ b[6] ^ b[7];

	plain[3] = a[0] ^ a[1] ^ a[2] ^ a[5] ^ a[6]
		^ b[0] ^ b[2] ^ b[3] ^ b[5];
	swapped[3] = a[0] ^ a[1] ^ a[3] ^ a[5] ^ a[6] ^ a[7]
		^ b[0] ^ b[3] ^ b[5] ^ b[7];

	plain[4] = a[1] ^ a[2] ^ a[3] ^ a[5]
		^ b[1] ^ b[3] ^ b[4] ^ b[5] ^ b[6] ^ b[7];
	swapped[4] = a[1] ^ a[2] ^ a[4] ^ a[5] ^ a[7]
		^ b[1] ^ b[4] ^ b[5] ^ b[6];

	plain[5] = a[2] ^ a[3] ^ a[4] ^ a[6]
		^ b[2] ^ b[4] ^ b[5] ^ b[6] ^ b[7];
	swapped[5] = a[2] ^ a[3] ^ a[5] ^ a[6]
		^ b[2] ^ b[5] ^ b[6] ^ b[7];

	plain[6] = a[3] ^ a[4] ^ a[5] ^ a[7] ^ b[3] ^ b[5] ^ b[6] ^ b[7];
	swapped[6] = a[3] ^ a[4] ^ a[6] ^ a[7] ^ b[3] ^ b[6] ^ b[7];

	plain[7] = a[4] ^ a[5] ^ a[6] ^ b[4] ^ b[6] ^ b[7];
	swapped[7] = a[4] ^ a[5] ^ a[7] ^ b[4] ^ b[7];

	for (i = 0; i < 8; i ++) {
		/* exchange the 32-bit halves of the second combination */
		q[i] = plain[i] ^ ((swapped[i] << 32) | (swapped[i] >> 32));
	}
}
|
||||
|
||||
/*
 * see inner.h
 *
 * AES decryption of bitsliced state 'q' (eight 64-bit words).
 *
 * num_rounds   number of AES rounds
 * skey         expanded key, eight 64-bit words per round key, with
 *              round key r at skey + 8*r (keys 0..num_rounds are used)
 * q            state, transformed in place
 *
 * Round keys are consumed from the last one down to key 0; the final
 * round omits the inverse MixColumns step.
 */
void
br_aes_ct64_bitslice_decrypt(unsigned num_rounds,
	const uint64_t *skey, uint64_t *q)
{
	unsigned round = num_rounds;

	add_round_key(q, skey + (round << 3));
	while (-- round > 0) {
		inv_shift_rows(q);
		br_aes_ct64_bitslice_invSbox(q);
		add_round_key(q, skey + (round << 3));
		inv_mix_columns(q);
	}

	/* Last round: no inverse MixColumns, round key 0. */
	inv_shift_rows(q);
	br_aes_ct64_bitslice_invSbox(q);
	add_round_key(q, skey);
}
|
||||
|
|
@ -0,0 +1,115 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * XOR one bitsliced round key (eight 64-bit words at 'sk') into the
 * eight state words at 'q'.
 */
static inline void
add_round_key(uint64_t *q, const uint64_t *sk)
{
	int i;

	for (i = 0; i < 8; i ++) {
		q[i] ^= sk[i];
	}
}
|
||||
|
||||
/*
 * ShiftRows on bitsliced state. The same bit permutation is applied to
 * each of the eight 64-bit words: the low 16 bits are kept, and bits
 * are rotated within each of the three higher 16-bit groups by 4, 8
 * and 12 positions respectively.
 */
static inline void
shift_rows(uint64_t *q)
{
	uint64_t *p;

	for (p = q; p != q + 8; p ++) {
		uint64_t v, r;

		v = *p;
		/* bits 0..15: unchanged */
		r = v & (uint64_t)0x000000000000FFFF;
		/* bits 16..31: rotate right by 4 within the group */
		r |= (v & (uint64_t)0x00000000FFF00000) >> 4;
		r |= (v & (uint64_t)0x00000000000F0000) << 12;
		/* bits 32..47: swap the two bytes */
		r |= (v & (uint64_t)0x0000FF0000000000) >> 8;
		r |= (v & (uint64_t)0x000000FF00000000) << 8;
		/* bits 48..63: rotate right by 12 within the group */
		r |= (v & (uint64_t)0xF000000000000000) >> 12;
		r |= (v & (uint64_t)0x0FFF000000000000) << 4;
		*p = r;
	}
}
|
||||
|
||||
/*
 * Rotate a 64-bit word by 32 bits, i.e. exchange its two halves.
 */
static inline uint64_t
rotr32(uint64_t x)
{
	uint64_t upper = x >> 32;
	uint64_t lower = x << 32;

	return lower | upper;
}
|
||||
|
||||
/*
 * MixColumns on bitsliced state (eight 64-bit planes).
 *
 * With rot[i] = plane i rotated right by 16 bits and
 * mix[i] = plane i XOR rot[i], output plane i is
 *
 *     mix[i-1] ^ rot[i] ^ halfswap(mix[i])
 *
 * where mix[-1] is mix[7], halfswap() exchanges the 32-bit halves, and
 * planes 1, 3 and 4 additionally receive mix[7].
 */
static inline void
mix_columns(uint64_t *q)
{
	uint64_t rot[8], mix[8];
	int i;

	for (i = 0; i < 8; i ++) {
		rot[i] = (q[i] >> 16) | (q[i] << 48);
		mix[i] = q[i] ^ rot[i];
	}

	for (i = 0; i < 8; i ++) {
		uint64_t acc;

		acc = mix[(i + 7) & 7] ^ rot[i];
		/* 0x1A has bits 1, 3 and 4 set: the planes that also get mix[7]. */
		if ((0x1A >> i) & 1) {
			acc ^= mix[7];
		}
		q[i] = acc ^ ((mix[i] << 32) | (mix[i] >> 32));
	}
}
|
||||
|
||||
/*
 * see inner.h
 *
 * AES encryption of bitsliced state 'q' (eight 64-bit words).
 *
 * num_rounds   number of AES rounds
 * skey         expanded key, eight 64-bit words per round key, with
 *              round key r at skey + 8*r (keys 0..num_rounds are used)
 * q            state, transformed in place
 *
 * Round key 0 is added first; the final round omits MixColumns.
 */
void
br_aes_ct64_bitslice_encrypt(unsigned num_rounds,
	const uint64_t *skey, uint64_t *q)
{
	unsigned round;

	add_round_key(q, skey);
	round = 1;
	while (round < num_rounds) {
		br_aes_ct64_bitslice_Sbox(q);
		shift_rows(q);
		mix_columns(q);
		add_round_key(q, skey + (round << 3));
		round ++;
	}

	/* Last round: no MixColumns, last round key. */
	br_aes_ct64_bitslice_Sbox(q);
	shift_rows(q);
	add_round_key(q, skey + (num_rounds << 3));
}
|
||||
|
|
@ -0,0 +1,111 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_ct_cbcdec_init(br_aes_ct_cbcdec_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_ct_cbcdec_vtable;
|
||||
ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_ct_cbcdec_run(const br_aes_ct_cbcdec_keys *ctx,
|
||||
void *iv, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf, *ivbuf;
|
||||
uint32_t iv0, iv1, iv2, iv3;
|
||||
uint32_t sk_exp[120];
|
||||
|
||||
br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
|
||||
ivbuf = iv;
|
||||
iv0 = br_dec32le(ivbuf);
|
||||
iv1 = br_dec32le(ivbuf + 4);
|
||||
iv2 = br_dec32le(ivbuf + 8);
|
||||
iv3 = br_dec32le(ivbuf + 12);
|
||||
buf = data;
|
||||
while (len > 0) {
|
||||
uint32_t q[8], sq[8];
|
||||
|
||||
q[0] = br_dec32le(buf);
|
||||
q[2] = br_dec32le(buf + 4);
|
||||
q[4] = br_dec32le(buf + 8);
|
||||
q[6] = br_dec32le(buf + 12);
|
||||
if (len >= 32) {
|
||||
q[1] = br_dec32le(buf + 16);
|
||||
q[3] = br_dec32le(buf + 20);
|
||||
q[5] = br_dec32le(buf + 24);
|
||||
q[7] = br_dec32le(buf + 28);
|
||||
} else {
|
||||
q[1] = 0;
|
||||
q[3] = 0;
|
||||
q[5] = 0;
|
||||
q[7] = 0;
|
||||
}
|
||||
memcpy(sq, q, sizeof q);
|
||||
br_aes_ct_ortho(q);
|
||||
br_aes_ct_bitslice_decrypt(ctx->num_rounds, sk_exp, q);
|
||||
br_aes_ct_ortho(q);
|
||||
br_enc32le(buf, q[0] ^ iv0);
|
||||
br_enc32le(buf + 4, q[2] ^ iv1);
|
||||
br_enc32le(buf + 8, q[4] ^ iv2);
|
||||
br_enc32le(buf + 12, q[6] ^ iv3);
|
||||
if (len < 32) {
|
||||
iv0 = sq[0];
|
||||
iv1 = sq[2];
|
||||
iv2 = sq[4];
|
||||
iv3 = sq[6];
|
||||
break;
|
||||
}
|
||||
br_enc32le(buf + 16, q[1] ^ sq[0]);
|
||||
br_enc32le(buf + 20, q[3] ^ sq[2]);
|
||||
br_enc32le(buf + 24, q[5] ^ sq[4]);
|
||||
br_enc32le(buf + 28, q[7] ^ sq[6]);
|
||||
iv0 = sq[1];
|
||||
iv1 = sq[3];
|
||||
iv2 = sq[5];
|
||||
iv3 = sq[7];
|
||||
buf += 32;
|
||||
len -= 32;
|
||||
}
|
||||
br_enc32le(ivbuf, iv0);
|
||||
br_enc32le(ivbuf + 4, iv1);
|
||||
br_enc32le(ivbuf + 8, iv2);
|
||||
br_enc32le(ivbuf + 12, iv3);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_cbcdec_class br_aes_ct_cbcdec_vtable = {
|
||||
sizeof(br_aes_ct_cbcdec_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_cbcdec_class **, const void *, size_t))
|
||||
&br_aes_ct_cbcdec_init,
|
||||
(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
|
||||
&br_aes_ct_cbcdec_run
|
||||
};
|
||||
|
|
@ -0,0 +1,91 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_ct_cbcenc_init(br_aes_ct_cbcenc_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_ct_cbcenc_vtable;
|
||||
ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_ct_cbcenc_run(const br_aes_ct_cbcenc_keys *ctx,
|
||||
void *iv, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf, *ivbuf;
|
||||
uint32_t q[8];
|
||||
uint32_t iv0, iv1, iv2, iv3;
|
||||
uint32_t sk_exp[120];
|
||||
|
||||
q[1] = 0;
|
||||
q[3] = 0;
|
||||
q[5] = 0;
|
||||
q[7] = 0;
|
||||
br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
|
||||
ivbuf = iv;
|
||||
iv0 = br_dec32le(ivbuf);
|
||||
iv1 = br_dec32le(ivbuf + 4);
|
||||
iv2 = br_dec32le(ivbuf + 8);
|
||||
iv3 = br_dec32le(ivbuf + 12);
|
||||
buf = data;
|
||||
while (len > 0) {
|
||||
q[0] = iv0 ^ br_dec32le(buf);
|
||||
q[2] = iv1 ^ br_dec32le(buf + 4);
|
||||
q[4] = iv2 ^ br_dec32le(buf + 8);
|
||||
q[6] = iv3 ^ br_dec32le(buf + 12);
|
||||
br_aes_ct_ortho(q);
|
||||
br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
|
||||
br_aes_ct_ortho(q);
|
||||
iv0 = q[0];
|
||||
iv1 = q[2];
|
||||
iv2 = q[4];
|
||||
iv3 = q[6];
|
||||
br_enc32le(buf, iv0);
|
||||
br_enc32le(buf + 4, iv1);
|
||||
br_enc32le(buf + 8, iv2);
|
||||
br_enc32le(buf + 12, iv3);
|
||||
buf += 16;
|
||||
len -= 16;
|
||||
}
|
||||
br_enc32le(ivbuf, iv0);
|
||||
br_enc32le(ivbuf + 4, iv1);
|
||||
br_enc32le(ivbuf + 8, iv2);
|
||||
br_enc32le(ivbuf + 12, iv3);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_cbcenc_class br_aes_ct_cbcenc_vtable = {
|
||||
sizeof(br_aes_ct_cbcenc_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_cbcenc_class **, const void *, size_t))
|
||||
&br_aes_ct_cbcenc_init,
|
||||
(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
|
||||
&br_aes_ct_cbcenc_run
|
||||
};
|
||||
|
|
@ -0,0 +1,116 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_ct_ctr_init(br_aes_ct_ctr_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_ct_ctr_vtable;
|
||||
ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len);
|
||||
}
|
||||
|
||||
/*
 * XOR the first 'len' bytes of 'src' into 'dst' (dst[i] ^= src[i]).
 * A length of zero leaves 'dst' untouched.
 */
static void
xorbuf(void *dst, const void *src, size_t len)
{
	unsigned char *out = dst;
	const unsigned char *in = src;
	size_t u;

	for (u = 0; u < len; u ++) {
		out[u] ^= in[u];
	}
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
uint32_t
|
||||
br_aes_ct_ctr_run(const br_aes_ct_ctr_keys *ctx,
|
||||
const void *iv, uint32_t cc, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
const unsigned char *ivbuf;
|
||||
uint32_t iv0, iv1, iv2;
|
||||
uint32_t sk_exp[120];
|
||||
|
||||
br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
|
||||
ivbuf = iv;
|
||||
iv0 = br_dec32le(ivbuf);
|
||||
iv1 = br_dec32le(ivbuf + 4);
|
||||
iv2 = br_dec32le(ivbuf + 8);
|
||||
buf = data;
|
||||
while (len > 0) {
|
||||
uint32_t q[8];
|
||||
unsigned char tmp[32];
|
||||
|
||||
/*
|
||||
* TODO: see if we can save on the first br_aes_ct_ortho()
|
||||
* call, since iv0/iv1/iv2 are constant for the whole run.
|
||||
*/
|
||||
q[0] = q[1] = iv0;
|
||||
q[2] = q[3] = iv1;
|
||||
q[4] = q[5] = iv2;
|
||||
q[6] = br_swap32(cc);
|
||||
q[7] = br_swap32(cc + 1);
|
||||
br_aes_ct_ortho(q);
|
||||
br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
|
||||
br_aes_ct_ortho(q);
|
||||
br_enc32le(tmp, q[0]);
|
||||
br_enc32le(tmp + 4, q[2]);
|
||||
br_enc32le(tmp + 8, q[4]);
|
||||
br_enc32le(tmp + 12, q[6]);
|
||||
br_enc32le(tmp + 16, q[1]);
|
||||
br_enc32le(tmp + 20, q[3]);
|
||||
br_enc32le(tmp + 24, q[5]);
|
||||
br_enc32le(tmp + 28, q[7]);
|
||||
|
||||
if (len <= 32) {
|
||||
xorbuf(buf, tmp, len);
|
||||
cc ++;
|
||||
if (len > 16) {
|
||||
cc ++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
xorbuf(buf, tmp, 32);
|
||||
buf += 32;
|
||||
len -= 32;
|
||||
cc += 2;
|
||||
}
|
||||
return cc;
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_ctr_class br_aes_ct_ctr_vtable = {
|
||||
sizeof(br_aes_ct_ctr_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_ctr_class **, const void *, size_t))
|
||||
&br_aes_ct_ctr_init,
|
||||
(uint32_t (*)(const br_block_ctr_class *const *,
|
||||
const void *, uint32_t, void *, size_t))
|
||||
&br_aes_ct_ctr_run
|
||||
};
|
||||
|
|
@ -0,0 +1,422 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_ct_ctrcbc_init(br_aes_ct_ctrcbc_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_ct_ctrcbc_vtable;
|
||||
ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len);
|
||||
}
|
||||
|
||||
/*
 * XOR the first `len` bytes of `src` into `dst`, byte by byte
 * (dst[i] ^= src[i]). A length of 0 leaves `dst` untouched.
 */
static void
xorbuf(void *dst, const void *src, size_t len)
{
	unsigned char *d;
	const unsigned char *s;

	d = dst;
	s = src;
	while (len -- > 0) {
		*d ++ ^= *s ++;
	}
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_ct_ctrcbc_ctr(const br_aes_ct_ctrcbc_keys *ctx,
|
||||
void *ctr, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
unsigned char *ivbuf;
|
||||
uint32_t iv0, iv1, iv2, iv3;
|
||||
uint32_t sk_exp[120];
|
||||
|
||||
br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
|
||||
|
||||
/*
|
||||
* We keep the counter as four 32-bit values, with big-endian
|
||||
* convention, because that's what is expected for purposes of
|
||||
* incrementing the counter value.
|
||||
*/
|
||||
ivbuf = ctr;
|
||||
iv0 = br_dec32be(ivbuf + 0);
|
||||
iv1 = br_dec32be(ivbuf + 4);
|
||||
iv2 = br_dec32be(ivbuf + 8);
|
||||
iv3 = br_dec32be(ivbuf + 12);
|
||||
|
||||
buf = data;
|
||||
while (len > 0) {
|
||||
uint32_t q[8], carry;
|
||||
unsigned char tmp[32];
|
||||
|
||||
/*
|
||||
* The bitslice implementation expects values in
|
||||
* little-endian convention, so we have to byteswap them.
|
||||
*/
|
||||
q[0] = br_swap32(iv0);
|
||||
q[2] = br_swap32(iv1);
|
||||
q[4] = br_swap32(iv2);
|
||||
q[6] = br_swap32(iv3);
|
||||
iv3 ++;
|
||||
carry = ~(iv3 | -iv3) >> 31;
|
||||
iv2 += carry;
|
||||
carry &= -(~(iv2 | -iv2) >> 31);
|
||||
iv1 += carry;
|
||||
carry &= -(~(iv1 | -iv1) >> 31);
|
||||
iv0 += carry;
|
||||
q[1] = br_swap32(iv0);
|
||||
q[3] = br_swap32(iv1);
|
||||
q[5] = br_swap32(iv2);
|
||||
q[7] = br_swap32(iv3);
|
||||
if (len > 16) {
|
||||
iv3 ++;
|
||||
carry = ~(iv3 | -iv3) >> 31;
|
||||
iv2 += carry;
|
||||
carry &= -(~(iv2 | -iv2) >> 31);
|
||||
iv1 += carry;
|
||||
carry &= -(~(iv1 | -iv1) >> 31);
|
||||
iv0 += carry;
|
||||
}
|
||||
|
||||
br_aes_ct_ortho(q);
|
||||
br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
|
||||
br_aes_ct_ortho(q);
|
||||
|
||||
br_enc32le(tmp, q[0]);
|
||||
br_enc32le(tmp + 4, q[2]);
|
||||
br_enc32le(tmp + 8, q[4]);
|
||||
br_enc32le(tmp + 12, q[6]);
|
||||
br_enc32le(tmp + 16, q[1]);
|
||||
br_enc32le(tmp + 20, q[3]);
|
||||
br_enc32le(tmp + 24, q[5]);
|
||||
br_enc32le(tmp + 28, q[7]);
|
||||
|
||||
if (len <= 32) {
|
||||
xorbuf(buf, tmp, len);
|
||||
break;
|
||||
}
|
||||
xorbuf(buf, tmp, 32);
|
||||
buf += 32;
|
||||
len -= 32;
|
||||
}
|
||||
br_enc32be(ivbuf + 0, iv0);
|
||||
br_enc32be(ivbuf + 4, iv1);
|
||||
br_enc32be(ivbuf + 8, iv2);
|
||||
br_enc32be(ivbuf + 12, iv3);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_ct_ctrcbc_mac(const br_aes_ct_ctrcbc_keys *ctx,
|
||||
void *cbcmac, const void *data, size_t len)
|
||||
{
|
||||
const unsigned char *buf;
|
||||
uint32_t cm0, cm1, cm2, cm3;
|
||||
uint32_t q[8];
|
||||
uint32_t sk_exp[120];
|
||||
|
||||
br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
|
||||
|
||||
buf = data;
|
||||
cm0 = br_dec32le((unsigned char *)cbcmac + 0);
|
||||
cm1 = br_dec32le((unsigned char *)cbcmac + 4);
|
||||
cm2 = br_dec32le((unsigned char *)cbcmac + 8);
|
||||
cm3 = br_dec32le((unsigned char *)cbcmac + 12);
|
||||
q[1] = 0;
|
||||
q[3] = 0;
|
||||
q[5] = 0;
|
||||
q[7] = 0;
|
||||
|
||||
while (len > 0) {
|
||||
q[0] = cm0 ^ br_dec32le(buf + 0);
|
||||
q[2] = cm1 ^ br_dec32le(buf + 4);
|
||||
q[4] = cm2 ^ br_dec32le(buf + 8);
|
||||
q[6] = cm3 ^ br_dec32le(buf + 12);
|
||||
|
||||
br_aes_ct_ortho(q);
|
||||
br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
|
||||
br_aes_ct_ortho(q);
|
||||
|
||||
cm0 = q[0];
|
||||
cm1 = q[2];
|
||||
cm2 = q[4];
|
||||
cm3 = q[6];
|
||||
buf += 16;
|
||||
len -= 16;
|
||||
}
|
||||
|
||||
br_enc32le((unsigned char *)cbcmac + 0, cm0);
|
||||
br_enc32le((unsigned char *)cbcmac + 4, cm1);
|
||||
br_enc32le((unsigned char *)cbcmac + 8, cm2);
|
||||
br_enc32le((unsigned char *)cbcmac + 12, cm3);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_ct_ctrcbc_encrypt(const br_aes_ct_ctrcbc_keys *ctx,
|
||||
void *ctr, void *cbcmac, void *data, size_t len)
|
||||
{
|
||||
/*
|
||||
* When encrypting, the CBC-MAC processing must be lagging by
|
||||
* one block, since it operates on the encrypted values, so
|
||||
* it must wait for that encryption to complete.
|
||||
*/
|
||||
|
||||
unsigned char *buf;
|
||||
unsigned char *ivbuf;
|
||||
uint32_t iv0, iv1, iv2, iv3;
|
||||
uint32_t cm0, cm1, cm2, cm3;
|
||||
uint32_t sk_exp[120];
|
||||
int first_iter;
|
||||
|
||||
br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
|
||||
|
||||
/*
|
||||
* We keep the counter as four 32-bit values, with big-endian
|
||||
* convention, because that's what is expected for purposes of
|
||||
* incrementing the counter value.
|
||||
*/
|
||||
ivbuf = ctr;
|
||||
iv0 = br_dec32be(ivbuf + 0);
|
||||
iv1 = br_dec32be(ivbuf + 4);
|
||||
iv2 = br_dec32be(ivbuf + 8);
|
||||
iv3 = br_dec32be(ivbuf + 12);
|
||||
|
||||
/*
|
||||
* The current CBC-MAC value is kept in little-endian convention.
|
||||
*/
|
||||
cm0 = br_dec32le((unsigned char *)cbcmac + 0);
|
||||
cm1 = br_dec32le((unsigned char *)cbcmac + 4);
|
||||
cm2 = br_dec32le((unsigned char *)cbcmac + 8);
|
||||
cm3 = br_dec32le((unsigned char *)cbcmac + 12);
|
||||
|
||||
buf = data;
|
||||
first_iter = 1;
|
||||
while (len > 0) {
|
||||
uint32_t q[8], carry;
|
||||
|
||||
/*
|
||||
* The bitslice implementation expects values in
|
||||
* little-endian convention, so we have to byteswap them.
|
||||
*/
|
||||
q[0] = br_swap32(iv0);
|
||||
q[2] = br_swap32(iv1);
|
||||
q[4] = br_swap32(iv2);
|
||||
q[6] = br_swap32(iv3);
|
||||
iv3 ++;
|
||||
carry = ~(iv3 | -iv3) >> 31;
|
||||
iv2 += carry;
|
||||
carry &= -(~(iv2 | -iv2) >> 31);
|
||||
iv1 += carry;
|
||||
carry &= -(~(iv1 | -iv1) >> 31);
|
||||
iv0 += carry;
|
||||
|
||||
/*
|
||||
* The odd values are used for CBC-MAC.
|
||||
*/
|
||||
q[1] = cm0;
|
||||
q[3] = cm1;
|
||||
q[5] = cm2;
|
||||
q[7] = cm3;
|
||||
|
||||
br_aes_ct_ortho(q);
|
||||
br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
|
||||
br_aes_ct_ortho(q);
|
||||
|
||||
/*
|
||||
* We do the XOR with the plaintext in 32-bit registers,
|
||||
* so that the value are available for CBC-MAC processing
|
||||
* as well.
|
||||
*/
|
||||
q[0] ^= br_dec32le(buf + 0);
|
||||
q[2] ^= br_dec32le(buf + 4);
|
||||
q[4] ^= br_dec32le(buf + 8);
|
||||
q[6] ^= br_dec32le(buf + 12);
|
||||
br_enc32le(buf + 0, q[0]);
|
||||
br_enc32le(buf + 4, q[2]);
|
||||
br_enc32le(buf + 8, q[4]);
|
||||
br_enc32le(buf + 12, q[6]);
|
||||
|
||||
buf += 16;
|
||||
len -= 16;
|
||||
|
||||
/*
|
||||
* We set the cm* values to the block to encrypt in the
|
||||
* next iteration.
|
||||
*/
|
||||
if (first_iter) {
|
||||
first_iter = 0;
|
||||
cm0 ^= q[0];
|
||||
cm1 ^= q[2];
|
||||
cm2 ^= q[4];
|
||||
cm3 ^= q[6];
|
||||
} else {
|
||||
cm0 = q[0] ^ q[1];
|
||||
cm1 = q[2] ^ q[3];
|
||||
cm2 = q[4] ^ q[5];
|
||||
cm3 = q[6] ^ q[7];
|
||||
}
|
||||
|
||||
/*
|
||||
* If this was the last iteration, then compute the
|
||||
* extra block encryption to complete CBC-MAC.
|
||||
*/
|
||||
if (len == 0) {
|
||||
q[0] = cm0;
|
||||
q[2] = cm1;
|
||||
q[4] = cm2;
|
||||
q[6] = cm3;
|
||||
br_aes_ct_ortho(q);
|
||||
br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
|
||||
br_aes_ct_ortho(q);
|
||||
cm0 = q[0];
|
||||
cm1 = q[2];
|
||||
cm2 = q[4];
|
||||
cm3 = q[6];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
br_enc32be(ivbuf + 0, iv0);
|
||||
br_enc32be(ivbuf + 4, iv1);
|
||||
br_enc32be(ivbuf + 8, iv2);
|
||||
br_enc32be(ivbuf + 12, iv3);
|
||||
br_enc32le((unsigned char *)cbcmac + 0, cm0);
|
||||
br_enc32le((unsigned char *)cbcmac + 4, cm1);
|
||||
br_enc32le((unsigned char *)cbcmac + 8, cm2);
|
||||
br_enc32le((unsigned char *)cbcmac + 12, cm3);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_ct_ctrcbc_decrypt(const br_aes_ct_ctrcbc_keys *ctx,
|
||||
void *ctr, void *cbcmac, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
unsigned char *ivbuf;
|
||||
uint32_t iv0, iv1, iv2, iv3;
|
||||
uint32_t cm0, cm1, cm2, cm3;
|
||||
uint32_t sk_exp[120];
|
||||
|
||||
br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
|
||||
|
||||
/*
|
||||
* We keep the counter as four 32-bit values, with big-endian
|
||||
* convention, because that's what is expected for purposes of
|
||||
* incrementing the counter value.
|
||||
*/
|
||||
ivbuf = ctr;
|
||||
iv0 = br_dec32be(ivbuf + 0);
|
||||
iv1 = br_dec32be(ivbuf + 4);
|
||||
iv2 = br_dec32be(ivbuf + 8);
|
||||
iv3 = br_dec32be(ivbuf + 12);
|
||||
|
||||
/*
|
||||
* The current CBC-MAC value is kept in little-endian convention.
|
||||
*/
|
||||
cm0 = br_dec32le((unsigned char *)cbcmac + 0);
|
||||
cm1 = br_dec32le((unsigned char *)cbcmac + 4);
|
||||
cm2 = br_dec32le((unsigned char *)cbcmac + 8);
|
||||
cm3 = br_dec32le((unsigned char *)cbcmac + 12);
|
||||
|
||||
buf = data;
|
||||
while (len > 0) {
|
||||
uint32_t q[8], carry;
|
||||
unsigned char tmp[16];
|
||||
|
||||
/*
|
||||
* The bitslice implementation expects values in
|
||||
* little-endian convention, so we have to byteswap them.
|
||||
*/
|
||||
q[0] = br_swap32(iv0);
|
||||
q[2] = br_swap32(iv1);
|
||||
q[4] = br_swap32(iv2);
|
||||
q[6] = br_swap32(iv3);
|
||||
iv3 ++;
|
||||
carry = ~(iv3 | -iv3) >> 31;
|
||||
iv2 += carry;
|
||||
carry &= -(~(iv2 | -iv2) >> 31);
|
||||
iv1 += carry;
|
||||
carry &= -(~(iv1 | -iv1) >> 31);
|
||||
iv0 += carry;
|
||||
|
||||
/*
|
||||
* The odd values are used for CBC-MAC.
|
||||
*/
|
||||
q[1] = cm0 ^ br_dec32le(buf + 0);
|
||||
q[3] = cm1 ^ br_dec32le(buf + 4);
|
||||
q[5] = cm2 ^ br_dec32le(buf + 8);
|
||||
q[7] = cm3 ^ br_dec32le(buf + 12);
|
||||
|
||||
br_aes_ct_ortho(q);
|
||||
br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
|
||||
br_aes_ct_ortho(q);
|
||||
|
||||
br_enc32le(tmp + 0, q[0]);
|
||||
br_enc32le(tmp + 4, q[2]);
|
||||
br_enc32le(tmp + 8, q[4]);
|
||||
br_enc32le(tmp + 12, q[6]);
|
||||
xorbuf(buf, tmp, 16);
|
||||
cm0 = q[1];
|
||||
cm1 = q[3];
|
||||
cm2 = q[5];
|
||||
cm3 = q[7];
|
||||
buf += 16;
|
||||
len -= 16;
|
||||
}
|
||||
|
||||
br_enc32be(ivbuf + 0, iv0);
|
||||
br_enc32be(ivbuf + 4, iv1);
|
||||
br_enc32be(ivbuf + 8, iv2);
|
||||
br_enc32be(ivbuf + 12, iv3);
|
||||
br_enc32le((unsigned char *)cbcmac + 0, cm0);
|
||||
br_enc32le((unsigned char *)cbcmac + 4, cm1);
|
||||
br_enc32le((unsigned char *)cbcmac + 8, cm2);
|
||||
br_enc32le((unsigned char *)cbcmac + 12, cm3);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_ctrcbc_class br_aes_ct_ctrcbc_vtable = {
|
||||
sizeof(br_aes_ct_ctrcbc_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
|
||||
&br_aes_ct_ctrcbc_init,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, void *, void *, size_t))
|
||||
&br_aes_ct_ctrcbc_encrypt,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, void *, void *, size_t))
|
||||
&br_aes_ct_ctrcbc_decrypt,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, void *, size_t))
|
||||
&br_aes_ct_ctrcbc_ctr,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, const void *, size_t))
|
||||
&br_aes_ct_ctrcbc_mac
|
||||
};
|
||||
|
|
@ -0,0 +1,170 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * see inner.h
 *
 * Apply the inverse AES S-box to the bitsliced state `q` (eight 32-bit
 * words, modified in place). The same linear pre/post transform is
 * applied before and after a call to the forward S-box.
 */
void
br_aes_ct_bitslice_invSbox(uint32_t *q)
{
	/*
	 * AES S-box is:
	 *   S(x) = A(I(x)) ^ 0x63
	 * where I() is inversion in GF(256), and A() is a linear
	 * transform (0 is formally defined to be its own inverse).
	 * Since inversion is an involution, the inverse S-box can be
	 * computed from the S-box as:
	 *   iS(x) = B(S(B(x ^ 0x63)) ^ 0x63)
	 * where B() is the inverse of A(). Indeed, for any y in GF(256):
	 *   iS(S(y)) = B(A(I(B(A(I(y)) ^ 0x63 ^ 0x63))) ^ 0x63 ^ 0x63) = y
	 *
	 * Note: we reuse the implementation of the forward S-box,
	 * instead of duplicating it here, so that total code size is
	 * lower. By merging the B() transforms into the S-box circuit
	 * we could make faster CBC decryption, but CBC decryption is
	 * already quite faster than CBC encryption because we can
	 * process two blocks in parallel.
	 */
	uint32_t q0, q1, q2, q3, q4, q5, q6, q7;

	/*
	 * XOR with 0x63 (binary 01100011): complement bit planes
	 * 0, 1, 5 and 6. Then apply B().
	 */
	q0 = ~q[0];
	q1 = ~q[1];
	q2 = q[2];
	q3 = q[3];
	q4 = q[4];
	q5 = ~q[5];
	q6 = ~q[6];
	q7 = q[7];
	q[7] = q1 ^ q4 ^ q6;
	q[6] = q0 ^ q3 ^ q5;
	q[5] = q7 ^ q2 ^ q4;
	q[4] = q6 ^ q1 ^ q3;
	q[3] = q5 ^ q0 ^ q2;
	q[2] = q4 ^ q7 ^ q1;
	q[1] = q3 ^ q6 ^ q0;
	q[0] = q2 ^ q5 ^ q7;

	br_aes_ct_bitslice_Sbox(q);

	/* Same XOR-with-0x63 and B() step on the S-box output. */
	q0 = ~q[0];
	q1 = ~q[1];
	q2 = q[2];
	q3 = q[3];
	q4 = q[4];
	q5 = ~q[5];
	q6 = ~q[6];
	q7 = q[7];
	q[7] = q1 ^ q4 ^ q6;
	q[6] = q0 ^ q3 ^ q5;
	q[5] = q7 ^ q2 ^ q4;
	q[4] = q6 ^ q1 ^ q3;
	q[3] = q5 ^ q0 ^ q2;
	q[2] = q4 ^ q7 ^ q1;
	q[1] = q3 ^ q6 ^ q0;
	q[0] = q2 ^ q5 ^ q7;
}
|
||||
|
||||
/*
 * XOR the eight 32-bit words of round key `sk` into the bitsliced
 * state `q` (q[i] ^= sk[i] for i in 0..7).
 */
static void
add_round_key(uint32_t *q, const uint32_t *sk)
{
	int i;

	for (i = 0; i < 8; i ++) {
		q[i] ^= sk[i];
	}
}
|
||||
|
||||
/*
 * Apply the inverse ShiftRows permutation to each of the eight words
 * of the bitsliced state `q`, in place.
 *
 * Within each word, the low byte is left unchanged; the second byte is
 * rotated left by 2 bits, the third byte by 4 bits and the top byte by
 * 6 bits (each rotation staying inside its own byte).
 */
static void
inv_shift_rows(uint32_t *q)
{
	int i;

	for (i = 0; i < 8; i ++) {
		uint32_t x;

		x = q[i];
		q[i] = (x & 0x000000FF)
			| ((x & 0x00003F00) << 2) | ((x & 0x0000C000) >> 6)
			| ((x & 0x000F0000) << 4) | ((x & 0x00F00000) >> 4)
			| ((x & 0x03000000) << 6) | ((x & 0xFC000000) >> 2);
	}
}
|
||||
|
||||
/*
 * Rotate a 32-bit word by 16 bits, i.e. swap its two 16-bit halves.
 */
static inline uint32_t
rotr16(uint32_t x)
{
	return (x << 16) | (x >> 16);
}
|
||||
|
||||
/*
 * Apply the inverse MixColumns step to the bitsliced state `q` (eight
 * 32-bit words, modified in place).
 *
 * Each output word is an XOR combination of the input words q0..q7,
 * of their 8-bit right rotations r0..r7, and of a 16-bit rotation of
 * a further XOR combination. All inputs are copied to locals first, so
 * the in-place writes to q[] do not affect later expressions.
 */
static void
inv_mix_columns(uint32_t *q)
{
	uint32_t q0, q1, q2, q3, q4, q5, q6, q7;
	uint32_t r0, r1, r2, r3, r4, r5, r6, r7;

	q0 = q[0];
	q1 = q[1];
	q2 = q[2];
	q3 = q[3];
	q4 = q[4];
	q5 = q[5];
	q6 = q[6];
	q7 = q[7];
	r0 = (q0 >> 8) | (q0 << 24);
	r1 = (q1 >> 8) | (q1 << 24);
	r2 = (q2 >> 8) | (q2 << 24);
	r3 = (q3 >> 8) | (q3 << 24);
	r4 = (q4 >> 8) | (q4 << 24);
	r5 = (q5 >> 8) | (q5 << 24);
	r6 = (q6 >> 8) | (q6 << 24);
	r7 = (q7 >> 8) | (q7 << 24);

	q[0] = q5 ^ q6 ^ q7 ^ r0 ^ r5 ^ r7 ^ rotr16(q0 ^ q5 ^ q6 ^ r0 ^ r5);
	q[1] = q0 ^ q5 ^ r0 ^ r1 ^ r5 ^ r6 ^ r7 ^ rotr16(q1 ^ q5 ^ q7 ^ r1 ^ r5 ^ r6);
	q[2] = q0 ^ q1 ^ q6 ^ r1 ^ r2 ^ r6 ^ r7 ^ rotr16(q0 ^ q2 ^ q6 ^ r2 ^ r6 ^ r7);
	q[3] = q0 ^ q1 ^ q2 ^ q5 ^ q6 ^ r0 ^ r2 ^ r3 ^ r5 ^ rotr16(q0 ^ q1 ^ q3 ^ q5 ^ q6 ^ q7 ^ r0 ^ r3 ^ r5 ^ r7);
	q[4] = q1 ^ q2 ^ q3 ^ q5 ^ r1 ^ r3 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr16(q1 ^ q2 ^ q4 ^ q5 ^ q7 ^ r1 ^ r4 ^ r5 ^ r6);
	q[5] = q2 ^ q3 ^ q4 ^ q6 ^ r2 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr16(q2 ^ q3 ^ q5 ^ q6 ^ r2 ^ r5 ^ r6 ^ r7);
	q[6] = q3 ^ q4 ^ q5 ^ q7 ^ r3 ^ r5 ^ r6 ^ r7 ^ rotr16(q3 ^ q4 ^ q6 ^ q7 ^ r3 ^ r6 ^ r7);
	q[7] = q4 ^ q5 ^ q6 ^ r4 ^ r6 ^ r7 ^ rotr16(q4 ^ q5 ^ q7 ^ r4 ^ r7);
}
|
||||
|
||||
/*
 * see inner.h
 *
 * Decrypt the bitsliced state `q` in place.
 *
 *   num_rounds  number of AES rounds.
 *   skey        round keys, eight 32-bit words per round key; the key
 *               for round u is read at skey + 8 * u, for u from 0 to
 *               num_rounds inclusive.
 *   q           eight-word bitsliced state.
 *
 * Round keys are consumed from last to first. The final step (round
 * key 0) omits the inverse MixColumns.
 */
void
br_aes_ct_bitslice_decrypt(unsigned num_rounds,
	const uint32_t *skey, uint32_t *q)
{
	unsigned u;

	add_round_key(q, skey + (num_rounds << 3));
	for (u = num_rounds - 1; u > 0; u --) {
		inv_shift_rows(q);
		br_aes_ct_bitslice_invSbox(q);
		add_round_key(q, skey + (u << 3));
		inv_mix_columns(q);
	}
	inv_shift_rows(q);
	br_aes_ct_bitslice_invSbox(q);
	add_round_key(q, skey);
}
|
||||
|
|
@ -0,0 +1,112 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * XOR the eight 32-bit words of round key `sk` into the bitsliced
 * state `q`. The loop is written out word by word.
 */
static inline void
add_round_key(uint32_t *q, const uint32_t *sk)
{
	q[0] ^= sk[0];
	q[1] ^= sk[1];
	q[2] ^= sk[2];
	q[3] ^= sk[3];
	q[4] ^= sk[4];
	q[5] ^= sk[5];
	q[6] ^= sk[6];
	q[7] ^= sk[7];
}
|
||||
|
||||
/*
 * Apply the ShiftRows permutation to each of the eight words of the
 * bitsliced state `q`, in place.
 *
 * Within each word, the low byte is left unchanged; the second byte is
 * rotated right by 2 bits, the third byte by 4 bits and the top byte
 * by 6 bits (each rotation staying inside its own byte).
 */
static inline void
shift_rows(uint32_t *q)
{
	int i;

	for (i = 0; i < 8; i ++) {
		uint32_t x;

		x = q[i];
		q[i] = (x & 0x000000FF)
			| ((x & 0x0000FC00) >> 2) | ((x & 0x00000300) << 6)
			| ((x & 0x00F00000) >> 4) | ((x & 0x000F0000) << 4)
			| ((x & 0xC0000000) >> 6) | ((x & 0x3F000000) << 2);
	}
}
|
||||
|
||||
/*
 * Rotate a 32-bit word by 16 bits, i.e. swap its two 16-bit halves.
 */
static inline uint32_t
rotr16(uint32_t x)
{
	return (x << 16) | (x >> 16);
}
|
||||
|
||||
/*
 * Apply the MixColumns step to the bitsliced state `q` (eight 32-bit
 * words, modified in place).
 *
 * Each output word combines the input words q0..q7, their 8-bit right
 * rotations r0..r7, and a 16-bit rotation of (qi ^ ri). All inputs are
 * copied to locals first, so the in-place writes to q[] do not affect
 * later expressions.
 */
static inline void
mix_columns(uint32_t *q)
{
	uint32_t q0, q1, q2, q3, q4, q5, q6, q7;
	uint32_t r0, r1, r2, r3, r4, r5, r6, r7;

	q0 = q[0];
	q1 = q[1];
	q2 = q[2];
	q3 = q[3];
	q4 = q[4];
	q5 = q[5];
	q6 = q[6];
	q7 = q[7];
	r0 = (q0 >> 8) | (q0 << 24);
	r1 = (q1 >> 8) | (q1 << 24);
	r2 = (q2 >> 8) | (q2 << 24);
	r3 = (q3 >> 8) | (q3 << 24);
	r4 = (q4 >> 8) | (q4 << 24);
	r5 = (q5 >> 8) | (q5 << 24);
	r6 = (q6 >> 8) | (q6 << 24);
	r7 = (q7 >> 8) | (q7 << 24);

	q[0] = q7 ^ r7 ^ r0 ^ rotr16(q0 ^ r0);
	q[1] = q0 ^ r0 ^ q7 ^ r7 ^ r1 ^ rotr16(q1 ^ r1);
	q[2] = q1 ^ r1 ^ r2 ^ rotr16(q2 ^ r2);
	q[3] = q2 ^ r2 ^ q7 ^ r7 ^ r3 ^ rotr16(q3 ^ r3);
	q[4] = q3 ^ r3 ^ q7 ^ r7 ^ r4 ^ rotr16(q4 ^ r4);
	q[5] = q4 ^ r4 ^ r5 ^ rotr16(q5 ^ r5);
	q[6] = q5 ^ r5 ^ r6 ^ rotr16(q6 ^ r6);
	q[7] = q6 ^ r6 ^ r7 ^ rotr16(q7 ^ r7);
}
|
||||
|
||||
/*
 * see inner.h
 *
 * Encrypt the bitsliced state `q` in place.
 *
 *   num_rounds  number of AES rounds.
 *   skey        round keys, eight 32-bit words per round key; the key
 *               for round u is read at skey + 8 * u, for u from 0 to
 *               num_rounds inclusive.
 *   q           eight-word bitsliced state.
 *
 * The last round omits MixColumns.
 */
void
br_aes_ct_bitslice_encrypt(unsigned num_rounds,
	const uint32_t *skey, uint32_t *q)
{
	unsigned u;

	add_round_key(q, skey);
	for (u = 1; u < num_rounds; u ++) {
		br_aes_ct_bitslice_Sbox(q);
		shift_rows(q);
		mix_columns(q);
		add_round_key(q, skey + (u << 3));
	}
	br_aes_ct_bitslice_Sbox(q);
	shift_rows(q);
	add_round_key(q, skey + (num_rounds << 3));
}
|
||||
|
|
@ -0,0 +1,445 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#define BR_POWER_ASM_MACROS 1
|
||||
#include "inner.h"
|
||||
|
||||
/*
|
||||
* This code contains the AES key schedule implementation using the
|
||||
* POWER8 opcodes.
|
||||
*/
|
||||
|
||||
#if BR_POWER8
|
||||
|
||||
static void
|
||||
key_schedule_128(unsigned char *sk, const unsigned char *key)
|
||||
{
|
||||
long cc;
|
||||
|
||||
static const uint32_t fmod[] = { 0x11B, 0x11B, 0x11B, 0x11B };
|
||||
#if BR_POWER8_LE
|
||||
static const uint32_t idx2be[] = {
|
||||
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
|
||||
};
|
||||
#endif
|
||||
|
||||
cc = 0;
|
||||
|
||||
/*
|
||||
* We use the VSX instructions for loading and storing the
|
||||
* key/subkeys, since they support unaligned accesses. The rest
|
||||
* of the computation is VMX only. VMX register 0 is VSX
|
||||
* register 32.
|
||||
*/
|
||||
asm volatile (
|
||||
|
||||
/*
|
||||
* v0 = all-zero word
|
||||
* v1 = constant -8 / +8, copied into four words
|
||||
* v2 = current subkey
|
||||
* v3 = Rcon (x4 words)
|
||||
* v6 = constant 8, copied into four words
|
||||
* v7 = constant 0x11B, copied into four words
|
||||
* v8 = constant for byteswapping words
|
||||
*/
|
||||
vspltisw(0, 0)
|
||||
#if BR_POWER8_LE
|
||||
vspltisw(1, -8)
|
||||
#else
|
||||
vspltisw(1, 8)
|
||||
#endif
|
||||
lxvw4x(34, 0, %[key])
|
||||
vspltisw(3, 1)
|
||||
vspltisw(6, 8)
|
||||
lxvw4x(39, 0, %[fmod])
|
||||
#if BR_POWER8_LE
|
||||
lxvw4x(40, 0, %[idx2be])
|
||||
#endif
|
||||
|
||||
/*
|
||||
* First subkey is a copy of the key itself.
|
||||
*/
|
||||
#if BR_POWER8_LE
|
||||
vperm(4, 2, 2, 8)
|
||||
stxvw4x(36, 0, %[sk])
|
||||
#else
|
||||
stxvw4x(34, 0, %[sk])
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Loop must run 10 times.
|
||||
*/
|
||||
li(%[cc], 10)
|
||||
mtctr(%[cc])
|
||||
label(loop)
|
||||
/* Increment subkey address */
|
||||
addi(%[sk], %[sk], 16)
|
||||
|
||||
/* Compute SubWord(RotWord(temp)) xor Rcon (into v4, splat) */
|
||||
vrlw(4, 2, 1)
|
||||
vsbox(4, 4)
|
||||
#if BR_POWER8_LE
|
||||
vxor(4, 4, 3)
|
||||
#else
|
||||
vsldoi(5, 3, 0, 3)
|
||||
vxor(4, 4, 5)
|
||||
#endif
|
||||
vspltw(4, 4, 3)
|
||||
|
||||
/* XOR words for next subkey */
|
||||
vsldoi(5, 0, 2, 12)
|
||||
vxor(2, 2, 5)
|
||||
vsldoi(5, 0, 2, 12)
|
||||
vxor(2, 2, 5)
|
||||
vsldoi(5, 0, 2, 12)
|
||||
vxor(2, 2, 5)
|
||||
vxor(2, 2, 4)
|
||||
|
||||
/* Store next subkey */
|
||||
#if BR_POWER8_LE
|
||||
vperm(4, 2, 2, 8)
|
||||
stxvw4x(36, 0, %[sk])
|
||||
#else
|
||||
stxvw4x(34, 0, %[sk])
|
||||
#endif
|
||||
|
||||
/* Update Rcon */
|
||||
vadduwm(3, 3, 3)
|
||||
vsrw(4, 3, 6)
|
||||
vsubuwm(4, 0, 4)
|
||||
vand(4, 4, 7)
|
||||
vxor(3, 3, 4)
|
||||
|
||||
bdnz(loop)
|
||||
|
||||
: [sk] "+b" (sk), [cc] "+b" (cc)
|
||||
: [key] "b" (key), [fmod] "b" (fmod)
|
||||
#if BR_POWER8_LE
|
||||
, [idx2be] "b" (idx2be)
|
||||
#endif
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "ctr", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
static void
|
||||
key_schedule_192(unsigned char *sk, const unsigned char *key)
|
||||
{
|
||||
long cc;
|
||||
|
||||
#if BR_POWER8_LE
|
||||
static const uint32_t idx2be[] = {
|
||||
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
|
||||
};
|
||||
#endif
|
||||
|
||||
cc = 0;
|
||||
|
||||
/*
|
||||
* We use the VSX instructions for loading and storing the
|
||||
* key/subkeys, since they support unaligned accesses. The rest
|
||||
* of the computation is VMX only. VMX register 0 is VSX
|
||||
* register 32.
|
||||
*/
|
||||
asm volatile (
|
||||
|
||||
/*
|
||||
* v0 = all-zero word
|
||||
* v1 = constant -8 / +8, copied into four words
|
||||
* v2, v3 = current subkey
|
||||
* v5 = Rcon (x4 words) (already shifted on big-endian)
|
||||
* v6 = constant 8, copied into four words
|
||||
* v8 = constant for byteswapping words
|
||||
*
|
||||
* The left two words of v3 are ignored.
|
||||
*/
|
||||
vspltisw(0, 0)
|
||||
#if BR_POWER8_LE
|
||||
vspltisw(1, -8)
|
||||
#else
|
||||
vspltisw(1, 8)
|
||||
#endif
|
||||
li(%[cc], 8)
|
||||
lxvw4x(34, 0, %[key])
|
||||
lxvw4x(35, %[cc], %[key])
|
||||
vsldoi(3, 3, 0, 8)
|
||||
vspltisw(5, 1)
|
||||
#if !BR_POWER8_LE
|
||||
vsldoi(5, 5, 0, 3)
|
||||
#endif
|
||||
vspltisw(6, 8)
|
||||
#if BR_POWER8_LE
|
||||
lxvw4x(40, 0, %[idx2be])
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Loop must run 8 times. Each iteration produces 256
|
||||
* bits of subkeys, with a 64-bit overlap.
|
||||
*/
|
||||
li(%[cc], 8)
|
||||
mtctr(%[cc])
|
||||
li(%[cc], 16)
|
||||
label(loop)
|
||||
|
||||
/*
|
||||
* Last 6 words in v2:v3l. Compute next 6 words into
|
||||
* v3r:v4.
|
||||
*/
|
||||
vrlw(10, 3, 1)
|
||||
vsbox(10, 10)
|
||||
vxor(10, 10, 5)
|
||||
vspltw(10, 10, 1)
|
||||
vsldoi(11, 0, 10, 8)
|
||||
|
||||
vsldoi(12, 0, 2, 12)
|
||||
vxor(12, 2, 12)
|
||||
vsldoi(13, 0, 12, 12)
|
||||
vxor(12, 12, 13)
|
||||
vsldoi(13, 0, 12, 12)
|
||||
vxor(12, 12, 13)
|
||||
|
||||
vspltw(13, 12, 3)
|
||||
vxor(13, 13, 3)
|
||||
vsldoi(14, 0, 3, 12)
|
||||
vxor(13, 13, 14)
|
||||
|
||||
vsldoi(4, 12, 13, 8)
|
||||
vsldoi(14, 0, 3, 8)
|
||||
vsldoi(3, 14, 12, 8)
|
||||
|
||||
vxor(3, 3, 11)
|
||||
vxor(4, 4, 10)
|
||||
|
||||
/*
|
||||
* Update Rcon. Since for a 192-bit key, we use only 8
|
||||
* such constants, we will not hit the field modulus,
|
||||
* so a simple shift (addition) works well.
|
||||
*/
|
||||
vadduwm(5, 5, 5)
|
||||
|
||||
/*
|
||||
* Write out the two left 128-bit words
|
||||
*/
|
||||
#if BR_POWER8_LE
|
||||
vperm(10, 2, 2, 8)
|
||||
vperm(11, 3, 3, 8)
|
||||
stxvw4x(42, 0, %[sk])
|
||||
stxvw4x(43, %[cc], %[sk])
|
||||
#else
|
||||
stxvw4x(34, 0, %[sk])
|
||||
stxvw4x(35, %[cc], %[sk])
|
||||
#endif
|
||||
addi(%[sk], %[sk], 24)
|
||||
|
||||
/*
|
||||
* Shift words for next iteration.
|
||||
*/
|
||||
vsldoi(2, 3, 4, 8)
|
||||
vsldoi(3, 4, 0, 8)
|
||||
|
||||
bdnz(loop)
|
||||
|
||||
/*
|
||||
* The loop wrote the first 50 subkey words, but we need
|
||||
* to produce 52, so we must do one last write.
|
||||
*/
|
||||
#if BR_POWER8_LE
|
||||
vperm(10, 2, 2, 8)
|
||||
stxvw4x(42, 0, %[sk])
|
||||
#else
|
||||
stxvw4x(34, 0, %[sk])
|
||||
#endif
|
||||
|
||||
: [sk] "+b" (sk), [cc] "+b" (cc)
|
||||
: [key] "b" (key)
|
||||
#if BR_POWER8_LE
|
||||
, [idx2be] "b" (idx2be)
|
||||
#endif
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
|
||||
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "ctr", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
static void
|
||||
key_schedule_256(unsigned char *sk, const unsigned char *key)
|
||||
{
|
||||
long cc;
|
||||
|
||||
#if BR_POWER8_LE
|
||||
static const uint32_t idx2be[] = {
|
||||
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
|
||||
};
|
||||
#endif
|
||||
|
||||
cc = 0;
|
||||
|
||||
/*
|
||||
* We use the VSX instructions for loading and storing the
|
||||
* key/subkeys, since they support unaligned accesses. The rest
|
||||
* of the computation is VMX only. VMX register 0 is VSX
|
||||
* register 32.
|
||||
*/
|
||||
asm volatile (
|
||||
|
||||
/*
|
||||
* v0 = all-zero word
|
||||
* v1 = constant -8 / +8, copied into four words
|
||||
* v2, v3 = current subkey
|
||||
* v6 = Rcon (x4 words) (already shifted on big-endian)
|
||||
* v7 = constant 8, copied into four words
|
||||
* v8 = constant for byteswapping words
|
||||
*
|
||||
* The left two words of v3 are ignored.
|
||||
*/
|
||||
vspltisw(0, 0)
|
||||
#if BR_POWER8_LE
|
||||
vspltisw(1, -8)
|
||||
#else
|
||||
vspltisw(1, 8)
|
||||
#endif
|
||||
li(%[cc], 16)
|
||||
lxvw4x(34, 0, %[key])
|
||||
lxvw4x(35, %[cc], %[key])
|
||||
vspltisw(6, 1)
|
||||
#if !BR_POWER8_LE
|
||||
vsldoi(6, 6, 0, 3)
|
||||
#endif
|
||||
vspltisw(7, 8)
|
||||
#if BR_POWER8_LE
|
||||
lxvw4x(40, 0, %[idx2be])
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Loop must run 7 times. Each iteration produces two
|
||||
* subkeys.
|
||||
*/
|
||||
li(%[cc], 7)
|
||||
mtctr(%[cc])
|
||||
li(%[cc], 16)
|
||||
label(loop)
|
||||
|
||||
/*
|
||||
* Current words are in v2:v3. Compute next word in v4.
|
||||
*/
|
||||
vrlw(10, 3, 1)
|
||||
vsbox(10, 10)
|
||||
vxor(10, 10, 6)
|
||||
vspltw(10, 10, 3)
|
||||
|
||||
vsldoi(4, 0, 2, 12)
|
||||
vxor(4, 2, 4)
|
||||
vsldoi(5, 0, 4, 12)
|
||||
vxor(4, 4, 5)
|
||||
vsldoi(5, 0, 4, 12)
|
||||
vxor(4, 4, 5)
|
||||
vxor(4, 4, 10)
|
||||
|
||||
/*
|
||||
* Then other word in v5.
|
||||
*/
|
||||
vsbox(10, 4)
|
||||
vspltw(10, 10, 3)
|
||||
|
||||
vsldoi(5, 0, 3, 12)
|
||||
vxor(5, 3, 5)
|
||||
vsldoi(11, 0, 5, 12)
|
||||
vxor(5, 5, 11)
|
||||
vsldoi(11, 0, 5, 12)
|
||||
vxor(5, 5, 11)
|
||||
vxor(5, 5, 10)
|
||||
|
||||
/*
|
||||
* Update Rcon. Since for a 256-bit key, we use only 7
|
||||
* such constants, we will not hit the field modulus,
|
||||
* so a simple shift (addition) works well.
|
||||
*/
|
||||
vadduwm(6, 6, 6)
|
||||
|
||||
/*
|
||||
* Write out the two left 128-bit words
|
||||
*/
|
||||
#if BR_POWER8_LE
|
||||
vperm(10, 2, 2, 8)
|
||||
vperm(11, 3, 3, 8)
|
||||
stxvw4x(42, 0, %[sk])
|
||||
stxvw4x(43, %[cc], %[sk])
|
||||
#else
|
||||
stxvw4x(34, 0, %[sk])
|
||||
stxvw4x(35, %[cc], %[sk])
|
||||
#endif
|
||||
addi(%[sk], %[sk], 32)
|
||||
|
||||
/*
|
||||
* Replace v2:v3 with v4:v5.
|
||||
*/
|
||||
vxor(2, 0, 4)
|
||||
vxor(3, 0, 5)
|
||||
|
||||
bdnz(loop)
|
||||
|
||||
/*
|
||||
* The loop wrote the first 14 subkeys, but we need 15,
|
||||
* so we must do an extra write.
|
||||
*/
|
||||
#if BR_POWER8_LE
|
||||
vperm(10, 2, 2, 8)
|
||||
stxvw4x(42, 0, %[sk])
|
||||
#else
|
||||
stxvw4x(34, 0, %[sk])
|
||||
#endif
|
||||
|
||||
: [sk] "+b" (sk), [cc] "+b" (cc)
|
||||
: [key] "b" (key)
|
||||
#if BR_POWER8_LE
|
||||
, [idx2be] "b" (idx2be)
|
||||
#endif
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
|
||||
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "ctr", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
/*
 * see inner.h
 *
 * Tell callers whether the POWER8 AES code in this file may be used.
 * This definition answers "yes" (1) unconditionally.
 */
int
br_aes_pwr8_supported(void)
{
	return 1;
}
|
||||
|
||||
/*
 * see inner.h
 *
 * Expand an AES key into the subkey buffer `sk` and return the number
 * of rounds: 10 for a 16-byte key, 12 for a 24-byte key, and 14 for
 * every other length (which is handled as a 32-byte key).
 */
unsigned
br_aes_pwr8_keysched(unsigned char *sk, const void *key, size_t len)
{
	if (len == 16) {
		key_schedule_128(sk, key);
		return 10;
	}
	if (len == 24) {
		key_schedule_192(sk, key);
		return 12;
	}

	/* Anything else takes the 256-bit path, as the original default did. */
	key_schedule_256(sk, key);
	return 14;
}
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,670 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#define BR_POWER_ASM_MACROS 1
|
||||
#include "inner.h"
|
||||
|
||||
#if BR_POWER8
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_pwr8_cbcdec_init(br_aes_pwr8_cbcdec_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_pwr8_cbcdec_vtable;
|
||||
ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
|
||||
}
|
||||
|
||||
/*
 * AES CBC decryption with 11 subkeys (the 10-round case), done in
 * place on `buf`, four 16-byte blocks per loop iteration.
 *
 *   sk          subkeys; 11 consecutive 16-byte values are read
 *   iv          16-byte IV; it is only read here, never written back
 *   buf         data to decrypt; overwritten with the result
 *   num_blocks  block count; the loop runs (num_blocks >> 2) times, so
 *               a remainder modulo 4 is not processed
 *
 * The loop is closed by bdnz on CTR, so the iteration count must not be
 * zero. br_aes_pwr8_cbcdec_run() in this file only passes non-zero
 * multiples of 4.
 */
static void
cbcdec_128(const unsigned char *sk,
	const unsigned char *iv, unsigned char *buf, size_t num_blocks)
{
	/*
	 * Byte offsets of the four blocks inside each 64-byte group. cc0 is
	 * also used as the running offset while loading subkeys, then reset
	 * to 0 before the loop.
	 */
	long cc0, cc1, cc2, cc3;

#if BR_POWER8_LE
	/* Permutation pattern loaded into v15 for byteswapping words. */
	static const uint32_t idx2be[] = {
		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
	};
#endif

	cc0 = 0;
	cc1 = 16;
	cc2 = 32;
	cc3 = 48;
	asm volatile (

		/*
		 * Load subkeys into v0..v10
		 * (VSX register 32+n is VMX register n).
		 */
		lxvw4x(32, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(33, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(34, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(35, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(36, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(37, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(38, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(39, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(40, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(41, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(42, %[cc0], %[sk])
		li(%[cc0], 0)

#if BR_POWER8_LE
		/*
		 * v15 = constant for byteswapping words
		 */
		lxvw4x(47, 0, %[idx2be])
#endif
		/*
		 * Load IV into v24.
		 */
		lxvw4x(56, 0, %[iv])
#if BR_POWER8_LE
		vperm(24, 24, 24, 15)
#endif

		mtctr(%[num_blocks])
		label(loop)
		/*
		 * Load next ciphertext words in v16..v19. Also save them
		 * in v20..v23 (vand of a register with itself is a copy);
		 * the saved copies are the chaining values used below.
		 */
		lxvw4x(48, %[cc0], %[buf])
		lxvw4x(49, %[cc1], %[buf])
		lxvw4x(50, %[cc2], %[buf])
		lxvw4x(51, %[cc3], %[buf])
#if BR_POWER8_LE
		vperm(16, 16, 16, 15)
		vperm(17, 17, 17, 15)
		vperm(18, 18, 18, 15)
		vperm(19, 19, 19, 15)
#endif
		vand(20, 16, 16)
		vand(21, 17, 17)
		vand(22, 18, 18)
		vand(23, 19, 19)

		/*
		 * Decrypt the blocks: XOR with the last subkey (v10), then
		 * walk the subkeys backwards down to v0. The four blocks
		 * are interleaved round by round.
		 */
		vxor(16, 16, 10)
		vxor(17, 17, 10)
		vxor(18, 18, 10)
		vxor(19, 19, 10)
		vncipher(16, 16, 9)
		vncipher(17, 17, 9)
		vncipher(18, 18, 9)
		vncipher(19, 19, 9)
		vncipher(16, 16, 8)
		vncipher(17, 17, 8)
		vncipher(18, 18, 8)
		vncipher(19, 19, 8)
		vncipher(16, 16, 7)
		vncipher(17, 17, 7)
		vncipher(18, 18, 7)
		vncipher(19, 19, 7)
		vncipher(16, 16, 6)
		vncipher(17, 17, 6)
		vncipher(18, 18, 6)
		vncipher(19, 19, 6)
		vncipher(16, 16, 5)
		vncipher(17, 17, 5)
		vncipher(18, 18, 5)
		vncipher(19, 19, 5)
		vncipher(16, 16, 4)
		vncipher(17, 17, 4)
		vncipher(18, 18, 4)
		vncipher(19, 19, 4)
		vncipher(16, 16, 3)
		vncipher(17, 17, 3)
		vncipher(18, 18, 3)
		vncipher(19, 19, 3)
		vncipher(16, 16, 2)
		vncipher(17, 17, 2)
		vncipher(18, 18, 2)
		vncipher(19, 19, 2)
		vncipher(16, 16, 1)
		vncipher(17, 17, 1)
		vncipher(18, 18, 1)
		vncipher(19, 19, 1)
		vncipherlast(16, 16, 0)
		vncipherlast(17, 17, 0)
		vncipherlast(18, 18, 0)
		vncipherlast(19, 19, 0)

		/*
		 * XOR decrypted blocks with IV / previous block: the first
		 * block uses v24, the others use the saved ciphertext of
		 * the block before them (v20..v22).
		 */
		vxor(16, 16, 24)
		vxor(17, 17, 20)
		vxor(18, 18, 21)
		vxor(19, 19, 22)

		/*
		 * Store back result (with byteswap)
		 */
#if BR_POWER8_LE
		vperm(16, 16, 16, 15)
		vperm(17, 17, 17, 15)
		vperm(18, 18, 18, 15)
		vperm(19, 19, 19, 15)
#endif
		stxvw4x(48, %[cc0], %[buf])
		stxvw4x(49, %[cc1], %[buf])
		stxvw4x(50, %[cc2], %[buf])
		stxvw4x(51, %[cc3], %[buf])

		/*
		 * Fourth encrypted block is IV for next run.
		 */
		vand(24, 23, 23)

		addi(%[buf], %[buf], 64)

		bdnz(loop)

: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
  [buf] "+b" (buf)
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
#if BR_POWER8_LE
	, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
  "ctr", "memory"
	);
}
|
||||
|
||||
/*
 * AES CBC decryption with 13 subkeys (the 12-round case), done in
 * place on `buf`, four 16-byte blocks per loop iteration.
 *
 *   sk          subkeys; 13 consecutive 16-byte values are read
 *   iv          16-byte IV; it is only read here, never written back
 *   buf         data to decrypt; overwritten with the result
 *   num_blocks  block count; the loop runs (num_blocks >> 2) times, so
 *               a remainder modulo 4 is not processed
 *
 * The loop is closed by bdnz on CTR, so the iteration count must not be
 * zero. br_aes_pwr8_cbcdec_run() in this file only passes non-zero
 * multiples of 4.
 */
static void
cbcdec_192(const unsigned char *sk,
	const unsigned char *iv, unsigned char *buf, size_t num_blocks)
{
	/*
	 * Byte offsets of the four blocks inside each 64-byte group. cc0 is
	 * also used as the running offset while loading subkeys, then reset
	 * to 0 before the loop.
	 */
	long cc0, cc1, cc2, cc3;

#if BR_POWER8_LE
	/* Permutation pattern loaded into v15 for byteswapping words. */
	static const uint32_t idx2be[] = {
		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
	};
#endif

	cc0 = 0;
	cc1 = 16;
	cc2 = 32;
	cc3 = 48;
	asm volatile (

		/*
		 * Load subkeys into v0..v12
		 * (VSX register 32+n is VMX register n).
		 */
		lxvw4x(32, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(33, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(34, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(35, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(36, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(37, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(38, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(39, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(40, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(41, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(42, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(43, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(44, %[cc0], %[sk])
		li(%[cc0], 0)

#if BR_POWER8_LE
		/*
		 * v15 = constant for byteswapping words
		 */
		lxvw4x(47, 0, %[idx2be])
#endif
		/*
		 * Load IV into v24.
		 */
		lxvw4x(56, 0, %[iv])
#if BR_POWER8_LE
		vperm(24, 24, 24, 15)
#endif

		mtctr(%[num_blocks])
		label(loop)
		/*
		 * Load next ciphertext words in v16..v19. Also save them
		 * in v20..v23 (vand of a register with itself is a copy);
		 * the saved copies are the chaining values used below.
		 */
		lxvw4x(48, %[cc0], %[buf])
		lxvw4x(49, %[cc1], %[buf])
		lxvw4x(50, %[cc2], %[buf])
		lxvw4x(51, %[cc3], %[buf])
#if BR_POWER8_LE
		vperm(16, 16, 16, 15)
		vperm(17, 17, 17, 15)
		vperm(18, 18, 18, 15)
		vperm(19, 19, 19, 15)
#endif
		vand(20, 16, 16)
		vand(21, 17, 17)
		vand(22, 18, 18)
		vand(23, 19, 19)

		/*
		 * Decrypt the blocks: XOR with the last subkey (v12), then
		 * walk the subkeys backwards down to v0. The four blocks
		 * are interleaved round by round.
		 */
		vxor(16, 16, 12)
		vxor(17, 17, 12)
		vxor(18, 18, 12)
		vxor(19, 19, 12)
		vncipher(16, 16, 11)
		vncipher(17, 17, 11)
		vncipher(18, 18, 11)
		vncipher(19, 19, 11)
		vncipher(16, 16, 10)
		vncipher(17, 17, 10)
		vncipher(18, 18, 10)
		vncipher(19, 19, 10)
		vncipher(16, 16, 9)
		vncipher(17, 17, 9)
		vncipher(18, 18, 9)
		vncipher(19, 19, 9)
		vncipher(16, 16, 8)
		vncipher(17, 17, 8)
		vncipher(18, 18, 8)
		vncipher(19, 19, 8)
		vncipher(16, 16, 7)
		vncipher(17, 17, 7)
		vncipher(18, 18, 7)
		vncipher(19, 19, 7)
		vncipher(16, 16, 6)
		vncipher(17, 17, 6)
		vncipher(18, 18, 6)
		vncipher(19, 19, 6)
		vncipher(16, 16, 5)
		vncipher(17, 17, 5)
		vncipher(18, 18, 5)
		vncipher(19, 19, 5)
		vncipher(16, 16, 4)
		vncipher(17, 17, 4)
		vncipher(18, 18, 4)
		vncipher(19, 19, 4)
		vncipher(16, 16, 3)
		vncipher(17, 17, 3)
		vncipher(18, 18, 3)
		vncipher(19, 19, 3)
		vncipher(16, 16, 2)
		vncipher(17, 17, 2)
		vncipher(18, 18, 2)
		vncipher(19, 19, 2)
		vncipher(16, 16, 1)
		vncipher(17, 17, 1)
		vncipher(18, 18, 1)
		vncipher(19, 19, 1)
		vncipherlast(16, 16, 0)
		vncipherlast(17, 17, 0)
		vncipherlast(18, 18, 0)
		vncipherlast(19, 19, 0)

		/*
		 * XOR decrypted blocks with IV / previous block: the first
		 * block uses v24, the others use the saved ciphertext of
		 * the block before them (v20..v22).
		 */
		vxor(16, 16, 24)
		vxor(17, 17, 20)
		vxor(18, 18, 21)
		vxor(19, 19, 22)

		/*
		 * Store back result (with byteswap)
		 */
#if BR_POWER8_LE
		vperm(16, 16, 16, 15)
		vperm(17, 17, 17, 15)
		vperm(18, 18, 18, 15)
		vperm(19, 19, 19, 15)
#endif
		stxvw4x(48, %[cc0], %[buf])
		stxvw4x(49, %[cc1], %[buf])
		stxvw4x(50, %[cc2], %[buf])
		stxvw4x(51, %[cc3], %[buf])

		/*
		 * Fourth encrypted block is IV for next run.
		 */
		vand(24, 23, 23)

		addi(%[buf], %[buf], 64)

		bdnz(loop)

: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
  [buf] "+b" (buf)
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
#if BR_POWER8_LE
	, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
  "ctr", "memory"
	);
}
|
||||
|
||||
/*
 * AES CBC decryption with 15 subkeys (the 14-round case), done in
 * place on `buf`, four 16-byte blocks per loop iteration.
 *
 *   sk          subkeys; 15 consecutive 16-byte values are read
 *   iv          16-byte IV; it is only read here, never written back
 *   buf         data to decrypt; overwritten with the result
 *   num_blocks  block count; the loop runs (num_blocks >> 2) times, so
 *               a remainder modulo 4 is not processed
 *
 * The loop is closed by bdnz on CTR, so the iteration count must not be
 * zero. br_aes_pwr8_cbcdec_run() in this file only passes non-zero
 * multiples of 4.
 */
static void
cbcdec_256(const unsigned char *sk,
	const unsigned char *iv, unsigned char *buf, size_t num_blocks)
{
	/*
	 * Byte offsets of the four blocks inside each 64-byte group. cc0 is
	 * also used as the running offset while loading subkeys, then reset
	 * to 0 before the loop.
	 */
	long cc0, cc1, cc2, cc3;

#if BR_POWER8_LE
	/* Permutation pattern loaded into v15 for byteswapping words. */
	static const uint32_t idx2be[] = {
		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
	};
#endif

	cc0 = 0;
	cc1 = 16;
	cc2 = 32;
	cc3 = 48;
	asm volatile (

		/*
		 * Load subkeys into v0..v14
		 * (VSX register 32+n is VMX register n).
		 */
		lxvw4x(32, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(33, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(34, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(35, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(36, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(37, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(38, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(39, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(40, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(41, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(42, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(43, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(44, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(45, %[cc0], %[sk])
		addi(%[cc0], %[cc0], 16)
		lxvw4x(46, %[cc0], %[sk])
		li(%[cc0], 0)

#if BR_POWER8_LE
		/*
		 * v15 = constant for byteswapping words
		 */
		lxvw4x(47, 0, %[idx2be])
#endif
		/*
		 * Load IV into v24.
		 */
		lxvw4x(56, 0, %[iv])
#if BR_POWER8_LE
		vperm(24, 24, 24, 15)
#endif

		mtctr(%[num_blocks])
		label(loop)
		/*
		 * Load next ciphertext words in v16..v19. Also save them
		 * in v20..v23 (vand of a register with itself is a copy);
		 * the saved copies are the chaining values used below.
		 */
		lxvw4x(48, %[cc0], %[buf])
		lxvw4x(49, %[cc1], %[buf])
		lxvw4x(50, %[cc2], %[buf])
		lxvw4x(51, %[cc3], %[buf])
#if BR_POWER8_LE
		vperm(16, 16, 16, 15)
		vperm(17, 17, 17, 15)
		vperm(18, 18, 18, 15)
		vperm(19, 19, 19, 15)
#endif
		vand(20, 16, 16)
		vand(21, 17, 17)
		vand(22, 18, 18)
		vand(23, 19, 19)

		/*
		 * Decrypt the blocks: XOR with the last subkey (v14), then
		 * walk the subkeys backwards down to v0. The four blocks
		 * are interleaved round by round.
		 */
		vxor(16, 16, 14)
		vxor(17, 17, 14)
		vxor(18, 18, 14)
		vxor(19, 19, 14)
		vncipher(16, 16, 13)
		vncipher(17, 17, 13)
		vncipher(18, 18, 13)
		vncipher(19, 19, 13)
		vncipher(16, 16, 12)
		vncipher(17, 17, 12)
		vncipher(18, 18, 12)
		vncipher(19, 19, 12)
		vncipher(16, 16, 11)
		vncipher(17, 17, 11)
		vncipher(18, 18, 11)
		vncipher(19, 19, 11)
		vncipher(16, 16, 10)
		vncipher(17, 17, 10)
		vncipher(18, 18, 10)
		vncipher(19, 19, 10)
		vncipher(16, 16, 9)
		vncipher(17, 17, 9)
		vncipher(18, 18, 9)
		vncipher(19, 19, 9)
		vncipher(16, 16, 8)
		vncipher(17, 17, 8)
		vncipher(18, 18, 8)
		vncipher(19, 19, 8)
		vncipher(16, 16, 7)
		vncipher(17, 17, 7)
		vncipher(18, 18, 7)
		vncipher(19, 19, 7)
		vncipher(16, 16, 6)
		vncipher(17, 17, 6)
		vncipher(18, 18, 6)
		vncipher(19, 19, 6)
		vncipher(16, 16, 5)
		vncipher(17, 17, 5)
		vncipher(18, 18, 5)
		vncipher(19, 19, 5)
		vncipher(16, 16, 4)
		vncipher(17, 17, 4)
		vncipher(18, 18, 4)
		vncipher(19, 19, 4)
		vncipher(16, 16, 3)
		vncipher(17, 17, 3)
		vncipher(18, 18, 3)
		vncipher(19, 19, 3)
		vncipher(16, 16, 2)
		vncipher(17, 17, 2)
		vncipher(18, 18, 2)
		vncipher(19, 19, 2)
		vncipher(16, 16, 1)
		vncipher(17, 17, 1)
		vncipher(18, 18, 1)
		vncipher(19, 19, 1)
		vncipherlast(16, 16, 0)
		vncipherlast(17, 17, 0)
		vncipherlast(18, 18, 0)
		vncipherlast(19, 19, 0)

		/*
		 * XOR decrypted blocks with IV / previous block: the first
		 * block uses v24, the others use the saved ciphertext of
		 * the block before them (v20..v22).
		 */
		vxor(16, 16, 24)
		vxor(17, 17, 20)
		vxor(18, 18, 21)
		vxor(19, 19, 22)

		/*
		 * Store back result (with byteswap)
		 */
#if BR_POWER8_LE
		vperm(16, 16, 16, 15)
		vperm(17, 17, 17, 15)
		vperm(18, 18, 18, 15)
		vperm(19, 19, 19, 15)
#endif
		stxvw4x(48, %[cc0], %[buf])
		stxvw4x(49, %[cc1], %[buf])
		stxvw4x(50, %[cc2], %[buf])
		stxvw4x(51, %[cc3], %[buf])

		/*
		 * Fourth encrypted block is IV for next run.
		 */
		vand(24, 23, 23)

		addi(%[buf], %[buf], 64)

		bdnz(loop)

: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
  [buf] "+b" (buf)
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
#if BR_POWER8_LE
	, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
  "ctr", "memory"
	);
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_pwr8_cbcdec_run(const br_aes_pwr8_cbcdec_keys *ctx,
|
||||
void *iv, void *data, size_t len)
|
||||
{
|
||||
unsigned char nextiv[16];
|
||||
unsigned char *buf;
|
||||
|
||||
if (len == 0) {
|
||||
return;
|
||||
}
|
||||
buf = data;
|
||||
memcpy(nextiv, buf + len - 16, 16);
|
||||
if (len >= 64) {
|
||||
size_t num_blocks;
|
||||
unsigned char tmp[16];
|
||||
|
||||
num_blocks = (len >> 4) & ~(size_t)3;
|
||||
memcpy(tmp, buf + (num_blocks << 4) - 16, 16);
|
||||
switch (ctx->num_rounds) {
|
||||
case 10:
|
||||
cbcdec_128(ctx->skey.skni, iv, buf, num_blocks);
|
||||
break;
|
||||
case 12:
|
||||
cbcdec_192(ctx->skey.skni, iv, buf, num_blocks);
|
||||
break;
|
||||
default:
|
||||
cbcdec_256(ctx->skey.skni, iv, buf, num_blocks);
|
||||
break;
|
||||
}
|
||||
buf += num_blocks << 4;
|
||||
len &= 63;
|
||||
memcpy(iv, tmp, 16);
|
||||
}
|
||||
if (len > 0) {
|
||||
unsigned char tmp[64];
|
||||
|
||||
memcpy(tmp, buf, len);
|
||||
memset(tmp + len, 0, (sizeof tmp) - len);
|
||||
switch (ctx->num_rounds) {
|
||||
case 10:
|
||||
cbcdec_128(ctx->skey.skni, iv, tmp, 4);
|
||||
break;
|
||||
case 12:
|
||||
cbcdec_192(ctx->skey.skni, iv, tmp, 4);
|
||||
break;
|
||||
default:
|
||||
cbcdec_256(ctx->skey.skni, iv, tmp, 4);
|
||||
break;
|
||||
}
|
||||
memcpy(buf, tmp, len);
|
||||
}
|
||||
memcpy(iv, nextiv, 16);
|
||||
}
|
||||
|
||||
/*
 * see bearssl_block.h
 *
 * Vtable for the POWER8 AES CBC-decryption implementation. The
 * initializer is positional; the field meanings noted below are
 * presumed from the values and should be confirmed against the
 * br_block_cbcdec_class declaration.
 */
const br_block_cbcdec_class br_aes_pwr8_cbcdec_vtable = {
	/* size of the context structure */
	sizeof(br_aes_pwr8_cbcdec_keys),
	/* presumably the block size in bytes -- TODO confirm */
	16,
	/* presumably log2 of the block size -- TODO confirm */
	4,
	/* init and run entry points, cast to the generic class signatures */
	(void (*)(const br_block_cbcdec_class **, const void *, size_t))
		&br_aes_pwr8_cbcdec_init,
	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
		&br_aes_pwr8_cbcdec_run
};
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_cbcdec_class *
|
||||
br_aes_pwr8_cbcdec_get_vtable(void)
|
||||
{
|
||||
return br_aes_pwr8_supported() ? &br_aes_pwr8_cbcdec_vtable : NULL;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/*
 * see bearssl_block.h
 *
 * Variant compiled when BR_POWER8 is not set: there is no POWER8
 * implementation to offer, so no vtable is returned.
 */
const br_block_cbcdec_class *
br_aes_pwr8_cbcdec_get_vtable(void)
{
	return NULL;
}
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,417 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#define BR_POWER_ASM_MACROS 1
|
||||
#include "inner.h"
|
||||
|
||||
#if BR_POWER8
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_pwr8_cbcenc_init(br_aes_pwr8_cbcenc_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_pwr8_cbcenc_vtable;
|
||||
ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
|
||||
}
|
||||
|
||||
/*
 * AES CBC encryption with 11 subkeys (the 10-round case), done in
 * place on `buf`, one 16-byte block per loop iteration.
 *
 *   sk    subkeys; 11 consecutive 16-byte values are read
 *   iv    16-byte IV; it is only read here, never written back
 *   buf   data to encrypt; overwritten with the result
 *   len   data length in bytes; the loop runs (len >> 4) times
 *
 * The loop is closed by bdnz on CTR, so (len >> 4) must not be zero.
 * The caller in this file only checks len > 0, so this presumably
 * relies on len being a non-zero multiple of 16 -- confirm against
 * the API contract.
 */
static void
cbcenc_128(const unsigned char *sk,
	const unsigned char *iv, unsigned char *buf, size_t len)
{
	/* Running byte offset used while loading the subkeys. */
	long cc;

#if BR_POWER8_LE
	/* Permutation pattern loaded into v15 for byteswapping words. */
	static const uint32_t idx2be[] = {
		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
	};
#endif

	cc = 0;
	asm volatile (

		/*
		 * Load subkeys into v0..v10
		 * (VSX register 32+n is VMX register n).
		 */
		lxvw4x(32, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(33, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(34, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(35, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(36, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(37, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(38, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(39, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(40, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(41, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(42, %[cc], %[sk])

#if BR_POWER8_LE
		/*
		 * v15 = constant for byteswapping words
		 */
		lxvw4x(47, 0, %[idx2be])
#endif
		/*
		 * Load IV into v16.
		 */
		lxvw4x(48, 0, %[iv])
#if BR_POWER8_LE
		vperm(16, 16, 16, 15)
#endif

		mtctr(%[num_blocks])
		label(loop)
		/*
		 * Load next plaintext word and XOR with current IV.
		 * v16 holds the chaining value: the IV at first, then the
		 * previous ciphertext block.
		 */
		lxvw4x(49, 0, %[buf])
#if BR_POWER8_LE
		vperm(17, 17, 17, 15)
#endif
		vxor(16, 16, 17)

		/*
		 * Encrypt the block.
		 */
		vxor(16, 16, 0)
		vcipher(16, 16, 1)
		vcipher(16, 16, 2)
		vcipher(16, 16, 3)
		vcipher(16, 16, 4)
		vcipher(16, 16, 5)
		vcipher(16, 16, 6)
		vcipher(16, 16, 7)
		vcipher(16, 16, 8)
		vcipher(16, 16, 9)
		vcipherlast(16, 16, 10)

		/*
		 * Store back result (with byteswap). On little-endian the
		 * swapped copy goes to v17 so that v16 stays usable as the
		 * chaining value for the next block.
		 */
#if BR_POWER8_LE
		vperm(17, 16, 16, 15)
		stxvw4x(49, 0, %[buf])
#else
		stxvw4x(48, 0, %[buf])
#endif
		addi(%[buf], %[buf], 16)

		bdnz(loop)

: [cc] "+b" (cc), [buf] "+b" (buf)
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
#if BR_POWER8_LE
	, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
  "ctr", "memory"
	);
}
|
||||
|
||||
/*
 * AES CBC encryption with 13 subkeys (the 12-round case), done in
 * place on `buf`, one 16-byte block per loop iteration.
 *
 *   sk    subkeys; 13 consecutive 16-byte values are read
 *   iv    16-byte IV; it is only read here, never written back
 *   buf   data to encrypt; overwritten with the result
 *   len   data length in bytes; the loop runs (len >> 4) times
 *
 * The loop is closed by bdnz on CTR, so (len >> 4) must not be zero.
 * The caller in this file only checks len > 0, so this presumably
 * relies on len being a non-zero multiple of 16 -- confirm against
 * the API contract.
 */
static void
cbcenc_192(const unsigned char *sk,
	const unsigned char *iv, unsigned char *buf, size_t len)
{
	/* Running byte offset used while loading the subkeys. */
	long cc;

#if BR_POWER8_LE
	/* Permutation pattern loaded into v15 for byteswapping words. */
	static const uint32_t idx2be[] = {
		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
	};
#endif

	cc = 0;
	asm volatile (

		/*
		 * Load subkeys into v0..v12
		 * (VSX register 32+n is VMX register n).
		 */
		lxvw4x(32, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(33, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(34, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(35, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(36, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(37, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(38, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(39, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(40, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(41, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(42, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(43, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(44, %[cc], %[sk])

#if BR_POWER8_LE
		/*
		 * v15 = constant for byteswapping words
		 */
		lxvw4x(47, 0, %[idx2be])
#endif
		/*
		 * Load IV into v16.
		 */
		lxvw4x(48, 0, %[iv])
#if BR_POWER8_LE
		vperm(16, 16, 16, 15)
#endif

		mtctr(%[num_blocks])
		label(loop)
		/*
		 * Load next plaintext word and XOR with current IV.
		 * v16 holds the chaining value: the IV at first, then the
		 * previous ciphertext block.
		 */
		lxvw4x(49, 0, %[buf])
#if BR_POWER8_LE
		vperm(17, 17, 17, 15)
#endif
		vxor(16, 16, 17)

		/*
		 * Encrypt the block.
		 */
		vxor(16, 16, 0)
		vcipher(16, 16, 1)
		vcipher(16, 16, 2)
		vcipher(16, 16, 3)
		vcipher(16, 16, 4)
		vcipher(16, 16, 5)
		vcipher(16, 16, 6)
		vcipher(16, 16, 7)
		vcipher(16, 16, 8)
		vcipher(16, 16, 9)
		vcipher(16, 16, 10)
		vcipher(16, 16, 11)
		vcipherlast(16, 16, 12)

		/*
		 * Store back result (with byteswap). On little-endian the
		 * swapped copy goes to v17 so that v16 stays usable as the
		 * chaining value for the next block.
		 */
#if BR_POWER8_LE
		vperm(17, 16, 16, 15)
		stxvw4x(49, 0, %[buf])
#else
		stxvw4x(48, 0, %[buf])
#endif
		addi(%[buf], %[buf], 16)

		bdnz(loop)

: [cc] "+b" (cc), [buf] "+b" (buf)
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
#if BR_POWER8_LE
	, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
  "ctr", "memory"
	);
}
|
||||
|
||||
/*
 * AES CBC encryption with 15 subkeys (the 14-round case), done in
 * place on `buf`, one 16-byte block per loop iteration.
 *
 *   sk    subkeys; 15 consecutive 16-byte values are read
 *   iv    16-byte IV; it is only read here, never written back
 *   buf   data to encrypt; overwritten with the result
 *   len   data length in bytes; the loop runs (len >> 4) times
 *
 * The loop is closed by bdnz on CTR, so (len >> 4) must not be zero.
 * The caller in this file only checks len > 0, so this presumably
 * relies on len being a non-zero multiple of 16 -- confirm against
 * the API contract.
 */
static void
cbcenc_256(const unsigned char *sk,
	const unsigned char *iv, unsigned char *buf, size_t len)
{
	/* Running byte offset used while loading the subkeys. */
	long cc;

#if BR_POWER8_LE
	/* Permutation pattern loaded into v15 for byteswapping words. */
	static const uint32_t idx2be[] = {
		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
	};
#endif

	cc = 0;
	asm volatile (

		/*
		 * Load subkeys into v0..v14
		 * (VSX register 32+n is VMX register n).
		 */
		lxvw4x(32, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(33, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(34, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(35, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(36, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(37, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(38, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(39, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(40, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(41, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(42, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(43, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(44, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(45, %[cc], %[sk])
		addi(%[cc], %[cc], 16)
		lxvw4x(46, %[cc], %[sk])

#if BR_POWER8_LE
		/*
		 * v15 = constant for byteswapping words
		 */
		lxvw4x(47, 0, %[idx2be])
#endif
		/*
		 * Load IV into v16.
		 */
		lxvw4x(48, 0, %[iv])
#if BR_POWER8_LE
		vperm(16, 16, 16, 15)
#endif

		mtctr(%[num_blocks])
		label(loop)
		/*
		 * Load next plaintext word and XOR with current IV.
		 * v16 holds the chaining value: the IV at first, then the
		 * previous ciphertext block.
		 */
		lxvw4x(49, 0, %[buf])
#if BR_POWER8_LE
		vperm(17, 17, 17, 15)
#endif
		vxor(16, 16, 17)

		/*
		 * Encrypt the block.
		 */
		vxor(16, 16, 0)
		vcipher(16, 16, 1)
		vcipher(16, 16, 2)
		vcipher(16, 16, 3)
		vcipher(16, 16, 4)
		vcipher(16, 16, 5)
		vcipher(16, 16, 6)
		vcipher(16, 16, 7)
		vcipher(16, 16, 8)
		vcipher(16, 16, 9)
		vcipher(16, 16, 10)
		vcipher(16, 16, 11)
		vcipher(16, 16, 12)
		vcipher(16, 16, 13)
		vcipherlast(16, 16, 14)

		/*
		 * Store back result (with byteswap). On little-endian the
		 * swapped copy goes to v17 so that v16 stays usable as the
		 * chaining value for the next block.
		 */
#if BR_POWER8_LE
		vperm(17, 16, 16, 15)
		stxvw4x(49, 0, %[buf])
#else
		stxvw4x(48, 0, %[buf])
#endif
		addi(%[buf], %[buf], 16)

		bdnz(loop)

: [cc] "+b" (cc), [buf] "+b" (buf)
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
#if BR_POWER8_LE
	, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
  "ctr", "memory"
	);
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_pwr8_cbcenc_run(const br_aes_pwr8_cbcenc_keys *ctx,
|
||||
void *iv, void *data, size_t len)
|
||||
{
|
||||
if (len > 0) {
|
||||
switch (ctx->num_rounds) {
|
||||
case 10:
|
||||
cbcenc_128(ctx->skey.skni, iv, data, len);
|
||||
break;
|
||||
case 12:
|
||||
cbcenc_192(ctx->skey.skni, iv, data, len);
|
||||
break;
|
||||
default:
|
||||
cbcenc_256(ctx->skey.skni, iv, data, len);
|
||||
break;
|
||||
}
|
||||
memcpy(iv, (unsigned char *)data + (len - 16), 16);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * see bearssl_block.h
 *
 * Vtable for the POWER8 AES CBC-encryption implementation. The
 * initializer is positional; the field meanings noted below are
 * presumed from the values and should be confirmed against the
 * br_block_cbcenc_class declaration.
 */
const br_block_cbcenc_class br_aes_pwr8_cbcenc_vtable = {
	/* size of the context structure */
	sizeof(br_aes_pwr8_cbcenc_keys),
	/* presumably the block size in bytes -- TODO confirm */
	16,
	/* presumably log2 of the block size -- TODO confirm */
	4,
	/* init and run entry points, cast to the generic class signatures */
	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
		&br_aes_pwr8_cbcenc_init,
	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
		&br_aes_pwr8_cbcenc_run
};
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_cbcenc_class *
|
||||
br_aes_pwr8_cbcenc_get_vtable(void)
|
||||
{
|
||||
return br_aes_pwr8_supported() ? &br_aes_pwr8_cbcenc_vtable : NULL;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/*
 * see bearssl_block.h
 *
 * Variant compiled when BR_POWER8 is not set: there is no POWER8
 * implementation to offer, so no vtable is returned.
 */
const br_block_cbcenc_class *
br_aes_pwr8_cbcenc_get_vtable(void)
{
	return NULL;
}
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,717 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#define BR_POWER_ASM_MACROS 1
|
||||
#include "inner.h"
|
||||
|
||||
#if BR_POWER8
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_pwr8_ctr_init(br_aes_pwr8_ctr_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_pwr8_ctr_vtable;
|
||||
ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
|
||||
}
|
||||
|
||||
static void
|
||||
ctr_128(const unsigned char *sk, const unsigned char *ivbuf,
|
||||
unsigned char *buf, size_t num_blocks)
|
||||
{
|
||||
long cc0, cc1, cc2, cc3;
|
||||
|
||||
#if BR_POWER8_LE
|
||||
static const uint32_t idx2be[] = {
|
||||
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
|
||||
};
|
||||
#endif
|
||||
static const uint32_t ctrinc[] = {
|
||||
0, 0, 0, 4
|
||||
};
|
||||
|
||||
cc0 = 0;
|
||||
cc1 = 16;
|
||||
cc2 = 32;
|
||||
cc3 = 48;
|
||||
asm volatile (
|
||||
|
||||
/*
|
||||
* Load subkeys into v0..v10
|
||||
*/
|
||||
lxvw4x(32, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(33, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(34, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(35, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(36, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(37, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(38, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(39, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(40, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(41, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(42, %[cc0], %[sk])
|
||||
li(%[cc0], 0)
|
||||
|
||||
#if BR_POWER8_LE
|
||||
/*
|
||||
* v15 = constant for byteswapping words
|
||||
*/
|
||||
lxvw4x(47, 0, %[idx2be])
|
||||
#endif
|
||||
/*
|
||||
* v28 = increment for IV counter.
|
||||
*/
|
||||
lxvw4x(60, 0, %[ctrinc])
|
||||
|
||||
/*
|
||||
* Load IV into v16..v19
|
||||
*/
|
||||
lxvw4x(48, %[cc0], %[ivbuf])
|
||||
lxvw4x(49, %[cc1], %[ivbuf])
|
||||
lxvw4x(50, %[cc2], %[ivbuf])
|
||||
lxvw4x(51, %[cc3], %[ivbuf])
|
||||
#if BR_POWER8_LE
|
||||
vperm(16, 16, 16, 15)
|
||||
vperm(17, 17, 17, 15)
|
||||
vperm(18, 18, 18, 15)
|
||||
vperm(19, 19, 19, 15)
|
||||
#endif
|
||||
|
||||
mtctr(%[num_blocks])
|
||||
label(loop)
|
||||
/*
|
||||
* Compute next IV into v24..v27
|
||||
*/
|
||||
vadduwm(24, 16, 28)
|
||||
vadduwm(25, 17, 28)
|
||||
vadduwm(26, 18, 28)
|
||||
vadduwm(27, 19, 28)
|
||||
|
||||
/*
|
||||
* Load next data blocks. We do this early on but we
|
||||
* won't need them until IV encryption is done.
|
||||
*/
|
||||
lxvw4x(52, %[cc0], %[buf])
|
||||
lxvw4x(53, %[cc1], %[buf])
|
||||
lxvw4x(54, %[cc2], %[buf])
|
||||
lxvw4x(55, %[cc3], %[buf])
|
||||
|
||||
/*
|
||||
* Encrypt the current IV.
|
||||
*/
|
||||
vxor(16, 16, 0)
|
||||
vxor(17, 17, 0)
|
||||
vxor(18, 18, 0)
|
||||
vxor(19, 19, 0)
|
||||
vcipher(16, 16, 1)
|
||||
vcipher(17, 17, 1)
|
||||
vcipher(18, 18, 1)
|
||||
vcipher(19, 19, 1)
|
||||
vcipher(16, 16, 2)
|
||||
vcipher(17, 17, 2)
|
||||
vcipher(18, 18, 2)
|
||||
vcipher(19, 19, 2)
|
||||
vcipher(16, 16, 3)
|
||||
vcipher(17, 17, 3)
|
||||
vcipher(18, 18, 3)
|
||||
vcipher(19, 19, 3)
|
||||
vcipher(16, 16, 4)
|
||||
vcipher(17, 17, 4)
|
||||
vcipher(18, 18, 4)
|
||||
vcipher(19, 19, 4)
|
||||
vcipher(16, 16, 5)
|
||||
vcipher(17, 17, 5)
|
||||
vcipher(18, 18, 5)
|
||||
vcipher(19, 19, 5)
|
||||
vcipher(16, 16, 6)
|
||||
vcipher(17, 17, 6)
|
||||
vcipher(18, 18, 6)
|
||||
vcipher(19, 19, 6)
|
||||
vcipher(16, 16, 7)
|
||||
vcipher(17, 17, 7)
|
||||
vcipher(18, 18, 7)
|
||||
vcipher(19, 19, 7)
|
||||
vcipher(16, 16, 8)
|
||||
vcipher(17, 17, 8)
|
||||
vcipher(18, 18, 8)
|
||||
vcipher(19, 19, 8)
|
||||
vcipher(16, 16, 9)
|
||||
vcipher(17, 17, 9)
|
||||
vcipher(18, 18, 9)
|
||||
vcipher(19, 19, 9)
|
||||
vcipherlast(16, 16, 10)
|
||||
vcipherlast(17, 17, 10)
|
||||
vcipherlast(18, 18, 10)
|
||||
vcipherlast(19, 19, 10)
|
||||
|
||||
#if BR_POWER8_LE
|
||||
vperm(16, 16, 16, 15)
|
||||
vperm(17, 17, 17, 15)
|
||||
vperm(18, 18, 18, 15)
|
||||
vperm(19, 19, 19, 15)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Load next plaintext word and XOR with encrypted IV.
|
||||
*/
|
||||
vxor(16, 20, 16)
|
||||
vxor(17, 21, 17)
|
||||
vxor(18, 22, 18)
|
||||
vxor(19, 23, 19)
|
||||
stxvw4x(48, %[cc0], %[buf])
|
||||
stxvw4x(49, %[cc1], %[buf])
|
||||
stxvw4x(50, %[cc2], %[buf])
|
||||
stxvw4x(51, %[cc3], %[buf])
|
||||
|
||||
addi(%[buf], %[buf], 64)
|
||||
|
||||
/*
|
||||
* Update IV.
|
||||
*/
|
||||
vand(16, 24, 24)
|
||||
vand(17, 25, 25)
|
||||
vand(18, 26, 26)
|
||||
vand(19, 27, 27)
|
||||
|
||||
bdnz(loop)
|
||||
|
||||
: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
|
||||
[buf] "+b" (buf)
|
||||
: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
|
||||
[ctrinc] "b" (ctrinc)
|
||||
#if BR_POWER8_LE
|
||||
, [idx2be] "b" (idx2be)
|
||||
#endif
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
|
||||
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
|
||||
"ctr", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
static void
|
||||
ctr_192(const unsigned char *sk, const unsigned char *ivbuf,
|
||||
unsigned char *buf, size_t num_blocks)
|
||||
{
|
||||
long cc0, cc1, cc2, cc3;
|
||||
|
||||
#if BR_POWER8_LE
|
||||
static const uint32_t idx2be[] = {
|
||||
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
|
||||
};
|
||||
#endif
|
||||
static const uint32_t ctrinc[] = {
|
||||
0, 0, 0, 4
|
||||
};
|
||||
|
||||
cc0 = 0;
|
||||
cc1 = 16;
|
||||
cc2 = 32;
|
||||
cc3 = 48;
|
||||
asm volatile (
|
||||
|
||||
/*
|
||||
* Load subkeys into v0..v12
|
||||
*/
|
||||
lxvw4x(32, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(33, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(34, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(35, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(36, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(37, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(38, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(39, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(40, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(41, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(42, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(43, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(44, %[cc0], %[sk])
|
||||
li(%[cc0], 0)
|
||||
|
||||
#if BR_POWER8_LE
|
||||
/*
|
||||
* v15 = constant for byteswapping words
|
||||
*/
|
||||
lxvw4x(47, 0, %[idx2be])
|
||||
#endif
|
||||
/*
|
||||
* v28 = increment for IV counter.
|
||||
*/
|
||||
lxvw4x(60, 0, %[ctrinc])
|
||||
|
||||
/*
|
||||
* Load IV into v16..v19
|
||||
*/
|
||||
lxvw4x(48, %[cc0], %[ivbuf])
|
||||
lxvw4x(49, %[cc1], %[ivbuf])
|
||||
lxvw4x(50, %[cc2], %[ivbuf])
|
||||
lxvw4x(51, %[cc3], %[ivbuf])
|
||||
#if BR_POWER8_LE
|
||||
vperm(16, 16, 16, 15)
|
||||
vperm(17, 17, 17, 15)
|
||||
vperm(18, 18, 18, 15)
|
||||
vperm(19, 19, 19, 15)
|
||||
#endif
|
||||
|
||||
mtctr(%[num_blocks])
|
||||
label(loop)
|
||||
/*
|
||||
* Compute next IV into v24..v27
|
||||
*/
|
||||
vadduwm(24, 16, 28)
|
||||
vadduwm(25, 17, 28)
|
||||
vadduwm(26, 18, 28)
|
||||
vadduwm(27, 19, 28)
|
||||
|
||||
/*
|
||||
* Load next data blocks. We do this early on but we
|
||||
* won't need them until IV encryption is done.
|
||||
*/
|
||||
lxvw4x(52, %[cc0], %[buf])
|
||||
lxvw4x(53, %[cc1], %[buf])
|
||||
lxvw4x(54, %[cc2], %[buf])
|
||||
lxvw4x(55, %[cc3], %[buf])
|
||||
|
||||
/*
|
||||
* Encrypt the current IV.
|
||||
*/
|
||||
vxor(16, 16, 0)
|
||||
vxor(17, 17, 0)
|
||||
vxor(18, 18, 0)
|
||||
vxor(19, 19, 0)
|
||||
vcipher(16, 16, 1)
|
||||
vcipher(17, 17, 1)
|
||||
vcipher(18, 18, 1)
|
||||
vcipher(19, 19, 1)
|
||||
vcipher(16, 16, 2)
|
||||
vcipher(17, 17, 2)
|
||||
vcipher(18, 18, 2)
|
||||
vcipher(19, 19, 2)
|
||||
vcipher(16, 16, 3)
|
||||
vcipher(17, 17, 3)
|
||||
vcipher(18, 18, 3)
|
||||
vcipher(19, 19, 3)
|
||||
vcipher(16, 16, 4)
|
||||
vcipher(17, 17, 4)
|
||||
vcipher(18, 18, 4)
|
||||
vcipher(19, 19, 4)
|
||||
vcipher(16, 16, 5)
|
||||
vcipher(17, 17, 5)
|
||||
vcipher(18, 18, 5)
|
||||
vcipher(19, 19, 5)
|
||||
vcipher(16, 16, 6)
|
||||
vcipher(17, 17, 6)
|
||||
vcipher(18, 18, 6)
|
||||
vcipher(19, 19, 6)
|
||||
vcipher(16, 16, 7)
|
||||
vcipher(17, 17, 7)
|
||||
vcipher(18, 18, 7)
|
||||
vcipher(19, 19, 7)
|
||||
vcipher(16, 16, 8)
|
||||
vcipher(17, 17, 8)
|
||||
vcipher(18, 18, 8)
|
||||
vcipher(19, 19, 8)
|
||||
vcipher(16, 16, 9)
|
||||
vcipher(17, 17, 9)
|
||||
vcipher(18, 18, 9)
|
||||
vcipher(19, 19, 9)
|
||||
vcipher(16, 16, 10)
|
||||
vcipher(17, 17, 10)
|
||||
vcipher(18, 18, 10)
|
||||
vcipher(19, 19, 10)
|
||||
vcipher(16, 16, 11)
|
||||
vcipher(17, 17, 11)
|
||||
vcipher(18, 18, 11)
|
||||
vcipher(19, 19, 11)
|
||||
vcipherlast(16, 16, 12)
|
||||
vcipherlast(17, 17, 12)
|
||||
vcipherlast(18, 18, 12)
|
||||
vcipherlast(19, 19, 12)
|
||||
|
||||
#if BR_POWER8_LE
|
||||
vperm(16, 16, 16, 15)
|
||||
vperm(17, 17, 17, 15)
|
||||
vperm(18, 18, 18, 15)
|
||||
vperm(19, 19, 19, 15)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Load next plaintext word and XOR with encrypted IV.
|
||||
*/
|
||||
vxor(16, 20, 16)
|
||||
vxor(17, 21, 17)
|
||||
vxor(18, 22, 18)
|
||||
vxor(19, 23, 19)
|
||||
stxvw4x(48, %[cc0], %[buf])
|
||||
stxvw4x(49, %[cc1], %[buf])
|
||||
stxvw4x(50, %[cc2], %[buf])
|
||||
stxvw4x(51, %[cc3], %[buf])
|
||||
|
||||
addi(%[buf], %[buf], 64)
|
||||
|
||||
/*
|
||||
* Update IV.
|
||||
*/
|
||||
vand(16, 24, 24)
|
||||
vand(17, 25, 25)
|
||||
vand(18, 26, 26)
|
||||
vand(19, 27, 27)
|
||||
|
||||
bdnz(loop)
|
||||
|
||||
: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
|
||||
[buf] "+b" (buf)
|
||||
: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
|
||||
[ctrinc] "b" (ctrinc)
|
||||
#if BR_POWER8_LE
|
||||
, [idx2be] "b" (idx2be)
|
||||
#endif
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
|
||||
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
|
||||
"ctr", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
static void
|
||||
ctr_256(const unsigned char *sk, const unsigned char *ivbuf,
|
||||
unsigned char *buf, size_t num_blocks)
|
||||
{
|
||||
long cc0, cc1, cc2, cc3;
|
||||
|
||||
#if BR_POWER8_LE
|
||||
static const uint32_t idx2be[] = {
|
||||
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
|
||||
};
|
||||
#endif
|
||||
static const uint32_t ctrinc[] = {
|
||||
0, 0, 0, 4
|
||||
};
|
||||
|
||||
cc0 = 0;
|
||||
cc1 = 16;
|
||||
cc2 = 32;
|
||||
cc3 = 48;
|
||||
asm volatile (
|
||||
|
||||
/*
|
||||
* Load subkeys into v0..v14
|
||||
*/
|
||||
lxvw4x(32, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(33, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(34, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(35, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(36, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(37, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(38, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(39, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(40, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(41, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(42, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(43, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(44, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(45, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(46, %[cc0], %[sk])
|
||||
li(%[cc0], 0)
|
||||
|
||||
#if BR_POWER8_LE
|
||||
/*
|
||||
* v15 = constant for byteswapping words
|
||||
*/
|
||||
lxvw4x(47, 0, %[idx2be])
|
||||
#endif
|
||||
/*
|
||||
* v28 = increment for IV counter.
|
||||
*/
|
||||
lxvw4x(60, 0, %[ctrinc])
|
||||
|
||||
/*
|
||||
* Load IV into v16..v19
|
||||
*/
|
||||
lxvw4x(48, %[cc0], %[ivbuf])
|
||||
lxvw4x(49, %[cc1], %[ivbuf])
|
||||
lxvw4x(50, %[cc2], %[ivbuf])
|
||||
lxvw4x(51, %[cc3], %[ivbuf])
|
||||
#if BR_POWER8_LE
|
||||
vperm(16, 16, 16, 15)
|
||||
vperm(17, 17, 17, 15)
|
||||
vperm(18, 18, 18, 15)
|
||||
vperm(19, 19, 19, 15)
|
||||
#endif
|
||||
|
||||
mtctr(%[num_blocks])
|
||||
label(loop)
|
||||
/*
|
||||
* Compute next IV into v24..v27
|
||||
*/
|
||||
vadduwm(24, 16, 28)
|
||||
vadduwm(25, 17, 28)
|
||||
vadduwm(26, 18, 28)
|
||||
vadduwm(27, 19, 28)
|
||||
|
||||
/*
|
||||
* Load next data blocks. We do this early on but we
|
||||
* won't need them until IV encryption is done.
|
||||
*/
|
||||
lxvw4x(52, %[cc0], %[buf])
|
||||
lxvw4x(53, %[cc1], %[buf])
|
||||
lxvw4x(54, %[cc2], %[buf])
|
||||
lxvw4x(55, %[cc3], %[buf])
|
||||
|
||||
/*
|
||||
* Encrypt the current IV.
|
||||
*/
|
||||
vxor(16, 16, 0)
|
||||
vxor(17, 17, 0)
|
||||
vxor(18, 18, 0)
|
||||
vxor(19, 19, 0)
|
||||
vcipher(16, 16, 1)
|
||||
vcipher(17, 17, 1)
|
||||
vcipher(18, 18, 1)
|
||||
vcipher(19, 19, 1)
|
||||
vcipher(16, 16, 2)
|
||||
vcipher(17, 17, 2)
|
||||
vcipher(18, 18, 2)
|
||||
vcipher(19, 19, 2)
|
||||
vcipher(16, 16, 3)
|
||||
vcipher(17, 17, 3)
|
||||
vcipher(18, 18, 3)
|
||||
vcipher(19, 19, 3)
|
||||
vcipher(16, 16, 4)
|
||||
vcipher(17, 17, 4)
|
||||
vcipher(18, 18, 4)
|
||||
vcipher(19, 19, 4)
|
||||
vcipher(16, 16, 5)
|
||||
vcipher(17, 17, 5)
|
||||
vcipher(18, 18, 5)
|
||||
vcipher(19, 19, 5)
|
||||
vcipher(16, 16, 6)
|
||||
vcipher(17, 17, 6)
|
||||
vcipher(18, 18, 6)
|
||||
vcipher(19, 19, 6)
|
||||
vcipher(16, 16, 7)
|
||||
vcipher(17, 17, 7)
|
||||
vcipher(18, 18, 7)
|
||||
vcipher(19, 19, 7)
|
||||
vcipher(16, 16, 8)
|
||||
vcipher(17, 17, 8)
|
||||
vcipher(18, 18, 8)
|
||||
vcipher(19, 19, 8)
|
||||
vcipher(16, 16, 9)
|
||||
vcipher(17, 17, 9)
|
||||
vcipher(18, 18, 9)
|
||||
vcipher(19, 19, 9)
|
||||
vcipher(16, 16, 10)
|
||||
vcipher(17, 17, 10)
|
||||
vcipher(18, 18, 10)
|
||||
vcipher(19, 19, 10)
|
||||
vcipher(16, 16, 11)
|
||||
vcipher(17, 17, 11)
|
||||
vcipher(18, 18, 11)
|
||||
vcipher(19, 19, 11)
|
||||
vcipher(16, 16, 12)
|
||||
vcipher(17, 17, 12)
|
||||
vcipher(18, 18, 12)
|
||||
vcipher(19, 19, 12)
|
||||
vcipher(16, 16, 13)
|
||||
vcipher(17, 17, 13)
|
||||
vcipher(18, 18, 13)
|
||||
vcipher(19, 19, 13)
|
||||
vcipherlast(16, 16, 14)
|
||||
vcipherlast(17, 17, 14)
|
||||
vcipherlast(18, 18, 14)
|
||||
vcipherlast(19, 19, 14)
|
||||
|
||||
#if BR_POWER8_LE
|
||||
vperm(16, 16, 16, 15)
|
||||
vperm(17, 17, 17, 15)
|
||||
vperm(18, 18, 18, 15)
|
||||
vperm(19, 19, 19, 15)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Load next plaintext word and XOR with encrypted IV.
|
||||
*/
|
||||
vxor(16, 20, 16)
|
||||
vxor(17, 21, 17)
|
||||
vxor(18, 22, 18)
|
||||
vxor(19, 23, 19)
|
||||
stxvw4x(48, %[cc0], %[buf])
|
||||
stxvw4x(49, %[cc1], %[buf])
|
||||
stxvw4x(50, %[cc2], %[buf])
|
||||
stxvw4x(51, %[cc3], %[buf])
|
||||
|
||||
addi(%[buf], %[buf], 64)
|
||||
|
||||
/*
|
||||
* Update IV.
|
||||
*/
|
||||
vand(16, 24, 24)
|
||||
vand(17, 25, 25)
|
||||
vand(18, 26, 26)
|
||||
vand(19, 27, 27)
|
||||
|
||||
bdnz(loop)
|
||||
|
||||
: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
|
||||
[buf] "+b" (buf)
|
||||
: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
|
||||
[ctrinc] "b" (ctrinc)
|
||||
#if BR_POWER8_LE
|
||||
, [idx2be] "b" (idx2be)
|
||||
#endif
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
|
||||
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
|
||||
"ctr", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
uint32_t
|
||||
br_aes_pwr8_ctr_run(const br_aes_pwr8_ctr_keys *ctx,
|
||||
const void *iv, uint32_t cc, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
unsigned char ivbuf[64];
|
||||
|
||||
buf = data;
|
||||
memcpy(ivbuf + 0, iv, 12);
|
||||
memcpy(ivbuf + 16, iv, 12);
|
||||
memcpy(ivbuf + 32, iv, 12);
|
||||
memcpy(ivbuf + 48, iv, 12);
|
||||
if (len >= 64) {
|
||||
br_enc32be(ivbuf + 12, cc + 0);
|
||||
br_enc32be(ivbuf + 28, cc + 1);
|
||||
br_enc32be(ivbuf + 44, cc + 2);
|
||||
br_enc32be(ivbuf + 60, cc + 3);
|
||||
switch (ctx->num_rounds) {
|
||||
case 10:
|
||||
ctr_128(ctx->skey.skni, ivbuf, buf,
|
||||
(len >> 4) & ~(size_t)3);
|
||||
break;
|
||||
case 12:
|
||||
ctr_192(ctx->skey.skni, ivbuf, buf,
|
||||
(len >> 4) & ~(size_t)3);
|
||||
break;
|
||||
default:
|
||||
ctr_256(ctx->skey.skni, ivbuf, buf,
|
||||
(len >> 4) & ~(size_t)3);
|
||||
break;
|
||||
}
|
||||
cc += (len >> 4) & ~(size_t)3;
|
||||
buf += len & ~(size_t)63;
|
||||
len &= 63;
|
||||
}
|
||||
if (len > 0) {
|
||||
unsigned char tmp[64];
|
||||
|
||||
memcpy(tmp, buf, len);
|
||||
memset(tmp + len, 0, (sizeof tmp) - len);
|
||||
br_enc32be(ivbuf + 12, cc + 0);
|
||||
br_enc32be(ivbuf + 28, cc + 1);
|
||||
br_enc32be(ivbuf + 44, cc + 2);
|
||||
br_enc32be(ivbuf + 60, cc + 3);
|
||||
switch (ctx->num_rounds) {
|
||||
case 10:
|
||||
ctr_128(ctx->skey.skni, ivbuf, tmp, 4);
|
||||
break;
|
||||
case 12:
|
||||
ctr_192(ctx->skey.skni, ivbuf, tmp, 4);
|
||||
break;
|
||||
default:
|
||||
ctr_256(ctx->skey.skni, ivbuf, tmp, 4);
|
||||
break;
|
||||
}
|
||||
memcpy(buf, tmp, len);
|
||||
cc += (len + 15) >> 4;
|
||||
}
|
||||
return cc;
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_ctr_class br_aes_pwr8_ctr_vtable = {
|
||||
sizeof(br_aes_pwr8_ctr_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_ctr_class **, const void *, size_t))
|
||||
&br_aes_pwr8_ctr_init,
|
||||
(uint32_t (*)(const br_block_ctr_class *const *,
|
||||
const void *, uint32_t, void *, size_t))
|
||||
&br_aes_pwr8_ctr_run
|
||||
};
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_ctr_class *
|
||||
br_aes_pwr8_ctr_get_vtable(void)
|
||||
{
|
||||
return br_aes_pwr8_supported() ? &br_aes_pwr8_ctr_vtable : NULL;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* see bearssl_block.h */
|
||||
/*
 * Variant compiled when BR_POWER8 is not set: there is no POWER8
 * AES/CTR table, so the answer is always NULL.
 */
const br_block_ctr_class *
br_aes_pwr8_ctr_get_vtable(void)
{
	return NULL;
}
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,946 @@
|
|||
/*
|
||||
* Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#define BR_POWER_ASM_MACROS 1
|
||||
#include "inner.h"
|
||||
|
||||
#if BR_POWER8
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_ctrcbc_class *
|
||||
br_aes_pwr8_ctrcbc_get_vtable(void)
|
||||
{
|
||||
return br_aes_pwr8_supported() ? &br_aes_pwr8_ctrcbc_vtable : NULL;
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_pwr8_ctrcbc_init(br_aes_pwr8_ctrcbc_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_pwr8_ctrcbc_vtable;
|
||||
ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
|
||||
}
|
||||
|
||||
/*
|
||||
* Register conventions for CTR + CBC-MAC:
|
||||
*
|
||||
* AES subkeys are in registers v0 to v10/v12/v14 (depending on key size)
|
||||
* Register v15 contains the byteswap index register (little-endian only)
|
||||
* Register v16 contains the CTR counter value
|
||||
* Register v17 contains the CBC-MAC current value
|
||||
* Registers v18 to v27 are scratch
|
||||
* Counter increment uses v28, v29 and v30
|
||||
*
|
||||
* For CTR alone:
|
||||
*
|
||||
* AES subkeys are in registers v0 to v10/v12/v14 (depending on key size)
|
||||
* Register v15 contains the byteswap index register (little-endian only)
|
||||
* Registers v16 to v19 contain the CTR counter values (four blocks)
|
||||
* Registers v20 to v27 are scratch
|
||||
* Counter increment uses v28, v29 and v30
|
||||
*/
|
||||
|
||||
/*
 * Subkey loading. The macros expect two asm operands: %[sk], the
 * subkey array, and %[cc], a register holding 0 on entry. %[cc] is
 * used as the running byte offset and is NOT reset: it is left at the
 * offset of the last subkey loaded.
 */

/* Step %[cc] by one 16-byte subkey and load that subkey into VSX register vs. */
#define LOAD_NEXT_SUBKEY(vs) \
	addi(%[cc], %[cc], 16) \
	lxvw4x(vs, %[cc], %[sk])

/* Subkeys 0..10 into VSX registers 32..42 (v0..v10). */
#define LOAD_SUBKEYS_128 \
	lxvw4x(32, %[cc], %[sk]) \
	LOAD_NEXT_SUBKEY(33) \
	LOAD_NEXT_SUBKEY(34) \
	LOAD_NEXT_SUBKEY(35) \
	LOAD_NEXT_SUBKEY(36) \
	LOAD_NEXT_SUBKEY(37) \
	LOAD_NEXT_SUBKEY(38) \
	LOAD_NEXT_SUBKEY(39) \
	LOAD_NEXT_SUBKEY(40) \
	LOAD_NEXT_SUBKEY(41) \
	LOAD_NEXT_SUBKEY(42)

/* Subkeys 0..12 into VSX registers 32..44 (v0..v12). */
#define LOAD_SUBKEYS_192 \
	LOAD_SUBKEYS_128 \
	LOAD_NEXT_SUBKEY(43) \
	LOAD_NEXT_SUBKEY(44)

/* Subkeys 0..14 into VSX registers 32..46 (v0..v14). */
#define LOAD_SUBKEYS_256 \
	LOAD_SUBKEYS_192 \
	LOAD_NEXT_SUBKEY(45) \
	LOAD_NEXT_SUBKEY(46)
|
||||
|
||||
/*
 * Single-block AES encryption of vector register x, in place, with the
 * subkeys taken from v0 upward.
 */

/* Part common to all key sizes: XOR with subkey 0, then rounds 1..9. */
#define BLOCK_ENCRYPT_ROUNDS_0_9(x) \
	vxor(x, x, 0) \
	vcipher(x, x, 1) \
	vcipher(x, x, 2) \
	vcipher(x, x, 3) \
	vcipher(x, x, 4) \
	vcipher(x, x, 5) \
	vcipher(x, x, 6) \
	vcipher(x, x, 7) \
	vcipher(x, x, 8) \
	vcipher(x, x, 9)

/* 10 rounds: final round uses subkey 10. */
#define BLOCK_ENCRYPT_128(x) \
	BLOCK_ENCRYPT_ROUNDS_0_9(x) \
	vcipherlast(x, x, 10)

/* 12 rounds: final round uses subkey 12. */
#define BLOCK_ENCRYPT_192(x) \
	BLOCK_ENCRYPT_ROUNDS_0_9(x) \
	vcipher(x, x, 10) \
	vcipher(x, x, 11) \
	vcipherlast(x, x, 12)

/* 14 rounds: final round uses subkey 14. */
#define BLOCK_ENCRYPT_256(x) \
	BLOCK_ENCRYPT_ROUNDS_0_9(x) \
	vcipher(x, x, 10) \
	vcipher(x, x, 11) \
	vcipher(x, x, 12) \
	vcipher(x, x, 13) \
	vcipherlast(x, x, 14)
|
||||
|
||||
/*
 * Two-block AES encryption of vector registers x and y, in place, with
 * the two instruction streams interleaved step by step.
 */

/* One step on both registers: op(x, x, k) followed by op(y, y, k). */
#define BLOCK_X2_STEP(op, x, y, k) \
	op(x, x, k) \
	op(y, y, k)

/* Part common to all key sizes: XOR with subkey 0, then rounds 1..9. */
#define BLOCK_ENCRYPT_X2_ROUNDS_0_9(x, y) \
	BLOCK_X2_STEP(vxor, x, y, 0) \
	BLOCK_X2_STEP(vcipher, x, y, 1) \
	BLOCK_X2_STEP(vcipher, x, y, 2) \
	BLOCK_X2_STEP(vcipher, x, y, 3) \
	BLOCK_X2_STEP(vcipher, x, y, 4) \
	BLOCK_X2_STEP(vcipher, x, y, 5) \
	BLOCK_X2_STEP(vcipher, x, y, 6) \
	BLOCK_X2_STEP(vcipher, x, y, 7) \
	BLOCK_X2_STEP(vcipher, x, y, 8) \
	BLOCK_X2_STEP(vcipher, x, y, 9)

/* 10 rounds: final round uses subkey 10. */
#define BLOCK_ENCRYPT_X2_128(x, y) \
	BLOCK_ENCRYPT_X2_ROUNDS_0_9(x, y) \
	BLOCK_X2_STEP(vcipherlast, x, y, 10)

/* 12 rounds: final round uses subkey 12. */
#define BLOCK_ENCRYPT_X2_192(x, y) \
	BLOCK_ENCRYPT_X2_ROUNDS_0_9(x, y) \
	BLOCK_X2_STEP(vcipher, x, y, 10) \
	BLOCK_X2_STEP(vcipher, x, y, 11) \
	BLOCK_X2_STEP(vcipherlast, x, y, 12)

/* 14 rounds: final round uses subkey 14. */
#define BLOCK_ENCRYPT_X2_256(x, y) \
	BLOCK_ENCRYPT_X2_ROUNDS_0_9(x, y) \
	BLOCK_X2_STEP(vcipher, x, y, 10) \
	BLOCK_X2_STEP(vcipher, x, y, 11) \
	BLOCK_X2_STEP(vcipher, x, y, 12) \
	BLOCK_X2_STEP(vcipher, x, y, 13) \
	BLOCK_X2_STEP(vcipherlast, x, y, 14)
|
||||
|
||||
/*
 * Four-block AES encryption of vector registers x0..x3, in place, with
 * the four instruction streams interleaved step by step.
 */

/* One step on all four registers, in the order x0, x1, x2, x3. */
#define BLOCK_X4_STEP(op, x0, x1, x2, x3, k) \
	op(x0, x0, k) \
	op(x1, x1, k) \
	op(x2, x2, k) \
	op(x3, x3, k)

/* Part common to all key sizes: XOR with subkey 0, then rounds 1..9. */
#define BLOCK_ENCRYPT_X4_ROUNDS_0_9(x0, x1, x2, x3) \
	BLOCK_X4_STEP(vxor, x0, x1, x2, x3, 0) \
	BLOCK_X4_STEP(vcipher, x0, x1, x2, x3, 1) \
	BLOCK_X4_STEP(vcipher, x0, x1, x2, x3, 2) \
	BLOCK_X4_STEP(vcipher, x0, x1, x2, x3, 3) \
	BLOCK_X4_STEP(vcipher, x0, x1, x2, x3, 4) \
	BLOCK_X4_STEP(vcipher, x0, x1, x2, x3, 5) \
	BLOCK_X4_STEP(vcipher, x0, x1, x2, x3, 6) \
	BLOCK_X4_STEP(vcipher, x0, x1, x2, x3, 7) \
	BLOCK_X4_STEP(vcipher, x0, x1, x2, x3, 8) \
	BLOCK_X4_STEP(vcipher, x0, x1, x2, x3, 9)

/* 10 rounds: final round uses subkey 10. */
#define BLOCK_ENCRYPT_X4_128(x0, x1, x2, x3) \
	BLOCK_ENCRYPT_X4_ROUNDS_0_9(x0, x1, x2, x3) \
	BLOCK_X4_STEP(vcipherlast, x0, x1, x2, x3, 10)

/* 12 rounds: final round uses subkey 12. */
#define BLOCK_ENCRYPT_X4_192(x0, x1, x2, x3) \
	BLOCK_ENCRYPT_X4_ROUNDS_0_9(x0, x1, x2, x3) \
	BLOCK_X4_STEP(vcipher, x0, x1, x2, x3, 10) \
	BLOCK_X4_STEP(vcipher, x0, x1, x2, x3, 11) \
	BLOCK_X4_STEP(vcipherlast, x0, x1, x2, x3, 12)

/* 14 rounds: final round uses subkey 14. */
#define BLOCK_ENCRYPT_X4_256(x0, x1, x2, x3) \
	BLOCK_ENCRYPT_X4_ROUNDS_0_9(x0, x1, x2, x3) \
	BLOCK_X4_STEP(vcipher, x0, x1, x2, x3, 10) \
	BLOCK_X4_STEP(vcipher, x0, x1, x2, x3, 11) \
	BLOCK_X4_STEP(vcipher, x0, x1, x2, x3, 12) \
	BLOCK_X4_STEP(vcipher, x0, x1, x2, x3, 13) \
	BLOCK_X4_STEP(vcipherlast, x0, x1, x2, x3, 14)
|
||||
|
||||
/*
 * Byteswapping support.
 *
 *   BYTESWAP_INIT     load the permutation index into v15
 *   BYTESWAP(x)       byteswap the words of register x, in place
 *   BYTESWAPX(d, s)   d = byteswapped copy of s
 *   BYTESWAP_REG      extra asm input operand naming the index table
 *                     (note the leading comma)
 *
 * On little-endian builds these use vperm with the idx2be table. On
 * other builds no swap is emitted: BYTESWAP_INIT, BYTESWAP and
 * BYTESWAP_REG expand to nothing, and BYTESWAPX reduces to a plain
 * register copy (vand of s with itself).
 */
#if BR_POWER8_LE
static const uint32_t idx2be[] = {
	0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#define BYTESWAP_INIT     lxvw4x(47, 0, %[idx2be])
#define BYTESWAP(x)       vperm(x, x, x, 15)
#define BYTESWAPX(d, s)   vperm(d, s, s, 15)
#define BYTESWAP_REG      , [idx2be] "b" (idx2be)
#else
#define BYTESWAP_INIT
#define BYTESWAP(x)
#define BYTESWAPX(d, s)   vand(d, s, s)
#define BYTESWAP_REG
#endif
|
||||
|
||||
/*
 * Counter increments, loaded into v28 by the *_INIT macros: +1 for
 * one block at a time, +4 for four blocks at a time. The increment
 * sits in the last of the four 32-bit words.
 */
static const uint32_t ctrinc[] = {
	0, 0, 0, 1
};
static const uint32_t ctrinc_x4[] = {
	0, 0, 0, 4
};
#define INCR_128_INIT      lxvw4x(60, 0, %[ctrinc])
#define INCR_128_X4_INIT   lxvw4x(60, 0, %[ctrinc_x4])

/*
 * One carry-propagation step on d: move the per-word carries held in
 * v29 across by one word (into v30), record the carries of d + v30 in
 * v29, then add v30 into d.
 */
#define INCR_128_PROPAGATE(d) \
	vsldoi(30, 29, 29, 4) \
	vaddcuw(29, d, 30) \
	vadduwm(d, d, 30)

/*
 * d = s + v28, computed as a 128-bit addition built from 32-bit word
 * additions: the first add produces per-word carries, which are then
 * pushed through the remaining three words. Uses v29 and v30 as
 * scratch.
 */
#define INCR_128(d, s) \
	vaddcuw(29, s, 28) \
	vadduwm(d, s, 28) \
	INCR_128_PROPAGATE(d) \
	INCR_128_PROPAGATE(d) \
	vsldoi(30, 29, 29, 4) \
	vadduwm(d, d, 30)
|
||||
|
||||
#define MKCTR(size) \
|
||||
static void \
|
||||
ctr_ ## size(const unsigned char *sk, \
|
||||
unsigned char *ctrbuf, unsigned char *buf, size_t num_blocks_x4) \
|
||||
{ \
|
||||
long cc, cc0, cc1, cc2, cc3; \
|
||||
\
|
||||
cc = 0; \
|
||||
cc0 = 0; \
|
||||
cc1 = 16; \
|
||||
cc2 = 32; \
|
||||
cc3 = 48; \
|
||||
asm volatile ( \
|
||||
\
|
||||
/* \
|
||||
* Load subkeys into v0..v10 \
|
||||
*/ \
|
||||
LOAD_SUBKEYS_ ## size \
|
||||
li(%[cc], 0) \
|
||||
\
|
||||
BYTESWAP_INIT \
|
||||
INCR_128_X4_INIT \
|
||||
\
|
||||
/* \
|
||||
* Load current CTR counters into v16 to v19. \
|
||||
*/ \
|
||||
lxvw4x(48, %[cc0], %[ctrbuf]) \
|
||||
lxvw4x(49, %[cc1], %[ctrbuf]) \
|
||||
lxvw4x(50, %[cc2], %[ctrbuf]) \
|
||||
lxvw4x(51, %[cc3], %[ctrbuf]) \
|
||||
BYTESWAP(16) \
|
||||
BYTESWAP(17) \
|
||||
BYTESWAP(18) \
|
||||
BYTESWAP(19) \
|
||||
\
|
||||
mtctr(%[num_blocks_x4]) \
|
||||
\
|
||||
label(loop) \
|
||||
/* \
|
||||
* Compute next counter values into v20..v23. \
|
||||
*/ \
|
||||
INCR_128(20, 16) \
|
||||
INCR_128(21, 17) \
|
||||
INCR_128(22, 18) \
|
||||
INCR_128(23, 19) \
|
||||
\
|
||||
/* \
|
||||
* Encrypt counter values and XOR into next data blocks. \
|
||||
*/ \
|
||||
lxvw4x(56, %[cc0], %[buf]) \
|
||||
lxvw4x(57, %[cc1], %[buf]) \
|
||||
lxvw4x(58, %[cc2], %[buf]) \
|
||||
lxvw4x(59, %[cc3], %[buf]) \
|
||||
BYTESWAP(24) \
|
||||
BYTESWAP(25) \
|
||||
BYTESWAP(26) \
|
||||
BYTESWAP(27) \
|
||||
BLOCK_ENCRYPT_X4_ ## size(16, 17, 18, 19) \
|
||||
vxor(16, 16, 24) \
|
||||
vxor(17, 17, 25) \
|
||||
vxor(18, 18, 26) \
|
||||
vxor(19, 19, 27) \
|
||||
BYTESWAP(16) \
|
||||
BYTESWAP(17) \
|
||||
BYTESWAP(18) \
|
||||
BYTESWAP(19) \
|
||||
stxvw4x(48, %[cc0], %[buf]) \
|
||||
stxvw4x(49, %[cc1], %[buf]) \
|
||||
stxvw4x(50, %[cc2], %[buf]) \
|
||||
stxvw4x(51, %[cc3], %[buf]) \
|
||||
\
|
||||
/* \
|
||||
* Update counters and data pointer. \
|
||||
*/ \
|
||||
vand(16, 20, 20) \
|
||||
vand(17, 21, 21) \
|
||||
vand(18, 22, 22) \
|
||||
vand(19, 23, 23) \
|
||||
addi(%[buf], %[buf], 64) \
|
||||
\
|
||||
bdnz(loop) \
|
||||
\
|
||||
/* \
|
||||
* Write back new counter values. \
|
||||
*/ \
|
||||
BYTESWAP(16) \
|
||||
BYTESWAP(17) \
|
||||
BYTESWAP(18) \
|
||||
BYTESWAP(19) \
|
||||
stxvw4x(48, %[cc0], %[ctrbuf]) \
|
||||
stxvw4x(49, %[cc1], %[ctrbuf]) \
|
||||
stxvw4x(50, %[cc2], %[ctrbuf]) \
|
||||
stxvw4x(51, %[cc3], %[ctrbuf]) \
|
||||
\
|
||||
: [cc] "+b" (cc), [buf] "+b" (buf), \
|
||||
[cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3) \
|
||||
: [sk] "b" (sk), [ctrbuf] "b" (ctrbuf), \
|
||||
[num_blocks_x4] "b" (num_blocks_x4), [ctrinc_x4] "b" (ctrinc_x4) \
|
||||
BYTESWAP_REG \
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
|
||||
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
|
||||
"v30", "ctr", "memory" \
|
||||
); \
|
||||
}
|
||||
|
||||
MKCTR(128)
|
||||
MKCTR(192)
|
||||
MKCTR(256)
|
||||
|
||||
#define MKCBCMAC(size) \
|
||||
static void \
|
||||
cbcmac_ ## size(const unsigned char *sk, \
|
||||
unsigned char *cbcmac, const unsigned char *buf, size_t num_blocks) \
|
||||
{ \
|
||||
long cc; \
|
||||
\
|
||||
cc = 0; \
|
||||
asm volatile ( \
|
||||
\
|
||||
/* \
|
||||
* Load subkeys into v0..v10 \
|
||||
*/ \
|
||||
LOAD_SUBKEYS_ ## size \
|
||||
li(%[cc], 0) \
|
||||
\
|
||||
BYTESWAP_INIT \
|
||||
\
|
||||
/* \
|
||||
* Load current CBC-MAC value into v16. \
|
||||
*/ \
|
||||
lxvw4x(48, %[cc], %[cbcmac]) \
|
||||
BYTESWAP(16) \
|
||||
\
|
||||
mtctr(%[num_blocks]) \
|
||||
\
|
||||
label(loop) \
|
||||
/* \
|
||||
* Load next block, XOR into current CBC-MAC value, \
|
||||
* and then encrypt it. \
|
||||
*/ \
|
||||
lxvw4x(49, %[cc], %[buf]) \
|
||||
BYTESWAP(17) \
|
||||
vxor(16, 16, 17) \
|
||||
BLOCK_ENCRYPT_ ## size(16) \
|
||||
addi(%[buf], %[buf], 16) \
|
||||
\
|
||||
bdnz(loop) \
|
||||
\
|
||||
/* \
|
||||
* Write back new CBC-MAC value. \
|
||||
*/ \
|
||||
BYTESWAP(16) \
|
||||
stxvw4x(48, %[cc], %[cbcmac]) \
|
||||
\
|
||||
: [cc] "+b" (cc), [buf] "+b" (buf) \
|
||||
: [sk] "b" (sk), [cbcmac] "b" (cbcmac), [num_blocks] "b" (num_blocks) \
|
||||
BYTESWAP_REG \
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
|
||||
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
|
||||
"v30", "ctr", "memory" \
|
||||
); \
|
||||
}
|
||||
|
||||
MKCBCMAC(128)
|
||||
MKCBCMAC(192)
|
||||
MKCBCMAC(256)
|
||||
|
||||
#define MKENCRYPT(size) \
|
||||
static void \
|
||||
ctrcbc_ ## size ## _encrypt(const unsigned char *sk, \
|
||||
unsigned char *ctr, unsigned char *cbcmac, unsigned char *buf, \
|
||||
size_t num_blocks) \
|
||||
{ \
|
||||
long cc; \
|
||||
\
|
||||
cc = 0; \
|
||||
asm volatile ( \
|
||||
\
|
||||
/* \
|
||||
* Load subkeys into v0..v10 \
|
||||
*/ \
|
||||
LOAD_SUBKEYS_ ## size \
|
||||
li(%[cc], 0) \
|
||||
\
|
||||
BYTESWAP_INIT \
|
||||
INCR_128_INIT \
|
||||
\
|
||||
/* \
|
||||
* Load current CTR counter into v16, and current \
|
||||
* CBC-MAC IV into v17. \
|
||||
*/ \
|
||||
lxvw4x(48, %[cc], %[ctr]) \
|
||||
lxvw4x(49, %[cc], %[cbcmac]) \
|
||||
BYTESWAP(16) \
|
||||
BYTESWAP(17) \
|
||||
\
|
||||
/* \
|
||||
* At each iteration, we do two parallel encryption: \
|
||||
* - new counter value for encryption of the next block; \
|
||||
* - CBC-MAC over the previous encrypted block. \
|
||||
* Thus, each plaintext block implies two AES instances, \
|
||||
* over two successive iterations. This requires a single \
|
||||
* counter encryption before the loop, and a single \
|
||||
* CBC-MAC encryption after the loop. \
|
||||
*/ \
|
||||
\
|
||||
/* \
|
||||
* Encrypt first block (into v20). \
|
||||
*/ \
|
||||
lxvw4x(52, %[cc], %[buf]) \
|
||||
BYTESWAP(20) \
|
||||
INCR_128(22, 16) \
|
||||
BLOCK_ENCRYPT_ ## size(16) \
|
||||
vxor(20, 20, 16) \
|
||||
BYTESWAPX(21, 20) \
|
||||
stxvw4x(53, %[cc], %[buf]) \
|
||||
vand(16, 22, 22) \
|
||||
addi(%[buf], %[buf], 16) \
|
||||
\
|
||||
/* \
|
||||
* Load loop counter; skip the loop if there is only \
|
||||
* one block in total (already handled by the boundary \
|
||||
* conditions). \
|
||||
*/ \
|
||||
mtctr(%[num_blocks]) \
|
||||
bdz(fastexit) \
|
||||
\
|
||||
label(loop) \
|
||||
/* \
|
||||
* Upon loop entry: \
|
||||
* v16 counter value for next block \
|
||||
* v17 current CBC-MAC value \
|
||||
* v20 encrypted previous block \
|
||||
*/ \
|
||||
vxor(17, 17, 20) \
|
||||
INCR_128(22, 16) \
|
||||
lxvw4x(52, %[cc], %[buf]) \
|
||||
BYTESWAP(20) \
|
||||
BLOCK_ENCRYPT_X2_ ## size(16, 17) \
|
||||
vxor(20, 20, 16) \
|
||||
BYTESWAPX(21, 20) \
|
||||
stxvw4x(53, %[cc], %[buf]) \
|
||||
addi(%[buf], %[buf], 16) \
|
||||
vand(16, 22, 22) \
|
||||
\
|
||||
bdnz(loop) \
|
||||
\
|
||||
label(fastexit) \
|
||||
vxor(17, 17, 20) \
|
||||
BLOCK_ENCRYPT_ ## size(17) \
|
||||
BYTESWAP(16) \
|
||||
BYTESWAP(17) \
|
||||
stxvw4x(48, %[cc], %[ctr]) \
|
||||
stxvw4x(49, %[cc], %[cbcmac]) \
|
||||
\
|
||||
: [cc] "+b" (cc), [buf] "+b" (buf) \
|
||||
: [sk] "b" (sk), [ctr] "b" (ctr), [cbcmac] "b" (cbcmac), \
|
||||
[num_blocks] "b" (num_blocks), [ctrinc] "b" (ctrinc) \
|
||||
BYTESWAP_REG \
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
|
||||
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
|
||||
"v30", "ctr", "memory" \
|
||||
); \
|
||||
}
|
||||
|
||||
MKENCRYPT(128)
|
||||
MKENCRYPT(192)
|
||||
MKENCRYPT(256)
|
||||
|
||||
#define MKDECRYPT(size) \
|
||||
static void \
|
||||
ctrcbc_ ## size ## _decrypt(const unsigned char *sk, \
|
||||
unsigned char *ctr, unsigned char *cbcmac, unsigned char *buf, \
|
||||
size_t num_blocks) \
|
||||
{ \
|
||||
long cc; \
|
||||
\
|
||||
cc = 0; \
|
||||
asm volatile ( \
|
||||
\
|
||||
/* \
|
||||
* Load subkeys into v0..v10 \
|
||||
*/ \
|
||||
LOAD_SUBKEYS_ ## size \
|
||||
li(%[cc], 0) \
|
||||
\
|
||||
BYTESWAP_INIT \
|
||||
INCR_128_INIT \
|
||||
\
|
||||
/* \
|
||||
* Load current CTR counter into v16, and current \
|
||||
* CBC-MAC IV into v17. \
|
||||
*/ \
|
||||
lxvw4x(48, %[cc], %[ctr]) \
|
||||
lxvw4x(49, %[cc], %[cbcmac]) \
|
||||
BYTESWAP(16) \
|
||||
BYTESWAP(17) \
|
||||
\
|
||||
/* \
|
||||
* At each iteration, we do two parallel encryption: \
|
||||
* - new counter value for decryption of the next block; \
|
||||
* - CBC-MAC over the next encrypted block. \
|
||||
* Each iteration performs the two AES instances related \
|
||||
* to the current block; there is thus no need for some \
|
||||
* extra pre-loop and post-loop work as in encryption. \
|
||||
*/ \
|
||||
\
|
||||
mtctr(%[num_blocks]) \
|
||||
\
|
||||
label(loop) \
|
||||
/* \
|
||||
* Upon loop entry: \
|
||||
* v16 counter value for next block \
|
||||
* v17 current CBC-MAC value \
|
||||
*/ \
|
||||
lxvw4x(52, %[cc], %[buf]) \
|
||||
BYTESWAP(20) \
|
||||
vxor(17, 17, 20) \
|
||||
INCR_128(22, 16) \
|
||||
BLOCK_ENCRYPT_X2_ ## size(16, 17) \
|
||||
vxor(20, 20, 16) \
|
||||
BYTESWAPX(21, 20) \
|
||||
stxvw4x(53, %[cc], %[buf]) \
|
||||
addi(%[buf], %[buf], 16) \
|
||||
vand(16, 22, 22) \
|
||||
\
|
||||
bdnz(loop) \
|
||||
\
|
||||
/* \
|
||||
* Store back counter and CBC-MAC value. \
|
||||
*/ \
|
||||
BYTESWAP(16) \
|
||||
BYTESWAP(17) \
|
||||
stxvw4x(48, %[cc], %[ctr]) \
|
||||
stxvw4x(49, %[cc], %[cbcmac]) \
|
||||
\
|
||||
: [cc] "+b" (cc), [buf] "+b" (buf) \
|
||||
: [sk] "b" (sk), [ctr] "b" (ctr), [cbcmac] "b" (cbcmac), \
|
||||
[num_blocks] "b" (num_blocks), [ctrinc] "b" (ctrinc) \
|
||||
BYTESWAP_REG \
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
|
||||
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
|
||||
"v30", "ctr", "memory" \
|
||||
); \
|
||||
}
|
||||
|
||||
MKDECRYPT(128)
|
||||
MKDECRYPT(192)
|
||||
MKDECRYPT(256)
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_pwr8_ctrcbc_encrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
|
||||
void *ctr, void *cbcmac, void *data, size_t len)
|
||||
{
|
||||
if (len == 0) {
|
||||
return;
|
||||
}
|
||||
switch (ctx->num_rounds) {
|
||||
case 10:
|
||||
ctrcbc_128_encrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
|
||||
break;
|
||||
case 12:
|
||||
ctrcbc_192_encrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
|
||||
break;
|
||||
default:
|
||||
ctrcbc_256_encrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_pwr8_ctrcbc_decrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
|
||||
void *ctr, void *cbcmac, void *data, size_t len)
|
||||
{
|
||||
if (len == 0) {
|
||||
return;
|
||||
}
|
||||
switch (ctx->num_rounds) {
|
||||
case 10:
|
||||
ctrcbc_128_decrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
|
||||
break;
|
||||
case 12:
|
||||
ctrcbc_192_decrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
|
||||
break;
|
||||
default:
|
||||
ctrcbc_256_decrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Write to dst the 16-byte big-endian value found at src, plus one.
 * The value is handled as two 64-bit halves; dst and src may be the
 * same buffer since src is fully read before dst is written.
 */
static inline void
incr_ctr(void *dst, const void *src)
{
	const unsigned char *in;
	unsigned char *out;
	uint64_t upper, lower, carry;

	in = src;
	out = dst;
	upper = br_dec64be(in);
	lower = br_dec64be(in + 8) + 1;

	/*
	 * (lower | -lower) has its top bit set exactly when lower is
	 * non-zero, so carry is 1 only if the low half wrapped to zero.
	 * Computed without a branch.
	 */
	carry = ((lower | -lower) >> 63) ^ (uint64_t)1;
	upper += carry;

	br_enc64be(out, upper);
	br_enc64be(out + 8, lower);
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_pwr8_ctrcbc_ctr(const br_aes_pwr8_ctrcbc_keys *ctx,
|
||||
void *ctr, void *data, size_t len)
|
||||
{
|
||||
unsigned char ctrbuf[64];
|
||||
|
||||
memcpy(ctrbuf, ctr, 16);
|
||||
incr_ctr(ctrbuf + 16, ctrbuf);
|
||||
incr_ctr(ctrbuf + 32, ctrbuf + 16);
|
||||
incr_ctr(ctrbuf + 48, ctrbuf + 32);
|
||||
if (len >= 64) {
|
||||
switch (ctx->num_rounds) {
|
||||
case 10:
|
||||
ctr_128(ctx->skey.skni, ctrbuf, data, len >> 6);
|
||||
break;
|
||||
case 12:
|
||||
ctr_192(ctx->skey.skni, ctrbuf, data, len >> 6);
|
||||
break;
|
||||
default:
|
||||
ctr_256(ctx->skey.skni, ctrbuf, data, len >> 6);
|
||||
break;
|
||||
}
|
||||
data = (unsigned char *)data + (len & ~(size_t)63);
|
||||
len &= 63;
|
||||
}
|
||||
if (len > 0) {
|
||||
unsigned char tmp[64];
|
||||
|
||||
if (len >= 32) {
|
||||
if (len >= 48) {
|
||||
memcpy(ctr, ctrbuf + 48, 16);
|
||||
} else {
|
||||
memcpy(ctr, ctrbuf + 32, 16);
|
||||
}
|
||||
} else {
|
||||
if (len >= 16) {
|
||||
memcpy(ctr, ctrbuf + 16, 16);
|
||||
}
|
||||
}
|
||||
memcpy(tmp, data, len);
|
||||
memset(tmp + len, 0, (sizeof tmp) - len);
|
||||
switch (ctx->num_rounds) {
|
||||
case 10:
|
||||
ctr_128(ctx->skey.skni, ctrbuf, tmp, 1);
|
||||
break;
|
||||
case 12:
|
||||
ctr_192(ctx->skey.skni, ctrbuf, tmp, 1);
|
||||
break;
|
||||
default:
|
||||
ctr_256(ctx->skey.skni, ctrbuf, tmp, 1);
|
||||
break;
|
||||
}
|
||||
memcpy(data, tmp, len);
|
||||
} else {
|
||||
memcpy(ctr, ctrbuf, 16);
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_pwr8_ctrcbc_mac(const br_aes_pwr8_ctrcbc_keys *ctx,
|
||||
void *cbcmac, const void *data, size_t len)
|
||||
{
|
||||
if (len > 0) {
|
||||
switch (ctx->num_rounds) {
|
||||
case 10:
|
||||
cbcmac_128(ctx->skey.skni, cbcmac, data, len >> 4);
|
||||
break;
|
||||
case 12:
|
||||
cbcmac_192(ctx->skey.skni, cbcmac, data, len >> 4);
|
||||
break;
|
||||
default:
|
||||
cbcmac_256(ctx->skey.skni, cbcmac, data, len >> 4);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_ctrcbc_class br_aes_pwr8_ctrcbc_vtable = {
|
||||
sizeof(br_aes_pwr8_ctrcbc_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
|
||||
&br_aes_pwr8_ctrcbc_init,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, void *, void *, size_t))
|
||||
&br_aes_pwr8_ctrcbc_encrypt,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, void *, void *, size_t))
|
||||
&br_aes_pwr8_ctrcbc_decrypt,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, void *, size_t))
|
||||
&br_aes_pwr8_ctrcbc_ctr,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, const void *, size_t))
|
||||
&br_aes_pwr8_ctrcbc_mac
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_ctrcbc_class *
|
||||
br_aes_pwr8_ctrcbc_get_vtable(void)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_small_cbcdec_init(br_aes_small_cbcdec_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_small_cbcdec_vtable;
|
||||
ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_small_cbcdec_run(const br_aes_small_cbcdec_keys *ctx,
|
||||
void *iv, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf, *ivbuf;
|
||||
|
||||
ivbuf = iv;
|
||||
buf = data;
|
||||
while (len > 0) {
|
||||
unsigned char tmp[16];
|
||||
int i;
|
||||
|
||||
memcpy(tmp, buf, 16);
|
||||
br_aes_small_decrypt(ctx->num_rounds, ctx->skey, buf);
|
||||
for (i = 0; i < 16; i ++) {
|
||||
buf[i] ^= ivbuf[i];
|
||||
}
|
||||
memcpy(ivbuf, tmp, 16);
|
||||
buf += 16;
|
||||
len -= 16;
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_cbcdec_class br_aes_small_cbcdec_vtable = {
|
||||
sizeof(br_aes_small_cbcdec_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_cbcdec_class **, const void *, size_t))
|
||||
&br_aes_small_cbcdec_init,
|
||||
(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
|
||||
&br_aes_small_cbcdec_run
|
||||
};
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_small_cbcenc_init(br_aes_small_cbcenc_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_small_cbcenc_vtable;
|
||||
ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_small_cbcenc_run(const br_aes_small_cbcenc_keys *ctx,
|
||||
void *iv, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf, *ivbuf;
|
||||
|
||||
ivbuf = iv;
|
||||
buf = data;
|
||||
while (len > 0) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i ++) {
|
||||
buf[i] ^= ivbuf[i];
|
||||
}
|
||||
br_aes_small_encrypt(ctx->num_rounds, ctx->skey, buf);
|
||||
memcpy(ivbuf, buf, 16);
|
||||
buf += 16;
|
||||
len -= 16;
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_cbcenc_class br_aes_small_cbcenc_vtable = {
|
||||
sizeof(br_aes_small_cbcenc_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_cbcenc_class **, const void *, size_t))
|
||||
&br_aes_small_cbcenc_init,
|
||||
(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
|
||||
&br_aes_small_cbcenc_run
|
||||
};
|
||||
|
|
@ -0,0 +1,84 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_small_ctr_init(br_aes_small_ctr_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_small_ctr_vtable;
|
||||
ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
|
||||
}
|
||||
|
||||
/*
 * XOR the first len bytes of src into dst (dst[k] ^= src[k]).
 * Nothing is touched when len is zero.
 */
static void
xorbuf(void *dst, const void *src, size_t len)
{
	unsigned char *out = dst;
	const unsigned char *in = src;
	size_t k;

	for (k = 0; k < len; k ++) {
		out[k] ^= in[k];
	}
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
uint32_t
|
||||
br_aes_small_ctr_run(const br_aes_small_ctr_keys *ctx,
|
||||
const void *iv, uint32_t cc, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
|
||||
buf = data;
|
||||
while (len > 0) {
|
||||
unsigned char tmp[16];
|
||||
|
||||
memcpy(tmp, iv, 12);
|
||||
br_enc32be(tmp + 12, cc ++);
|
||||
br_aes_small_encrypt(ctx->num_rounds, ctx->skey, tmp);
|
||||
if (len <= 16) {
|
||||
xorbuf(buf, tmp, len);
|
||||
break;
|
||||
}
|
||||
xorbuf(buf, tmp, 16);
|
||||
buf += 16;
|
||||
len -= 16;
|
||||
}
|
||||
return cc;
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_ctr_class br_aes_small_ctr_vtable = {
|
||||
sizeof(br_aes_small_ctr_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_ctr_class **, const void *, size_t))
|
||||
&br_aes_small_ctr_init,
|
||||
(uint32_t (*)(const br_block_ctr_class *const *,
|
||||
const void *, uint32_t, void *, size_t))
|
||||
&br_aes_small_ctr_run
|
||||
};
|
||||
|
|
@ -0,0 +1,142 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_small_ctrcbc_init(br_aes_small_ctrcbc_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_small_ctrcbc_vtable;
|
||||
ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
|
||||
}
|
||||
|
||||
/*
 * XOR the first len bytes of src into dst (dst[k] ^= src[k]).
 * Nothing is touched when len is zero.
 */
static void
xorbuf(void *dst, const void *src, size_t len)
{
	unsigned char *out = dst;
	const unsigned char *in = src;
	size_t k;

	for (k = 0; k < len; k ++) {
		out[k] ^= in[k];
	}
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_small_ctrcbc_ctr(const br_aes_small_ctrcbc_keys *ctx,
|
||||
void *ctr, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf, *bctr;
|
||||
uint32_t cc0, cc1, cc2, cc3;
|
||||
|
||||
buf = data;
|
||||
bctr = ctr;
|
||||
cc3 = br_dec32be(bctr + 0);
|
||||
cc2 = br_dec32be(bctr + 4);
|
||||
cc1 = br_dec32be(bctr + 8);
|
||||
cc0 = br_dec32be(bctr + 12);
|
||||
while (len > 0) {
|
||||
unsigned char tmp[16];
|
||||
uint32_t carry;
|
||||
|
||||
br_enc32be(tmp + 0, cc3);
|
||||
br_enc32be(tmp + 4, cc2);
|
||||
br_enc32be(tmp + 8, cc1);
|
||||
br_enc32be(tmp + 12, cc0);
|
||||
br_aes_small_encrypt(ctx->num_rounds, ctx->skey, tmp);
|
||||
xorbuf(buf, tmp, 16);
|
||||
buf += 16;
|
||||
len -= 16;
|
||||
cc0 ++;
|
||||
carry = (~(cc0 | -cc0)) >> 31;
|
||||
cc1 += carry;
|
||||
carry &= (~(cc1 | -cc1)) >> 31;
|
||||
cc2 += carry;
|
||||
carry &= (~(cc2 | -cc2)) >> 31;
|
||||
cc3 += carry;
|
||||
}
|
||||
br_enc32be(bctr + 0, cc3);
|
||||
br_enc32be(bctr + 4, cc2);
|
||||
br_enc32be(bctr + 8, cc1);
|
||||
br_enc32be(bctr + 12, cc0);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_small_ctrcbc_mac(const br_aes_small_ctrcbc_keys *ctx,
|
||||
void *cbcmac, const void *data, size_t len)
|
||||
{
|
||||
const unsigned char *buf;
|
||||
|
||||
buf = data;
|
||||
while (len > 0) {
|
||||
xorbuf(cbcmac, buf, 16);
|
||||
br_aes_small_encrypt(ctx->num_rounds, ctx->skey, cbcmac);
|
||||
buf += 16;
|
||||
len -= 16;
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_small_ctrcbc_encrypt(const br_aes_small_ctrcbc_keys *ctx,
|
||||
void *ctr, void *cbcmac, void *data, size_t len)
|
||||
{
|
||||
br_aes_small_ctrcbc_ctr(ctx, ctr, data, len);
|
||||
br_aes_small_ctrcbc_mac(ctx, cbcmac, data, len);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_small_ctrcbc_decrypt(const br_aes_small_ctrcbc_keys *ctx,
|
||||
void *ctr, void *cbcmac, void *data, size_t len)
|
||||
{
|
||||
br_aes_small_ctrcbc_mac(ctx, cbcmac, data, len);
|
||||
br_aes_small_ctrcbc_ctr(ctx, ctr, data, len);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_ctrcbc_class br_aes_small_ctrcbc_vtable = {
|
||||
sizeof(br_aes_small_ctrcbc_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
|
||||
&br_aes_small_ctrcbc_init,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, void *, void *, size_t))
|
||||
&br_aes_small_ctrcbc_encrypt,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, void *, void *, size_t))
|
||||
&br_aes_small_ctrcbc_decrypt,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, void *, size_t))
|
||||
&br_aes_small_ctrcbc_ctr,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, const void *, size_t))
|
||||
&br_aes_small_ctrcbc_mac
|
||||
};
|
||||
|
|
@ -0,0 +1,176 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * Inverse S-box, 256 entries. Laid out 16 per row so that entry 0xRC
 * sits in row R, column C.
 */
static const unsigned char iS[256] = {
	/* 0x */ 0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38,
	         0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB,
	/* 1x */ 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
	         0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB,
	/* 2x */ 0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D,
	         0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
	/* 3x */ 0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2,
	         0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25,
	/* 4x */ 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
	         0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92,
	/* 5x */ 0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA,
	         0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
	/* 6x */ 0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A,
	         0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06,
	/* 7x */ 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
	         0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B,
	/* 8x */ 0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA,
	         0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
	/* 9x */ 0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85,
	         0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E,
	/* Ax */ 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
	         0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B,
	/* Bx */ 0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20,
	         0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
	/* Cx */ 0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31,
	         0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F,
	/* Dx */ 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
	         0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF,
	/* Ex */ 0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0,
	         0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
	/* Fx */ 0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26,
	         0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D
};
|
||||
|
||||
/*
 * XOR one round key into the state. The state is 16 byte values held
 * one per unsigned element; the round key is four 32-bit words, each
 * covering four consecutive state elements, most significant byte
 * first.
 */
static void
add_round_key(unsigned *state, const uint32_t *skeys)
{
	int col;

	for (col = 0; col < 4; col ++) {
		uint32_t word = skeys[col];
		unsigned *cell = state + (col << 2);

		cell[0] ^= (unsigned)(word >> 24);
		cell[1] ^= (unsigned)(word >> 16) & 0xFF;
		cell[2] ^= (unsigned)(word >> 8) & 0xFF;
		cell[3] ^= (unsigned)word & 0xFF;
	}
}
|
||||
|
||||
static void
|
||||
inv_sub_bytes(unsigned *state)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i ++) {
|
||||
state[i] = iS[state[i]];
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Inverse row shift on the 16-element state. Elements whose index is
 * a multiple of 4 stay put; the other three interleaved groups
 * (index mod 4 equal to 1, 2, 3) are rotated by one, two and three
 * positions within their group.
 */
static void
inv_shift_rows(unsigned *state)
{
	/* from[i] is the old position of the value that lands at i */
	static const unsigned char from[16] = {
		0, 13, 10,  7,
		4,  1, 14, 11,
		8,  5,  2, 15,
		12, 9,  6,  3
	};
	unsigned old[16];
	int i;

	for (i = 0; i < 16; i ++) {
		old[i] = state[i];
	}
	for (i = 0; i < 16; i ++) {
		state[i] = old[from[i]];
	}
}
|
||||
|
||||
/*
 * Reduce x to 8 bits: the bits above bit 7 are folded back into the
 * low byte by XORing them in at shifts 0, 1, 3 and 4 (bit 8 thus maps
 * to 0x1B), and the result is masked to 0xFF. The single folding pass
 * is sufficient for the inputs produced in this file, which are at
 * most 11 bits wide (byte values shifted left by up to 3).
 */
static inline unsigned
gf256red(unsigned x)
{
	unsigned hi, fold;

	hi = x >> 8;
	fold = hi ^ (hi << 1) ^ (hi << 3) ^ (hi << 4);
	return (x ^ fold) & 0xFF;
}
|
||||
|
||||
/*
 * Inverse column mixing on the 16-element state, four elements at a
 * time. Each output is the XOR of the four inputs multiplied by 14,
 * 11, 13 and 9 (rotating by one position per output). The products are
 * built unreduced from shifts by 1, 2 and 3, then reduced once per
 * output with gf256red().
 */
static void
inv_mix_columns(unsigned *state)
{
	unsigned *col;

	for (col = state; col != state + 16; col += 4) {
		unsigned m9[4], m11[4], m13[4], m14[4];
		int j;

		for (j = 0; j < 4; j ++) {
			unsigned x, x2, x4, x8;

			x = col[j];
			x2 = x << 1;
			x4 = x << 2;
			x8 = x << 3;
			m9[j] = x ^ x8;
			m11[j] = x ^ x2 ^ x8;
			m13[j] = x ^ x4 ^ x8;
			m14[j] = x2 ^ x4 ^ x8;
		}
		col[0] = gf256red(m14[0] ^ m11[1] ^ m13[2] ^ m9[3]);
		col[1] = gf256red(m9[0] ^ m14[1] ^ m11[2] ^ m13[3]);
		col[2] = gf256red(m13[0] ^ m9[1] ^ m14[2] ^ m11[3]);
		col[3] = gf256red(m11[0] ^ m13[1] ^ m9[2] ^ m14[3]);
	}
}
|
||||
|
||||
/*
 * see inner.h
 *
 * Decrypt the 16-byte block at data in place. skey holds four 32-bit
 * words per round key; the round keys are consumed from index
 * num_rounds down to index 0.
 */
void
br_aes_small_decrypt(unsigned num_rounds, const uint32_t *skey, void *data)
{
	unsigned char *block;
	unsigned st[16];
	unsigned round, j;

	block = data;
	for (j = 0; j < 16; j ++) {
		st[j] = block[j];
	}

	add_round_key(st, skey + (num_rounds << 2));
	round = num_rounds - 1;
	while (round > 0) {
		inv_shift_rows(st);
		inv_sub_bytes(st);
		add_round_key(st, skey + (round << 2));
		inv_mix_columns(st);
		round --;
	}
	/* Final round: no inv_mix_columns step. */
	inv_shift_rows(st);
	inv_sub_bytes(st);
	add_round_key(st, skey);

	for (j = 0; j < 16; j ++) {
		block[j] = st[j];
	}
}
|
||||
|
|
@ -0,0 +1,129 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
#define S br_aes_S
|
||||
|
||||
/*
 * XOR a round key into the state: skeys[0..3] are applied to state
 * entries 0..15, four entries per word, highest byte of each word
 * first.
 */
static void
add_round_key(unsigned *state, const uint32_t *skeys)
{
	const uint32_t *kp;
	unsigned *sp;

	sp = state;
	for (kp = skeys; kp != skeys + 4; kp ++) {
		uint32_t word = *kp;

		sp[0] ^= (unsigned)(word >> 24);
		sp[1] ^= (unsigned)(word >> 16) & 0xFF;
		sp[2] ^= (unsigned)(word >> 8) & 0xFF;
		sp[3] ^= (unsigned)word & 0xFF;
		sp += 4;
	}
}
|
||||
|
||||
static void
|
||||
sub_bytes(unsigned *state)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i ++) {
|
||||
state[i] = S[state[i]];
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Permute the 16 state entries in place. Entries whose index is a
 * multiple of 4 stay put; within the groups {1,5,9,13}, {2,6,10,14}
 * and {3,7,11,15}, each entry takes the value found one, two and
 * three group positions further on (wrapping around).
 */
static void
shift_rows(unsigned *state)
{
	/* from[i] is the old index whose value ends up at index i. */
	static const unsigned char from[16] = {
		0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11
	};
	unsigned prev[16];
	int i;

	for (i = 0; i < 16; i ++) {
		prev[i] = state[i];
	}
	for (i = 0; i < 16; i ++) {
		state[i] = prev[from[i]];
	}
}
|
||||
|
||||
/*
 * Transform each group of four consecutive state entries (a column)
 * with the circulant matrix (2, 3, 1, 1): output r is
 * 2*s[r] ^ 3*s[r+1] ^ s[r+2] ^ s[r+3] (indices modulo 4).
 * Doubling is a left shift; a set bit 8 is then cancelled by XORing
 * with 0x11B, selected through an all-ones/all-zeros mask rather
 * than a branch.
 */
static void
mix_columns(unsigned *state)
{
	int base, r;

	for (base = 0; base < 16; base += 4) {
		unsigned in[4], dbl[4];

		/* Capture the column before overwriting any of it. */
		for (r = 0; r < 4; r ++) {
			in[r] = state[base + r];
			dbl[r] = in[r] << 1;
		}
		for (r = 0; r < 4; r ++) {
			int r1 = (r + 1) & 3;
			unsigned acc, mask;

			acc = dbl[r] ^ in[r1] ^ dbl[r1]
				^ in[(r + 2) & 3] ^ in[(r + 3) & 3];
			mask = (unsigned)(-(int)(acc >> 8));
			state[base + r] = acc ^ (mask & 0x11B);
		}
	}
}
|
||||
|
||||
/*
 * see inner.h
 *
 * Encrypt the 16-byte block at data in place. skey holds four 32-bit
 * words per round key; the round keys are consumed from index 0 up
 * to index num_rounds.
 */
void
br_aes_small_encrypt(unsigned num_rounds, const uint32_t *skey, void *data)
{
	unsigned char *block;
	unsigned st[16];
	unsigned round, j;

	block = data;
	for (j = 0; j < 16; j ++) {
		st[j] = block[j];
	}

	add_round_key(st, skey);
	round = 1;
	while (round < num_rounds) {
		sub_bytes(st);
		shift_rows(st);
		mix_columns(st);
		add_round_key(st, skey + (round << 2));
		round ++;
	}
	/* Final round: no mix_columns step. */
	sub_bytes(st);
	shift_rows(st);
	add_round_key(st, skey + (num_rounds << 2));

	for (j = 0; j < 16; j ++) {
		block[j] = st[j];
	}
}
|
||||
|
|
@ -0,0 +1,240 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#define BR_ENABLE_INTRINSICS 1
|
||||
#include "inner.h"
|
||||
|
||||
/*
|
||||
* This code contains the AES key schedule implementation using the
|
||||
* AES-NI opcodes.
|
||||
*/
|
||||
|
||||
#if BR_AES_X86NI
|
||||
|
||||
/*
 * see inner.h
 *
 * Query br_cpuid() for the two ECX feature bits this code relies on.
 */
int
br_aes_x86ni_supported(void)
{
	/*
	 * ECX feature bits:
	 *   bit 19  SSE4.1 (used for _mm_insert_epi32(), for AES-CTR)
	 *   bit 25  AES-NI
	 */
	enum {
		ECX_SSE41 = 0x00080000,
		ECX_AESNI = 0x02000000
	};

	return br_cpuid(0, 0, ECX_SSE41 | ECX_AESNI, 0);
}
|
||||
|
||||
BR_TARGETS_X86_UP
|
||||
|
||||
/*
 * One AES-128 key expansion step. k is XORed three times with itself
 * shifted left by 4 bytes, then with the top 32-bit lane of k2
 * broadcast to all four lanes.
 */
BR_TARGET("sse2,aes")
static inline __m128i
expand_step128(__m128i k, __m128i k2)
{
	__m128i bcast;
	int n;

	for (n = 0; n < 3; n ++) {
		k = _mm_xor_si128(k, _mm_slli_si128(k, 4));
	}
	bcast = _mm_shuffle_epi32(k2, 0xFF);
	return _mm_xor_si128(k, bcast);
}
|
||||
|
||||
/*
 * One AES-192 key expansion step, updating *t1, *t2 and *t3 in place.
 * On entry *t2 is the key-generation assist value. On exit *t2 holds
 * the top lane of the new *t1 broadcast to all lanes.
 */
BR_TARGET("sse2,aes")
static inline void
expand_step192(__m128i *t1, __m128i *t2, __m128i *t3)
{
	__m128i a, c, sh, bcast;
	int n;

	a = *t1;
	c = *t3;

	/* a ^= (a << 4 bytes) ^ (a << 8 bytes) ^ (a << 12 bytes) */
	sh = a;
	for (n = 0; n < 3; n ++) {
		sh = _mm_slli_si128(sh, 0x4);
		a = _mm_xor_si128(a, sh);
	}
	a = _mm_xor_si128(a, _mm_shuffle_epi32(*t2, 0x55));

	bcast = _mm_shuffle_epi32(a, 0xFF);
	c = _mm_xor_si128(c, _mm_slli_si128(c, 0x4));
	c = _mm_xor_si128(c, bcast);

	*t1 = a;
	*t2 = bcast;
	*t3 = c;
}
|
||||
|
||||
/*
 * First half of an AES-256 key expansion step. *t2 is replaced by
 * its top lane broadcast to all lanes; *t1 is XORed with its own
 * 4-, 8- and 12-byte left shifts and then with the new *t2.
 */
BR_TARGET("sse2,aes")
static inline void
expand_step256_1(__m128i *t1, __m128i *t2)
{
	__m128i a, sh, bcast;
	int n;

	bcast = _mm_shuffle_epi32(*t2, 0xFF);
	a = *t1;
	sh = a;
	for (n = 0; n < 3; n ++) {
		sh = _mm_slli_si128(sh, 0x4);
		a = _mm_xor_si128(a, sh);
	}
	*t2 = bcast;
	*t1 = _mm_xor_si128(a, bcast);
}
|
||||
|
||||
/*
 * Second half of an AES-256 key expansion step. Lane 2 of
 * _mm_aeskeygenassist_si128(*t1, 0) is broadcast to all lanes;
 * *t3 is XORed with its own 4-, 8- and 12-byte left shifts and
 * then with that broadcast value. *t1 is only read.
 */
BR_TARGET("sse2,aes")
static inline void
expand_step256_2(__m128i *t1, __m128i *t3)
{
	__m128i c, sh, bcast;
	int n;

	bcast = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(*t1, 0x0), 0xAA);
	c = *t3;
	sh = c;
	for (n = 0; n < 3; n ++) {
		sh = _mm_slli_si128(sh, 0x4);
		c = _mm_xor_si128(c, sh);
	}
	*t3 = _mm_xor_si128(c, bcast);
}
|
||||
|
||||
/*
|
||||
* Perform key schedule for AES, encryption direction. Subkeys are written
|
||||
* in sk[], and the number of rounds is returned. Key length MUST be 16,
|
||||
* 24 or 32 bytes.
|
||||
*/
|
||||
BR_TARGET("sse2,aes")
|
||||
static unsigned
|
||||
x86ni_keysched(__m128i *sk, const void *key, size_t len)
|
||||
{
|
||||
const unsigned char *kb;
|
||||
|
||||
#define KEXP128(k, i, rcon) do { \
|
||||
k = expand_step128(k, _mm_aeskeygenassist_si128(k, rcon)); \
|
||||
sk[i] = k; \
|
||||
} while (0)
|
||||
|
||||
#define KEXP192(i, rcon1, rcon2) do { \
|
||||
sk[(i) + 0] = t1; \
|
||||
sk[(i) + 1] = t3; \
|
||||
t2 = _mm_aeskeygenassist_si128(t3, rcon1); \
|
||||
expand_step192(&t1, &t2, &t3); \
|
||||
sk[(i) + 1] = _mm_castpd_si128(_mm_shuffle_pd( \
|
||||
_mm_castsi128_pd(sk[(i) + 1]), \
|
||||
_mm_castsi128_pd(t1), 0)); \
|
||||
sk[(i) + 2] = _mm_castpd_si128(_mm_shuffle_pd( \
|
||||
_mm_castsi128_pd(t1), \
|
||||
_mm_castsi128_pd(t3), 1)); \
|
||||
t2 = _mm_aeskeygenassist_si128(t3, rcon2); \
|
||||
expand_step192(&t1, &t2, &t3); \
|
||||
} while (0)
|
||||
|
||||
#define KEXP256(i, rcon) do { \
|
||||
sk[(i) + 0] = t3; \
|
||||
t2 = _mm_aeskeygenassist_si128(t3, rcon); \
|
||||
expand_step256_1(&t1, &t2); \
|
||||
sk[(i) + 1] = t1; \
|
||||
expand_step256_2(&t1, &t3); \
|
||||
} while (0)
|
||||
|
||||
kb = key;
|
||||
switch (len) {
|
||||
__m128i t1, t2, t3;
|
||||
|
||||
case 16:
|
||||
t1 = _mm_loadu_si128((const void *)kb);
|
||||
sk[0] = t1;
|
||||
KEXP128(t1, 1, 0x01);
|
||||
KEXP128(t1, 2, 0x02);
|
||||
KEXP128(t1, 3, 0x04);
|
||||
KEXP128(t1, 4, 0x08);
|
||||
KEXP128(t1, 5, 0x10);
|
||||
KEXP128(t1, 6, 0x20);
|
||||
KEXP128(t1, 7, 0x40);
|
||||
KEXP128(t1, 8, 0x80);
|
||||
KEXP128(t1, 9, 0x1B);
|
||||
KEXP128(t1, 10, 0x36);
|
||||
return 10;
|
||||
|
||||
case 24:
|
||||
t1 = _mm_loadu_si128((const void *)kb);
|
||||
t3 = _mm_loadu_si128((const void *)(kb + 8));
|
||||
t3 = _mm_shuffle_epi32(t3, 0x4E);
|
||||
KEXP192(0, 0x01, 0x02);
|
||||
KEXP192(3, 0x04, 0x08);
|
||||
KEXP192(6, 0x10, 0x20);
|
||||
KEXP192(9, 0x40, 0x80);
|
||||
sk[12] = t1;
|
||||
return 12;
|
||||
|
||||
case 32:
|
||||
t1 = _mm_loadu_si128((const void *)kb);
|
||||
t3 = _mm_loadu_si128((const void *)(kb + 16));
|
||||
sk[0] = t1;
|
||||
KEXP256( 1, 0x01);
|
||||
KEXP256( 3, 0x02);
|
||||
KEXP256( 5, 0x04);
|
||||
KEXP256( 7, 0x08);
|
||||
KEXP256( 9, 0x10);
|
||||
KEXP256(11, 0x20);
|
||||
sk[13] = t3;
|
||||
t2 = _mm_aeskeygenassist_si128(t3, 0x40);
|
||||
expand_step256_1(&t1, &t2);
|
||||
sk[14] = t1;
|
||||
return 14;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
||||
#undef KEXP128
|
||||
#undef KEXP192
|
||||
#undef KEXP256
|
||||
}
|
||||
|
||||
/* see inner.h */
|
||||
BR_TARGET("sse2,aes")
|
||||
unsigned
|
||||
br_aes_x86ni_keysched_enc(unsigned char *skni, const void *key, size_t len)
|
||||
{
|
||||
__m128i sk[15];
|
||||
unsigned num_rounds;
|
||||
|
||||
num_rounds = x86ni_keysched(sk, key, len);
|
||||
memcpy(skni, sk, (num_rounds + 1) << 4);
|
||||
return num_rounds;
|
||||
}
|
||||
|
||||
/* see inner.h */
|
||||
BR_TARGET("sse2,aes")
|
||||
unsigned
|
||||
br_aes_x86ni_keysched_dec(unsigned char *skni, const void *key, size_t len)
|
||||
{
|
||||
__m128i sk[15];
|
||||
unsigned u, num_rounds;
|
||||
|
||||
num_rounds = x86ni_keysched(sk, key, len);
|
||||
_mm_storeu_si128((void *)skni, sk[num_rounds]);
|
||||
for (u = 1; u < num_rounds; u ++) {
|
||||
_mm_storeu_si128((void *)(skni + (u << 4)),
|
||||
_mm_aesimc_si128(sk[num_rounds - u]));
|
||||
}
|
||||
_mm_storeu_si128((void *)(skni + (num_rounds << 4)), sk[0]);
|
||||
return num_rounds;
|
||||
}
|
||||
|
||||
BR_TARGETS_X86_DOWN
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,223 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#define BR_ENABLE_INTRINSICS 1
|
||||
#include "inner.h"
|
||||
|
||||
#if BR_AES_X86NI
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_cbcdec_class *
|
||||
br_aes_x86ni_cbcdec_get_vtable(void)
|
||||
{
|
||||
return br_aes_x86ni_supported() ? &br_aes_x86ni_cbcdec_vtable : NULL;
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_x86ni_cbcdec_init(br_aes_x86ni_cbcdec_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_x86ni_cbcdec_vtable;
|
||||
ctx->num_rounds = br_aes_x86ni_keysched_dec(ctx->skey.skni, key, len);
|
||||
}
|
||||
|
||||
BR_TARGETS_X86_UP
|
||||
|
||||
/* see bearssl_block.h */
|
||||
BR_TARGET("sse2,aes")
|
||||
void
|
||||
br_aes_x86ni_cbcdec_run(const br_aes_x86ni_cbcdec_keys *ctx,
|
||||
void *iv, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
unsigned num_rounds;
|
||||
__m128i sk[15], ivx;
|
||||
unsigned u;
|
||||
|
||||
buf = data;
|
||||
ivx = _mm_loadu_si128(iv);
|
||||
num_rounds = ctx->num_rounds;
|
||||
for (u = 0; u <= num_rounds; u ++) {
|
||||
sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
|
||||
}
|
||||
while (len > 0) {
|
||||
__m128i x0, x1, x2, x3, e0, e1, e2, e3;
|
||||
|
||||
x0 = _mm_loadu_si128((void *)(buf + 0));
|
||||
if (len >= 64) {
|
||||
x1 = _mm_loadu_si128((void *)(buf + 16));
|
||||
x2 = _mm_loadu_si128((void *)(buf + 32));
|
||||
x3 = _mm_loadu_si128((void *)(buf + 48));
|
||||
} else {
|
||||
x0 = _mm_loadu_si128((void *)(buf + 0));
|
||||
if (len >= 32) {
|
||||
x1 = _mm_loadu_si128((void *)(buf + 16));
|
||||
if (len >= 48) {
|
||||
x2 = _mm_loadu_si128(
|
||||
(void *)(buf + 32));
|
||||
x3 = x2;
|
||||
} else {
|
||||
x2 = x0;
|
||||
x3 = x1;
|
||||
}
|
||||
} else {
|
||||
x1 = x0;
|
||||
x2 = x0;
|
||||
x3 = x0;
|
||||
}
|
||||
}
|
||||
e0 = x0;
|
||||
e1 = x1;
|
||||
e2 = x2;
|
||||
e3 = x3;
|
||||
x0 = _mm_xor_si128(x0, sk[0]);
|
||||
x1 = _mm_xor_si128(x1, sk[0]);
|
||||
x2 = _mm_xor_si128(x2, sk[0]);
|
||||
x3 = _mm_xor_si128(x3, sk[0]);
|
||||
x0 = _mm_aesdec_si128(x0, sk[1]);
|
||||
x1 = _mm_aesdec_si128(x1, sk[1]);
|
||||
x2 = _mm_aesdec_si128(x2, sk[1]);
|
||||
x3 = _mm_aesdec_si128(x3, sk[1]);
|
||||
x0 = _mm_aesdec_si128(x0, sk[2]);
|
||||
x1 = _mm_aesdec_si128(x1, sk[2]);
|
||||
x2 = _mm_aesdec_si128(x2, sk[2]);
|
||||
x3 = _mm_aesdec_si128(x3, sk[2]);
|
||||
x0 = _mm_aesdec_si128(x0, sk[3]);
|
||||
x1 = _mm_aesdec_si128(x1, sk[3]);
|
||||
x2 = _mm_aesdec_si128(x2, sk[3]);
|
||||
x3 = _mm_aesdec_si128(x3, sk[3]);
|
||||
x0 = _mm_aesdec_si128(x0, sk[4]);
|
||||
x1 = _mm_aesdec_si128(x1, sk[4]);
|
||||
x2 = _mm_aesdec_si128(x2, sk[4]);
|
||||
x3 = _mm_aesdec_si128(x3, sk[4]);
|
||||
x0 = _mm_aesdec_si128(x0, sk[5]);
|
||||
x1 = _mm_aesdec_si128(x1, sk[5]);
|
||||
x2 = _mm_aesdec_si128(x2, sk[5]);
|
||||
x3 = _mm_aesdec_si128(x3, sk[5]);
|
||||
x0 = _mm_aesdec_si128(x0, sk[6]);
|
||||
x1 = _mm_aesdec_si128(x1, sk[6]);
|
||||
x2 = _mm_aesdec_si128(x2, sk[6]);
|
||||
x3 = _mm_aesdec_si128(x3, sk[6]);
|
||||
x0 = _mm_aesdec_si128(x0, sk[7]);
|
||||
x1 = _mm_aesdec_si128(x1, sk[7]);
|
||||
x2 = _mm_aesdec_si128(x2, sk[7]);
|
||||
x3 = _mm_aesdec_si128(x3, sk[7]);
|
||||
x0 = _mm_aesdec_si128(x0, sk[8]);
|
||||
x1 = _mm_aesdec_si128(x1, sk[8]);
|
||||
x2 = _mm_aesdec_si128(x2, sk[8]);
|
||||
x3 = _mm_aesdec_si128(x3, sk[8]);
|
||||
x0 = _mm_aesdec_si128(x0, sk[9]);
|
||||
x1 = _mm_aesdec_si128(x1, sk[9]);
|
||||
x2 = _mm_aesdec_si128(x2, sk[9]);
|
||||
x3 = _mm_aesdec_si128(x3, sk[9]);
|
||||
if (num_rounds == 10) {
|
||||
x0 = _mm_aesdeclast_si128(x0, sk[10]);
|
||||
x1 = _mm_aesdeclast_si128(x1, sk[10]);
|
||||
x2 = _mm_aesdeclast_si128(x2, sk[10]);
|
||||
x3 = _mm_aesdeclast_si128(x3, sk[10]);
|
||||
} else if (num_rounds == 12) {
|
||||
x0 = _mm_aesdec_si128(x0, sk[10]);
|
||||
x1 = _mm_aesdec_si128(x1, sk[10]);
|
||||
x2 = _mm_aesdec_si128(x2, sk[10]);
|
||||
x3 = _mm_aesdec_si128(x3, sk[10]);
|
||||
x0 = _mm_aesdec_si128(x0, sk[11]);
|
||||
x1 = _mm_aesdec_si128(x1, sk[11]);
|
||||
x2 = _mm_aesdec_si128(x2, sk[11]);
|
||||
x3 = _mm_aesdec_si128(x3, sk[11]);
|
||||
x0 = _mm_aesdeclast_si128(x0, sk[12]);
|
||||
x1 = _mm_aesdeclast_si128(x1, sk[12]);
|
||||
x2 = _mm_aesdeclast_si128(x2, sk[12]);
|
||||
x3 = _mm_aesdeclast_si128(x3, sk[12]);
|
||||
} else {
|
||||
x0 = _mm_aesdec_si128(x0, sk[10]);
|
||||
x1 = _mm_aesdec_si128(x1, sk[10]);
|
||||
x2 = _mm_aesdec_si128(x2, sk[10]);
|
||||
x3 = _mm_aesdec_si128(x3, sk[10]);
|
||||
x0 = _mm_aesdec_si128(x0, sk[11]);
|
||||
x1 = _mm_aesdec_si128(x1, sk[11]);
|
||||
x2 = _mm_aesdec_si128(x2, sk[11]);
|
||||
x3 = _mm_aesdec_si128(x3, sk[11]);
|
||||
x0 = _mm_aesdec_si128(x0, sk[12]);
|
||||
x1 = _mm_aesdec_si128(x1, sk[12]);
|
||||
x2 = _mm_aesdec_si128(x2, sk[12]);
|
||||
x3 = _mm_aesdec_si128(x3, sk[12]);
|
||||
x0 = _mm_aesdec_si128(x0, sk[13]);
|
||||
x1 = _mm_aesdec_si128(x1, sk[13]);
|
||||
x2 = _mm_aesdec_si128(x2, sk[13]);
|
||||
x3 = _mm_aesdec_si128(x3, sk[13]);
|
||||
x0 = _mm_aesdeclast_si128(x0, sk[14]);
|
||||
x1 = _mm_aesdeclast_si128(x1, sk[14]);
|
||||
x2 = _mm_aesdeclast_si128(x2, sk[14]);
|
||||
x3 = _mm_aesdeclast_si128(x3, sk[14]);
|
||||
}
|
||||
x0 = _mm_xor_si128(x0, ivx);
|
||||
x1 = _mm_xor_si128(x1, e0);
|
||||
x2 = _mm_xor_si128(x2, e1);
|
||||
x3 = _mm_xor_si128(x3, e2);
|
||||
ivx = e3;
|
||||
_mm_storeu_si128((void *)(buf + 0), x0);
|
||||
if (len >= 64) {
|
||||
_mm_storeu_si128((void *)(buf + 16), x1);
|
||||
_mm_storeu_si128((void *)(buf + 32), x2);
|
||||
_mm_storeu_si128((void *)(buf + 48), x3);
|
||||
buf += 64;
|
||||
len -= 64;
|
||||
} else {
|
||||
if (len >= 32) {
|
||||
_mm_storeu_si128((void *)(buf + 16), x1);
|
||||
if (len >= 48) {
|
||||
_mm_storeu_si128(
|
||||
(void *)(buf + 32), x2);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
_mm_storeu_si128(iv, ivx);
|
||||
}
|
||||
|
||||
BR_TARGETS_X86_DOWN
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_cbcdec_class br_aes_x86ni_cbcdec_vtable = {
|
||||
sizeof(br_aes_x86ni_cbcdec_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_cbcdec_class **, const void *, size_t))
|
||||
&br_aes_x86ni_cbcdec_init,
|
||||
(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
|
||||
&br_aes_x86ni_cbcdec_run
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_cbcdec_class *
|
||||
br_aes_x86ni_cbcdec_get_vtable(void)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,122 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#define BR_ENABLE_INTRINSICS 1
|
||||
#include "inner.h"
|
||||
|
||||
#if BR_AES_X86NI
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_cbcenc_class *
|
||||
br_aes_x86ni_cbcenc_get_vtable(void)
|
||||
{
|
||||
return br_aes_x86ni_supported() ? &br_aes_x86ni_cbcenc_vtable : NULL;
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_x86ni_cbcenc_init(br_aes_x86ni_cbcenc_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_x86ni_cbcenc_vtable;
|
||||
ctx->num_rounds = br_aes_x86ni_keysched_enc(ctx->skey.skni, key, len);
|
||||
}
|
||||
|
||||
BR_TARGETS_X86_UP
|
||||
|
||||
/* see bearssl_block.h */
|
||||
BR_TARGET("sse2,aes")
|
||||
void
|
||||
br_aes_x86ni_cbcenc_run(const br_aes_x86ni_cbcenc_keys *ctx,
|
||||
void *iv, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
unsigned num_rounds;
|
||||
__m128i sk[15], ivx;
|
||||
unsigned u;
|
||||
|
||||
buf = data;
|
||||
ivx = _mm_loadu_si128(iv);
|
||||
num_rounds = ctx->num_rounds;
|
||||
for (u = 0; u <= num_rounds; u ++) {
|
||||
sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
|
||||
}
|
||||
while (len > 0) {
|
||||
__m128i x;
|
||||
|
||||
x = _mm_xor_si128(_mm_loadu_si128((void *)buf), ivx);
|
||||
x = _mm_xor_si128(x, sk[0]);
|
||||
x = _mm_aesenc_si128(x, sk[1]);
|
||||
x = _mm_aesenc_si128(x, sk[2]);
|
||||
x = _mm_aesenc_si128(x, sk[3]);
|
||||
x = _mm_aesenc_si128(x, sk[4]);
|
||||
x = _mm_aesenc_si128(x, sk[5]);
|
||||
x = _mm_aesenc_si128(x, sk[6]);
|
||||
x = _mm_aesenc_si128(x, sk[7]);
|
||||
x = _mm_aesenc_si128(x, sk[8]);
|
||||
x = _mm_aesenc_si128(x, sk[9]);
|
||||
if (num_rounds == 10) {
|
||||
x = _mm_aesenclast_si128(x, sk[10]);
|
||||
} else if (num_rounds == 12) {
|
||||
x = _mm_aesenc_si128(x, sk[10]);
|
||||
x = _mm_aesenc_si128(x, sk[11]);
|
||||
x = _mm_aesenclast_si128(x, sk[12]);
|
||||
} else {
|
||||
x = _mm_aesenc_si128(x, sk[10]);
|
||||
x = _mm_aesenc_si128(x, sk[11]);
|
||||
x = _mm_aesenc_si128(x, sk[12]);
|
||||
x = _mm_aesenc_si128(x, sk[13]);
|
||||
x = _mm_aesenclast_si128(x, sk[14]);
|
||||
}
|
||||
ivx = x;
|
||||
_mm_storeu_si128((void *)buf, x);
|
||||
buf += 16;
|
||||
len -= 16;
|
||||
}
|
||||
_mm_storeu_si128(iv, ivx);
|
||||
}
|
||||
|
||||
BR_TARGETS_X86_DOWN
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_cbcenc_class br_aes_x86ni_cbcenc_vtable = {
|
||||
sizeof(br_aes_x86ni_cbcenc_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_cbcenc_class **, const void *, size_t))
|
||||
&br_aes_x86ni_cbcenc_init,
|
||||
(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
|
||||
&br_aes_x86ni_cbcenc_run
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_cbcenc_class *
|
||||
br_aes_x86ni_cbcenc_get_vtable(void)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,211 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#define BR_ENABLE_INTRINSICS 1
|
||||
#include "inner.h"
|
||||
|
||||
#if BR_AES_X86NI
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_ctr_class *
|
||||
br_aes_x86ni_ctr_get_vtable(void)
|
||||
{
|
||||
return br_aes_x86ni_supported() ? &br_aes_x86ni_ctr_vtable : NULL;
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_x86ni_ctr_init(br_aes_x86ni_ctr_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_x86ni_ctr_vtable;
|
||||
ctx->num_rounds = br_aes_x86ni_keysched_enc(ctx->skey.skni, key, len);
|
||||
}
|
||||
|
||||
BR_TARGETS_X86_UP
|
||||
|
||||
/* see bearssl_block.h */
|
||||
BR_TARGET("sse2,sse4.1,aes")
|
||||
uint32_t
|
||||
br_aes_x86ni_ctr_run(const br_aes_x86ni_ctr_keys *ctx,
|
||||
const void *iv, uint32_t cc, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
unsigned char ivbuf[16];
|
||||
unsigned num_rounds;
|
||||
__m128i sk[15];
|
||||
__m128i ivx;
|
||||
unsigned u;
|
||||
|
||||
buf = data;
|
||||
memcpy(ivbuf, iv, 12);
|
||||
num_rounds = ctx->num_rounds;
|
||||
for (u = 0; u <= num_rounds; u ++) {
|
||||
sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
|
||||
}
|
||||
ivx = _mm_loadu_si128((void *)ivbuf);
|
||||
while (len > 0) {
|
||||
__m128i x0, x1, x2, x3;
|
||||
|
||||
x0 = _mm_insert_epi32(ivx, br_bswap32(cc + 0), 3);
|
||||
x1 = _mm_insert_epi32(ivx, br_bswap32(cc + 1), 3);
|
||||
x2 = _mm_insert_epi32(ivx, br_bswap32(cc + 2), 3);
|
||||
x3 = _mm_insert_epi32(ivx, br_bswap32(cc + 3), 3);
|
||||
x0 = _mm_xor_si128(x0, sk[0]);
|
||||
x1 = _mm_xor_si128(x1, sk[0]);
|
||||
x2 = _mm_xor_si128(x2, sk[0]);
|
||||
x3 = _mm_xor_si128(x3, sk[0]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[1]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[1]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[1]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[1]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[2]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[2]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[2]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[2]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[3]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[3]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[3]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[3]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[4]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[4]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[4]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[4]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[5]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[5]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[5]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[5]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[6]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[6]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[6]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[6]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[7]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[7]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[7]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[7]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[8]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[8]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[8]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[8]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[9]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[9]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[9]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[9]);
|
||||
if (num_rounds == 10) {
|
||||
x0 = _mm_aesenclast_si128(x0, sk[10]);
|
||||
x1 = _mm_aesenclast_si128(x1, sk[10]);
|
||||
x2 = _mm_aesenclast_si128(x2, sk[10]);
|
||||
x3 = _mm_aesenclast_si128(x3, sk[10]);
|
||||
} else if (num_rounds == 12) {
|
||||
x0 = _mm_aesenc_si128(x0, sk[10]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[10]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[10]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[10]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[11]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[11]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[11]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[11]);
|
||||
x0 = _mm_aesenclast_si128(x0, sk[12]);
|
||||
x1 = _mm_aesenclast_si128(x1, sk[12]);
|
||||
x2 = _mm_aesenclast_si128(x2, sk[12]);
|
||||
x3 = _mm_aesenclast_si128(x3, sk[12]);
|
||||
} else {
|
||||
x0 = _mm_aesenc_si128(x0, sk[10]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[10]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[10]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[10]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[11]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[11]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[11]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[11]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[12]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[12]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[12]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[12]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[13]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[13]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[13]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[13]);
|
||||
x0 = _mm_aesenclast_si128(x0, sk[14]);
|
||||
x1 = _mm_aesenclast_si128(x1, sk[14]);
|
||||
x2 = _mm_aesenclast_si128(x2, sk[14]);
|
||||
x3 = _mm_aesenclast_si128(x3, sk[14]);
|
||||
}
|
||||
if (len >= 64) {
|
||||
x0 = _mm_xor_si128(x0,
|
||||
_mm_loadu_si128((void *)(buf + 0)));
|
||||
x1 = _mm_xor_si128(x1,
|
||||
_mm_loadu_si128((void *)(buf + 16)));
|
||||
x2 = _mm_xor_si128(x2,
|
||||
_mm_loadu_si128((void *)(buf + 32)));
|
||||
x3 = _mm_xor_si128(x3,
|
||||
_mm_loadu_si128((void *)(buf + 48)));
|
||||
_mm_storeu_si128((void *)(buf + 0), x0);
|
||||
_mm_storeu_si128((void *)(buf + 16), x1);
|
||||
_mm_storeu_si128((void *)(buf + 32), x2);
|
||||
_mm_storeu_si128((void *)(buf + 48), x3);
|
||||
buf += 64;
|
||||
len -= 64;
|
||||
cc += 4;
|
||||
} else {
|
||||
unsigned char tmp[64];
|
||||
|
||||
_mm_storeu_si128((void *)(tmp + 0), x0);
|
||||
_mm_storeu_si128((void *)(tmp + 16), x1);
|
||||
_mm_storeu_si128((void *)(tmp + 32), x2);
|
||||
_mm_storeu_si128((void *)(tmp + 48), x3);
|
||||
for (u = 0; u < len; u ++) {
|
||||
buf[u] ^= tmp[u];
|
||||
}
|
||||
cc += (uint32_t)len >> 4;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return cc;
|
||||
}
|
||||
|
||||
BR_TARGETS_X86_DOWN
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_ctr_class br_aes_x86ni_ctr_vtable = {
|
||||
sizeof(br_aes_x86ni_ctr_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_ctr_class **, const void *, size_t))
|
||||
&br_aes_x86ni_ctr_init,
|
||||
(uint32_t (*)(const br_block_ctr_class *const *,
|
||||
const void *, uint32_t, void *, size_t))
|
||||
&br_aes_x86ni_ctr_run
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_ctr_class *
|
||||
br_aes_x86ni_ctr_get_vtable(void)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,596 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#define BR_ENABLE_INTRINSICS 1
|
||||
#include "inner.h"
|
||||
|
||||
#if BR_AES_X86NI
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_ctrcbc_class *
|
||||
br_aes_x86ni_ctrcbc_get_vtable(void)
|
||||
{
|
||||
return br_aes_x86ni_supported() ? &br_aes_x86ni_ctrcbc_vtable : NULL;
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_x86ni_ctrcbc_init(br_aes_x86ni_ctrcbc_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_x86ni_ctrcbc_vtable;
|
||||
ctx->num_rounds = br_aes_x86ni_keysched_enc(ctx->skey.skni, key, len);
|
||||
}
|
||||
|
||||
BR_TARGETS_X86_UP
|
||||
|
||||
/* see bearssl_block.h */
|
||||
BR_TARGET("sse2,sse4.1,aes")
|
||||
void
|
||||
br_aes_x86ni_ctrcbc_ctr(const br_aes_x86ni_ctrcbc_keys *ctx,
|
||||
void *ctr, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
unsigned num_rounds;
|
||||
__m128i sk[15];
|
||||
__m128i ivx0, ivx1, ivx2, ivx3;
|
||||
__m128i erev, zero, one, four, notthree;
|
||||
unsigned u;
|
||||
|
||||
buf = data;
|
||||
num_rounds = ctx->num_rounds;
|
||||
for (u = 0; u <= num_rounds; u ++) {
|
||||
sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Some SSE2 constants.
|
||||
*/
|
||||
erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15);
|
||||
zero = _mm_setzero_si128();
|
||||
one = _mm_set_epi64x(0, 1);
|
||||
four = _mm_set_epi64x(0, 4);
|
||||
notthree = _mm_sub_epi64(zero, four);
|
||||
|
||||
/*
|
||||
* Decode the counter in big-endian and pre-increment the other
|
||||
* three counters.
|
||||
*/
|
||||
ivx0 = _mm_shuffle_epi8(_mm_loadu_si128((void *)ctr), erev);
|
||||
ivx1 = _mm_add_epi64(ivx0, one);
|
||||
ivx1 = _mm_sub_epi64(ivx1,
|
||||
_mm_slli_si128(_mm_cmpeq_epi64(ivx1, zero), 8));
|
||||
ivx2 = _mm_add_epi64(ivx1, one);
|
||||
ivx2 = _mm_sub_epi64(ivx2,
|
||||
_mm_slli_si128(_mm_cmpeq_epi64(ivx2, zero), 8));
|
||||
ivx3 = _mm_add_epi64(ivx2, one);
|
||||
ivx3 = _mm_sub_epi64(ivx3,
|
||||
_mm_slli_si128(_mm_cmpeq_epi64(ivx3, zero), 8));
|
||||
while (len > 0) {
|
||||
__m128i x0, x1, x2, x3;
|
||||
|
||||
/*
|
||||
* Load counter values; we need to byteswap them because
|
||||
* the specification says that they use big-endian.
|
||||
*/
|
||||
x0 = _mm_shuffle_epi8(ivx0, erev);
|
||||
x1 = _mm_shuffle_epi8(ivx1, erev);
|
||||
x2 = _mm_shuffle_epi8(ivx2, erev);
|
||||
x3 = _mm_shuffle_epi8(ivx3, erev);
|
||||
|
||||
x0 = _mm_xor_si128(x0, sk[0]);
|
||||
x1 = _mm_xor_si128(x1, sk[0]);
|
||||
x2 = _mm_xor_si128(x2, sk[0]);
|
||||
x3 = _mm_xor_si128(x3, sk[0]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[1]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[1]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[1]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[1]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[2]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[2]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[2]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[2]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[3]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[3]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[3]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[3]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[4]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[4]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[4]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[4]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[5]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[5]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[5]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[5]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[6]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[6]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[6]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[6]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[7]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[7]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[7]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[7]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[8]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[8]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[8]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[8]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[9]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[9]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[9]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[9]);
|
||||
if (num_rounds == 10) {
|
||||
x0 = _mm_aesenclast_si128(x0, sk[10]);
|
||||
x1 = _mm_aesenclast_si128(x1, sk[10]);
|
||||
x2 = _mm_aesenclast_si128(x2, sk[10]);
|
||||
x3 = _mm_aesenclast_si128(x3, sk[10]);
|
||||
} else if (num_rounds == 12) {
|
||||
x0 = _mm_aesenc_si128(x0, sk[10]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[10]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[10]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[10]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[11]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[11]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[11]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[11]);
|
||||
x0 = _mm_aesenclast_si128(x0, sk[12]);
|
||||
x1 = _mm_aesenclast_si128(x1, sk[12]);
|
||||
x2 = _mm_aesenclast_si128(x2, sk[12]);
|
||||
x3 = _mm_aesenclast_si128(x3, sk[12]);
|
||||
} else {
|
||||
x0 = _mm_aesenc_si128(x0, sk[10]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[10]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[10]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[10]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[11]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[11]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[11]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[11]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[12]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[12]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[12]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[12]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[13]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[13]);
|
||||
x2 = _mm_aesenc_si128(x2, sk[13]);
|
||||
x3 = _mm_aesenc_si128(x3, sk[13]);
|
||||
x0 = _mm_aesenclast_si128(x0, sk[14]);
|
||||
x1 = _mm_aesenclast_si128(x1, sk[14]);
|
||||
x2 = _mm_aesenclast_si128(x2, sk[14]);
|
||||
x3 = _mm_aesenclast_si128(x3, sk[14]);
|
||||
}
|
||||
if (len >= 64) {
|
||||
x0 = _mm_xor_si128(x0,
|
||||
_mm_loadu_si128((void *)(buf + 0)));
|
||||
x1 = _mm_xor_si128(x1,
|
||||
_mm_loadu_si128((void *)(buf + 16)));
|
||||
x2 = _mm_xor_si128(x2,
|
||||
_mm_loadu_si128((void *)(buf + 32)));
|
||||
x3 = _mm_xor_si128(x3,
|
||||
_mm_loadu_si128((void *)(buf + 48)));
|
||||
_mm_storeu_si128((void *)(buf + 0), x0);
|
||||
_mm_storeu_si128((void *)(buf + 16), x1);
|
||||
_mm_storeu_si128((void *)(buf + 32), x2);
|
||||
_mm_storeu_si128((void *)(buf + 48), x3);
|
||||
buf += 64;
|
||||
len -= 64;
|
||||
} else {
|
||||
unsigned char tmp[64];
|
||||
|
||||
_mm_storeu_si128((void *)(tmp + 0), x0);
|
||||
_mm_storeu_si128((void *)(tmp + 16), x1);
|
||||
_mm_storeu_si128((void *)(tmp + 32), x2);
|
||||
_mm_storeu_si128((void *)(tmp + 48), x3);
|
||||
for (u = 0; u < len; u ++) {
|
||||
buf[u] ^= tmp[u];
|
||||
}
|
||||
switch (len) {
|
||||
case 16:
|
||||
ivx0 = ivx1;
|
||||
break;
|
||||
case 32:
|
||||
ivx0 = ivx2;
|
||||
break;
|
||||
case 48:
|
||||
ivx0 = ivx3;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Add 4 to each counter value. For carry propagation
|
||||
* into the upper 64-bit words, we would need to compare
|
||||
* the results with 4, but SSE2+ has only _signed_
|
||||
* comparisons. Instead, we mask out the low two bits,
|
||||
* and check whether the remaining bits are zero.
|
||||
*/
|
||||
ivx0 = _mm_add_epi64(ivx0, four);
|
||||
ivx1 = _mm_add_epi64(ivx1, four);
|
||||
ivx2 = _mm_add_epi64(ivx2, four);
|
||||
ivx3 = _mm_add_epi64(ivx3, four);
|
||||
ivx0 = _mm_sub_epi64(ivx0,
|
||||
_mm_slli_si128(_mm_cmpeq_epi64(
|
||||
_mm_and_si128(ivx0, notthree), zero), 8));
|
||||
ivx1 = _mm_sub_epi64(ivx1,
|
||||
_mm_slli_si128(_mm_cmpeq_epi64(
|
||||
_mm_and_si128(ivx1, notthree), zero), 8));
|
||||
ivx2 = _mm_sub_epi64(ivx2,
|
||||
_mm_slli_si128(_mm_cmpeq_epi64(
|
||||
_mm_and_si128(ivx2, notthree), zero), 8));
|
||||
ivx3 = _mm_sub_epi64(ivx3,
|
||||
_mm_slli_si128(_mm_cmpeq_epi64(
|
||||
_mm_and_si128(ivx3, notthree), zero), 8));
|
||||
}
|
||||
|
||||
/*
|
||||
* Write back new counter value. The loop took care to put the
|
||||
* right counter value in ivx0.
|
||||
*/
|
||||
_mm_storeu_si128((void *)ctr, _mm_shuffle_epi8(ivx0, erev));
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
BR_TARGET("sse2,sse4.1,aes")
|
||||
void
|
||||
br_aes_x86ni_ctrcbc_mac(const br_aes_x86ni_ctrcbc_keys *ctx,
|
||||
void *cbcmac, const void *data, size_t len)
|
||||
{
|
||||
const unsigned char *buf;
|
||||
unsigned num_rounds;
|
||||
__m128i sk[15], ivx;
|
||||
unsigned u;
|
||||
|
||||
buf = data;
|
||||
ivx = _mm_loadu_si128(cbcmac);
|
||||
num_rounds = ctx->num_rounds;
|
||||
for (u = 0; u <= num_rounds; u ++) {
|
||||
sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
|
||||
}
|
||||
while (len > 0) {
|
||||
__m128i x;
|
||||
|
||||
x = _mm_xor_si128(_mm_loadu_si128((void *)buf), ivx);
|
||||
x = _mm_xor_si128(x, sk[0]);
|
||||
x = _mm_aesenc_si128(x, sk[1]);
|
||||
x = _mm_aesenc_si128(x, sk[2]);
|
||||
x = _mm_aesenc_si128(x, sk[3]);
|
||||
x = _mm_aesenc_si128(x, sk[4]);
|
||||
x = _mm_aesenc_si128(x, sk[5]);
|
||||
x = _mm_aesenc_si128(x, sk[6]);
|
||||
x = _mm_aesenc_si128(x, sk[7]);
|
||||
x = _mm_aesenc_si128(x, sk[8]);
|
||||
x = _mm_aesenc_si128(x, sk[9]);
|
||||
if (num_rounds == 10) {
|
||||
x = _mm_aesenclast_si128(x, sk[10]);
|
||||
} else if (num_rounds == 12) {
|
||||
x = _mm_aesenc_si128(x, sk[10]);
|
||||
x = _mm_aesenc_si128(x, sk[11]);
|
||||
x = _mm_aesenclast_si128(x, sk[12]);
|
||||
} else {
|
||||
x = _mm_aesenc_si128(x, sk[10]);
|
||||
x = _mm_aesenc_si128(x, sk[11]);
|
||||
x = _mm_aesenc_si128(x, sk[12]);
|
||||
x = _mm_aesenc_si128(x, sk[13]);
|
||||
x = _mm_aesenclast_si128(x, sk[14]);
|
||||
}
|
||||
ivx = x;
|
||||
buf += 16;
|
||||
len -= 16;
|
||||
}
|
||||
_mm_storeu_si128(cbcmac, ivx);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
BR_TARGET("sse2,sse4.1,aes")
|
||||
void
|
||||
br_aes_x86ni_ctrcbc_encrypt(const br_aes_x86ni_ctrcbc_keys *ctx,
|
||||
void *ctr, void *cbcmac, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
unsigned num_rounds;
|
||||
__m128i sk[15];
|
||||
__m128i ivx, cmx;
|
||||
__m128i erev, zero, one;
|
||||
unsigned u;
|
||||
int first_iter;
|
||||
|
||||
num_rounds = ctx->num_rounds;
|
||||
for (u = 0; u <= num_rounds; u ++) {
|
||||
sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Some SSE2 constants.
|
||||
*/
|
||||
erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15);
|
||||
zero = _mm_setzero_si128();
|
||||
one = _mm_set_epi64x(0, 1);
|
||||
|
||||
/*
|
||||
* Decode the counter in big-endian.
|
||||
*/
|
||||
ivx = _mm_shuffle_epi8(_mm_loadu_si128(ctr), erev);
|
||||
cmx = _mm_loadu_si128(cbcmac);
|
||||
|
||||
buf = data;
|
||||
first_iter = 1;
|
||||
while (len > 0) {
|
||||
__m128i dx, x0, x1;
|
||||
|
||||
/*
|
||||
* Load initial values:
|
||||
* dx encrypted block of data
|
||||
* x0 counter (for CTR encryption)
|
||||
* x1 input for CBC-MAC
|
||||
*/
|
||||
dx = _mm_loadu_si128((void *)buf);
|
||||
x0 = _mm_shuffle_epi8(ivx, erev);
|
||||
x1 = cmx;
|
||||
|
||||
x0 = _mm_xor_si128(x0, sk[0]);
|
||||
x1 = _mm_xor_si128(x1, sk[0]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[1]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[1]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[2]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[2]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[3]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[3]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[4]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[4]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[5]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[5]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[6]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[6]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[7]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[7]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[8]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[8]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[9]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[9]);
|
||||
if (num_rounds == 10) {
|
||||
x0 = _mm_aesenclast_si128(x0, sk[10]);
|
||||
x1 = _mm_aesenclast_si128(x1, sk[10]);
|
||||
} else if (num_rounds == 12) {
|
||||
x0 = _mm_aesenc_si128(x0, sk[10]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[10]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[11]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[11]);
|
||||
x0 = _mm_aesenclast_si128(x0, sk[12]);
|
||||
x1 = _mm_aesenclast_si128(x1, sk[12]);
|
||||
} else {
|
||||
x0 = _mm_aesenc_si128(x0, sk[10]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[10]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[11]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[11]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[12]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[12]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[13]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[13]);
|
||||
x0 = _mm_aesenclast_si128(x0, sk[14]);
|
||||
x1 = _mm_aesenclast_si128(x1, sk[14]);
|
||||
}
|
||||
|
||||
x0 = _mm_xor_si128(x0, dx);
|
||||
if (first_iter) {
|
||||
cmx = _mm_xor_si128(cmx, x0);
|
||||
first_iter = 0;
|
||||
} else {
|
||||
cmx = _mm_xor_si128(x1, x0);
|
||||
}
|
||||
_mm_storeu_si128((void *)buf, x0);
|
||||
|
||||
buf += 16;
|
||||
len -= 16;
|
||||
|
||||
/*
|
||||
* Increment the counter value.
|
||||
*/
|
||||
ivx = _mm_add_epi64(ivx, one);
|
||||
ivx = _mm_sub_epi64(ivx,
|
||||
_mm_slli_si128(_mm_cmpeq_epi64(ivx, zero), 8));
|
||||
|
||||
/*
|
||||
* If this was the last iteration, then compute the
|
||||
* extra block encryption to complete CBC-MAC.
|
||||
*/
|
||||
if (len == 0) {
|
||||
cmx = _mm_xor_si128(cmx, sk[0]);
|
||||
cmx = _mm_aesenc_si128(cmx, sk[1]);
|
||||
cmx = _mm_aesenc_si128(cmx, sk[2]);
|
||||
cmx = _mm_aesenc_si128(cmx, sk[3]);
|
||||
cmx = _mm_aesenc_si128(cmx, sk[4]);
|
||||
cmx = _mm_aesenc_si128(cmx, sk[5]);
|
||||
cmx = _mm_aesenc_si128(cmx, sk[6]);
|
||||
cmx = _mm_aesenc_si128(cmx, sk[7]);
|
||||
cmx = _mm_aesenc_si128(cmx, sk[8]);
|
||||
cmx = _mm_aesenc_si128(cmx, sk[9]);
|
||||
if (num_rounds == 10) {
|
||||
cmx = _mm_aesenclast_si128(cmx, sk[10]);
|
||||
} else if (num_rounds == 12) {
|
||||
cmx = _mm_aesenc_si128(cmx, sk[10]);
|
||||
cmx = _mm_aesenc_si128(cmx, sk[11]);
|
||||
cmx = _mm_aesenclast_si128(cmx, sk[12]);
|
||||
} else {
|
||||
cmx = _mm_aesenc_si128(cmx, sk[10]);
|
||||
cmx = _mm_aesenc_si128(cmx, sk[11]);
|
||||
cmx = _mm_aesenc_si128(cmx, sk[12]);
|
||||
cmx = _mm_aesenc_si128(cmx, sk[13]);
|
||||
cmx = _mm_aesenclast_si128(cmx, sk[14]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Write back new counter value and CBC-MAC value.
|
||||
*/
|
||||
_mm_storeu_si128(ctr, _mm_shuffle_epi8(ivx, erev));
|
||||
_mm_storeu_si128(cbcmac, cmx);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
BR_TARGET("sse2,sse4.1,aes")
|
||||
void
|
||||
br_aes_x86ni_ctrcbc_decrypt(const br_aes_x86ni_ctrcbc_keys *ctx,
|
||||
void *ctr, void *cbcmac, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
unsigned num_rounds;
|
||||
__m128i sk[15];
|
||||
__m128i ivx, cmx;
|
||||
__m128i erev, zero, one;
|
||||
unsigned u;
|
||||
|
||||
num_rounds = ctx->num_rounds;
|
||||
for (u = 0; u <= num_rounds; u ++) {
|
||||
sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Some SSE2 constants.
|
||||
*/
|
||||
erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15);
|
||||
zero = _mm_setzero_si128();
|
||||
one = _mm_set_epi64x(0, 1);
|
||||
|
||||
/*
|
||||
* Decode the counter in big-endian.
|
||||
*/
|
||||
ivx = _mm_shuffle_epi8(_mm_loadu_si128(ctr), erev);
|
||||
cmx = _mm_loadu_si128(cbcmac);
|
||||
|
||||
buf = data;
|
||||
while (len > 0) {
|
||||
__m128i dx, x0, x1;
|
||||
|
||||
/*
|
||||
* Load initial values:
|
||||
* dx encrypted block of data
|
||||
* x0 counter (for CTR encryption)
|
||||
* x1 input for CBC-MAC
|
||||
*/
|
||||
dx = _mm_loadu_si128((void *)buf);
|
||||
x0 = _mm_shuffle_epi8(ivx, erev);
|
||||
x1 = _mm_xor_si128(cmx, dx);
|
||||
|
||||
x0 = _mm_xor_si128(x0, sk[0]);
|
||||
x1 = _mm_xor_si128(x1, sk[0]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[1]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[1]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[2]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[2]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[3]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[3]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[4]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[4]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[5]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[5]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[6]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[6]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[7]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[7]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[8]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[8]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[9]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[9]);
|
||||
if (num_rounds == 10) {
|
||||
x0 = _mm_aesenclast_si128(x0, sk[10]);
|
||||
x1 = _mm_aesenclast_si128(x1, sk[10]);
|
||||
} else if (num_rounds == 12) {
|
||||
x0 = _mm_aesenc_si128(x0, sk[10]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[10]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[11]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[11]);
|
||||
x0 = _mm_aesenclast_si128(x0, sk[12]);
|
||||
x1 = _mm_aesenclast_si128(x1, sk[12]);
|
||||
} else {
|
||||
x0 = _mm_aesenc_si128(x0, sk[10]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[10]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[11]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[11]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[12]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[12]);
|
||||
x0 = _mm_aesenc_si128(x0, sk[13]);
|
||||
x1 = _mm_aesenc_si128(x1, sk[13]);
|
||||
x0 = _mm_aesenclast_si128(x0, sk[14]);
|
||||
x1 = _mm_aesenclast_si128(x1, sk[14]);
|
||||
}
|
||||
x0 = _mm_xor_si128(x0, dx);
|
||||
cmx = x1;
|
||||
_mm_storeu_si128((void *)buf, x0);
|
||||
|
||||
buf += 16;
|
||||
len -= 16;
|
||||
|
||||
/*
|
||||
* Increment the counter value.
|
||||
*/
|
||||
ivx = _mm_add_epi64(ivx, one);
|
||||
ivx = _mm_sub_epi64(ivx,
|
||||
_mm_slli_si128(_mm_cmpeq_epi64(ivx, zero), 8));
|
||||
}
|
||||
|
||||
/*
|
||||
* Write back new counter value and CBC-MAC value.
|
||||
*/
|
||||
_mm_storeu_si128(ctr, _mm_shuffle_epi8(ivx, erev));
|
||||
_mm_storeu_si128(cbcmac, cmx);
|
||||
}
|
||||
|
||||
BR_TARGETS_X86_DOWN
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_ctrcbc_class br_aes_x86ni_ctrcbc_vtable = {
|
||||
sizeof(br_aes_x86ni_ctrcbc_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
|
||||
&br_aes_x86ni_ctrcbc_init,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, void *, void *, size_t))
|
||||
&br_aes_x86ni_ctrcbc_encrypt,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, void *, void *, size_t))
|
||||
&br_aes_x86ni_ctrcbc_decrypt,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, void *, size_t))
|
||||
&br_aes_x86ni_ctrcbc_ctr,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, const void *, size_t))
|
||||
&br_aes_x86ni_ctrcbc_mac
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_ctrcbc_class *
|
||||
br_aes_x86ni_ctrcbc_get_vtable(void)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,206 @@
|
|||
/*
|
||||
* Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_rand.h */
|
||||
void
|
||||
br_aesctr_drbg_init(br_aesctr_drbg_context *ctx,
|
||||
const br_block_ctr_class *aesctr,
|
||||
const void *seed, size_t len)
|
||||
{
|
||||
unsigned char tmp[16];
|
||||
|
||||
ctx->vtable = &br_aesctr_drbg_vtable;
|
||||
memset(tmp, 0, sizeof tmp);
|
||||
aesctr->init(&ctx->sk.vtable, tmp, 16);
|
||||
ctx->cc = 0;
|
||||
br_aesctr_drbg_update(ctx, seed, len);
|
||||
}
|
||||
|
||||
/* see bearssl_rand.h */
|
||||
void
|
||||
br_aesctr_drbg_generate(br_aesctr_drbg_context *ctx, void *out, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
unsigned char iv[12];
|
||||
|
||||
buf = out;
|
||||
memset(iv, 0, sizeof iv);
|
||||
while (len > 0) {
|
||||
size_t clen;
|
||||
|
||||
/*
|
||||
* We generate data by blocks of at most 65280 bytes. This
|
||||
* allows for unambiguously testing the counter overflow
|
||||
* condition; also, it should work on 16-bit architectures
|
||||
* (where 'size_t' is 16 bits only).
|
||||
*/
|
||||
clen = len;
|
||||
if (clen > 65280) {
|
||||
clen = 65280;
|
||||
}
|
||||
|
||||
/*
|
||||
* We make sure that the counter won't exceed the configured
|
||||
* limit.
|
||||
*/
|
||||
if ((uint32_t)(ctx->cc + ((clen + 15) >> 4)) > 32768) {
|
||||
clen = (32768 - ctx->cc) << 4;
|
||||
if (clen > len) {
|
||||
clen = len;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Run CTR.
|
||||
*/
|
||||
memset(buf, 0, clen);
|
||||
ctx->cc = ctx->sk.vtable->run(&ctx->sk.vtable,
|
||||
iv, ctx->cc, buf, clen);
|
||||
buf += clen;
|
||||
len -= clen;
|
||||
|
||||
/*
|
||||
* Every 32768 blocks, we force a state update.
|
||||
*/
|
||||
if (ctx->cc >= 32768) {
|
||||
br_aesctr_drbg_update(ctx, NULL, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_rand.h */
|
||||
void
|
||||
br_aesctr_drbg_update(br_aesctr_drbg_context *ctx, const void *seed, size_t len)
|
||||
{
|
||||
/*
|
||||
* We use a Hirose construction on AES-256 to make a hash function.
|
||||
* Function definition:
|
||||
* - running state consists in two 16-byte blocks G and H
|
||||
* - initial values of G and H are conventional
|
||||
* - there is a fixed block-sized constant C
|
||||
* - for next data block m:
|
||||
* set AES key to H||m
|
||||
* G' = E(G) xor G
|
||||
* H' = E(G xor C) xor G xor C
|
||||
* G <- G', H <- H'
|
||||
* - once all blocks have been processed, output is H||G
|
||||
*
|
||||
* Constants:
|
||||
* G_init = B6 B6 ... B6
|
||||
* H_init = A5 A5 ... A5
|
||||
* C = 01 00 ... 00
|
||||
*
|
||||
* With this hash function h(), we compute the new state as
|
||||
* follows:
|
||||
* - produce a state-dependent value s as encryption of an
|
||||
* all-one block with AES and the current key
|
||||
* - compute the new key as the first 128 bits of h(s||seed)
|
||||
*
|
||||
* Original Hirose article:
|
||||
* https://www.iacr.org/archive/fse2006/40470213/40470213.pdf
|
||||
*/
|
||||
|
||||
unsigned char s[16], iv[12];
|
||||
unsigned char G[16], H[16];
|
||||
int first;
|
||||
|
||||
/*
|
||||
* Use an all-one IV to get a fresh output block that depends on the
|
||||
* current seed.
|
||||
*/
|
||||
memset(iv, 0xFF, sizeof iv);
|
||||
memset(s, 0, 16);
|
||||
ctx->sk.vtable->run(&ctx->sk.vtable, iv, 0xFFFFFFFF, s, 16);
|
||||
|
||||
/*
|
||||
* Set G[] and H[] to conventional start values.
|
||||
*/
|
||||
memset(G, 0xB6, sizeof G);
|
||||
memset(H, 0x5A, sizeof H);
|
||||
|
||||
/*
|
||||
* Process the concatenation of the current state and the seed
|
||||
* with the custom hash function.
|
||||
*/
|
||||
first = 1;
|
||||
for (;;) {
|
||||
unsigned char tmp[32];
|
||||
unsigned char newG[16];
|
||||
|
||||
/*
|
||||
* Assemble new key H||m into tmp[].
|
||||
*/
|
||||
memcpy(tmp, H, 16);
|
||||
if (first) {
|
||||
memcpy(tmp + 16, s, 16);
|
||||
first = 0;
|
||||
} else {
|
||||
size_t clen;
|
||||
|
||||
if (len == 0) {
|
||||
break;
|
||||
}
|
||||
clen = len < 16 ? len : 16;
|
||||
memcpy(tmp + 16, seed, clen);
|
||||
memset(tmp + 16 + clen, 0, 16 - clen);
|
||||
seed = (const unsigned char *)seed + clen;
|
||||
len -= clen;
|
||||
}
|
||||
ctx->sk.vtable->init(&ctx->sk.vtable, tmp, 32);
|
||||
|
||||
/*
|
||||
* Compute new G and H values.
|
||||
*/
|
||||
memcpy(iv, G, 12);
|
||||
memcpy(newG, G, 16);
|
||||
ctx->sk.vtable->run(&ctx->sk.vtable, iv,
|
||||
br_dec32be(G + 12), newG, 16);
|
||||
iv[0] ^= 0x01;
|
||||
memcpy(H, G, 16);
|
||||
H[0] ^= 0x01;
|
||||
ctx->sk.vtable->run(&ctx->sk.vtable, iv,
|
||||
br_dec32be(G + 12), H, 16);
|
||||
memcpy(G, newG, 16);
|
||||
}
|
||||
|
||||
/*
|
||||
* Output hash value is H||G. We truncate it to its first 128 bits,
|
||||
* i.e. H; that's our new AES key.
|
||||
*/
|
||||
ctx->sk.vtable->init(&ctx->sk.vtable, H, 16);
|
||||
ctx->cc = 0;
|
||||
}
|
||||
|
||||
/* see bearssl_rand.h */
|
||||
const br_prng_class br_aesctr_drbg_vtable = {
|
||||
sizeof(br_aesctr_drbg_context),
|
||||
(void (*)(const br_prng_class **, const void *, const void *, size_t))
|
||||
&br_aesctr_drbg_init,
|
||||
(void (*)(const br_prng_class **, void *, size_t))
|
||||
&br_aesctr_drbg_generate,
|
||||
(void (*)(const br_prng_class **, const void *, size_t))
|
||||
&br_aesctr_drbg_update
|
||||
};
|
||||
|
|
@ -0,0 +1,93 @@
|
|||
/*
|
||||
* Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see inner.h */
|
||||
br_asn1_uint
|
||||
br_asn1_uint_prepare(const void *xdata, size_t xlen)
|
||||
{
|
||||
const unsigned char *x;
|
||||
br_asn1_uint t;
|
||||
|
||||
x = xdata;
|
||||
while (xlen > 0 && *x == 0) {
|
||||
x ++;
|
||||
xlen --;
|
||||
}
|
||||
t.data = x;
|
||||
t.len = xlen;
|
||||
t.asn1len = xlen;
|
||||
if (xlen == 0 || x[0] >= 0x80) {
|
||||
t.asn1len ++;
|
||||
}
|
||||
return t;
|
||||
}
|
||||
|
||||
/*
 * Encode an ASN.1 length (see inner.h).
 *
 *   dest   output buffer, or NULL to only compute the encoded size
 *   len    length value to encode
 *
 * Values below 0x80 use a single byte. Larger values use one byte
 * 0x80+n followed by the n significant bytes of the value, most
 * significant first. The encoded size, in bytes, is returned.
 */
size_t
br_asn1_encode_length(void *dest, size_t len)
{
	unsigned char *out;
	size_t rest;
	int nbytes, shift;

	out = dest;
	if (len < 0x80) {
		if (out != NULL) {
			*out = (unsigned char)len;
		}
		return 1;
	}

	/*
	 * Count the significant bytes; len >= 0x80 here, so there is at
	 * least one.
	 */
	nbytes = 0;
	rest = len;
	do {
		nbytes ++;
		rest >>= 8;
	} while (rest != 0);

	if (out != NULL) {
		*out ++ = (unsigned char)(0x80 + nbytes);
		for (shift = (nbytes - 1) << 3; shift >= 0; shift -= 8) {
			*out ++ = (unsigned char)(len >> shift);
		}
	}
	return (size_t)nbytes + 1;
}
|
||||
|
||||
/* see inner.h */
|
||||
size_t
|
||||
br_asn1_encode_uint(void *dest, br_asn1_uint pp)
|
||||
{
|
||||
unsigned char *buf;
|
||||
size_t lenlen;
|
||||
|
||||
if (dest == NULL) {
|
||||
return 1 + br_asn1_encode_length(NULL, pp.asn1len) + pp.asn1len;
|
||||
}
|
||||
buf = dest;
|
||||
*buf ++ = 0x02;
|
||||
lenlen = br_asn1_encode_length(buf, pp.asn1len);
|
||||
buf += lenlen;
|
||||
*buf = 0x00;
|
||||
memcpy(buf + pp.asn1len - pp.len, pp.data, pp.len);
|
||||
return 1 + lenlen + pp.asn1len;
|
||||
}
|
||||
|
|
@ -0,0 +1,346 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
|
||||
* Implementation Notes
|
||||
* ====================
|
||||
*
|
||||
* The combined CTR + CBC-MAC functions can only handle full blocks,
|
||||
* so some buffering is necessary.
|
||||
*
|
||||
* - 'ptr' contains a value from 0 to 15, which is the number of bytes
|
||||
* accumulated in buf[] that still needs to be processed with the
|
||||
* current CBC-MAC computation.
|
||||
*
|
||||
* - When processing the message itself, CTR encryption/decryption is
|
||||
* also done at the same time. The first 'ptr' bytes of buf[] then
|
||||
* contain the plaintext bytes, while the last '16 - ptr' bytes of
|
||||
* buf[] are the remnants of the stream block, to be used against
|
||||
* the next input bytes, when available. When 'ptr' is 0, the
|
||||
* contents of buf[] are to be ignored.
|
||||
*
|
||||
* - The current counter and running CBC-MAC values are kept in 'ctr'
|
||||
* and 'cbcmac', respectively.
|
||||
*/
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_ccm_init(br_ccm_context *ctx, const br_block_ctrcbc_class **bctx)
|
||||
{
|
||||
ctx->bctx = bctx;
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
int
|
||||
br_ccm_reset(br_ccm_context *ctx, const void *nonce, size_t nonce_len,
|
||||
uint64_t aad_len, uint64_t data_len, size_t tag_len)
|
||||
{
|
||||
unsigned char tmp[16];
|
||||
unsigned u, q;
|
||||
|
||||
if (nonce_len < 7 || nonce_len > 13) {
|
||||
return 0;
|
||||
}
|
||||
if (tag_len < 4 || tag_len > 16 || (tag_len & 1) != 0) {
|
||||
return 0;
|
||||
}
|
||||
q = 15 - (unsigned)nonce_len;
|
||||
ctx->tag_len = tag_len;
|
||||
|
||||
/*
|
||||
* Block B0, to start CBC-MAC.
|
||||
*/
|
||||
tmp[0] = (aad_len > 0 ? 0x40 : 0x00)
|
||||
| (((unsigned)tag_len - 2) << 2)
|
||||
| (q - 1);
|
||||
memcpy(tmp + 1, nonce, nonce_len);
|
||||
for (u = 0; u < q; u ++) {
|
||||
tmp[15 - u] = (unsigned char)data_len;
|
||||
data_len >>= 8;
|
||||
}
|
||||
if (data_len != 0) {
|
||||
/*
|
||||
* If the data length was not entirely consumed in the
|
||||
* loop above, then it exceeds the maximum limit of
|
||||
* q bytes (when encoded).
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Start CBC-MAC.
|
||||
*/
|
||||
memset(ctx->cbcmac, 0, sizeof ctx->cbcmac);
|
||||
(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, tmp, sizeof tmp);
|
||||
|
||||
/*
|
||||
* Assemble AAD length header.
|
||||
*/
|
||||
if ((aad_len >> 32) != 0) {
|
||||
ctx->buf[0] = 0xFF;
|
||||
ctx->buf[1] = 0xFF;
|
||||
br_enc64be(ctx->buf + 2, aad_len);
|
||||
ctx->ptr = 10;
|
||||
} else if (aad_len >= 0xFF00) {
|
||||
ctx->buf[0] = 0xFF;
|
||||
ctx->buf[1] = 0xFE;
|
||||
br_enc32be(ctx->buf + 2, (uint32_t)aad_len);
|
||||
ctx->ptr = 6;
|
||||
} else if (aad_len > 0) {
|
||||
br_enc16be(ctx->buf, (unsigned)aad_len);
|
||||
ctx->ptr = 2;
|
||||
} else {
|
||||
ctx->ptr = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make initial counter value and compute tag mask.
|
||||
*/
|
||||
ctx->ctr[0] = q - 1;
|
||||
memcpy(ctx->ctr + 1, nonce, nonce_len);
|
||||
memset(ctx->ctr + 1 + nonce_len, 0, q);
|
||||
memset(ctx->tagmask, 0, sizeof ctx->tagmask);
|
||||
(*ctx->bctx)->ctr(ctx->bctx, ctx->ctr,
|
||||
ctx->tagmask, sizeof ctx->tagmask);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_ccm_aad_inject(br_ccm_context *ctx, const void *data, size_t len)
|
||||
{
|
||||
const unsigned char *dbuf;
|
||||
size_t ptr;
|
||||
|
||||
dbuf = data;
|
||||
|
||||
/*
|
||||
* Complete partial block, if needed.
|
||||
*/
|
||||
ptr = ctx->ptr;
|
||||
if (ptr != 0) {
|
||||
size_t clen;
|
||||
|
||||
clen = (sizeof ctx->buf) - ptr;
|
||||
if (clen > len) {
|
||||
memcpy(ctx->buf + ptr, dbuf, len);
|
||||
ctx->ptr = ptr + len;
|
||||
return;
|
||||
}
|
||||
memcpy(ctx->buf + ptr, dbuf, clen);
|
||||
dbuf += clen;
|
||||
len -= clen;
|
||||
(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac,
|
||||
ctx->buf, sizeof ctx->buf);
|
||||
}
|
||||
|
||||
/*
|
||||
* Process complete blocks.
|
||||
*/
|
||||
ptr = len & 15;
|
||||
len -= ptr;
|
||||
(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, dbuf, len);
|
||||
dbuf += len;
|
||||
|
||||
/*
|
||||
* Copy last partial block in the context buffer.
|
||||
*/
|
||||
memcpy(ctx->buf, dbuf, ptr);
|
||||
ctx->ptr = ptr;
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_ccm_flip(br_ccm_context *ctx)
|
||||
{
|
||||
size_t ptr;
|
||||
|
||||
/*
|
||||
* Complete AAD partial block with zeros, if necessary.
|
||||
*/
|
||||
ptr = ctx->ptr;
|
||||
if (ptr != 0) {
|
||||
memset(ctx->buf + ptr, 0, (sizeof ctx->buf) - ptr);
|
||||
(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac,
|
||||
ctx->buf, sizeof ctx->buf);
|
||||
ctx->ptr = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Counter was already set by br_ccm_reset().
|
||||
*/
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_ccm_run(br_ccm_context *ctx, int encrypt, void *data, size_t len)
|
||||
{
|
||||
unsigned char *dbuf;
|
||||
size_t ptr;
|
||||
|
||||
dbuf = data;
|
||||
|
||||
/*
|
||||
* Complete a partial block, if any: ctx->buf[] contains
|
||||
* ctx->ptr plaintext bytes (already reported), and the other
|
||||
* bytes are CTR stream output.
|
||||
*/
|
||||
ptr = ctx->ptr;
|
||||
if (ptr != 0) {
|
||||
size_t clen;
|
||||
size_t u;
|
||||
|
||||
clen = (sizeof ctx->buf) - ptr;
|
||||
if (clen > len) {
|
||||
clen = len;
|
||||
}
|
||||
if (encrypt) {
|
||||
for (u = 0; u < clen; u ++) {
|
||||
unsigned w, x;
|
||||
|
||||
w = ctx->buf[ptr + u];
|
||||
x = dbuf[u];
|
||||
ctx->buf[ptr + u] = x;
|
||||
dbuf[u] = w ^ x;
|
||||
}
|
||||
} else {
|
||||
for (u = 0; u < clen; u ++) {
|
||||
unsigned w;
|
||||
|
||||
w = ctx->buf[ptr + u] ^ dbuf[u];
|
||||
dbuf[u] = w;
|
||||
ctx->buf[ptr + u] = w;
|
||||
}
|
||||
}
|
||||
dbuf += clen;
|
||||
len -= clen;
|
||||
ptr += clen;
|
||||
if (ptr < sizeof ctx->buf) {
|
||||
ctx->ptr = ptr;
|
||||
return;
|
||||
}
|
||||
(*ctx->bctx)->mac(ctx->bctx,
|
||||
ctx->cbcmac, ctx->buf, sizeof ctx->buf);
|
||||
}
|
||||
|
||||
/*
|
||||
* Process all complete blocks. Note that the ctrcbc API is for
|
||||
* encrypt-then-MAC (CBC-MAC is computed over the encrypted
|
||||
* blocks) while CCM uses MAC-and-encrypt (CBC-MAC is computed
|
||||
* over the plaintext blocks). Therefore, we need to use the
|
||||
* _decryption_ function for encryption, and the encryption
|
||||
* function for decryption (this works because CTR encryption
|
||||
* and decryption are identical, so the choice really is about
|
||||
* computing the CBC-MAC before or after XORing with the CTR
|
||||
* stream).
|
||||
*/
|
||||
ptr = len & 15;
|
||||
len -= ptr;
|
||||
if (encrypt) {
|
||||
(*ctx->bctx)->decrypt(ctx->bctx, ctx->ctr, ctx->cbcmac,
|
||||
dbuf, len);
|
||||
} else {
|
||||
(*ctx->bctx)->encrypt(ctx->bctx, ctx->ctr, ctx->cbcmac,
|
||||
dbuf, len);
|
||||
}
|
||||
dbuf += len;
|
||||
|
||||
/*
|
||||
* If there is some remaining data, then we need to compute an
|
||||
* extra block of CTR stream.
|
||||
*/
|
||||
if (ptr != 0) {
|
||||
size_t u;
|
||||
|
||||
memset(ctx->buf, 0, sizeof ctx->buf);
|
||||
(*ctx->bctx)->ctr(ctx->bctx, ctx->ctr,
|
||||
ctx->buf, sizeof ctx->buf);
|
||||
if (encrypt) {
|
||||
for (u = 0; u < ptr; u ++) {
|
||||
unsigned w, x;
|
||||
|
||||
w = ctx->buf[u];
|
||||
x = dbuf[u];
|
||||
ctx->buf[u] = x;
|
||||
dbuf[u] = w ^ x;
|
||||
}
|
||||
} else {
|
||||
for (u = 0; u < ptr; u ++) {
|
||||
unsigned w;
|
||||
|
||||
w = ctx->buf[u] ^ dbuf[u];
|
||||
dbuf[u] = w;
|
||||
ctx->buf[u] = w;
|
||||
}
|
||||
}
|
||||
}
|
||||
ctx->ptr = ptr;
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
size_t
|
||||
br_ccm_get_tag(br_ccm_context *ctx, void *tag)
|
||||
{
|
||||
size_t ptr;
|
||||
size_t u;
|
||||
|
||||
/*
|
||||
* If there is some buffered data, then we need to pad it with
|
||||
* zeros and finish up CBC-MAC.
|
||||
*/
|
||||
ptr = ctx->ptr;
|
||||
if (ptr != 0) {
|
||||
memset(ctx->buf + ptr, 0, (sizeof ctx->buf) - ptr);
|
||||
(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac,
|
||||
ctx->buf, sizeof ctx->buf);
|
||||
}
|
||||
|
||||
/*
|
||||
* XOR the tag mask into the CBC-MAC output.
|
||||
*/
|
||||
for (u = 0; u < ctx->tag_len; u ++) {
|
||||
ctx->cbcmac[u] ^= ctx->tagmask[u];
|
||||
}
|
||||
memcpy(tag, ctx->cbcmac, ctx->tag_len);
|
||||
return ctx->tag_len;
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
uint32_t
|
||||
br_ccm_check_tag(br_ccm_context *ctx, const void *tag)
|
||||
{
|
||||
unsigned char tmp[16];
|
||||
size_t u, tag_len;
|
||||
uint32_t z;
|
||||
|
||||
tag_len = br_ccm_get_tag(ctx, tmp);
|
||||
z = 0;
|
||||
for (u = 0; u < tag_len; u ++) {
|
||||
z |= tmp[u] ^ ((const unsigned char *)tag)[u];
|
||||
}
|
||||
return EQ0(z);
|
||||
}
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see inner.h */
/*
 * Conditional copy: each of the len bytes of dst[] is replaced with
 * MUX(ctl, source byte, current destination byte). Both buffers are
 * read in full, and every destination byte is written, regardless of
 * the value of ctl.
 *
 * NOTE(review): MUX() is defined elsewhere; presumably it returns its
 * second argument when ctl is 1 and its third when ctl is 0 -- confirm
 * in inner.h.
 */
void
br_ccopy(uint32_t ctl, void *dst, const void *src, size_t len)
{
	unsigned char *out;
	const unsigned char *in;
	size_t k;

	out = dst;
	in = src;
	for (k = 0; k < len; k++) {
		uint32_t from_src, from_dst;

		from_src = in[k];
		from_dst = out[k];
		out[k] = MUX(ctl, from_src, from_dst);
	}
}
|
||||
|
|
@ -0,0 +1,106 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_block.h */
/*
 * Portable ChaCha20: XOR len bytes of data[] with the key stream
 * generated from the key (32 bytes, read as eight little-endian words),
 * the IV (12 bytes, three little-endian words) and the 32-bit block
 * counter cc. One 64-byte block is produced per counter value; the
 * last block may be used only partially.
 *
 * Returned value: the counter value following the last block used
 * (cc itself when len is 0).
 */
uint32_t
br_chacha20_ct_run(const void *key,
	const void *iv, uint32_t cc, void *data, size_t len)
{
	static const uint32_t CW[] = {
		0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
	};

	const unsigned char *kbytes, *nbytes;
	unsigned char *out;
	uint32_t init[16];
	size_t k;

	/*
	 * Left rotation of a 32-bit word, and the quarter round applied
	 * on four words of the local state[] array.
	 */
#define CC20_ROTL(w, n)   (((w) << (n)) | ((w) >> (32 - (n))))
#define CC20_QROUND(a, b, c, d)   do { \
		state[a] += state[b]; \
		state[d] = CC20_ROTL(state[d] ^ state[a], 16); \
		state[c] += state[d]; \
		state[b] = CC20_ROTL(state[b] ^ state[c], 12); \
		state[a] += state[b]; \
		state[d] = CC20_ROTL(state[d] ^ state[a], 8); \
		state[c] += state[d]; \
		state[b] = CC20_ROTL(state[b] ^ state[c], 7); \
	} while (0)

	kbytes = key;
	nbytes = iv;
	out = data;

	/*
	 * Initial state layout: words 0..3 are the constants, 4..11 the
	 * key, 12 the block counter, 13..15 the IV.
	 */
	memcpy(init, CW, sizeof CW);
	for (k = 0; k < 8; k++) {
		init[4 + k] = br_dec32le(kbytes + (k << 2));
	}
	init[12] = cc;
	for (k = 0; k < 3; k++) {
		init[13 + k] = br_dec32le(nbytes + (k << 2));
	}

	while (len > 0) {
		uint32_t state[16];
		unsigned char stream[64];
		size_t take;
		int round;

		init[12] = cc;
		memcpy(state, init, sizeof state);

		/*
		 * Ten double rounds: four column quarter rounds, then
		 * four diagonal quarter rounds.
		 */
		for (round = 0; round < 10; round++) {
			CC20_QROUND( 0,  4,  8, 12);
			CC20_QROUND( 1,  5,  9, 13);
			CC20_QROUND( 2,  6, 10, 14);
			CC20_QROUND( 3,  7, 11, 15);
			CC20_QROUND( 0,  5, 10, 15);
			CC20_QROUND( 1,  6, 11, 12);
			CC20_QROUND( 2,  7,  8, 13);
			CC20_QROUND( 3,  4,  9, 14);
		}

		/*
		 * Add the initial state and serialise as little-endian.
		 */
		for (k = 0; k < 16; k++) {
			br_enc32le(stream + (k << 2), state[k] + init[k]);
		}

		take = len < 64 ? len : 64;
		for (k = 0; k < take; k++) {
			out[k] ^= stream[k];
		}
		out += take;
		len -= take;
		cc++;
	}

#undef CC20_QROUND
#undef CC20_ROTL

	return cc;
}
|
||||
|
|
@ -0,0 +1,237 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#define BR_ENABLE_INTRINSICS 1
|
||||
#include "inner.h"
|
||||
|
||||
#if BR_SSE2
|
||||
|
||||
/*
|
||||
* This file contains a ChaCha20 implementation that leverages SSE2
|
||||
* opcodes for better performance.
|
||||
*/
|
||||
|
||||
/* see bearssl_block.h */
|
||||
br_chacha20_run
|
||||
br_chacha20_sse2_get(void)
|
||||
{
|
||||
/*
|
||||
* If using 64-bit mode, then SSE2 opcodes should be automatically
|
||||
* available, since they are part of the ABI.
|
||||
*
|
||||
* In 32-bit mode, we use CPUID to detect the SSE2 feature.
|
||||
*/
|
||||
|
||||
#if BR_amd64
|
||||
return &br_chacha20_sse2_run;
|
||||
#else
|
||||
|
||||
/*
|
||||
* SSE2 support is indicated by bit 26 in EDX.
|
||||
*/
|
||||
if (br_cpuid(0, 0, 0, 0x04000000)) {
|
||||
return &br_chacha20_sse2_run;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
BR_TARGETS_X86_UP
|
||||
|
||||
/* see bearssl_block.h */
|
||||
BR_TARGET("sse2")
|
||||
uint32_t
|
||||
br_chacha20_sse2_run(const void *key,
|
||||
const void *iv, uint32_t cc, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
uint32_t ivtmp[4];
|
||||
__m128i kw0, kw1;
|
||||
__m128i iw, cw;
|
||||
__m128i one;
|
||||
|
||||
static const uint32_t CW[] = {
|
||||
0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
|
||||
};
|
||||
|
||||
buf = data;
|
||||
kw0 = _mm_loadu_si128(key);
|
||||
kw1 = _mm_loadu_si128((const void *)((const unsigned char *)key + 16));
|
||||
ivtmp[0] = cc;
|
||||
memcpy(ivtmp + 1, iv, 12);
|
||||
iw = _mm_loadu_si128((const void *)ivtmp);
|
||||
cw = _mm_loadu_si128((const void *)CW);
|
||||
one = _mm_set_epi32(0, 0, 0, 1);
|
||||
|
||||
while (len > 0) {
|
||||
/*
|
||||
* sj contains state words 4*j to 4*j+3.
|
||||
*/
|
||||
__m128i s0, s1, s2, s3;
|
||||
int i;
|
||||
|
||||
s0 = cw;
|
||||
s1 = kw0;
|
||||
s2 = kw1;
|
||||
s3 = iw;
|
||||
for (i = 0; i < 10; i ++) {
|
||||
/*
|
||||
* Even round is straightforward application on
|
||||
* the state words.
|
||||
*/
|
||||
s0 = _mm_add_epi32(s0, s1);
|
||||
s3 = _mm_xor_si128(s3, s0);
|
||||
s3 = _mm_or_si128(
|
||||
_mm_slli_epi32(s3, 16),
|
||||
_mm_srli_epi32(s3, 16));
|
||||
|
||||
s2 = _mm_add_epi32(s2, s3);
|
||||
s1 = _mm_xor_si128(s1, s2);
|
||||
s1 = _mm_or_si128(
|
||||
_mm_slli_epi32(s1, 12),
|
||||
_mm_srli_epi32(s1, 20));
|
||||
|
||||
s0 = _mm_add_epi32(s0, s1);
|
||||
s3 = _mm_xor_si128(s3, s0);
|
||||
s3 = _mm_or_si128(
|
||||
_mm_slli_epi32(s3, 8),
|
||||
_mm_srli_epi32(s3, 24));
|
||||
|
||||
s2 = _mm_add_epi32(s2, s3);
|
||||
s1 = _mm_xor_si128(s1, s2);
|
||||
s1 = _mm_or_si128(
|
||||
_mm_slli_epi32(s1, 7),
|
||||
_mm_srli_epi32(s1, 25));
|
||||
|
||||
/*
|
||||
* For the odd round, we must rotate some state
|
||||
* words so that the computations apply on the
|
||||
* right combinations of words.
|
||||
*/
|
||||
s1 = _mm_shuffle_epi32(s1, 0x39);
|
||||
s2 = _mm_shuffle_epi32(s2, 0x4E);
|
||||
s3 = _mm_shuffle_epi32(s3, 0x93);
|
||||
|
||||
s0 = _mm_add_epi32(s0, s1);
|
||||
s3 = _mm_xor_si128(s3, s0);
|
||||
s3 = _mm_or_si128(
|
||||
_mm_slli_epi32(s3, 16),
|
||||
_mm_srli_epi32(s3, 16));
|
||||
|
||||
s2 = _mm_add_epi32(s2, s3);
|
||||
s1 = _mm_xor_si128(s1, s2);
|
||||
s1 = _mm_or_si128(
|
||||
_mm_slli_epi32(s1, 12),
|
||||
_mm_srli_epi32(s1, 20));
|
||||
|
||||
s0 = _mm_add_epi32(s0, s1);
|
||||
s3 = _mm_xor_si128(s3, s0);
|
||||
s3 = _mm_or_si128(
|
||||
_mm_slli_epi32(s3, 8),
|
||||
_mm_srli_epi32(s3, 24));
|
||||
|
||||
s2 = _mm_add_epi32(s2, s3);
|
||||
s1 = _mm_xor_si128(s1, s2);
|
||||
s1 = _mm_or_si128(
|
||||
_mm_slli_epi32(s1, 7),
|
||||
_mm_srli_epi32(s1, 25));
|
||||
|
||||
/*
|
||||
* After the odd round, we rotate back the values
|
||||
* to undo the rotate at the start of the odd round.
|
||||
*/
|
||||
s1 = _mm_shuffle_epi32(s1, 0x93);
|
||||
s2 = _mm_shuffle_epi32(s2, 0x4E);
|
||||
s3 = _mm_shuffle_epi32(s3, 0x39);
|
||||
}
|
||||
|
||||
/*
|
||||
* Addition with the initial state.
|
||||
*/
|
||||
s0 = _mm_add_epi32(s0, cw);
|
||||
s1 = _mm_add_epi32(s1, kw0);
|
||||
s2 = _mm_add_epi32(s2, kw1);
|
||||
s3 = _mm_add_epi32(s3, iw);
|
||||
|
||||
/*
|
||||
* Increment block counter.
|
||||
*/
|
||||
iw = _mm_add_epi32(iw, one);
|
||||
|
||||
/*
|
||||
* XOR final state with the data.
|
||||
*/
|
||||
if (len < 64) {
|
||||
unsigned char tmp[64];
|
||||
size_t u;
|
||||
|
||||
_mm_storeu_si128((void *)(tmp + 0), s0);
|
||||
_mm_storeu_si128((void *)(tmp + 16), s1);
|
||||
_mm_storeu_si128((void *)(tmp + 32), s2);
|
||||
_mm_storeu_si128((void *)(tmp + 48), s3);
|
||||
for (u = 0; u < len; u ++) {
|
||||
buf[u] ^= tmp[u];
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
__m128i b0, b1, b2, b3;
|
||||
|
||||
b0 = _mm_loadu_si128((const void *)(buf + 0));
|
||||
b1 = _mm_loadu_si128((const void *)(buf + 16));
|
||||
b2 = _mm_loadu_si128((const void *)(buf + 32));
|
||||
b3 = _mm_loadu_si128((const void *)(buf + 48));
|
||||
b0 = _mm_xor_si128(b0, s0);
|
||||
b1 = _mm_xor_si128(b1, s1);
|
||||
b2 = _mm_xor_si128(b2, s2);
|
||||
b3 = _mm_xor_si128(b3, s3);
|
||||
_mm_storeu_si128((void *)(buf + 0), b0);
|
||||
_mm_storeu_si128((void *)(buf + 16), b1);
|
||||
_mm_storeu_si128((void *)(buf + 32), b2);
|
||||
_mm_storeu_si128((void *)(buf + 48), b3);
|
||||
buf += 64;
|
||||
len -= 64;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* _mm_extract_epi32() requires SSE4.1. We prefer to stick to
|
||||
* raw SSE2, thus we use _mm_extract_epi16().
|
||||
*/
|
||||
return (uint32_t)_mm_extract_epi16(iw, 0)
|
||||
| ((uint32_t)_mm_extract_epi16(iw, 1) << 16);
|
||||
}
|
||||
|
||||
BR_TARGETS_X86_DOWN
|
||||
|
||||
#else
|
||||
|
||||
/* see bearssl_block.h */
|
||||
br_chacha20_run
|
||||
br_chacha20_sse2_get(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,247 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef CONFIG_H__
|
||||
#define CONFIG_H__
|
||||
|
||||
/*
|
||||
* This file contains compile-time flags that can override the
|
||||
* autodetection performed in relevant files. Each flag is a macro; it
|
||||
* deactivates the feature if defined to 0, activates it if defined to a
|
||||
* non-zero integer (normally 1). If the macro is not defined, then
|
||||
* autodetection applies.
|
||||
*/
|
||||
|
||||
/*
|
||||
* When BR_64 is enabled, 64-bit integer types are assumed to be
|
||||
* efficient (i.e. the architecture has 64-bit registers and can
|
||||
* do 64-bit operations as fast as 32-bit operations).
|
||||
*
|
||||
#define BR_64 1
|
||||
*/
|
||||
|
||||
/*
|
||||
* When BR_LOMUL is enabled, then multiplications of 32-bit values whose
|
||||
* results are truncated to the low 32 bits are assumed to be
|
||||
* substantially more efficient than 32-bit multiplications that yield
|
||||
* 64-bit results. This is typically the case on low-end ARM Cortex M
|
||||
* systems (M0, M0+, M1, and arguably M3 and M4 as well).
|
||||
*
|
||||
#define BR_LOMUL 1
|
||||
*/
|
||||
|
||||
/*
|
||||
* When BR_SLOW_MUL is enabled, multiplications are assumed to be
|
||||
* substantially slow with regards to other integer operations, thus
|
||||
* making it worth to make more operations for a given task if it allows
|
||||
* using fewer multiplications.
|
||||
*
|
||||
#define BR_SLOW_MUL 1
|
||||
*/
|
||||
|
||||
/*
|
||||
* When BR_SLOW_MUL15 is enabled, short multiplications (on 15-bit words)
|
||||
* are assumed to be substantially slow with regards to other integer
|
||||
* operations, thus making it worth to make more integer operations if
|
||||
* it allows using fewer multiplications.
|
||||
*
|
||||
#define BR_SLOW_MUL15 1
|
||||
*/
|
||||
|
||||
/*
|
||||
* When BR_CT_MUL31 is enabled, multiplications of 31-bit values (used
|
||||
* in the "i31" big integer implementation) use an alternate implementation
|
||||
* which is slower and larger than the normal multiplication, but should
|
||||
* ensure constant-time multiplications even on architectures where the
|
||||
* multiplication opcode takes a variable number of cycles to complete.
|
||||
*
|
||||
#define BR_CT_MUL31 1
|
||||
*/
|
||||
|
||||
/*
|
||||
* When BR_CT_MUL15 is enabled, multiplications of 15-bit values (held
|
||||
* in 32-bit words) use an alternate implementation which is slower and
|
||||
* larger than the normal multiplication, but should ensure
|
||||
* constant-time multiplications on most/all architectures where the
|
||||
* basic multiplication is not constant-time.
|
||||
#define BR_CT_MUL15 1
|
||||
*/
|
||||
|
||||
/*
|
||||
* When BR_NO_ARITH_SHIFT is enabled, arithmetic right shifts (with sign
|
||||
* extension) are performed with a sequence of operations which is bigger
|
||||
* and slower than a simple right shift on a signed value. This avoids
|
||||
* relying on an implementation-defined behaviour. However, most if not
|
||||
* all C compilers use sign extension for right shifts on signed values,
|
||||
* so this alternate macro is disabled by default.
|
||||
#define BR_NO_ARITH_SHIFT 1
|
||||
*/
|
||||
|
||||
/*
|
||||
* When BR_RDRAND is enabled, the SSL engine will use the RDRAND opcode
|
||||
* to automatically obtain quality randomness for seeding its internal
|
||||
* PRNG. Since that opcode is present only in recent x86 CPU, its
|
||||
* support is dynamically tested; if the current CPU does not support
|
||||
* it, then another random source will be used, such as /dev/urandom or
|
||||
* CryptGenRandom().
|
||||
*
|
||||
#define BR_RDRAND 1
|
||||
*/
|
||||
|
||||
/*
|
||||
* When BR_USE_GETENTROPY is enabled, the SSL engine will use the
|
||||
* getentropy() function to obtain quality randomness for seeding its
|
||||
* internal PRNG. On Linux and FreeBSD, getentropy() is implemented by
|
||||
* the standard library with the system call getrandom(); on OpenBSD,
|
||||
* getentropy() is the system call, and there is no getrandom() wrapper,
|
||||
* hence the use of the getentropy() function for maximum portability.
|
||||
*
|
||||
* If the getentropy() call fails, and BR_USE_URANDOM is not explicitly
|
||||
* disabled, then /dev/urandom will be used as a fallback mechanism. On
|
||||
* FreeBSD and OpenBSD, this does not change much, since /dev/urandom
|
||||
* will block if not enough entropy has been obtained since last boot.
|
||||
* On Linux, /dev/urandom might not block, which can be troublesome in
|
||||
* early boot stages, which is why getentropy() is preferred.
|
||||
*
|
||||
#define BR_USE_GETENTROPY 1
|
||||
*/
|
||||
|
||||
/*
|
||||
* When BR_USE_URANDOM is enabled, the SSL engine will use /dev/urandom
|
||||
* to automatically obtain quality randomness for seeding its internal
|
||||
* PRNG.
|
||||
*
|
||||
#define BR_USE_URANDOM 1
|
||||
*/
|
||||
|
||||
/*
|
||||
* When BR_USE_WIN32_RAND is enabled, the SSL engine will use the Win32
|
||||
* (CryptoAPI) functions (CryptAcquireContext(), CryptGenRandom()...) to
|
||||
* automatically obtain quality randomness for seeding its internal PRNG.
|
||||
*
|
||||
* Note: if both BR_USE_URANDOM and BR_USE_WIN32_RAND are defined, the
|
||||
* former takes precedence.
|
||||
*
|
||||
#define BR_USE_WIN32_RAND 1
|
||||
*/
|
||||
|
||||
/*
|
||||
* When BR_USE_UNIX_TIME is enabled, the X.509 validation engine obtains
|
||||
* the current time from the OS by calling time(), and assuming that the
|
||||
* returned value (a 'time_t') is an integer that counts time in seconds
|
||||
* since the Unix Epoch (Jan 1st, 1970, 00:00 UTC).
|
||||
*
|
||||
#define BR_USE_UNIX_TIME 1
|
||||
*/
|
||||
|
||||
/*
|
||||
* When BR_USE_WIN32_TIME is enabled, the X.509 validation engine obtains
|
||||
* the current time from the OS by calling the Win32 function
|
||||
* GetSystemTimeAsFileTime().
|
||||
*
|
||||
* Note: if both BR_USE_UNIX_TIME and BR_USE_WIN32_TIME are defined, the
|
||||
* former takes precedence.
|
||||
*
|
||||
#define BR_USE_WIN32_TIME 1
|
||||
*/
|
||||
|
||||
/*
|
||||
* When BR_ARMEL_CORTEXM_GCC is enabled, some operations are replaced with
|
||||
* inline assembly which is shorter and/or faster. This should be used
|
||||
* only when all of the following are true:
|
||||
* - target architecture is ARM in Thumb mode
|
||||
* - target endianness is little-endian
|
||||
* - compiler is GCC (or GCC-compatible for inline assembly syntax)
|
||||
*
|
||||
* This is meant for the low-end cores (Cortex M0, M0+, M1, M3).
|
||||
* Note: if BR_LOMUL is not explicitly enabled or disabled, then
|
||||
* enabling BR_ARMEL_CORTEXM_GCC also enables BR_LOMUL.
|
||||
*
|
||||
#define BR_ARMEL_CORTEXM_GCC 1
|
||||
*/
|
||||
|
||||
/*
|
||||
* When BR_AES_X86NI is enabled, the AES implementation using the x86 "NI"
|
||||
* instructions (dedicated AES opcodes) will be compiled. If this is not
|
||||
* enabled explicitly, then that AES implementation will be compiled only
|
||||
* if a compatible compiler is detected. If set explicitly to 0, the
|
||||
* implementation will not be compiled at all.
|
||||
*
|
||||
#define BR_AES_X86NI 1
|
||||
*/
|
||||
|
||||
/*
|
||||
* When BR_SSE2 is enabled, SSE2 intrinsics will be used for some
|
||||
* algorithm implementations that use them (e.g. chacha20_sse2). If this
|
||||
* is not enabled explicitly, then support for SSE2 intrinsics will be
|
||||
* automatically detected. If set explicitly to 0, then SSE2 code will
|
||||
* not be compiled at all.
|
||||
*
|
||||
#define BR_SSE2 1
|
||||
*/
|
||||
|
||||
/*
|
||||
* When BR_POWER8 is enabled, the AES implementation using the POWER ISA
|
||||
* 2.07 opcodes (available on POWER8 processors and later) is compiled.
|
||||
* If this is not enabled explicitly, then that implementation will be
|
||||
* compiled only if a compatible compiler is detected, _and_ the target
|
||||
* architecture is POWER8 or later.
|
||||
*
|
||||
#define BR_POWER8 1
|
||||
*/
|
||||
|
||||
/*
|
||||
* When BR_INT128 is enabled, then code using the 'unsigned __int64'
|
||||
* and 'unsigned __int128' types will be used to leverage 64x64->128
|
||||
* unsigned multiplications. This should work with GCC and compatible
|
||||
* compilers on 64-bit architectures.
|
||||
*
|
||||
#define BR_INT128 1
|
||||
*/
|
||||
|
||||
/*
|
||||
* When BR_UMUL128 is enabled, then code using the '_umul128()' and
|
||||
* '_addcarry_u64()' intrinsics will be used to implement 64x64->128
|
||||
* unsigned multiplications. This should work on Visual C on x64 systems.
|
||||
*
|
||||
#define BR_UMUL128 1
|
||||
*/
|
||||
|
||||
/*
|
||||
* When BR_LE_UNALIGNED is enabled, then the current architecture is
|
||||
* assumed to use little-endian encoding for integers, and to tolerate
|
||||
* unaligned accesses with no or minimal time penalty.
|
||||
*
|
||||
#define BR_LE_UNALIGNED 1
|
||||
*/
|
||||
|
||||
/*
|
||||
* When BR_BE_UNALIGNED is enabled, then the current architecture is
|
||||
* assumed to use big-endian encoding for integers, and to tolerate
|
||||
* unaligned accesses with no or minimal time penalty.
|
||||
*
|
||||
#define BR_BE_UNALIGNED 1
|
||||
*/
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * see inner.h
 *
 * Decode 'num' consecutive 16-bit values from 'src' into 'v', using
 * br_dec16be() on each successive 2-byte group.
 */
void
br_range_dec16be(uint16_t *v, size_t num, const void *src)
{
	const unsigned char *in;
	size_t u;

	in = src;
	for (u = 0; u < num; u ++, in += 2) {
		v[u] = br_dec16be(in);
	}
}
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * see inner.h
 *
 * Decode 'num' consecutive 16-bit values from 'src' into 'v', using
 * br_dec16le() on each successive 2-byte group.
 */
void
br_range_dec16le(uint16_t *v, size_t num, const void *src)
{
	const unsigned char *in;
	size_t u;

	in = src;
	for (u = 0; u < num; u ++, in += 2) {
		v[u] = br_dec16le(in);
	}
}
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * see inner.h
 *
 * Decode 'num' consecutive 32-bit values from 'src' into 'v', using
 * br_dec32be() on each successive 4-byte group.
 */
void
br_range_dec32be(uint32_t *v, size_t num, const void *src)
{
	const unsigned char *in;
	size_t u;

	in = src;
	for (u = 0; u < num; u ++, in += 4) {
		v[u] = br_dec32be(in);
	}
}
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * see inner.h
 *
 * Decode 'num' consecutive 32-bit values from 'src' into 'v', using
 * br_dec32le() on each successive 4-byte group.
 */
void
br_range_dec32le(uint32_t *v, size_t num, const void *src)
{
	const unsigned char *in;
	size_t u;

	in = src;
	for (u = 0; u < num; u ++, in += 4) {
		v[u] = br_dec32le(in);
	}
}
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * see inner.h
 *
 * Decode 'num' consecutive 64-bit values from 'src' into 'v', using
 * br_dec64be() on each successive 8-byte group.
 */
void
br_range_dec64be(uint64_t *v, size_t num, const void *src)
{
	const unsigned char *in;
	size_t u;

	in = src;
	for (u = 0; u < num; u ++, in += 8) {
		v[u] = br_dec64be(in);
	}
}
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * see inner.h
 *
 * Decode 'num' consecutive 64-bit values from 'src' into 'v', using
 * br_dec64le() on each successive 8-byte group.
 */
void
br_range_dec64le(uint64_t *v, size_t num, const void *src)
{
	const unsigned char *in;
	size_t u;

	in = src;
	for (u = 0; u < num; u ++, in += 8) {
		v[u] = br_dec64le(in);
	}
}
|
||||
|
|
@ -0,0 +1,411 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * Key schedule tables. During key schedule, bit extraction PC-2 is
 * applied and the result is permuted into the bitslice representation
 * in a single pass. PC-2 extracts 48 bits out of two 28-bit words
 * (kl and kr); they are stored into two 32-bit words sk0 and sk1.
 *
 * The tables are listed most significant output bit first, i.e. for
 * array index j (0 to 15), as consumed by keysched_unit():
 *
 *  -- bit 31-j of sk0 comes from bit QL0[j] of kl
 *  -- bit 15-j of sk0 comes from bit QR0[j] of kr
 *  -- bit 31-j of sk1 comes from bit QL1[j] of kl
 *  -- bit 15-j of sk1 comes from bit QR1[j] of kr
 *
 * An entry equal to 28 points just above a 28-bit word; such output
 * positions are unused (presumably always zero, provided kl and kr
 * really hold only 28 bits -- confirm against br_des_keysched_unit()).
 */

/* Source bits of kl for the upper half of sk0. */
static const unsigned char QL0[16] = {
	17,  4, 27, 23, 13, 22,  7, 18,
	16, 24,  2, 20,  1,  8, 15, 26
};

/* Source bits of kr for the lower half of sk0. */
static const unsigned char QR0[16] = {
	25, 19,  9,  1,  5, 11, 23,  8,
	17,  0, 22,  3,  6, 20, 27, 24
};

/* Source bits of kl for the upper half of sk1. */
static const unsigned char QL1[16] = {
	28, 28, 14, 11, 28, 28, 25,  0,
	28, 28,  5,  9, 28, 28, 12, 21
};

/* Source bits of kr for the lower half of sk1. */
static const unsigned char QR1[16] = {
	28, 28, 15,  4, 28, 28, 26, 16,
	28, 28, 12,  7, 28, 28, 10, 14
};
|
||||
|
||||
/*
 * 32-bit left rotation of 'x' by 'n' bits; 'n' must be in the 0..31
 * range. The C compiler is supposed to recognize it as a rotation and
 * use the local architecture rotation opcode (if available).
 *
 * The right-shift count is reduced modulo 32: with a plain '32 - n',
 * a rotation count of 0 would shift a 32-bit value by 32 bits, which
 * is undefined behaviour in C. For n in 1..31 the result is unchanged.
 */
static inline uint32_t
rotl(uint32_t x, int n)
{
	return (x << n) | (x >> ((32 - n) & 31));
}
|
||||
|
||||
/*
|
||||
* Compute key schedule for 8 key bytes (produces 32 subkey words).
|
||||
*/
|
||||
static void
|
||||
keysched_unit(uint32_t *skey, const void *key)
|
||||
{
|
||||
int i;
|
||||
|
||||
br_des_keysched_unit(skey, key);
|
||||
|
||||
/*
|
||||
* Apply PC-2 + bitslicing.
|
||||
*/
|
||||
for (i = 0; i < 16; i ++) {
|
||||
uint32_t kl, kr, sk0, sk1;
|
||||
int j;
|
||||
|
||||
kl = skey[(i << 1) + 0];
|
||||
kr = skey[(i << 1) + 1];
|
||||
sk0 = 0;
|
||||
sk1 = 0;
|
||||
for (j = 0; j < 16; j ++) {
|
||||
sk0 <<= 1;
|
||||
sk1 <<= 1;
|
||||
sk0 |= ((kl >> QL0[j]) & (uint32_t)1) << 16;
|
||||
sk0 |= (kr >> QR0[j]) & (uint32_t)1;
|
||||
sk1 |= ((kl >> QL1[j]) & (uint32_t)1) << 16;
|
||||
sk1 |= (kr >> QR1[j]) & (uint32_t)1;
|
||||
}
|
||||
|
||||
skey[(i << 1) + 0] = sk0;
|
||||
skey[(i << 1) + 1] = sk1;
|
||||
}
|
||||
|
||||
#if 0
|
||||
/*
|
||||
* Speed-optimized version for PC-2 + bitslicing.
|
||||
* (Unused. Kept for reference only.)
|
||||
*/
|
||||
sk0 = kl & (uint32_t)0x00100000;
|
||||
sk0 |= (kl & (uint32_t)0x08008000) << 2;
|
||||
sk0 |= (kl & (uint32_t)0x00400000) << 4;
|
||||
sk0 |= (kl & (uint32_t)0x00800000) << 5;
|
||||
sk0 |= (kl & (uint32_t)0x00040000) << 6;
|
||||
sk0 |= (kl & (uint32_t)0x00010000) << 7;
|
||||
sk0 |= (kl & (uint32_t)0x00000100) << 10;
|
||||
sk0 |= (kl & (uint32_t)0x00022000) << 14;
|
||||
sk0 |= (kl & (uint32_t)0x00000082) << 18;
|
||||
sk0 |= (kl & (uint32_t)0x00000004) << 19;
|
||||
sk0 |= (kl & (uint32_t)0x04000000) >> 10;
|
||||
sk0 |= (kl & (uint32_t)0x00000010) << 26;
|
||||
sk0 |= (kl & (uint32_t)0x01000000) >> 2;
|
||||
|
||||
sk0 |= kr & (uint32_t)0x00000100;
|
||||
sk0 |= (kr & (uint32_t)0x00000008) << 1;
|
||||
sk0 |= (kr & (uint32_t)0x00000200) << 4;
|
||||
sk0 |= rotl(kr & (uint32_t)0x08000021, 6);
|
||||
sk0 |= (kr & (uint32_t)0x01000000) >> 24;
|
||||
sk0 |= (kr & (uint32_t)0x00000002) << 11;
|
||||
sk0 |= (kr & (uint32_t)0x00100000) >> 18;
|
||||
sk0 |= (kr & (uint32_t)0x00400000) >> 17;
|
||||
sk0 |= (kr & (uint32_t)0x00800000) >> 14;
|
||||
sk0 |= (kr & (uint32_t)0x02020000) >> 10;
|
||||
sk0 |= (kr & (uint32_t)0x00080000) >> 5;
|
||||
sk0 |= (kr & (uint32_t)0x00000040) >> 3;
|
||||
sk0 |= (kr & (uint32_t)0x00000800) >> 1;
|
||||
|
||||
sk1 = kl & (uint32_t)0x02000000;
|
||||
sk1 |= (kl & (uint32_t)0x00001000) << 5;
|
||||
sk1 |= (kl & (uint32_t)0x00000200) << 11;
|
||||
sk1 |= (kl & (uint32_t)0x00004000) << 15;
|
||||
sk1 |= (kl & (uint32_t)0x00000020) << 16;
|
||||
sk1 |= (kl & (uint32_t)0x00000800) << 17;
|
||||
sk1 |= (kl & (uint32_t)0x00000001) << 24;
|
||||
sk1 |= (kl & (uint32_t)0x00200000) >> 5;
|
||||
|
||||
sk1 |= (kr & (uint32_t)0x00000010) << 8;
|
||||
sk1 |= (kr & (uint32_t)0x04000000) >> 17;
|
||||
sk1 |= (kr & (uint32_t)0x00004000) >> 14;
|
||||
sk1 |= (kr & (uint32_t)0x00000400) >> 9;
|
||||
sk1 |= (kr & (uint32_t)0x00010000) >> 8;
|
||||
sk1 |= (kr & (uint32_t)0x00001000) >> 7;
|
||||
sk1 |= (kr & (uint32_t)0x00000080) >> 3;
|
||||
sk1 |= (kr & (uint32_t)0x00008000) >> 2;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * see inner.h
 *
 * Build the subkeys for a DES (8-byte key) or 3DES key. Returns the
 * number of DES iterations: 1 for an 8-byte key, 3 otherwise. Each
 * iteration uses 32 words of skey[]. A 16-byte key reuses the first
 * schedule as the third one; any other length is handled as a
 * 24-byte key, so 'key' must then provide 24 readable bytes.
 */
unsigned
br_des_ct_keysched(uint32_t *skey, const void *key, size_t key_len)
{
	const unsigned char *kbuf;

	kbuf = key;

	/* The first schedule is needed for every key length. */
	keysched_unit(skey, kbuf);
	if (key_len == 8) {
		return 1;
	}

	/* Second key; its subkeys are stored in reverse order. */
	keysched_unit(skey + 32, kbuf + 8);
	br_des_rev_skey(skey + 32);

	if (key_len == 16) {
		/* Two-key 3DES: third schedule is a copy of the first. */
		memcpy(skey + 64, skey, 32 * sizeof *skey);
	} else {
		keysched_unit(skey + 64, kbuf + 16);
	}
	return 3;
}
|
||||
|
||||
/*
 * "Fake multiplexer" used by Fconf(): yields 'a' where the selector
 * bit is 0, and 'a ^ b' where it is 1.
 */
#define FCONF_MUX(a, sel, b)   ((a) ^ ((sel) & (b)))

/*
 * DES confusion function. This function performs expansion E (32 to
 * 48 bits), XOR with subkey, S-boxes, and permutation P.
 *
 * 'r0' is the 32-bit input half-block; 'sk' points to the six
 * expanded subkey words for the round.
 */
static inline uint32_t
Fconf(uint32_t r0, const uint32_t *sk)
{
	/*
	 * Each 6->4 S-box is virtually turned into four 6->1 boxes; we
	 * thus end up with 32 boxes that we call "T-boxes" here. We will
	 * evaluate them with bitslice code.
	 *
	 * Each T-box is a circuit of multiplexers (sort of) and thus
	 * takes 70 inputs: the 6 actual T-box inputs, and 64 constants
	 * that describe the T-box output for all combinations of the
	 * 6 inputs. With this model, all T-boxes are identical (with
	 * distinct inputs) and thus can be executed in parallel with
	 * bitslice code.
	 *
	 * T-boxes are numbered from 0 to 31, in least-to-most
	 * significant order. Thus, S-box S1 corresponds to T-boxes 31,
	 * 30, 29 and 28, in that order. T-box 'n' is computed with the
	 * bits at rank 'n' in the 32-bit words.
	 *
	 * Words x0 to x5 contain the T-box inputs 0 to 5.
	 */
	uint32_t x0, x1, x2, x3, x4, x5, z0;
	uint32_t y0, y1, y2, y3, y4, y5, y6, y7, y8, y9;
	uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
	uint32_t y20, y21, y22, y23, y24, y25, y26, y27, y28, y29;
	uint32_t y30;

	/*
	 * Spread input bits over the 6 input words x*: each selected
	 * bit is replicated over its whole 4-bit group ((w << 4) - w).
	 * x0 and x5 are 4-bit rotations of the (not yet keyed) x4 and
	 * x1, so they must be computed before the subkey is applied.
	 */
	x1 = r0 & (uint32_t)0x11111111;
	x1 = (x1 << 4) - x1;
	x2 = (r0 >> 1) & (uint32_t)0x11111111;
	x2 = (x2 << 4) - x2;
	x3 = (r0 >> 2) & (uint32_t)0x11111111;
	x3 = (x3 << 4) - x3;
	x4 = (r0 >> 3) & (uint32_t)0x11111111;
	x4 = (x4 << 4) - x4;

	/*
	 * Neighbour rotations, directly XORed with the subkey for
	 * this round; the remaining words are keyed right after.
	 */
	x0 = ((x4 << 4) | (x4 >> 28)) ^ sk[0];
	x5 = ((x1 >> 4) | (x1 << 28)) ^ sk[5];
	x1 ^= sk[1];
	x2 ^= sk[2];
	x3 ^= sk[3];
	x4 ^= sk[4];

	/*
	 * The T-boxes are done in parallel, since they all use a
	 * "tree of multiplexers". First level: selection on x0 between
	 * constants. The entries that would be y15 and y31 are all-zero
	 * and are therefore omitted.
	 */
	y0 = FCONF_MUX((uint32_t)0xEFA72C4D, x0, (uint32_t)0xEC7AC69C);
	y1 = FCONF_MUX((uint32_t)0xAEAAEDFF, x0, (uint32_t)0x500FB821);
	y2 = FCONF_MUX((uint32_t)0x37396665, x0, (uint32_t)0x40EFA809);
	y3 = FCONF_MUX((uint32_t)0x68D7B833, x0, (uint32_t)0xA5EC0B28);
	y4 = FCONF_MUX((uint32_t)0xC9C755BB, x0, (uint32_t)0x252CF820);
	y5 = FCONF_MUX((uint32_t)0x73FC3606, x0, (uint32_t)0x40205801);
	y6 = FCONF_MUX((uint32_t)0xA2A0A918, x0, (uint32_t)0xE220F929);
	y7 = FCONF_MUX((uint32_t)0x8222BD90, x0, (uint32_t)0x44A3F9E1);
	y8 = FCONF_MUX((uint32_t)0xD6B6AC77, x0, (uint32_t)0x794F104A);
	y9 = FCONF_MUX((uint32_t)0x3069300C, x0, (uint32_t)0x026F320B);
	y10 = FCONF_MUX((uint32_t)0x6CE0D5CC, x0, (uint32_t)0x7640B01A);
	y11 = FCONF_MUX((uint32_t)0x59A9A22D, x0, (uint32_t)0x238F1572);
	y12 = FCONF_MUX((uint32_t)0xAC6D0BD4, x0, (uint32_t)0x7A63C083);
	y13 = FCONF_MUX((uint32_t)0x21C83200, x0, (uint32_t)0x11CCA000);
	y14 = FCONF_MUX((uint32_t)0xA0E62188, x0, (uint32_t)0x202F69AA);
	y16 = FCONF_MUX((uint32_t)0xAF7D655A, x0, (uint32_t)0x51B33BE9);
	y17 = FCONF_MUX((uint32_t)0xF0168AA3, x0, (uint32_t)0x3B0FE8AE);
	y18 = FCONF_MUX((uint32_t)0x90AA30C6, x0, (uint32_t)0x90BF8816);
	y19 = FCONF_MUX((uint32_t)0x5AB2750A, x0, (uint32_t)0x09E34F9B);
	y20 = FCONF_MUX((uint32_t)0x5391BE65, x0, (uint32_t)0x0103BE88);
	y21 = FCONF_MUX((uint32_t)0x93372BAF, x0, (uint32_t)0x49AC8E25);
	y22 = FCONF_MUX((uint32_t)0xF288210C, x0, (uint32_t)0x922C313D);
	y23 = FCONF_MUX((uint32_t)0x920AF5C0, x0, (uint32_t)0x70EF31B0);
	y24 = FCONF_MUX((uint32_t)0x63D312C0, x0, (uint32_t)0x6A707100);
	y25 = FCONF_MUX((uint32_t)0x537B3006, x0, (uint32_t)0xB97C9011);
	y26 = FCONF_MUX((uint32_t)0xA2EFB0A5, x0, (uint32_t)0xA320C959);
	y27 = FCONF_MUX((uint32_t)0xBC8F96A5, x0, (uint32_t)0x6EA0AB4A);
	y28 = FCONF_MUX((uint32_t)0xFAD176A5, x0, (uint32_t)0x6953DDF8);
	y29 = FCONF_MUX((uint32_t)0x665A14A3, x0, (uint32_t)0xF74F3E2B);
	y30 = FCONF_MUX((uint32_t)0xF2EFF0CC, x0, (uint32_t)0xF0306CAD);

	/*
	 * Second level: selection on x1. Results are written back into
	 * the low-numbered y* words; each word is read before it is
	 * overwritten, so the statement order matters.
	 */
	y0 = FCONF_MUX(y0, x1, y1);
	y1 = FCONF_MUX(y2, x1, y3);
	y2 = FCONF_MUX(y4, x1, y5);
	y3 = FCONF_MUX(y6, x1, y7);
	y4 = FCONF_MUX(y8, x1, y9);
	y5 = FCONF_MUX(y10, x1, y11);
	y6 = FCONF_MUX(y12, x1, y13);
	y7 = y14;  /* the other branch is the all-zero y15 */
	y8 = FCONF_MUX(y16, x1, y17);
	y9 = FCONF_MUX(y18, x1, y19);
	y10 = FCONF_MUX(y20, x1, y21);
	y11 = FCONF_MUX(y22, x1, y23);
	y12 = FCONF_MUX(y24, x1, y25);
	y13 = FCONF_MUX(y26, x1, y27);
	y14 = FCONF_MUX(y28, x1, y29);
	y15 = y30;  /* the other branch is the all-zero y31 */

	/* Third level: selection on x2. */
	y0 = FCONF_MUX(y0, x2, y1);
	y1 = FCONF_MUX(y2, x2, y3);
	y2 = FCONF_MUX(y4, x2, y5);
	y3 = FCONF_MUX(y6, x2, y7);
	y4 = FCONF_MUX(y8, x2, y9);
	y5 = FCONF_MUX(y10, x2, y11);
	y6 = FCONF_MUX(y12, x2, y13);
	y7 = FCONF_MUX(y14, x2, y15);

	/* Fourth level: selection on x3. */
	y0 = FCONF_MUX(y0, x3, y1);
	y1 = FCONF_MUX(y2, x3, y3);
	y2 = FCONF_MUX(y4, x3, y5);
	y3 = FCONF_MUX(y6, x3, y7);

	/* Fifth level: selection on x4. */
	y0 = FCONF_MUX(y0, x4, y1);
	y1 = FCONF_MUX(y2, x4, y3);

	/* Last level: selection on x5; y0 holds the 32 T-box outputs. */
	y0 = FCONF_MUX(y0, x5, y1);

	/*
	 * The P permutation:
	 * -- Each bit move is converted into a mask + left rotation.
	 * -- Rotations that use the same movement are coalesced together.
	 * -- Left and right shifts are used as alternatives to a rotation
	 * where appropriate (this will help architectures that do not have
	 * a rotation opcode).
	 */
	z0 = (y0 & (uint32_t)0x00000004) << 3;
	z0 |= (y0 & (uint32_t)0x00004000) << 4;
	z0 |= rotl(y0 & (uint32_t)0x12020120, 5);
	z0 |= (y0 & (uint32_t)0x00100000) << 6;
	z0 |= (y0 & (uint32_t)0x00008000) << 9;
	z0 |= (y0 & (uint32_t)0x04000000) >> 22;
	z0 |= (y0 & (uint32_t)0x00000001) << 11;
	z0 |= rotl(y0 & (uint32_t)0x20000200, 12);
	z0 |= (y0 & (uint32_t)0x00200000) >> 19;
	z0 |= (y0 & (uint32_t)0x00000040) << 14;
	z0 |= (y0 & (uint32_t)0x00010000) << 15;
	z0 |= (y0 & (uint32_t)0x00000002) << 16;
	z0 |= rotl(y0 & (uint32_t)0x40801800, 17);
	z0 |= (y0 & (uint32_t)0x00080000) >> 13;
	z0 |= (y0 & (uint32_t)0x00000010) << 21;
	z0 |= (y0 & (uint32_t)0x01000000) >> 10;
	z0 |= rotl(y0 & (uint32_t)0x88000008, 24);
	z0 |= (y0 & (uint32_t)0x00000480) >> 7;
	z0 |= (y0 & (uint32_t)0x00442000) >> 6;
	return z0;
}

#undef FCONF_MUX
|
||||
|
||||
/*
 * Process one block through 16 successive rounds, omitting the swap
 * in the final round. '*pl' and '*pr' hold the two block halves and
 * are updated in place; 'sk_exp' provides 6 expanded subkey words per
 * round (96 words in total).
 */
static void
process_block_unit(uint32_t *pl, uint32_t *pr, const uint32_t *sk_exp)
{
	uint32_t l, r;
	int u;

	l = *pl;
	r = *pr;

	/*
	 * Two Feistel rounds per iteration: updating the halves in
	 * place twice is the same as two "compute, then swap" rounds.
	 */
	for (u = 0; u < 8; u ++) {
		l ^= Fconf(r, sk_exp);
		r ^= Fconf(l, sk_exp + 6);
		sk_exp += 12;
	}

	/* Halves are exchanged on output. */
	*pl = r;
	*pr = l;
}
|
||||
|
||||
/*
 * see inner.h
 *
 * Process one 8-byte block in place. 'num_rounds' is the number of
 * 16-round DES iterations to chain; each one consumes 96 words of
 * 'sk_exp'. The initial permutation is applied once before the first
 * iteration and its inverse once after the last.
 */
void
br_des_ct_process_block(unsigned num_rounds,
	const uint32_t *sk_exp, void *block)
{
	unsigned char *bytes;
	uint32_t left, right;

	bytes = block;
	left = br_dec32be(bytes);
	right = br_dec32be(bytes + 4);

	br_des_do_IP(&left, &right);
	for (; num_rounds > 0; num_rounds --, sk_exp += 96) {
		process_block_unit(&left, &right, sk_exp);
	}
	br_des_do_invIP(&left, &right);

	br_enc32be(bytes, left);
	br_enc32be(bytes + 4, right);
}
|
||||
|
||||
/*
 * see inner.h
 *
 * Expand subkeys into the form that is XORed with the T-box inputs.
 * For each of the 16 * num_rounds pairs of words in 'skey', six words
 * are written to 'sk_exp': bit lanes 0 to 3 of the first word, then
 * bit lanes 0 and 1 of the second word. Within a lane, each extracted
 * bit is replicated over its whole 4-bit group.
 */
void
br_des_ct_skey_expand(uint32_t *sk_exp,
	unsigned num_rounds, const uint32_t *skey)
{
	unsigned pairs;

	for (pairs = num_rounds << 4; pairs > 0; pairs --) {
		uint32_t first, second, lane;
		int k;

		first = *skey ++;
		second = *skey ++;

		/* Four lanes from the first word of the pair... */
		for (k = 0; k < 4; k ++) {
			lane = (first >> k) & 0x11111111;
			*sk_exp ++ = (lane << 4) - lane;
		}

		/* ...and only the two lowest lanes from the second. */
		for (k = 0; k < 2; k ++) {
			lane = (second >> k) & 0x11111111;
			*sk_exp ++ = (lane << 4) - lane;
		}
	}
}
|
||||
|
|
@ -0,0 +1,87 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_des_ct_cbcdec_init(br_des_ct_cbcdec_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_des_ct_cbcdec_vtable;
|
||||
ctx->num_rounds = br_des_ct_keysched(ctx->skey, key, len);
|
||||
if (len == 8) {
|
||||
br_des_rev_skey(ctx->skey);
|
||||
} else {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 48; i += 2) {
|
||||
uint32_t t;
|
||||
|
||||
t = ctx->skey[i];
|
||||
ctx->skey[i] = ctx->skey[94 - i];
|
||||
ctx->skey[94 - i] = t;
|
||||
t = ctx->skey[i + 1];
|
||||
ctx->skey[i + 1] = ctx->skey[95 - i];
|
||||
ctx->skey[95 - i] = t;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_des_ct_cbcdec_run(const br_des_ct_cbcdec_keys *ctx,
|
||||
void *iv, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf, *ivbuf;
|
||||
uint32_t sk_exp[288];
|
||||
|
||||
br_des_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
|
||||
ivbuf = iv;
|
||||
buf = data;
|
||||
while (len > 0) {
|
||||
unsigned char tmp[8];
|
||||
int i;
|
||||
|
||||
memcpy(tmp, buf, 8);
|
||||
br_des_ct_process_block(ctx->num_rounds, sk_exp, buf);
|
||||
for (i = 0; i < 8; i ++) {
|
||||
buf[i] ^= ivbuf[i];
|
||||
}
|
||||
memcpy(ivbuf, tmp, 8);
|
||||
buf += 8;
|
||||
len -= 8;
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_cbcdec_class br_des_ct_cbcdec_vtable = {
|
||||
sizeof(br_des_ct_cbcdec_keys),
|
||||
8,
|
||||
3,
|
||||
(void (*)(const br_block_cbcdec_class **, const void *, size_t))
|
||||
&br_des_ct_cbcdec_init,
|
||||
(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
|
||||
&br_des_ct_cbcdec_run
|
||||
};
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_des_ct_cbcenc_init(br_des_ct_cbcenc_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_des_ct_cbcenc_vtable;
|
||||
ctx->num_rounds = br_des_ct_keysched(ctx->skey, key, len);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_des_ct_cbcenc_run(const br_des_ct_cbcenc_keys *ctx,
|
||||
void *iv, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf, *ivbuf;
|
||||
uint32_t sk_exp[288];
|
||||
|
||||
br_des_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
|
||||
ivbuf = iv;
|
||||
buf = data;
|
||||
while (len > 0) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 8; i ++) {
|
||||
buf[i] ^= ivbuf[i];
|
||||
}
|
||||
br_des_ct_process_block(ctx->num_rounds, sk_exp, buf);
|
||||
memcpy(ivbuf, buf, 8);
|
||||
buf += 8;
|
||||
len -= 8;
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_cbcenc_class br_des_ct_cbcenc_vtable = {
|
||||
sizeof(br_des_ct_cbcenc_keys),
|
||||
8,
|
||||
3,
|
||||
(void (*)(const br_block_cbcenc_class **, const void *, size_t))
|
||||
&br_des_ct_cbcenc_init,
|
||||
(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
|
||||
&br_des_ct_cbcenc_run
|
||||
};
|
||||
|
|
@ -0,0 +1,166 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * One "delta swap" step of the initial permutation: the bits of *lo
 * selected by 'mask' are exchanged with the bits of *hi located 'sh'
 * positions higher.
 */
static inline void
des_ip_delta_swap(uint32_t *hi, uint32_t *lo, unsigned sh, uint32_t mask)
{
	uint32_t delta;

	delta = ((*hi >> sh) ^ *lo) & mask;
	*lo ^= delta;
	*hi ^= delta << sh;
}

/* see inner.h */
void
br_des_do_IP(uint32_t *xl, uint32_t *xr)
{
	/*
	 * Applies the IP permutation, in place, to the 64-bit block
	 * held as two 32-bit halves (*xl, *xr).
	 *
	 * Permutation algorithm is initially from Richard Outerbridge;
	 * implementation here is adapted from Crypto++ "des.cpp" file
	 * (which is in public domain). It is expressed as five delta
	 * swaps; note that the second and third pairs of steps use the
	 * two halves in opposite roles.
	 */
	uint32_t left, right;

	left = *xl;
	right = *xr;
	des_ip_delta_swap(&left, &right, 4, (uint32_t)0x0F0F0F0F);
	des_ip_delta_swap(&left, &right, 16, (uint32_t)0x0000FFFF);
	des_ip_delta_swap(&right, &left, 2, (uint32_t)0x33333333);
	des_ip_delta_swap(&right, &left, 8, (uint32_t)0x00FF00FF);
	des_ip_delta_swap(&left, &right, 1, (uint32_t)0x55555555);
	*xl = left;
	*xr = right;
}
|
||||
|
||||
/*
 * One "delta swap" step of the inverse initial permutation: the bits
 * of *lo selected by 'mask' are exchanged with the bits of *hi located
 * 'sh' positions higher.
 */
static inline void
des_invip_delta_swap(uint32_t *hi, uint32_t *lo, unsigned sh, uint32_t mask)
{
	uint32_t delta;

	delta = ((*hi >> sh) ^ *lo) & mask;
	*lo ^= delta;
	*hi ^= delta << sh;
}

/* see inner.h */
void
br_des_do_invIP(uint32_t *xl, uint32_t *xr)
{
	/*
	 * Inverse of br_des_do_IP(): the same five delta swaps are
	 * applied in the reverse order, in place, on (*xl, *xr).
	 */
	uint32_t left, right;

	left = *xl;
	right = *xr;
	des_invip_delta_swap(&left, &right, 1, (uint32_t)0x55555555);
	des_invip_delta_swap(&right, &left, 8, (uint32_t)0x00FF00FF);
	des_invip_delta_swap(&right, &left, 2, (uint32_t)0x33333333);
	des_invip_delta_swap(&left, &right, 16, (uint32_t)0x0000FFFF);
	des_invip_delta_swap(&left, &right, 4, (uint32_t)0x0F0F0F0F);
	*xl = left;
	*xr = right;
}
|
||||
|
||||
/* see inner.h */
void
br_des_keysched_unit(uint32_t *skey, const void *key)
{
	/*
	 * Reads an 8-byte key and writes 32 words into skey[]: for each
	 * of the 16 rounds, skey[2*round] and skey[2*round + 1] receive
	 * the rotated 28-bit left and right key words. PC-2 is NOT
	 * applied here.
	 */
	uint32_t hi, lo, c, d;
	int round;

	hi = br_dec32be(key);
	lo = br_dec32be((const unsigned char *)key + 4);

	/*
	 * Permutation PC-1 is quite similar to the IP permutation.
	 * Definition of IP (in FIPS 46-3 notations) is:
	 *   58 50 42 34 26 18 10 2
	 *   60 52 44 36 28 20 12 4
	 *   62 54 46 38 30 22 14 6
	 *   64 56 48 40 32 24 16 8
	 *   57 49 41 33 25 17  9 1
	 *   59 51 43 35 27 19 11 3
	 *   61 53 45 37 29 21 13 5
	 *   63 55 47 39 31 23 15 7
	 *
	 * Definition of PC-1 is:
	 *   57 49 41 33 25 17  9 1
	 *   58 50 42 34 26 18 10 2
	 *   59 51 43 35 27 19 11 3
	 *   60 52 44 36
	 *   63 55 47 39 31 23 15 7
	 *   62 54 46 38 30 22 14 6
	 *   61 53 45 37 29 21 13 5
	 *   28 20 12 4
	 *
	 * Hence IP is applied first, and the two 28-bit words are then
	 * assembled from the bytes of its output.
	 */
	br_des_do_IP(&hi, &lo);

	c = (lo & (uint32_t)0xFF000000) >> 4;
	c |= (hi & (uint32_t)0xFF000000) >> 12;
	c |= (lo & (uint32_t)0x00FF0000) >> 12;
	c |= (hi & (uint32_t)0x00FF0000) >> 20;

	d = (lo & (uint32_t)0x000000FF) << 20;
	d |= (hi & (uint32_t)0x0000FF00) << 4;
	d |= (lo & (uint32_t)0x0000FF00) >> 4;
	d |= (hi & (uint32_t)0x000F0000) >> 16;

	/*
	 * For each round, rotate the two 28-bit words left. Bits 0, 1,
	 * 8 and 15 of the constant 0x8103 mark the rounds that rotate
	 * by one position; all other rounds rotate by two.
	 * The extraction of the 48-bit subkey (PC-2) is not done yet.
	 */
	for (round = 0; round < 16; round ++) {
		unsigned rot;

		rot = 2 - ((0x8103u >> round) & 1);
		c = ((c << rot) | (c >> (28 - rot))) & (uint32_t)0x0FFFFFFF;
		d = ((d << rot) | (d >> (28 - rot))) & (uint32_t)0x0FFFFFFF;
		skey[(round << 1) + 0] = c;
		skey[(round << 1) + 1] = d;
	}
}
|
||||
|
||||
/* see inner.h */
void
br_des_rev_skey(uint32_t *skey)
{
	/*
	 * skey[] holds 16 subkeys of two words each (32 words); the
	 * order of the subkeys is reversed in place, the two words of
	 * each subkey staying in the same relative order.
	 */
	uint32_t *front, *back;

	front = skey;
	back = skey + 30;
	while (front < back) {
		uint32_t w0, w1;

		w0 = front[0];
		w1 = front[1];
		front[0] = back[0];
		front[1] = back[1];
		back[0] = w0;
		back[1] = w1;
		front += 2;
		back -= 2;
	}
}
|
||||
|
|
@ -0,0 +1,310 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * PC2left[x] tells where bit x goes when applying PC-2. 'x' is a bit
 * position in the left rotated key word. Both positions are in normal
 * order (rightmost bit is 0).
 *
 * The table has one entry per bit of the 28-bit word. Several entries
 * share the value 24: the round function only reads bits 0 to 23 of a
 * subkey word, so those source bits are effectively dropped.
 */
static const unsigned char PC2left[28] = {
	/* bits  0.. 6 */ 16,  3,  7, 24, 20, 11, 24,
	/* bits  7..13 */ 13,  2, 10, 24, 22,  5, 15,
	/* bits 14..20 */ 23,  1,  9, 21, 12, 24,  6,
	/* bits 21..27 */  4, 14, 18,  8, 17,  0, 19
};
|
||||
|
||||
/*
 * Same role as PC2left[], for the right rotated key word: PC2right[x]
 * is the destination of bit x (rightmost bit is 0). The value 24 again
 * marks source bits that end up outside the 24 bits read by the round
 * function.
 */
static const unsigned char PC2right[28] = {
	/* bits  0.. 6 */  8, 18, 24,  6, 22, 15,  3,
	/* bits  7..13 */ 10, 12, 19,  5, 14, 11, 24,
	/* bits 14..20 */  4, 23, 16,  9, 24, 20,  2,
	/* bits 21..27 */ 24,  7, 13,  0, 21, 17,  1
};
|
||||
|
||||
/*
 * S-box lookup tables S1 to S8. Each table has 64 entries, indexed by
 * a 6-bit value; Fconf() ORs together one entry from each table to
 * obtain the 32-bit output of the round function, so every entry
 * already has its four output bits at their final positions.
 *
 * The original comment here reads "S-boxes and PC-1 merged".
 * NOTE(review): PC-1 is the key schedule permutation; what is merged
 * into these tables looks like the round permutation instead --
 * confirm against the DES specification.
 *
 * S1 only uses the bits of mask 0x00808202.
 */
static const uint32_t S1[64] = {
	0x00808200, 0x00000000, 0x00008000, 0x00808202,
	0x00808002, 0x00008202, 0x00000002, 0x00008000,
	0x00000200, 0x00808200, 0x00808202, 0x00000200,
	0x00800202, 0x00808002, 0x00800000, 0x00000002,
	0x00000202, 0x00800200, 0x00800200, 0x00008200,
	0x00008200, 0x00808000, 0x00808000, 0x00800202,
	0x00008002, 0x00800002, 0x00800002, 0x00008002,
	0x00000000, 0x00000202, 0x00008202, 0x00800000,
	0x00008000, 0x00808202, 0x00000002, 0x00808000,
	0x00808200, 0x00800000, 0x00800000, 0x00000200,
	0x00808002, 0x00008000, 0x00008200, 0x00800002,
	0x00000200, 0x00000002, 0x00800202, 0x00008202,
	0x00808202, 0x00008002, 0x00808000, 0x00800202,
	0x00800002, 0x00000202, 0x00008202, 0x00808200,
	0x00000202, 0x00800200, 0x00800200, 0x00000000,
	0x00008002, 0x00008200, 0x00000000, 0x00808002
};
|
||||
|
||||
/*
 * Lookup table for the second S-box (64 entries, 6-bit index); all
 * entries are within the bit mask 0x40084010.
 */
static const uint32_t S2[64] = {
	0x40084010, 0x40004000, 0x00004000, 0x00084010,
	0x00080000, 0x00000010, 0x40080010, 0x40004010,
	0x40000010, 0x40084010, 0x40084000, 0x40000000,
	0x40004000, 0x00080000, 0x00000010, 0x40080010,
	0x00084000, 0x00080010, 0x40004010, 0x00000000,
	0x40000000, 0x00004000, 0x00084010, 0x40080000,
	0x00080010, 0x40000010, 0x00000000, 0x00084000,
	0x00004010, 0x40084000, 0x40080000, 0x00004010,
	0x00000000, 0x00084010, 0x40080010, 0x00080000,
	0x40004010, 0x40080000, 0x40084000, 0x00004000,
	0x40080000, 0x40004000, 0x00000010, 0x40084010,
	0x00084010, 0x00000010, 0x00004000, 0x40000000,
	0x00004010, 0x40084000, 0x00080000, 0x40000010,
	0x00080010, 0x40004010, 0x40000010, 0x00080010,
	0x00084000, 0x00000000, 0x40004000, 0x00004010,
	0x40000000, 0x40080010, 0x40084010, 0x00084000
};
|
||||
|
||||
/*
 * Lookup table for the third S-box (64 entries, 6-bit index); all
 * entries are within the bit mask 0x04010104.
 */
static const uint32_t S3[64] = {
	0x00000104, 0x04010100, 0x00000000, 0x04010004,
	0x04000100, 0x00000000, 0x00010104, 0x04000100,
	0x00010004, 0x04000004, 0x04000004, 0x00010000,
	0x04010104, 0x00010004, 0x04010000, 0x00000104,
	0x04000000, 0x00000004, 0x04010100, 0x00000100,
	0x00010100, 0x04010000, 0x04010004, 0x00010104,
	0x04000104, 0x00010100, 0x00010000, 0x04000104,
	0x00000004, 0x04010104, 0x00000100, 0x04000000,
	0x04010100, 0x04000000, 0x00010004, 0x00000104,
	0x00010000, 0x04010100, 0x04000100, 0x00000000,
	0x00000100, 0x00010004, 0x04010104, 0x04000100,
	0x04000004, 0x00000100, 0x00000000, 0x04010004,
	0x04000104, 0x00010000, 0x04000000, 0x04010104,
	0x00000004, 0x00010104, 0x00010100, 0x04000004,
	0x04010000, 0x04000104, 0x00000104, 0x04010000,
	0x00010104, 0x00000004, 0x04010004, 0x00010100
};
|
||||
|
||||
/*
 * Lookup table for the fourth S-box (64 entries, 6-bit index); all
 * entries are within the bit mask 0x80401040.
 */
static const uint32_t S4[64] = {
	0x80401000, 0x80001040, 0x80001040, 0x00000040,
	0x00401040, 0x80400040, 0x80400000, 0x80001000,
	0x00000000, 0x00401000, 0x00401000, 0x80401040,
	0x80000040, 0x00000000, 0x00400040, 0x80400000,
	0x80000000, 0x00001000, 0x00400000, 0x80401000,
	0x00000040, 0x00400000, 0x80001000, 0x00001040,
	0x80400040, 0x80000000, 0x00001040, 0x00400040,
	0x00001000, 0x00401040, 0x80401040, 0x80000040,
	0x00400040, 0x80400000, 0x00401000, 0x80401040,
	0x80000040, 0x00000000, 0x00000000, 0x00401000,
	0x00001040, 0x00400040, 0x80400040, 0x80000000,
	0x80401000, 0x80001040, 0x80001040, 0x00000040,
	0x80401040, 0x80000040, 0x80000000, 0x00001000,
	0x80400000, 0x80001000, 0x00401040, 0x80400040,
	0x80001000, 0x00001040, 0x00400000, 0x80401000,
	0x00000040, 0x00400000, 0x00001000, 0x00401040
};
|
||||
|
||||
/*
 * Lookup table for the fifth S-box (64 entries, 6-bit index); all
 * entries are within the bit mask 0x21040080.
 */
static const uint32_t S5[64] = {
	0x00000080, 0x01040080, 0x01040000, 0x21000080,
	0x00040000, 0x00000080, 0x20000000, 0x01040000,
	0x20040080, 0x00040000, 0x01000080, 0x20040080,
	0x21000080, 0x21040000, 0x00040080, 0x20000000,
	0x01000000, 0x20040000, 0x20040000, 0x00000000,
	0x20000080, 0x21040080, 0x21040080, 0x01000080,
	0x21040000, 0x20000080, 0x00000000, 0x21000000,
	0x01040080, 0x01000000, 0x21000000, 0x00040080,
	0x00040000, 0x21000080, 0x00000080, 0x01000000,
	0x20000000, 0x01040000, 0x21000080, 0x20040080,
	0x01000080, 0x20000000, 0x21040000, 0x01040080,
	0x20040080, 0x00000080, 0x01000000, 0x21040000,
	0x21040080, 0x00040080, 0x21000000, 0x21040080,
	0x01040000, 0x00000000, 0x20040000, 0x21000000,
	0x00040080, 0x01000080, 0x20000080, 0x00040000,
	0x00000000, 0x20040000, 0x01040080, 0x20000080
};
|
||||
|
||||
/*
 * Lookup table for the sixth S-box (64 entries, 6-bit index); all
 * entries are within the bit mask 0x10202008.
 */
static const uint32_t S6[64] = {
	0x10000008, 0x10200000, 0x00002000, 0x10202008,
	0x10200000, 0x00000008, 0x10202008, 0x00200000,
	0x10002000, 0x00202008, 0x00200000, 0x10000008,
	0x00200008, 0x10002000, 0x10000000, 0x00002008,
	0x00000000, 0x00200008, 0x10002008, 0x00002000,
	0x00202000, 0x10002008, 0x00000008, 0x10200008,
	0x10200008, 0x00000000, 0x00202008, 0x10202000,
	0x00002008, 0x00202000, 0x10202000, 0x10000000,
	0x10002000, 0x00000008, 0x10200008, 0x00202000,
	0x10202008, 0x00200000, 0x00002008, 0x10000008,
	0x00200000, 0x10002000, 0x10000000, 0x00002008,
	0x10000008, 0x10202008, 0x00202000, 0x10200000,
	0x00202008, 0x10202000, 0x00000000, 0x10200008,
	0x00000008, 0x00002000, 0x10200000, 0x00202008,
	0x00002000, 0x00200008, 0x10002008, 0x00000000,
	0x10202000, 0x10000000, 0x00200008, 0x10002008
};
|
||||
|
||||
/*
 * Lookup table for the seventh S-box (64 entries, 6-bit index); all
 * entries are within the bit mask 0x02100401.
 */
static const uint32_t S7[64] = {
	0x00100000, 0x02100001, 0x02000401, 0x00000000,
	0x00000400, 0x02000401, 0x00100401, 0x02100400,
	0x02100401, 0x00100000, 0x00000000, 0x02000001,
	0x00000001, 0x02000000, 0x02100001, 0x00000401,
	0x02000400, 0x00100401, 0x00100001, 0x02000400,
	0x02000001, 0x02100000, 0x02100400, 0x00100001,
	0x02100000, 0x00000400, 0x00000401, 0x02100401,
	0x00100400, 0x00000001, 0x02000000, 0x00100400,
	0x02000000, 0x00100400, 0x00100000, 0x02000401,
	0x02000401, 0x02100001, 0x02100001, 0x00000001,
	0x00100001, 0x02000000, 0x02000400, 0x00100000,
	0x02100400, 0x00000401, 0x00100401, 0x02100400,
	0x00000401, 0x02000001, 0x02100401, 0x02100000,
	0x00100400, 0x00000000, 0x00000001, 0x02100401,
	0x00000000, 0x00100401, 0x02100000, 0x00000400,
	0x02000001, 0x02000400, 0x00000400, 0x00100001
};
|
||||
|
||||
/*
 * Lookup table for the eighth S-box (64 entries, 6-bit index); all
 * entries are within the bit mask 0x08020820.
 */
static const uint32_t S8[64] = {
	0x08000820, 0x00000800, 0x00020000, 0x08020820,
	0x08000000, 0x08000820, 0x00000020, 0x08000000,
	0x00020020, 0x08020000, 0x08020820, 0x00020800,
	0x08020800, 0x00020820, 0x00000800, 0x00000020,
	0x08020000, 0x08000020, 0x08000800, 0x00000820,
	0x00020800, 0x00020020, 0x08020020, 0x08020800,
	0x00000820, 0x00000000, 0x00000000, 0x08020020,
	0x08000020, 0x08000800, 0x00020820, 0x00020000,
	0x00020820, 0x00020000, 0x08020800, 0x00000800,
	0x00000020, 0x08020020, 0x00000800, 0x00020820,
	0x08000800, 0x00000020, 0x08000020, 0x08020000,
	0x08020020, 0x08000000, 0x00020000, 0x08000820,
	0x00000000, 0x08020820, 0x00020020, 0x08000020,
	0x08020000, 0x08000800, 0x08000820, 0x00000000,
	0x08020820, 0x00020800, 0x00020800, 0x00000820,
	0x00000820, 0x00020020, 0x08000000, 0x08020800
};
|
||||
|
||||
static inline uint32_t
|
||||
Fconf(uint32_t r0, uint32_t skl, uint32_t skr)
|
||||
{
|
||||
uint32_t r1;
|
||||
|
||||
r1 = (r0 << 16) | (r0 >> 16);
|
||||
return
|
||||
S1[((r1 >> 11) ^ (skl >> 18)) & 0x3F]
|
||||
| S2[((r0 >> 23) ^ (skl >> 12)) & 0x3F]
|
||||
| S3[((r0 >> 19) ^ (skl >> 6)) & 0x3F]
|
||||
| S4[((r0 >> 15) ^ (skl )) & 0x3F]
|
||||
| S5[((r0 >> 11) ^ (skr >> 18)) & 0x3F]
|
||||
| S6[((r0 >> 7) ^ (skr >> 12)) & 0x3F]
|
||||
| S7[((r0 >> 3) ^ (skr >> 6)) & 0x3F]
|
||||
| S8[((r1 >> 15) ^ (skr )) & 0x3F];
|
||||
}
|
||||
|
||||
/*
 * Run the 16 rounds of one DES pass over the half-blocks (*pl, *pr),
 * using the 16 two-word subkeys in skey[0..31]. The two halves are
 * exchanged on output.
 */
static void
process_block_unit(uint32_t *pl, uint32_t *pr, const uint32_t *skey)
{
	uint32_t left, right;
	int round;

	left = *pl;
	right = *pr;

	/*
	 * Two rounds are done per iteration, each half being updated
	 * in turn; this avoids moving values between the two variables
	 * after every round.
	 */
	for (round = 0; round < 16; round += 2) {
		const uint32_t *sk;

		sk = skey + (round << 1);
		left ^= Fconf(right, sk[0], sk[1]);
		right ^= Fconf(left, sk[2], sk[3]);
	}

	*pl = right;
	*pr = left;
}
|
||||
|
||||
/* see inner.h */
void
br_des_tab_process_block(unsigned num_rounds, const uint32_t *skey, void *block)
{
	/*
	 * The 8-byte block is processed in place. 'num_rounds' is the
	 * number of successive 16-round passes; each pass consumes 32
	 * words of skey[]. IP and its inverse are applied only once,
	 * around the whole sequence of passes.
	 */
	unsigned char *bytes;
	uint32_t left, right;

	bytes = block;
	left = br_dec32be(bytes);
	right = br_dec32be(bytes + 4);
	br_des_do_IP(&left, &right);
	for (; num_rounds > 0; num_rounds --) {
		process_block_unit(&left, &right, skey);
		skey += 32;
	}
	br_des_do_invIP(&left, &right);
	br_enc32be(bytes, left);
	br_enc32be(bytes + 4, right);
}
|
||||
|
||||
static void
|
||||
keysched_unit(uint32_t *skey, const void *key)
|
||||
{
|
||||
int i;
|
||||
|
||||
br_des_keysched_unit(skey, key);
|
||||
|
||||
/*
|
||||
* Apply PC-2 to get the 48-bit subkeys.
|
||||
*/
|
||||
for (i = 0; i < 16; i ++) {
|
||||
uint32_t xl, xr, ul, ur;
|
||||
int j;
|
||||
|
||||
xl = skey[(i << 1) + 0];
|
||||
xr = skey[(i << 1) + 1];
|
||||
ul = 0;
|
||||
ur = 0;
|
||||
for (j = 0; j < 28; j ++) {
|
||||
ul |= (xl & 1) << PC2left[j];
|
||||
ur |= (xr & 1) << PC2right[j];
|
||||
xl >>= 1;
|
||||
xr >>= 1;
|
||||
}
|
||||
skey[(i << 1) + 0] = ul;
|
||||
skey[(i << 1) + 1] = ur;
|
||||
}
|
||||
}
|
||||
|
||||
/* see inner.h */
unsigned
br_des_tab_keysched(uint32_t *skey, const void *key, size_t key_len)
{
	/*
	 * Returned value is the number of 16-round passes: 1 for an
	 * 8-byte key (32 words written in skey[]), 3 otherwise (96
	 * words written). A 16-byte key reuses its first half for the
	 * third pass; any other length is handled as a 24-byte key.
	 */
	const unsigned char *kb;

	kb = key;
	keysched_unit(skey, kb);
	if (key_len == 8) {
		return 1;
	}

	/*
	 * The subkeys of the middle pass are stored in reverse order
	 * (presumably so that this pass runs in the decryption
	 * direction, as in 3DES "EDE").
	 */
	keysched_unit(skey + 32, kb + 8);
	br_des_rev_skey(skey + 32);

	if (key_len == 16) {
		memcpy(skey + 64, skey, 32 * sizeof *skey);
	} else {
		keysched_unit(skey + 64, kb + 16);
	}
	return 3;
}
|
||||
|
|
@ -0,0 +1,85 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_des_tab_cbcdec_init(br_des_tab_cbcdec_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_des_tab_cbcdec_vtable;
|
||||
ctx->num_rounds = br_des_tab_keysched(ctx->skey, key, len);
|
||||
if (len == 8) {
|
||||
br_des_rev_skey(ctx->skey);
|
||||
} else {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 48; i += 2) {
|
||||
uint32_t t;
|
||||
|
||||
t = ctx->skey[i];
|
||||
ctx->skey[i] = ctx->skey[94 - i];
|
||||
ctx->skey[94 - i] = t;
|
||||
t = ctx->skey[i + 1];
|
||||
ctx->skey[i + 1] = ctx->skey[95 - i];
|
||||
ctx->skey[95 - i] = t;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_des_tab_cbcdec_run(const br_des_tab_cbcdec_keys *ctx,
|
||||
void *iv, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf, *ivbuf;
|
||||
|
||||
ivbuf = iv;
|
||||
buf = data;
|
||||
while (len > 0) {
|
||||
unsigned char tmp[8];
|
||||
int i;
|
||||
|
||||
memcpy(tmp, buf, 8);
|
||||
br_des_tab_process_block(ctx->num_rounds, ctx->skey, buf);
|
||||
for (i = 0; i < 8; i ++) {
|
||||
buf[i] ^= ivbuf[i];
|
||||
}
|
||||
memcpy(ivbuf, tmp, 8);
|
||||
buf += 8;
|
||||
len -= 8;
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_cbcdec_class br_des_tab_cbcdec_vtable = {
|
||||
sizeof(br_des_tab_cbcdec_keys),
|
||||
8,
|
||||
3,
|
||||
(void (*)(const br_block_cbcdec_class **, const void *, size_t))
|
||||
&br_des_tab_cbcdec_init,
|
||||
(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
|
||||
&br_des_tab_cbcdec_run
|
||||
};
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_des_tab_cbcenc_init(br_des_tab_cbcenc_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_des_tab_cbcenc_vtable;
|
||||
ctx->num_rounds = br_des_tab_keysched(ctx->skey, key, len);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_des_tab_cbcenc_run(const br_des_tab_cbcenc_keys *ctx,
|
||||
void *iv, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf, *ivbuf;
|
||||
|
||||
ivbuf = iv;
|
||||
buf = data;
|
||||
while (len > 0) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 8; i ++) {
|
||||
buf[i] ^= ivbuf[i];
|
||||
}
|
||||
br_des_tab_process_block(ctx->num_rounds, ctx->skey, buf);
|
||||
memcpy(ivbuf, buf, 8);
|
||||
buf += 8;
|
||||
len -= 8;
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_cbcenc_class br_des_tab_cbcenc_vtable = {
|
||||
sizeof(br_des_tab_cbcenc_keys),
|
||||
8,
|
||||
3,
|
||||
(void (*)(const br_block_cbcenc_class **, const void *, size_t))
|
||||
&br_des_tab_cbcenc_init,
|
||||
(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
|
||||
&br_des_tab_cbcenc_run
|
||||
};
|
||||
|
|
@ -0,0 +1,84 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * This file contains the encoded OIDs for the standard hash functions.
 * Such OIDs appear in, for instance, the PKCS#1 v1.5 padding for RSA
 * signatures.
 *
 * Each array holds only the encoded OID contents (no tag or length
 * byte); the dotted form is given next to each definition.
 */

/* 1.2.840.113549.2.5 */
static const unsigned char md5_OID[] = {
	0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x02, 0x05
};

/* 1.3.14.3.2.26 */
static const unsigned char sha1_OID[] = {
	0x2B, 0x0E, 0x03, 0x02, 0x1A
};

/* 2.16.840.1.101.3.4.2.4 */
static const unsigned char sha224_OID[] = {
	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04
};

/* 2.16.840.1.101.3.4.2.1 */
static const unsigned char sha256_OID[] = {
	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01
};

/* 2.16.840.1.101.3.4.2.2 */
static const unsigned char sha384_OID[] = {
	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02
};

/* 2.16.840.1.101.3.4.2.3 */
static const unsigned char sha512_OID[] = {
	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03
};
|
||||
|
||||
/* see inner.h */
|
||||
const unsigned char *
|
||||
br_digest_OID(int digest_id, size_t *len)
|
||||
{
|
||||
switch (digest_id) {
|
||||
case br_md5_ID:
|
||||
*len = sizeof md5_OID;
|
||||
return md5_OID;
|
||||
case br_sha1_ID:
|
||||
*len = sizeof sha1_OID;
|
||||
return sha1_OID;
|
||||
case br_sha224_ID:
|
||||
*len = sizeof sha224_OID;
|
||||
return sha224_OID;
|
||||
case br_sha256_ID:
|
||||
*len = sizeof sha256_OID;
|
||||
return sha256_OID;
|
||||
case br_sha384_ID:
|
||||
*len = sizeof sha384_OID;
|
||||
return sha384_OID;
|
||||
case br_sha512_ID:
|
||||
*len = sizeof sha512_OID;
|
||||
return sha512_OID;
|
||||
default:
|
||||
*len = 0;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see inner.h */
|
||||
size_t
|
||||
br_digest_size_by_ID(int digest_id)
|
||||
{
|
||||
switch (digest_id) {
|
||||
case br_md5sha1_ID:
|
||||
return br_md5_SIZE + br_sha1_SIZE;
|
||||
case br_md5_ID:
|
||||
return br_md5_SIZE;
|
||||
case br_sha1_ID:
|
||||
return br_sha1_SIZE;
|
||||
case br_sha224_ID:
|
||||
return br_sha224_SIZE;
|
||||
case br_sha256_ID:
|
||||
return br_sha256_SIZE;
|
||||
case br_sha384_ID:
|
||||
return br_sha384_SIZE;
|
||||
case br_sha512_ID:
|
||||
return br_sha512_SIZE;
|
||||
default:
|
||||
/* abort(); */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,525 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
|
||||
* Implementation Notes
|
||||
* ====================
|
||||
*
|
||||
* The combined CTR + CBC-MAC functions can only handle full blocks,
|
||||
* so some buffering is necessary. Moreover, EAX has a special padding
|
||||
* rule for CBC-MAC, which implies that we cannot compute the MAC over
|
||||
* the last received full block until we know whether we are at the
|
||||
* end of the data or not.
|
||||
*
|
||||
* - 'ptr' contains a value from 1 to 16, which is the number of bytes
|
||||
* accumulated in buf[] that still needs to be processed with the
|
||||
* current OMAC computation. Beware that this can go to 16: a
|
||||
* complete block cannot be processed until it is known whether it
|
||||
* is the last block or not. However, it can never be 0, because
|
||||
* OMAC^t works on an input that is at least one-block long.
|
||||
*
|
||||
* - When processing the message itself, CTR encryption/decryption is
|
||||
* also done at the same time. The first 'ptr' bytes of buf[] then
|
||||
* contains the encrypted bytes, while the last '16 - ptr' bytes of
|
||||
* buf[] are the remnants of the stream block, to be used against
|
||||
* the next input bytes, when available.
|
||||
*
|
||||
* - The current counter and running CBC-MAC values are kept in 'ctr'
|
||||
* and 'cbcmac', respectively.
|
||||
*
|
||||
* - The derived keys for padding are kept in L2 and L4 (double and
|
||||
* quadruple of Enc_K(0^n), in GF(2^128), respectively).
|
||||
*/
|
||||
|
||||
/*
|
||||
* Start an OMAC computation; the first block is the big-endian
|
||||
* representation of the provided value ('val' must fit on one byte).
|
||||
* We make it a delayed block because it may also be the last one,
|
||||
*/
|
||||
static void
|
||||
omac_start(br_eax_context *ctx, unsigned val)
|
||||
{
|
||||
memset(ctx->cbcmac, 0, sizeof ctx->cbcmac);
|
||||
memset(ctx->buf, 0, sizeof ctx->buf);
|
||||
ctx->buf[15] = val;
|
||||
ctx->ptr = 16;
|
||||
}
|
||||
|
||||
/*
 * Double a value in finite field GF(2^128), defined with modulus
 * X^128+X^7+X^2+X+1. Values are 16 bytes, in big-endian order (the
 * top bit of src[0] is the coefficient of X^127). dst[] and src[] may
 * be the same array.
 */
static void
double_gf128(unsigned char *dst, const unsigned char *src)
{
	unsigned carry;
	int k;

	/*
	 * If the top bit is set, the shift overflows and 0x87
	 * (X^7+X^2+X+1) must be folded into the low byte; the mask is
	 * computed without a conditional jump.
	 */
	carry = -((unsigned)src[0] >> 7) & 0x87;
	k = 16;
	while (k -- > 0) {
		unsigned w;

		w = ((unsigned)src[k] << 1) ^ carry;
		dst[k] = (unsigned char)w;
		carry = w >> 8;
	}
}
|
||||
|
||||
/*
|
||||
* Apply padding to the last block, currently in ctx->buf (with
|
||||
* ctx->ptr bytes), and finalize OMAC computation.
|
||||
*/
|
||||
static void
|
||||
do_pad(br_eax_context *ctx)
|
||||
{
|
||||
unsigned char *pad;
|
||||
size_t ptr, u;
|
||||
|
||||
ptr = ctx->ptr;
|
||||
if (ptr == 16) {
|
||||
pad = ctx->L2;
|
||||
} else {
|
||||
ctx->buf[ptr ++] = 0x80;
|
||||
memset(ctx->buf + ptr, 0x00, 16 - ptr);
|
||||
pad = ctx->L4;
|
||||
}
|
||||
for (u = 0; u < sizeof ctx->buf; u ++) {
|
||||
ctx->buf[u] ^= pad[u];
|
||||
}
|
||||
(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, ctx->buf, sizeof ctx->buf);
|
||||
}
|
||||
|
||||
/*
|
||||
* Apply CBC-MAC on the provided data, with buffering management.
|
||||
*
|
||||
* Upon entry, two situations are acceptable:
|
||||
*
|
||||
* ctx->ptr == 0: there is no data to process in ctx->buf
|
||||
* ctx->ptr == 16: there is a full block of unprocessed data in ctx->buf
|
||||
*
|
||||
* Upon exit, ctx->ptr may be zero only if it was already zero on entry,
|
||||
* and len == 0. In all other situations, ctx->ptr will be non-zero on
|
||||
* exit (and may have value 16).
|
||||
*/
|
||||
static void
|
||||
do_cbcmac_chunk(br_eax_context *ctx, const void *data, size_t len)
|
||||
{
|
||||
size_t ptr;
|
||||
|
||||
if (len == 0) {
|
||||
return;
|
||||
}
|
||||
ptr = len & (size_t)15;
|
||||
if (ptr == 0) {
|
||||
len -= 16;
|
||||
ptr = 16;
|
||||
} else {
|
||||
len -= ptr;
|
||||
}
|
||||
if (ctx->ptr == 16) {
|
||||
(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac,
|
||||
ctx->buf, sizeof ctx->buf);
|
||||
}
|
||||
(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, data, len);
|
||||
memcpy(ctx->buf, (const unsigned char *)data + len, ptr);
|
||||
ctx->ptr = ptr;
|
||||
}
|
||||
|
||||
/* see bearssl_aead.h */
|
||||
void
|
||||
br_eax_init(br_eax_context *ctx, const br_block_ctrcbc_class **bctx)
|
||||
{
|
||||
unsigned char tmp[16], iv[16];
|
||||
|
||||
ctx->vtable = &br_eax_vtable;
|
||||
ctx->bctx = bctx;
|
||||
|
||||
/*
|
||||
* Encrypt a whole-zero block to compute L2 and L4.
|
||||
*/
|
||||
memset(tmp, 0, sizeof tmp);
|
||||
memset(iv, 0, sizeof iv);
|
||||
(*bctx)->ctr(bctx, iv, tmp, sizeof tmp);
|
||||
double_gf128(ctx->L2, tmp);
|
||||
double_gf128(ctx->L4, ctx->L2);
|
||||
}
|
||||
|
||||
/* see bearssl_aead.h */
|
||||
void
|
||||
br_eax_capture(const br_eax_context *ctx, br_eax_state *st)
|
||||
{
|
||||
/*
|
||||
* We capture the three OMAC* states _after_ processing the
|
||||
* initial block (assuming that nonce, message and AAD are
|
||||
* all non-empty).
|
||||
*/
|
||||
int i;
|
||||
|
||||
memset(st->st, 0, sizeof st->st);
|
||||
for (i = 0; i < 3; i ++) {
|
||||
unsigned char tmp[16];
|
||||
|
||||
memset(tmp, 0, sizeof tmp);
|
||||
tmp[15] = (unsigned char)i;
|
||||
(*ctx->bctx)->mac(ctx->bctx, st->st[i], tmp, sizeof tmp);
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_aead.h */
|
||||
void
|
||||
br_eax_reset(br_eax_context *ctx, const void *nonce, size_t len)
|
||||
{
|
||||
/*
|
||||
* Process nonce with OMAC^0.
|
||||
*/
|
||||
omac_start(ctx, 0);
|
||||
do_cbcmac_chunk(ctx, nonce, len);
|
||||
do_pad(ctx);
|
||||
memcpy(ctx->nonce, ctx->cbcmac, sizeof ctx->cbcmac);
|
||||
|
||||
/*
|
||||
* Start OMAC^1 for the AAD ("header" in the EAX specification).
|
||||
*/
|
||||
omac_start(ctx, 1);
|
||||
|
||||
/*
|
||||
* We use ctx->head[0] as temporary flag to mark that we are
|
||||
* using a "normal" reset().
|
||||
*/
|
||||
ctx->head[0] = 0;
|
||||
}
|
||||
|
||||
/* see bearssl_aead.h */
|
||||
void
|
||||
br_eax_reset_pre_aad(br_eax_context *ctx, const br_eax_state *st,
|
||||
const void *nonce, size_t len)
|
||||
{
|
||||
if (len == 0) {
|
||||
omac_start(ctx, 0);
|
||||
} else {
|
||||
memcpy(ctx->cbcmac, st->st[0], sizeof ctx->cbcmac);
|
||||
ctx->ptr = 0;
|
||||
do_cbcmac_chunk(ctx, nonce, len);
|
||||
}
|
||||
do_pad(ctx);
|
||||
memcpy(ctx->nonce, ctx->cbcmac, sizeof ctx->cbcmac);
|
||||
|
||||
memcpy(ctx->cbcmac, st->st[1], sizeof ctx->cbcmac);
|
||||
ctx->ptr = 0;
|
||||
|
||||
memcpy(ctx->ctr, st->st[2], sizeof ctx->ctr);
|
||||
|
||||
/*
|
||||
* We use ctx->head[0] as a flag to indicate that we use a
|
||||
* a recorded state, with ctx->ctr containing the preprocessed
|
||||
* first block for OMAC^2.
|
||||
*/
|
||||
ctx->head[0] = 1;
|
||||
}
|
||||
|
||||
/* see bearssl_aead.h */
|
||||
void
|
||||
br_eax_reset_post_aad(br_eax_context *ctx, const br_eax_state *st,
|
||||
const void *nonce, size_t len)
|
||||
{
|
||||
if (len == 0) {
|
||||
omac_start(ctx, 0);
|
||||
} else {
|
||||
memcpy(ctx->cbcmac, st->st[0], sizeof ctx->cbcmac);
|
||||
ctx->ptr = 0;
|
||||
do_cbcmac_chunk(ctx, nonce, len);
|
||||
}
|
||||
do_pad(ctx);
|
||||
memcpy(ctx->nonce, ctx->cbcmac, sizeof ctx->cbcmac);
|
||||
memcpy(ctx->ctr, ctx->nonce, sizeof ctx->nonce);
|
||||
|
||||
memcpy(ctx->head, st->st[1], sizeof ctx->head);
|
||||
|
||||
memcpy(ctx->cbcmac, st->st[2], sizeof ctx->cbcmac);
|
||||
ctx->ptr = 0;
|
||||
}
|
||||
|
||||
/* see bearssl_aead.h */
|
||||
void
|
||||
br_eax_aad_inject(br_eax_context *ctx, const void *data, size_t len)
|
||||
{
|
||||
size_t ptr;
|
||||
|
||||
ptr = ctx->ptr;
|
||||
|
||||
/*
|
||||
* If there is a partial block, first complete it.
|
||||
*/
|
||||
if (ptr < 16) {
|
||||
size_t clen;
|
||||
|
||||
clen = 16 - ptr;
|
||||
if (len <= clen) {
|
||||
memcpy(ctx->buf + ptr, data, len);
|
||||
ctx->ptr = ptr + len;
|
||||
return;
|
||||
}
|
||||
memcpy(ctx->buf + ptr, data, clen);
|
||||
data = (const unsigned char *)data + clen;
|
||||
len -= clen;
|
||||
}
|
||||
|
||||
/*
|
||||
* We now have a full block in buf[], and this is not the last
|
||||
* block.
|
||||
*/
|
||||
do_cbcmac_chunk(ctx, data, len);
|
||||
}
|
||||
|
||||
/* see bearssl_aead.h */
|
||||
void
|
||||
br_eax_flip(br_eax_context *ctx)
|
||||
{
|
||||
int from_capture;
|
||||
|
||||
/*
|
||||
* ctx->head[0] may be non-zero if the context was reset with
|
||||
* a pre-AAD captured state. In that case, ctx->ctr[] contains
|
||||
* the state for OMAC^2 _after_ processing the first block.
|
||||
*/
|
||||
from_capture = ctx->head[0];
|
||||
|
||||
/*
|
||||
* Complete the OMAC computation on the AAD.
|
||||
*/
|
||||
do_pad(ctx);
|
||||
memcpy(ctx->head, ctx->cbcmac, sizeof ctx->cbcmac);
|
||||
|
||||
/*
|
||||
* Start OMAC^2 for the encrypted data.
|
||||
* If the context was initialized from a captured state, then
|
||||
* the OMAC^2 value is in the ctr[] array.
|
||||
*/
|
||||
if (from_capture) {
|
||||
memcpy(ctx->cbcmac, ctx->ctr, sizeof ctx->cbcmac);
|
||||
ctx->ptr = 0;
|
||||
} else {
|
||||
omac_start(ctx, 2);
|
||||
}
|
||||
|
||||
/*
|
||||
* Initial counter value for CTR is the processed nonce.
|
||||
*/
|
||||
memcpy(ctx->ctr, ctx->nonce, sizeof ctx->nonce);
|
||||
}
|
||||
|
||||
/* see bearssl_aead.h */
|
||||
void
|
||||
br_eax_run(br_eax_context *ctx, int encrypt, void *data, size_t len)
|
||||
{
|
||||
unsigned char *dbuf;
|
||||
size_t ptr;
|
||||
|
||||
/*
|
||||
* Ensure that there is actual data to process.
|
||||
*/
|
||||
if (len == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
dbuf = data;
|
||||
ptr = ctx->ptr;
|
||||
|
||||
/*
|
||||
* We may have ptr == 0 here if we initialized from a captured
|
||||
* state. In that case, there is no partially consumed block
|
||||
* or unprocessed data.
|
||||
*/
|
||||
if (ptr != 0 && ptr != 16) {
|
||||
/*
|
||||
* We have a partially consumed block.
|
||||
*/
|
||||
size_t u, clen;
|
||||
|
||||
clen = 16 - ptr;
|
||||
if (len <= clen) {
|
||||
clen = len;
|
||||
}
|
||||
if (encrypt) {
|
||||
for (u = 0; u < clen; u ++) {
|
||||
ctx->buf[ptr + u] ^= dbuf[u];
|
||||
}
|
||||
memcpy(dbuf, ctx->buf + ptr, clen);
|
||||
} else {
|
||||
for (u = 0; u < clen; u ++) {
|
||||
unsigned dx, sx;
|
||||
|
||||
sx = ctx->buf[ptr + u];
|
||||
dx = dbuf[u];
|
||||
ctx->buf[ptr + u] = dx;
|
||||
dbuf[u] = sx ^ dx;
|
||||
}
|
||||
}
|
||||
|
||||
if (len <= clen) {
|
||||
ctx->ptr = ptr + clen;
|
||||
return;
|
||||
}
|
||||
dbuf += clen;
|
||||
len -= clen;
|
||||
}
|
||||
|
||||
/*
|
||||
* We now have a complete encrypted block in buf[] that must still
|
||||
* be processed with OMAC, and this is not the final buf.
|
||||
* Exception: when ptr == 0, no block has been produced yet.
|
||||
*/
|
||||
if (ptr != 0) {
|
||||
(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac,
|
||||
ctx->buf, sizeof ctx->buf);
|
||||
}
|
||||
|
||||
/*
|
||||
* Do CTR encryption or decryption and CBC-MAC for all full blocks
|
||||
* except the last.
|
||||
*/
|
||||
ptr = len & (size_t)15;
|
||||
if (ptr == 0) {
|
||||
len -= 16;
|
||||
ptr = 16;
|
||||
} else {
|
||||
len -= ptr;
|
||||
}
|
||||
if (encrypt) {
|
||||
(*ctx->bctx)->encrypt(ctx->bctx, ctx->ctr, ctx->cbcmac,
|
||||
dbuf, len);
|
||||
} else {
|
||||
(*ctx->bctx)->decrypt(ctx->bctx, ctx->ctr, ctx->cbcmac,
|
||||
dbuf, len);
|
||||
}
|
||||
dbuf += len;
|
||||
|
||||
/*
|
||||
* Compute next block of CTR stream, and use it to finish
|
||||
* encrypting or decrypting the data.
|
||||
*/
|
||||
memset(ctx->buf, 0, sizeof ctx->buf);
|
||||
(*ctx->bctx)->ctr(ctx->bctx, ctx->ctr, ctx->buf, sizeof ctx->buf);
|
||||
if (encrypt) {
|
||||
size_t u;
|
||||
|
||||
for (u = 0; u < ptr; u ++) {
|
||||
ctx->buf[u] ^= dbuf[u];
|
||||
}
|
||||
memcpy(dbuf, ctx->buf, ptr);
|
||||
} else {
|
||||
size_t u;
|
||||
|
||||
for (u = 0; u < ptr; u ++) {
|
||||
unsigned dx, sx;
|
||||
|
||||
sx = ctx->buf[u];
|
||||
dx = dbuf[u];
|
||||
ctx->buf[u] = dx;
|
||||
dbuf[u] = sx ^ dx;
|
||||
}
|
||||
}
|
||||
ctx->ptr = ptr;
|
||||
}
|
||||
|
||||
/*
|
||||
* Complete tag computation. The final tag is written in ctx->cbcmac.
|
||||
*/
|
||||
static void
|
||||
do_final(br_eax_context *ctx)
|
||||
{
|
||||
size_t u;
|
||||
|
||||
do_pad(ctx);
|
||||
|
||||
/*
|
||||
* Authentication tag is the XOR of the three OMAC outputs for
|
||||
* the nonce, AAD and encrypted data.
|
||||
*/
|
||||
for (u = 0; u < 16; u ++) {
|
||||
ctx->cbcmac[u] ^= ctx->nonce[u] ^ ctx->head[u];
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_aead.h */
|
||||
void
|
||||
br_eax_get_tag(br_eax_context *ctx, void *tag)
|
||||
{
|
||||
do_final(ctx);
|
||||
memcpy(tag, ctx->cbcmac, sizeof ctx->cbcmac);
|
||||
}
|
||||
|
||||
/* see bearssl_aead.h */
|
||||
void
|
||||
br_eax_get_tag_trunc(br_eax_context *ctx, void *tag, size_t len)
|
||||
{
|
||||
do_final(ctx);
|
||||
memcpy(tag, ctx->cbcmac, len);
|
||||
}
|
||||
|
||||
/* see bearssl_aead.h */
|
||||
uint32_t
|
||||
br_eax_check_tag_trunc(br_eax_context *ctx, const void *tag, size_t len)
|
||||
{
|
||||
unsigned char tmp[16];
|
||||
size_t u;
|
||||
int x;
|
||||
|
||||
br_eax_get_tag(ctx, tmp);
|
||||
x = 0;
|
||||
for (u = 0; u < len; u ++) {
|
||||
x |= tmp[u] ^ ((const unsigned char *)tag)[u];
|
||||
}
|
||||
return EQ0(x);
|
||||
}
|
||||
|
||||
/* see bearssl_aead.h */
|
||||
uint32_t
|
||||
br_eax_check_tag(br_eax_context *ctx, const void *tag)
|
||||
{
|
||||
return br_eax_check_tag_trunc(ctx, tag, 16);
|
||||
}
|
||||
|
||||
/* see bearssl_aead.h */
|
||||
const br_aead_class br_eax_vtable = {
|
||||
16,
|
||||
(void (*)(const br_aead_class **, const void *, size_t))
|
||||
&br_eax_reset,
|
||||
(void (*)(const br_aead_class **, const void *, size_t))
|
||||
&br_eax_aad_inject,
|
||||
(void (*)(const br_aead_class **))
|
||||
&br_eax_flip,
|
||||
(void (*)(const br_aead_class **, int, void *, size_t))
|
||||
&br_eax_run,
|
||||
(void (*)(const br_aead_class **, void *))
|
||||
&br_eax_get_tag,
|
||||
(uint32_t (*)(const br_aead_class **, const void *))
|
||||
&br_eax_check_tag,
|
||||
(void (*)(const br_aead_class **, void *, size_t))
|
||||
&br_eax_get_tag_trunc,
|
||||
(uint32_t (*)(const br_aead_class **, const void *, size_t))
|
||||
&br_eax_check_tag_trunc
|
||||
};
|
||||
|
|
@ -0,0 +1,121 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
static const unsigned char *
|
||||
api_generator(int curve, size_t *len)
|
||||
{
|
||||
switch (curve) {
|
||||
case BR_EC_secp256r1:
|
||||
return br_ec_p256_m15.generator(curve, len);
|
||||
case BR_EC_curve25519:
|
||||
return br_ec_c25519_m15.generator(curve, len);
|
||||
default:
|
||||
return br_ec_prime_i15.generator(curve, len);
|
||||
}
|
||||
}
|
||||
|
||||
static const unsigned char *
|
||||
api_order(int curve, size_t *len)
|
||||
{
|
||||
switch (curve) {
|
||||
case BR_EC_secp256r1:
|
||||
return br_ec_p256_m15.order(curve, len);
|
||||
case BR_EC_curve25519:
|
||||
return br_ec_c25519_m15.order(curve, len);
|
||||
default:
|
||||
return br_ec_prime_i15.order(curve, len);
|
||||
}
|
||||
}
|
||||
|
||||
static size_t
|
||||
api_xoff(int curve, size_t *len)
|
||||
{
|
||||
switch (curve) {
|
||||
case BR_EC_secp256r1:
|
||||
return br_ec_p256_m15.xoff(curve, len);
|
||||
case BR_EC_curve25519:
|
||||
return br_ec_c25519_m15.xoff(curve, len);
|
||||
default:
|
||||
return br_ec_prime_i15.xoff(curve, len);
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
api_mul(unsigned char *G, size_t Glen,
|
||||
const unsigned char *kb, size_t kblen, int curve)
|
||||
{
|
||||
switch (curve) {
|
||||
case BR_EC_secp256r1:
|
||||
return br_ec_p256_m15.mul(G, Glen, kb, kblen, curve);
|
||||
case BR_EC_curve25519:
|
||||
return br_ec_c25519_m15.mul(G, Glen, kb, kblen, curve);
|
||||
default:
|
||||
return br_ec_prime_i15.mul(G, Glen, kb, kblen, curve);
|
||||
}
|
||||
}
|
||||
|
||||
static size_t
|
||||
api_mulgen(unsigned char *R,
|
||||
const unsigned char *x, size_t xlen, int curve)
|
||||
{
|
||||
switch (curve) {
|
||||
case BR_EC_secp256r1:
|
||||
return br_ec_p256_m15.mulgen(R, x, xlen, curve);
|
||||
case BR_EC_curve25519:
|
||||
return br_ec_c25519_m15.mulgen(R, x, xlen, curve);
|
||||
default:
|
||||
return br_ec_prime_i15.mulgen(R, x, xlen, curve);
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
api_muladd(unsigned char *A, const unsigned char *B, size_t len,
|
||||
const unsigned char *x, size_t xlen,
|
||||
const unsigned char *y, size_t ylen, int curve)
|
||||
{
|
||||
switch (curve) {
|
||||
case BR_EC_secp256r1:
|
||||
return br_ec_p256_m15.muladd(A, B, len,
|
||||
x, xlen, y, ylen, curve);
|
||||
case BR_EC_curve25519:
|
||||
return br_ec_c25519_m15.muladd(A, B, len,
|
||||
x, xlen, y, ylen, curve);
|
||||
default:
|
||||
return br_ec_prime_i15.muladd(A, B, len,
|
||||
x, xlen, y, ylen, curve);
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_ec.h */
|
||||
const br_ec_impl br_ec_all_m15 = {
|
||||
(uint32_t)0x23800000,
|
||||
&api_generator,
|
||||
&api_order,
|
||||
&api_xoff,
|
||||
&api_mul,
|
||||
&api_mulgen,
|
||||
&api_muladd
|
||||
};
|
||||
|
|
@ -0,0 +1,171 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
static const unsigned char *
|
||||
api_generator(int curve, size_t *len)
|
||||
{
|
||||
switch (curve) {
|
||||
case BR_EC_secp256r1:
|
||||
#if BR_INT128 || BR_UMUL128
|
||||
return br_ec_p256_m64.generator(curve, len);
|
||||
#else
|
||||
return br_ec_p256_m31.generator(curve, len);
|
||||
#endif
|
||||
case BR_EC_curve25519:
|
||||
#if BR_INT128 || BR_UMUL128
|
||||
return br_ec_c25519_m64.generator(curve, len);
|
||||
#else
|
||||
return br_ec_c25519_m31.generator(curve, len);
|
||||
#endif
|
||||
default:
|
||||
return br_ec_prime_i31.generator(curve, len);
|
||||
}
|
||||
}
|
||||
|
||||
static const unsigned char *
|
||||
api_order(int curve, size_t *len)
|
||||
{
|
||||
switch (curve) {
|
||||
case BR_EC_secp256r1:
|
||||
#if BR_INT128 || BR_UMUL128
|
||||
return br_ec_p256_m64.order(curve, len);
|
||||
#else
|
||||
return br_ec_p256_m31.order(curve, len);
|
||||
#endif
|
||||
case BR_EC_curve25519:
|
||||
#if BR_INT128 || BR_UMUL128
|
||||
return br_ec_c25519_m64.order(curve, len);
|
||||
#else
|
||||
return br_ec_c25519_m31.order(curve, len);
|
||||
#endif
|
||||
default:
|
||||
return br_ec_prime_i31.order(curve, len);
|
||||
}
|
||||
}
|
||||
|
||||
static size_t
|
||||
api_xoff(int curve, size_t *len)
|
||||
{
|
||||
switch (curve) {
|
||||
case BR_EC_secp256r1:
|
||||
#if BR_INT128 || BR_UMUL128
|
||||
return br_ec_p256_m64.xoff(curve, len);
|
||||
#else
|
||||
return br_ec_p256_m31.xoff(curve, len);
|
||||
#endif
|
||||
case BR_EC_curve25519:
|
||||
#if BR_INT128 || BR_UMUL128
|
||||
return br_ec_c25519_m64.xoff(curve, len);
|
||||
#else
|
||||
return br_ec_c25519_m31.xoff(curve, len);
|
||||
#endif
|
||||
default:
|
||||
return br_ec_prime_i31.xoff(curve, len);
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
api_mul(unsigned char *G, size_t Glen,
|
||||
const unsigned char *kb, size_t kblen, int curve)
|
||||
{
|
||||
switch (curve) {
|
||||
case BR_EC_secp256r1:
|
||||
#if BR_INT128 || BR_UMUL128
|
||||
return br_ec_p256_m64.mul(G, Glen, kb, kblen, curve);
|
||||
#else
|
||||
return br_ec_p256_m31.mul(G, Glen, kb, kblen, curve);
|
||||
#endif
|
||||
case BR_EC_curve25519:
|
||||
#if BR_INT128 || BR_UMUL128
|
||||
return br_ec_c25519_m64.mul(G, Glen, kb, kblen, curve);
|
||||
#else
|
||||
return br_ec_c25519_m31.mul(G, Glen, kb, kblen, curve);
|
||||
#endif
|
||||
default:
|
||||
return br_ec_prime_i31.mul(G, Glen, kb, kblen, curve);
|
||||
}
|
||||
}
|
||||
|
||||
static size_t
|
||||
api_mulgen(unsigned char *R,
|
||||
const unsigned char *x, size_t xlen, int curve)
|
||||
{
|
||||
switch (curve) {
|
||||
case BR_EC_secp256r1:
|
||||
#if BR_INT128 || BR_UMUL128
|
||||
return br_ec_p256_m64.mulgen(R, x, xlen, curve);
|
||||
#else
|
||||
return br_ec_p256_m31.mulgen(R, x, xlen, curve);
|
||||
#endif
|
||||
case BR_EC_curve25519:
|
||||
#if BR_INT128 || BR_UMUL128
|
||||
return br_ec_c25519_m64.mulgen(R, x, xlen, curve);
|
||||
#else
|
||||
return br_ec_c25519_m31.mulgen(R, x, xlen, curve);
|
||||
#endif
|
||||
default:
|
||||
return br_ec_prime_i31.mulgen(R, x, xlen, curve);
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
api_muladd(unsigned char *A, const unsigned char *B, size_t len,
|
||||
const unsigned char *x, size_t xlen,
|
||||
const unsigned char *y, size_t ylen, int curve)
|
||||
{
|
||||
switch (curve) {
|
||||
case BR_EC_secp256r1:
|
||||
#if BR_INT128 || BR_UMUL128
|
||||
return br_ec_p256_m64.muladd(A, B, len,
|
||||
x, xlen, y, ylen, curve);
|
||||
#else
|
||||
return br_ec_p256_m31.muladd(A, B, len,
|
||||
x, xlen, y, ylen, curve);
|
||||
#endif
|
||||
case BR_EC_curve25519:
|
||||
#if BR_INT128 || BR_UMUL128
|
||||
return br_ec_c25519_m64.muladd(A, B, len,
|
||||
x, xlen, y, ylen, curve);
|
||||
#else
|
||||
return br_ec_c25519_m31.muladd(A, B, len,
|
||||
x, xlen, y, ylen, curve);
|
||||
#endif
|
||||
default:
|
||||
return br_ec_prime_i31.muladd(A, B, len,
|
||||
x, xlen, y, ylen, curve);
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_ec.h */
|
||||
const br_ec_impl br_ec_all_m31 = {
|
||||
(uint32_t)0x23800000,
|
||||
&api_generator,
|
||||
&api_order,
|
||||
&api_xoff,
|
||||
&api_mul,
|
||||
&api_mulgen,
|
||||
&api_muladd
|
||||
};
|
||||
|
|
@ -0,0 +1,398 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * Field parameters, as arrays of 15-bit words. In each array the first
 * word (0x0110) is a header word, followed by 17 value words, least
 * significant word first.
 *
 *   C255_P    field modulus p = 2^255-19
 *   P0I       constant passed along with C255_P to the Montgomery
 *             multiplication routine
 *   C255_R2   R^2 mod p, with R = 2^(15k) for the smallest k such that
 *             R >= p (here R = 2^255, R mod p = 19, R^2 mod p = 361)
 */

static const uint16_t C255_P[] = {
	0x0110,
	0x7FED, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF
};

#define P0I   0x4A1B

static const uint16_t C255_R2[] = {
	0x0110,
	0x0169, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
|
||||
|
||||
/* obsolete
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
static void
|
||||
print_int_mont(const char *name, const uint16_t *x)
|
||||
{
|
||||
uint16_t y[18];
|
||||
unsigned char tmp[32];
|
||||
size_t u;
|
||||
|
||||
printf("%s = ", name);
|
||||
memcpy(y, x, sizeof y);
|
||||
br_i15_from_monty(y, C255_P, P0I);
|
||||
br_i15_encode(tmp, sizeof tmp, y);
|
||||
for (u = 0; u < sizeof tmp; u ++) {
|
||||
printf("%02X", tmp[u]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
*/
|
||||
|
||||
/*
 * Constant used by the ladder step in api_mul(). The value words
 * encode 2311635 = 121665 * 19, i.e. 121665 in Montgomery
 * representation (R mod p = 19).
 */
static const uint16_t C255_A24[] = {
	0x0110,
	0x45D3, 0x0046, 0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};

/*
 * Encoded generator, as returned by api_generator(): 0x09 followed by
 * 31 zero bytes.
 */
static const unsigned char GEN[32] = { 0x09 };

/*
 * Value returned by api_order(): 0x7F followed by 31 bytes of
 * value 0xFF.
 */
static const unsigned char ORDER[] = {
	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
|
||||
|
||||
static const unsigned char *
|
||||
api_generator(int curve, size_t *len)
|
||||
{
|
||||
(void)curve;
|
||||
*len = 32;
|
||||
return GEN;
|
||||
}
|
||||
|
||||
static const unsigned char *
|
||||
api_order(int curve, size_t *len)
|
||||
{
|
||||
(void)curve;
|
||||
*len = 32;
|
||||
return ORDER;
|
||||
}
|
||||
|
||||
/*
 * Report an offset of 0 and a length of 32 bytes (written in *len).
 * The curve identifier is ignored.
 */
static size_t
api_xoff(int curve, size_t *len)
{
	size_t offset;

	(void)curve;
	offset = 0;
	*len = 32;
	return offset;
}
|
||||
|
||||
/*
 * Conditional swap of two 18-word arrays, without any branch on ctl:
 * the arrays are exchanged if ctl is 1, and left unchanged if ctl
 * is 0.
 */
static void
cswap(uint16_t *a, uint16_t *b, uint32_t ctl)
{
	uint32_t mask;
	int k;

	/* 0 -> 0x00000000, 1 -> 0xFFFFFFFF */
	mask = (uint32_t)0 - ctl;
	for (k = 0; k < 18; k ++) {
		uint32_t delta;

		delta = mask & ((uint32_t)a[k] ^ (uint32_t)b[k]);
		a[k] = (uint16_t)(a[k] ^ delta);
		b[k] = (uint16_t)(b[k] ^ delta);
	}
}
|
||||
|
||||
static void
|
||||
c255_add(uint16_t *d, const uint16_t *a, const uint16_t *b)
|
||||
{
|
||||
uint32_t ctl;
|
||||
uint16_t t[18];
|
||||
|
||||
memcpy(t, a, sizeof t);
|
||||
ctl = br_i15_add(t, b, 1);
|
||||
ctl |= NOT(br_i15_sub(t, C255_P, 0));
|
||||
br_i15_sub(t, C255_P, ctl);
|
||||
memcpy(d, t, sizeof t);
|
||||
}
|
||||
|
||||
static void
|
||||
c255_sub(uint16_t *d, const uint16_t *a, const uint16_t *b)
|
||||
{
|
||||
uint16_t t[18];
|
||||
|
||||
memcpy(t, a, sizeof t);
|
||||
br_i15_add(t, C255_P, br_i15_sub(t, b, 1));
|
||||
memcpy(d, t, sizeof t);
|
||||
}
|
||||
|
||||
static void
|
||||
c255_mul(uint16_t *d, const uint16_t *a, const uint16_t *b)
|
||||
{
|
||||
uint16_t t[18];
|
||||
|
||||
br_i15_montymul(t, a, b, C255_P, P0I);
|
||||
memcpy(d, t, sizeof t);
|
||||
}
|
||||
|
||||
/*
 * Reverse, in place, the order of the 32 bytes at G.
 */
static void
byteswap(unsigned char *G)
{
	unsigned char *lo, *hi;

	lo = G;
	hi = G + 31;
	while (lo < hi) {
		unsigned char t;

		t = *lo;
		*lo = *hi;
		*hi = t;
		lo ++;
		hi --;
	}
}
|
||||
|
||||
static uint32_t
|
||||
api_mul(unsigned char *G, size_t Glen,
|
||||
const unsigned char *kb, size_t kblen, int curve)
|
||||
{
|
||||
#define ILEN (18 * sizeof(uint16_t))
|
||||
|
||||
/*
|
||||
* The a[] and b[] arrays have an extra word to allow for
|
||||
* decoding without using br_i15_decode_reduce().
|
||||
*/
|
||||
uint16_t x1[18], x2[18], x3[18], z2[18], z3[18];
|
||||
uint16_t a[19], aa[18], b[19], bb[18];
|
||||
uint16_t c[18], d[18], e[18], da[18], cb[18];
|
||||
unsigned char k[32];
|
||||
uint32_t swap;
|
||||
int i;
|
||||
|
||||
(void)curve;
|
||||
|
||||
/*
|
||||
* Points are encoded over exactly 32 bytes. Multipliers must fit
|
||||
* in 32 bytes as well.
|
||||
* RFC 7748 mandates that the high bit of the last point byte must
|
||||
* be ignored/cleared.
|
||||
*/
|
||||
if (Glen != 32 || kblen > 32) {
|
||||
return 0;
|
||||
}
|
||||
G[31] &= 0x7F;
|
||||
|
||||
/*
|
||||
* Byteswap the point encoding, because it uses little-endian, and
|
||||
* the generic decoding routine uses big-endian.
|
||||
*/
|
||||
byteswap(G);
|
||||
|
||||
/*
|
||||
* Decode the point ('u' coordinate). This should be reduced
|
||||
* modulo p, but we prefer to avoid the dependency on
|
||||
* br_i15_decode_reduce(). Instead, we use br_i15_decode_mod()
|
||||
* with a synthetic modulus of value 2^255 (this must work
|
||||
* since G was truncated to 255 bits), then use a conditional
|
||||
* subtraction. We use br_i15_decode_mod() and not
|
||||
* br_i15_decode(), because the ec_prime_i15 implementation uses
|
||||
* the former but not the latter.
|
||||
* br_i15_decode_reduce(a, G, 32, C255_P);
|
||||
*/
|
||||
br_i15_zero(b, 0x111);
|
||||
b[18] = 1;
|
||||
br_i15_decode_mod(a, G, 32, b);
|
||||
a[0] = 0x110;
|
||||
br_i15_sub(a, C255_P, NOT(br_i15_sub(a, C255_P, 0)));
|
||||
|
||||
/*
|
||||
* Initialise variables x1, x2, z2, x3 and z3. We set all of them
|
||||
* into Montgomery representation.
|
||||
*/
|
||||
br_i15_montymul(x1, a, C255_R2, C255_P, P0I);
|
||||
memcpy(x3, x1, ILEN);
|
||||
br_i15_zero(z2, C255_P[0]);
|
||||
memcpy(x2, z2, ILEN);
|
||||
x2[1] = 19;
|
||||
memcpy(z3, x2, ILEN);
|
||||
|
||||
memset(k, 0, (sizeof k) - kblen);
|
||||
memcpy(k + (sizeof k) - kblen, kb, kblen);
|
||||
k[31] &= 0xF8;
|
||||
k[0] &= 0x7F;
|
||||
k[0] |= 0x40;
|
||||
|
||||
/* obsolete
|
||||
print_int_mont("x1", x1);
|
||||
*/
|
||||
|
||||
swap = 0;
|
||||
for (i = 254; i >= 0; i --) {
|
||||
uint32_t kt;
|
||||
|
||||
kt = (k[31 - (i >> 3)] >> (i & 7)) & 1;
|
||||
swap ^= kt;
|
||||
cswap(x2, x3, swap);
|
||||
cswap(z2, z3, swap);
|
||||
swap = kt;
|
||||
|
||||
/* obsolete
|
||||
print_int_mont("x2", x2);
|
||||
print_int_mont("z2", z2);
|
||||
print_int_mont("x3", x3);
|
||||
print_int_mont("z3", z3);
|
||||
*/
|
||||
|
||||
c255_add(a, x2, z2);
|
||||
c255_mul(aa, a, a);
|
||||
c255_sub(b, x2, z2);
|
||||
c255_mul(bb, b, b);
|
||||
c255_sub(e, aa, bb);
|
||||
c255_add(c, x3, z3);
|
||||
c255_sub(d, x3, z3);
|
||||
c255_mul(da, d, a);
|
||||
c255_mul(cb, c, b);
|
||||
|
||||
/* obsolete
|
||||
print_int_mont("a ", a);
|
||||
print_int_mont("aa", aa);
|
||||
print_int_mont("b ", b);
|
||||
print_int_mont("bb", bb);
|
||||
print_int_mont("e ", e);
|
||||
print_int_mont("c ", c);
|
||||
print_int_mont("d ", d);
|
||||
print_int_mont("da", da);
|
||||
print_int_mont("cb", cb);
|
||||
*/
|
||||
|
||||
c255_add(x3, da, cb);
|
||||
c255_mul(x3, x3, x3);
|
||||
c255_sub(z3, da, cb);
|
||||
c255_mul(z3, z3, z3);
|
||||
c255_mul(z3, z3, x1);
|
||||
c255_mul(x2, aa, bb);
|
||||
c255_mul(z2, C255_A24, e);
|
||||
c255_add(z2, z2, aa);
|
||||
c255_mul(z2, e, z2);
|
||||
|
||||
/* obsolete
|
||||
print_int_mont("x2", x2);
|
||||
print_int_mont("z2", z2);
|
||||
print_int_mont("x3", x3);
|
||||
print_int_mont("z3", z3);
|
||||
*/
|
||||
}
|
||||
cswap(x2, x3, swap);
|
||||
cswap(z2, z3, swap);
|
||||
|
||||
/*
|
||||
* Inverse z2 with a modular exponentiation. This is a simple
|
||||
* square-and-multiply algorithm; we mutualise most non-squarings
|
||||
* since the exponent contains almost only ones.
|
||||
*/
|
||||
memcpy(a, z2, ILEN);
|
||||
for (i = 0; i < 15; i ++) {
|
||||
c255_mul(a, a, a);
|
||||
c255_mul(a, a, z2);
|
||||
}
|
||||
memcpy(b, a, ILEN);
|
||||
for (i = 0; i < 14; i ++) {
|
||||
int j;
|
||||
|
||||
for (j = 0; j < 16; j ++) {
|
||||
c255_mul(b, b, b);
|
||||
}
|
||||
c255_mul(b, b, a);
|
||||
}
|
||||
for (i = 14; i >= 0; i --) {
|
||||
c255_mul(b, b, b);
|
||||
if ((0xFFEB >> i) & 1) {
|
||||
c255_mul(b, z2, b);
|
||||
}
|
||||
}
|
||||
c255_mul(b, x2, b);
|
||||
|
||||
/*
|
||||
* To avoid a dependency on br_i15_from_monty(), we use a
|
||||
* Montgomery multiplication with 1.
|
||||
* memcpy(x2, b, ILEN);
|
||||
* br_i15_from_monty(x2, C255_P, P0I);
|
||||
*/
|
||||
br_i15_zero(a, C255_P[0]);
|
||||
a[1] = 1;
|
||||
br_i15_montymul(x2, a, b, C255_P, P0I);
|
||||
|
||||
br_i15_encode(G, 32, x2);
|
||||
byteswap(G);
|
||||
return 1;
|
||||
|
||||
#undef ILEN
|
||||
}
|
||||
|
||||
/*
 * Multiply the generator by x: the encoded generator is copied into R,
 * then multiplied in place with api_mul(). The returned value is the
 * length of the encoded generator; the status reported by api_mul() is
 * not looked at.
 */
static size_t
api_mulgen(unsigned char *R,
	const unsigned char *x, size_t xlen, int curve)
{
	const unsigned char *gen;
	size_t gen_len;

	gen = api_generator(curve, &gen_len);
	memcpy(R, gen, gen_len);
	api_mul(R, gen_len, x, xlen, curve);
	return gen_len;
}
|
||||
|
||||
/*
 * Not implemented: this method is used for ECDSA only, and there is
 * no ECDSA over Curve25519 (which uses EdDSA instead). All parameters
 * are ignored and 0 is always returned.
 */
static uint32_t
api_muladd(unsigned char *A, const unsigned char *B, size_t len,
	const unsigned char *x, size_t xlen,
	const unsigned char *y, size_t ylen, int curve)
{
	(void)A;
	(void)B;
	(void)len;
	(void)x;
	(void)xlen;
	(void)y;
	(void)ylen;
	(void)curve;
	return (uint32_t)0;
}
|
||||
|
||||
/* see bearssl_ec.h */
|
||||
const br_ec_impl br_ec_c25519_i15 = {
|
||||
(uint32_t)0x20000000,
|
||||
&api_generator,
|
||||
&api_order,
|
||||
&api_xoff,
|
||||
&api_mul,
|
||||
&api_mulgen,
|
||||
&api_muladd
|
||||
};
|
||||
|
|
@ -0,0 +1,390 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
|
||||
* Parameters for the field:
|
||||
* - field modulus p = 2^255-19
|
||||
* - R^2 mod p (R = 2^(31k) for the smallest k such that R >= p)
|
||||
*/
|
||||
|
||||
/*
 * Field modulus p = 2^255-19. Word 0 is the header word (0x107); the
 * nine following words carry the value in little-endian order, 31 bits
 * per word: 2^31-19, then seven all-ones words, then the top 7 bits.
 */
static const uint32_t C255_P[] = {
	0x00000107,
	0x7FFFFFED,
	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
	0x0000007F
};
|
||||
|
||||
/* 19 * P0I = 1 mod 2^31, i.e. P0I = -1/p mod 2^31 for p = 2^255-19; handed to br_i31_montymul(). */
#define P0I 0x286BCA1B
|
||||
|
||||
/*
 * R^2 mod p, with R = 2^(31*9) = 2^279. Since 2^255 = 19 mod p, this is
 * 19^2 * 2^48 = 361 * 2^48: the only non-zero value word is the second
 * one (361 << 17 = 0x02D20000). Word 0 is the same header as C255_P.
 */
static const uint32_t C255_R2[] = {
	0x00000107,
	0x00000000, 0x02D20000, 0x00000000,
	0x00000000, 0x00000000, 0x00000000,
	0x00000000, 0x00000000, 0x00000000
};
|
||||
|
||||
/*
 * The ladder constant 121665 multiplied by R = 2^279 modulo p, i.e.
 * 121665 * 19 * 2^24 = 0x2345D3 << 24, split over two 31-bit words.
 * Word 0 is the same header as C255_P.
 */
static const uint32_t C255_A24[] = {
	0x00000107,
	0x53000000, 0x0000468B, 0x00000000,
	0x00000000, 0x00000000, 0x00000000,
	0x00000000, 0x00000000, 0x00000000
};
|
||||
|
||||
/* obsolete
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
static void
|
||||
print_int_mont(const char *name, const uint32_t *x)
|
||||
{
|
||||
uint32_t y[10];
|
||||
unsigned char tmp[32];
|
||||
size_t u;
|
||||
|
||||
printf("%s = ", name);
|
||||
memcpy(y, x, sizeof y);
|
||||
br_i31_from_monty(y, C255_P, P0I);
|
||||
br_i31_encode(tmp, sizeof tmp, y);
|
||||
for (u = 0; u < sizeof tmp; u ++) {
|
||||
printf("%02X", tmp[u]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
*/
|
||||
|
||||
/*
 * Generator encoding returned by api_generator(): 32 bytes, first byte
 * 0x09, all others zero (the value 9 in the little-endian point
 * encoding consumed by api_mul()).
 */
static const unsigned char GEN[32] = { 0x09 };
|
||||
|
||||
/*
 * Value returned by api_order(): 0x7F followed by 31 bytes of 0xFF
 * (2^255-1 when read as a big-endian integer).
 */
static const unsigned char ORDER[32] = {
	0x7F, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF
};
|
||||
|
||||
static const unsigned char *
|
||||
api_generator(int curve, size_t *len)
|
||||
{
|
||||
(void)curve;
|
||||
*len = 32;
|
||||
return GEN;
|
||||
}
|
||||
|
||||
static const unsigned char *
|
||||
api_order(int curve, size_t *len)
|
||||
{
|
||||
(void)curve;
|
||||
*len = 32;
|
||||
return ORDER;
|
||||
}
|
||||
|
||||
/*
 * Report where the X coordinate sits inside an encoded point: it
 * starts at offset 0 (return value) and is 32 bytes long (*len).
 * The curve identifier is not used.
 */
static size_t
api_xoff(int curve, size_t *len)
{
	const size_t x_offset = 0;

	(void)curve;
	*len = 32;
	return x_offset;
}
|
||||
|
||||
/*
 * Conditionally exchange two 10-word arrays without branching on the
 * condition: ctl must be 0 (leave both untouched) or 1 (swap).
 */
static void
cswap(uint32_t *a, uint32_t *b, uint32_t ctl)
{
	/* 0 -> 0x00000000, 1 -> 0xFFFFFFFF */
	const uint32_t mask = (uint32_t)0 - ctl;
	uint32_t *pa = a;
	uint32_t *pb = b;
	uint32_t *const stop = a + 10;

	while (pa != stop) {
		uint32_t delta;

		delta = mask & (*pa ^ *pb);
		*pa ++ ^= delta;
		*pb ++ ^= delta;
	}
}
|
||||
|
||||
static void
|
||||
c255_add(uint32_t *d, const uint32_t *a, const uint32_t *b)
|
||||
{
|
||||
uint32_t ctl;
|
||||
uint32_t t[10];
|
||||
|
||||
memcpy(t, a, sizeof t);
|
||||
ctl = br_i31_add(t, b, 1);
|
||||
ctl |= NOT(br_i31_sub(t, C255_P, 0));
|
||||
br_i31_sub(t, C255_P, ctl);
|
||||
memcpy(d, t, sizeof t);
|
||||
}
|
||||
|
||||
static void
|
||||
c255_sub(uint32_t *d, const uint32_t *a, const uint32_t *b)
|
||||
{
|
||||
uint32_t t[10];
|
||||
|
||||
memcpy(t, a, sizeof t);
|
||||
br_i31_add(t, C255_P, br_i31_sub(t, b, 1));
|
||||
memcpy(d, t, sizeof t);
|
||||
}
|
||||
|
||||
static void
|
||||
c255_mul(uint32_t *d, const uint32_t *a, const uint32_t *b)
|
||||
{
|
||||
uint32_t t[10];
|
||||
|
||||
br_i31_montymul(t, a, b, C255_P, P0I);
|
||||
memcpy(d, t, sizeof t);
|
||||
}
|
||||
|
||||
/*
 * Reverse, in place, the order of the 32 bytes at G (converts between
 * little-endian and big-endian encodings of a 32-byte value).
 */
static void
byteswap(unsigned char *G)
{
	unsigned char *head = G;
	unsigned char *tail = G + 31;

	while (head < tail) {
		unsigned char tmp;

		tmp = *head;
		*head ++ = *tail;
		*tail -- = tmp;
	}
}
|
||||
|
||||
static uint32_t
|
||||
api_mul(unsigned char *G, size_t Glen,
|
||||
const unsigned char *kb, size_t kblen, int curve)
|
||||
{
|
||||
uint32_t x1[10], x2[10], x3[10], z2[10], z3[10];
|
||||
uint32_t a[10], aa[10], b[10], bb[10];
|
||||
uint32_t c[10], d[10], e[10], da[10], cb[10];
|
||||
unsigned char k[32];
|
||||
uint32_t swap;
|
||||
int i;
|
||||
|
||||
(void)curve;
|
||||
|
||||
/*
|
||||
* Points are encoded over exactly 32 bytes. Multipliers must fit
|
||||
* in 32 bytes as well.
|
||||
* RFC 7748 mandates that the high bit of the last point byte must
|
||||
* be ignored/cleared.
|
||||
*/
|
||||
if (Glen != 32 || kblen > 32) {
|
||||
return 0;
|
||||
}
|
||||
G[31] &= 0x7F;
|
||||
|
||||
/*
|
||||
* Byteswap the point encoding, because it uses little-endian, and
|
||||
* the generic decoding routine uses big-endian.
|
||||
*/
|
||||
byteswap(G);
|
||||
|
||||
/*
|
||||
* Decode the point ('u' coordinate). This should be reduced
|
||||
* modulo p, but we prefer to avoid the dependency on
|
||||
* br_i31_decode_reduce(). Instead, we use br_i31_decode_mod()
|
||||
* with a synthetic modulus of value 2^255 (this must work
|
||||
* since G was truncated to 255 bits), then use a conditional
|
||||
* subtraction. We use br_i31_decode_mod() and not
|
||||
* br_i31_decode(), because the ec_prime_i31 implementation uses
|
||||
* the former but not the latter.
|
||||
* br_i31_decode_reduce(a, G, 32, C255_P);
|
||||
*/
|
||||
br_i31_zero(b, 0x108);
|
||||
b[9] = 0x0080;
|
||||
br_i31_decode_mod(a, G, 32, b);
|
||||
a[0] = 0x107;
|
||||
br_i31_sub(a, C255_P, NOT(br_i31_sub(a, C255_P, 0)));
|
||||
|
||||
/*
|
||||
* Initialise variables x1, x2, z2, x3 and z3. We set all of them
|
||||
* into Montgomery representation.
|
||||
*/
|
||||
br_i31_montymul(x1, a, C255_R2, C255_P, P0I);
|
||||
memcpy(x3, x1, sizeof x1);
|
||||
br_i31_zero(z2, C255_P[0]);
|
||||
memcpy(x2, z2, sizeof z2);
|
||||
x2[1] = 0x13000000;
|
||||
memcpy(z3, x2, sizeof x2);
|
||||
|
||||
/*
|
||||
* kb[] is in big-endian notation, but possibly shorter than k[].
|
||||
*/
|
||||
memset(k, 0, (sizeof k) - kblen);
|
||||
memcpy(k + (sizeof k) - kblen, kb, kblen);
|
||||
k[31] &= 0xF8;
|
||||
k[0] &= 0x7F;
|
||||
k[0] |= 0x40;
|
||||
|
||||
/* obsolete
|
||||
print_int_mont("x1", x1);
|
||||
*/
|
||||
|
||||
swap = 0;
|
||||
for (i = 254; i >= 0; i --) {
|
||||
uint32_t kt;
|
||||
|
||||
kt = (k[31 - (i >> 3)] >> (i & 7)) & 1;
|
||||
swap ^= kt;
|
||||
cswap(x2, x3, swap);
|
||||
cswap(z2, z3, swap);
|
||||
swap = kt;
|
||||
|
||||
/* obsolete
|
||||
print_int_mont("x2", x2);
|
||||
print_int_mont("z2", z2);
|
||||
print_int_mont("x3", x3);
|
||||
print_int_mont("z3", z3);
|
||||
*/
|
||||
|
||||
c255_add(a, x2, z2);
|
||||
c255_mul(aa, a, a);
|
||||
c255_sub(b, x2, z2);
|
||||
c255_mul(bb, b, b);
|
||||
c255_sub(e, aa, bb);
|
||||
c255_add(c, x3, z3);
|
||||
c255_sub(d, x3, z3);
|
||||
c255_mul(da, d, a);
|
||||
c255_mul(cb, c, b);
|
||||
|
||||
/* obsolete
|
||||
print_int_mont("a ", a);
|
||||
print_int_mont("aa", aa);
|
||||
print_int_mont("b ", b);
|
||||
print_int_mont("bb", bb);
|
||||
print_int_mont("e ", e);
|
||||
print_int_mont("c ", c);
|
||||
print_int_mont("d ", d);
|
||||
print_int_mont("da", da);
|
||||
print_int_mont("cb", cb);
|
||||
*/
|
||||
|
||||
c255_add(x3, da, cb);
|
||||
c255_mul(x3, x3, x3);
|
||||
c255_sub(z3, da, cb);
|
||||
c255_mul(z3, z3, z3);
|
||||
c255_mul(z3, z3, x1);
|
||||
c255_mul(x2, aa, bb);
|
||||
c255_mul(z2, C255_A24, e);
|
||||
c255_add(z2, z2, aa);
|
||||
c255_mul(z2, e, z2);
|
||||
|
||||
/* obsolete
|
||||
print_int_mont("x2", x2);
|
||||
print_int_mont("z2", z2);
|
||||
print_int_mont("x3", x3);
|
||||
print_int_mont("z3", z3);
|
||||
*/
|
||||
}
|
||||
cswap(x2, x3, swap);
|
||||
cswap(z2, z3, swap);
|
||||
|
||||
/*
|
||||
* Inverse z2 with a modular exponentiation. This is a simple
|
||||
* square-and-multiply algorithm; we mutualise most non-squarings
|
||||
* since the exponent contains almost only ones.
|
||||
*/
|
||||
memcpy(a, z2, sizeof z2);
|
||||
for (i = 0; i < 15; i ++) {
|
||||
c255_mul(a, a, a);
|
||||
c255_mul(a, a, z2);
|
||||
}
|
||||
memcpy(b, a, sizeof a);
|
||||
for (i = 0; i < 14; i ++) {
|
||||
int j;
|
||||
|
||||
for (j = 0; j < 16; j ++) {
|
||||
c255_mul(b, b, b);
|
||||
}
|
||||
c255_mul(b, b, a);
|
||||
}
|
||||
for (i = 14; i >= 0; i --) {
|
||||
c255_mul(b, b, b);
|
||||
if ((0xFFEB >> i) & 1) {
|
||||
c255_mul(b, z2, b);
|
||||
}
|
||||
}
|
||||
c255_mul(b, x2, b);
|
||||
|
||||
/*
|
||||
* To avoid a dependency on br_i31_from_monty(), we use
|
||||
* a Montgomery multiplication with 1.
|
||||
* memcpy(x2, b, sizeof b);
|
||||
* br_i31_from_monty(x2, C255_P, P0I);
|
||||
*/
|
||||
br_i31_zero(a, C255_P[0]);
|
||||
a[1] = 1;
|
||||
br_i31_montymul(x2, a, b, C255_P, P0I);
|
||||
|
||||
br_i31_encode(G, 32, x2);
|
||||
byteswap(G);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Multiply the generator by the multiplier x[] (xlen bytes) and write
 * the resulting point into R[]. The returned value is the generator
 * length reported by api_generator(); the status returned by api_mul()
 * is not examined.
 */
static size_t
api_mulgen(unsigned char *R,
	const unsigned char *x, size_t xlen, int curve)
{
	size_t gen_len;
	const unsigned char *gen = api_generator(curve, &gen_len);

	memcpy(R, gen, gen_len);
	(void)api_mul(R, gen_len, x, xlen, curve);
	return gen_len;
}
|
||||
|
||||
/*
 * Not implemented: this entry point is needed for ECDSA only, and
 * there is no ECDSA over Curve25519 (EdDSA is used instead). All
 * arguments are ignored and 0 is always returned.
 */
static uint32_t
api_muladd(unsigned char *A, const unsigned char *B, size_t len,
	const unsigned char *x, size_t xlen,
	const unsigned char *y, size_t ylen, int curve)
{
	(void)A, (void)B, (void)len;
	(void)x, (void)xlen;
	(void)y, (void)ylen;
	(void)curve;
	return 0;
}
|
||||
|
||||
/* see bearssl_ec.h */
|
||||
const br_ec_impl br_ec_c25519_i31 = {
|
||||
(uint32_t)0x20000000,
|
||||
&api_generator,
|
||||
&api_order,
|
||||
&api_xoff,
|
||||
&api_mul,
|
||||
&api_mulgen,
|
||||
&api_muladd
|
||||
};
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,800 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* obsolete
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
static void
|
||||
print_int(const char *name, const uint32_t *x)
|
||||
{
|
||||
size_t u;
|
||||
unsigned char tmp[40];
|
||||
|
||||
printf("%s = ", name);
|
||||
for (u = 0; u < 9; u ++) {
|
||||
if (x[u] > 0x3FFFFFFF) {
|
||||
printf("INVALID:");
|
||||
for (u = 0; u < 9; u ++) {
|
||||
printf(" %08X", x[u]);
|
||||
}
|
||||
printf("\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
memset(tmp, 0, sizeof tmp);
|
||||
for (u = 0; u < 9; u ++) {
|
||||
uint64_t w;
|
||||
int j, k;
|
||||
|
||||
w = x[u];
|
||||
j = 30 * (int)u;
|
||||
k = j & 7;
|
||||
if (k != 0) {
|
||||
w <<= k;
|
||||
j -= k;
|
||||
}
|
||||
k = j >> 3;
|
||||
for (j = 0; j < 8; j ++) {
|
||||
tmp[39 - k - j] |= (unsigned char)w;
|
||||
w >>= 8;
|
||||
}
|
||||
}
|
||||
for (u = 8; u < 40; u ++) {
|
||||
printf("%02X", tmp[u]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
*/
|
||||
|
||||
/*
 * ARSH(x, n): right shift of the 32-bit value x by n bits, with
 * propagation of the sign bit (arithmetic right shift).
 *
 * If BR_NO_ARITH_SHIFT is undefined, or defined to 0, then we _assume_
 * that right-shifting a negative signed integer copies the sign bit.
 * This is "implementation-defined behaviour": it is not undefined, but
 * it may differ between compilers, each of which is supposed to
 * document what it does. GCC explicitly specifies an arithmetic right
 * shift. We expect all other compilers to do the same, because the
 * underlying CPUs offer an arithmetic right shift opcode that could
 * not be used otherwise. In this form, x must be an lvalue of a 32-bit
 * type, since it is reinterpreted through a pointer.
 *
 * If BR_NO_ARITH_SHIFT is non-zero, the sign extension is computed
 * explicitly with unsigned operations only.
 */
#if !BR_NO_ARITH_SHIFT
#define ARSH(x, n)   ((*(int32_t *)&(x)) >> (n))
#else
#define ARSH(x, n)   (((uint32_t)(x) >> (n)) \
                    | ((-((uint32_t)(x) >> 31)) << (32 - (n))))
#endif
|
||||
|
||||
/*
 * Decode an unsigned little-endian byte string into 30-bit words
 * (little-endian word order). Each complete 30-bit word is written
 * to dst[]; the bits left over at the end (the final "partial" word)
 * are not written but returned.
 */
static uint32_t
le8_to_le30(uint32_t *dst, const unsigned char *src, size_t len)
{
	uint32_t pending = 0;	/* bits not yet written out */
	int pending_len = 0;	/* number of valid bits in 'pending' */
	size_t u;

	for (u = 0; u < len; u ++) {
		uint32_t byte, merged;

		byte = src[u];
		merged = pending | (byte << pending_len);
		if (pending_len >= 22) {
			/*
			 * This byte completes a word; its upper bits
			 * start the next one.
			 */
			*dst ++ = merged & 0x3FFFFFFF;
			pending = byte >> (30 - pending_len);
			pending_len -= 22;
		} else {
			pending = merged;
			pending_len += 8;
		}
	}
	return pending;
}
|
||||
|
||||
/*
 * Encode an integer held as 30-bit words (little-endian word order)
 * into unsigned little-endian bytes. Exactly len bytes are produced:
 * every byte of dst[] is written.
 */
static void
le30_to_le8(unsigned char *dst, size_t len, const uint32_t *src)
{
	uint32_t pending = 0;	/* bits read from src[] but not yet emitted */
	int pending_len = 0;	/* number of valid bits in 'pending' */
	size_t u;

	for (u = 0; u < len; u ++) {
		if (pending_len >= 8) {
			dst[u] = (unsigned char)pending;
			pending >>= 8;
			pending_len -= 8;
		} else {
			/*
			 * Fewer than 8 bits are left: complete the byte
			 * with the low bits of the next word.
			 */
			uint32_t w;

			w = *src ++;
			dst[u] = (unsigned char)(pending | (w << pending_len));
			pending = w >> (8 - pending_len);
			pending_len += 22;
		}
	}
}
|
||||
|
||||
/*
 * Plain (non-modular) product of two integers. Each operand is an
 * array of nine 30-bit words (values up to 2^270-1); the result d[]
 * receives 18 words of 30 bits each.
 */
static void
mul9(uint32_t *d, const uint32_t *a, const uint32_t *b)
{
	/*
	 * Column k of the schoolbook product adds up the a[i]*b[k-i]
	 * terms, of which there are at most nine. With a running carry
	 * the largest intermediate value is
	 *
	 *   10376293531797946367 = 9 * (2^30-1)^2 + 9663676406
	 *   10376293531797946367 < 9663676407 * 2^30
	 *
	 * which fits in 64 bits and leaves a carry of at most
	 * 9663676406 for the next column.
	 *
	 * All columns are computed before d[] is written.
	 */
	uint64_t col[17];
	uint64_t carry;
	int k;

	for (k = 0; k < 17; k ++) {
		uint64_t sum;
		int lo, hi, i;

		lo = (k > 8) ? (k - 8) : 0;
		hi = (k < 8) ? k : 8;
		sum = 0;
		for (i = lo; i <= hi; i ++) {
			sum += MUL31(a[i], b[k - i]);
		}
		col[k] = sum;
	}

	/*
	 * Carry propagation, 30 bits per output word.
	 */
	carry = 0;
	for (k = 0; k < 17; k ++) {
		uint64_t w;

		w = col[k] + carry;
		d[k] = (uint32_t)w & 0x3FFFFFFF;
		carry = w >> 30;
	}
	d[17] = (uint32_t)carry;
}
|
||||
|
||||
/*
 * Plain (non-modular) square of a 270-bit integer held as nine 30-bit
 * words. The result d[] receives 18 words of 30 bits each.
 */
static void
square9(uint32_t *d, const uint32_t *a)
{
	/*
	 * Column k is twice the sum of the cross products a[i]*a[k-i]
	 * with i < k-i, plus the square a[k/2]^2 when k is even. All
	 * columns are computed before d[] is written.
	 */
	uint64_t col[17];
	uint64_t carry;
	int k;

	for (k = 0; k < 17; k ++) {
		uint64_t cross;
		int i;

		cross = 0;
		for (i = (k > 8) ? (k - 8) : 0; (i << 1) < k; i ++) {
			cross += MUL31(a[i], a[k - i]);
		}
		col[k] = cross << 1;
		if ((k & 1) == 0) {
			col[k] += MUL31(a[k >> 1], a[k >> 1]);
		}
	}

	/*
	 * Carry propagation, 30 bits per output word.
	 */
	carry = 0;
	for (k = 0; k < 17; k ++) {
		uint64_t w;

		w = col[k] + carry;
		d[k] = (uint32_t)w & 0x3FFFFFFF;
		carry = w >> 30;
	}
	d[17] = (uint32_t)carry;
}
|
||||
|
||||
/*
 * "Final reduction" in F255 (the field of Curve25519). The source
 * value must be lower than twice the modulus. If it is not lower than
 * the modulus, the modulus is subtracted and 1 is returned; otherwise
 * the value is left unchanged and 0 is returned.
 */
static uint32_t
reduce_final_f255(uint32_t *d)
{
	uint32_t t[9];
	uint32_t cc;
	int i;

	/*
	 * t = d + 19. Bit 255 of t is set exactly when d >= 2^255-19,
	 * and t without that bit is then d - (2^255-19).
	 */
	cc = 19;
	for (i = 0; i < 9; i ++) {
		uint32_t w;

		w = d[i] + cc;
		t[i] = w & 0x3FFFFFFF;
		cc = w >> 30;
	}

	/* Bit 255 is bit 15 of the top word. */
	cc = t[8] >> 15;
	t[8] &= 0x7FFF;

	/* Replace d with t if, and only if, cc is 1. */
	CCOPY(cc, d, t, sizeof t);
	return cc;
}
|
||||
|
||||
/*
 * Multiplication modulo 2^255-19. Operands are arrays of 9 words of
 * 30 bits each, in little-endian order. Input values may be up to
 * 2^256-1; the output fits on 256 bits and is lower than twice the
 * modulus.
 */
static void
f255_mul(uint32_t *d, const uint32_t *a, const uint32_t *b)
{
	uint32_t prod[18];
	uint32_t carry;
	int i;

	/*
	 * Raw product. Every word fits on 30 bits; the top word
	 * (prod[17]) fits on 2 bits, since the product of two 256-bit
	 * integers fits on 512 bits.
	 */
	mul9(prod, a, b);

	/*
	 * First folding pass. The modulus is 2^255-19 and word 9 sits
	 * at bit offset 9*30 = 270, so word 9+i is added into word i
	 * with a factor 19*2^15 = 622592. The bits of word 8 beyond
	 * bit 15 are folded the same way, through the initial carry
	 * (at most 19*(2^15-1) = 622573).
	 *
	 * The carry is kept on 32 bits (at most 622592 after each
	 * step): this helps 32-bit architectures and does not
	 * noticeably impact performance on 64-bit systems.
	 */
	carry = MUL15(prod[8] >> 15, 19);
	prod[8] &= 0x7FFF;
	for (i = 0; i < 9; i ++) {
		uint64_t acc;

		acc = (uint64_t)prod[i] + (uint64_t)carry
			+ MUL31(prod[i + 9], 622592);
		prod[i] = (uint32_t)acc & 0x3FFFFFFF;
		carry = (uint32_t)(acc >> 30);
	}

	/*
	 * Second folding pass. The original product was up to
	 * (2^256-1)^2, a 512-bit integer, split into an upper part of
	 * 257 bits and a lower part of 255 bits; adding the upper part
	 * with factor 19 yields less than 19*2^257 + 2^255 = 77*2^255.
	 * The extra bits "prod[8] >> 15" are thus less than 77, and
	 * the initial carry is at most 76*19 = 1444.
	 */
	carry = MUL15(prod[8] >> 15, 19);
	prod[8] &= 0x7FFF;
	for (i = 0; i < 9; i ++) {
		uint32_t z;

		z = prod[i] + carry;
		d[i] = z & 0x3FFFFFFF;
		carry = z >> 30;
	}

	/*
	 * The result is at most 2^255 + 1443; in particular the last
	 * carry is necessarily 0, since prod[8] was truncated to 15
	 * bits.
	 */
}
|
||||
|
||||
/*
 * Squaring modulo 2^255-19. The operand is an array of 9 words of
 * 30 bits each, in little-endian order. The input value may be up to
 * 2^256-1; the output fits on 256 bits and is lower than twice the
 * modulus.
 */
static void
f255_square(uint32_t *d, const uint32_t *a)
{
	uint32_t sq[18];
	uint32_t carry;
	int i;

	/*
	 * Raw square. Every word fits on 30 bits; the top word (sq[17])
	 * fits on 2 bits, since the square of a 256-bit integer fits on
	 * 512 bits.
	 */
	square9(sq, a);

	/*
	 * First folding pass: word 9+i is added into word i with a
	 * factor 19*2^15 = 622592, and the bits of word 8 beyond bit 15
	 * are folded through the initial carry. The reduction and the
	 * carry limits are the same as in f255_mul().
	 */
	carry = MUL15(sq[8] >> 15, 19);
	sq[8] &= 0x7FFF;
	for (i = 0; i < 9; i ++) {
		uint64_t acc;

		acc = (uint64_t)sq[i] + (uint64_t)carry
			+ MUL31(sq[i + 9], 622592);
		sq[i] = (uint32_t)acc & 0x3FFFFFFF;
		carry = (uint32_t)(acc >> 30);
	}

	/*
	 * Second folding pass, for the few bits that went beyond bit
	 * 255 again.
	 */
	carry = MUL15(sq[8] >> 15, 19);
	sq[8] &= 0x7FFF;
	for (i = 0; i < 9; i ++) {
		uint32_t z;

		z = sq[i] + carry;
		d[i] = z & 0x3FFFFFFF;
		carry = z >> 30;
	}
}
|
||||
|
||||
/*
 * Addition in F255. A partial reduction is performed (down to less
 * than twice the modulus).
 */
static void
f255_add(uint32_t *d, const uint32_t *a, const uint32_t *b)
{
	/*
	 * Operand words fit on 30 bits, hence 32-bit variables are
	 * enough throughout.
	 */
	uint32_t carry, top;
	int i;

	/* Low eight words: plain addition with carry. */
	carry = 0;
	for (i = 0; i < 8; i ++) {
		uint32_t s;

		s = a[i] + b[i] + carry;
		d[i] = s & 0x3FFFFFFF;
		carry = s >> 30;
	}

	/*
	 * Top word: only its low 15 bits (up to bit 255) are kept;
	 * everything above is folded back with a factor 19.
	 */
	top = a[8] + b[8] + carry;
	d[8] = top & 0x7FFF;
	carry = MUL15(top >> 15, 19);

	for (i = 0; i < 9; i ++) {
		uint32_t s;

		s = d[i] + carry;
		d[i] = s & 0x3FFFFFFF;
		carry = s >> 30;
	}
}
|
||||
|
||||
/*
 * Subtraction in F255. A partial reduction is performed (down to less
 * than twice the modulus).
 */
static void
f255_sub(uint32_t *d, const uint32_t *a, const uint32_t *b)
{
	/*
	 * What is computed is a - b + 2*p, so that the final value is
	 * necessarily positive. Since 2*p = 2^256 - 38, the -38 is
	 * injected as the initial carry, and 2^256 (bit 16 of the top
	 * word) is added when the top word is folded.
	 */
	uint32_t carry, top;
	int i;

	carry = (uint32_t)-38;
	for (i = 0; i < 8; i ++) {
		uint32_t w;

		w = a[i] - b[i] + carry;
		d[i] = w & 0x3FFFFFFF;
		/* The carry may be negative: sign-extending shift. */
		carry = ARSH(w, 30);
	}

	/*
	 * Top word: only its low 15 bits (up to bit 255) are kept;
	 * everything above, plus the 2^256 term, is folded back with a
	 * factor 19.
	 */
	top = a[8] - b[8] + carry;
	d[8] = top & 0x7FFF;
	carry = MUL15((top + 0x10000) >> 15, 19);

	for (i = 0; i < 9; i ++) {
		uint32_t w;

		w = d[i] + carry;
		d[i] = w & 0x3FFFFFFF;
		carry = w >> 30;
	}
}
|
||||
|
||||
/*
 * Multiplication by the 'A24' constant (121665). A partial reduction
 * is performed (down to less than twice the modulus).
 */
static void
f255_mul_a24(uint32_t *d, const uint32_t *a)
{
	uint64_t acc;
	uint32_t carry;
	int i;

	/*
	 * a[] is over 256 bits, so a[8] has length at most 16 bits.
	 * The top word is handled apart from the other eight: there,
	 * acc is up to 121665*2^16, which yields a carry into the
	 * folding loop of at most 19*(121665*2^16/2^15) = 4623289.
	 */
	carry = 0;
	for (i = 0; i < 8; i ++) {
		acc = MUL31(a[i], 121665) + (uint64_t)carry;
		d[i] = (uint32_t)acc & 0x3FFFFFFF;
		carry = (uint32_t)(acc >> 30);
	}
	acc = MUL31(a[8], 121665) + (uint64_t)carry;
	d[8] = (uint32_t)acc & 0x7FFF;
	carry = MUL15((uint32_t)(acc >> 15), 19);

	/* Fold the bits beyond bit 255 back into the low words. */
	for (i = 0; i < 9; i ++) {
		uint32_t z;

		z = d[i] + carry;
		d[i] = z & 0x3FFFFFFF;
		carry = z >> 30;
	}
}
|
||||
|
||||
/*
 * Generator encoding returned by api_generator(): 32 bytes, first byte
 * 0x09, all others zero (the value 9 in the little-endian point
 * encoding consumed by api_mul()).
 */
static const unsigned char GEN[32] = { 0x09 };
|
||||
|
||||
/*
 * Value returned by api_order(): 0x7F followed by 31 bytes of 0xFF
 * (2^255-1 when read as a big-endian integer).
 */
static const unsigned char ORDER[32] = {
	0x7F, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF
};
|
||||
|
||||
static const unsigned char *
|
||||
api_generator(int curve, size_t *len)
|
||||
{
|
||||
(void)curve;
|
||||
*len = 32;
|
||||
return GEN;
|
||||
}
|
||||
|
||||
static const unsigned char *
|
||||
api_order(int curve, size_t *len)
|
||||
{
|
||||
(void)curve;
|
||||
*len = 32;
|
||||
return ORDER;
|
||||
}
|
||||
|
||||
/*
 * Report where the X coordinate sits inside an encoded point: it
 * starts at offset 0 (return value) and is 32 bytes long (*len).
 * The curve identifier is not used.
 */
static size_t
api_xoff(int curve, size_t *len)
{
	const size_t x_offset = 0;

	(void)curve;
	*len = 32;
	return x_offset;
}
|
||||
|
||||
/*
 * Conditionally exchange two 9-word arrays without branching on the
 * condition: ctl must be 0 (leave both untouched) or 1 (swap).
 */
static void
cswap(uint32_t *a, uint32_t *b, uint32_t ctl)
{
	/* 0 -> 0x00000000, 1 -> 0xFFFFFFFF */
	const uint32_t mask = (uint32_t)0 - ctl;
	uint32_t *pa = a;
	uint32_t *pb = b;
	uint32_t *const stop = a + 9;

	while (pa != stop) {
		uint32_t delta;

		delta = mask & (*pa ^ *pb);
		*pa ++ ^= delta;
		*pb ++ ^= delta;
	}
}
|
||||
|
||||
/*
 * Curve25519 point multiplication (Montgomery ladder on the 'u'
 * coordinate). The point in G[] is replaced with its product by the
 * multiplier kb[].
 *
 *   G      point, little-endian, overwritten with the result
 *   Glen   point length; must be exactly 32
 *   kb     multiplier, big-endian
 *   kblen  multiplier length; at most 32
 *   curve  ignored
 *
 * Returned value is 1 on success, 0 if a length is not acceptable (in
 * which case G[] is not modified).
 */
static uint32_t
api_mul(unsigned char *G, size_t Glen,
	const unsigned char *kb, size_t kblen, int curve)
{
	uint32_t x1[9], x2[9], x3[9], z2[9], z3[9];
	uint32_t a[9], aa[9], b[9], bb[9];
	uint32_t c[9], d[9], e[9], da[9], cb[9];
	unsigned char k[32];
	size_t pad;
	uint32_t swap;
	int i;

	(void)curve;

	/*
	 * Points use exactly 32 bytes and multipliers at most 32 bytes.
	 */
	if (Glen != 32) {
		return 0;
	}
	if (kblen > 32) {
		return 0;
	}

	/*
	 * RFC 7748: the top bit of the last point byte is ignored.
	 */
	G[31] &= 0x7F;

	/*
	 * Ladder state, as plain integers in 30-bit words (this
	 * implementation does not use Montgomery representation):
	 *   x1 = u, x3 = u, z2 = 0, x2 = 1, z3 = 1
	 * The eight full words are written by le8_to_le30(); the ninth
	 * one is its returned partial word.
	 */
	x1[8] = le8_to_le30(x1, G, 32);
	memcpy(x3, x1, sizeof x1);
	memset(z2, 0, sizeof z2);
	memset(x2, 0, sizeof x2);
	x2[0] = 1;
	memcpy(z3, x2, sizeof x2);

	/*
	 * Copy the big-endian multiplier into k[], left-padded with
	 * zeros, then apply the RFC 7748 clamping: clear the three low
	 * bits, clear bit 255, set bit 254.
	 */
	pad = (sizeof k) - kblen;
	memset(k, 0, pad);
	memcpy(k + pad, kb, kblen);
	k[31] &= 0xF8;
	k[0] = (unsigned char)((k[0] & 0x7F) | 0x40);

	/*
	 * Montgomery ladder over multiplier bits 254 down to 0. The
	 * swap is applied lazily: the working pairs are exchanged only
	 * when the current bit differs from the previous one.
	 */
	swap = 0;
	i = 255;
	while (i -- > 0) {
		uint32_t kt;

		kt = (uint32_t)(k[31 - (i / 8)] >> (i % 8)) & 1;
		cswap(x2, x3, swap ^ kt);
		cswap(z2, z3, swap ^ kt);
		swap = kt;

		f255_add(a, x2, z2);
		f255_square(aa, a);
		f255_sub(b, x2, z2);
		f255_square(bb, b);
		f255_sub(e, aa, bb);
		f255_add(c, x3, z3);
		f255_sub(d, x3, z3);
		f255_mul(da, d, a);
		f255_mul(cb, c, b);

		/* x3 = (da + cb)^2 */
		f255_add(x3, da, cb);
		f255_square(x3, x3);

		/* z3 = x1 * (da - cb)^2 */
		f255_sub(z3, da, cb);
		f255_square(z3, z3);
		f255_mul(z3, z3, x1);

		/* x2 = aa * bb */
		f255_mul(x2, aa, bb);

		/* z2 = e * (aa + A24 * e) */
		f255_mul_a24(z2, e);
		f255_add(z2, z2, aa);
		f255_mul(z2, e, z2);
	}
	cswap(x2, x3, swap);
	cswap(z2, z3, swap);

	/*
	 * Invert z2 by raising it to the power p-2 = 2^255-21 with
	 * square-and-multiply. The exponent is almost all ones, so
	 * most multiplications are shared:
	 *   a = z2^(2^16-1)
	 *   b = z2^(2^240-1)    (14 rounds: 16 squarings, one multiply)
	 * and the last 15 exponent bits are the low bits of 0xFFEB.
	 */
	memcpy(a, z2, sizeof z2);
	for (i = 0; i < 15; i ++) {
		f255_square(a, a);
		f255_mul(a, a, z2);
	}
	memcpy(b, a, sizeof a);
	for (i = 0; i < 14; i ++) {
		int j;

		for (j = 0; j < 16; j ++) {
			f255_square(b, b);
		}
		f255_mul(b, b, a);
	}
	for (i = 14; i >= 0; i --) {
		f255_square(b, b);
		if ((0xFFEB >> i) & 1) {
			f255_mul(b, z2, b);
		}
	}

	/*
	 * x2 = x2 / z2. The product is only partially reduced, so a
	 * final reduction is applied before encoding the result
	 * (little-endian) back into G[].
	 */
	f255_mul(x2, x2, b);
	reduce_final_f255(x2);
	le30_to_le8(G, 32, x2);
	return 1;
}
|
||||
|
||||
/*
 * Multiply the generator by the multiplier x[] (xlen bytes) and write
 * the resulting point into R[]. The returned value is the generator
 * length reported by api_generator(); the status returned by api_mul()
 * is not examined.
 */
static size_t
api_mulgen(unsigned char *R,
	const unsigned char *x, size_t xlen, int curve)
{
	size_t gen_len;
	const unsigned char *gen = api_generator(curve, &gen_len);

	memcpy(R, gen, gen_len);
	(void)api_mul(R, gen_len, x, xlen, curve);
	return gen_len;
}
|
||||
|
||||
/*
 * Not implemented: this entry point is needed for ECDSA only, and
 * there is no ECDSA over Curve25519 (EdDSA is used instead). All
 * arguments are ignored and 0 is always returned.
 */
static uint32_t
api_muladd(unsigned char *A, const unsigned char *B, size_t len,
	const unsigned char *x, size_t xlen,
	const unsigned char *y, size_t ylen, int curve)
{
	(void)A, (void)B, (void)len;
	(void)x, (void)xlen;
	(void)y, (void)ylen;
	(void)curve;
	return 0;
}
|
||||
|
||||
/* see bearssl_ec.h */
|
||||
const br_ec_impl br_ec_c25519_m31 = {
|
||||
(uint32_t)0x20000000,
|
||||
&api_generator,
|
||||
&api_order,
|
||||
&api_xoff,
|
||||
&api_mul,
|
||||
&api_mulgen,
|
||||
&api_muladd
|
||||
};
|
||||
|
|
@ -0,0 +1,605 @@
|
|||
/*
|
||||
* Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
#if BR_INT128 || BR_UMUL128
|
||||
|
||||
#if BR_UMUL128
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
/*
 * Generator encoding returned by api_generator(): 32 bytes, the first
 * one being 0x09 and all the others zero.
 */
static const unsigned char GEN[32] = { 0x09 };
|
||||
|
||||
/*
 * Value returned by api_order(): 32 bytes, 0x7F followed by thirty-one
 * 0xFF bytes.
 */
static const unsigned char ORDER[] = {
	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
};
|
||||
|
||||
static const unsigned char *
|
||||
api_generator(int curve, size_t *len)
|
||||
{
|
||||
(void)curve;
|
||||
*len = 32;
|
||||
return GEN;
|
||||
}
|
||||
|
||||
static const unsigned char *
|
||||
api_order(int curve, size_t *len)
|
||||
{
|
||||
(void)curve;
|
||||
*len = 32;
|
||||
return ORDER;
|
||||
}
|
||||
|
||||
/*
 * Report where the X coordinate sits in an encoded point: the returned
 * offset is 0 and *len is set to 32. The curve identifier is ignored.
 */
static size_t
api_xoff(int curve, size_t *len)
{
	enum { X_OFFSET = 0, X_LENGTH = 32 };

	(void)curve;
	*len = X_LENGTH;
	return X_OFFSET;
}
|
||||
|
||||
/*
|
||||
* A field element is encoded as five 64-bit integers, in basis 2^51.
|
||||
* Limbs may be occasionally larger than 2^51, to save on carry
|
||||
* propagation costs.
|
||||
*/
|
||||
|
||||
/* 64-bit constant with the low 51 bits set (2^51 - 1); used to keep one limb. */
#define MASK51 (((uint64_t)1 << 51) - (uint64_t)1)
|
||||
|
||||
/*
 * Conditionally exchange two five-limb field elements.
 *
 * With ctl == 0 both operands are left untouched; with ctl == 1 their
 * contents are exchanged. The selection is done with an all-zeros or
 * all-ones mask derived from ctl, without any branch on ctl.
 */
static inline void
f255_cswap(uint64_t *a, uint64_t *b, uint32_t ctl)
{
	uint64_t mask;
	int u;

	mask = -(uint64_t)ctl;
	for (u = 0; u < 5; u ++) {
		uint64_t diff;

		diff = mask & (a[u] ^ b[u]);
		a[u] ^= diff;
		b[u] ^= diff;
	}
}
|
||||
|
||||
/*
 * Addition with no carry propagation: every output limb is the plain
 * 64-bit sum of the corresponding input limbs, so limbs may grow by
 * one bit.
 */
static inline void
f255_add(uint64_t *d, const uint64_t *a, const uint64_t *b)
{
	int u;

	for (u = 0; u < 5; u ++) {
		d[u] = a[u] + b[u];
	}
}
|
||||
|
||||
/*
|
||||
* Subtraction.
|
||||
* On input, limbs must fit on 60 bits each. On output, result is
|
||||
* partially reduced, with max value 2^255+19456; moreover, all
|
||||
* limbs will fit on 51 bits, except the low limb, which may have
|
||||
* value up to 2^51+19455.
|
||||
*/
|
||||
static inline void
|
||||
f255_sub(uint64_t *d, const uint64_t *a, const uint64_t *b)
|
||||
{
|
||||
uint64_t cc, w;
|
||||
|
||||
/*
|
||||
* We compute d = (2^255-19)*1024 + a - b. Since the limbs
|
||||
* fit on 60 bits, the maximum value of operands are slightly
|
||||
* more than 2^264, but much less than 2^265-19456. This
|
||||
* ensures that the result is positive.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Initial carry is 19456, since we add 2^265-19456. Each
|
||||
* individual subtraction may yield a carry up to 513.
|
||||
*/
|
||||
w = a[0] - b[0] - 19456;
|
||||
d[0] = w & MASK51;
|
||||
cc = -(w >> 51) & 0x3FF;
|
||||
w = a[1] - b[1] - cc;
|
||||
d[1] = w & MASK51;
|
||||
cc = -(w >> 51) & 0x3FF;
|
||||
w = a[2] - b[2] - cc;
|
||||
d[2] = w & MASK51;
|
||||
cc = -(w >> 51) & 0x3FF;
|
||||
w = a[3] - b[3] - cc;
|
||||
d[3] = w & MASK51;
|
||||
cc = -(w >> 51) & 0x3FF;
|
||||
d[4] = ((uint64_t)1 << 61) + a[4] - b[4] - cc;
|
||||
|
||||
/*
|
||||
* Partial reduction. The intermediate result may be up to
|
||||
* slightly above 2^265, but less than 2^265+2^255. When we
|
||||
* truncate to 255 bits, the upper bits will be at most 1024.
|
||||
*/
|
||||
d[0] += 19 * (d[4] >> 51);
|
||||
d[4] &= MASK51;
|
||||
}
|
||||
|
||||
/*
 * UMUL51(hi, lo, x, y) multiplies the two 64-bit values x and y and
 * splits the product at bit 51:
 *
 *   hi = floor((x * y) / (2^51))    (truncated to 64 bits)
 *   lo = x * y mod 2^51
 *
 * lo is always lower than 2^51; hi may need more than 51 bits when
 * the operands are themselves larger than 51 bits. Both hi and lo
 * must be assignable expressions.
 */
#if BR_INT128

/* Variant relying on the compiler's native 128-bit integer type. */
#define UMUL51(hi, lo, x, y)   do { \
		unsigned __int128 umul51_prod; \
		umul51_prod = (unsigned __int128)(x) \
			* (unsigned __int128)(y); \
		(hi) = (uint64_t)(umul51_prod >> 51); \
		(lo) = (uint64_t)umul51_prod & MASK51; \
	} while (0)

#elif BR_UMUL128

/* Variant relying on the _umul128() intrinsic (two 64-bit halves). */
#define UMUL51(hi, lo, x, y)   do { \
		uint64_t umul51_upper, umul51_lower; \
		umul51_lower = _umul128((x), (y), &umul51_upper); \
		(hi) = (umul51_upper << 13) | (umul51_lower >> 51); \
		(lo) = umul51_lower & MASK51; \
	} while (0)

#endif
|
||||
|
||||
/*
|
||||
* Multiplication.
|
||||
* On input, limbs must fit on 54 bits each.
|
||||
* On output, limb 0 is at most 2^51 + 155647, and other limbs fit
|
||||
* on 51 bits each.
|
||||
*/
|
||||
static inline void
|
||||
f255_mul(uint64_t *d, uint64_t *a, uint64_t *b)
|
||||
{
|
||||
uint64_t t[10], hi, lo, w, cc;
|
||||
|
||||
/*
|
||||
* Perform cross products, accumulating values without carry
|
||||
* propagation.
|
||||
*
|
||||
* Since input limbs fit on 54 bits each, each individual
|
||||
* UMUL51 will produce a "hi" of less than 2^57. The maximum
|
||||
* sum will be at most 5*(2^57-1) + 4*(2^51-1) (for t[5]),
|
||||
* i.e. less than 324*2^51.
|
||||
*/
|
||||
|
||||
UMUL51(t[1], t[0], a[0], b[0]);
|
||||
|
||||
UMUL51(t[2], lo, a[1], b[0]); t[1] += lo;
|
||||
UMUL51(hi, lo, a[0], b[1]); t[1] += lo; t[2] += hi;
|
||||
|
||||
UMUL51(t[3], lo, a[2], b[0]); t[2] += lo;
|
||||
UMUL51(hi, lo, a[1], b[1]); t[2] += lo; t[3] += hi;
|
||||
UMUL51(hi, lo, a[0], b[2]); t[2] += lo; t[3] += hi;
|
||||
|
||||
UMUL51(t[4], lo, a[3], b[0]); t[3] += lo;
|
||||
UMUL51(hi, lo, a[2], b[1]); t[3] += lo; t[4] += hi;
|
||||
UMUL51(hi, lo, a[1], b[2]); t[3] += lo; t[4] += hi;
|
||||
UMUL51(hi, lo, a[0], b[3]); t[3] += lo; t[4] += hi;
|
||||
|
||||
UMUL51(t[5], lo, a[4], b[0]); t[4] += lo;
|
||||
UMUL51(hi, lo, a[3], b[1]); t[4] += lo; t[5] += hi;
|
||||
UMUL51(hi, lo, a[2], b[2]); t[4] += lo; t[5] += hi;
|
||||
UMUL51(hi, lo, a[1], b[3]); t[4] += lo; t[5] += hi;
|
||||
UMUL51(hi, lo, a[0], b[4]); t[4] += lo; t[5] += hi;
|
||||
|
||||
UMUL51(t[6], lo, a[4], b[1]); t[5] += lo;
|
||||
UMUL51(hi, lo, a[3], b[2]); t[5] += lo; t[6] += hi;
|
||||
UMUL51(hi, lo, a[2], b[3]); t[5] += lo; t[6] += hi;
|
||||
UMUL51(hi, lo, a[1], b[4]); t[5] += lo; t[6] += hi;
|
||||
|
||||
UMUL51(t[7], lo, a[4], b[2]); t[6] += lo;
|
||||
UMUL51(hi, lo, a[3], b[3]); t[6] += lo; t[7] += hi;
|
||||
UMUL51(hi, lo, a[2], b[4]); t[6] += lo; t[7] += hi;
|
||||
|
||||
UMUL51(t[8], lo, a[4], b[3]); t[7] += lo;
|
||||
UMUL51(hi, lo, a[3], b[4]); t[7] += lo; t[8] += hi;
|
||||
|
||||
UMUL51(t[9], lo, a[4], b[4]); t[8] += lo;
|
||||
|
||||
/*
|
||||
* The upper words t[5]..t[9] are folded back into the lower
|
||||
* words, using the rule that 2^255 = 19 in the field.
|
||||
*
|
||||
* Since each t[i] is less than 324*2^51, the additions below
|
||||
* will yield less than 6480*2^51 in each limb; this fits in
|
||||
* 64 bits (6480*2^51 < 8192*2^51 = 2^64), hence there is
|
||||
* no overflow.
|
||||
*/
|
||||
t[0] += 19 * t[5];
|
||||
t[1] += 19 * t[6];
|
||||
t[2] += 19 * t[7];
|
||||
t[3] += 19 * t[8];
|
||||
t[4] += 19 * t[9];
|
||||
|
||||
/*
|
||||
* Propagate carries.
|
||||
*/
|
||||
w = t[0];
|
||||
d[0] = w & MASK51;
|
||||
cc = w >> 51;
|
||||
w = t[1] + cc;
|
||||
d[1] = w & MASK51;
|
||||
cc = w >> 51;
|
||||
w = t[2] + cc;
|
||||
d[2] = w & MASK51;
|
||||
cc = w >> 51;
|
||||
w = t[3] + cc;
|
||||
d[3] = w & MASK51;
|
||||
cc = w >> 51;
|
||||
w = t[4] + cc;
|
||||
d[4] = w & MASK51;
|
||||
cc = w >> 51;
|
||||
|
||||
/*
|
||||
* Since the limbs were 64-bit values, the top carry is at
|
||||
* most 8192 (in practice, that cannot be reached). We simply
|
||||
* performed a partial reduction.
|
||||
*/
|
||||
d[0] += 19 * cc;
|
||||
}
|
||||
|
||||
/*
|
||||
* Multiplication by A24 = 121665.
|
||||
* Input must have limbs of 60 bits at most.
|
||||
*/
|
||||
static inline void
|
||||
f255_mul_a24(uint64_t *d, const uint64_t *a)
|
||||
{
|
||||
uint64_t t[5], cc, w;
|
||||
|
||||
/*
|
||||
* 121665 = 15 * 8111. We first multiply by 15, with carry
|
||||
* propagation and partial reduction.
|
||||
*/
|
||||
w = a[0] * 15;
|
||||
t[0] = w & MASK51;
|
||||
cc = w >> 51;
|
||||
w = a[1] * 15 + cc;
|
||||
t[1] = w & MASK51;
|
||||
cc = w >> 51;
|
||||
w = a[2] * 15 + cc;
|
||||
t[2] = w & MASK51;
|
||||
cc = w >> 51;
|
||||
w = a[3] * 15 + cc;
|
||||
t[3] = w & MASK51;
|
||||
cc = w >> 51;
|
||||
w = a[4] * 15 + cc;
|
||||
t[4] = w & MASK51;
|
||||
t[0] += 19 * (w >> 51);
|
||||
|
||||
/*
|
||||
* Then multiplication by 8111. At that point, we known that
|
||||
* t[0] is less than 2^51 + 19*8192, and other limbs are less
|
||||
* than 2^51; thus, there will be no overflow.
|
||||
*/
|
||||
w = t[0] * 8111;
|
||||
d[0] = w & MASK51;
|
||||
cc = w >> 51;
|
||||
w = t[1] * 8111 + cc;
|
||||
d[1] = w & MASK51;
|
||||
cc = w >> 51;
|
||||
w = t[2] * 8111 + cc;
|
||||
d[2] = w & MASK51;
|
||||
cc = w >> 51;
|
||||
w = t[3] * 8111 + cc;
|
||||
d[3] = w & MASK51;
|
||||
cc = w >> 51;
|
||||
w = t[4] * 8111 + cc;
|
||||
d[4] = w & MASK51;
|
||||
d[0] += 19 * (w >> 51);
|
||||
}
|
||||
|
||||
/*
|
||||
* Finalize reduction.
|
||||
* On input, limbs must fit on 51 bits, except possibly the low limb,
|
||||
* which may be slightly above 2^51.
|
||||
*/
|
||||
static inline void
|
||||
f255_final_reduce(uint64_t *a)
|
||||
{
|
||||
uint64_t t[5], cc, w;
|
||||
|
||||
/*
|
||||
* We add 19. If the result (in t[]) is below 2^255, then a[]
|
||||
* is already less than 2^255-19, thus already reduced.
|
||||
* Otherwise, we subtract 2^255 from t[], in which case we
|
||||
* have t = a - (2^255-19), and that's our result.
|
||||
*/
|
||||
w = a[0] + 19;
|
||||
t[0] = w & MASK51;
|
||||
cc = w >> 51;
|
||||
w = a[1] + cc;
|
||||
t[1] = w & MASK51;
|
||||
cc = w >> 51;
|
||||
w = a[2] + cc;
|
||||
t[2] = w & MASK51;
|
||||
cc = w >> 51;
|
||||
w = a[3] + cc;
|
||||
t[3] = w & MASK51;
|
||||
cc = w >> 51;
|
||||
w = a[4] + cc;
|
||||
t[4] = w & MASK51;
|
||||
cc = w >> 51;
|
||||
|
||||
/*
|
||||
* The bit 255 of t is in cc. If that bit is 0, when a[] must
|
||||
* be unchanged; otherwise, it must be replaced with t[].
|
||||
*/
|
||||
cc = -cc;
|
||||
a[0] ^= cc & (a[0] ^ t[0]);
|
||||
a[1] ^= cc & (a[1] ^ t[1]);
|
||||
a[2] ^= cc & (a[2] ^ t[2]);
|
||||
a[3] ^= cc & (a[3] ^ t[3]);
|
||||
a[4] ^= cc & (a[4] ^ t[4]);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
api_mul(unsigned char *G, size_t Glen,
|
||||
const unsigned char *kb, size_t kblen, int curve)
|
||||
{
|
||||
unsigned char k[32];
|
||||
uint64_t x1[5], x2[5], z2[5], x3[5], z3[5];
|
||||
uint32_t swap;
|
||||
int i;
|
||||
|
||||
(void)curve;
|
||||
|
||||
/*
|
||||
* Points are encoded over exactly 32 bytes. Multipliers must fit
|
||||
* in 32 bytes as well.
|
||||
*/
|
||||
if (Glen != 32 || kblen > 32) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* RFC 7748 mandates that the high bit of the last point byte must
|
||||
* be ignored/cleared; the "& MASK51" in the initialization for
|
||||
* x1[4] clears that bit.
|
||||
*/
|
||||
x1[0] = br_dec64le(&G[0]) & MASK51;
|
||||
x1[1] = (br_dec64le(&G[6]) >> 3) & MASK51;
|
||||
x1[2] = (br_dec64le(&G[12]) >> 6) & MASK51;
|
||||
x1[3] = (br_dec64le(&G[19]) >> 1) & MASK51;
|
||||
x1[4] = (br_dec64le(&G[24]) >> 12) & MASK51;
|
||||
|
||||
/*
|
||||
* We can use memset() to clear values, because exact-width types
|
||||
* like uint64_t are guaranteed to have no padding bits or
|
||||
* trap representations.
|
||||
*/
|
||||
memset(x2, 0, sizeof x2);
|
||||
x2[0] = 1;
|
||||
memset(z2, 0, sizeof z2);
|
||||
memcpy(x3, x1, sizeof x1);
|
||||
memcpy(z3, x2, sizeof x2);
|
||||
|
||||
/*
|
||||
* The multiplier is provided in big-endian notation, and
|
||||
* possibly shorter than 32 bytes.
|
||||
*/
|
||||
memset(k, 0, (sizeof k) - kblen);
|
||||
memcpy(k + (sizeof k) - kblen, kb, kblen);
|
||||
k[31] &= 0xF8;
|
||||
k[0] &= 0x7F;
|
||||
k[0] |= 0x40;
|
||||
|
||||
swap = 0;
|
||||
|
||||
for (i = 254; i >= 0; i --) {
|
||||
uint64_t a[5], aa[5], b[5], bb[5], e[5];
|
||||
uint64_t c[5], d[5], da[5], cb[5];
|
||||
uint32_t kt;
|
||||
|
||||
kt = (k[31 - (i >> 3)] >> (i & 7)) & 1;
|
||||
swap ^= kt;
|
||||
f255_cswap(x2, x3, swap);
|
||||
f255_cswap(z2, z3, swap);
|
||||
swap = kt;
|
||||
|
||||
/*
|
||||
* At that point, limbs of x_2 and z_2 are assumed to fit
|
||||
* on at most 52 bits each.
|
||||
*
|
||||
* Each f255_add() adds one bit to the maximum range of
|
||||
* the values, but f255_sub() and f255_mul() bring back
|
||||
* the limbs into 52 bits. All f255_add() outputs are
|
||||
* used only as inputs for f255_mul(), which ensures
|
||||
* that limbs remain in the proper range.
|
||||
*/
|
||||
|
||||
/* A = x_2 + z_2 -- limbs fit on 53 bits each */
|
||||
f255_add(a, x2, z2);
|
||||
|
||||
/* AA = A^2 */
|
||||
f255_mul(aa, a, a);
|
||||
|
||||
/* B = x_2 - z_2 */
|
||||
f255_sub(b, x2, z2);
|
||||
|
||||
/* BB = B^2 */
|
||||
f255_mul(bb, b, b);
|
||||
|
||||
/* E = AA - BB */
|
||||
f255_sub(e, aa, bb);
|
||||
|
||||
/* C = x_3 + z_3 -- limbs fit on 53 bits each */
|
||||
f255_add(c, x3, z3);
|
||||
|
||||
/* D = x_3 - z_3 */
|
||||
f255_sub(d, x3, z3);
|
||||
|
||||
/* DA = D * A */
|
||||
f255_mul(da, d, a);
|
||||
|
||||
/* CB = C * B */
|
||||
f255_mul(cb, c, b);
|
||||
|
||||
/* x_3 = (DA + CB)^2 */
|
||||
f255_add(x3, da, cb);
|
||||
f255_mul(x3, x3, x3);
|
||||
|
||||
/* z_3 = x_1 * (DA - CB)^2 */
|
||||
f255_sub(z3, da, cb);
|
||||
f255_mul(z3, z3, z3);
|
||||
f255_mul(z3, x1, z3);
|
||||
|
||||
/* x_2 = AA * BB */
|
||||
f255_mul(x2, aa, bb);
|
||||
|
||||
/* z_2 = E * (AA + a24 * E) */
|
||||
f255_mul_a24(z2, e);
|
||||
f255_add(z2, aa, z2);
|
||||
f255_mul(z2, e, z2);
|
||||
}
|
||||
|
||||
f255_cswap(x2, x3, swap);
|
||||
f255_cswap(z2, z3, swap);
|
||||
|
||||
/*
|
||||
* Compute 1/z2 = z2^(p-2). Since p = 2^255-19, we can mutualize
|
||||
* most non-squarings. We use x1 and x3, now useless, as temporaries.
|
||||
*/
|
||||
memcpy(x1, z2, sizeof z2);
|
||||
for (i = 0; i < 15; i ++) {
|
||||
f255_mul(x1, x1, x1);
|
||||
f255_mul(x1, x1, z2);
|
||||
}
|
||||
memcpy(x3, x1, sizeof x1);
|
||||
for (i = 0; i < 14; i ++) {
|
||||
int j;
|
||||
|
||||
for (j = 0; j < 16; j ++) {
|
||||
f255_mul(x3, x3, x3);
|
||||
}
|
||||
f255_mul(x3, x3, x1);
|
||||
}
|
||||
for (i = 14; i >= 0; i --) {
|
||||
f255_mul(x3, x3, x3);
|
||||
if ((0xFFEB >> i) & 1) {
|
||||
f255_mul(x3, z2, x3);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute x2/z2. We have 1/z2 in x3.
|
||||
*/
|
||||
f255_mul(x2, x2, x3);
|
||||
f255_final_reduce(x2);
|
||||
|
||||
/*
|
||||
* Encode the final x2 value in little-endian. We first assemble
|
||||
* the limbs into 64-bit values.
|
||||
*/
|
||||
x2[0] |= x2[1] << 51;
|
||||
x2[1] = (x2[1] >> 13) | (x2[2] << 38);
|
||||
x2[2] = (x2[2] >> 26) | (x2[3] << 25);
|
||||
x2[3] = (x2[3] >> 39) | (x2[4] << 12);
|
||||
br_enc64le(G, x2[0]);
|
||||
br_enc64le(G + 8, x2[1]);
|
||||
br_enc64le(G + 16, x2[2]);
|
||||
br_enc64le(G + 24, x2[3]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Multiply the generator by the scalar x (xlen bytes) and write the
 * encoded result into R.
 *
 * The generator encoding obtained from api_generator() is first copied
 * into R, then api_mul() is applied to R in place. The status returned
 * by api_mul() is not inspected: the generator length is returned in
 * all cases.
 */
static size_t
api_mulgen(unsigned char *R,
	const unsigned char *x, size_t xlen, int curve)
{
	size_t point_len;
	const unsigned char *base;

	base = api_generator(curve, &point_len);
	memcpy(R, base, point_len);
	(void)api_mul(R, point_len, x, xlen, curve);
	return point_len;
}
|
||||
|
||||
/*
 * Combined "multiply and add" entry point.
 *
 * This method is not implemented: it is used for ECDSA only, and there
 * is no ECDSA over Curve25519 (which uses EdDSA instead). Every
 * argument is ignored and the function always returns 0.
 */
static uint32_t
api_muladd(unsigned char *A, const unsigned char *B, size_t len,
	const unsigned char *x, size_t xlen,
	const unsigned char *y, size_t ylen, int curve)
{
	/* Mark all parameters as deliberately unused. */
	(void)A, (void)B, (void)len;
	(void)x, (void)xlen;
	(void)y, (void)ylen;
	(void)curve;

	return 0;
}
|
||||
|
||||
/* see bearssl_ec.h */
|
||||
const br_ec_impl br_ec_c25519_m62 = {
|
||||
(uint32_t)0x20000000,
|
||||
&api_generator,
|
||||
&api_order,
|
||||
&api_xoff,
|
||||
&api_mul,
|
||||
&api_mulgen,
|
||||
&api_muladd
|
||||
};
|
||||
|
||||
/* see bearssl_ec.h */
|
||||
const br_ec_impl *
|
||||
br_ec_c25519_m62_get(void)
|
||||
{
|
||||
return &br_ec_c25519_m62;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* see bearssl_ec.h */
|
||||
const br_ec_impl *
|
||||
br_ec_c25519_m62_get(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,831 @@
|
|||
/*
|
||||
* Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
#if BR_INT128 || BR_UMUL128
|
||||
|
||||
#if BR_UMUL128
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
/*
 * Generator encoding returned by api_generator(): 32 bytes, the first
 * one being 0x09 and all the others zero.
 */
static const unsigned char GEN[32] = { 0x09 };
|
||||
|
||||
/*
 * Value returned by api_order(): 32 bytes, 0x7F followed by thirty-one
 * 0xFF bytes.
 */
static const unsigned char ORDER[] = {
	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
};
|
||||
|
||||
static const unsigned char *
|
||||
api_generator(int curve, size_t *len)
|
||||
{
|
||||
(void)curve;
|
||||
*len = 32;
|
||||
return GEN;
|
||||
}
|
||||
|
||||
static const unsigned char *
|
||||
api_order(int curve, size_t *len)
|
||||
{
|
||||
(void)curve;
|
||||
*len = 32;
|
||||
return ORDER;
|
||||
}
|
||||
|
||||
/*
 * Report where the X coordinate sits in an encoded point: the returned
 * offset is 0 and *len is set to 32. The curve identifier is ignored.
 */
static size_t
api_xoff(int curve, size_t *len)
{
	enum { X_OFFSET = 0, X_LENGTH = 32 };

	(void)curve;
	*len = X_LENGTH;
	return X_OFFSET;
}
|
||||
|
||||
/*
|
||||
* A field element is encoded as four 64-bit integers, in basis 2^64.
|
||||
* Operations return partially reduced values, which may range up to
|
||||
* 2^255+37.
|
||||
*/
|
||||
|
||||
/* 64-bit constant with the low 63 bits set (2^63 - 1); clears bit 63 of a limb. */
#define MASK63 (((uint64_t)1 << 63) - (uint64_t)1)
|
||||
|
||||
/*
 * Conditionally exchange two four-limb field elements.
 *
 * With ctl == 0 both operands are left untouched; with ctl == 1 their
 * contents are exchanged. The selection is done with an all-zeros or
 * all-ones mask derived from ctl, without any branch on ctl.
 */
static inline void
f255_cswap(uint64_t *a, uint64_t *b, uint32_t ctl)
{
	uint64_t mask;
	int u;

	mask = -(uint64_t)ctl;
	for (u = 0; u < 4; u ++) {
		uint64_t diff;

		diff = mask & (a[u] ^ b[u]);
		a[u] ^= diff;
		b[u] ^= diff;
	}
}
|
||||
|
||||
/*
 * Addition in the field: d = a + b, partially reduced.
 *
 * Operands are expected to be at most 2^255+37, and so is the result.
 * d may be the same array as a and/or b: both operands are entirely
 * read before d is written.
 */
static inline void
f255_add(uint64_t *d, const uint64_t *a, const uint64_t *b)
{
#if BR_INT128

	uint64_t s[4], top;
	unsigned __int128 acc;
	int u;

	/*
	 * Plain 256-bit addition with carry propagation; "top" gets
	 * the bits of rank 255 and above.
	 */
	acc = 0;
	for (u = 0; u < 4; u ++) {
		acc = (unsigned __int128)a[u] + (unsigned __int128)b[u]
			+ (acc >> 64);
		s[u] = (uint64_t)acc;
	}
	top = (uint64_t)(acc >> 63);
	s[3] &= MASK63;

	/*
	 * Since operands are at most 2^255+37, the sum is at most
	 * 2^256+74; thus, top is equal to 0, 1 or 2.
	 *
	 * We use: 2^255 = 19 mod p.
	 * Since we add 0, 19 or 38 to a value that fits on 255 bits,
	 * the result is at most 2^255+37.
	 */
	acc = (unsigned __int128)s[0] + (unsigned __int128)(19 * top);
	d[0] = (uint64_t)acc;
	for (u = 1; u < 3; u ++) {
		acc = (unsigned __int128)s[u] + (acc >> 64);
		d[u] = (uint64_t)acc;
	}
	d[3] = s[3] + (uint64_t)(acc >> 64);

#elif BR_UMUL128

	uint64_t s[4], top;
	unsigned char k;
	int u;

	/*
	 * Plain 256-bit addition with carry propagation; "top" gets
	 * the bits of rank 255 and above.
	 */
	k = 0;
	for (u = 0; u < 4; u ++) {
		k = _addcarry_u64(k, a[u], b[u], &s[u]);
	}
	top = (k << 1) + (s[3] >> 63);
	s[3] &= MASK63;

	/*
	 * Since operands are at most 2^255+37, the sum is at most
	 * 2^256+74; thus, top is equal to 0, 1 or 2.
	 *
	 * We use: 2^255 = 19 mod p.
	 * Since we add 0, 19 or 38 to a value that fits on 255 bits,
	 * the result is at most 2^255+37.
	 */
	k = _addcarry_u64(0, s[0], 19 * top, &d[0]);
	for (u = 1; u < 3; u ++) {
		k = _addcarry_u64(k, s[u], 0, &d[u]);
	}
	(void)_addcarry_u64(k, s[3], 0, &d[3]);

#endif
}
|
||||
|
||||
/*
 * Subtraction in the field: d = a - b, partially reduced.
 *
 * Operands are expected to be at most 2^255+37, and so is the result.
 * d may be the same array as a and/or b: both operands are entirely
 * read before d is written.
 */
static inline void
f255_sub(uint64_t *d, const uint64_t *a, const uint64_t *b)
{
#if BR_INT128

	/*
	 * We compute t = 2^256 - 38 + a - b, which is necessarily
	 * positive but lower than 2^256 + 2^255, since a <= 2^255 + 37
	 * and b <= 2^255 + 37. The bits of rank 255 and 256 of t are
	 * then folded back, using 2^255 = 19 mod p.
	 */

	uint64_t r[4], top, adj;
	unsigned __int128 acc;
	int u;

	/*
	 * The "-38" term enters as the initial borrow. After each
	 * limb, the outgoing borrow is the negated upper half of the
	 * wrapped 128-bit difference.
	 */
	adj = 38;
	acc = 0;
	for (u = 0; u < 4; u ++) {
		acc = (unsigned __int128)a[u] - (unsigned __int128)b[u]
			- (unsigned __int128)adj;
		r[u] = (uint64_t)acc;
		adj = -(uint64_t)(acc >> 64);
	}

	/* The added 2^256 term, minus the final borrow. */
	top = 1 + (uint64_t)(acc >> 64);

	/*
	 * We have a 257-bit result. The two top bits can be 00, 01 or 10,
	 * but not 11 (value t <= 2^256 - 38 + 2^255 + 37 = 2^256 + 2^255 - 1).
	 * Therefore, we can truncate to 255 bits, and add 0, 19 or 38.
	 * This guarantees that the result is at most 2^255+37.
	 */
	adj = (38 & -top) + (19 & -(r[3] >> 63));
	r[3] &= MASK63;
	acc = (unsigned __int128)r[0] + (unsigned __int128)adj;
	d[0] = (uint64_t)acc;
	for (u = 1; u < 3; u ++) {
		acc = (unsigned __int128)r[u] + (acc >> 64);
		d[u] = (uint64_t)acc;
	}
	d[3] = r[3] + (uint64_t)(acc >> 64);

#elif BR_UMUL128

	/*
	 * We compute t = 2^256 - 38 + a - b, which is necessarily
	 * positive but lower than 2^256 + 2^255, since a <= 2^255 + 37
	 * and b <= 2^255 + 37. The bits of rank 255 and 256 of t are
	 * then folded back, using 2^255 = 19 mod p.
	 */

	uint64_t r[4], top;
	unsigned char k;
	int u;

	/* First pass: 2^256 + a - b (the fifth word is in "top"). */
	k = 0;
	for (u = 0; u < 4; u ++) {
		k = _subborrow_u64(k, a[u], b[u], &r[u]);
	}
	(void)_subborrow_u64(k, 1, 0, &top);

	/* Second pass: subtract 38 from the five-word value. */
	k = _subborrow_u64(0, r[0], 38, &r[0]);
	for (u = 1; u < 4; u ++) {
		k = _subborrow_u64(k, r[u], 0, &r[u]);
	}
	(void)_subborrow_u64(k, top, 0, &top);

	/*
	 * We have a 257-bit result. The two top bits can be 00, 01 or 10,
	 * but not 11 (value t <= 2^256 - 38 + 2^255 + 37 = 2^256 + 2^255 - 1).
	 * Therefore, we can truncate to 255 bits, and add 0, 19 or 38.
	 * This guarantees that the result is at most 2^255+37.
	 */
	top = (38 & -top) + (19 & -(r[3] >> 63));
	r[3] &= MASK63;
	k = _addcarry_u64(0, r[0], top, &d[0]);
	for (u = 1; u < 3; u ++) {
		k = _addcarry_u64(k, r[u], 0, &d[u]);
	}
	(void)_addcarry_u64(k, r[3], 0, &d[3]);

#endif
}
|
||||
|
||||
/*
|
||||
* Multiplication.
|
||||
*/
|
||||
static inline void
|
||||
f255_mul(uint64_t *d, uint64_t *a, uint64_t *b)
|
||||
{
|
||||
#if BR_INT128
|
||||
|
||||
unsigned __int128 z;
|
||||
uint64_t t0, t1, t2, t3, t4, t5, t6, t7, th;
|
||||
|
||||
/*
|
||||
* Compute the product a*b over plain integers.
|
||||
*/
|
||||
z = (unsigned __int128)a[0] * (unsigned __int128)b[0];
|
||||
t0 = (uint64_t)z;
|
||||
z = (unsigned __int128)a[0] * (unsigned __int128)b[1] + (z >> 64);
|
||||
t1 = (uint64_t)z;
|
||||
z = (unsigned __int128)a[0] * (unsigned __int128)b[2] + (z >> 64);
|
||||
t2 = (uint64_t)z;
|
||||
z = (unsigned __int128)a[0] * (unsigned __int128)b[3] + (z >> 64);
|
||||
t3 = (uint64_t)z;
|
||||
t4 = (uint64_t)(z >> 64);
|
||||
|
||||
z = (unsigned __int128)a[1] * (unsigned __int128)b[0]
|
||||
+ (unsigned __int128)t1;
|
||||
t1 = (uint64_t)z;
|
||||
z = (unsigned __int128)a[1] * (unsigned __int128)b[1]
|
||||
+ (unsigned __int128)t2 + (z >> 64);
|
||||
t2 = (uint64_t)z;
|
||||
z = (unsigned __int128)a[1] * (unsigned __int128)b[2]
|
||||
+ (unsigned __int128)t3 + (z >> 64);
|
||||
t3 = (uint64_t)z;
|
||||
z = (unsigned __int128)a[1] * (unsigned __int128)b[3]
|
||||
+ (unsigned __int128)t4 + (z >> 64);
|
||||
t4 = (uint64_t)z;
|
||||
t5 = (uint64_t)(z >> 64);
|
||||
|
||||
z = (unsigned __int128)a[2] * (unsigned __int128)b[0]
|
||||
+ (unsigned __int128)t2;
|
||||
t2 = (uint64_t)z;
|
||||
z = (unsigned __int128)a[2] * (unsigned __int128)b[1]
|
||||
+ (unsigned __int128)t3 + (z >> 64);
|
||||
t3 = (uint64_t)z;
|
||||
z = (unsigned __int128)a[2] * (unsigned __int128)b[2]
|
||||
+ (unsigned __int128)t4 + (z >> 64);
|
||||
t4 = (uint64_t)z;
|
||||
z = (unsigned __int128)a[2] * (unsigned __int128)b[3]
|
||||
+ (unsigned __int128)t5 + (z >> 64);
|
||||
t5 = (uint64_t)z;
|
||||
t6 = (uint64_t)(z >> 64);
|
||||
|
||||
z = (unsigned __int128)a[3] * (unsigned __int128)b[0]
|
||||
+ (unsigned __int128)t3;
|
||||
t3 = (uint64_t)z;
|
||||
z = (unsigned __int128)a[3] * (unsigned __int128)b[1]
|
||||
+ (unsigned __int128)t4 + (z >> 64);
|
||||
t4 = (uint64_t)z;
|
||||
z = (unsigned __int128)a[3] * (unsigned __int128)b[2]
|
||||
+ (unsigned __int128)t5 + (z >> 64);
|
||||
t5 = (uint64_t)z;
|
||||
z = (unsigned __int128)a[3] * (unsigned __int128)b[3]
|
||||
+ (unsigned __int128)t6 + (z >> 64);
|
||||
t6 = (uint64_t)z;
|
||||
t7 = (uint64_t)(z >> 64);
|
||||
|
||||
/*
|
||||
* Modulo p, we have:
|
||||
*
|
||||
* 2^255 = 19
|
||||
* 2^510 = 19*19 = 361
|
||||
*
|
||||
* We split the intermediate t into three parts, in basis
|
||||
* 2^255. The low one will be in t0..t3; the middle one in t4..t7.
|
||||
* The upper one can only be a single bit (th), since the
|
||||
* multiplication operands are at most 2^255+37 each.
|
||||
*/
|
||||
th = t7 >> 62;
|
||||
t7 = ((t7 << 1) | (t6 >> 63)) & MASK63;
|
||||
t6 = (t6 << 1) | (t5 >> 63);
|
||||
t5 = (t5 << 1) | (t4 >> 63);
|
||||
t4 = (t4 << 1) | (t3 >> 63);
|
||||
t3 &= MASK63;
|
||||
|
||||
/*
|
||||
* Multiply the middle part (t4..t7) by 19. We truncate it to
|
||||
* 255 bits; the extra bits will go along with th.
|
||||
*/
|
||||
z = (unsigned __int128)t4 * 19;
|
||||
t4 = (uint64_t)z;
|
||||
z = (unsigned __int128)t5 * 19 + (z >> 64);
|
||||
t5 = (uint64_t)z;
|
||||
z = (unsigned __int128)t6 * 19 + (z >> 64);
|
||||
t6 = (uint64_t)z;
|
||||
z = (unsigned __int128)t7 * 19 + (z >> 64);
|
||||
t7 = (uint64_t)z & MASK63;
|
||||
|
||||
th = (361 & -th) + (19 * (uint64_t)(z >> 63));
|
||||
|
||||
/*
|
||||
* Add elements together.
|
||||
* At this point:
|
||||
* t0..t3 fits on 255 bits.
|
||||
* t4..t7 fits on 255 bits.
|
||||
* th <= 361 + 342 = 703.
|
||||
*/
|
||||
z = (unsigned __int128)t0 + (unsigned __int128)t4
|
||||
+ (unsigned __int128)th;
|
||||
t0 = (uint64_t)z;
|
||||
z = (unsigned __int128)t1 + (unsigned __int128)t5 + (z >> 64);
|
||||
t1 = (uint64_t)z;
|
||||
z = (unsigned __int128)t2 + (unsigned __int128)t6 + (z >> 64);
|
||||
t2 = (uint64_t)z;
|
||||
z = (unsigned __int128)t3 + (unsigned __int128)t7 + (z >> 64);
|
||||
t3 = (uint64_t)z & MASK63;
|
||||
th = (uint64_t)(z >> 63);
|
||||
|
||||
/*
|
||||
* Since the sum is at most 2^256 + 703, the two upper bits, in th,
|
||||
* can only have value 0, 1 or 2. We just add th*19, which
|
||||
* guarantees a result of at most 2^255+37.
|
||||
*/
|
||||
z = (unsigned __int128)t0 + (19 * th);
|
||||
d[0] = (uint64_t)z;
|
||||
z = (unsigned __int128)t1 + (z >> 64);
|
||||
d[1] = (uint64_t)z;
|
||||
z = (unsigned __int128)t2 + (z >> 64);
|
||||
d[2] = (uint64_t)z;
|
||||
d[3] = t3 + (uint64_t)(z >> 64);
|
||||
|
||||
#elif BR_UMUL128
|
||||
|
||||
uint64_t t0, t1, t2, t3, t4, t5, t6, t7, th;
|
||||
uint64_t h0, h1, h2, h3;
|
||||
unsigned char k;
|
||||
|
||||
/*
|
||||
* Compute the product a*b over plain integers.
|
||||
*/
|
||||
t0 = _umul128(a[0], b[0], &h0);
|
||||
t1 = _umul128(a[0], b[1], &h1);
|
||||
k = _addcarry_u64(0, t1, h0, &t1);
|
||||
t2 = _umul128(a[0], b[2], &h2);
|
||||
k = _addcarry_u64(k, t2, h1, &t2);
|
||||
t3 = _umul128(a[0], b[3], &h3);
|
||||
k = _addcarry_u64(k, t3, h2, &t3);
|
||||
(void)_addcarry_u64(k, h3, 0, &t4);
|
||||
|
||||
k = _addcarry_u64(0, _umul128(a[1], b[0], &h0), t1, &t1);
|
||||
k = _addcarry_u64(k, _umul128(a[1], b[1], &h1), t2, &t2);
|
||||
k = _addcarry_u64(k, _umul128(a[1], b[2], &h2), t3, &t3);
|
||||
k = _addcarry_u64(k, _umul128(a[1], b[3], &h3), t4, &t4);
|
||||
t5 = k;
|
||||
k = _addcarry_u64(0, t2, h0, &t2);
|
||||
k = _addcarry_u64(k, t3, h1, &t3);
|
||||
k = _addcarry_u64(k, t4, h2, &t4);
|
||||
(void)_addcarry_u64(k, t5, h3, &t5);
|
||||
|
||||
k = _addcarry_u64(0, _umul128(a[2], b[0], &h0), t2, &t2);
|
||||
k = _addcarry_u64(k, _umul128(a[2], b[1], &h1), t3, &t3);
|
||||
k = _addcarry_u64(k, _umul128(a[2], b[2], &h2), t4, &t4);
|
||||
k = _addcarry_u64(k, _umul128(a[2], b[3], &h3), t5, &t5);
|
||||
t6 = k;
|
||||
k = _addcarry_u64(0, t3, h0, &t3);
|
||||
k = _addcarry_u64(k, t4, h1, &t4);
|
||||
k = _addcarry_u64(k, t5, h2, &t5);
|
||||
(void)_addcarry_u64(k, t6, h3, &t6);
|
||||
|
||||
k = _addcarry_u64(0, _umul128(a[3], b[0], &h0), t3, &t3);
|
||||
k = _addcarry_u64(k, _umul128(a[3], b[1], &h1), t4, &t4);
|
||||
k = _addcarry_u64(k, _umul128(a[3], b[2], &h2), t5, &t5);
|
||||
k = _addcarry_u64(k, _umul128(a[3], b[3], &h3), t6, &t6);
|
||||
t7 = k;
|
||||
k = _addcarry_u64(0, t4, h0, &t4);
|
||||
k = _addcarry_u64(k, t5, h1, &t5);
|
||||
k = _addcarry_u64(k, t6, h2, &t6);
|
||||
(void)_addcarry_u64(k, t7, h3, &t7);
|
||||
|
||||
/*
|
||||
* Modulo p, we have:
|
||||
*
|
||||
* 2^255 = 19
|
||||
* 2^510 = 19*19 = 361
|
||||
*
|
||||
* We split the intermediate t into three parts, in basis
|
||||
* 2^255. The low one will be in t0..t3; the middle one in t4..t7.
|
||||
* The upper one can only be a single bit (th), since the
|
||||
* multiplication operands are at most 2^255+37 each.
|
||||
*/
|
||||
th = t7 >> 62;
|
||||
t7 = ((t7 << 1) | (t6 >> 63)) & MASK63;
|
||||
t6 = (t6 << 1) | (t5 >> 63);
|
||||
t5 = (t5 << 1) | (t4 >> 63);
|
||||
t4 = (t4 << 1) | (t3 >> 63);
|
||||
t3 &= MASK63;
|
||||
|
||||
/*
|
||||
* Multiply the middle part (t4..t7) by 19. We truncate it to
|
||||
* 255 bits; the extra bits will go along with th.
|
||||
*/
|
||||
t4 = _umul128(t4, 19, &h0);
|
||||
t5 = _umul128(t5, 19, &h1);
|
||||
t6 = _umul128(t6, 19, &h2);
|
||||
t7 = _umul128(t7, 19, &h3);
|
||||
k = _addcarry_u64(0, t5, h0, &t5);
|
||||
k = _addcarry_u64(k, t6, h1, &t6);
|
||||
k = _addcarry_u64(k, t7, h2, &t7);
|
||||
(void)_addcarry_u64(k, h3, 0, &h3);
|
||||
th = (361 & -th) + (19 * ((h3 << 1) + (t7 >> 63)));
|
||||
t7 &= MASK63;
|
||||
|
||||
/*
|
||||
* Add elements together.
|
||||
* At this point:
|
||||
* t0..t3 fits on 255 bits.
|
||||
* t4..t7 fits on 255 bits.
|
||||
* th <= 361 + 342 = 703.
|
||||
*/
|
||||
k = _addcarry_u64(0, t0, t4, &t0);
|
||||
k = _addcarry_u64(k, t1, t5, &t1);
|
||||
k = _addcarry_u64(k, t2, t6, &t2);
|
||||
k = _addcarry_u64(k, t3, t7, &t3);
|
||||
t4 = k;
|
||||
k = _addcarry_u64(0, t0, th, &t0);
|
||||
k = _addcarry_u64(k, t1, 0, &t1);
|
||||
k = _addcarry_u64(k, t2, 0, &t2);
|
||||
k = _addcarry_u64(k, t3, 0, &t3);
|
||||
(void)_addcarry_u64(k, t4, 0, &t4);
|
||||
|
||||
th = (t4 << 1) + (t3 >> 63);
|
||||
t3 &= MASK63;
|
||||
|
||||
/*
|
||||
* Since the sum is at most 2^256 + 703, the two upper bits, in th,
|
||||
* can only have value 0, 1 or 2. We just add th*19, which
|
||||
* guarantees a result of at most 2^255+37.
|
||||
*/
|
||||
k = _addcarry_u64(0, t0, 19 * th, &d[0]);
|
||||
k = _addcarry_u64(k, t1, 0, &d[1]);
|
||||
k = _addcarry_u64(k, t2, 0, &d[2]);
|
||||
(void)_addcarry_u64(k, t3, 0, &d[3]);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Multiplication by A24 = 121665.
 *
 * d receives a*121665 with the bits of weight 2^255 and above folded
 * back into the low limbs (multiplied by 19, since 2^255 = 19 mod p).
 * Both a[] and d[] are four 64-bit limbs, least significant limb first.
 *
 * Two folding passes are performed:
 *  -- the first one folds everything above bit 254 of the raw product;
 *  -- the second one folds the single bit that the first pass may have
 *     carried into bit 255.
 *
 * MASK63 is defined elsewhere in this file (presumably the low-63-bit
 * mask, i.e. 2^63-1 -- confirm against its definition).
 */
static inline void
f255_mul_a24(uint64_t *d, const uint64_t *a)
{
#if BR_INT128

	uint64_t t0, t1, t2, t3;
	unsigned __int128 z;

	/* Raw product a * 121665, carries propagated through z. */
	z = (unsigned __int128)a[0] * 121665;
	t0 = (uint64_t)z;
	z = (unsigned __int128)a[1] * 121665 + (z >> 64);
	t1 = (uint64_t)z;
	z = (unsigned __int128)a[2] * 121665 + (z >> 64);
	t2 = (uint64_t)z;
	z = (unsigned __int128)a[3] * 121665 + (z >> 64);
	t3 = (uint64_t)z & MASK63;

	/* First fold: (z >> 63) holds the bits of weight >= 2^255. */
	z = (unsigned __int128)t0 + (19 * (uint64_t)(z >> 63));
	t0 = (uint64_t)z;
	z = (unsigned __int128)t1 + (z >> 64);
	t1 = (uint64_t)z;
	z = (unsigned __int128)t2 + (z >> 64);
	t2 = (uint64_t)z;
	t3 = t3 + (uint64_t)(z >> 64);

	/* Second fold: add 19 if (and only if) bit 255 is set, branch-free. */
	z = (unsigned __int128)t0 + (19 & -(t3 >> 63));
	d[0] = (uint64_t)z;
	z = (unsigned __int128)t1 + (z >> 64);
	d[1] = (uint64_t)z;
	z = (unsigned __int128)t2 + (z >> 64);
	d[2] = (uint64_t)z;
	d[3] = (t3 & MASK63) + (uint64_t)(z >> 64);

#elif BR_UMUL128

	uint64_t t0, t1, t2, t3, t4, h0, h1, h2, h3;
	unsigned char k;

	/* Raw product a * 121665; t4 receives the top (fifth) limb. */
	t0 = _umul128(a[0], 121665, &h0);
	t1 = _umul128(a[1], 121665, &h1);
	k = _addcarry_u64(0, t1, h0, &t1);
	t2 = _umul128(a[2], 121665, &h2);
	k = _addcarry_u64(k, t2, h1, &t2);
	t3 = _umul128(a[3], 121665, &h3);
	k = _addcarry_u64(k, t3, h2, &t3);
	(void)_addcarry_u64(k, h3, 0, &t4);

	/* First fold: t4 becomes the bits of weight >= 2^255. */
	t4 = (t4 << 1) + (t3 >> 63);
	t3 &= MASK63;
	k = _addcarry_u64(0, t0, 19 * t4, &t0);
	k = _addcarry_u64(k, t1, 0, &t1);
	k = _addcarry_u64(k, t2, 0, &t2);
	(void)_addcarry_u64(k, t3, 0, &t3);

	/* Second fold: add 19 if (and only if) bit 255 is set, branch-free. */
	t4 = 19 & -(t3 >> 63);
	t3 &= MASK63;
	k = _addcarry_u64(0, t0, t4, &d[0]);
	k = _addcarry_u64(k, t1, 0, &d[1]);
	k = _addcarry_u64(k, t2, 0, &d[2]);
	(void)_addcarry_u64(k, t3, 0, &d[3]);

#endif
}
|
||||
|
||||
/*
 * Finalize reduction.
 *
 * a[] (four 64-bit limbs, least significant first) is modified in
 * place. The function computes t = a + 19; if t reaches 2^255 (top bit
 * of the last limb set), then a >= 2^255-19 and a is replaced with
 * t - 2^255 = a - (2^255-19). Otherwise a is left untouched. The
 * selection uses an all-ones/all-zeros mask, not a branch.
 */
static inline void
f255_final_reduce(uint64_t *a)
{
#if BR_INT128

	uint64_t t0, t1, t2, t3, m;
	unsigned __int128 z;

	/*
	 * We add 19. If the result (in t) is below 2^255, then a[]
	 * is already less than 2^255-19, thus already reduced.
	 * Otherwise, we subtract 2^255 from t[], in which case we
	 * have t = a - (2^255-19), and that's our result.
	 */
	z = (unsigned __int128)a[0] + 19;
	t0 = (uint64_t)z;
	z = (unsigned __int128)a[1] + (z >> 64);
	t1 = (uint64_t)z;
	z = (unsigned __int128)a[2] + (z >> 64);
	t2 = (uint64_t)z;
	t3 = a[3] + (uint64_t)(z >> 64);

	/* m is all-ones when t >= 2^255, all-zeros otherwise. */
	m = -(t3 >> 63);
	t3 &= MASK63;
	a[0] ^= m & (a[0] ^ t0);
	a[1] ^= m & (a[1] ^ t1);
	a[2] ^= m & (a[2] ^ t2);
	a[3] ^= m & (a[3] ^ t3);

#elif BR_UMUL128

	uint64_t t0, t1, t2, t3, m;
	unsigned char k;

	/*
	 * We add 19. If the result (in t) is below 2^255, then a[]
	 * is already less than 2^255-19, thus already reduced.
	 * Otherwise, we subtract 2^255 from t[], in which case we
	 * have t = a - (2^255-19), and that's our result.
	 */
	k = _addcarry_u64(0, a[0], 19, &t0);
	k = _addcarry_u64(k, a[1], 0, &t1);
	k = _addcarry_u64(k, a[2], 0, &t2);
	(void)_addcarry_u64(k, a[3], 0, &t3);

	/* m is all-ones when t >= 2^255, all-zeros otherwise. */
	m = -(t3 >> 63);
	t3 &= MASK63;
	a[0] ^= m & (a[0] ^ t0);
	a[1] ^= m & (a[1] ^ t1);
	a[2] ^= m & (a[2] ^ t2);
	a[3] ^= m & (a[3] ^ t3);

#endif
}
|
||||
|
||||
static uint32_t
|
||||
api_mul(unsigned char *G, size_t Glen,
|
||||
const unsigned char *kb, size_t kblen, int curve)
|
||||
{
|
||||
unsigned char k[32];
|
||||
uint64_t x1[4], x2[4], z2[4], x3[4], z3[4];
|
||||
uint32_t swap;
|
||||
int i;
|
||||
|
||||
(void)curve;
|
||||
|
||||
/*
|
||||
* Points are encoded over exactly 32 bytes. Multipliers must fit
|
||||
* in 32 bytes as well.
|
||||
*/
|
||||
if (Glen != 32 || kblen > 32) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* RFC 7748 mandates that the high bit of the last point byte must
|
||||
* be ignored/cleared.
|
||||
*/
|
||||
x1[0] = br_dec64le(&G[ 0]);
|
||||
x1[1] = br_dec64le(&G[ 8]);
|
||||
x1[2] = br_dec64le(&G[16]);
|
||||
x1[3] = br_dec64le(&G[24]) & MASK63;
|
||||
|
||||
/*
|
||||
* We can use memset() to clear values, because exact-width types
|
||||
* like uint64_t are guaranteed to have no padding bits or
|
||||
* trap representations.
|
||||
*/
|
||||
memset(x2, 0, sizeof x2);
|
||||
x2[0] = 1;
|
||||
memset(z2, 0, sizeof z2);
|
||||
memcpy(x3, x1, sizeof x1);
|
||||
memcpy(z3, x2, sizeof x2);
|
||||
|
||||
/*
|
||||
* The multiplier is provided in big-endian notation, and
|
||||
* possibly shorter than 32 bytes.
|
||||
*/
|
||||
memset(k, 0, (sizeof k) - kblen);
|
||||
memcpy(k + (sizeof k) - kblen, kb, kblen);
|
||||
k[31] &= 0xF8;
|
||||
k[0] &= 0x7F;
|
||||
k[0] |= 0x40;
|
||||
|
||||
swap = 0;
|
||||
|
||||
for (i = 254; i >= 0; i --) {
|
||||
uint64_t a[4], aa[4], b[4], bb[4], e[4];
|
||||
uint64_t c[4], d[4], da[4], cb[4];
|
||||
uint32_t kt;
|
||||
|
||||
kt = (k[31 - (i >> 3)] >> (i & 7)) & 1;
|
||||
swap ^= kt;
|
||||
f255_cswap(x2, x3, swap);
|
||||
f255_cswap(z2, z3, swap);
|
||||
swap = kt;
|
||||
|
||||
/* A = x_2 + z_2 */
|
||||
f255_add(a, x2, z2);
|
||||
|
||||
/* AA = A^2 */
|
||||
f255_mul(aa, a, a);
|
||||
|
||||
/* B = x_2 - z_2 */
|
||||
f255_sub(b, x2, z2);
|
||||
|
||||
/* BB = B^2 */
|
||||
f255_mul(bb, b, b);
|
||||
|
||||
/* E = AA - BB */
|
||||
f255_sub(e, aa, bb);
|
||||
|
||||
/* C = x_3 + z_3 */
|
||||
f255_add(c, x3, z3);
|
||||
|
||||
/* D = x_3 - z_3 */
|
||||
f255_sub(d, x3, z3);
|
||||
|
||||
/* DA = D * A */
|
||||
f255_mul(da, d, a);
|
||||
|
||||
/* CB = C * B */
|
||||
f255_mul(cb, c, b);
|
||||
|
||||
/* x_3 = (DA + CB)^2 */
|
||||
f255_add(x3, da, cb);
|
||||
f255_mul(x3, x3, x3);
|
||||
|
||||
/* z_3 = x_1 * (DA - CB)^2 */
|
||||
f255_sub(z3, da, cb);
|
||||
f255_mul(z3, z3, z3);
|
||||
f255_mul(z3, x1, z3);
|
||||
|
||||
/* x_2 = AA * BB */
|
||||
f255_mul(x2, aa, bb);
|
||||
|
||||
/* z_2 = E * (AA + a24 * E) */
|
||||
f255_mul_a24(z2, e);
|
||||
f255_add(z2, aa, z2);
|
||||
f255_mul(z2, e, z2);
|
||||
}
|
||||
|
||||
f255_cswap(x2, x3, swap);
|
||||
f255_cswap(z2, z3, swap);
|
||||
|
||||
/*
|
||||
* Compute 1/z2 = z2^(p-2). Since p = 2^255-19, we can mutualize
|
||||
* most non-squarings. We use x1 and x3, now useless, as temporaries.
|
||||
*/
|
||||
memcpy(x1, z2, sizeof z2);
|
||||
for (i = 0; i < 15; i ++) {
|
||||
f255_mul(x1, x1, x1);
|
||||
f255_mul(x1, x1, z2);
|
||||
}
|
||||
memcpy(x3, x1, sizeof x1);
|
||||
for (i = 0; i < 14; i ++) {
|
||||
int j;
|
||||
|
||||
for (j = 0; j < 16; j ++) {
|
||||
f255_mul(x3, x3, x3);
|
||||
}
|
||||
f255_mul(x3, x3, x1);
|
||||
}
|
||||
for (i = 14; i >= 0; i --) {
|
||||
f255_mul(x3, x3, x3);
|
||||
if ((0xFFEB >> i) & 1) {
|
||||
f255_mul(x3, z2, x3);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute x2/z2. We have 1/z2 in x3.
|
||||
*/
|
||||
f255_mul(x2, x2, x3);
|
||||
f255_final_reduce(x2);
|
||||
|
||||
/*
|
||||
* Encode the final x2 value in little-endian.
|
||||
*/
|
||||
br_enc64le(G, x2[0]);
|
||||
br_enc64le(G + 8, x2[1]);
|
||||
br_enc64le(G + 16, x2[2]);
|
||||
br_enc64le(G + 24, x2[3]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Multiply the curve generator by the scalar x.
 *
 * R      output buffer; receives the generator (as returned by
 *        api_generator()), then the product computed in place by
 *        api_mul().
 * x      multiplier, passed unchanged to api_mul().
 * xlen   multiplier length in bytes.
 * curve  curve identifier, forwarded to api_generator() and api_mul().
 *
 * Returned value is the generator length reported by api_generator().
 * The status returned by api_mul() is deliberately not reported: if
 * api_mul() rejects its inputs, R still contains the unmodified
 * generator.
 */
static size_t
api_mulgen(unsigned char *R,
	const unsigned char *x, size_t xlen, int curve)
{
	const unsigned char *G;
	size_t Glen;

	G = api_generator(curve, &Glen);
	memcpy(R, G, Glen);
	(void)api_mul(R, Glen, x, xlen, curve);
	return Glen;
}
|
||||
|
||||
/*
 * Combined multiply-and-add: not supported by this implementation.
 *
 * All parameters are ignored and the function always returns 0
 * (failure); none of the buffers is read or written.
 */
static uint32_t
api_muladd(unsigned char *A, const unsigned char *B, size_t len,
	const unsigned char *x, size_t xlen,
	const unsigned char *y, size_t ylen, int curve)
{
	/*
	 * We don't implement this method, since it is used for ECDSA
	 * only, and there is no ECDSA over Curve25519 (which instead
	 * uses EdDSA).
	 */
	(void)A;
	(void)B;
	(void)len;
	(void)x;
	(void)xlen;
	(void)y;
	(void)ylen;
	(void)curve;
	return 0;
}
|
||||
|
||||
/* see bearssl_ec.h */
|
||||
const br_ec_impl br_ec_c25519_m64 = {
|
||||
(uint32_t)0x20000000,
|
||||
&api_generator,
|
||||
&api_order,
|
||||
&api_xoff,
|
||||
&api_mul,
|
||||
&api_mulgen,
|
||||
&api_muladd
|
||||
};
|
||||
|
||||
/* see bearssl_ec.h */
|
||||
const br_ec_impl *
|
||||
br_ec_c25519_m64_get(void)
|
||||
{
|
||||
return &br_ec_c25519_m64;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* see bearssl_ec.h */
|
||||
const br_ec_impl *
|
||||
br_ec_c25519_m64_get(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * Generator, 32 bytes: first byte is 0x09, all others are zero
 * (i.e. the value 9 if read as a little-endian integer).
 */
static const unsigned char GEN[] = {
	0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
|
||||
|
||||
/*
 * "Order" value, 32 bytes: 0x7F followed by 31 bytes of value 0xFF
 * (i.e. 2^255-1 if read as a big-endian integer).
 */
static const unsigned char ORDER[] = {
	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
|
||||
|
||||
/* see inner.h */
|
||||
const br_ec_curve_def br_curve25519 = {
|
||||
BR_EC_curve25519,
|
||||
ORDER, sizeof ORDER,
|
||||
GEN, sizeof GEN
|
||||
};
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_ec.h */
|
||||
const br_ec_impl *
|
||||
br_ec_get_default(void)
|
||||
{
|
||||
#if BR_LOMUL
|
||||
return &br_ec_all_m15;
|
||||
#else
|
||||
return &br_ec_all_m31;
|
||||
#endif
|
||||
}
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
/*
|
||||
* Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_ec.h */
|
||||
size_t
|
||||
br_ec_keygen(const br_prng_class **rng_ctx,
|
||||
const br_ec_impl *impl, br_ec_private_key *sk,
|
||||
void *kbuf, int curve)
|
||||
{
|
||||
const unsigned char *order;
|
||||
unsigned char *buf;
|
||||
size_t len;
|
||||
unsigned mask;
|
||||
|
||||
if (curve < 0 || curve >= 32
|
||||
|| ((impl->supported_curves >> curve) & 1) == 0)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
order = impl->order(curve, &len);
|
||||
while (len > 0 && *order == 0) {
|
||||
order ++;
|
||||
len --;
|
||||
}
|
||||
if (kbuf == NULL || len == 0) {
|
||||
return len;
|
||||
}
|
||||
mask = order[0];
|
||||
mask |= (mask >> 1);
|
||||
mask |= (mask >> 2);
|
||||
mask |= (mask >> 4);
|
||||
|
||||
/*
|
||||
* We generate sequences of random bits of the right size, until
|
||||
* the value is strictly lower than the curve order (we also
|
||||
* check for all-zero values, which are invalid).
|
||||
*/
|
||||
buf = kbuf;
|
||||
for (;;) {
|
||||
size_t u;
|
||||
unsigned cc, zz;
|
||||
|
||||
(*rng_ctx)->generate(rng_ctx, buf, len);
|
||||
buf[0] &= mask;
|
||||
cc = 0;
|
||||
u = len;
|
||||
zz = 0;
|
||||
while (u -- > 0) {
|
||||
cc = ((unsigned)(buf[u] - order[u] - cc) >> 8) & 1;
|
||||
zz |= buf[u];
|
||||
}
|
||||
if (cc != 0 && zz != 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (sk != NULL) {
|
||||
sk->curve = curve;
|
||||
sk->x = buf;
|
||||
sk->xlen = len;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,824 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * Parameters for supported curves:
 *  - field modulus p
 *  - R^2 mod p (R = 2^(15k) for the smallest k such that R >= p)
 *  - b*R mod p (b is the second curve equation parameter)
 *
 * Each array starts with a header word (0x0111 for P-256), followed by
 * the value words. NOTE(review): the header presumably is the encoded
 * bit length used by the i15 big-integer routines -- confirm against
 * inner.h.
 */

/* P-256: field modulus p. */
static const uint16_t P256_P[] = {
	0x0111,
	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x003F, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x4000, 0x7FFF,
	0x7FFF, 0x0001
};

/* P-256: R^2 mod p. */
static const uint16_t P256_R2[] = {
	0x0111,
	0x0000, 0x6000, 0x0000, 0x0000, 0x0000, 0x0000, 0x7FFC, 0x7FFF,
	0x7FBF, 0x7FFF, 0x7FBF, 0x7FFF, 0x7FFF, 0x7FFF, 0x77FF, 0x7FFF,
	0x4FFF, 0x0000
};

/* P-256: b*R mod p. */
static const uint16_t P256_B[] = {
	0x0111,
	0x770C, 0x5EEF, 0x29C4, 0x3EC4, 0x6273, 0x0486, 0x4543, 0x3993,
	0x3C01, 0x6B56, 0x212E, 0x57EE, 0x4882, 0x204B, 0x7483, 0x3C16,
	0x0187, 0x0000
};
|
||||
|
||||
/* P-384: field modulus p (header word, then value words). */
static const uint16_t P384_P[] = {
	0x0199,
	0x7FFF, 0x7FFF, 0x0003, 0x0000, 0x0000, 0x0000, 0x7FC0, 0x7FFF,
	0x7EFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
	0x7FFF, 0x01FF
};

/* P-384: R^2 mod p. */
static const uint16_t P384_R2[] = {
	0x0199,
	0x1000, 0x0000, 0x0000, 0x7FFF, 0x7FFF, 0x0001, 0x0000, 0x0010,
	0x0000, 0x0000, 0x0000, 0x7F00, 0x7FFF, 0x01FF, 0x0000, 0x1000,
	0x0000, 0x2000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000
};

/* P-384: b*R mod p. */
static const uint16_t P384_B[] = {
	0x0199,
	0x7333, 0x2096, 0x70D1, 0x2310, 0x3020, 0x6197, 0x1464, 0x35BB,
	0x70CA, 0x0117, 0x1920, 0x4136, 0x5FC8, 0x5713, 0x4938, 0x7DD2,
	0x4DD2, 0x4A71, 0x0220, 0x683E, 0x2C87, 0x4DB1, 0x7BFF, 0x6C09,
	0x0452, 0x0084
};
|
||||
|
||||
/* P-521: field modulus p (header word, then value words). */
static const uint16_t P521_P[] = {
	0x022B,
	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
	0x7FFF, 0x7FFF, 0x07FF
};

/* P-521: R^2 mod p. */
static const uint16_t P521_R2[] = {
	0x022B,
	0x0100, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
	0x0000, 0x0000, 0x0000
};

/* P-521: b*R mod p. */
static const uint16_t P521_B[] = {
	0x022B,
	0x7002, 0x6A07, 0x751A, 0x228F, 0x71EF, 0x5869, 0x20F4, 0x1EFC,
	0x7357, 0x37E0, 0x4EEC, 0x605E, 0x1652, 0x26F6, 0x31FA, 0x4A8F,
	0x6193, 0x3C2A, 0x3C42, 0x48C7, 0x3489, 0x6771, 0x4C57, 0x5CCD,
	0x2725, 0x545B, 0x503B, 0x5B42, 0x21A0, 0x2534, 0x687E, 0x70E4,
	0x1618, 0x27D7, 0x0465
};
|
||||
|
||||
/*
 * Per-curve parameters:
 *   p          field modulus
 *   b          b*R mod p (second curve equation parameter)
 *   R2         R^2 mod p
 *   p0i        modulus-dependent constant handed to the Montgomery
 *              routines (NOTE(review): presumably -1/p[1] mod 2^15 --
 *              confirm against inner.h)
 *   point_len  length of an encoded point, in bytes
 */
typedef struct {
	const uint16_t *p;
	const uint16_t *b;
	const uint16_t *R2;
	uint16_t p0i;
	size_t point_len;
} curve_params;
|
||||
|
||||
static inline const curve_params *
|
||||
id_to_curve(int curve)
|
||||
{
|
||||
static const curve_params pp[] = {
|
||||
{ P256_P, P256_B, P256_R2, 0x0001, 65 },
|
||||
{ P384_P, P384_B, P384_R2, 0x0001, 97 },
|
||||
{ P521_P, P521_B, P521_R2, 0x0001, 133 }
|
||||
};
|
||||
|
||||
return &pp[curve - BR_EC_secp256r1];
|
||||
}
|
||||
|
||||
#define I15_LEN ((BR_MAX_EC_SIZE + 29) / 15)
|
||||
|
||||
/*
|
||||
* Type for a point in Jacobian coordinates:
|
||||
* -- three values, x, y and z, in Montgomery representation
|
||||
* -- affine coordinates are X = x / z^2 and Y = y / z^3
|
||||
* -- for the point at infinity, z = 0
|
||||
*/
|
||||
typedef struct {
|
||||
uint16_t c[3][I15_LEN];
|
||||
} jacobian;
|
||||
|
||||
/*
 * We use a custom interpreter that uses a dozen registers, and
 * only six operations:
 *    MSET(d, a)       copy a into d
 *    MADD(d, a)       d = d+a (modular)
 *    MSUB(d, a)       d = d-a (modular)
 *    MMUL(d, a, b)    d = a*b (Montgomery multiplication)
 *    MINV(d, a, b)    invert d modulo p; a and b are used as scratch registers
 *    MTZ(d)           clear return value if d = 0
 * Destination of MMUL (d) must be distinct from operands (a and b).
 * There is no such constraint for MSUB and MADD.
 *
 * Registers include the operand coordinates, and temporaries.
 *
 * Each instruction is a 16-bit word: operation in bits 12..15, then
 * d in bits 8..11, a in bits 4..7 and b in bits 0..3. ENDCODE (0)
 * terminates a program.
 */
#define MSET(d, a)      (0x0000 + ((d) << 8) + ((a) << 4))
#define MADD(d, a)      (0x1000 + ((d) << 8) + ((a) << 4))
#define MSUB(d, a)      (0x2000 + ((d) << 8) + ((a) << 4))
#define MMUL(d, a, b)   (0x3000 + ((d) << 8) + ((a) << 4) + (b))
#define MINV(d, a, b)   (0x4000 + ((d) << 8) + ((a) << 4) + (b))
#define MTZ(d)          (0x5000 + ((d) << 8))
#define ENDCODE         0

/*
 * Registers for the input operands.
 */
#define P1x    0
#define P1y    1
#define P1z    2
#define P2x    3
#define P2y    4
#define P2z    5

/*
 * Alternate names for the first input operand.
 */
#define Px     0
#define Py     1
#define Pz     2

/*
 * Temporaries.
 */
#define t1     6
#define t2     7
#define t3     8
#define t4     9
#define t5    10
#define t6    11
#define t7    12

/*
 * Extra scratch registers available when there is no second operand (e.g.
 * for "double" and "affine"). They alias the P2x, P2y and P2z registers.
 */
#define t8     3
#define t9     4
#define t10    5
|
||||
|
||||
/*
|
||||
* Doubling formulas are:
|
||||
*
|
||||
* s = 4*x*y^2
|
||||
* m = 3*(x + z^2)*(x - z^2)
|
||||
* x' = m^2 - 2*s
|
||||
* y' = m*(s - x') - 8*y^4
|
||||
* z' = 2*y*z
|
||||
*
|
||||
* If y = 0 (P has order 2) then this yields infinity (z' = 0), as it
|
||||
* should. This case should not happen anyway, because our curves have
|
||||
* prime order, and thus do not contain any point of order 2.
|
||||
*
|
||||
* If P is infinity (z = 0), then again the formulas yield infinity,
|
||||
* which is correct. Thus, this code works for all points.
|
||||
*
|
||||
* Cost: 8 multiplications
|
||||
*/
|
||||
static const uint16_t code_double[] = {
|
||||
/*
|
||||
* Compute z^2 (in t1).
|
||||
*/
|
||||
MMUL(t1, Pz, Pz),
|
||||
|
||||
/*
|
||||
* Compute x-z^2 (in t2) and then x+z^2 (in t1).
|
||||
*/
|
||||
MSET(t2, Px),
|
||||
MSUB(t2, t1),
|
||||
MADD(t1, Px),
|
||||
|
||||
/*
|
||||
* Compute m = 3*(x+z^2)*(x-z^2) (in t1).
|
||||
*/
|
||||
MMUL(t3, t1, t2),
|
||||
MSET(t1, t3),
|
||||
MADD(t1, t3),
|
||||
MADD(t1, t3),
|
||||
|
||||
/*
|
||||
* Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
|
||||
*/
|
||||
MMUL(t3, Py, Py),
|
||||
MADD(t3, t3),
|
||||
MMUL(t2, Px, t3),
|
||||
MADD(t2, t2),
|
||||
|
||||
/*
|
||||
* Compute x' = m^2 - 2*s.
|
||||
*/
|
||||
MMUL(Px, t1, t1),
|
||||
MSUB(Px, t2),
|
||||
MSUB(Px, t2),
|
||||
|
||||
/*
|
||||
* Compute z' = 2*y*z.
|
||||
*/
|
||||
MMUL(t4, Py, Pz),
|
||||
MSET(Pz, t4),
|
||||
MADD(Pz, t4),
|
||||
|
||||
/*
|
||||
* Compute y' = m*(s - x') - 8*y^4. Note that we already have
|
||||
* 2*y^2 in t3.
|
||||
*/
|
||||
MSUB(t2, Px),
|
||||
MMUL(Py, t1, t2),
|
||||
MMUL(t4, t3, t3),
|
||||
MSUB(Py, t4),
|
||||
MSUB(Py, t4),
|
||||
|
||||
ENDCODE
|
||||
};
|
||||
|
||||
/*
|
||||
* Addtions formulas are:
|
||||
*
|
||||
* u1 = x1 * z2^2
|
||||
* u2 = x2 * z1^2
|
||||
* s1 = y1 * z2^3
|
||||
* s2 = y2 * z1^3
|
||||
* h = u2 - u1
|
||||
* r = s2 - s1
|
||||
* x3 = r^2 - h^3 - 2 * u1 * h^2
|
||||
* y3 = r * (u1 * h^2 - x3) - s1 * h^3
|
||||
* z3 = h * z1 * z2
|
||||
*
|
||||
* If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that
|
||||
* z3 == 0, so the result is correct.
|
||||
* If either of P1 or P2 is infinity, but not both, then z3 == 0, which is
|
||||
* not correct.
|
||||
* h == 0 only if u1 == u2; this happens in two cases:
|
||||
* -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2
|
||||
* -- if s1 != s2 then P1 + P2 == infinity (but neither P1 or P2 is infinity)
|
||||
*
|
||||
* Thus, the following situations are not handled correctly:
|
||||
* -- P1 = 0 and P2 != 0
|
||||
* -- P1 != 0 and P2 = 0
|
||||
* -- P1 = P2
|
||||
* All other cases are properly computed. However, even in "incorrect"
|
||||
* situations, the three coordinates still are properly formed field
|
||||
* elements.
|
||||
*
|
||||
* The returned flag is cleared if r == 0. This happens in the following
|
||||
* cases:
|
||||
* -- Both points are on the same horizontal line (same Y coordinate).
|
||||
* -- Both points are infinity.
|
||||
* -- One point is infinity and the other is on line Y = 0.
|
||||
* The third case cannot happen with our curves (there is no valid point
|
||||
* on line Y = 0 since that would be a point of order 2). If the two
|
||||
* source points are non-infinity, then remains only the case where the
|
||||
* two points are on the same horizontal line.
|
||||
*
|
||||
* This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and
|
||||
* P2 != 0:
|
||||
* -- If the returned value is not the point at infinity, then it was properly
|
||||
* computed.
|
||||
* -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result
|
||||
* is indeed the point at infinity.
|
||||
* -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should
|
||||
* use the 'double' code.
|
||||
*
|
||||
* Cost: 16 multiplications
|
||||
*/
|
||||
static const uint16_t code_add[] = {
|
||||
/*
|
||||
* Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
|
||||
*/
|
||||
MMUL(t3, P2z, P2z),
|
||||
MMUL(t1, P1x, t3),
|
||||
MMUL(t4, P2z, t3),
|
||||
MMUL(t3, P1y, t4),
|
||||
|
||||
/*
|
||||
* Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
|
||||
*/
|
||||
MMUL(t4, P1z, P1z),
|
||||
MMUL(t2, P2x, t4),
|
||||
MMUL(t5, P1z, t4),
|
||||
MMUL(t4, P2y, t5),
|
||||
|
||||
/*
|
||||
* Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
|
||||
*/
|
||||
MSUB(t2, t1),
|
||||
MSUB(t4, t3),
|
||||
|
||||
/*
|
||||
* Report cases where r = 0 through the returned flag.
|
||||
*/
|
||||
MTZ(t4),
|
||||
|
||||
/*
|
||||
* Compute u1*h^2 (in t6) and h^3 (in t5).
|
||||
*/
|
||||
MMUL(t7, t2, t2),
|
||||
MMUL(t6, t1, t7),
|
||||
MMUL(t5, t7, t2),
|
||||
|
||||
/*
|
||||
* Compute x3 = r^2 - h^3 - 2*u1*h^2.
|
||||
* t1 and t7 can be used as scratch registers.
|
||||
*/
|
||||
MMUL(P1x, t4, t4),
|
||||
MSUB(P1x, t5),
|
||||
MSUB(P1x, t6),
|
||||
MSUB(P1x, t6),
|
||||
|
||||
/*
|
||||
* Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
|
||||
*/
|
||||
MSUB(t6, P1x),
|
||||
MMUL(P1y, t4, t6),
|
||||
MMUL(t1, t5, t3),
|
||||
MSUB(P1y, t1),
|
||||
|
||||
/*
|
||||
* Compute z3 = h*z1*z2.
|
||||
*/
|
||||
MMUL(t1, P1z, P2z),
|
||||
MMUL(P1z, t1, t2),
|
||||
|
||||
ENDCODE
|
||||
};
|
||||
|
||||
/*
|
||||
* Check that the point is on the curve. This code snippet assumes the
|
||||
* following conventions:
|
||||
* -- Coordinates x and y have been freshly decoded in P1 (but not
|
||||
* converted to Montgomery coordinates yet).
|
||||
* -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1.
|
||||
*/
|
||||
static const uint16_t code_check[] = {
|
||||
|
||||
/* Convert x and y to Montgomery representation. */
|
||||
MMUL(t1, P1x, P2x),
|
||||
MMUL(t2, P1y, P2x),
|
||||
MSET(P1x, t1),
|
||||
MSET(P1y, t2),
|
||||
|
||||
/* Compute x^3 in t1. */
|
||||
MMUL(t2, P1x, P1x),
|
||||
MMUL(t1, P1x, t2),
|
||||
|
||||
/* Subtract 3*x from t1. */
|
||||
MSUB(t1, P1x),
|
||||
MSUB(t1, P1x),
|
||||
MSUB(t1, P1x),
|
||||
|
||||
/* Add b. */
|
||||
MADD(t1, P2y),
|
||||
|
||||
/* Compute y^2 in t2. */
|
||||
MMUL(t2, P1y, P1y),
|
||||
|
||||
/* Compare y^2 with x^3 - 3*x + b; they must match. */
|
||||
MSUB(t1, t2),
|
||||
MTZ(t1),
|
||||
|
||||
/* Set z to 1 (in Montgomery representation). */
|
||||
MMUL(P1z, P2x, P2z),
|
||||
|
||||
ENDCODE
|
||||
};
|
||||
|
||||
/*
|
||||
* Conversion back to affine coordinates. This code snippet assumes that
|
||||
* the z coordinate of P2 is set to 1 (not in Montgomery representation).
|
||||
*/
|
||||
static const uint16_t code_affine[] = {
|
||||
|
||||
/* Save z*R in t1. */
|
||||
MSET(t1, P1z),
|
||||
|
||||
/* Compute z^3 in t2. */
|
||||
MMUL(t2, P1z, P1z),
|
||||
MMUL(t3, P1z, t2),
|
||||
MMUL(t2, t3, P2z),
|
||||
|
||||
/* Invert to (1/z^3) in t2. */
|
||||
MINV(t2, t3, t4),
|
||||
|
||||
/* Compute y. */
|
||||
MSET(t3, P1y),
|
||||
MMUL(P1y, t2, t3),
|
||||
|
||||
/* Compute (1/z^2) in t3. */
|
||||
MMUL(t3, t2, t1),
|
||||
|
||||
/* Compute x. */
|
||||
MSET(t2, P1x),
|
||||
MMUL(P1x, t2, t3),
|
||||
|
||||
ENDCODE
|
||||
};
|
||||
|
||||
static uint32_t
|
||||
run_code(jacobian *P1, const jacobian *P2,
|
||||
const curve_params *cc, const uint16_t *code)
|
||||
{
|
||||
uint32_t r;
|
||||
uint16_t t[13][I15_LEN];
|
||||
size_t u;
|
||||
|
||||
r = 1;
|
||||
|
||||
/*
|
||||
* Copy the two operands in the dedicated registers.
|
||||
*/
|
||||
memcpy(t[P1x], P1->c, 3 * I15_LEN * sizeof(uint16_t));
|
||||
memcpy(t[P2x], P2->c, 3 * I15_LEN * sizeof(uint16_t));
|
||||
|
||||
/*
|
||||
* Run formulas.
|
||||
*/
|
||||
for (u = 0;; u ++) {
|
||||
unsigned op, d, a, b;
|
||||
|
||||
op = code[u];
|
||||
if (op == 0) {
|
||||
break;
|
||||
}
|
||||
d = (op >> 8) & 0x0F;
|
||||
a = (op >> 4) & 0x0F;
|
||||
b = op & 0x0F;
|
||||
op >>= 12;
|
||||
switch (op) {
|
||||
uint32_t ctl;
|
||||
size_t plen;
|
||||
unsigned char tp[(BR_MAX_EC_SIZE + 7) >> 3];
|
||||
|
||||
case 0:
|
||||
memcpy(t[d], t[a], I15_LEN * sizeof(uint16_t));
|
||||
break;
|
||||
case 1:
|
||||
ctl = br_i15_add(t[d], t[a], 1);
|
||||
ctl |= NOT(br_i15_sub(t[d], cc->p, 0));
|
||||
br_i15_sub(t[d], cc->p, ctl);
|
||||
break;
|
||||
case 2:
|
||||
br_i15_add(t[d], cc->p, br_i15_sub(t[d], t[a], 1));
|
||||
break;
|
||||
case 3:
|
||||
br_i15_montymul(t[d], t[a], t[b], cc->p, cc->p0i);
|
||||
break;
|
||||
case 4:
|
||||
plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
|
||||
br_i15_encode(tp, plen, cc->p);
|
||||
tp[plen - 1] -= 2;
|
||||
br_i15_modpow(t[d], tp, plen,
|
||||
cc->p, cc->p0i, t[a], t[b]);
|
||||
break;
|
||||
default:
|
||||
r &= ~br_i15_iszero(t[d]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy back result.
|
||||
*/
|
||||
memcpy(P1->c, t[P1x], 3 * I15_LEN * sizeof(uint16_t));
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
 * Set x to the integer 1, using the header word of modulus p.
 * (p[0] + 31) >> 4 words of x are cleared first, then x[0] receives
 * p[0] and x[1] receives 1.
 */
static void
set_one(uint16_t *x, const uint16_t *p)
{
	size_t nwords = (p[0] + 31) >> 4;

	memset(x, 0, nwords * sizeof *x);
	x[1] = 0x0001;
	x[0] = p[0];
}
|
||||
|
||||
static void
|
||||
point_zero(jacobian *P, const curve_params *cc)
|
||||
{
|
||||
memset(P, 0, sizeof *P);
|
||||
P->c[0][0] = P->c[1][0] = P->c[2][0] = cc->p[0];
|
||||
}
|
||||
|
||||
static inline void
|
||||
point_double(jacobian *P, const curve_params *cc)
|
||||
{
|
||||
run_code(P, P, cc, code_double);
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
point_add(jacobian *P1, const jacobian *P2, const curve_params *cc)
|
||||
{
|
||||
return run_code(P1, P2, cc, code_add);
|
||||
}
|
||||
|
||||
static void
|
||||
point_mul(jacobian *P, const unsigned char *x, size_t xlen,
|
||||
const curve_params *cc)
|
||||
{
|
||||
/*
|
||||
* We do a simple double-and-add ladder with a 2-bit window
|
||||
* to make only one add every two doublings. We thus first
|
||||
* precompute 2P and 3P in some local buffers.
|
||||
*
|
||||
* We always perform two doublings and one addition; the
|
||||
* addition is with P, 2P and 3P and is done in a temporary
|
||||
* array.
|
||||
*
|
||||
* The addition code cannot handle cases where one of the
|
||||
* operands is infinity, which is the case at the start of the
|
||||
* ladder. We therefore need to maintain a flag that controls
|
||||
* this situation.
|
||||
*/
|
||||
uint32_t qz;
|
||||
jacobian P2, P3, Q, T, U;
|
||||
|
||||
memcpy(&P2, P, sizeof P2);
|
||||
point_double(&P2, cc);
|
||||
memcpy(&P3, P, sizeof P3);
|
||||
point_add(&P3, &P2, cc);
|
||||
|
||||
point_zero(&Q, cc);
|
||||
qz = 1;
|
||||
while (xlen -- > 0) {
|
||||
int k;
|
||||
|
||||
for (k = 6; k >= 0; k -= 2) {
|
||||
uint32_t bits;
|
||||
uint32_t bnz;
|
||||
|
||||
point_double(&Q, cc);
|
||||
point_double(&Q, cc);
|
||||
memcpy(&T, P, sizeof T);
|
||||
memcpy(&U, &Q, sizeof U);
|
||||
bits = (*x >> k) & (uint32_t)3;
|
||||
bnz = NEQ(bits, 0);
|
||||
CCOPY(EQ(bits, 2), &T, &P2, sizeof T);
|
||||
CCOPY(EQ(bits, 3), &T, &P3, sizeof T);
|
||||
point_add(&U, &T, cc);
|
||||
CCOPY(bnz & qz, &Q, &T, sizeof Q);
|
||||
CCOPY(bnz & ~qz, &Q, &U, sizeof Q);
|
||||
qz &= ~bnz;
|
||||
}
|
||||
x ++;
|
||||
}
|
||||
memcpy(P, &Q, sizeof Q);
|
||||
}
|
||||
|
||||
/*
|
||||
* Decode point into Jacobian coordinates. This function does not support
|
||||
* the point at infinity. If the point is invalid then this returns 0, but
|
||||
* the coordinates are still set to properly formed field elements.
|
||||
*/
|
||||
static uint32_t
|
||||
point_decode(jacobian *P, const void *src, size_t len, const curve_params *cc)
|
||||
{
|
||||
/*
|
||||
* Points must use uncompressed format:
|
||||
* -- first byte is 0x04;
|
||||
* -- coordinates X and Y use unsigned big-endian, with the same
|
||||
* length as the field modulus.
|
||||
*
|
||||
* We don't support hybrid format (uncompressed, but first byte
|
||||
* has value 0x06 or 0x07, depending on the least significant bit
|
||||
* of Y) because it is rather useless, and explicitly forbidden
|
||||
* by PKIX (RFC 5480, section 2.2).
|
||||
*
|
||||
* We don't support compressed format either, because it is not
|
||||
* much used in practice (there are or were patent-related
|
||||
* concerns about point compression, which explains the lack of
|
||||
* generalised support). Also, point compression support would
|
||||
* need a bit more code.
|
||||
*/
|
||||
const unsigned char *buf;
|
||||
size_t plen, zlen;
|
||||
uint32_t r;
|
||||
jacobian Q;
|
||||
|
||||
buf = src;
|
||||
point_zero(P, cc);
|
||||
plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
|
||||
if (len != 1 + (plen << 1)) {
|
||||
return 0;
|
||||
}
|
||||
r = br_i15_decode_mod(P->c[0], buf + 1, plen, cc->p);
|
||||
r &= br_i15_decode_mod(P->c[1], buf + 1 + plen, plen, cc->p);
|
||||
|
||||
/*
|
||||
* Check first byte.
|
||||
*/
|
||||
r &= EQ(buf[0], 0x04);
|
||||
/* obsolete
|
||||
r &= EQ(buf[0], 0x04) | (EQ(buf[0] & 0xFE, 0x06)
|
||||
& ~(uint32_t)(buf[0] ^ buf[plen << 1]));
|
||||
*/
|
||||
|
||||
/*
|
||||
* Convert coordinates and check that the point is valid.
|
||||
*/
|
||||
zlen = ((cc->p[0] + 31) >> 4) * sizeof(uint16_t);
|
||||
memcpy(Q.c[0], cc->R2, zlen);
|
||||
memcpy(Q.c[1], cc->b, zlen);
|
||||
set_one(Q.c[2], cc->p);
|
||||
r &= ~run_code(P, &Q, cc, code_check);
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* Encode a point. This method assumes that the point is correct and is
|
||||
* not the point at infinity. Encoded size is always 1+2*plen, where
|
||||
* plen is the field modulus length, in bytes.
|
||||
*/
|
||||
static void
|
||||
point_encode(void *dst, const jacobian *P, const curve_params *cc)
|
||||
{
|
||||
unsigned char *buf;
|
||||
size_t plen;
|
||||
jacobian Q, T;
|
||||
|
||||
buf = dst;
|
||||
plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
|
||||
buf[0] = 0x04;
|
||||
memcpy(&Q, P, sizeof *P);
|
||||
set_one(T.c[2], cc->p);
|
||||
run_code(&Q, &T, cc, code_affine);
|
||||
br_i15_encode(buf + 1, plen, Q.c[0]);
|
||||
br_i15_encode(buf + 1 + plen, plen, Q.c[1]);
|
||||
}
|
||||
|
||||
static const br_ec_curve_def *
|
||||
id_to_curve_def(int curve)
|
||||
{
|
||||
switch (curve) {
|
||||
case BR_EC_secp256r1:
|
||||
return &br_secp256r1;
|
||||
case BR_EC_secp384r1:
|
||||
return &br_secp384r1;
|
||||
case BR_EC_secp521r1:
|
||||
return &br_secp521r1;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static const unsigned char *
|
||||
api_generator(int curve, size_t *len)
|
||||
{
|
||||
const br_ec_curve_def *cd;
|
||||
|
||||
cd = id_to_curve_def(curve);
|
||||
*len = cd->generator_len;
|
||||
return cd->generator;
|
||||
}
|
||||
|
||||
static const unsigned char *
|
||||
api_order(int curve, size_t *len)
|
||||
{
|
||||
const br_ec_curve_def *cd;
|
||||
|
||||
cd = id_to_curve_def(curve);
|
||||
*len = cd->order_len;
|
||||
return cd->order;
|
||||
}
|
||||
|
||||
/*
 * Report where the X coordinate sits in an encoded point: the returned
 * offset is always 1, and *len is set to half the generator length
 * (rounded down).
 */
static size_t
api_xoff(int curve, size_t *len)
{
	(void)api_generator(curve, len);
	*len = *len >> 1;
	return 1;
}
|
||||
|
||||
static uint32_t
|
||||
api_mul(unsigned char *G, size_t Glen,
|
||||
const unsigned char *x, size_t xlen, int curve)
|
||||
{
|
||||
uint32_t r;
|
||||
const curve_params *cc;
|
||||
jacobian P;
|
||||
|
||||
cc = id_to_curve(curve);
|
||||
if (Glen != cc->point_len) {
|
||||
return 0;
|
||||
}
|
||||
r = point_decode(&P, G, Glen, cc);
|
||||
point_mul(&P, x, xlen, cc);
|
||||
point_encode(G, &P, cc);
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
 * Multiply the curve generator by x: the encoded generator is copied
 * into R, then multiplied in place with api_mul() (whose status is
 * ignored). The encoded length of the generator is returned.
 */
static size_t
api_mulgen(unsigned char *R,
	const unsigned char *x, size_t xlen, int curve)
{
	size_t glen;
	const unsigned char *gen = api_generator(curve, &glen);

	memcpy(R, gen, glen);
	(void)api_mul(R, glen, x, xlen, curve);
	return glen;
}
|
||||
|
||||
static uint32_t
|
||||
api_muladd(unsigned char *A, const unsigned char *B, size_t len,
|
||||
const unsigned char *x, size_t xlen,
|
||||
const unsigned char *y, size_t ylen, int curve)
|
||||
{
|
||||
uint32_t r, t, z;
|
||||
const curve_params *cc;
|
||||
jacobian P, Q;
|
||||
|
||||
/*
|
||||
* TODO: see about merging the two ladders. Right now, we do
|
||||
* two independent point multiplications, which is a bit
|
||||
* wasteful of CPU resources (but yields short code).
|
||||
*/
|
||||
|
||||
cc = id_to_curve(curve);
|
||||
if (len != cc->point_len) {
|
||||
return 0;
|
||||
}
|
||||
r = point_decode(&P, A, len, cc);
|
||||
if (B == NULL) {
|
||||
size_t Glen;
|
||||
|
||||
B = api_generator(curve, &Glen);
|
||||
}
|
||||
r &= point_decode(&Q, B, len, cc);
|
||||
point_mul(&P, x, xlen, cc);
|
||||
point_mul(&Q, y, ylen, cc);
|
||||
|
||||
/*
|
||||
* We want to compute P+Q. Since the base points A and B are distinct
|
||||
* from infinity, and the multipliers are non-zero and lower than the
|
||||
* curve order, then we know that P and Q are non-infinity. This
|
||||
* leaves two special situations to test for:
|
||||
* -- If P = Q then we must use point_double().
|
||||
* -- If P+Q = 0 then we must report an error.
|
||||
*/
|
||||
t = point_add(&P, &Q, cc);
|
||||
point_double(&Q, cc);
|
||||
z = br_i15_iszero(P.c[2]);
|
||||
|
||||
/*
|
||||
* If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
|
||||
* have the following:
|
||||
*
|
||||
* z = 0, t = 0 return P (normal addition)
|
||||
* z = 0, t = 1 return P (normal addition)
|
||||
* z = 1, t = 0 return Q (a 'double' case)
|
||||
* z = 1, t = 1 report an error (P+Q = 0)
|
||||
*/
|
||||
CCOPY(z & ~t, &P, &Q, sizeof Q);
|
||||
point_encode(A, &P, cc);
|
||||
r &= ~(z & t);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/* see bearssl_ec.h */
|
||||
const br_ec_impl br_ec_prime_i15 = {
|
||||
(uint32_t)0x03800000,
|
||||
&api_generator,
|
||||
&api_order,
|
||||
&api_xoff,
|
||||
&api_mul,
|
||||
&api_mulgen,
|
||||
&api_muladd
|
||||
};
|
||||
|
|
@ -0,0 +1,826 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
|
||||
* Parameters for supported curves (field modulus, and 'b' equation
|
||||
* parameter; both values use the 'i31' format, and 'b' is in Montgomery
|
||||
* representation).
|
||||
*/
|
||||
|
||||
/*
 * secp256r1. In each array the first word is the header word; the
 * code in this file derives the coordinate byte length from it as
 * (p[0] - (p[0] >> 5) + 7) >> 3, i.e. 32 bytes here. The value words
 * follow, least significant first.
 */

/* Field modulus. */
static const uint32_t P256_P[] = {
	0x00000108,
	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x00000007,
	0x00000000, 0x00000000, 0x00000040, 0x7FFFFF80,
	0x000000FF
};

/* Constant loaded as R^2 by point_decode(). */
static const uint32_t P256_R2[] = {
	0x00000108,
	0x00014000, 0x00018000, 0x00000000, 0x7FF40000,
	0x7FEFFFFF, 0x7FF7FFFF, 0x7FAFFFFF, 0x005FFFFF,
	0x00000000
};

/* Curve equation parameter 'b', in Montgomery representation. */
static const uint32_t P256_B[] = {
	0x00000108,
	0x6FEE1803, 0x6229C4BD, 0x21B139BE, 0x327150AA,
	0x3567802E, 0x3F7212ED, 0x012E4355, 0x782DD38D,
	0x0000000E
};
|
||||
|
||||
/*
 * secp384r1. In each array the first word is the header word; the
 * code in this file derives the coordinate byte length from it as
 * (p[0] - (p[0] >> 5) + 7) >> 3, i.e. 48 bytes here. The value words
 * follow, least significant first.
 */

/* Field modulus. */
static const uint32_t P384_P[] = {
	0x0000018C,
	0x7FFFFFFF, 0x00000001, 0x00000000, 0x7FFFFFF8,
	0x7FFFFFEF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
	0x00000FFF
};

/* Constant loaded as R^2 by point_decode(). */
static const uint32_t P384_R2[] = {
	0x0000018C,
	0x00000000, 0x00000080, 0x7FFFFE00, 0x000001FF,
	0x00000800, 0x00000000, 0x7FFFE000, 0x00001FFF,
	0x00008000, 0x00008000, 0x00000000, 0x00000000,
	0x00000000
};

/* Curve equation parameter 'b', in Montgomery representation. */
static const uint32_t P384_B[] = {
	0x0000018C,
	0x6E666840, 0x070D0392, 0x5D810231, 0x7651D50C,
	0x17E218D6, 0x1B192002, 0x44EFE441, 0x3A524E2B,
	0x2719BA5F, 0x41F02209, 0x36C5643E, 0x5813EFFE,
	0x000008A5
};
|
||||
|
||||
/*
 * secp521r1. In each array the first word is the header word; the
 * code in this file derives the coordinate byte length from it as
 * (p[0] - (p[0] >> 5) + 7) >> 3, i.e. 66 bytes here. The value words
 * follow, least significant first.
 */

/* Field modulus. */
static const uint32_t P521_P[] = {
	0x00000219,
	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
	0x01FFFFFF
};

/* Constant loaded as R^2 by point_decode(). */
static const uint32_t P521_R2[] = {
	0x00000219,
	0x00001000, 0x00000000, 0x00000000, 0x00000000,
	0x00000000, 0x00000000, 0x00000000, 0x00000000,
	0x00000000, 0x00000000, 0x00000000, 0x00000000,
	0x00000000, 0x00000000, 0x00000000, 0x00000000,
	0x00000000
};

/* Curve equation parameter 'b', in Montgomery representation. */
static const uint32_t P521_B[] = {
	0x00000219,
	0x540FC00A, 0x228FEA35, 0x2C34F1EF, 0x67BF107A,
	0x46FC1CD5, 0x1605E9DD, 0x6937B165, 0x272A3D8F,
	0x42785586, 0x44C8C778, 0x15F3B8B4, 0x64B73366,
	0x03BA8B69, 0x0D05B42A, 0x21F929A2, 0x2C31C393,
	0x00654FAE
};
|
||||
|
||||
/*
 * Per-curve constants used by the interpreter and the point codecs.
 */
typedef struct {
	/* Field modulus. */
	const uint32_t *p;
	/* Curve equation parameter 'b' (Montgomery representation). */
	const uint32_t *b;
	/* Value loaded as R^2 when converting to Montgomery representation. */
	const uint32_t *R2;
	/* Passed along with p to the Montgomery multiplication and modpow. */
	uint32_t p0i;
	/* Length, in bytes, of an encoded point. */
	size_t point_len;
} curve_params;
|
||||
|
||||
static inline const curve_params *
|
||||
id_to_curve(int curve)
|
||||
{
|
||||
static const curve_params pp[] = {
|
||||
{ P256_P, P256_B, P256_R2, 0x00000001, 65 },
|
||||
{ P384_P, P384_B, P384_R2, 0x00000001, 97 },
|
||||
{ P521_P, P521_B, P521_R2, 0x00000001, 133 }
|
||||
};
|
||||
|
||||
return &pp[curve - BR_EC_secp256r1];
|
||||
}
|
||||
|
||||
#define I31_LEN ((BR_MAX_EC_SIZE + 61) / 31)
|
||||
|
||||
/*
|
||||
* Type for a point in Jacobian coordinates:
|
||||
* -- three values, x, y and z, in Montgomery representation
|
||||
* -- affine coordinates are X = x / z^2 and Y = y / z^3
|
||||
* -- for the point at infinity, z = 0
|
||||
*/
|
||||
typedef struct {
|
||||
uint32_t c[3][I31_LEN];
|
||||
} jacobian;
|
||||
|
||||
/*
 * Point formulas are written for a small custom interpreter with a
 * dozen registers and six operations:
 *
 *   MSET(d, a)       d = a (copy)
 *   MADD(d, a)       d = d+a (modular)
 *   MSUB(d, a)       d = d-a (modular)
 *   MMUL(d, a, b)    d = a*b (Montgomery multiplication)
 *   MINV(d, a, b)    d = 1/d modulo p; a and b are scratch registers
 *   MTZ(d)           clear the returned flag if d = 0
 *
 * The destination of MMUL must not be one of its operands; MADD and
 * MSUB have no such restriction.
 *
 * Instruction layout: opcode in bits 12..15, then d, a and b in three
 * 4-bit fields. The all-zero word (ENDCODE) terminates a program.
 *
 * Registers hold the coordinates of the operands, plus temporaries.
 */
#define MSET(d, a)       (0x0000 + ((d) << 8) + ((a) << 4))
#define MADD(d, a)       (0x1000 + ((d) << 8) + ((a) << 4))
#define MSUB(d, a)       (0x2000 + ((d) << 8) + ((a) << 4))
#define MMUL(d, a, b)    (0x3000 + ((d) << 8) + ((a) << 4) + (b))
#define MINV(d, a, b)    (0x4000 + ((d) << 8) + ((a) << 4) + (b))
#define MTZ(d)           (0x5000 + ((d) << 8))
#define ENDCODE          0
|
||||
|
||||
/*
 * Register numbers.
 *
 * 0..2: coordinates of the first operand.
 * 3..5: coordinates of the second operand.
 */
#define P1x   0
#define P1y   1
#define P1z   2
#define P2x   3
#define P2y   4
#define P2z   5

/*
 * Shorter aliases for the first operand.
 */
#define Px    0
#define Py    1
#define Pz    2

/*
 * 6..12: temporaries.
 */
#define t1    6
#define t2    7
#define t3    8
#define t4    9
#define t5    10
#define t6    11
#define t7    12

/*
 * Additional scratch registers. They alias the second operand, so they
 * may only be used when there is no second operand (e.g. in "double"
 * and "affine").
 */
#define t8    3
#define t9    4
#define t10   5
|
||||
|
||||
/*
|
||||
* Doubling formulas are:
|
||||
*
|
||||
* s = 4*x*y^2
|
||||
* m = 3*(x + z^2)*(x - z^2)
|
||||
* x' = m^2 - 2*s
|
||||
* y' = m*(s - x') - 8*y^4
|
||||
* z' = 2*y*z
|
||||
*
|
||||
* If y = 0 (P has order 2) then this yields infinity (z' = 0), as it
|
||||
* should. This case should not happen anyway, because our curves have
|
||||
* prime order, and thus do not contain any point of order 2.
|
||||
*
|
||||
* If P is infinity (z = 0), then again the formulas yield infinity,
|
||||
* which is correct. Thus, this code works for all points.
|
||||
*
|
||||
* Cost: 8 multiplications
|
||||
*/
|
||||
static const uint16_t code_double[] = {
|
||||
/*
|
||||
* Compute z^2 (in t1).
|
||||
*/
|
||||
MMUL(t1, Pz, Pz),
|
||||
|
||||
/*
|
||||
* Compute x-z^2 (in t2) and then x+z^2 (in t1).
|
||||
*/
|
||||
MSET(t2, Px),
|
||||
MSUB(t2, t1),
|
||||
MADD(t1, Px),
|
||||
|
||||
/*
|
||||
* Compute m = 3*(x+z^2)*(x-z^2) (in t1).
|
||||
*/
|
||||
MMUL(t3, t1, t2),
|
||||
MSET(t1, t3),
|
||||
MADD(t1, t3),
|
||||
MADD(t1, t3),
|
||||
|
||||
/*
|
||||
* Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
|
||||
*/
|
||||
MMUL(t3, Py, Py),
|
||||
MADD(t3, t3),
|
||||
MMUL(t2, Px, t3),
|
||||
MADD(t2, t2),
|
||||
|
||||
/*
|
||||
* Compute x' = m^2 - 2*s.
|
||||
*/
|
||||
MMUL(Px, t1, t1),
|
||||
MSUB(Px, t2),
|
||||
MSUB(Px, t2),
|
||||
|
||||
/*
|
||||
* Compute z' = 2*y*z.
|
||||
*/
|
||||
MMUL(t4, Py, Pz),
|
||||
MSET(Pz, t4),
|
||||
MADD(Pz, t4),
|
||||
|
||||
/*
|
||||
* Compute y' = m*(s - x') - 8*y^4. Note that we already have
|
||||
* 2*y^2 in t3.
|
||||
*/
|
||||
MSUB(t2, Px),
|
||||
MMUL(Py, t1, t2),
|
||||
MMUL(t4, t3, t3),
|
||||
MSUB(Py, t4),
|
||||
MSUB(Py, t4),
|
||||
|
||||
ENDCODE
|
||||
};
|
||||
|
||||
/*
|
||||
* Addition formulas are:
|
||||
*
|
||||
* u1 = x1 * z2^2
|
||||
* u2 = x2 * z1^2
|
||||
* s1 = y1 * z2^3
|
||||
* s2 = y2 * z1^3
|
||||
* h = u2 - u1
|
||||
* r = s2 - s1
|
||||
* x3 = r^2 - h^3 - 2 * u1 * h^2
|
||||
* y3 = r * (u1 * h^2 - x3) - s1 * h^3
|
||||
* z3 = h * z1 * z2
|
||||
*
|
||||
* If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that
|
||||
* z3 == 0, so the result is correct.
|
||||
* If either of P1 or P2 is infinity, but not both, then z3 == 0, which is
|
||||
* not correct.
|
||||
* h == 0 only if u1 == u2; this happens in two cases:
|
||||
* -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2
|
||||
* -- if s1 != s2 then P1 + P2 == infinity (but neither P1 or P2 is infinity)
|
||||
*
|
||||
* Thus, the following situations are not handled correctly:
|
||||
* -- P1 = 0 and P2 != 0
|
||||
* -- P1 != 0 and P2 = 0
|
||||
* -- P1 = P2
|
||||
* All other cases are properly computed. However, even in "incorrect"
|
||||
* situations, the three coordinates still are properly formed field
|
||||
* elements.
|
||||
*
|
||||
* The returned flag is cleared if r == 0. This happens in the following
|
||||
* cases:
|
||||
* -- Both points are on the same horizontal line (same Y coordinate).
|
||||
* -- Both points are infinity.
|
||||
* -- One point is infinity and the other is on line Y = 0.
|
||||
* The third case cannot happen with our curves (there is no valid point
|
||||
* on line Y = 0 since that would be a point of order 2). If the two
|
||||
* source points are non-infinity, then remains only the case where the
|
||||
* two points are on the same horizontal line.
|
||||
*
|
||||
* This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and
|
||||
* P2 != 0:
|
||||
* -- If the returned value is not the point at infinity, then it was properly
|
||||
* computed.
|
||||
* -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result
|
||||
* is indeed the point at infinity.
|
||||
* -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should
|
||||
* use the 'double' code.
|
||||
*
|
||||
* Cost: 16 multiplications
|
||||
*/
|
||||
static const uint16_t code_add[] = {
|
||||
/*
|
||||
* Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
|
||||
*/
|
||||
MMUL(t3, P2z, P2z),
|
||||
MMUL(t1, P1x, t3),
|
||||
MMUL(t4, P2z, t3),
|
||||
MMUL(t3, P1y, t4),
|
||||
|
||||
/*
|
||||
* Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
|
||||
*/
|
||||
MMUL(t4, P1z, P1z),
|
||||
MMUL(t2, P2x, t4),
|
||||
MMUL(t5, P1z, t4),
|
||||
MMUL(t4, P2y, t5),
|
||||
|
||||
/*
|
||||
* Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
|
||||
*/
|
||||
MSUB(t2, t1),
|
||||
MSUB(t4, t3),
|
||||
|
||||
/*
|
||||
* Report cases where r = 0 through the returned flag.
|
||||
*/
|
||||
MTZ(t4),
|
||||
|
||||
/*
|
||||
* Compute u1*h^2 (in t6) and h^3 (in t5).
|
||||
*/
|
||||
MMUL(t7, t2, t2),
|
||||
MMUL(t6, t1, t7),
|
||||
MMUL(t5, t7, t2),
|
||||
|
||||
/*
|
||||
* Compute x3 = r^2 - h^3 - 2*u1*h^2.
|
||||
* t1 and t7 can be used as scratch registers.
|
||||
*/
|
||||
MMUL(P1x, t4, t4),
|
||||
MSUB(P1x, t5),
|
||||
MSUB(P1x, t6),
|
||||
MSUB(P1x, t6),
|
||||
|
||||
/*
|
||||
* Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
|
||||
*/
|
||||
MSUB(t6, P1x),
|
||||
MMUL(P1y, t4, t6),
|
||||
MMUL(t1, t5, t3),
|
||||
MSUB(P1y, t1),
|
||||
|
||||
/*
|
||||
* Compute z3 = h*z1*z2.
|
||||
*/
|
||||
MMUL(t1, P1z, P2z),
|
||||
MMUL(P1z, t1, t2),
|
||||
|
||||
ENDCODE
|
||||
};
|
||||
|
||||
/*
|
||||
* Check that the point is on the curve. This code snippet assumes the
|
||||
* following conventions:
|
||||
* -- Coordinates x and y have been freshly decoded in P1 (but not
|
||||
* converted to Montgomery coordinates yet).
|
||||
* -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1.
|
||||
*/
|
||||
static const uint16_t code_check[] = {
|
||||
|
||||
/* Convert x and y to Montgomery representation. */
|
||||
MMUL(t1, P1x, P2x),
|
||||
MMUL(t2, P1y, P2x),
|
||||
MSET(P1x, t1),
|
||||
MSET(P1y, t2),
|
||||
|
||||
/* Compute x^3 in t1. */
|
||||
MMUL(t2, P1x, P1x),
|
||||
MMUL(t1, P1x, t2),
|
||||
|
||||
/* Subtract 3*x from t1. */
|
||||
MSUB(t1, P1x),
|
||||
MSUB(t1, P1x),
|
||||
MSUB(t1, P1x),
|
||||
|
||||
/* Add b. */
|
||||
MADD(t1, P2y),
|
||||
|
||||
/* Compute y^2 in t2. */
|
||||
MMUL(t2, P1y, P1y),
|
||||
|
||||
/* Compare y^2 with x^3 - 3*x + b; they must match. */
|
||||
MSUB(t1, t2),
|
||||
MTZ(t1),
|
||||
|
||||
/* Set z to 1 (in Montgomery representation). */
|
||||
MMUL(P1z, P2x, P2z),
|
||||
|
||||
ENDCODE
|
||||
};
|
||||
|
||||
/*
|
||||
* Conversion back to affine coordinates. This code snippet assumes that
|
||||
* the z coordinate of P2 is set to 1 (not in Montgomery representation).
|
||||
*/
|
||||
static const uint16_t code_affine[] = {
|
||||
|
||||
/* Save z*R in t1. */
|
||||
MSET(t1, P1z),
|
||||
|
||||
/* Compute z^3 in t2. */
|
||||
MMUL(t2, P1z, P1z),
|
||||
MMUL(t3, P1z, t2),
|
||||
MMUL(t2, t3, P2z),
|
||||
|
||||
/* Invert to (1/z^3) in t2. */
|
||||
MINV(t2, t3, t4),
|
||||
|
||||
/* Compute y. */
|
||||
MSET(t3, P1y),
|
||||
MMUL(P1y, t2, t3),
|
||||
|
||||
/* Compute (1/z^2) in t3. */
|
||||
MMUL(t3, t2, t1),
|
||||
|
||||
/* Compute x. */
|
||||
MSET(t2, P1x),
|
||||
MMUL(P1x, t2, t3),
|
||||
|
||||
ENDCODE
|
||||
};
|
||||
|
||||
static uint32_t
|
||||
run_code(jacobian *P1, const jacobian *P2,
|
||||
const curve_params *cc, const uint16_t *code)
|
||||
{
|
||||
uint32_t r;
|
||||
uint32_t t[13][I31_LEN];
|
||||
size_t u;
|
||||
|
||||
r = 1;
|
||||
|
||||
/*
|
||||
* Copy the two operands in the dedicated registers.
|
||||
*/
|
||||
memcpy(t[P1x], P1->c, 3 * I31_LEN * sizeof(uint32_t));
|
||||
memcpy(t[P2x], P2->c, 3 * I31_LEN * sizeof(uint32_t));
|
||||
|
||||
/*
|
||||
* Run formulas.
|
||||
*/
|
||||
for (u = 0;; u ++) {
|
||||
unsigned op, d, a, b;
|
||||
|
||||
op = code[u];
|
||||
if (op == 0) {
|
||||
break;
|
||||
}
|
||||
d = (op >> 8) & 0x0F;
|
||||
a = (op >> 4) & 0x0F;
|
||||
b = op & 0x0F;
|
||||
op >>= 12;
|
||||
switch (op) {
|
||||
uint32_t ctl;
|
||||
size_t plen;
|
||||
unsigned char tp[(BR_MAX_EC_SIZE + 7) >> 3];
|
||||
|
||||
case 0:
|
||||
memcpy(t[d], t[a], I31_LEN * sizeof(uint32_t));
|
||||
break;
|
||||
case 1:
|
||||
ctl = br_i31_add(t[d], t[a], 1);
|
||||
ctl |= NOT(br_i31_sub(t[d], cc->p, 0));
|
||||
br_i31_sub(t[d], cc->p, ctl);
|
||||
break;
|
||||
case 2:
|
||||
br_i31_add(t[d], cc->p, br_i31_sub(t[d], t[a], 1));
|
||||
break;
|
||||
case 3:
|
||||
br_i31_montymul(t[d], t[a], t[b], cc->p, cc->p0i);
|
||||
break;
|
||||
case 4:
|
||||
plen = (cc->p[0] - (cc->p[0] >> 5) + 7) >> 3;
|
||||
br_i31_encode(tp, plen, cc->p);
|
||||
tp[plen - 1] -= 2;
|
||||
br_i31_modpow(t[d], tp, plen,
|
||||
cc->p, cc->p0i, t[a], t[b]);
|
||||
break;
|
||||
default:
|
||||
r &= ~br_i31_iszero(t[d]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy back result.
|
||||
*/
|
||||
memcpy(P1->c, t[P1x], 3 * I31_LEN * sizeof(uint32_t));
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
 * Set x to the integer 1, using the header word of modulus p.
 * (p[0] + 63) >> 5 words of x are cleared first, then x[0] receives
 * p[0] and x[1] receives 1.
 */
static void
set_one(uint32_t *x, const uint32_t *p)
{
	size_t nwords = (p[0] + 63) >> 5;

	memset(x, 0, nwords * sizeof *x);
	x[1] = 0x00000001;
	x[0] = p[0];
}
|
||||
|
||||
static void
|
||||
point_zero(jacobian *P, const curve_params *cc)
|
||||
{
|
||||
memset(P, 0, sizeof *P);
|
||||
P->c[0][0] = P->c[1][0] = P->c[2][0] = cc->p[0];
|
||||
}
|
||||
|
||||
static inline void
|
||||
point_double(jacobian *P, const curve_params *cc)
|
||||
{
|
||||
run_code(P, P, cc, code_double);
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
point_add(jacobian *P1, const jacobian *P2, const curve_params *cc)
|
||||
{
|
||||
return run_code(P1, P2, cc, code_add);
|
||||
}
|
||||
|
||||
static void
|
||||
point_mul(jacobian *P, const unsigned char *x, size_t xlen,
|
||||
const curve_params *cc)
|
||||
{
|
||||
/*
|
||||
* We do a simple double-and-add ladder with a 2-bit window
|
||||
* to make only one add every two doublings. We thus first
|
||||
* precompute 2P and 3P in some local buffers.
|
||||
*
|
||||
* We always perform two doublings and one addition; the
|
||||
* addition is with P, 2P and 3P and is done in a temporary
|
||||
* array.
|
||||
*
|
||||
* The addition code cannot handle cases where one of the
|
||||
* operands is infinity, which is the case at the start of the
|
||||
* ladder. We therefore need to maintain a flag that controls
|
||||
* this situation.
|
||||
*/
|
||||
uint32_t qz;
|
||||
jacobian P2, P3, Q, T, U;
|
||||
|
||||
memcpy(&P2, P, sizeof P2);
|
||||
point_double(&P2, cc);
|
||||
memcpy(&P3, P, sizeof P3);
|
||||
point_add(&P3, &P2, cc);
|
||||
|
||||
point_zero(&Q, cc);
|
||||
qz = 1;
|
||||
while (xlen -- > 0) {
|
||||
int k;
|
||||
|
||||
for (k = 6; k >= 0; k -= 2) {
|
||||
uint32_t bits;
|
||||
uint32_t bnz;
|
||||
|
||||
point_double(&Q, cc);
|
||||
point_double(&Q, cc);
|
||||
memcpy(&T, P, sizeof T);
|
||||
memcpy(&U, &Q, sizeof U);
|
||||
bits = (*x >> k) & (uint32_t)3;
|
||||
bnz = NEQ(bits, 0);
|
||||
CCOPY(EQ(bits, 2), &T, &P2, sizeof T);
|
||||
CCOPY(EQ(bits, 3), &T, &P3, sizeof T);
|
||||
point_add(&U, &T, cc);
|
||||
CCOPY(bnz & qz, &Q, &T, sizeof Q);
|
||||
CCOPY(bnz & ~qz, &Q, &U, sizeof Q);
|
||||
qz &= ~bnz;
|
||||
}
|
||||
x ++;
|
||||
}
|
||||
memcpy(P, &Q, sizeof Q);
|
||||
}
|
||||
|
||||
/*
|
||||
* Decode point into Jacobian coordinates. This function does not support
|
||||
* the point at infinity. If the point is invalid then this returns 0, but
|
||||
* the coordinates are still set to properly formed field elements.
|
||||
*/
|
||||
static uint32_t
|
||||
point_decode(jacobian *P, const void *src, size_t len, const curve_params *cc)
|
||||
{
|
||||
/*
|
||||
* Points must use uncompressed format:
|
||||
* -- first byte is 0x04;
|
||||
* -- coordinates X and Y use unsigned big-endian, with the same
|
||||
* length as the field modulus.
|
||||
*
|
||||
* We don't support hybrid format (uncompressed, but first byte
|
||||
* has value 0x06 or 0x07, depending on the least significant bit
|
||||
* of Y) because it is rather useless, and explicitly forbidden
|
||||
* by PKIX (RFC 5480, section 2.2).
|
||||
*
|
||||
* We don't support compressed format either, because it is not
|
||||
* much used in practice (there are or were patent-related
|
||||
* concerns about point compression, which explains the lack of
|
||||
* generalised support). Also, point compression support would
|
||||
* need a bit more code.
|
||||
*/
|
||||
const unsigned char *buf;
|
||||
size_t plen, zlen;
|
||||
uint32_t r;
|
||||
jacobian Q;
|
||||
|
||||
buf = src;
|
||||
point_zero(P, cc);
|
||||
plen = (cc->p[0] - (cc->p[0] >> 5) + 7) >> 3;
|
||||
if (len != 1 + (plen << 1)) {
|
||||
return 0;
|
||||
}
|
||||
r = br_i31_decode_mod(P->c[0], buf + 1, plen, cc->p);
|
||||
r &= br_i31_decode_mod(P->c[1], buf + 1 + plen, plen, cc->p);
|
||||
|
||||
/*
|
||||
* Check first byte.
|
||||
*/
|
||||
r &= EQ(buf[0], 0x04);
|
||||
/* obsolete
|
||||
r &= EQ(buf[0], 0x04) | (EQ(buf[0] & 0xFE, 0x06)
|
||||
& ~(uint32_t)(buf[0] ^ buf[plen << 1]));
|
||||
*/
|
||||
|
||||
/*
|
||||
* Convert coordinates and check that the point is valid.
|
||||
*/
|
||||
zlen = ((cc->p[0] + 63) >> 5) * sizeof(uint32_t);
|
||||
memcpy(Q.c[0], cc->R2, zlen);
|
||||
memcpy(Q.c[1], cc->b, zlen);
|
||||
set_one(Q.c[2], cc->p);
|
||||
r &= ~run_code(P, &Q, cc, code_check);
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* Encode a point. This method assumes that the point is correct and is
|
||||
* not the point at infinity. Encoded size is always 1+2*plen, where
|
||||
* plen is the field modulus length, in bytes.
|
||||
*/
|
||||
static void
|
||||
point_encode(void *dst, const jacobian *P, const curve_params *cc)
|
||||
{
|
||||
unsigned char *buf;
|
||||
uint32_t xbl;
|
||||
size_t plen;
|
||||
jacobian Q, T;
|
||||
|
||||
buf = dst;
|
||||
xbl = cc->p[0];
|
||||
xbl -= (xbl >> 5);
|
||||
plen = (xbl + 7) >> 3;
|
||||
buf[0] = 0x04;
|
||||
memcpy(&Q, P, sizeof *P);
|
||||
set_one(T.c[2], cc->p);
|
||||
run_code(&Q, &T, cc, code_affine);
|
||||
br_i31_encode(buf + 1, plen, Q.c[0]);
|
||||
br_i31_encode(buf + 1 + plen, plen, Q.c[1]);
|
||||
}
|
||||
|
||||
static const br_ec_curve_def *
|
||||
id_to_curve_def(int curve)
|
||||
{
|
||||
switch (curve) {
|
||||
case BR_EC_secp256r1:
|
||||
return &br_secp256r1;
|
||||
case BR_EC_secp384r1:
|
||||
return &br_secp384r1;
|
||||
case BR_EC_secp521r1:
|
||||
return &br_secp521r1;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static const unsigned char *
|
||||
api_generator(int curve, size_t *len)
|
||||
{
|
||||
const br_ec_curve_def *cd;
|
||||
|
||||
cd = id_to_curve_def(curve);
|
||||
*len = cd->generator_len;
|
||||
return cd->generator;
|
||||
}
|
||||
|
||||
static const unsigned char *
|
||||
api_order(int curve, size_t *len)
|
||||
{
|
||||
const br_ec_curve_def *cd;
|
||||
|
||||
cd = id_to_curve_def(curve);
|
||||
*len = cd->order_len;
|
||||
return cd->order;
|
||||
}
|
||||
|
||||
/*
 * Locate the X coordinate inside an encoded point: the returned value
 * is the offset of its first byte (always 1, i.e. right after the
 * header byte), and *len receives its length.
 *
 * The length is obtained by halving the encoded generator length: that
 * length is 1+2*plen, hence the (truncating) shift yields plen.
 */
static size_t
api_xoff(int curve, size_t *len)
{
	size_t glen;

	(void)api_generator(curve, &glen);
	*len = glen >> 1;
	return 1;
}
|
||||
|
||||
static uint32_t
|
||||
api_mul(unsigned char *G, size_t Glen,
|
||||
const unsigned char *x, size_t xlen, int curve)
|
||||
{
|
||||
uint32_t r;
|
||||
const curve_params *cc;
|
||||
jacobian P;
|
||||
|
||||
cc = id_to_curve(curve);
|
||||
if (Glen != cc->point_len) {
|
||||
return 0;
|
||||
}
|
||||
r = point_decode(&P, G, Glen, cc);
|
||||
point_mul(&P, x, xlen, cc);
|
||||
point_encode(G, &P, cc);
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
 * Multiply the curve generator by the scalar x (xlen bytes). The
 * encoded result is written into R, and its length is returned; that
 * length is the length of the encoded generator.
 *
 * The generator is first copied into R, then multiplied in place with
 * api_mul(), whose status is not looked at.
 */
static size_t
api_mulgen(unsigned char *R,
	const unsigned char *x, size_t xlen, int curve)
{
	size_t glen;
	const unsigned char *gen;

	gen = api_generator(curve, &glen);
	memcpy(R, gen, glen);
	(void)api_mul(R, glen, x, xlen, curve);
	return glen;
}
|
||||
|
||||
static uint32_t
|
||||
api_muladd(unsigned char *A, const unsigned char *B, size_t len,
|
||||
const unsigned char *x, size_t xlen,
|
||||
const unsigned char *y, size_t ylen, int curve)
|
||||
{
|
||||
uint32_t r, t, z;
|
||||
const curve_params *cc;
|
||||
jacobian P, Q;
|
||||
|
||||
/*
|
||||
* TODO: see about merging the two ladders. Right now, we do
|
||||
* two independent point multiplications, which is a bit
|
||||
* wasteful of CPU resources (but yields short code).
|
||||
*/
|
||||
|
||||
cc = id_to_curve(curve);
|
||||
if (len != cc->point_len) {
|
||||
return 0;
|
||||
}
|
||||
r = point_decode(&P, A, len, cc);
|
||||
if (B == NULL) {
|
||||
size_t Glen;
|
||||
|
||||
B = api_generator(curve, &Glen);
|
||||
}
|
||||
r &= point_decode(&Q, B, len, cc);
|
||||
point_mul(&P, x, xlen, cc);
|
||||
point_mul(&Q, y, ylen, cc);
|
||||
|
||||
/*
|
||||
* We want to compute P+Q. Since the base points A and B are distinct
|
||||
* from infinity, and the multipliers are non-zero and lower than the
|
||||
* curve order, then we know that P and Q are non-infinity. This
|
||||
* leaves two special situations to test for:
|
||||
* -- If P = Q then we must use point_double().
|
||||
* -- If P+Q = 0 then we must report an error.
|
||||
*/
|
||||
t = point_add(&P, &Q, cc);
|
||||
point_double(&Q, cc);
|
||||
z = br_i31_iszero(P.c[2]);
|
||||
|
||||
/*
|
||||
* If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
|
||||
* have the following:
|
||||
*
|
||||
* z = 0, t = 0 return P (normal addition)
|
||||
* z = 0, t = 1 return P (normal addition)
|
||||
* z = 1, t = 0 return Q (a 'double' case)
|
||||
* z = 1, t = 1 report an error (P+Q = 0)
|
||||
*/
|
||||
CCOPY(z & ~t, &P, &Q, sizeof Q);
|
||||
point_encode(A, &P, cc);
|
||||
r &= ~(z & t);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/* see bearssl_ec.h */
|
||||
const br_ec_impl br_ec_prime_i31 = {
|
||||
(uint32_t)0x03800000,
|
||||
&api_generator,
|
||||
&api_order,
|
||||
&api_xoff,
|
||||
&api_mul,
|
||||
&api_mulgen,
|
||||
&api_muladd
|
||||
};
|
||||
|
|
@ -0,0 +1,85 @@
|
|||
/*
|
||||
* Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * Encoded point length (in bytes) for each curve; the array index is
 * the curve identifier. Curves that share a length are grouped on a
 * single line.
 */
static const unsigned char POINT_LEN[] = {
	0,              /*  0:     not a valid curve ID */
	43, 43, 43,     /*  1..3:  sect163k1, sect163r1, sect163r2 */
	51, 51,         /*  4..5:  sect193r1, sect193r2 */
	61, 61, 61,     /*  6..8:  sect233k1, sect233r1, sect239k1 */
	73, 73,         /*  9..10: sect283k1, sect283r1 */
	105, 105,       /* 11..12: sect409k1, sect409r1 */
	145, 145,       /* 13..14: sect571k1, sect571r1 */
	41, 41, 41,     /* 15..17: secp160k1, secp160r1, secp160r2 */
	49, 49,         /* 18..19: secp192k1, secp192r1 */
	57, 57,         /* 20..21: secp224k1, secp224r1 */
	65, 65,         /* 22..23: secp256k1, secp256r1 */
	97,             /* 24:     secp384r1 */
	133,            /* 25:     secp521r1 */
	65,             /* 26:     brainpoolP256r1 */
	97,             /* 27:     brainpoolP384r1 */
	129,            /* 28:     brainpoolP512r1 */
	32,             /* 29:     curve25519 */
	56,             /* 30:     curve448 */
};
|
||||
|
||||
/* see bearssl_ec.h */
|
||||
size_t
|
||||
br_ec_compute_pub(const br_ec_impl *impl, br_ec_public_key *pk,
|
||||
void *kbuf, const br_ec_private_key *sk)
|
||||
{
|
||||
int curve;
|
||||
size_t len;
|
||||
|
||||
curve = sk->curve;
|
||||
if (curve < 0 || curve >= 32 || curve >= (int)(sizeof POINT_LEN)
|
||||
|| ((impl->supported_curves >> curve) & 1) == 0)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
if (kbuf == NULL) {
|
||||
return POINT_LEN[curve];
|
||||
}
|
||||
len = impl->mulgen(kbuf, sk->x, sk->xlen, curve);
|
||||
if (pk != NULL) {
|
||||
pk->curve = curve;
|
||||
pk->q = kbuf;
|
||||
pk->qlen = len;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * Order value for curve secp256r1 (32 bytes, unsigned big-endian),
 * listed four bytes per line.
 */
static const unsigned char P256_N[] = {
	0xFF, 0xFF, 0xFF, 0xFF,
	0x00, 0x00, 0x00, 0x00,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xBC, 0xE6, 0xFA, 0xAD,
	0xA7, 0x17, 0x9E, 0x84,
	0xF3, 0xB9, 0xCA, 0xC2,
	0xFC, 0x63, 0x25, 0x51
};
|
||||
|
||||
/*
 * Generator point for curve secp256r1, in uncompressed format (65
 * bytes): header byte, then X, then Y, each coordinate using 32 bytes
 * in unsigned big-endian convention.
 */
static const unsigned char P256_G[] = {
	/* header: uncompressed point */
	0x04,

	/* X coordinate */
	0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47,
	0xF8, 0xBC, 0xE6, 0xE5, 0x63, 0xA4, 0x40, 0xF2,
	0x77, 0x03, 0x7D, 0x81, 0x2D, 0xEB, 0x33, 0xA0,
	0xF4, 0xA1, 0x39, 0x45, 0xD8, 0x98, 0xC2, 0x96,

	/* Y coordinate */
	0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F, 0x9B,
	0x8E, 0xE7, 0xEB, 0x4A, 0x7C, 0x0F, 0x9E, 0x16,
	0x2B, 0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE,
	0xCB, 0xB6, 0x40, 0x68, 0x37, 0xBF, 0x51, 0xF5
};
|
||||
|
||||
/* see inner.h */
|
||||
const br_ec_curve_def br_secp256r1 = {
|
||||
BR_EC_secp256r1,
|
||||
P256_N, sizeof P256_N,
|
||||
P256_G, sizeof P256_G
|
||||
};
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * Order value for curve secp384r1 (48 bytes, unsigned big-endian),
 * listed four bytes per line.
 */
static const unsigned char P384_N[] = {
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xC7, 0x63, 0x4D, 0x81,
	0xF4, 0x37, 0x2D, 0xDF,
	0x58, 0x1A, 0x0D, 0xB2,
	0x48, 0xB0, 0xA7, 0x7A,
	0xEC, 0xEC, 0x19, 0x6A,
	0xCC, 0xC5, 0x29, 0x73
};
|
||||
|
||||
/*
 * Generator point for curve secp384r1, in uncompressed format (97
 * bytes): header byte, then X, then Y, each coordinate using 48 bytes
 * in unsigned big-endian convention.
 */
static const unsigned char P384_G[] = {
	/* header: uncompressed point */
	0x04,

	/* X coordinate */
	0xAA, 0x87, 0xCA, 0x22, 0xBE, 0x8B, 0x05, 0x37,
	0x8E, 0xB1, 0xC7, 0x1E, 0xF3, 0x20, 0xAD, 0x74,
	0x6E, 0x1D, 0x3B, 0x62, 0x8B, 0xA7, 0x9B, 0x98,
	0x59, 0xF7, 0x41, 0xE0, 0x82, 0x54, 0x2A, 0x38,
	0x55, 0x02, 0xF2, 0x5D, 0xBF, 0x55, 0x29, 0x6C,
	0x3A, 0x54, 0x5E, 0x38, 0x72, 0x76, 0x0A, 0xB7,

	/* Y coordinate */
	0x36, 0x17, 0xDE, 0x4A, 0x96, 0x26, 0x2C, 0x6F,
	0x5D, 0x9E, 0x98, 0xBF, 0x92, 0x92, 0xDC, 0x29,
	0xF8, 0xF4, 0x1D, 0xBD, 0x28, 0x9A, 0x14, 0x7C,
	0xE9, 0xDA, 0x31, 0x13, 0xB5, 0xF0, 0xB8, 0xC0,
	0x0A, 0x60, 0xB1, 0xCE, 0x1D, 0x7E, 0x81, 0x9D,
	0x7A, 0x43, 0x1D, 0x7C, 0x90, 0xEA, 0x0E, 0x5F
};
|
||||
|
||||
/* see inner.h */
|
||||
const br_ec_curve_def br_secp384r1 = {
|
||||
BR_EC_secp384r1,
|
||||
P384_N, sizeof P384_N,
|
||||
P384_G, sizeof P384_G
|
||||
};
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * Order value for curve secp521r1 (66 bytes, unsigned big-endian),
 * listed four bytes per line (the last line has only two bytes).
 */
static const unsigned char P521_N[] = {
	0x01, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFF, 0xFF, 0xFF,
	0xFF, 0xFA, 0x51, 0x86,
	0x87, 0x83, 0xBF, 0x2F,
	0x96, 0x6B, 0x7F, 0xCC,
	0x01, 0x48, 0xF7, 0x09,
	0xA5, 0xD0, 0x3B, 0xB5,
	0xC9, 0xB8, 0x89, 0x9C,
	0x47, 0xAE, 0xBB, 0x6F,
	0xB7, 0x1E, 0x91, 0x38,
	0x64, 0x09
};
|
||||
|
||||
static const unsigned char P521_G[] = {
|
||||
0x04, 0x00, 0xC6, 0x85, 0x8E, 0x06, 0xB7, 0x04,
|
||||
0x04, 0xE9, 0xCD, 0x9E, 0x3E, 0xCB, 0x66, 0x23,
|
||||
0x95, 0xB4, 0x42, 0x9C, 0x64, 0x81, 0x39, 0x05,
|
||||
0x3F, 0xB5, 0x21, 0xF8, 0x28, 0xAF, 0x60, 0x6B,
|
||||
0x4D, 0x3D, 0xBA, 0xA1, 0x4B, 0x5E, 0x77, 0xEF,
|
||||
0xE7, 0x59, 0x28, 0xFE, 0x1D, 0xC1, 0x27, 0xA2,
|
||||
0xFF, 0xA8, 0xDE, 0x33, 0x48, 0xB3, 0xC1, 0x85,
|
||||
0x6A, 0x42, 0x9B, 0xF9, 0x7E, 0x7E, 0x31, 0xC2,
|
||||
0xE5, 0xBD, 0x66, 0x01, 0x18, 0x39, 0x29, 0x6A,
|
||||
0x78, 0x9A, 0x3B, 0xC0, 0x04, 0x5C, 0x8A, 0x5F,
|
||||
0xB4, 0x2C, 0x7D, 0x1B, 0xD9, 0x98, 0xF5, 0x44,
|
||||
0x49, 0x57, 0x9B, 0x44, 0x68, 0x17, 0xAF, 0xBD,
|
||||
0x17, 0x27, 0x3E, 0x66, 0x2C, 0x97, 0xEE, 0x72,
|
||||
0x99, 0x5E, 0xF4, 0x26, 0x40, 0xC5, 0x50, 0xB9,
|
||||
0x01, 0x3F, 0xAD, 0x07, 0x61, 0x35, 0x3C, 0x70,
|
||||
0x86, 0xA2, 0x72, 0xC2, 0x40, 0x88, 0xBE, 0x94,
|
||||
0x76, 0x9F, 0xD1, 0x66, 0x50
|
||||
};
|
||||
|
||||
/* see inner.h */
|
||||
const br_ec_curve_def br_secp521r1 = {
|
||||
BR_EC_secp521r1,
|
||||
P521_N, sizeof P521_N,
|
||||
P521_G, sizeof P521_G
|
||||
};
|
||||
|
|
@ -0,0 +1,134 @@
|
|||
/*
|
||||
* Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/*
 * see bearssl_ec.h
 *
 * Convert an ECDSA signature from its ASN.1 encoding to the "raw"
 * format, in place.
 *
 *   sig       buffer holding the ASN.1 signature; overwritten with
 *             the raw signature on success.
 *   sig_len   length of the ASN.1 signature, in bytes.
 *
 * The raw signature is the concatenation of r and s, each stripped of
 * its leading zeros and then left-padded with zeros to the length of
 * the longer of the two.
 *
 * Returned value is the raw signature length (an even value, at most
 * 254), or 0 if the input could not be decoded. 0 is also returned,
 * with the buffer unmodified, when both integers are zero.
 *
 * WARNING: the raw signature may be LONGER than the ASN.1 input when r
 * and s have very different lengths (e.g. a 127-byte r with an empty s
 * is 134 bytes in ASN.1 but 254 bytes in raw format). The buffer must
 * have room for the returned length.
 */
size_t
br_ecdsa_asn1_to_raw(void *sig, size_t sig_len)
{
	/*
	 * Note: this code is a bit lenient in that it accepts a few
	 * deviations to DER with regards to minimality of encoding of
	 * lengths and integer values. These deviations are still
	 * unambiguous.
	 *
	 * Signature format is a SEQUENCE of two INTEGER values. We
	 * support only integers of less than 127 bytes each (signed
	 * encoding) so the resulting raw signature will have length
	 * at most 254 bytes.
	 */

	unsigned char *buf, *r, *s;
	size_t zlen, rlen, slen, off;
	unsigned char tmp[254];

	buf = sig;

	/*
	 * The smallest acceptable input has 8 bytes (SEQUENCE header
	 * and two one-byte INTEGER values); this also guarantees that
	 * all header bytes read below are within the buffer.
	 */
	if (sig_len < 8) {
		return 0;
	}

	/*
	 * First byte is SEQUENCE tag.
	 */
	if (buf[0] != 0x30) {
		return 0;
	}

	/*
	 * The SEQUENCE length will be encoded over one or two bytes. We
	 * limit the total SEQUENCE contents to 255 bytes, because it
	 * makes things simpler; this is enough for subgroup orders up
	 * to 999 bits.
	 *
	 * A first length byte of value 0x80 is the marker for the
	 * "indefinite length" form; it does NOT encode a length of
	 * 128 bytes, and it is rejected.
	 */
	zlen = buf[1];
	if (zlen >= 0x80) {
		if (zlen != 0x81) {
			return 0;
		}
		zlen = buf[2];
		if (zlen != sig_len - 3) {
			return 0;
		}
		off = 3;
	} else {
		if (zlen != sig_len - 2) {
			return 0;
		}
		off = 2;
	}

	/*
	 * First INTEGER (r).
	 */
	if (buf[off ++] != 0x02) {
		return 0;
	}
	rlen = buf[off ++];
	if (rlen >= 0x80) {
		return 0;
	}
	r = buf + off;
	off += rlen;

	/*
	 * Second INTEGER (s). Its two header bytes must fit in the
	 * buffer, and its value must extend exactly to the end of the
	 * signature.
	 */
	if (off + 2 > sig_len) {
		return 0;
	}
	if (buf[off ++] != 0x02) {
		return 0;
	}
	slen = buf[off ++];
	if (slen >= 0x80 || slen != sig_len - off) {
		return 0;
	}
	s = buf + off;

	/*
	 * Removing leading zeros from r and s.
	 */
	while (rlen > 0 && *r == 0) {
		rlen --;
		r ++;
	}
	while (slen > 0 && *s == 0) {
		slen --;
		s ++;
	}

	/*
	 * Compute common length for the two integers, then copy integers
	 * into the temporary buffer, and finally copy it back over the
	 * signature buffer. The temporary buffer is needed because the
	 * destination overlaps with the source values.
	 */
	zlen = rlen > slen ? rlen : slen;
	sig_len = zlen << 1;
	memset(tmp, 0, sig_len);
	memcpy(tmp + zlen - rlen, r, rlen);
	memcpy(tmp + sig_len - slen, s, slen);
	memcpy(sig, tmp, sig_len);
	return sig_len;
}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "inner.h"
|
||||
|
||||
/* see bearssl_ec.h */
|
||||
br_ecdsa_sign
|
||||
br_ecdsa_sign_asn1_get_default(void)
|
||||
{
|
||||
#if BR_LOMUL
|
||||
return &br_ecdsa_i15_sign_asn1;
|
||||
#else
|
||||
return &br_ecdsa_i31_sign_asn1;
|
||||
#endif
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue