Tidy up Graphics.h a bit

This commit is contained in:
UnknownShadow200 2024-08-16 17:21:29 +10:00
parent 6dcd418acb
commit b3e545f202
14 changed files with 116 additions and 4039 deletions

View File

@ -5,7 +5,20 @@
CC_BEGIN_HEADER
/*
Abstracts a 3D graphics rendering API
SUMMARY:
- Provides a low level abstraction of a 3D graphics rendering API.
- Because of the numerous possible rendering backends, only a small number of
functions are provided so that the available functionality behaves the same
regardless of the rendering backend being used. (as much as reasonably possible)
- Most code using Graphics.h therefore doesn't need to care about the rendering backend being used
IMPLEMENTATION NOTES:
- By default, a reasonable rendering backend is automatically selected in Core.h
- The selected rendering backend can be altered in two ways:
* explicitly defining CC_GFX_BACKEND in the compilation flags (recommended)
* altering DEFAULT_GFX_BACKEND for the platform in Core.h
- Rendering backends are implemented in Graphics_GL1.c, Graphics_D3D9.c etc
Copyright 2014-2023 ClassiCube | Licensed under BSD-3
*/
struct Bitmap;
@ -18,15 +31,6 @@ typedef enum VertexFormat_ {
VERTEX_FORMAT_COLOURED, VERTEX_FORMAT_TEXTURED
} VertexFormat;
typedef enum FogFunc_ {
FOG_LINEAR, FOG_EXP, FOG_EXP2
} FogFunc;
typedef enum MatrixType_ {
MATRIX_PROJ, /* Projection matrix */
MATRIX_VIEW /* Combined model view matrix */
} MatrixType;
#define SIZEOF_VERTEX_COLOURED 16
#define SIZEOF_VERTEX_TEXTURED 24
@ -88,11 +92,13 @@ extern const cc_string Gfx_LowPerfMessage;
#define GFX_MAX_INDICES (65536 / 4 * 6)
#define GFX_MAX_VERTICES 65536
typedef enum GfxBuffers_ {
GFX_BUFFER_COLOR = 1,
GFX_BUFFER_DEPTH = 2
} GfxBuffers;
void Gfx_RecreateTexture(GfxResourceID* tex, struct Bitmap* bmp, cc_uint8 flags, cc_bool mipmaps);
void* Gfx_RecreateAndLockVb(GfxResourceID* vb, VertexFormat fmt, int count);
/*########################################################################################################################*
*---------------------------------------------------------Textures--------------------------------------------------------*
*#########################################################################################################################*/
/* Texture should persist across gfx context loss (if backend supports ManagedTextures) */
#define TEXTURE_FLAG_MANAGED 0x01
/* Texture should allow updating via Gfx_UpdateTexture */
@ -104,9 +110,6 @@ typedef enum GfxBuffers_ {
/* Texture should be rendered using bilinear filtering if possible */
#define TEXTURE_FLAG_BILINEAR 0x10
void Gfx_RecreateTexture(GfxResourceID* tex, struct Bitmap* bmp, cc_uint8 flags, cc_bool mipmaps);
void* Gfx_RecreateAndLockVb(GfxResourceID* vb, VertexFormat fmt, int count);
cc_bool Gfx_CheckTextureSize(int width, int height, cc_uint8 flags);
/* Creates a new texture. (and also generates mipmaps if mipmaps) */
/* See TEXTURE_FLAG values for supported flags */
@ -134,6 +137,46 @@ CC_API void Gfx_EnableMipmaps(void);
/* NOTE: You must have created textures with mipmaps true for this to work */
CC_API void Gfx_DisableMipmaps(void);
/*########################################################################################################################*
*------------------------------------------------------Frame management---------------------------------------------------*
*#########################################################################################################################*/
typedef enum GfxBuffers_ {
GFX_BUFFER_COLOR = 1,
GFX_BUFFER_DEPTH = 2
} GfxBuffers;
/* Clears the given rendering buffer(s) to their default values. */
/* buffers can be either GFX_BUFFER_COLOR or GFX_BUFFER_DEPTH, or both */
CC_API void Gfx_ClearBuffers(GfxBuffers buffers);
/* Sets the default colour that the colour buffer is cleared to */
CC_API void Gfx_ClearColor(PackedCol color);
/* Sets up state for rendering a new frame */
void Gfx_BeginFrame(void);
/* Finishes rendering a frame, and swaps it with the back buffer */
void Gfx_EndFrame(void);
/* Sets whether to synchronise with monitor refresh to avoid tearing */
/* NOTE: VSync setting may be unsupported or just ignored */
void Gfx_SetVSync(cc_bool vsync);
enum Screen3DS { TOP_SCREEN, BOTTOM_SCREEN };
#ifdef CC_BUILD_DUALSCREEN
/* Selects which screen/display to render to */
void Gfx_3DS_SetRenderScreen(enum Screen3DS screen);
#else
/* Selects which screen/display to render to */
static CC_INLINE void Gfx_3DS_SetRenderScreen(enum Screen3DS screen) { }
#endif
/*########################################################################################################################*
*---------------------------------------------------------Fog state-------------------------------------------------------*
*#########################################################################################################################*/
typedef enum FogFunc_ {
FOG_LINEAR, FOG_EXP, FOG_EXP2
} FogFunc;
/* Returns whether fog blending is enabled */
CC_API cc_bool Gfx_GetFog(void);
/* Sets whether fog blending is enabled */
@ -147,6 +190,10 @@ CC_API void Gfx_SetFogEnd(float value);
/* Sets in what way fog is blended */
CC_API void Gfx_SetFogMode(FogFunc func);
/*########################################################################################################################*
*-----------------------------------------------------State management----------------------------------------------------*
*#########################################################################################################################*/
/* Sets whether backface culling is performed */
CC_API void Gfx_SetFaceCulling(cc_bool enabled);
/* Sets whether pixels with an alpha of less than 128 are discarded */
@ -156,11 +203,6 @@ CC_API void Gfx_SetAlphaBlending(cc_bool enabled);
/* Sets whether blending between the alpha components of texture and vertex colour is performed */
CC_API void Gfx_SetAlphaArgBlend(cc_bool enabled);
/* Clears the given rendering buffer(s) to default. */
/* buffers can be either GFX_BUFFER_COLOR or GFX_BUFFER_DEPTH, or both */
CC_API void Gfx_ClearBuffers(GfxBuffers buffers);
/* Sets the colour that the colour buffer is cleared to */
CC_API void Gfx_ClearColor(PackedCol color);
/* Sets whether pixels may be discard based on z/depth */
CC_API void Gfx_SetDepthTest(cc_bool enabled);
/* Sets whether z/depth of pixels is actually written to the depth buffer */
@ -171,11 +213,10 @@ CC_API void Gfx_SetColorWrite(cc_bool r, cc_bool g, cc_bool b, cc_bool a);
/* NOTE: Implicitly calls Gfx_SetColorWrite */
CC_API void Gfx_DepthOnlyRendering(cc_bool depthOnly);
/* Anaglyph 3D rendering support */
void Gfx_Set3DLeft( struct Matrix* proj, struct Matrix* view);
void Gfx_Set3DRight(struct Matrix* proj, struct Matrix* view);
void Gfx_End3D( struct Matrix* proj, struct Matrix* view);
/*########################################################################################################################*
*------------------------------------------------------Index buffers-----------------------------------------------------*
*#########################################################################################################################*/
/* Callback function to initialise/fill out the contents of an index buffer */
typedef void (*Gfx_FillIBFunc)(cc_uint16* indices, int count, void* obj);
/* Creates a new index buffer and fills out its contents */
@ -185,6 +226,10 @@ CC_API void Gfx_BindIb(GfxResourceID ib);
/* Deletes the given index buffer, then sets it to 0 */
CC_API void Gfx_DeleteIb(GfxResourceID* ib);
/*########################################################################################################################*
*------------------------------------------------------Vertex buffers-----------------------------------------------------*
*#########################################################################################################################*/
/* Creates a new vertex buffer */
CC_API GfxResourceID Gfx_CreateVb(VertexFormat fmt, int count);
/* Sets the currently active vertex buffer */
@ -222,6 +267,10 @@ CC_API void Gfx_UnlockDynamicVb(GfxResourceID vb);
/* Updates the data of a dynamic vertex buffer */
CC_API void Gfx_SetDynamicVbData(GfxResourceID vb, void* vertices, int vCount);
/*########################################################################################################################*
*------------------------------------------------------Vertex drawing-----------------------------------------------------*
*#########################################################################################################################*/
/* Sets the format of the rendered vertices */
CC_API void Gfx_SetVertexFormat(VertexFormat fmt);
/* Renders vertices from the currently bound vertex buffer as lines */
@ -234,9 +283,20 @@ CC_API void Gfx_DrawVb_IndexedTris(int verticesCount);
/* Special case Gfx_DrawVb_IndexedTris_Range for map renderer */
void Gfx_DrawIndexedTris_T2fC4b(int verticesCount, int startVertex);
/* Loads the given matrix over the currently active matrix */
/*########################################################################################################################*
*-----------------------------------------------------Vertex transform----------------------------------------------------*
*#########################################################################################################################*/
typedef enum MatrixType_ {
MATRIX_PROJ, /* Projection matrix */
MATRIX_VIEW /* Combined model view matrix */
} MatrixType;
/* Sets the currently active matrix projection or modelview matrix */
CC_API void Gfx_LoadMatrix(MatrixType type, const struct Matrix* matrix);
/* Sets the texture U/V translation (not normally used) */
CC_API void Gfx_EnableTextureOffset(float x, float y);
/* Disables texture U/V translation */
CC_API void Gfx_DisableTextureOffset(void);
/* Loads given modelview and projection matrices, then calculates the combined MVP matrix */
void Gfx_LoadMVP(const struct Matrix* view, const struct Matrix* proj, struct Matrix* mvp);
@ -248,19 +308,25 @@ void Gfx_CalcPerspectiveMatrix(struct Matrix* matrix, float fov, float aspect, f
/* NOTE: Projection matrix calculation is here because it can depend on the graphics backend */
/* (e.g. OpenGL uses a Z clip space range of [-1, 1], whereas Direct3D9 uses [0, 1]) */
/* Sets the region where transformed vertices are drawn in */
/* By default this region has origin 0,0 and size is window width/height */
/* This region should normally be the same as the scissor region */
CC_API void Gfx_SetViewport(int x, int y, int w, int h);
/* Sets the region where pixels can be drawn in (pixels outside this region are discarded) */
/* By default this region has origin 0,0 and size is window width/height */
/* This region should normally be the same as the viewport region */
CC_API void Gfx_SetScissor (int x, int y, int w, int h);
/*########################################################################################################################*
*------------------------------------------------------Misc utilities-----------------------------------------------------*
*#########################################################################################################################*/
/* Outputs a .png screenshot of the backbuffer */
cc_result Gfx_TakeScreenshot(struct Stream* output);
/* Warns in chat if the backend has problems with the user's GPU */
/* Returns whether legacy rendering mode for borders/sky/clouds is needed */
cc_bool Gfx_WarnIfNecessary(void);
cc_bool Gfx_GetUIOptions(struct MenuOptionsScreen* s);
/* Sets up state for rendering a new frame */
void Gfx_BeginFrame(void);
/* Finishes rendering a frame, and swaps it with the back buffer */
void Gfx_EndFrame(void);
/* Sets whether to synchronise with monitor refresh to avoid tearing */
/* NOTE: VSync setting may be unsupported or just ignored */
void Gfx_SetVSync(cc_bool vsync);
/* Gets information about the user's GPU and current backend state */
/* Backend state may include depth buffer bits, free memory, etc */
/* NOTE: Each line is separated by \n */
@ -269,16 +335,11 @@ void Gfx_GetApiInfo(cc_string* info);
/* Updates state when the window's dimensions have changed */
/* NOTE: This may require recreating the context depending on the backend */
void Gfx_OnWindowResize(void);
CC_API void Gfx_SetViewport(int x, int y, int w, int h);
CC_API void Gfx_SetScissor (int x, int y, int w, int h);
enum Screen3DS { TOP_SCREEN, BOTTOM_SCREEN };
#ifdef CC_BUILD_DUALSCREEN
/* Selects which screen on the 3DS to render to */
void Gfx_3DS_SetRenderScreen(enum Screen3DS screen);
#else
static CC_INLINE void Gfx_3DS_SetRenderScreen(enum Screen3DS screen) { }
#endif
/* Anaglyph 3D rendering support */
void Gfx_Set3DLeft( struct Matrix* proj, struct Matrix* view);
void Gfx_Set3DRight(struct Matrix* proj, struct Matrix* view);
void Gfx_End3D( struct Matrix* proj, struct Matrix* view);
/* Raises ContextLost event and updates state for lost contexts */
void Gfx_LoseContext(const char* reason);
@ -287,6 +348,15 @@ void Gfx_RecreateContext(void);
/* Attempts to restore a lost context */
cc_bool Gfx_TryRestoreContext(void);
/* Sets appropriate alpha test/blending for given block draw type */
void Gfx_SetupAlphaState(cc_uint8 draw);
/* Undoes changes to alpha test/blending state by Gfx_SetupAlphaState */
void Gfx_RestoreAlphaState(cc_uint8 draw);
/*########################################################################################################################*
*------------------------------------------------------2D rendering------------------------------------------------------*
*#########################################################################################################################*/
/* Renders a 2D flat coloured rectangle */
void Gfx_Draw2DFlat(int x, int y, int width, int height, PackedCol color);
/* Renders a 2D flat vertical gradient rectangle */
@ -303,11 +373,6 @@ void Gfx_Begin2D(int width, int height);
/* NOTE: This means restoring fog/depth test, restoring matrices, etc */
void Gfx_End2D(void);
/* Sets appropriate alpha test/blending for given block draw type */
void Gfx_SetupAlphaState(cc_uint8 draw);
/* Undoes changes to alpha test/blending state by Gfx_SetupAlphaState */
void Gfx_RestoreAlphaState(cc_uint8 draw);
/* Statically initialises the position and dimensions of this texture */
#define Tex_Rect(x,y, width,height) x,y,width,height
/* Statically initialises the texture coordinate corners of this texture */

View File

@ -299,7 +299,6 @@ extern "C" {
* | aes_ct | AES | 16 | 16, 24 and 32 |
* | aes_ct64 | AES | 16 | 16, 24 and 32 |
* | aes_x86ni | AES | 16 | 16, 24 and 32 |
* | aes_pwr8 | AES | 16 | 16, 24 and 32 |
* | des_ct | DES/3DES | 8 | 8, 16 and 24 |
* | des_tab | DES/3DES | 8 | 8, 16 and 24 |
*
@ -335,10 +334,6 @@ extern "C" {
* `aes_x86ni` exists only on x86 architectures (32-bit and 64-bit). It
* uses the AES-NI opcodes when available.
*
* `aes_pwr8` exists only on PowerPC / POWER architectures (32-bit and
* 64-bit, both little-endian and big-endian). It uses the AES opcodes
* present in POWER8 and later.
*
* `des_tab` is a classic, table-based implementation of DES/3DES. It
* is not constant-time.
*
@ -1860,296 +1855,6 @@ const br_block_ctr_class *br_aes_x86ni_ctr_get_vtable(void);
*/
const br_block_ctrcbc_class *br_aes_x86ni_ctrcbc_get_vtable(void);
/*
* AES implementation using POWER8 opcodes.
*/
/** \brief AES block size (16 bytes). */
#define br_aes_pwr8_BLOCK_SIZE 16
/**
* \brief Context for AES subkeys (`aes_pwr8` implementation, CBC encryption).
*
* First field is a pointer to the vtable; it is set by the initialisation
* function. Other fields are not supposed to be accessed by user code.
*/
typedef struct {
/** \brief Pointer to vtable for this context. */
const br_block_cbcenc_class *vtable;
#ifndef BR_DOXYGEN_IGNORE
union {
unsigned char skni[16 * 15];
} skey;
unsigned num_rounds;
#endif
} br_aes_pwr8_cbcenc_keys;
/**
* \brief Context for AES subkeys (`aes_pwr8` implementation, CBC decryption).
*
* First field is a pointer to the vtable; it is set by the initialisation
* function. Other fields are not supposed to be accessed by user code.
*/
typedef struct {
/** \brief Pointer to vtable for this context. */
const br_block_cbcdec_class *vtable;
#ifndef BR_DOXYGEN_IGNORE
union {
unsigned char skni[16 * 15];
} skey;
unsigned num_rounds;
#endif
} br_aes_pwr8_cbcdec_keys;
/**
* \brief Context for AES subkeys (`aes_pwr8` implementation, CTR encryption
* and decryption).
*
* First field is a pointer to the vtable; it is set by the initialisation
* function. Other fields are not supposed to be accessed by user code.
*/
typedef struct {
/** \brief Pointer to vtable for this context. */
const br_block_ctr_class *vtable;
#ifndef BR_DOXYGEN_IGNORE
union {
unsigned char skni[16 * 15];
} skey;
unsigned num_rounds;
#endif
} br_aes_pwr8_ctr_keys;
/**
* \brief Context for AES subkeys (`aes_pwr8` implementation, CTR encryption
* and decryption + CBC-MAC).
*
* First field is a pointer to the vtable; it is set by the initialisation
* function. Other fields are not supposed to be accessed by user code.
*/
typedef struct {
/** \brief Pointer to vtable for this context. */
const br_block_ctrcbc_class *vtable;
#ifndef BR_DOXYGEN_IGNORE
union {
unsigned char skni[16 * 15];
} skey;
unsigned num_rounds;
#endif
} br_aes_pwr8_ctrcbc_keys;
/**
* \brief Class instance for AES CBC encryption (`aes_pwr8` implementation).
*
* Since this implementation might be omitted from the library, or the
* AES opcode unavailable on the current CPU, a pointer to this class
* instance should be obtained through `br_aes_pwr8_cbcenc_get_vtable()`.
*/
extern const br_block_cbcenc_class br_aes_pwr8_cbcenc_vtable;
/**
* \brief Class instance for AES CBC decryption (`aes_pwr8` implementation).
*
* Since this implementation might be omitted from the library, or the
* AES opcode unavailable on the current CPU, a pointer to this class
* instance should be obtained through `br_aes_pwr8_cbcdec_get_vtable()`.
*/
extern const br_block_cbcdec_class br_aes_pwr8_cbcdec_vtable;
/**
* \brief Class instance for AES CTR encryption and decryption
* (`aes_pwr8` implementation).
*
* Since this implementation might be omitted from the library, or the
* AES opcode unavailable on the current CPU, a pointer to this class
* instance should be obtained through `br_aes_pwr8_ctr_get_vtable()`.
*/
extern const br_block_ctr_class br_aes_pwr8_ctr_vtable;
/**
* \brief Class instance for AES CTR encryption/decryption + CBC-MAC
* (`aes_pwr8` implementation).
*
* Since this implementation might be omitted from the library, or the
* AES opcode unavailable on the current CPU, a pointer to this class
* instance should be obtained through `br_aes_pwr8_ctrcbc_get_vtable()`.
*/
extern const br_block_ctrcbc_class br_aes_pwr8_ctrcbc_vtable;
/**
* \brief Context initialisation (key schedule) for AES CBC encryption
* (`aes_pwr8` implementation).
*
* \param ctx context to initialise.
* \param key secret key.
* \param len secret key length (in bytes).
*/
void br_aes_pwr8_cbcenc_init(br_aes_pwr8_cbcenc_keys *ctx,
const void *key, size_t len);
/**
* \brief Context initialisation (key schedule) for AES CBC decryption
* (`aes_pwr8` implementation).
*
* \param ctx context to initialise.
* \param key secret key.
* \param len secret key length (in bytes).
*/
void br_aes_pwr8_cbcdec_init(br_aes_pwr8_cbcdec_keys *ctx,
const void *key, size_t len);
/**
* \brief Context initialisation (key schedule) for AES CTR encryption
* and decryption (`aes_pwr8` implementation).
*
* \param ctx context to initialise.
* \param key secret key.
* \param len secret key length (in bytes).
*/
void br_aes_pwr8_ctr_init(br_aes_pwr8_ctr_keys *ctx,
const void *key, size_t len);
/**
* \brief Context initialisation (key schedule) for AES CTR + CBC-MAC
* (`aes_pwr8` implementation).
*
* \param ctx context to initialise.
* \param key secret key.
* \param len secret key length (in bytes).
*/
void br_aes_pwr8_ctrcbc_init(br_aes_pwr8_ctrcbc_keys *ctx,
const void *key, size_t len);
/**
* \brief CBC encryption with AES (`aes_pwr8` implementation).
*
* \param ctx context (already initialised).
* \param iv IV (updated).
* \param data data to encrypt (updated).
* \param len data length (in bytes, MUST be multiple of 16).
*/
void br_aes_pwr8_cbcenc_run(const br_aes_pwr8_cbcenc_keys *ctx, void *iv,
void *data, size_t len);
/**
* \brief CBC decryption with AES (`aes_pwr8` implementation).
*
* \param ctx context (already initialised).
* \param iv IV (updated).
* \param data data to decrypt (updated).
* \param len data length (in bytes, MUST be multiple of 16).
*/
void br_aes_pwr8_cbcdec_run(const br_aes_pwr8_cbcdec_keys *ctx, void *iv,
void *data, size_t len);
/**
* \brief CTR encryption and decryption with AES (`aes_pwr8` implementation).
*
* \param ctx context (already initialised).
* \param iv IV (constant, 12 bytes).
* \param cc initial block counter value.
* \param data data to decrypt (updated).
* \param len data length (in bytes).
* \return new block counter value.
*/
uint32_t br_aes_pwr8_ctr_run(const br_aes_pwr8_ctr_keys *ctx,
const void *iv, uint32_t cc, void *data, size_t len);
/**
* \brief CTR encryption + CBC-MAC with AES (`aes_pwr8` implementation).
*
* \param ctx context (already initialised).
* \param ctr counter for CTR (16 bytes, updated).
* \param cbcmac IV for CBC-MAC (updated).
* \param data data to encrypt (updated).
* \param len data length (in bytes, MUST be a multiple of 16).
*/
void br_aes_pwr8_ctrcbc_encrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
void *ctr, void *cbcmac, void *data, size_t len);
/**
* \brief CTR decryption + CBC-MAC with AES (`aes_pwr8` implementation).
*
* \param ctx context (already initialised).
* \param ctr counter for CTR (16 bytes, updated).
* \param cbcmac IV for CBC-MAC (updated).
* \param data data to decrypt (updated).
* \param len data length (in bytes, MUST be a multiple of 16).
*/
void br_aes_pwr8_ctrcbc_decrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
void *ctr, void *cbcmac, void *data, size_t len);
/**
* \brief CTR encryption/decryption with AES (`aes_pwr8` implementation).
*
* \param ctx context (already initialised).
* \param ctr counter for CTR (16 bytes, updated).
* \param data data to MAC (updated).
* \param len data length (in bytes, MUST be a multiple of 16).
*/
void br_aes_pwr8_ctrcbc_ctr(const br_aes_pwr8_ctrcbc_keys *ctx,
void *ctr, void *data, size_t len);
/**
* \brief CBC-MAC with AES (`aes_pwr8` implementation).
*
* \param ctx context (already initialised).
* \param cbcmac IV for CBC-MAC (updated).
* \param data data to MAC (unmodified).
* \param len data length (in bytes, MUST be a multiple of 16).
*/
void br_aes_pwr8_ctrcbc_mac(const br_aes_pwr8_ctrcbc_keys *ctx,
void *cbcmac, const void *data, size_t len);
/**
* \brief Obtain the `aes_pwr8` AES-CBC (encryption) implementation, if
* available.
*
* This function returns a pointer to `br_aes_pwr8_cbcenc_vtable`, if
* that implementation was compiled in the library _and_ the POWER8
* crypto opcodes are available on the currently running CPU. If either
* of these conditions is not met, then this function returns `NULL`.
*
* \return the `aes_pwr8` AES-CBC (encryption) implementation, or `NULL`.
*/
const br_block_cbcenc_class *br_aes_pwr8_cbcenc_get_vtable(void);
/**
* \brief Obtain the `aes_pwr8` AES-CBC (decryption) implementation, if
* available.
*
* This function returns a pointer to `br_aes_pwr8_cbcdec_vtable`, if
* that implementation was compiled in the library _and_ the POWER8
* crypto opcodes are available on the currently running CPU. If either
* of these conditions is not met, then this function returns `NULL`.
*
* \return the `aes_pwr8` AES-CBC (decryption) implementation, or `NULL`.
*/
const br_block_cbcdec_class *br_aes_pwr8_cbcdec_get_vtable(void);
/**
* \brief Obtain the `aes_pwr8` AES-CTR implementation, if available.
*
* This function returns a pointer to `br_aes_pwr8_ctr_vtable`, if that
* implementation was compiled in the library _and_ the POWER8 crypto
* opcodes are available on the currently running CPU. If either of
* these conditions is not met, then this function returns `NULL`.
*
* \return the `aes_pwr8` AES-CTR implementation, or `NULL`.
*/
const br_block_ctr_class *br_aes_pwr8_ctr_get_vtable(void);
/**
* \brief Obtain the `aes_pwr8` AES-CTR + CBC-MAC implementation, if
* available.
*
* This function returns a pointer to `br_aes_pwr8_ctrcbc_vtable`, if
* that implementation was compiled in the library _and_ the POWER8 AES
* opcodes are available on the currently running CPU. If either of
* these conditions is not met, then this function returns `NULL`.
*
 * \return the `aes_pwr8` AES-CTR + CBC-MAC implementation, or `NULL`.
*/
const br_block_ctrcbc_class *br_aes_pwr8_ctrcbc_get_vtable(void);
/**
* \brief Aggregate structure large enough to be used as context for
@ -2162,7 +1867,6 @@ typedef union {
br_aes_ct_cbcenc_keys c_ct;
br_aes_ct64_cbcenc_keys c_ct64;
br_aes_x86ni_cbcenc_keys c_x86ni;
br_aes_pwr8_cbcenc_keys c_pwr8;
} br_aes_gen_cbcenc_keys;
/**
@ -2176,7 +1880,6 @@ typedef union {
br_aes_ct_cbcdec_keys c_ct;
br_aes_ct64_cbcdec_keys c_ct64;
br_aes_x86ni_cbcdec_keys c_x86ni;
br_aes_pwr8_cbcdec_keys c_pwr8;
} br_aes_gen_cbcdec_keys;
/**
@ -2190,7 +1893,6 @@ typedef union {
br_aes_ct_ctr_keys c_ct;
br_aes_ct64_ctr_keys c_ct64;
br_aes_x86ni_ctr_keys c_x86ni;
br_aes_pwr8_ctr_keys c_pwr8;
} br_aes_gen_ctr_keys;
/**
@ -2204,7 +1906,6 @@ typedef union {
br_aes_ct_ctrcbc_keys c_ct;
br_aes_ct64_ctrcbc_keys c_ct64;
br_aes_x86ni_ctrcbc_keys c_x86ni;
br_aes_pwr8_ctrcbc_keys c_pwr8;
} br_aes_gen_ctrcbc_keys;
/*

View File

@ -1313,32 +1313,6 @@ void br_ghash_pclmul(void *y, const void *h, const void *data, size_t len);
*/
br_ghash br_ghash_pclmul_get(void);
/**
* \brief GHASH implementation using the POWER8 opcodes.
*
* This implementation is available only on POWER8 platforms (and later).
* To safely obtain a pointer to this function when supported (or 0
* otherwise), use `br_ghash_pwr8_get()`.
*
* \param y the array to update.
* \param h the GHASH key.
* \param data the input data (may be `NULL` if `len` is zero).
* \param len the input data length (in bytes).
*/
void br_ghash_pwr8(void *y, const void *h, const void *data, size_t len);
/**
* \brief Obtain the `pwr8` GHASH implementation, if available.
*
* If the `pwr8` implementation was compiled in the library (depending
* on the compiler abilities) _and_ the local CPU appears to support the
* opcode, then this function will return a pointer to the
* `br_ghash_pwr8()` function. Otherwise, it will return `0`.
*
* \return the `pwr8` GHASH implementation, or `0`.
*/
br_ghash br_ghash_pwr8_get(void);
#ifdef __cplusplus
}
#endif

View File

@ -1,445 +0,0 @@
/*
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define BR_POWER_ASM_MACROS 1
#include "inner.h"
/*
* This code contains the AES key schedule implementation using the
* POWER8 opcodes.
*/
#if BR_POWER8
/*
 * AES-128 key schedule using the POWER8 vector crypto opcodes.
 * Expands the 16-byte key at 'key' into the round subkeys written to
 * 'sk': the key itself first, then one 16-byte subkey per loop
 * iteration (the loop runs 10 times — see li(%[cc], 10) below),
 * i.e. 11 subkeys / 176 bytes total.
 * On little-endian builds (BR_POWER8_LE) each subkey is byte-swapped
 * to big-endian word order via vperm before being stored.
 */
static void
key_schedule_128(unsigned char *sk, const unsigned char *key)
{
long cc;
/* fmod = AES field modulus 0x11B, splatted across 4 words; used to
   reduce Rcon when it overflows a byte */
static const uint32_t fmod[] = { 0x11B, 0x11B, 0x11B, 0x11B };
#if BR_POWER8_LE
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
cc = 0;
/*
 * We use the VSX instructions for loading and storing the
 * key/subkeys, since they support unaligned accesses. The rest
 * of the computation is VMX only. VMX register 0 is VSX
 * register 32.
 */
asm volatile (
/*
 * v0 = all-zero word
 * v1 = constant -8 / +8, copied into four words
 * v2 = current subkey
 * v3 = Rcon (x4 words)
 * v6 = constant 8, copied into four words
 * v7 = constant 0x11B, copied into four words
 * v8 = constant for byteswapping words
 */
vspltisw(0, 0)
#if BR_POWER8_LE
vspltisw(1, -8)
#else
vspltisw(1, 8)
#endif
lxvw4x(34, 0, %[key])
vspltisw(3, 1)
vspltisw(6, 8)
lxvw4x(39, 0, %[fmod])
#if BR_POWER8_LE
lxvw4x(40, 0, %[idx2be])
#endif
/*
 * First subkey is a copy of the key itself.
 */
#if BR_POWER8_LE
vperm(4, 2, 2, 8)
stxvw4x(36, 0, %[sk])
#else
stxvw4x(34, 0, %[sk])
#endif
/*
 * Loop must run 10 times.
 */
li(%[cc], 10)
mtctr(%[cc])
label(loop)
/* Increment subkey address */
addi(%[sk], %[sk], 16)
/* Compute SubWord(RotWord(temp)) xor Rcon (into v4, splat) */
vrlw(4, 2, 1)
vsbox(4, 4)
#if BR_POWER8_LE
vxor(4, 4, 3)
#else
vsldoi(5, 3, 0, 3)
vxor(4, 4, 5)
#endif
vspltw(4, 4, 3)
/* XOR words for next subkey */
vsldoi(5, 0, 2, 12)
vxor(2, 2, 5)
vsldoi(5, 0, 2, 12)
vxor(2, 2, 5)
vsldoi(5, 0, 2, 12)
vxor(2, 2, 5)
vxor(2, 2, 4)
/* Store next subkey */
#if BR_POWER8_LE
vperm(4, 2, 2, 8)
stxvw4x(36, 0, %[sk])
#else
stxvw4x(34, 0, %[sk])
#endif
/* Update Rcon */
vadduwm(3, 3, 3)
vsrw(4, 3, 6)
vsubuwm(4, 0, 4)
vand(4, 4, 7)
vxor(3, 3, 4)
bdnz(loop)
: [sk] "+b" (sk), [cc] "+b" (cc)
: [key] "b" (key), [fmod] "b" (fmod)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
/* NOTE(review): on BR_POWER8_LE, lxvw4x(40, ...) loads VSX reg 40
   (= VMX v8, the byteswap constant), but v8 is not listed in the
   clobbers below — verify against the other schedules, which do
   list v8 and higher. */
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "ctr", "memory"
);
}
/*
 * AES-192 key schedule using the POWER8 vector crypto opcodes.
 * Expands the 24-byte key at 'key' into the round subkeys written to
 * 'sk'. The loop below runs 8 times, producing 6 new 32-bit subkey
 * words per iteration with a 64-bit overlap between stores; a final
 * trailing store brings the total to 52 subkey words (see the
 * in-asm comments).
 * On little-endian builds (BR_POWER8_LE) words are byte-swapped to
 * big-endian order via vperm before each store.
 */
static void
key_schedule_192(unsigned char *sk, const unsigned char *key)
{
long cc;
#if BR_POWER8_LE
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
cc = 0;
/*
 * We use the VSX instructions for loading and storing the
 * key/subkeys, since they support unaligned accesses. The rest
 * of the computation is VMX only. VMX register 0 is VSX
 * register 32.
 */
asm volatile (
/*
 * v0 = all-zero word
 * v1 = constant -8 / +8, copied into four words
 * v2, v3 = current subkey
 * v5 = Rcon (x4 words) (already shifted on big-endian)
 * v6 = constant 8, copied into four words
 * v8 = constant for byteswapping words
 *
 * The left two words of v3 are ignored.
 */
vspltisw(0, 0)
#if BR_POWER8_LE
vspltisw(1, -8)
#else
vspltisw(1, 8)
#endif
/* Load the 24-byte key as two (overlapping-use) 16-byte loads,
   the second at offset 8 */
li(%[cc], 8)
lxvw4x(34, 0, %[key])
lxvw4x(35, %[cc], %[key])
vsldoi(3, 3, 0, 8)
vspltisw(5, 1)
#if !BR_POWER8_LE
vsldoi(5, 5, 0, 3)
#endif
vspltisw(6, 8)
#if BR_POWER8_LE
lxvw4x(40, 0, %[idx2be])
#endif
/*
 * Loop must run 8 times. Each iteration produces 256
 * bits of subkeys, with a 64-bit overlap.
 */
li(%[cc], 8)
mtctr(%[cc])
/* %[cc] is reused as the constant byte offset 16 for the
   second store of each iteration */
li(%[cc], 16)
label(loop)
/*
 * Last 6 words in v2:v3l. Compute next 6 words into
 * v3r:v4.
 */
vrlw(10, 3, 1)
vsbox(10, 10)
vxor(10, 10, 5)
vspltw(10, 10, 1)
vsldoi(11, 0, 10, 8)
vsldoi(12, 0, 2, 12)
vxor(12, 2, 12)
vsldoi(13, 0, 12, 12)
vxor(12, 12, 13)
vsldoi(13, 0, 12, 12)
vxor(12, 12, 13)
vspltw(13, 12, 3)
vxor(13, 13, 3)
vsldoi(14, 0, 3, 12)
vxor(13, 13, 14)
vsldoi(4, 12, 13, 8)
vsldoi(14, 0, 3, 8)
vsldoi(3, 14, 12, 8)
vxor(3, 3, 11)
vxor(4, 4, 10)
/*
 * Update Rcon. Since for a 192-bit key, we use only 8
 * such constants, we will not hit the field modulus,
 * so a simple shift (addition) works well.
 */
vadduwm(5, 5, 5)
/*
 * Write out the two left 128-bit words
 */
#if BR_POWER8_LE
vperm(10, 2, 2, 8)
vperm(11, 3, 3, 8)
stxvw4x(42, 0, %[sk])
stxvw4x(43, %[cc], %[sk])
#else
stxvw4x(34, 0, %[sk])
stxvw4x(35, %[cc], %[sk])
#endif
/* Advance by 24 bytes (6 words) — stores overlap by 8 bytes */
addi(%[sk], %[sk], 24)
/*
 * Shift words for next iteration.
 */
vsldoi(2, 3, 4, 8)
vsldoi(3, 4, 0, 8)
bdnz(loop)
/*
 * The loop wrote the first 50 subkey words, but we need
 * to produce 52, so we must do one last write.
 */
#if BR_POWER8_LE
vperm(10, 2, 2, 8)
stxvw4x(42, 0, %[sk])
#else
stxvw4x(34, 0, %[sk])
#endif
: [sk] "+b" (sk), [cc] "+b" (cc)
: [key] "b" (key)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "ctr", "memory"
);
}
/*
 * Expand a 256-bit AES key into the subkey schedule (15 128-bit
 * subkeys for 14 rounds), written to sk. The loop runs 7 times,
 * emitting two subkeys per iteration; one extra store completes the
 * 15th. On little-endian, subkey words are permuted back to big-endian
 * word order (via idx2be) before being stored.
 *
 * sk   destination buffer for the expanded subkeys
 * key  32-byte AES key (may be unaligned; loaded with VSX lxvw4x)
 */
static void
key_schedule_256(unsigned char *sk, const unsigned char *key)
{
/* Byte offset passed as an asm operand for indexed loads/stores. */
long cc;
#if BR_POWER8_LE
/* Permutation pattern to byteswap each 32-bit word (LE -> BE). */
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
cc = 0;
/*
 * We use the VSX instructions for loading and storing the
 * key/subkeys, since they support unaligned accesses. The rest
 * of the computation is VMX only. VMX register 0 is VSX
 * register 32.
 */
asm volatile (
/*
 * v0 = all-zero word
 * v1 = constant -8 / +8, copied into four words
 * v2, v3 = current subkey
 * v6 = Rcon (x4 words) (already shifted on big-endian)
 * v7 = constant 8, copied into four words
 * v8 = constant for byteswapping words
 *
 * The left two words of v3 are ignored.
 */
vspltisw(0, 0)
#if BR_POWER8_LE
vspltisw(1, -8)
#else
vspltisw(1, 8)
#endif
li(%[cc], 16)
lxvw4x(34, 0, %[key])
lxvw4x(35, %[cc], %[key])
vspltisw(6, 1)
#if !BR_POWER8_LE
vsldoi(6, 6, 0, 3)
#endif
vspltisw(7, 8)
#if BR_POWER8_LE
lxvw4x(40, 0, %[idx2be])
#endif
/*
 * Loop must run 7 times. Each iteration produces two
 * subkeys.
 */
li(%[cc], 7)
mtctr(%[cc])
li(%[cc], 16)
label(loop)
/*
 * Current words are in v2:v3. Compute next word in v4.
 */
vrlw(10, 3, 1)
vsbox(10, 10)
vxor(10, 10, 6)
vspltw(10, 10, 3)
vsldoi(4, 0, 2, 12)
vxor(4, 2, 4)
vsldoi(5, 0, 4, 12)
vxor(4, 4, 5)
vsldoi(5, 0, 4, 12)
vxor(4, 4, 5)
vxor(4, 4, 10)
/*
 * Then other word in v5.
 */
vsbox(10, 4)
vspltw(10, 10, 3)
vsldoi(5, 0, 3, 12)
vxor(5, 3, 5)
vsldoi(11, 0, 5, 12)
vxor(5, 5, 11)
vsldoi(11, 0, 5, 12)
vxor(5, 5, 11)
vxor(5, 5, 10)
/*
 * Update Rcon. Since for a 256-bit key, we use only 7
 * such constants, we will not hit the field modulus,
 * so a simple shift (addition) works well.
 */
vadduwm(6, 6, 6)
/*
 * Write out the two left 128-bit words
 */
#if BR_POWER8_LE
vperm(10, 2, 2, 8)
vperm(11, 3, 3, 8)
stxvw4x(42, 0, %[sk])
stxvw4x(43, %[cc], %[sk])
#else
stxvw4x(34, 0, %[sk])
stxvw4x(35, %[cc], %[sk])
#endif
addi(%[sk], %[sk], 32)
/*
 * Replace v2:v3 with v4:v5.
 */
vxor(2, 0, 4)
vxor(3, 0, 5)
bdnz(loop)
/*
 * The loop wrote the first 14 subkeys, but we need 15,
 * so we must do an extra write.
 */
#if BR_POWER8_LE
vperm(10, 2, 2, 8)
stxvw4x(42, 0, %[sk])
#else
stxvw4x(34, 0, %[sk])
#endif
: [sk] "+b" (sk), [cc] "+b" (cc)
: [key] "b" (key)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "ctr", "memory"
);
}
/* see inner.h */
/*
 * This translation unit is only compiled when BR_POWER8 is enabled
 * (see the enclosing #if), so the POWER8 AES opcodes used here are
 * assumed to be available and this always reports support.
 */
int
br_aes_pwr8_supported(void)
{
return 1;
}
/* see inner.h */
/*
 * Expand the raw AES key (16, 24 or 32 bytes) into the subkey
 * schedule at sk, and return the corresponding number of rounds
 * (10, 12 or 14). Any length other than 16 or 24 is treated as 32.
 */
unsigned
br_aes_pwr8_keysched(unsigned char *sk, const void *key, size_t len)
{
	if (len == 16) {
		key_schedule_128(sk, key);
		return 10;
	}
	if (len == 24) {
		key_schedule_192(sk, key);
		return 12;
	}
	key_schedule_256(sk, key);
	return 14;
}
#endif

View File

@ -1,670 +0,0 @@
/*
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define BR_POWER_ASM_MACROS 1
#include "inner.h"
#if BR_POWER8
/* see bearssl_block.h */
/*
 * Initialize a CBC decryption context: expand the key into the
 * context's subkey storage and install the class vtable.
 */
void
br_aes_pwr8_cbcdec_init(br_aes_pwr8_cbcdec_keys *ctx,
	const void *key, size_t len)
{
	unsigned rounds;

	rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
	ctx->num_rounds = rounds;
	ctx->vtable = &br_aes_pwr8_cbcdec_vtable;
}
/*
 * Decrypt num_blocks 16-byte blocks in CBC mode, in place, using an
 * AES-128 (10 rounds) expanded key.
 *
 * sk          expanded subkeys (11 * 16 bytes, loaded into v0..v10)
 * iv          16-byte IV for the first block (read only; the caller
 *             tracks IV chaining across calls)
 * buf         ciphertext, overwritten with plaintext
 * num_blocks  block count; the asm runs num_blocks >> 2 iterations of
 *             a 4-block loop, so callers must pass a positive
 *             multiple of 4 (see br_aes_pwr8_cbcdec_run)
 */
static void
cbcdec_128(const unsigned char *sk,
const unsigned char *iv, unsigned char *buf, size_t num_blocks)
{
/* Byte offsets 0/16/32/48 used as asm operands for indexed accesses. */
long cc0, cc1, cc2, cc3;
#if BR_POWER8_LE
/* Permutation pattern to byteswap each 32-bit word (LE -> BE). */
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
cc0 = 0;
cc1 = 16;
cc2 = 32;
cc3 = 48;
asm volatile (
/*
 * Load subkeys into v0..v10
 */
lxvw4x(32, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(33, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(34, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(35, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(36, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(37, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(38, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(39, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(40, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(41, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(42, %[cc0], %[sk])
li(%[cc0], 0)
#if BR_POWER8_LE
/*
 * v15 = constant for byteswapping words
 */
lxvw4x(47, 0, %[idx2be])
#endif
/*
 * Load IV into v24.
 */
lxvw4x(56, 0, %[iv])
#if BR_POWER8_LE
vperm(24, 24, 24, 15)
#endif
mtctr(%[num_blocks])
label(loop)
/*
 * Load next ciphertext words in v16..v19. Also save them
 * in v20..v23.
 */
lxvw4x(48, %[cc0], %[buf])
lxvw4x(49, %[cc1], %[buf])
lxvw4x(50, %[cc2], %[buf])
lxvw4x(51, %[cc3], %[buf])
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
vand(20, 16, 16)
vand(21, 17, 17)
vand(22, 18, 18)
vand(23, 19, 19)
/*
 * Decrypt the blocks.
 */
vxor(16, 16, 10)
vxor(17, 17, 10)
vxor(18, 18, 10)
vxor(19, 19, 10)
vncipher(16, 16, 9)
vncipher(17, 17, 9)
vncipher(18, 18, 9)
vncipher(19, 19, 9)
vncipher(16, 16, 8)
vncipher(17, 17, 8)
vncipher(18, 18, 8)
vncipher(19, 19, 8)
vncipher(16, 16, 7)
vncipher(17, 17, 7)
vncipher(18, 18, 7)
vncipher(19, 19, 7)
vncipher(16, 16, 6)
vncipher(17, 17, 6)
vncipher(18, 18, 6)
vncipher(19, 19, 6)
vncipher(16, 16, 5)
vncipher(17, 17, 5)
vncipher(18, 18, 5)
vncipher(19, 19, 5)
vncipher(16, 16, 4)
vncipher(17, 17, 4)
vncipher(18, 18, 4)
vncipher(19, 19, 4)
vncipher(16, 16, 3)
vncipher(17, 17, 3)
vncipher(18, 18, 3)
vncipher(19, 19, 3)
vncipher(16, 16, 2)
vncipher(17, 17, 2)
vncipher(18, 18, 2)
vncipher(19, 19, 2)
vncipher(16, 16, 1)
vncipher(17, 17, 1)
vncipher(18, 18, 1)
vncipher(19, 19, 1)
vncipherlast(16, 16, 0)
vncipherlast(17, 17, 0)
vncipherlast(18, 18, 0)
vncipherlast(19, 19, 0)
/*
 * XOR decrypted blocks with IV / previous block.
 */
vxor(16, 16, 24)
vxor(17, 17, 20)
vxor(18, 18, 21)
vxor(19, 19, 22)
/*
 * Store back result (with byteswap)
 */
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
stxvw4x(48, %[cc0], %[buf])
stxvw4x(49, %[cc1], %[buf])
stxvw4x(50, %[cc2], %[buf])
stxvw4x(51, %[cc3], %[buf])
/*
 * Fourth encrypted block is IV for next run.
 */
vand(24, 23, 23)
addi(%[buf], %[buf], 64)
bdnz(loop)
: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
[buf] "+b" (buf)
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
"ctr", "memory"
);
}
/*
 * Decrypt num_blocks 16-byte blocks in CBC mode, in place, using an
 * AES-192 (12 rounds) expanded key. Same structure as cbcdec_128,
 * with two extra rounds and subkeys in v0..v12.
 *
 * sk          expanded subkeys (13 * 16 bytes)
 * iv          16-byte IV for the first block (read only)
 * buf         ciphertext, overwritten with plaintext
 * num_blocks  block count; must be a positive multiple of 4
 *             (the asm processes 4 blocks per iteration)
 */
static void
cbcdec_192(const unsigned char *sk,
const unsigned char *iv, unsigned char *buf, size_t num_blocks)
{
/* Byte offsets 0/16/32/48 used as asm operands for indexed accesses. */
long cc0, cc1, cc2, cc3;
#if BR_POWER8_LE
/* Permutation pattern to byteswap each 32-bit word (LE -> BE). */
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
cc0 = 0;
cc1 = 16;
cc2 = 32;
cc3 = 48;
asm volatile (
/*
 * Load subkeys into v0..v12
 */
lxvw4x(32, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(33, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(34, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(35, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(36, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(37, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(38, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(39, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(40, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(41, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(42, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(43, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(44, %[cc0], %[sk])
li(%[cc0], 0)
#if BR_POWER8_LE
/*
 * v15 = constant for byteswapping words
 */
lxvw4x(47, 0, %[idx2be])
#endif
/*
 * Load IV into v24.
 */
lxvw4x(56, 0, %[iv])
#if BR_POWER8_LE
vperm(24, 24, 24, 15)
#endif
mtctr(%[num_blocks])
label(loop)
/*
 * Load next ciphertext words in v16..v19. Also save them
 * in v20..v23.
 */
lxvw4x(48, %[cc0], %[buf])
lxvw4x(49, %[cc1], %[buf])
lxvw4x(50, %[cc2], %[buf])
lxvw4x(51, %[cc3], %[buf])
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
vand(20, 16, 16)
vand(21, 17, 17)
vand(22, 18, 18)
vand(23, 19, 19)
/*
 * Decrypt the blocks.
 */
vxor(16, 16, 12)
vxor(17, 17, 12)
vxor(18, 18, 12)
vxor(19, 19, 12)
vncipher(16, 16, 11)
vncipher(17, 17, 11)
vncipher(18, 18, 11)
vncipher(19, 19, 11)
vncipher(16, 16, 10)
vncipher(17, 17, 10)
vncipher(18, 18, 10)
vncipher(19, 19, 10)
vncipher(16, 16, 9)
vncipher(17, 17, 9)
vncipher(18, 18, 9)
vncipher(19, 19, 9)
vncipher(16, 16, 8)
vncipher(17, 17, 8)
vncipher(18, 18, 8)
vncipher(19, 19, 8)
vncipher(16, 16, 7)
vncipher(17, 17, 7)
vncipher(18, 18, 7)
vncipher(19, 19, 7)
vncipher(16, 16, 6)
vncipher(17, 17, 6)
vncipher(18, 18, 6)
vncipher(19, 19, 6)
vncipher(16, 16, 5)
vncipher(17, 17, 5)
vncipher(18, 18, 5)
vncipher(19, 19, 5)
vncipher(16, 16, 4)
vncipher(17, 17, 4)
vncipher(18, 18, 4)
vncipher(19, 19, 4)
vncipher(16, 16, 3)
vncipher(17, 17, 3)
vncipher(18, 18, 3)
vncipher(19, 19, 3)
vncipher(16, 16, 2)
vncipher(17, 17, 2)
vncipher(18, 18, 2)
vncipher(19, 19, 2)
vncipher(16, 16, 1)
vncipher(17, 17, 1)
vncipher(18, 18, 1)
vncipher(19, 19, 1)
vncipherlast(16, 16, 0)
vncipherlast(17, 17, 0)
vncipherlast(18, 18, 0)
vncipherlast(19, 19, 0)
/*
 * XOR decrypted blocks with IV / previous block.
 */
vxor(16, 16, 24)
vxor(17, 17, 20)
vxor(18, 18, 21)
vxor(19, 19, 22)
/*
 * Store back result (with byteswap)
 */
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
stxvw4x(48, %[cc0], %[buf])
stxvw4x(49, %[cc1], %[buf])
stxvw4x(50, %[cc2], %[buf])
stxvw4x(51, %[cc3], %[buf])
/*
 * Fourth encrypted block is IV for next run.
 */
vand(24, 23, 23)
addi(%[buf], %[buf], 64)
bdnz(loop)
: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
[buf] "+b" (buf)
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
"ctr", "memory"
);
}
/*
 * Decrypt num_blocks 16-byte blocks in CBC mode, in place, using an
 * AES-256 (14 rounds) expanded key. Same structure as cbcdec_128,
 * with four extra rounds and subkeys in v0..v14.
 *
 * sk          expanded subkeys (15 * 16 bytes)
 * iv          16-byte IV for the first block (read only)
 * buf         ciphertext, overwritten with plaintext
 * num_blocks  block count; must be a positive multiple of 4
 *             (the asm processes 4 blocks per iteration)
 */
static void
cbcdec_256(const unsigned char *sk,
const unsigned char *iv, unsigned char *buf, size_t num_blocks)
{
/* Byte offsets 0/16/32/48 used as asm operands for indexed accesses. */
long cc0, cc1, cc2, cc3;
#if BR_POWER8_LE
/* Permutation pattern to byteswap each 32-bit word (LE -> BE). */
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
cc0 = 0;
cc1 = 16;
cc2 = 32;
cc3 = 48;
asm volatile (
/*
 * Load subkeys into v0..v14
 */
lxvw4x(32, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(33, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(34, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(35, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(36, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(37, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(38, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(39, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(40, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(41, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(42, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(43, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(44, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(45, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(46, %[cc0], %[sk])
li(%[cc0], 0)
#if BR_POWER8_LE
/*
 * v15 = constant for byteswapping words
 */
lxvw4x(47, 0, %[idx2be])
#endif
/*
 * Load IV into v24.
 */
lxvw4x(56, 0, %[iv])
#if BR_POWER8_LE
vperm(24, 24, 24, 15)
#endif
mtctr(%[num_blocks])
label(loop)
/*
 * Load next ciphertext words in v16..v19. Also save them
 * in v20..v23.
 */
lxvw4x(48, %[cc0], %[buf])
lxvw4x(49, %[cc1], %[buf])
lxvw4x(50, %[cc2], %[buf])
lxvw4x(51, %[cc3], %[buf])
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
vand(20, 16, 16)
vand(21, 17, 17)
vand(22, 18, 18)
vand(23, 19, 19)
/*
 * Decrypt the blocks.
 */
vxor(16, 16, 14)
vxor(17, 17, 14)
vxor(18, 18, 14)
vxor(19, 19, 14)
vncipher(16, 16, 13)
vncipher(17, 17, 13)
vncipher(18, 18, 13)
vncipher(19, 19, 13)
vncipher(16, 16, 12)
vncipher(17, 17, 12)
vncipher(18, 18, 12)
vncipher(19, 19, 12)
vncipher(16, 16, 11)
vncipher(17, 17, 11)
vncipher(18, 18, 11)
vncipher(19, 19, 11)
vncipher(16, 16, 10)
vncipher(17, 17, 10)
vncipher(18, 18, 10)
vncipher(19, 19, 10)
vncipher(16, 16, 9)
vncipher(17, 17, 9)
vncipher(18, 18, 9)
vncipher(19, 19, 9)
vncipher(16, 16, 8)
vncipher(17, 17, 8)
vncipher(18, 18, 8)
vncipher(19, 19, 8)
vncipher(16, 16, 7)
vncipher(17, 17, 7)
vncipher(18, 18, 7)
vncipher(19, 19, 7)
vncipher(16, 16, 6)
vncipher(17, 17, 6)
vncipher(18, 18, 6)
vncipher(19, 19, 6)
vncipher(16, 16, 5)
vncipher(17, 17, 5)
vncipher(18, 18, 5)
vncipher(19, 19, 5)
vncipher(16, 16, 4)
vncipher(17, 17, 4)
vncipher(18, 18, 4)
vncipher(19, 19, 4)
vncipher(16, 16, 3)
vncipher(17, 17, 3)
vncipher(18, 18, 3)
vncipher(19, 19, 3)
vncipher(16, 16, 2)
vncipher(17, 17, 2)
vncipher(18, 18, 2)
vncipher(19, 19, 2)
vncipher(16, 16, 1)
vncipher(17, 17, 1)
vncipher(18, 18, 1)
vncipher(19, 19, 1)
vncipherlast(16, 16, 0)
vncipherlast(17, 17, 0)
vncipherlast(18, 18, 0)
vncipherlast(19, 19, 0)
/*
 * XOR decrypted blocks with IV / previous block.
 */
vxor(16, 16, 24)
vxor(17, 17, 20)
vxor(18, 18, 21)
vxor(19, 19, 22)
/*
 * Store back result (with byteswap)
 */
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
stxvw4x(48, %[cc0], %[buf])
stxvw4x(49, %[cc1], %[buf])
stxvw4x(50, %[cc2], %[buf])
stxvw4x(51, %[cc3], %[buf])
/*
 * Fourth encrypted block is IV for next run.
 */
vand(24, 23, 23)
addi(%[buf], %[buf], 64)
bdnz(loop)
: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
[buf] "+b" (buf)
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
"ctr", "memory"
);
}
/* see bearssl_block.h */
/*
 * Invoke the key-size-specific CBC decryption kernel.
 * num_blocks must be a positive multiple of 4.
 */
static void
cbcdec_blocks(const br_aes_pwr8_cbcdec_keys *ctx,
	void *iv, unsigned char *buf, size_t num_blocks)
{
	switch (ctx->num_rounds) {
	case 10:
		cbcdec_128(ctx->skey.skni, iv, buf, num_blocks);
		break;
	case 12:
		cbcdec_192(ctx->skey.skni, iv, buf, num_blocks);
		break;
	default:
		cbcdec_256(ctx->skey.skni, iv, buf, num_blocks);
		break;
	}
}

void
br_aes_pwr8_cbcdec_run(const br_aes_pwr8_cbcdec_keys *ctx,
	void *iv, void *data, size_t len)
{
	unsigned char savediv[16];
	unsigned char *buf;

	if (len == 0) {
		return;
	}
	buf = data;
	/*
	 * The IV for the caller's next invocation is the last
	 * ciphertext block of this one; save it before decrypting.
	 */
	memcpy(savediv, buf + len - 16, 16);
	if (len >= 64) {
		size_t nblk;
		unsigned char last[16];

		/*
		 * Bulk-process as many whole groups of four blocks as
		 * possible; the last ciphertext block of that span
		 * becomes the IV for the remainder.
		 */
		nblk = (len >> 4) & ~(size_t)3;
		memcpy(last, buf + (nblk << 4) - 16, 16);
		cbcdec_blocks(ctx, iv, buf, nblk);
		buf += nblk << 4;
		len &= 63;
		memcpy(iv, last, 16);
	}
	if (len > 0) {
		unsigned char tmp[64];

		/*
		 * Fewer than four blocks remain: pad a stack buffer to
		 * a full 4-block group, decrypt it, and copy back only
		 * the bytes that were actually requested.
		 */
		memcpy(tmp, buf, len);
		memset(tmp + len, 0, (sizeof tmp) - len);
		cbcdec_blocks(ctx, iv, tmp, 4);
		memcpy(buf, tmp, len);
	}
	memcpy(iv, savediv, 16);
}
/* see bearssl_block.h */
/*
 * Class instance for the POWER8 CBC decryption implementation.
 * Fields, per the br_block_cbcdec_class layout in bearssl_block.h:
 * context size, block size in bytes (16), log2 of block size (4),
 * then the init/run entry points cast to the generic signatures.
 */
const br_block_cbcdec_class br_aes_pwr8_cbcdec_vtable = {
sizeof(br_aes_pwr8_cbcdec_keys),
16,
4,
(void (*)(const br_block_cbcdec_class **, const void *, size_t))
&br_aes_pwr8_cbcdec_init,
(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
&br_aes_pwr8_cbcdec_run
};
/* see bearssl_block.h */
/*
 * Return the CBC decryption vtable, or NULL when the POWER8
 * implementation is not usable on this system.
 */
const br_block_cbcdec_class *
br_aes_pwr8_cbcdec_get_vtable(void)
{
	if (!br_aes_pwr8_supported()) {
		return NULL;
	}
	return &br_aes_pwr8_cbcdec_vtable;
}
#else
/* see bearssl_block.h */
/*
 * Fallback when this file is built without BR_POWER8: the POWER8
 * implementation is unavailable, so no vtable is offered.
 */
const br_block_cbcdec_class *
br_aes_pwr8_cbcdec_get_vtable(void)
{
return NULL;
}
#endif

View File

@ -1,417 +0,0 @@
/*
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define BR_POWER_ASM_MACROS 1
#include "inner.h"
#if BR_POWER8
/* see bearssl_block.h */
/*
 * Initialize a CBC encryption context: expand the key into the
 * context's subkey storage and install the class vtable.
 */
void
br_aes_pwr8_cbcenc_init(br_aes_pwr8_cbcenc_keys *ctx,
	const void *key, size_t len)
{
	unsigned rounds;

	rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
	ctx->num_rounds = rounds;
	ctx->vtable = &br_aes_pwr8_cbcenc_vtable;
}
/*
 * Encrypt len bytes in CBC mode, in place, using an AES-128
 * (10 rounds) expanded key. CBC encryption is inherently serial, so
 * the loop processes a single 16-byte block per iteration
 * (len >> 4 iterations); len must be a positive multiple of 16.
 *
 * sk   expanded subkeys (11 * 16 bytes, loaded into v0..v10)
 * iv   16-byte IV (read only; the caller propagates the last
 *      ciphertext block as the next IV, see br_aes_pwr8_cbcenc_run)
 * buf  plaintext, overwritten with ciphertext
 */
static void
cbcenc_128(const unsigned char *sk,
const unsigned char *iv, unsigned char *buf, size_t len)
{
/* Byte offset passed as an asm operand for indexed loads. */
long cc;
#if BR_POWER8_LE
/* Permutation pattern to byteswap each 32-bit word (LE -> BE). */
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
cc = 0;
asm volatile (
/*
 * Load subkeys into v0..v10
 */
lxvw4x(32, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(33, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(34, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(35, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(36, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(37, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(38, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(39, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(40, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(41, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(42, %[cc], %[sk])
#if BR_POWER8_LE
/*
 * v15 = constant for byteswapping words
 */
lxvw4x(47, 0, %[idx2be])
#endif
/*
 * Load IV into v16.
 */
lxvw4x(48, 0, %[iv])
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
#endif
mtctr(%[num_blocks])
label(loop)
/*
 * Load next plaintext word and XOR with current IV.
 */
lxvw4x(49, 0, %[buf])
#if BR_POWER8_LE
vperm(17, 17, 17, 15)
#endif
vxor(16, 16, 17)
/*
 * Encrypt the block.
 */
vxor(16, 16, 0)
vcipher(16, 16, 1)
vcipher(16, 16, 2)
vcipher(16, 16, 3)
vcipher(16, 16, 4)
vcipher(16, 16, 5)
vcipher(16, 16, 6)
vcipher(16, 16, 7)
vcipher(16, 16, 8)
vcipher(16, 16, 9)
vcipherlast(16, 16, 10)
/*
 * Store back result (with byteswap)
 */
#if BR_POWER8_LE
vperm(17, 16, 16, 15)
stxvw4x(49, 0, %[buf])
#else
stxvw4x(48, 0, %[buf])
#endif
addi(%[buf], %[buf], 16)
bdnz(loop)
: [cc] "+b" (cc), [buf] "+b" (buf)
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
"ctr", "memory"
);
}
/*
 * Encrypt len bytes in CBC mode, in place, using an AES-192
 * (12 rounds) expanded key. Same structure as cbcenc_128, with two
 * extra rounds and subkeys in v0..v12; len must be a positive
 * multiple of 16.
 *
 * sk   expanded subkeys (13 * 16 bytes)
 * iv   16-byte IV (read only)
 * buf  plaintext, overwritten with ciphertext
 */
static void
cbcenc_192(const unsigned char *sk,
const unsigned char *iv, unsigned char *buf, size_t len)
{
/* Byte offset passed as an asm operand for indexed loads. */
long cc;
#if BR_POWER8_LE
/* Permutation pattern to byteswap each 32-bit word (LE -> BE). */
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
cc = 0;
asm volatile (
/*
 * Load subkeys into v0..v12
 */
lxvw4x(32, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(33, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(34, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(35, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(36, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(37, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(38, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(39, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(40, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(41, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(42, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(43, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(44, %[cc], %[sk])
#if BR_POWER8_LE
/*
 * v15 = constant for byteswapping words
 */
lxvw4x(47, 0, %[idx2be])
#endif
/*
 * Load IV into v16.
 */
lxvw4x(48, 0, %[iv])
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
#endif
mtctr(%[num_blocks])
label(loop)
/*
 * Load next plaintext word and XOR with current IV.
 */
lxvw4x(49, 0, %[buf])
#if BR_POWER8_LE
vperm(17, 17, 17, 15)
#endif
vxor(16, 16, 17)
/*
 * Encrypt the block.
 */
vxor(16, 16, 0)
vcipher(16, 16, 1)
vcipher(16, 16, 2)
vcipher(16, 16, 3)
vcipher(16, 16, 4)
vcipher(16, 16, 5)
vcipher(16, 16, 6)
vcipher(16, 16, 7)
vcipher(16, 16, 8)
vcipher(16, 16, 9)
vcipher(16, 16, 10)
vcipher(16, 16, 11)
vcipherlast(16, 16, 12)
/*
 * Store back result (with byteswap)
 */
#if BR_POWER8_LE
vperm(17, 16, 16, 15)
stxvw4x(49, 0, %[buf])
#else
stxvw4x(48, 0, %[buf])
#endif
addi(%[buf], %[buf], 16)
bdnz(loop)
: [cc] "+b" (cc), [buf] "+b" (buf)
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
"ctr", "memory"
);
}
/*
 * Encrypt len bytes in CBC mode, in place, using an AES-256
 * (14 rounds) expanded key. Same structure as cbcenc_128, with four
 * extra rounds and subkeys in v0..v14; len must be a positive
 * multiple of 16.
 *
 * sk   expanded subkeys (15 * 16 bytes)
 * iv   16-byte IV (read only)
 * buf  plaintext, overwritten with ciphertext
 */
static void
cbcenc_256(const unsigned char *sk,
const unsigned char *iv, unsigned char *buf, size_t len)
{
/* Byte offset passed as an asm operand for indexed loads. */
long cc;
#if BR_POWER8_LE
/* Permutation pattern to byteswap each 32-bit word (LE -> BE). */
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
cc = 0;
asm volatile (
/*
 * Load subkeys into v0..v14
 */
lxvw4x(32, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(33, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(34, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(35, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(36, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(37, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(38, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(39, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(40, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(41, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(42, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(43, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(44, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(45, %[cc], %[sk])
addi(%[cc], %[cc], 16)
lxvw4x(46, %[cc], %[sk])
#if BR_POWER8_LE
/*
 * v15 = constant for byteswapping words
 */
lxvw4x(47, 0, %[idx2be])
#endif
/*
 * Load IV into v16.
 */
lxvw4x(48, 0, %[iv])
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
#endif
mtctr(%[num_blocks])
label(loop)
/*
 * Load next plaintext word and XOR with current IV.
 */
lxvw4x(49, 0, %[buf])
#if BR_POWER8_LE
vperm(17, 17, 17, 15)
#endif
vxor(16, 16, 17)
/*
 * Encrypt the block.
 */
vxor(16, 16, 0)
vcipher(16, 16, 1)
vcipher(16, 16, 2)
vcipher(16, 16, 3)
vcipher(16, 16, 4)
vcipher(16, 16, 5)
vcipher(16, 16, 6)
vcipher(16, 16, 7)
vcipher(16, 16, 8)
vcipher(16, 16, 9)
vcipher(16, 16, 10)
vcipher(16, 16, 11)
vcipher(16, 16, 12)
vcipher(16, 16, 13)
vcipherlast(16, 16, 14)
/*
 * Store back result (with byteswap)
 */
#if BR_POWER8_LE
vperm(17, 16, 16, 15)
stxvw4x(49, 0, %[buf])
#else
stxvw4x(48, 0, %[buf])
#endif
addi(%[buf], %[buf], 16)
bdnz(loop)
: [cc] "+b" (cc), [buf] "+b" (buf)
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
"ctr", "memory"
);
}
/* see bearssl_block.h */
/*
 * CBC-encrypt len bytes (a multiple of the 16-byte block size) in
 * place, dispatching on the key size recorded in the context, then
 * store the last ciphertext block back into iv for chaining.
 */
void
br_aes_pwr8_cbcenc_run(const br_aes_pwr8_cbcenc_keys *ctx,
	void *iv, void *data, size_t len)
{
	if (len == 0) {
		return;
	}
	if (ctx->num_rounds == 10) {
		cbcenc_128(ctx->skey.skni, iv, data, len);
	} else if (ctx->num_rounds == 12) {
		cbcenc_192(ctx->skey.skni, iv, data, len);
	} else {
		cbcenc_256(ctx->skey.skni, iv, data, len);
	}
	memcpy(iv, (unsigned char *)data + (len - 16), 16);
}
/* see bearssl_block.h */
/*
 * Class instance for the POWER8 CBC encryption implementation.
 * Fields, per the br_block_cbcenc_class layout in bearssl_block.h:
 * context size, block size in bytes (16), log2 of block size (4),
 * then the init/run entry points cast to the generic signatures.
 */
const br_block_cbcenc_class br_aes_pwr8_cbcenc_vtable = {
sizeof(br_aes_pwr8_cbcenc_keys),
16,
4,
(void (*)(const br_block_cbcenc_class **, const void *, size_t))
&br_aes_pwr8_cbcenc_init,
(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
&br_aes_pwr8_cbcenc_run
};
/* see bearssl_block.h */
/*
 * Return the CBC encryption vtable, or NULL when the POWER8
 * implementation is not usable on this system.
 */
const br_block_cbcenc_class *
br_aes_pwr8_cbcenc_get_vtable(void)
{
	if (!br_aes_pwr8_supported()) {
		return NULL;
	}
	return &br_aes_pwr8_cbcenc_vtable;
}
#else
/* see bearssl_block.h */
/*
 * Fallback when this file is built without BR_POWER8: the POWER8
 * implementation is unavailable, so no vtable is offered.
 */
const br_block_cbcenc_class *
br_aes_pwr8_cbcenc_get_vtable(void)
{
return NULL;
}
#endif

View File

@ -1,717 +0,0 @@
/*
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define BR_POWER_ASM_MACROS 1
#include "inner.h"
#if BR_POWER8
/* see bearssl_block.h */
/*
 * Initialize a CTR context: expand the key into the context's subkey
 * storage and install the class vtable.
 */
void
br_aes_pwr8_ctr_init(br_aes_pwr8_ctr_keys *ctx,
	const void *key, size_t len)
{
	unsigned rounds;

	rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
	ctx->num_rounds = rounds;
	ctx->vtable = &br_aes_pwr8_ctr_vtable;
}
/*
 * CTR-mode processing of num_blocks 16-byte blocks, in place, using
 * an AES-128 (10 rounds) expanded key: each data block is XORed with
 * its encrypted counter block.
 *
 * sk          expanded subkeys (11 * 16 bytes, loaded into v0..v10)
 * ivbuf       64 bytes holding four counter blocks (presumably four
 *             consecutive counter values -- confirm against caller);
 *             the last word of each is advanced by 4 per iteration
 *             via ctrinc
 * buf         data, XORed in place with the encrypted counters
 * num_blocks  block count; the asm runs num_blocks >> 2 iterations of
 *             a 4-block loop, so it must be a positive multiple of 4
 */
static void
ctr_128(const unsigned char *sk, const unsigned char *ivbuf,
unsigned char *buf, size_t num_blocks)
{
/* Byte offsets 0/16/32/48 used as asm operands for indexed accesses. */
long cc0, cc1, cc2, cc3;
#if BR_POWER8_LE
/* Permutation pattern to byteswap each 32-bit word (LE -> BE). */
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
/* Per-iteration counter increment: adds 4 to the last 32-bit word. */
static const uint32_t ctrinc[] = {
0, 0, 0, 4
};
cc0 = 0;
cc1 = 16;
cc2 = 32;
cc3 = 48;
asm volatile (
/*
 * Load subkeys into v0..v10
 */
lxvw4x(32, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(33, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(34, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(35, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(36, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(37, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(38, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(39, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(40, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(41, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(42, %[cc0], %[sk])
li(%[cc0], 0)
#if BR_POWER8_LE
/*
 * v15 = constant for byteswapping words
 */
lxvw4x(47, 0, %[idx2be])
#endif
/*
 * v28 = increment for IV counter.
 */
lxvw4x(60, 0, %[ctrinc])
/*
 * Load IV into v16..v19
 */
lxvw4x(48, %[cc0], %[ivbuf])
lxvw4x(49, %[cc1], %[ivbuf])
lxvw4x(50, %[cc2], %[ivbuf])
lxvw4x(51, %[cc3], %[ivbuf])
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
mtctr(%[num_blocks])
label(loop)
/*
 * Compute next IV into v24..v27
 */
vadduwm(24, 16, 28)
vadduwm(25, 17, 28)
vadduwm(26, 18, 28)
vadduwm(27, 19, 28)
/*
 * Load next data blocks. We do this early on but we
 * won't need them until IV encryption is done.
 */
lxvw4x(52, %[cc0], %[buf])
lxvw4x(53, %[cc1], %[buf])
lxvw4x(54, %[cc2], %[buf])
lxvw4x(55, %[cc3], %[buf])
/*
 * Encrypt the current IV.
 */
vxor(16, 16, 0)
vxor(17, 17, 0)
vxor(18, 18, 0)
vxor(19, 19, 0)
vcipher(16, 16, 1)
vcipher(17, 17, 1)
vcipher(18, 18, 1)
vcipher(19, 19, 1)
vcipher(16, 16, 2)
vcipher(17, 17, 2)
vcipher(18, 18, 2)
vcipher(19, 19, 2)
vcipher(16, 16, 3)
vcipher(17, 17, 3)
vcipher(18, 18, 3)
vcipher(19, 19, 3)
vcipher(16, 16, 4)
vcipher(17, 17, 4)
vcipher(18, 18, 4)
vcipher(19, 19, 4)
vcipher(16, 16, 5)
vcipher(17, 17, 5)
vcipher(18, 18, 5)
vcipher(19, 19, 5)
vcipher(16, 16, 6)
vcipher(17, 17, 6)
vcipher(18, 18, 6)
vcipher(19, 19, 6)
vcipher(16, 16, 7)
vcipher(17, 17, 7)
vcipher(18, 18, 7)
vcipher(19, 19, 7)
vcipher(16, 16, 8)
vcipher(17, 17, 8)
vcipher(18, 18, 8)
vcipher(19, 19, 8)
vcipher(16, 16, 9)
vcipher(17, 17, 9)
vcipher(18, 18, 9)
vcipher(19, 19, 9)
vcipherlast(16, 16, 10)
vcipherlast(17, 17, 10)
vcipherlast(18, 18, 10)
vcipherlast(19, 19, 10)
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
/*
 * Load next plaintext word and XOR with encrypted IV.
 */
vxor(16, 20, 16)
vxor(17, 21, 17)
vxor(18, 22, 18)
vxor(19, 23, 19)
stxvw4x(48, %[cc0], %[buf])
stxvw4x(49, %[cc1], %[buf])
stxvw4x(50, %[cc2], %[buf])
stxvw4x(51, %[cc3], %[buf])
addi(%[buf], %[buf], 64)
/*
 * Update IV.
 */
vand(16, 24, 24)
vand(17, 25, 25)
vand(18, 26, 26)
vand(19, 27, 27)
bdnz(loop)
: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
[buf] "+b" (buf)
: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
[ctrinc] "b" (ctrinc)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
"ctr", "memory"
);
}
/*
 * CTR keystream application for AES-192 (12 rounds), four blocks at
 * a time.
 *
 * sk          expanded subkeys (13 x 16 bytes)
 * ivbuf       four consecutive 16-byte counter (IV) blocks
 * buf         data buffer, XORed in place with the keystream
 * num_blocks  number of 16-byte blocks; the asm loop consumes four
 *             blocks per iteration (num_blocks >> 2 is the loop count),
 *             so this must be a non-zero multiple of 4
 */
static void
ctr_192(const unsigned char *sk, const unsigned char *ivbuf,
unsigned char *buf, size_t num_blocks)
{
long cc0, cc1, cc2, cc3;
#if BR_POWER8_LE
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
static const uint32_t ctrinc[] = {
0, 0, 0, 4
};
/* Byte offsets of the four parallel 16-byte lanes. */
cc0 = 0;
cc1 = 16;
cc2 = 32;
cc3 = 48;
asm volatile (
/*
 * Load subkeys into v0..v12
 */
lxvw4x(32, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(33, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(34, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(35, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(36, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(37, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(38, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(39, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(40, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(41, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(42, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(43, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(44, %[cc0], %[sk])
li(%[cc0], 0)
#if BR_POWER8_LE
/*
 * v15 = constant for byteswapping words
 */
lxvw4x(47, 0, %[idx2be])
#endif
/*
 * v28 = increment for IV counter.
 */
lxvw4x(60, 0, %[ctrinc])
/*
 * Load IV into v16..v19
 */
lxvw4x(48, %[cc0], %[ivbuf])
lxvw4x(49, %[cc1], %[ivbuf])
lxvw4x(50, %[cc2], %[ivbuf])
lxvw4x(51, %[cc3], %[ivbuf])
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
mtctr(%[num_blocks])
label(loop)
/*
 * Compute next IV into v24..v27
 */
vadduwm(24, 16, 28)
vadduwm(25, 17, 28)
vadduwm(26, 18, 28)
vadduwm(27, 19, 28)
/*
 * Load next data blocks. We do this early on but we
 * won't need them until IV encryption is done.
 */
lxvw4x(52, %[cc0], %[buf])
lxvw4x(53, %[cc1], %[buf])
lxvw4x(54, %[cc2], %[buf])
lxvw4x(55, %[cc3], %[buf])
/*
 * Encrypt the current IV.
 */
vxor(16, 16, 0)
vxor(17, 17, 0)
vxor(18, 18, 0)
vxor(19, 19, 0)
vcipher(16, 16, 1)
vcipher(17, 17, 1)
vcipher(18, 18, 1)
vcipher(19, 19, 1)
vcipher(16, 16, 2)
vcipher(17, 17, 2)
vcipher(18, 18, 2)
vcipher(19, 19, 2)
vcipher(16, 16, 3)
vcipher(17, 17, 3)
vcipher(18, 18, 3)
vcipher(19, 19, 3)
vcipher(16, 16, 4)
vcipher(17, 17, 4)
vcipher(18, 18, 4)
vcipher(19, 19, 4)
vcipher(16, 16, 5)
vcipher(17, 17, 5)
vcipher(18, 18, 5)
vcipher(19, 19, 5)
vcipher(16, 16, 6)
vcipher(17, 17, 6)
vcipher(18, 18, 6)
vcipher(19, 19, 6)
vcipher(16, 16, 7)
vcipher(17, 17, 7)
vcipher(18, 18, 7)
vcipher(19, 19, 7)
vcipher(16, 16, 8)
vcipher(17, 17, 8)
vcipher(18, 18, 8)
vcipher(19, 19, 8)
vcipher(16, 16, 9)
vcipher(17, 17, 9)
vcipher(18, 18, 9)
vcipher(19, 19, 9)
vcipher(16, 16, 10)
vcipher(17, 17, 10)
vcipher(18, 18, 10)
vcipher(19, 19, 10)
vcipher(16, 16, 11)
vcipher(17, 17, 11)
vcipher(18, 18, 11)
vcipher(19, 19, 11)
vcipherlast(16, 16, 12)
vcipherlast(17, 17, 12)
vcipherlast(18, 18, 12)
vcipherlast(19, 19, 12)
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
/*
 * XOR the keystream (encrypted IV) into the data blocks
 * that were loaded above, and store the result.
 */
vxor(16, 20, 16)
vxor(17, 21, 17)
vxor(18, 22, 18)
vxor(19, 23, 19)
stxvw4x(48, %[cc0], %[buf])
stxvw4x(49, %[cc1], %[buf])
stxvw4x(50, %[cc2], %[buf])
stxvw4x(51, %[cc3], %[buf])
addi(%[buf], %[buf], 64)
/*
 * Update IV. (A vand of a register with itself is used
 * as a vector register move.)
 */
vand(16, 24, 24)
vand(17, 25, 25)
vand(18, 26, 26)
vand(19, 27, 27)
bdnz(loop)
: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
[buf] "+b" (buf)
: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
[ctrinc] "b" (ctrinc)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
"ctr", "memory"
);
}
/*
 * CTR keystream application for AES-256 (14 rounds), four blocks at
 * a time.
 *
 * sk          expanded subkeys (15 x 16 bytes)
 * ivbuf       four consecutive 16-byte counter (IV) blocks
 * buf         data buffer, XORed in place with the keystream
 * num_blocks  number of 16-byte blocks; the asm loop consumes four
 *             blocks per iteration (num_blocks >> 2 is the loop count),
 *             so this must be a non-zero multiple of 4
 */
static void
ctr_256(const unsigned char *sk, const unsigned char *ivbuf,
unsigned char *buf, size_t num_blocks)
{
long cc0, cc1, cc2, cc3;
#if BR_POWER8_LE
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#endif
static const uint32_t ctrinc[] = {
0, 0, 0, 4
};
/* Byte offsets of the four parallel 16-byte lanes. */
cc0 = 0;
cc1 = 16;
cc2 = 32;
cc3 = 48;
asm volatile (
/*
 * Load subkeys into v0..v14
 */
lxvw4x(32, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(33, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(34, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(35, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(36, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(37, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(38, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(39, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(40, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(41, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(42, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(43, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(44, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(45, %[cc0], %[sk])
addi(%[cc0], %[cc0], 16)
lxvw4x(46, %[cc0], %[sk])
li(%[cc0], 0)
#if BR_POWER8_LE
/*
 * v15 = constant for byteswapping words
 */
lxvw4x(47, 0, %[idx2be])
#endif
/*
 * v28 = increment for IV counter.
 */
lxvw4x(60, 0, %[ctrinc])
/*
 * Load IV into v16..v19
 */
lxvw4x(48, %[cc0], %[ivbuf])
lxvw4x(49, %[cc1], %[ivbuf])
lxvw4x(50, %[cc2], %[ivbuf])
lxvw4x(51, %[cc3], %[ivbuf])
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
mtctr(%[num_blocks])
label(loop)
/*
 * Compute next IV into v24..v27
 */
vadduwm(24, 16, 28)
vadduwm(25, 17, 28)
vadduwm(26, 18, 28)
vadduwm(27, 19, 28)
/*
 * Load next data blocks. We do this early on but we
 * won't need them until IV encryption is done.
 */
lxvw4x(52, %[cc0], %[buf])
lxvw4x(53, %[cc1], %[buf])
lxvw4x(54, %[cc2], %[buf])
lxvw4x(55, %[cc3], %[buf])
/*
 * Encrypt the current IV.
 */
vxor(16, 16, 0)
vxor(17, 17, 0)
vxor(18, 18, 0)
vxor(19, 19, 0)
vcipher(16, 16, 1)
vcipher(17, 17, 1)
vcipher(18, 18, 1)
vcipher(19, 19, 1)
vcipher(16, 16, 2)
vcipher(17, 17, 2)
vcipher(18, 18, 2)
vcipher(19, 19, 2)
vcipher(16, 16, 3)
vcipher(17, 17, 3)
vcipher(18, 18, 3)
vcipher(19, 19, 3)
vcipher(16, 16, 4)
vcipher(17, 17, 4)
vcipher(18, 18, 4)
vcipher(19, 19, 4)
vcipher(16, 16, 5)
vcipher(17, 17, 5)
vcipher(18, 18, 5)
vcipher(19, 19, 5)
vcipher(16, 16, 6)
vcipher(17, 17, 6)
vcipher(18, 18, 6)
vcipher(19, 19, 6)
vcipher(16, 16, 7)
vcipher(17, 17, 7)
vcipher(18, 18, 7)
vcipher(19, 19, 7)
vcipher(16, 16, 8)
vcipher(17, 17, 8)
vcipher(18, 18, 8)
vcipher(19, 19, 8)
vcipher(16, 16, 9)
vcipher(17, 17, 9)
vcipher(18, 18, 9)
vcipher(19, 19, 9)
vcipher(16, 16, 10)
vcipher(17, 17, 10)
vcipher(18, 18, 10)
vcipher(19, 19, 10)
vcipher(16, 16, 11)
vcipher(17, 17, 11)
vcipher(18, 18, 11)
vcipher(19, 19, 11)
vcipher(16, 16, 12)
vcipher(17, 17, 12)
vcipher(18, 18, 12)
vcipher(19, 19, 12)
vcipher(16, 16, 13)
vcipher(17, 17, 13)
vcipher(18, 18, 13)
vcipher(19, 19, 13)
vcipherlast(16, 16, 14)
vcipherlast(17, 17, 14)
vcipherlast(18, 18, 14)
vcipherlast(19, 19, 14)
#if BR_POWER8_LE
vperm(16, 16, 16, 15)
vperm(17, 17, 17, 15)
vperm(18, 18, 18, 15)
vperm(19, 19, 19, 15)
#endif
/*
 * XOR the keystream (encrypted IV) into the data blocks
 * that were loaded above, and store the result.
 */
vxor(16, 20, 16)
vxor(17, 21, 17)
vxor(18, 22, 18)
vxor(19, 23, 19)
stxvw4x(48, %[cc0], %[buf])
stxvw4x(49, %[cc1], %[buf])
stxvw4x(50, %[cc2], %[buf])
stxvw4x(51, %[cc3], %[buf])
addi(%[buf], %[buf], 64)
/*
 * Update IV. (A vand of a register with itself is used
 * as a vector register move.)
 */
vand(16, 24, 24)
vand(17, 25, 25)
vand(18, 26, 26)
vand(19, 27, 27)
bdnz(loop)
: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
[buf] "+b" (buf)
: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
[ctrinc] "b" (ctrinc)
#if BR_POWER8_LE
, [idx2be] "b" (idx2be)
#endif
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
"ctr", "memory"
);
}
/* see bearssl_block.h */
uint32_t
br_aes_pwr8_ctr_run(const br_aes_pwr8_ctr_keys *ctx,
	const void *iv, uint32_t cc, void *data, size_t len)
{
	unsigned char *out;
	unsigned char ivbuf[64];
	int i;

	out = data;
	/* Replicate the 12-byte nonce into the four counter slots. */
	for (i = 0; i < 4; i ++) {
		memcpy(ivbuf + (i << 4), iv, 12);
	}
	if (len >= 64) {
		size_t full4;

		/* Counter values cc..cc+3, big-endian, one per slot. */
		for (i = 0; i < 4; i ++) {
			br_enc32be(ivbuf + (i << 4) + 12, cc + (uint32_t)i);
		}
		/* Number of blocks in whole 4-block groups. */
		full4 = (len >> 4) & ~(size_t)3;
		if (ctx->num_rounds == 10) {
			ctr_128(ctx->skey.skni, ivbuf, out, full4);
		} else if (ctx->num_rounds == 12) {
			ctr_192(ctx->skey.skni, ivbuf, out, full4);
		} else {
			ctr_256(ctx->skey.skni, ivbuf, out, full4);
		}
		cc += (uint32_t)full4;
		out += len & ~(size_t)63;
		len &= 63;
	}
	if (len > 0) {
		unsigned char tmp[64];

		/*
		 * Tail of 1..63 bytes: pad into a 64-byte scratch
		 * buffer so the 4-block asm routine can be used.
		 */
		memcpy(tmp, out, len);
		memset(tmp + len, 0, (sizeof tmp) - len);
		for (i = 0; i < 4; i ++) {
			br_enc32be(ivbuf + (i << 4) + 12, cc + (uint32_t)i);
		}
		if (ctx->num_rounds == 10) {
			ctr_128(ctx->skey.skni, ivbuf, tmp, 4);
		} else if (ctx->num_rounds == 12) {
			ctr_192(ctx->skey.skni, ivbuf, tmp, 4);
		} else {
			ctr_256(ctx->skey.skni, ivbuf, tmp, 4);
		}
		memcpy(out, tmp, len);
		cc += (len + 15) >> 4;
	}
	return cc;
}
/* see bearssl_block.h */
const br_block_ctr_class br_aes_pwr8_ctr_vtable = {
sizeof(br_aes_pwr8_ctr_keys), /* context structure size */
16, /* AES block size, in bytes */
4, /* presumably log2(block size) -- see bearssl_block.h */
(void (*)(const br_block_ctr_class **, const void *, size_t))
&br_aes_pwr8_ctr_init,
(uint32_t (*)(const br_block_ctr_class *const *,
const void *, uint32_t, void *, size_t))
&br_aes_pwr8_ctr_run
};
/* see bearssl_block.h */
const br_block_ctr_class *
br_aes_pwr8_ctr_get_vtable(void)
{
	/* The vtable is only usable when the CPU has POWER8 crypto opcodes. */
	if (!br_aes_pwr8_supported()) {
		return NULL;
	}
	return &br_aes_pwr8_ctr_vtable;
}
#else
/* see bearssl_block.h */
const br_block_ctr_class *
br_aes_pwr8_ctr_get_vtable(void)
{
/* POWER8 support was not compiled in: no vtable available. */
return NULL;
}
#endif

View File

@ -1,946 +0,0 @@
/*
* Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define BR_POWER_ASM_MACROS 1
#include "inner.h"
#if BR_POWER8
/* see bearssl_block.h */
const br_block_ctrcbc_class *
br_aes_pwr8_ctrcbc_get_vtable(void)
{
	/* The vtable is only usable when the CPU has POWER8 crypto opcodes. */
	if (!br_aes_pwr8_supported()) {
		return NULL;
	}
	return &br_aes_pwr8_ctrcbc_vtable;
}
/* see bearssl_block.h */
void
br_aes_pwr8_ctrcbc_init(br_aes_pwr8_ctrcbc_keys *ctx,
	const void *key, size_t len)
{
	/*
	 * Run the POWER8 key schedule; it reports the number of AES
	 * rounds, which encodes the key size.
	 */
	ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
	ctx->vtable = &br_aes_pwr8_ctrcbc_vtable;
}
/*
* Register conventions for CTR + CBC-MAC:
*
* AES subkeys are in registers 0 to 10/12/14 (depending on key size)
* Register v15 contains the byteswap index register (little-endian only)
* Register v16 contains the CTR counter value
* Register v17 contains the CBC-MAC current value
* Registers v18 to v27 are scratch
* Counter increment uses v28, v29 and v30
*
* For CTR alone:
*
* AES subkeys are in registers 0 to 10/12/14 (depending on key size)
* Register v15 contains the byteswap index register (little-endian only)
* Registers v16 to v19 contain the CTR counter values (four blocks)
* Registers v20 to v27 are scratch
* Counter increment uses v28, v29 and v30
*/
/*
 * Load the 11 AES-128 subkeys from sk into v0..v10 (VSX registers
 * 32..42). %[cc] is used as a running byte offset; each use site
 * resets it afterwards with li(%[cc], 0).
 */
#define LOAD_SUBKEYS_128 \
lxvw4x(32, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(33, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(34, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(35, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(36, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(37, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(38, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(39, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(40, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(41, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(42, %[cc], %[sk])
/* AES-192: two extra subkeys, into v11..v12. */
#define LOAD_SUBKEYS_192 \
LOAD_SUBKEYS_128 \
addi(%[cc], %[cc], 16) \
lxvw4x(43, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(44, %[cc], %[sk])
/* AES-256: two more subkeys again, into v13..v14. */
#define LOAD_SUBKEYS_256 \
LOAD_SUBKEYS_192 \
addi(%[cc], %[cc], 16) \
lxvw4x(45, %[cc], %[sk]) \
addi(%[cc], %[cc], 16) \
lxvw4x(46, %[cc], %[sk])
/*
 * AES-encrypt the single block held in vector register x, using the
 * subkeys previously loaded with LOAD_SUBKEYS_128 (10 rounds).
 */
#define BLOCK_ENCRYPT_128(x) \
vxor(x, x, 0) \
vcipher(x, x, 1) \
vcipher(x, x, 2) \
vcipher(x, x, 3) \
vcipher(x, x, 4) \
vcipher(x, x, 5) \
vcipher(x, x, 6) \
vcipher(x, x, 7) \
vcipher(x, x, 8) \
vcipher(x, x, 9) \
vcipherlast(x, x, 10)
/* Same, for AES-192 (12 rounds). */
#define BLOCK_ENCRYPT_192(x) \
vxor(x, x, 0) \
vcipher(x, x, 1) \
vcipher(x, x, 2) \
vcipher(x, x, 3) \
vcipher(x, x, 4) \
vcipher(x, x, 5) \
vcipher(x, x, 6) \
vcipher(x, x, 7) \
vcipher(x, x, 8) \
vcipher(x, x, 9) \
vcipher(x, x, 10) \
vcipher(x, x, 11) \
vcipherlast(x, x, 12)
/* Same, for AES-256 (14 rounds). */
#define BLOCK_ENCRYPT_256(x) \
vxor(x, x, 0) \
vcipher(x, x, 1) \
vcipher(x, x, 2) \
vcipher(x, x, 3) \
vcipher(x, x, 4) \
vcipher(x, x, 5) \
vcipher(x, x, 6) \
vcipher(x, x, 7) \
vcipher(x, x, 8) \
vcipher(x, x, 9) \
vcipher(x, x, 10) \
vcipher(x, x, 11) \
vcipher(x, x, 12) \
vcipher(x, x, 13) \
vcipherlast(x, x, 14)
/*
 * AES-encrypt two blocks (registers x and y) with the round
 * instructions interleaved, presumably to exploit instruction-level
 * parallelism between the two independent AES instances.
 */
#define BLOCK_ENCRYPT_X2_128(x, y) \
vxor(x, x, 0) \
vxor(y, y, 0) \
vcipher(x, x, 1) \
vcipher(y, y, 1) \
vcipher(x, x, 2) \
vcipher(y, y, 2) \
vcipher(x, x, 3) \
vcipher(y, y, 3) \
vcipher(x, x, 4) \
vcipher(y, y, 4) \
vcipher(x, x, 5) \
vcipher(y, y, 5) \
vcipher(x, x, 6) \
vcipher(y, y, 6) \
vcipher(x, x, 7) \
vcipher(y, y, 7) \
vcipher(x, x, 8) \
vcipher(y, y, 8) \
vcipher(x, x, 9) \
vcipher(y, y, 9) \
vcipherlast(x, x, 10) \
vcipherlast(y, y, 10)
/* Two-block parallel encryption, AES-192. */
#define BLOCK_ENCRYPT_X2_192(x, y) \
vxor(x, x, 0) \
vxor(y, y, 0) \
vcipher(x, x, 1) \
vcipher(y, y, 1) \
vcipher(x, x, 2) \
vcipher(y, y, 2) \
vcipher(x, x, 3) \
vcipher(y, y, 3) \
vcipher(x, x, 4) \
vcipher(y, y, 4) \
vcipher(x, x, 5) \
vcipher(y, y, 5) \
vcipher(x, x, 6) \
vcipher(y, y, 6) \
vcipher(x, x, 7) \
vcipher(y, y, 7) \
vcipher(x, x, 8) \
vcipher(y, y, 8) \
vcipher(x, x, 9) \
vcipher(y, y, 9) \
vcipher(x, x, 10) \
vcipher(y, y, 10) \
vcipher(x, x, 11) \
vcipher(y, y, 11) \
vcipherlast(x, x, 12) \
vcipherlast(y, y, 12)
/* Two-block parallel encryption, AES-256. */
#define BLOCK_ENCRYPT_X2_256(x, y) \
vxor(x, x, 0) \
vxor(y, y, 0) \
vcipher(x, x, 1) \
vcipher(y, y, 1) \
vcipher(x, x, 2) \
vcipher(y, y, 2) \
vcipher(x, x, 3) \
vcipher(y, y, 3) \
vcipher(x, x, 4) \
vcipher(y, y, 4) \
vcipher(x, x, 5) \
vcipher(y, y, 5) \
vcipher(x, x, 6) \
vcipher(y, y, 6) \
vcipher(x, x, 7) \
vcipher(y, y, 7) \
vcipher(x, x, 8) \
vcipher(y, y, 8) \
vcipher(x, x, 9) \
vcipher(y, y, 9) \
vcipher(x, x, 10) \
vcipher(y, y, 10) \
vcipher(x, x, 11) \
vcipher(y, y, 11) \
vcipher(x, x, 12) \
vcipher(y, y, 12) \
vcipher(x, x, 13) \
vcipher(y, y, 13) \
vcipherlast(x, x, 14) \
vcipherlast(y, y, 14)
/*
 * AES-encrypt four blocks (registers x0..x3) with the round
 * instructions interleaved across the four independent AES instances.
 */
#define BLOCK_ENCRYPT_X4_128(x0, x1, x2, x3) \
vxor(x0, x0, 0) \
vxor(x1, x1, 0) \
vxor(x2, x2, 0) \
vxor(x3, x3, 0) \
vcipher(x0, x0, 1) \
vcipher(x1, x1, 1) \
vcipher(x2, x2, 1) \
vcipher(x3, x3, 1) \
vcipher(x0, x0, 2) \
vcipher(x1, x1, 2) \
vcipher(x2, x2, 2) \
vcipher(x3, x3, 2) \
vcipher(x0, x0, 3) \
vcipher(x1, x1, 3) \
vcipher(x2, x2, 3) \
vcipher(x3, x3, 3) \
vcipher(x0, x0, 4) \
vcipher(x1, x1, 4) \
vcipher(x2, x2, 4) \
vcipher(x3, x3, 4) \
vcipher(x0, x0, 5) \
vcipher(x1, x1, 5) \
vcipher(x2, x2, 5) \
vcipher(x3, x3, 5) \
vcipher(x0, x0, 6) \
vcipher(x1, x1, 6) \
vcipher(x2, x2, 6) \
vcipher(x3, x3, 6) \
vcipher(x0, x0, 7) \
vcipher(x1, x1, 7) \
vcipher(x2, x2, 7) \
vcipher(x3, x3, 7) \
vcipher(x0, x0, 8) \
vcipher(x1, x1, 8) \
vcipher(x2, x2, 8) \
vcipher(x3, x3, 8) \
vcipher(x0, x0, 9) \
vcipher(x1, x1, 9) \
vcipher(x2, x2, 9) \
vcipher(x3, x3, 9) \
vcipherlast(x0, x0, 10) \
vcipherlast(x1, x1, 10) \
vcipherlast(x2, x2, 10) \
vcipherlast(x3, x3, 10)
/* Four-block parallel encryption, AES-192. */
#define BLOCK_ENCRYPT_X4_192(x0, x1, x2, x3) \
vxor(x0, x0, 0) \
vxor(x1, x1, 0) \
vxor(x2, x2, 0) \
vxor(x3, x3, 0) \
vcipher(x0, x0, 1) \
vcipher(x1, x1, 1) \
vcipher(x2, x2, 1) \
vcipher(x3, x3, 1) \
vcipher(x0, x0, 2) \
vcipher(x1, x1, 2) \
vcipher(x2, x2, 2) \
vcipher(x3, x3, 2) \
vcipher(x0, x0, 3) \
vcipher(x1, x1, 3) \
vcipher(x2, x2, 3) \
vcipher(x3, x3, 3) \
vcipher(x0, x0, 4) \
vcipher(x1, x1, 4) \
vcipher(x2, x2, 4) \
vcipher(x3, x3, 4) \
vcipher(x0, x0, 5) \
vcipher(x1, x1, 5) \
vcipher(x2, x2, 5) \
vcipher(x3, x3, 5) \
vcipher(x0, x0, 6) \
vcipher(x1, x1, 6) \
vcipher(x2, x2, 6) \
vcipher(x3, x3, 6) \
vcipher(x0, x0, 7) \
vcipher(x1, x1, 7) \
vcipher(x2, x2, 7) \
vcipher(x3, x3, 7) \
vcipher(x0, x0, 8) \
vcipher(x1, x1, 8) \
vcipher(x2, x2, 8) \
vcipher(x3, x3, 8) \
vcipher(x0, x0, 9) \
vcipher(x1, x1, 9) \
vcipher(x2, x2, 9) \
vcipher(x3, x3, 9) \
vcipher(x0, x0, 10) \
vcipher(x1, x1, 10) \
vcipher(x2, x2, 10) \
vcipher(x3, x3, 10) \
vcipher(x0, x0, 11) \
vcipher(x1, x1, 11) \
vcipher(x2, x2, 11) \
vcipher(x3, x3, 11) \
vcipherlast(x0, x0, 12) \
vcipherlast(x1, x1, 12) \
vcipherlast(x2, x2, 12) \
vcipherlast(x3, x3, 12)
/* Four-block parallel encryption, AES-256. */
#define BLOCK_ENCRYPT_X4_256(x0, x1, x2, x3) \
vxor(x0, x0, 0) \
vxor(x1, x1, 0) \
vxor(x2, x2, 0) \
vxor(x3, x3, 0) \
vcipher(x0, x0, 1) \
vcipher(x1, x1, 1) \
vcipher(x2, x2, 1) \
vcipher(x3, x3, 1) \
vcipher(x0, x0, 2) \
vcipher(x1, x1, 2) \
vcipher(x2, x2, 2) \
vcipher(x3, x3, 2) \
vcipher(x0, x0, 3) \
vcipher(x1, x1, 3) \
vcipher(x2, x2, 3) \
vcipher(x3, x3, 3) \
vcipher(x0, x0, 4) \
vcipher(x1, x1, 4) \
vcipher(x2, x2, 4) \
vcipher(x3, x3, 4) \
vcipher(x0, x0, 5) \
vcipher(x1, x1, 5) \
vcipher(x2, x2, 5) \
vcipher(x3, x3, 5) \
vcipher(x0, x0, 6) \
vcipher(x1, x1, 6) \
vcipher(x2, x2, 6) \
vcipher(x3, x3, 6) \
vcipher(x0, x0, 7) \
vcipher(x1, x1, 7) \
vcipher(x2, x2, 7) \
vcipher(x3, x3, 7) \
vcipher(x0, x0, 8) \
vcipher(x1, x1, 8) \
vcipher(x2, x2, 8) \
vcipher(x3, x3, 8) \
vcipher(x0, x0, 9) \
vcipher(x1, x1, 9) \
vcipher(x2, x2, 9) \
vcipher(x3, x3, 9) \
vcipher(x0, x0, 10) \
vcipher(x1, x1, 10) \
vcipher(x2, x2, 10) \
vcipher(x3, x3, 10) \
vcipher(x0, x0, 11) \
vcipher(x1, x1, 11) \
vcipher(x2, x2, 11) \
vcipher(x3, x3, 11) \
vcipher(x0, x0, 12) \
vcipher(x1, x1, 12) \
vcipher(x2, x2, 12) \
vcipher(x3, x3, 12) \
vcipher(x0, x0, 13) \
vcipher(x1, x1, 13) \
vcipher(x2, x2, 13) \
vcipher(x3, x3, 13) \
vcipherlast(x0, x0, 14) \
vcipherlast(x1, x1, 14) \
vcipherlast(x2, x2, 14) \
vcipherlast(x3, x3, 14)
#if BR_POWER8_LE
/*
 * On little-endian systems, 32-bit words must be byteswapped after
 * loading and before storing; idx2be is the vperm pattern for that
 * swap (kept in v15, i.e. VSX register 47).
 */
static const uint32_t idx2be[] = {
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
};
#define BYTESWAP_INIT lxvw4x(47, 0, %[idx2be])
#define BYTESWAP(x) vperm(x, x, x, 15)
#define BYTESWAPX(d, s) vperm(d, s, s, 15)
#define BYTESWAP_REG , [idx2be] "b" (idx2be)
#else
/* Big-endian: no swap needed; BYTESWAPX degrades to a register move. */
#define BYTESWAP_INIT
#define BYTESWAP(x)
#define BYTESWAPX(d, s) vand(d, s, s)
#define BYTESWAP_REG
#endif
/* 128-bit counter increments: by 1 (single block) and by 4 (groups of 4). */
static const uint32_t ctrinc[] = {
0, 0, 0, 1
};
static const uint32_t ctrinc_x4[] = {
0, 0, 0, 4
};
/* Load the counter increment constant into v28 (VSX register 60). */
#define INCR_128_INIT lxvw4x(60, 0, %[ctrinc])
#define INCR_128_X4_INIT lxvw4x(60, 0, %[ctrinc_x4])
/*
 * 128-bit big-endian increment: d = s + v28. vaddcuw produces the
 * per-word carry bits, which vsldoi rotates into the next-higher
 * 32-bit word before re-adding; three such rounds propagate a carry
 * across all four words. v29 and v30 are used as scratch.
 */
#define INCR_128(d, s) \
vaddcuw(29, s, 28) \
vadduwm(d, s, 28) \
vsldoi(30, 29, 29, 4) \
vaddcuw(29, d, 30) \
vadduwm(d, d, 30) \
vsldoi(30, 29, 29, 4) \
vaddcuw(29, d, 30) \
vadduwm(d, d, 30) \
vsldoi(30, 29, 29, 4) \
vadduwm(d, d, 30)
/*
 * MKCTR(size) expands to ctr_<size>(sk, ctrbuf, buf, num_blocks_x4):
 * XOR buf in place with the CTR keystream, processing num_blocks_x4
 * groups of four 16-byte blocks. ctrbuf holds four consecutive
 * 16-byte counter blocks and is updated in place with the next
 * counter values.
 */
#define MKCTR(size) \
static void \
ctr_ ## size(const unsigned char *sk, \
unsigned char *ctrbuf, unsigned char *buf, size_t num_blocks_x4) \
{ \
long cc, cc0, cc1, cc2, cc3; \
 \
cc = 0; \
cc0 = 0; \
cc1 = 16; \
cc2 = 32; \
cc3 = 48; \
asm volatile ( \
 \
/* \
 * Load subkeys into v0..v10 (up to v12/v14 for larger keys) \
 */ \
LOAD_SUBKEYS_ ## size \
li(%[cc], 0) \
 \
BYTESWAP_INIT \
INCR_128_X4_INIT \
 \
/* \
 * Load current CTR counters into v16 to v19. \
 */ \
lxvw4x(48, %[cc0], %[ctrbuf]) \
lxvw4x(49, %[cc1], %[ctrbuf]) \
lxvw4x(50, %[cc2], %[ctrbuf]) \
lxvw4x(51, %[cc3], %[ctrbuf]) \
BYTESWAP(16) \
BYTESWAP(17) \
BYTESWAP(18) \
BYTESWAP(19) \
 \
mtctr(%[num_blocks_x4]) \
 \
label(loop) \
/* \
 * Compute next counter values into v20..v23. \
 */ \
INCR_128(20, 16) \
INCR_128(21, 17) \
INCR_128(22, 18) \
INCR_128(23, 19) \
 \
/* \
 * Encrypt counter values and XOR into next data blocks. \
 */ \
lxvw4x(56, %[cc0], %[buf]) \
lxvw4x(57, %[cc1], %[buf]) \
lxvw4x(58, %[cc2], %[buf]) \
lxvw4x(59, %[cc3], %[buf]) \
BYTESWAP(24) \
BYTESWAP(25) \
BYTESWAP(26) \
BYTESWAP(27) \
BLOCK_ENCRYPT_X4_ ## size(16, 17, 18, 19) \
vxor(16, 16, 24) \
vxor(17, 17, 25) \
vxor(18, 18, 26) \
vxor(19, 19, 27) \
BYTESWAP(16) \
BYTESWAP(17) \
BYTESWAP(18) \
BYTESWAP(19) \
stxvw4x(48, %[cc0], %[buf]) \
stxvw4x(49, %[cc1], %[buf]) \
stxvw4x(50, %[cc2], %[buf]) \
stxvw4x(51, %[cc3], %[buf]) \
 \
/* \
 * Update counters and data pointer. \
 */ \
vand(16, 20, 20) \
vand(17, 21, 21) \
vand(18, 22, 22) \
vand(19, 23, 23) \
addi(%[buf], %[buf], 64) \
 \
bdnz(loop) \
 \
/* \
 * Write back new counter values. \
 */ \
BYTESWAP(16) \
BYTESWAP(17) \
BYTESWAP(18) \
BYTESWAP(19) \
stxvw4x(48, %[cc0], %[ctrbuf]) \
stxvw4x(49, %[cc1], %[ctrbuf]) \
stxvw4x(50, %[cc2], %[ctrbuf]) \
stxvw4x(51, %[cc3], %[ctrbuf]) \
 \
: [cc] "+b" (cc), [buf] "+b" (buf), \
[cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3) \
: [sk] "b" (sk), [ctrbuf] "b" (ctrbuf), \
[num_blocks_x4] "b" (num_blocks_x4), [ctrinc_x4] "b" (ctrinc_x4) \
BYTESWAP_REG \
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
"v30", "ctr", "memory" \
); \
}
MKCTR(128)
MKCTR(192)
MKCTR(256)
/*
 * MKCBCMAC(size) expands to cbcmac_<size>(sk, cbcmac, buf, num_blocks):
 * update the 16-byte CBC-MAC value in cbcmac over num_blocks 16-byte
 * blocks read from buf (buf is not modified).
 */
#define MKCBCMAC(size) \
static void \
cbcmac_ ## size(const unsigned char *sk, \
unsigned char *cbcmac, const unsigned char *buf, size_t num_blocks) \
{ \
long cc; \
 \
cc = 0; \
asm volatile ( \
 \
/* \
 * Load subkeys into v0..v10 (up to v12/v14 for larger keys) \
 */ \
LOAD_SUBKEYS_ ## size \
li(%[cc], 0) \
 \
BYTESWAP_INIT \
 \
/* \
 * Load current CBC-MAC value into v16. \
 */ \
lxvw4x(48, %[cc], %[cbcmac]) \
BYTESWAP(16) \
 \
mtctr(%[num_blocks]) \
 \
label(loop) \
/* \
 * Load next block, XOR into current CBC-MAC value, \
 * and then encrypt it. \
 */ \
lxvw4x(49, %[cc], %[buf]) \
BYTESWAP(17) \
vxor(16, 16, 17) \
BLOCK_ENCRYPT_ ## size(16) \
addi(%[buf], %[buf], 16) \
 \
bdnz(loop) \
 \
/* \
 * Write back new CBC-MAC value. \
 */ \
BYTESWAP(16) \
stxvw4x(48, %[cc], %[cbcmac]) \
 \
: [cc] "+b" (cc), [buf] "+b" (buf) \
: [sk] "b" (sk), [cbcmac] "b" (cbcmac), [num_blocks] "b" (num_blocks) \
BYTESWAP_REG \
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
"v30", "ctr", "memory" \
); \
}
MKCBCMAC(128)
MKCBCMAC(192)
MKCBCMAC(256)
/*
 * MKENCRYPT(size) expands to ctrcbc_<size>_encrypt(): CTR-encrypt
 * num_blocks 16-byte blocks in place while computing the CBC-MAC of
 * the produced ciphertext. The two AES computations attached to each
 * block are pipelined across successive loop iterations (see the
 * comments in the asm below). ctr and cbcmac (16 bytes each) are
 * updated in place.
 */
#define MKENCRYPT(size) \
static void \
ctrcbc_ ## size ## _encrypt(const unsigned char *sk, \
unsigned char *ctr, unsigned char *cbcmac, unsigned char *buf, \
size_t num_blocks) \
{ \
long cc; \
 \
cc = 0; \
asm volatile ( \
 \
/* \
 * Load subkeys into v0..v10 (up to v12/v14 for larger keys) \
 */ \
LOAD_SUBKEYS_ ## size \
li(%[cc], 0) \
 \
BYTESWAP_INIT \
INCR_128_INIT \
 \
/* \
 * Load current CTR counter into v16, and current \
 * CBC-MAC IV into v17. \
 */ \
lxvw4x(48, %[cc], %[ctr]) \
lxvw4x(49, %[cc], %[cbcmac]) \
BYTESWAP(16) \
BYTESWAP(17) \
 \
/* \
 * At each iteration, we do two parallel encryption: \
 * - new counter value for encryption of the next block; \
 * - CBC-MAC over the previous encrypted block. \
 * Thus, each plaintext block implies two AES instances, \
 * over two successive iterations. This requires a single \
 * counter encryption before the loop, and a single \
 * CBC-MAC encryption after the loop. \
 */ \
 \
/* \
 * Encrypt first block (into v20). \
 */ \
lxvw4x(52, %[cc], %[buf]) \
BYTESWAP(20) \
INCR_128(22, 16) \
BLOCK_ENCRYPT_ ## size(16) \
vxor(20, 20, 16) \
BYTESWAPX(21, 20) \
stxvw4x(53, %[cc], %[buf]) \
vand(16, 22, 22) \
addi(%[buf], %[buf], 16) \
 \
/* \
 * Load loop counter; skip the loop if there is only \
 * one block in total (already handled by the boundary \
 * conditions). \
 */ \
mtctr(%[num_blocks]) \
bdz(fastexit) \
 \
label(loop) \
/* \
 * Upon loop entry: \
 * v16 counter value for next block \
 * v17 current CBC-MAC value \
 * v20 encrypted previous block \
 */ \
vxor(17, 17, 20) \
INCR_128(22, 16) \
lxvw4x(52, %[cc], %[buf]) \
BYTESWAP(20) \
BLOCK_ENCRYPT_X2_ ## size(16, 17) \
vxor(20, 20, 16) \
BYTESWAPX(21, 20) \
stxvw4x(53, %[cc], %[buf]) \
addi(%[buf], %[buf], 16) \
vand(16, 22, 22) \
 \
bdnz(loop) \
 \
label(fastexit) \
vxor(17, 17, 20) \
BLOCK_ENCRYPT_ ## size(17) \
BYTESWAP(16) \
BYTESWAP(17) \
stxvw4x(48, %[cc], %[ctr]) \
stxvw4x(49, %[cc], %[cbcmac]) \
 \
: [cc] "+b" (cc), [buf] "+b" (buf) \
: [sk] "b" (sk), [ctr] "b" (ctr), [cbcmac] "b" (cbcmac), \
[num_blocks] "b" (num_blocks), [ctrinc] "b" (ctrinc) \
BYTESWAP_REG \
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
"v30", "ctr", "memory" \
); \
}
MKENCRYPT(128)
MKENCRYPT(192)
MKENCRYPT(256)
/*
 * MKDECRYPT(size) expands to ctrcbc_<size>_decrypt(): update the
 * CBC-MAC with each ciphertext block and CTR-decrypt it in place.
 * Since the MAC is computed over the ciphertext as read, both AES
 * instances for a block run in the same iteration and no pre/post
 * loop work is needed. ctr and cbcmac are updated in place.
 */
#define MKDECRYPT(size) \
static void \
ctrcbc_ ## size ## _decrypt(const unsigned char *sk, \
unsigned char *ctr, unsigned char *cbcmac, unsigned char *buf, \
size_t num_blocks) \
{ \
long cc; \
 \
cc = 0; \
asm volatile ( \
 \
/* \
 * Load subkeys into v0..v10 (up to v12/v14 for larger keys) \
 */ \
LOAD_SUBKEYS_ ## size \
li(%[cc], 0) \
 \
BYTESWAP_INIT \
INCR_128_INIT \
 \
/* \
 * Load current CTR counter into v16, and current \
 * CBC-MAC IV into v17. \
 */ \
lxvw4x(48, %[cc], %[ctr]) \
lxvw4x(49, %[cc], %[cbcmac]) \
BYTESWAP(16) \
BYTESWAP(17) \
 \
/* \
 * At each iteration, we do two parallel encryption: \
 * - new counter value for decryption of the next block; \
 * - CBC-MAC over the next encrypted block. \
 * Each iteration performs the two AES instances related \
 * to the current block; there is thus no need for some \
 * extra pre-loop and post-loop work as in encryption. \
 */ \
 \
mtctr(%[num_blocks]) \
 \
label(loop) \
/* \
 * Upon loop entry: \
 * v16 counter value for next block \
 * v17 current CBC-MAC value \
 */ \
lxvw4x(52, %[cc], %[buf]) \
BYTESWAP(20) \
vxor(17, 17, 20) \
INCR_128(22, 16) \
BLOCK_ENCRYPT_X2_ ## size(16, 17) \
vxor(20, 20, 16) \
BYTESWAPX(21, 20) \
stxvw4x(53, %[cc], %[buf]) \
addi(%[buf], %[buf], 16) \
vand(16, 22, 22) \
 \
bdnz(loop) \
 \
/* \
 * Store back counter and CBC-MAC value. \
 */ \
BYTESWAP(16) \
BYTESWAP(17) \
stxvw4x(48, %[cc], %[ctr]) \
stxvw4x(49, %[cc], %[cbcmac]) \
 \
: [cc] "+b" (cc), [buf] "+b" (buf) \
: [sk] "b" (sk), [ctr] "b" (ctr), [cbcmac] "b" (cbcmac), \
[num_blocks] "b" (num_blocks), [ctrinc] "b" (ctrinc) \
BYTESWAP_REG \
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
"v30", "ctr", "memory" \
); \
}
MKDECRYPT(128)
MKDECRYPT(192)
MKDECRYPT(256)
/* see bearssl_block.h */
void
br_aes_pwr8_ctrcbc_encrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
	void *ctr, void *cbcmac, void *data, size_t len)
{
	size_t num_blocks;

	/* Nothing to do on empty input. */
	if (len == 0) {
		return;
	}
	num_blocks = len >> 4;
	/* Dispatch on the round count recorded at key-schedule time. */
	if (ctx->num_rounds == 10) {
		ctrcbc_128_encrypt(ctx->skey.skni, ctr, cbcmac, data, num_blocks);
	} else if (ctx->num_rounds == 12) {
		ctrcbc_192_encrypt(ctx->skey.skni, ctr, cbcmac, data, num_blocks);
	} else {
		ctrcbc_256_encrypt(ctx->skey.skni, ctr, cbcmac, data, num_blocks);
	}
}
/* see bearssl_block.h */
void
br_aes_pwr8_ctrcbc_decrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
	void *ctr, void *cbcmac, void *data, size_t len)
{
	size_t num_blocks;

	/* Nothing to do on empty input. */
	if (len == 0) {
		return;
	}
	num_blocks = len >> 4;
	/* Dispatch on the round count recorded at key-schedule time. */
	if (ctx->num_rounds == 10) {
		ctrcbc_128_decrypt(ctx->skey.skni, ctr, cbcmac, data, num_blocks);
	} else if (ctx->num_rounds == 12) {
		ctrcbc_192_decrypt(ctx->skey.skni, ctr, cbcmac, data, num_blocks);
	} else {
		ctrcbc_256_decrypt(ctx->skey.skni, ctr, cbcmac, data, num_blocks);
	}
}
/*
 * 128-bit big-endian increment: dst = src + 1, computed on two
 * 64-bit halves (dst and src are 16-byte buffers; they may alias).
 */
static inline void
incr_ctr(void *dst, const void *src)
{
uint64_t hi, lo;
hi = br_dec64be(src);
lo = br_dec64be((const unsigned char *)src + 8);
lo ++;
/*
 * Branchless carry: (lo | -lo) >> 63 is 1 when lo != 0, and 0 when
 * lo == 0 (i.e. when the low half just wrapped), so the XOR with 1
 * adds exactly the carry bit. Keep this branch-free form.
 */
hi += ((lo | -lo) >> 63) ^ (uint64_t)1;
br_enc64be(dst, hi);
br_enc64be((unsigned char *)dst + 8, lo);
}
/* see bearssl_block.h */
void
br_aes_pwr8_ctrcbc_ctr(const br_aes_pwr8_ctrcbc_keys *ctx,
void *ctr, void *data, size_t len)
{
unsigned char ctrbuf[64];
/*
 * Build four consecutive counter blocks so the 4-way parallel asm
 * routines can be used.
 */
memcpy(ctrbuf, ctr, 16);
incr_ctr(ctrbuf + 16, ctrbuf);
incr_ctr(ctrbuf + 32, ctrbuf + 16);
incr_ctr(ctrbuf + 48, ctrbuf + 32);
if (len >= 64) {
/* Process all full 64-byte groups; ctr_*() updates ctrbuf in place. */
switch (ctx->num_rounds) {
case 10:
ctr_128(ctx->skey.skni, ctrbuf, data, len >> 6);
break;
case 12:
ctr_192(ctx->skey.skni, ctrbuf, data, len >> 6);
break;
default:
ctr_256(ctx->skey.skni, ctrbuf, data, len >> 6);
break;
}
data = (unsigned char *)data + (len & ~(size_t)63);
len &= 63;
}
if (len > 0) {
unsigned char tmp[64];
/*
 * Tail of 1..63 bytes: write back into *ctr the counter block
 * matching the last (possibly partial) block consumed by the
 * padded tail processing below.
 * NOTE(review): when len < 16, *ctr is left untouched by these
 * branches -- confirm the partial-block counter convention
 * against bearssl_block.h.
 */
if (len >= 32) {
if (len >= 48) {
memcpy(ctr, ctrbuf + 48, 16);
} else {
memcpy(ctr, ctrbuf + 32, 16);
}
} else {
if (len >= 16) {
memcpy(ctr, ctrbuf + 16, 16);
}
}
/* Pad the tail into a full 64-byte group and process it. */
memcpy(tmp, data, len);
memset(tmp + len, 0, (sizeof tmp) - len);
switch (ctx->num_rounds) {
case 10:
ctr_128(ctx->skey.skni, ctrbuf, tmp, 1);
break;
case 12:
ctr_192(ctx->skey.skni, ctrbuf, tmp, 1);
break;
default:
ctr_256(ctx->skey.skni, ctrbuf, tmp, 1);
break;
}
memcpy(data, tmp, len);
} else {
/* Exact multiple of 64 bytes: next counter is ctrbuf[0]. */
memcpy(ctr, ctrbuf, 16);
}
}
/* see bearssl_block.h */
void
br_aes_pwr8_ctrcbc_mac(const br_aes_pwr8_ctrcbc_keys *ctx,
	void *cbcmac, const void *data, size_t len)
{
	size_t num_blocks;

	/* Nothing to do on empty input. */
	if (len == 0) {
		return;
	}
	num_blocks = len >> 4;
	/* Dispatch on the round count recorded at key-schedule time. */
	if (ctx->num_rounds == 10) {
		cbcmac_128(ctx->skey.skni, cbcmac, data, num_blocks);
	} else if (ctx->num_rounds == 12) {
		cbcmac_192(ctx->skey.skni, cbcmac, data, num_blocks);
	} else {
		cbcmac_256(ctx->skey.skni, cbcmac, data, num_blocks);
	}
}
/* see bearssl_block.h */
const br_block_ctrcbc_class br_aes_pwr8_ctrcbc_vtable = {
sizeof(br_aes_pwr8_ctrcbc_keys), /* context structure size */
16, /* AES block size, in bytes */
4, /* presumably log2(block size) -- see bearssl_block.h */
(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
&br_aes_pwr8_ctrcbc_init,
(void (*)(const br_block_ctrcbc_class *const *,
void *, void *, void *, size_t))
&br_aes_pwr8_ctrcbc_encrypt,
(void (*)(const br_block_ctrcbc_class *const *,
void *, void *, void *, size_t))
&br_aes_pwr8_ctrcbc_decrypt,
(void (*)(const br_block_ctrcbc_class *const *,
void *, void *, size_t))
&br_aes_pwr8_ctrcbc_ctr,
(void (*)(const br_block_ctrcbc_class *const *,
void *, const void *, size_t))
&br_aes_pwr8_ctrcbc_mac
};
#else
/* see bearssl_block.h */
const br_block_ctrcbc_class *
br_aes_pwr8_ctrcbc_get_vtable(void)
{
/* POWER8 support was not compiled in: no vtable available. */
return NULL;
}
#endif

View File

@ -1,411 +0,0 @@
/*
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define BR_POWER_ASM_MACROS 1
#include "inner.h"
/*
* This is the GHASH implementation that leverages the POWER8 opcodes.
*/
#if BR_POWER8
/*
* Some symbolic names for registers.
* HB0 = 16 bytes of value 0
* HB1 = 16 bytes of value 1
* HB2 = 16 bytes of value 2
* HB6 = 16 bytes of value 6
* HB7 = 16 bytes of value 7
* TT0, TT1 and TT2 are temporaries
*
* BSW holds the pattern for byteswapping 32-bit words; this is set only
* on little-endian systems. XBSW is the same register with the +32 offset
* for access with the VSX opcodes.
*/
/* Symbolic vector register numbers (see the naming comment above). */
#define HB0 0
#define HB1 1
#define HB2 2
#define HB6 3
#define HB7 4
#define TT0 5
#define TT1 6
#define TT2 7
#define BSW 8
/* BSW viewed as a VSX register (vector registers are VSX 32..63). */
#define XBSW 40
/*
 * Macro to initialise the constants: HB0 is zeroed, and HB1/HB2/
 * HB6/HB7 are splatted with the byte values 1, 2, 6 and 7.
 */
#define INIT \
vxor(HB0, HB0, HB0) \
vspltisb(HB1, 1) \
vspltisb(HB2, 2) \
vspltisb(HB6, 6) \
vspltisb(HB7, 7) \
INIT_BSW
/*
 * Fix endianness of a value after reading it or before writing it, if
 * necessary (little-endian only; both macros are no-ops on big-endian).
 */
#if BR_POWER8_LE
#define INIT_BSW lxvw4x(XBSW, 0, %[idx2be])
#define FIX_ENDIAN(xx) vperm(xx, xx, xx, BSW)
#else
#define INIT_BSW
#define FIX_ENDIAN(xx)
#endif
/*
 * Left-shift x0:x1 by one bit to the left. This is a corrective action
 * needed because GHASH is defined in full little-endian specification,
 * while the opcodes use full big-endian convention, so the 255-bit product
 * ends up one bit to the right. (The byte-level vsldoi followed by the
 * 7-bit vsr moves x1's top bit into position under x0's low bit.)
 */
#define SL_256(x0, x1) \
vsldoi(TT0, HB0, x1, 1) \
vsl(x0, x0, HB1) \
vsr(TT0, TT0, HB7) \
vsl(x1, x1, HB1) \
vxor(x0, x0, TT0)
/*
 * Reduce x0:x1 in GF(2^128), result in xd (register xd may be the same as
 * x0 or x1, or a different register). x0 and x1 are modified.
 * The 1-, 2- and 7-bit shifts correspond to the terms of the GHASH
 * reduction polynomial (x^128 + x^7 + x^2 + x + 1) -- presumably; see
 * the GHASH specification.
 */
#define REDUCE_F128(xd, x0, x1) \
vxor(x0, x0, x1) \
vsr(TT0, x1, HB1) \
vsr(TT1, x1, HB2) \
vsr(TT2, x1, HB7) \
vxor(x0, x0, TT0) \
vxor(TT1, TT1, TT2) \
vxor(x0, x0, TT1) \
vsldoi(x1, x1, HB0, 15) \
vsl(TT1, x1, HB6) \
vsl(TT2, x1, HB1) \
vxor(x1, TT1, TT2) \
vsr(TT0, x1, HB1) \
vsr(TT1, x1, HB2) \
vsr(TT2, x1, HB7) \
vxor(x0, x0, x1) \
vxor(x0, x0, TT0) \
vxor(TT1, TT1, TT2) \
vxor(xd, x0, TT1)
/* see bearssl_hash.h */
/*
 * GHASH implementation using POWER8 carry-less multiply (vpmsumd).
 * y points to the 16-byte running hash value (updated in place), h is
 * the 16-byte hash key, and data/len is the buffer to absorb. A partial
 * trailing block (len not a multiple of 16) is zero-padded into tmp[].
 *
 * Register naming note: VSX register N (used by lxvw4x/xxpermdi/stxvw4x)
 * aliases vector register N-32 (used by the v* opcodes), e.g. VSX 41
 * is v9 — see the XBSW comment above.
 */
void
br_ghash_pwr8(void *y, const void *h, const void *data, size_t len)
{
	const unsigned char *buf1, *buf2;
	size_t num4, num1;
	unsigned char tmp[64];
	/* Byte offsets of the four blocks within a 64-byte group; kept in
	   variables so the asm can use them as index registers for lxvw4x. */
	long cc0, cc1, cc2, cc3;

#if BR_POWER8_LE
	/* Permutation pattern to byteswap 32-bit words (for FIX_ENDIAN). */
	static const uint32_t idx2be[] = {
		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
	};
#endif

	buf1 = data;

	/*
	 * Assembly code requires data into two chunks; first chunk
	 * must contain a number of blocks which is a multiple of 4.
	 * Since the processing for the first chunk is faster, we want
	 * to make it as big as possible.
	 *
	 * For the remainder, there are two possibilities:
	 * -- if the remainder size is a multiple of 16, then use it
	 *    in place;
	 * -- otherwise, copy it to the tmp[] array and pad it with
	 *    zeros.
	 */
	num4 = len >> 6;
	buf2 = buf1 + (num4 << 6);
	len &= 63;
	num1 = (len + 15) >> 4;
	if ((len & 15) != 0) {
		memcpy(tmp, buf2, len);
		memset(tmp + len, 0, (num1 << 4) - len);
		buf2 = tmp;
	}

	cc0 = 0;
	cc1 = 16;
	cc2 = 32;
	cc3 = 48;
	asm volatile (
		INIT

		/*
		 * Load current h (denoted hereafter h1) in v9.
		 */
		lxvw4x(41, 0, %[h])
		FIX_ENDIAN(9)

		/*
		 * Load current y into v28.
		 */
		lxvw4x(60, 0, %[y])
		FIX_ENDIAN(28)

		/*
		 * Split h1 into three registers:
		 *   v17 = h1_1:h1_0
		 *   v18 = 0:h1_0
		 *   v19 = h1_1:0
		 */
		xxpermdi(49, 41, 41, 2)
		vsldoi(18, HB0, 9, 8)
		vsldoi(19, 9, HB0, 8)

		/*
		 * If num4 is 0, skip directly to the second chunk.
		 */
		cmpldi(%[num4], 0)
		beq(chunk1)

		/*
		 * Compute h2 = h*h in v10.
		 */
		vpmsumd(10, 18, 18)
		vpmsumd(11, 19, 19)
		SL_256(10, 11)
		REDUCE_F128(10, 10, 11)

		/*
		 * Compute h3 = h*h*h in v11.
		 * We first split h2 into:
		 *   v10 = h2_0:h2_1
		 *   v11 = 0:h2_0
		 *   v12 = h2_1:0
		 * Then we do the product with h1, and reduce into v11.
		 */
		vsldoi(11, HB0, 10, 8)
		vsldoi(12, 10, HB0, 8)
		vpmsumd(13, 10, 17)
		vpmsumd(11, 11, 18)
		vpmsumd(12, 12, 19)
		vsldoi(14, HB0, 13, 8)
		vsldoi(15, 13, HB0, 8)
		vxor(11, 11, 14)
		vxor(12, 12, 15)
		SL_256(11, 12)
		REDUCE_F128(11, 11, 12)

		/*
		 * Compute h4 = h*h*h*h in v12. This is done by squaring h2.
		 */
		vsldoi(12, HB0, 10, 8)
		vsldoi(13, 10, HB0, 8)
		vpmsumd(12, 12, 12)
		vpmsumd(13, 13, 13)
		SL_256(12, 13)
		REDUCE_F128(12, 12, 13)

		/*
		 * Repack h1, h2, h3 and h4:
		 *   v13 = h4_0:h3_0
		 *   v14 = h4_1:h3_1
		 *   v15 = h2_0:h1_0
		 *   v16 = h2_1:h1_1
		 */
		xxpermdi(45, 44, 43, 0)
		xxpermdi(46, 44, 43, 3)
		xxpermdi(47, 42, 41, 0)
		xxpermdi(48, 42, 41, 3)

		/*
		 * Loop for each group of four blocks.
		 */
		mtctr(%[num4])
	label(loop4)
		/*
		 * Read the four next blocks.
		 *   v20 = y + a0 = b0
		 *   v21 = a1 = b1
		 *   v22 = a2 = b2
		 *   v23 = a3 = b3
		 */
		lxvw4x(52, %[cc0], %[buf1])
		lxvw4x(53, %[cc1], %[buf1])
		lxvw4x(54, %[cc2], %[buf1])
		lxvw4x(55, %[cc3], %[buf1])
		FIX_ENDIAN(20)
		FIX_ENDIAN(21)
		FIX_ENDIAN(22)
		FIX_ENDIAN(23)
		addi(%[buf1], %[buf1], 64)
		vxor(20, 20, 28)

		/*
		 * Repack the blocks into v9, v10, v11 and v12.
		 *   v9  = b0_0:b1_0
		 *   v10 = b0_1:b1_1
		 *   v11 = b2_0:b3_0
		 *   v12 = b2_1:b3_1
		 */
		xxpermdi(41, 52, 53, 0)
		xxpermdi(42, 52, 53, 3)
		xxpermdi(43, 54, 55, 0)
		xxpermdi(44, 54, 55, 3)

		/*
		 * Compute the products.
		 *   v20 = b0_0*h4_0 + b1_0*h3_0
		 *   v21 = b0_1*h4_0 + b1_1*h3_0
		 *   v22 = b0_0*h4_1 + b1_0*h3_1
		 *   v23 = b0_1*h4_1 + b1_1*h3_1
		 *   v24 = b2_0*h2_0 + b3_0*h1_0
		 *   v25 = b2_1*h2_0 + b3_1*h1_0
		 *   v26 = b2_0*h2_1 + b3_0*h1_1
		 *   v27 = b2_1*h2_1 + b3_1*h1_1
		 */
		vpmsumd(20, 13, 9)
		vpmsumd(21, 13, 10)
		vpmsumd(22, 14, 9)
		vpmsumd(23, 14, 10)
		vpmsumd(24, 15, 11)
		vpmsumd(25, 15, 12)
		vpmsumd(26, 16, 11)
		vpmsumd(27, 16, 12)

		/*
		 * Sum products into a single 256-bit result in v11:v12.
		 */
		vxor(11, 20, 24)
		vxor(12, 23, 27)
		vxor( 9, 21, 22)
		vxor(10, 25, 26)
		vxor(20, 9, 10)
		vsldoi( 9, HB0, 20, 8)
		vsldoi(10, 20, HB0, 8)
		vxor(11, 11, 9)
		vxor(12, 12, 10)

		/*
		 * Fix and reduce in GF(2^128); this is the new y (in v28).
		 */
		SL_256(11, 12)
		REDUCE_F128(28, 11, 12)

		/*
		 * Loop for next group of four blocks.
		 */
		bdnz(loop4)

		/*
		 * Process second chunk, one block at a time.
		 */
	label(chunk1)
		cmpldi(%[num1], 0)
		beq(done)

		mtctr(%[num1])
	label(loop1)
		/*
		 * Load next data block and XOR it into y.
		 */
		lxvw4x(41, 0, %[buf2])
#if BR_POWER8_LE
		FIX_ENDIAN(9)
#endif
		addi(%[buf2], %[buf2], 16)
		vxor(9, 28, 9)

		/*
		 * Split y into doublewords:
		 *   v9  = y_0:y_1
		 *   v10 = 0:y_0
		 *   v11 = y_1:0
		 */
		vsldoi(10, HB0, 9, 8)
		vsldoi(11, 9, HB0, 8)

		/*
		 * Compute products with h:
		 *   v12 = y_0 * h_0
		 *   v13 = y_1 * h_1
		 *   v14 = y_1 * h_0 + y_0 * h_1
		 */
		vpmsumd(14, 9, 17)
		vpmsumd(12, 10, 18)
		vpmsumd(13, 11, 19)

		/*
		 * Propagate v14 into v12:v13 to finalise product.
		 */
		vsldoi(10, HB0, 14, 8)
		vsldoi(11, 14, HB0, 8)
		vxor(12, 12, 10)
		vxor(13, 13, 11)

		/*
		 * Fix result and reduce into v28 (next value for y).
		 */
		SL_256(12, 13)
		REDUCE_F128(28, 12, 13)
		bdnz(loop1)

	label(done)
		/*
		 * Write back the new y.
		 */
		FIX_ENDIAN(28)
		stxvw4x(60, 0, %[y])

		/* buf1/buf2 advance through the data, hence read-write ("+b"). */
	: [buf1] "+b" (buf1), [buf2] "+b" (buf2)
	: [y] "b" (y), [h] "b" (h), [num4] "b" (num4), [num1] "b" (num1),
	  [cc0] "b" (cc0), [cc1] "b" (cc1), [cc2] "b" (cc2), [cc3] "b" (cc3)
#if BR_POWER8_LE
	, [idx2be] "b" (idx2be)
#endif
	/* All vector registers used above (v0-v29), the count register used
	   by mtctr/bdnz, and memory (y is written) are clobbered. */
	: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
	  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
	  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
	  "ctr", "memory"
	);
}
/* see bearssl_hash.h */
br_ghash
br_ghash_pwr8_get(void)
{
	/* POWER8 opcodes were compiled in; hand out the implementation. */
	br_ghash impl;

	impl = &br_ghash_pwr8;
	return impl;
}
#else
/* see bearssl_hash.h */
br_ghash
br_ghash_pwr8_get(void)
{
	/*
	 * Built without POWER8 support: report that no implementation
	 * is available.
	 */
	br_ghash none;

	none = 0;
	return none;
}
#endif

View File

@ -1913,19 +1913,6 @@ unsigned br_aes_x86ni_keysched_enc(unsigned char *skni,
unsigned br_aes_x86ni_keysched_dec(unsigned char *skni,
const void *key, size_t len);
/*
* Test support for AES POWER8 opcodes.
*/
int br_aes_pwr8_supported(void);
/*
* AES key schedule, using POWER8 instructions. This yields the
* subkeys in the encryption direction. Number of rounds is returned.
* Key size MUST be 16, 24 or 32 bytes; otherwise, 0 is returned.
*/
unsigned br_aes_pwr8_keysched(unsigned char *skni,
const void *key, size_t len);
/* ==================================================================== */
/*
* RSA.

View File

@ -229,13 +229,6 @@ static const br_config_option config[] = {
1
#else
0
#endif
},
{ "BR_POWER8",
#if BR_POWER8
1
#else
0
#endif
},
{ "BR_RDRAND",

View File

@ -28,7 +28,7 @@
void
br_ssl_engine_set_default_aes_cbc(br_ssl_engine_context *cc)
{
#if BR_AES_X86NI || BR_POWER8
#if BR_AES_X86NI
const br_block_cbcenc_class *ienc;
const br_block_cbcdec_class *idec;
#endif
@ -44,14 +44,6 @@ br_ssl_engine_set_default_aes_cbc(br_ssl_engine_context *cc)
return;
}
#endif
#if BR_POWER8
ienc = br_aes_pwr8_cbcenc_get_vtable();
idec = br_aes_pwr8_cbcdec_get_vtable();
if (ienc != NULL && idec != NULL) {
br_ssl_engine_set_aes_cbc(cc, ienc, idec);
return;
}
#endif
#if BR_64
br_ssl_engine_set_aes_cbc(cc,
&br_aes_ct64_cbcenc_vtable,

View File

@ -28,7 +28,7 @@
void
br_ssl_engine_set_default_aes_ccm(br_ssl_engine_context *cc)
{
#if BR_AES_X86NI || BR_POWER8
#if BR_AES_X86NI
const br_block_ctrcbc_class *ictrcbc;
#endif
@ -46,17 +46,6 @@ br_ssl_engine_set_default_aes_ccm(br_ssl_engine_context *cc)
br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct_ctrcbc_vtable);
#endif
}
#elif BR_POWER8
ictrcbc = br_aes_pwr8_ctrcbc_get_vtable();
if (ictrcbc != NULL) {
br_ssl_engine_set_aes_ctrcbc(cc, ictrcbc);
} else {
#if BR_64
br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct64_ctrcbc_vtable);
#else
br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct_ctrcbc_vtable);
#endif
}
#else
#if BR_64
br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct64_ctrcbc_vtable);

View File

@ -28,7 +28,7 @@
void
br_ssl_engine_set_default_aes_gcm(br_ssl_engine_context *cc)
{
#if BR_AES_X86NI || BR_POWER8
#if BR_AES_X86NI
const br_block_ctr_class *ictr;
br_ghash ighash;
#endif
@ -47,17 +47,6 @@ br_ssl_engine_set_default_aes_gcm(br_ssl_engine_context *cc)
br_ssl_engine_set_aes_ctr(cc, &br_aes_ct_ctr_vtable);
#endif
}
#elif BR_POWER8
ictr = br_aes_pwr8_ctr_get_vtable();
if (ictr != NULL) {
br_ssl_engine_set_aes_ctr(cc, ictr);
} else {
#if BR_64
br_ssl_engine_set_aes_ctr(cc, &br_aes_ct64_ctr_vtable);
#else
br_ssl_engine_set_aes_ctr(cc, &br_aes_ct_ctr_vtable);
#endif
}
#else
#if BR_64
br_ssl_engine_set_aes_ctr(cc, &br_aes_ct64_ctr_vtable);
@ -72,13 +61,6 @@ br_ssl_engine_set_default_aes_gcm(br_ssl_engine_context *cc)
return;
}
#endif
#if BR_POWER8
ighash = br_ghash_pwr8_get();
if (ighash != 0) {
br_ssl_engine_set_ghash(cc, ighash);
return;
}
#endif
#if BR_LOMUL
br_ssl_engine_set_ghash(cc, &br_ghash_ctmul32);
#elif BR_64