mirror of https://github.com/ClassiCube/ClassiCube
Tidy up Graphics.h a bit
This commit is contained in:
parent
6dcd418acb
commit
b3e545f202
161
src/Graphics.h
161
src/Graphics.h
|
|
@ -5,7 +5,20 @@
|
|||
CC_BEGIN_HEADER
|
||||
|
||||
/*
|
||||
Abstracts a 3D graphics rendering API
|
||||
SUMMARY:
|
||||
- Provides a low level abstraction over a 3D graphics rendering API.
|
||||
- Because of the numerous possible rendering backends, only a small number of
|
||||
functions are provided so that the available functionality behaves the same
|
||||
regardless of the rendering backend being used. (as much as reasonably possible)
|
||||
- Most code using Graphics.h therefore doesn't need to care about the rendering backend being used
|
||||
|
||||
IMPLEMENTATION NOTES:
|
||||
- By default, a reasonable rendering backend is automatically selected in Core.h
|
||||
- The selected rendering backend can be altered in two ways:
|
||||
* explicitly defining CC_GFX_BACKEND in the compilation flags (recommended)
|
||||
* altering DEFAULT_GFX_BACKEND for the platform in Core.h
|
||||
- Rendering backends are implemented in Graphics_GL1.c, Graphics_D3D9.c etc
|
||||
|
||||
Copyright 2014-2023 ClassiCube | Licensed under BSD-3
|
||||
*/
|
||||
struct Bitmap;
|
||||
|
|
@ -18,15 +31,6 @@ typedef enum VertexFormat_ {
|
|||
VERTEX_FORMAT_COLOURED, VERTEX_FORMAT_TEXTURED
|
||||
} VertexFormat;
|
||||
|
||||
typedef enum FogFunc_ {
|
||||
FOG_LINEAR, FOG_EXP, FOG_EXP2
|
||||
} FogFunc;
|
||||
|
||||
typedef enum MatrixType_ {
|
||||
MATRIX_PROJ, /* Projection matrix */
|
||||
MATRIX_VIEW /* Combined model view matrix */
|
||||
} MatrixType;
|
||||
|
||||
#define SIZEOF_VERTEX_COLOURED 16
|
||||
#define SIZEOF_VERTEX_TEXTURED 24
|
||||
|
||||
|
|
@ -88,11 +92,13 @@ extern const cc_string Gfx_LowPerfMessage;
|
|||
#define GFX_MAX_INDICES (65536 / 4 * 6)
|
||||
#define GFX_MAX_VERTICES 65536
|
||||
|
||||
typedef enum GfxBuffers_ {
|
||||
GFX_BUFFER_COLOR = 1,
|
||||
GFX_BUFFER_DEPTH = 2
|
||||
} GfxBuffers;
|
||||
void Gfx_RecreateTexture(GfxResourceID* tex, struct Bitmap* bmp, cc_uint8 flags, cc_bool mipmaps);
|
||||
void* Gfx_RecreateAndLockVb(GfxResourceID* vb, VertexFormat fmt, int count);
|
||||
|
||||
|
||||
/*########################################################################################################################*
|
||||
*---------------------------------------------------------Textures--------------------------------------------------------*
|
||||
*#########################################################################################################################*/
|
||||
/* Texture should persist across gfx context loss (if backend supports ManagedTextures) */
|
||||
#define TEXTURE_FLAG_MANAGED 0x01
|
||||
/* Texture should allow updating via Gfx_UpdateTexture */
|
||||
|
|
@ -104,9 +110,6 @@ typedef enum GfxBuffers_ {
|
|||
/* Texture should be rendered using bilinear filtering if possible */
|
||||
#define TEXTURE_FLAG_BILINEAR 0x10
|
||||
|
||||
void Gfx_RecreateTexture(GfxResourceID* tex, struct Bitmap* bmp, cc_uint8 flags, cc_bool mipmaps);
|
||||
void* Gfx_RecreateAndLockVb(GfxResourceID* vb, VertexFormat fmt, int count);
|
||||
|
||||
cc_bool Gfx_CheckTextureSize(int width, int height, cc_uint8 flags);
|
||||
/* Creates a new texture. (and also generates mipmaps if mipmaps) */
|
||||
/* See TEXTURE_FLAG values for supported flags */
|
||||
|
|
@ -134,6 +137,46 @@ CC_API void Gfx_EnableMipmaps(void);
|
|||
/* NOTE: You must have created textures with mipmaps true for this to work */
|
||||
CC_API void Gfx_DisableMipmaps(void);
|
||||
|
||||
|
||||
/*########################################################################################################################*
|
||||
*------------------------------------------------------Frame management---------------------------------------------------*
|
||||
*#########################################################################################################################*/
|
||||
typedef enum GfxBuffers_ {
|
||||
GFX_BUFFER_COLOR = 1,
|
||||
GFX_BUFFER_DEPTH = 2
|
||||
} GfxBuffers;
|
||||
|
||||
/* Clears the given rendering buffer(s) to their default values. */
|
||||
/* buffers can be either GFX_BUFFER_COLOR or GFX_BUFFER_DEPTH, or both */
|
||||
CC_API void Gfx_ClearBuffers(GfxBuffers buffers);
|
||||
/* Sets the default colour that the colour buffer is cleared to */
|
||||
CC_API void Gfx_ClearColor(PackedCol color);
|
||||
|
||||
/* Sets up state for rendering a new frame */
|
||||
void Gfx_BeginFrame(void);
|
||||
/* Finishes rendering a frame, and swaps it with the back buffer */
|
||||
void Gfx_EndFrame(void);
|
||||
/* Sets whether to synchronise with monitor refresh to avoid tearing */
|
||||
/* NOTE: VSync setting may be unsupported or just ignored */
|
||||
void Gfx_SetVSync(cc_bool vsync);
|
||||
|
||||
enum Screen3DS { TOP_SCREEN, BOTTOM_SCREEN };
|
||||
#ifdef CC_BUILD_DUALSCREEN
|
||||
/* Selects which screen/display to render to */
|
||||
void Gfx_3DS_SetRenderScreen(enum Screen3DS screen);
|
||||
#else
|
||||
/* Selects which screen/display to render to */
|
||||
static CC_INLINE void Gfx_3DS_SetRenderScreen(enum Screen3DS screen) { }
|
||||
#endif
|
||||
|
||||
|
||||
/*########################################################################################################################*
|
||||
*---------------------------------------------------------Fog state-------------------------------------------------------*
|
||||
*#########################################################################################################################*/
|
||||
typedef enum FogFunc_ {
|
||||
FOG_LINEAR, FOG_EXP, FOG_EXP2
|
||||
} FogFunc;
|
||||
|
||||
/* Returns whether fog blending is enabled */
|
||||
CC_API cc_bool Gfx_GetFog(void);
|
||||
/* Sets whether fog blending is enabled */
|
||||
|
|
@ -147,6 +190,10 @@ CC_API void Gfx_SetFogEnd(float value);
|
|||
/* Sets in what way fog is blended */
|
||||
CC_API void Gfx_SetFogMode(FogFunc func);
|
||||
|
||||
|
||||
/*########################################################################################################################*
|
||||
*-----------------------------------------------------State management----------------------------------------------------*
|
||||
*#########################################################################################################################*/
|
||||
/* Sets whether backface culling is performed */
|
||||
CC_API void Gfx_SetFaceCulling(cc_bool enabled);
|
||||
/* Sets whether pixels with an alpha of less than 128 are discarded */
|
||||
|
|
@ -156,11 +203,6 @@ CC_API void Gfx_SetAlphaBlending(cc_bool enabled);
|
|||
/* Sets whether blending between the alpha components of texture and vertex colour is performed */
|
||||
CC_API void Gfx_SetAlphaArgBlend(cc_bool enabled);
|
||||
|
||||
/* Clears the given rendering buffer(s) to default. */
|
||||
/* buffers can be either GFX_BUFFER_COLOR or GFX_BUFFER_DEPTH, or both */
|
||||
CC_API void Gfx_ClearBuffers(GfxBuffers buffers);
|
||||
/* Sets the colour that the colour buffer is cleared to */
|
||||
CC_API void Gfx_ClearColor(PackedCol color);
|
||||
/* Sets whether pixels may be discarded based on z/depth */
|
||||
CC_API void Gfx_SetDepthTest(cc_bool enabled);
|
||||
/* Sets whether z/depth of pixels is actually written to the depth buffer */
|
||||
|
|
@ -171,11 +213,10 @@ CC_API void Gfx_SetColorWrite(cc_bool r, cc_bool g, cc_bool b, cc_bool a);
|
|||
/* NOTE: Implicitly calls Gfx_SetColorWrite */
|
||||
CC_API void Gfx_DepthOnlyRendering(cc_bool depthOnly);
|
||||
|
||||
/* Anaglyph 3D rendering support */
|
||||
void Gfx_Set3DLeft( struct Matrix* proj, struct Matrix* view);
|
||||
void Gfx_Set3DRight(struct Matrix* proj, struct Matrix* view);
|
||||
void Gfx_End3D( struct Matrix* proj, struct Matrix* view);
|
||||
|
||||
/*########################################################################################################################*
|
||||
*------------------------------------------------------Index buffers-----------------------------------------------------*
|
||||
*#########################################################################################################################*/
|
||||
/* Callback function to initialise/fill out the contents of an index buffer */
|
||||
typedef void (*Gfx_FillIBFunc)(cc_uint16* indices, int count, void* obj);
|
||||
/* Creates a new index buffer and fills out its contents */
|
||||
|
|
@ -185,6 +226,10 @@ CC_API void Gfx_BindIb(GfxResourceID ib);
|
|||
/* Deletes the given index buffer, then sets it to 0 */
|
||||
CC_API void Gfx_DeleteIb(GfxResourceID* ib);
|
||||
|
||||
|
||||
/*########################################################################################################################*
|
||||
*------------------------------------------------------Vertex buffers-----------------------------------------------------*
|
||||
*#########################################################################################################################*/
|
||||
/* Creates a new vertex buffer */
|
||||
CC_API GfxResourceID Gfx_CreateVb(VertexFormat fmt, int count);
|
||||
/* Sets the currently active vertex buffer */
|
||||
|
|
@ -222,6 +267,10 @@ CC_API void Gfx_UnlockDynamicVb(GfxResourceID vb);
|
|||
/* Updates the data of a dynamic vertex buffer */
|
||||
CC_API void Gfx_SetDynamicVbData(GfxResourceID vb, void* vertices, int vCount);
|
||||
|
||||
|
||||
/*########################################################################################################################*
|
||||
*------------------------------------------------------Vertex drawing-----------------------------------------------------*
|
||||
*#########################################################################################################################*/
|
||||
/* Sets the format of the rendered vertices */
|
||||
CC_API void Gfx_SetVertexFormat(VertexFormat fmt);
|
||||
/* Renders vertices from the currently bound vertex buffer as lines */
|
||||
|
|
@ -234,9 +283,20 @@ CC_API void Gfx_DrawVb_IndexedTris(int verticesCount);
|
|||
/* Special case Gfx_DrawVb_IndexedTris_Range for map renderer */
|
||||
void Gfx_DrawIndexedTris_T2fC4b(int verticesCount, int startVertex);
|
||||
|
||||
/* Loads the given matrix over the currently active matrix */
|
||||
|
||||
/*########################################################################################################################*
|
||||
*-----------------------------------------------------Vertex transform----------------------------------------------------*
|
||||
*#########################################################################################################################*/
|
||||
typedef enum MatrixType_ {
|
||||
MATRIX_PROJ, /* Projection matrix */
|
||||
MATRIX_VIEW /* Combined model view matrix */
|
||||
} MatrixType;
|
||||
|
||||
/* Sets the currently active projection or modelview matrix */
|
||||
CC_API void Gfx_LoadMatrix(MatrixType type, const struct Matrix* matrix);
|
||||
/* Sets the texture U/V translation (not normally used) */
|
||||
CC_API void Gfx_EnableTextureOffset(float x, float y);
|
||||
/* Disables texture U/V translation */
|
||||
CC_API void Gfx_DisableTextureOffset(void);
|
||||
/* Loads given modelview and projection matrices, then calculates the combined MVP matrix */
|
||||
void Gfx_LoadMVP(const struct Matrix* view, const struct Matrix* proj, struct Matrix* mvp);
|
||||
|
|
@ -248,19 +308,25 @@ void Gfx_CalcPerspectiveMatrix(struct Matrix* matrix, float fov, float aspect, f
|
|||
/* NOTE: Projection matrix calculation is here because it can depend on the graphics backend */
|
||||
/* (e.g. OpenGL uses a Z clip space range of [-1, 1], whereas Direct3D9 uses [0, 1]) */
|
||||
|
||||
/* Sets the region where transformed vertices are drawn in */
|
||||
/* By default this region has origin 0,0 and size is window width/height */
|
||||
/* This region should normally be the same as the scissor region */
|
||||
CC_API void Gfx_SetViewport(int x, int y, int w, int h);
|
||||
/* Sets the region where pixels can be drawn in (pixels outside this region are discarded) */
|
||||
/* By default this region has origin 0,0 and size is window width/height */
|
||||
/* This region should normally be the same as the viewport region */
|
||||
CC_API void Gfx_SetScissor (int x, int y, int w, int h);
|
||||
|
||||
|
||||
/*########################################################################################################################*
|
||||
*------------------------------------------------------Misc utilities-----------------------------------------------------*
|
||||
*#########################################################################################################################*/
|
||||
/* Outputs a .png screenshot of the backbuffer */
|
||||
cc_result Gfx_TakeScreenshot(struct Stream* output);
|
||||
/* Warns in chat if the backend has problems with the user's GPU */
|
||||
/* Returns whether legacy rendering mode for borders/sky/clouds is needed */
|
||||
cc_bool Gfx_WarnIfNecessary(void);
|
||||
cc_bool Gfx_GetUIOptions(struct MenuOptionsScreen* s);
|
||||
/* Sets up state for rendering a new frame */
|
||||
void Gfx_BeginFrame(void);
|
||||
/* Finishes rendering a frame, and swaps it with the back buffer */
|
||||
void Gfx_EndFrame(void);
|
||||
/* Sets whether to synchronise with monitor refresh to avoid tearing */
|
||||
/* NOTE: VSync setting may be unsupported or just ignored */
|
||||
void Gfx_SetVSync(cc_bool vsync);
|
||||
/* Gets information about the user's GPU and current backend state */
|
||||
/* Backend state may include depth buffer bits, free memory, etc */
|
||||
/* NOTE: Each line is separated by \n */
|
||||
|
|
@ -269,16 +335,11 @@ void Gfx_GetApiInfo(cc_string* info);
|
|||
/* Updates state when the window's dimensions have changed */
|
||||
/* NOTE: This may require recreating the context depending on the backend */
|
||||
void Gfx_OnWindowResize(void);
|
||||
CC_API void Gfx_SetViewport(int x, int y, int w, int h);
|
||||
CC_API void Gfx_SetScissor (int x, int y, int w, int h);
|
||||
|
||||
enum Screen3DS { TOP_SCREEN, BOTTOM_SCREEN };
|
||||
#ifdef CC_BUILD_DUALSCREEN
|
||||
/* Selects which screen on the 3DS to render to */
|
||||
void Gfx_3DS_SetRenderScreen(enum Screen3DS screen);
|
||||
#else
|
||||
static CC_INLINE void Gfx_3DS_SetRenderScreen(enum Screen3DS screen) { }
|
||||
#endif
|
||||
/* Anaglyph 3D rendering support */
|
||||
void Gfx_Set3DLeft( struct Matrix* proj, struct Matrix* view);
|
||||
void Gfx_Set3DRight(struct Matrix* proj, struct Matrix* view);
|
||||
void Gfx_End3D( struct Matrix* proj, struct Matrix* view);
|
||||
|
||||
/* Raises ContextLost event and updates state for lost contexts */
|
||||
void Gfx_LoseContext(const char* reason);
|
||||
|
|
@ -287,6 +348,15 @@ void Gfx_RecreateContext(void);
|
|||
/* Attempts to restore a lost context */
|
||||
cc_bool Gfx_TryRestoreContext(void);
|
||||
|
||||
/* Sets appropriate alpha test/blending for given block draw type */
|
||||
void Gfx_SetupAlphaState(cc_uint8 draw);
|
||||
/* Undoes changes to alpha test/blending state by Gfx_SetupAlphaState */
|
||||
void Gfx_RestoreAlphaState(cc_uint8 draw);
|
||||
|
||||
|
||||
/*########################################################################################################################*
|
||||
*------------------------------------------------------2D rendering------------------------------------------------------*
|
||||
*#########################################################################################################################*/
|
||||
/* Renders a 2D flat coloured rectangle */
|
||||
void Gfx_Draw2DFlat(int x, int y, int width, int height, PackedCol color);
|
||||
/* Renders a 2D flat vertical gradient rectangle */
|
||||
|
|
@ -303,11 +373,6 @@ void Gfx_Begin2D(int width, int height);
|
|||
/* NOTE: This means restoring fog/depth test, restoring matrices, etc */
|
||||
void Gfx_End2D(void);
|
||||
|
||||
/* Sets appropriate alpha test/blending for given block draw type */
|
||||
void Gfx_SetupAlphaState(cc_uint8 draw);
|
||||
/* Undoes changes to alpha test/blending state by Gfx_SetupAlphaState */
|
||||
void Gfx_RestoreAlphaState(cc_uint8 draw);
|
||||
|
||||
/* Statically initialises the position and dimensions of this texture */
|
||||
#define Tex_Rect(x,y, width,height) x,y,width,height
|
||||
/* Statically initialises the texture coordinate corners of this texture */
|
||||
|
|
|
|||
|
|
@ -299,7 +299,6 @@ extern "C" {
|
|||
* | aes_ct | AES | 16 | 16, 24 and 32 |
|
||||
* | aes_ct64 | AES | 16 | 16, 24 and 32 |
|
||||
* | aes_x86ni | AES | 16 | 16, 24 and 32 |
|
||||
* | aes_pwr8 | AES | 16 | 16, 24 and 32 |
|
||||
* | des_ct | DES/3DES | 8 | 8, 16 and 24 |
|
||||
* | des_tab | DES/3DES | 8 | 8, 16 and 24 |
|
||||
*
|
||||
|
|
@ -335,10 +334,6 @@ extern "C" {
|
|||
* `aes_x86ni` exists only on x86 architectures (32-bit and 64-bit). It
|
||||
* uses the AES-NI opcodes when available.
|
||||
*
|
||||
* `aes_pwr8` exists only on PowerPC / POWER architectures (32-bit and
|
||||
* 64-bit, both little-endian and big-endian). It uses the AES opcodes
|
||||
* present in POWER8 and later.
|
||||
*
|
||||
* `des_tab` is a classic, table-based implementation of DES/3DES. It
|
||||
* is not constant-time.
|
||||
*
|
||||
|
|
@ -1860,296 +1855,6 @@ const br_block_ctr_class *br_aes_x86ni_ctr_get_vtable(void);
|
|||
*/
|
||||
const br_block_ctrcbc_class *br_aes_x86ni_ctrcbc_get_vtable(void);
|
||||
|
||||
/*
|
||||
* AES implementation using POWER8 opcodes.
|
||||
*/
|
||||
|
||||
/** \brief AES block size (16 bytes). */
|
||||
#define br_aes_pwr8_BLOCK_SIZE 16
|
||||
|
||||
/**
|
||||
* \brief Context for AES subkeys (`aes_pwr8` implementation, CBC encryption).
|
||||
*
|
||||
* First field is a pointer to the vtable; it is set by the initialisation
|
||||
* function. Other fields are not supposed to be accessed by user code.
|
||||
*/
|
||||
typedef struct {
|
||||
/** \brief Pointer to vtable for this context. */
|
||||
const br_block_cbcenc_class *vtable;
|
||||
#ifndef BR_DOXYGEN_IGNORE
|
||||
union {
|
||||
unsigned char skni[16 * 15];
|
||||
} skey;
|
||||
unsigned num_rounds;
|
||||
#endif
|
||||
} br_aes_pwr8_cbcenc_keys;
|
||||
|
||||
/**
|
||||
* \brief Context for AES subkeys (`aes_pwr8` implementation, CBC decryption).
|
||||
*
|
||||
* First field is a pointer to the vtable; it is set by the initialisation
|
||||
* function. Other fields are not supposed to be accessed by user code.
|
||||
*/
|
||||
typedef struct {
|
||||
/** \brief Pointer to vtable for this context. */
|
||||
const br_block_cbcdec_class *vtable;
|
||||
#ifndef BR_DOXYGEN_IGNORE
|
||||
union {
|
||||
unsigned char skni[16 * 15];
|
||||
} skey;
|
||||
unsigned num_rounds;
|
||||
#endif
|
||||
} br_aes_pwr8_cbcdec_keys;
|
||||
|
||||
/**
|
||||
* \brief Context for AES subkeys (`aes_pwr8` implementation, CTR encryption
|
||||
* and decryption).
|
||||
*
|
||||
* First field is a pointer to the vtable; it is set by the initialisation
|
||||
* function. Other fields are not supposed to be accessed by user code.
|
||||
*/
|
||||
typedef struct {
|
||||
/** \brief Pointer to vtable for this context. */
|
||||
const br_block_ctr_class *vtable;
|
||||
#ifndef BR_DOXYGEN_IGNORE
|
||||
union {
|
||||
unsigned char skni[16 * 15];
|
||||
} skey;
|
||||
unsigned num_rounds;
|
||||
#endif
|
||||
} br_aes_pwr8_ctr_keys;
|
||||
|
||||
/**
|
||||
* \brief Context for AES subkeys (`aes_pwr8` implementation, CTR encryption
|
||||
* and decryption + CBC-MAC).
|
||||
*
|
||||
* First field is a pointer to the vtable; it is set by the initialisation
|
||||
* function. Other fields are not supposed to be accessed by user code.
|
||||
*/
|
||||
typedef struct {
|
||||
/** \brief Pointer to vtable for this context. */
|
||||
const br_block_ctrcbc_class *vtable;
|
||||
#ifndef BR_DOXYGEN_IGNORE
|
||||
union {
|
||||
unsigned char skni[16 * 15];
|
||||
} skey;
|
||||
unsigned num_rounds;
|
||||
#endif
|
||||
} br_aes_pwr8_ctrcbc_keys;
|
||||
|
||||
/**
|
||||
* \brief Class instance for AES CBC encryption (`aes_pwr8` implementation).
|
||||
*
|
||||
* Since this implementation might be omitted from the library, or the
|
||||
* AES opcode unavailable on the current CPU, a pointer to this class
|
||||
* instance should be obtained through `br_aes_pwr8_cbcenc_get_vtable()`.
|
||||
*/
|
||||
extern const br_block_cbcenc_class br_aes_pwr8_cbcenc_vtable;
|
||||
|
||||
/**
|
||||
* \brief Class instance for AES CBC decryption (`aes_pwr8` implementation).
|
||||
*
|
||||
* Since this implementation might be omitted from the library, or the
|
||||
* AES opcode unavailable on the current CPU, a pointer to this class
|
||||
* instance should be obtained through `br_aes_pwr8_cbcdec_get_vtable()`.
|
||||
*/
|
||||
extern const br_block_cbcdec_class br_aes_pwr8_cbcdec_vtable;
|
||||
|
||||
/**
|
||||
* \brief Class instance for AES CTR encryption and decryption
|
||||
* (`aes_pwr8` implementation).
|
||||
*
|
||||
* Since this implementation might be omitted from the library, or the
|
||||
* AES opcode unavailable on the current CPU, a pointer to this class
|
||||
* instance should be obtained through `br_aes_pwr8_ctr_get_vtable()`.
|
||||
*/
|
||||
extern const br_block_ctr_class br_aes_pwr8_ctr_vtable;
|
||||
|
||||
/**
|
||||
* \brief Class instance for AES CTR encryption/decryption + CBC-MAC
|
||||
* (`aes_pwr8` implementation).
|
||||
*
|
||||
* Since this implementation might be omitted from the library, or the
|
||||
* AES opcode unavailable on the current CPU, a pointer to this class
|
||||
* instance should be obtained through `br_aes_pwr8_ctrcbc_get_vtable()`.
|
||||
*/
|
||||
extern const br_block_ctrcbc_class br_aes_pwr8_ctrcbc_vtable;
|
||||
|
||||
/**
|
||||
* \brief Context initialisation (key schedule) for AES CBC encryption
|
||||
* (`aes_pwr8` implementation).
|
||||
*
|
||||
* \param ctx context to initialise.
|
||||
* \param key secret key.
|
||||
* \param len secret key length (in bytes).
|
||||
*/
|
||||
void br_aes_pwr8_cbcenc_init(br_aes_pwr8_cbcenc_keys *ctx,
|
||||
const void *key, size_t len);
|
||||
|
||||
/**
|
||||
* \brief Context initialisation (key schedule) for AES CBC decryption
|
||||
* (`aes_pwr8` implementation).
|
||||
*
|
||||
* \param ctx context to initialise.
|
||||
* \param key secret key.
|
||||
* \param len secret key length (in bytes).
|
||||
*/
|
||||
void br_aes_pwr8_cbcdec_init(br_aes_pwr8_cbcdec_keys *ctx,
|
||||
const void *key, size_t len);
|
||||
|
||||
/**
|
||||
* \brief Context initialisation (key schedule) for AES CTR encryption
|
||||
* and decryption (`aes_pwr8` implementation).
|
||||
*
|
||||
* \param ctx context to initialise.
|
||||
* \param key secret key.
|
||||
* \param len secret key length (in bytes).
|
||||
*/
|
||||
void br_aes_pwr8_ctr_init(br_aes_pwr8_ctr_keys *ctx,
|
||||
const void *key, size_t len);
|
||||
|
||||
/**
|
||||
* \brief Context initialisation (key schedule) for AES CTR + CBC-MAC
|
||||
* (`aes_pwr8` implementation).
|
||||
*
|
||||
* \param ctx context to initialise.
|
||||
* \param key secret key.
|
||||
* \param len secret key length (in bytes).
|
||||
*/
|
||||
void br_aes_pwr8_ctrcbc_init(br_aes_pwr8_ctrcbc_keys *ctx,
|
||||
const void *key, size_t len);
|
||||
|
||||
/**
|
||||
* \brief CBC encryption with AES (`aes_pwr8` implementation).
|
||||
*
|
||||
* \param ctx context (already initialised).
|
||||
* \param iv IV (updated).
|
||||
* \param data data to encrypt (updated).
|
||||
* \param len data length (in bytes, MUST be multiple of 16).
|
||||
*/
|
||||
void br_aes_pwr8_cbcenc_run(const br_aes_pwr8_cbcenc_keys *ctx, void *iv,
|
||||
void *data, size_t len);
|
||||
|
||||
/**
|
||||
* \brief CBC decryption with AES (`aes_pwr8` implementation).
|
||||
*
|
||||
* \param ctx context (already initialised).
|
||||
* \param iv IV (updated).
|
||||
* \param data data to decrypt (updated).
|
||||
* \param len data length (in bytes, MUST be multiple of 16).
|
||||
*/
|
||||
void br_aes_pwr8_cbcdec_run(const br_aes_pwr8_cbcdec_keys *ctx, void *iv,
|
||||
void *data, size_t len);
|
||||
|
||||
/**
|
||||
* \brief CTR encryption and decryption with AES (`aes_pwr8` implementation).
|
||||
*
|
||||
* \param ctx context (already initialised).
|
||||
* \param iv IV (constant, 12 bytes).
|
||||
* \param cc initial block counter value.
|
||||
* \param data data to decrypt (updated).
|
||||
* \param len data length (in bytes).
|
||||
* \return new block counter value.
|
||||
*/
|
||||
uint32_t br_aes_pwr8_ctr_run(const br_aes_pwr8_ctr_keys *ctx,
|
||||
const void *iv, uint32_t cc, void *data, size_t len);
|
||||
|
||||
/**
|
||||
* \brief CTR encryption + CBC-MAC with AES (`aes_pwr8` implementation).
|
||||
*
|
||||
* \param ctx context (already initialised).
|
||||
* \param ctr counter for CTR (16 bytes, updated).
|
||||
* \param cbcmac IV for CBC-MAC (updated).
|
||||
* \param data data to encrypt (updated).
|
||||
* \param len data length (in bytes, MUST be a multiple of 16).
|
||||
*/
|
||||
void br_aes_pwr8_ctrcbc_encrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
|
||||
void *ctr, void *cbcmac, void *data, size_t len);
|
||||
|
||||
/**
|
||||
* \brief CTR decryption + CBC-MAC with AES (`aes_pwr8` implementation).
|
||||
*
|
||||
* \param ctx context (already initialised).
|
||||
* \param ctr counter for CTR (16 bytes, updated).
|
||||
* \param cbcmac IV for CBC-MAC (updated).
|
||||
* \param data data to decrypt (updated).
|
||||
* \param len data length (in bytes, MUST be a multiple of 16).
|
||||
*/
|
||||
void br_aes_pwr8_ctrcbc_decrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
|
||||
void *ctr, void *cbcmac, void *data, size_t len);
|
||||
|
||||
/**
|
||||
* \brief CTR encryption/decryption with AES (`aes_pwr8` implementation).
|
||||
*
|
||||
* \param ctx context (already initialised).
|
||||
* \param ctr counter for CTR (16 bytes, updated).
|
||||
* \param data data to encrypt or decrypt (updated).
|
||||
* \param len data length (in bytes, MUST be a multiple of 16).
|
||||
*/
|
||||
void br_aes_pwr8_ctrcbc_ctr(const br_aes_pwr8_ctrcbc_keys *ctx,
|
||||
void *ctr, void *data, size_t len);
|
||||
|
||||
/**
|
||||
* \brief CBC-MAC with AES (`aes_pwr8` implementation).
|
||||
*
|
||||
* \param ctx context (already initialised).
|
||||
* \param cbcmac IV for CBC-MAC (updated).
|
||||
* \param data data to MAC (unmodified).
|
||||
* \param len data length (in bytes, MUST be a multiple of 16).
|
||||
*/
|
||||
void br_aes_pwr8_ctrcbc_mac(const br_aes_pwr8_ctrcbc_keys *ctx,
|
||||
void *cbcmac, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* \brief Obtain the `aes_pwr8` AES-CBC (encryption) implementation, if
|
||||
* available.
|
||||
*
|
||||
* This function returns a pointer to `br_aes_pwr8_cbcenc_vtable`, if
|
||||
* that implementation was compiled in the library _and_ the POWER8
|
||||
* crypto opcodes are available on the currently running CPU. If either
|
||||
* of these conditions is not met, then this function returns `NULL`.
|
||||
*
|
||||
* \return the `aes_pwr8` AES-CBC (encryption) implementation, or `NULL`.
|
||||
*/
|
||||
const br_block_cbcenc_class *br_aes_pwr8_cbcenc_get_vtable(void);
|
||||
|
||||
/**
|
||||
* \brief Obtain the `aes_pwr8` AES-CBC (decryption) implementation, if
|
||||
* available.
|
||||
*
|
||||
* This function returns a pointer to `br_aes_pwr8_cbcdec_vtable`, if
|
||||
* that implementation was compiled in the library _and_ the POWER8
|
||||
* crypto opcodes are available on the currently running CPU. If either
|
||||
* of these conditions is not met, then this function returns `NULL`.
|
||||
*
|
||||
* \return the `aes_pwr8` AES-CBC (decryption) implementation, or `NULL`.
|
||||
*/
|
||||
const br_block_cbcdec_class *br_aes_pwr8_cbcdec_get_vtable(void);
|
||||
|
||||
/**
|
||||
* \brief Obtain the `aes_pwr8` AES-CTR implementation, if available.
|
||||
*
|
||||
* This function returns a pointer to `br_aes_pwr8_ctr_vtable`, if that
|
||||
* implementation was compiled in the library _and_ the POWER8 crypto
|
||||
* opcodes are available on the currently running CPU. If either of
|
||||
* these conditions is not met, then this function returns `NULL`.
|
||||
*
|
||||
* \return the `aes_pwr8` AES-CTR implementation, or `NULL`.
|
||||
*/
|
||||
const br_block_ctr_class *br_aes_pwr8_ctr_get_vtable(void);
|
||||
|
||||
/**
|
||||
* \brief Obtain the `aes_pwr8` AES-CTR + CBC-MAC implementation, if
|
||||
* available.
|
||||
*
|
||||
* This function returns a pointer to `br_aes_pwr8_ctrcbc_vtable`, if
|
||||
* that implementation was compiled in the library _and_ the POWER8 AES
|
||||
* opcodes are available on the currently running CPU. If either of
|
||||
* these conditions is not met, then this function returns `NULL`.
|
||||
*
|
||||
* \return the `aes_pwr8` AES-CTR + CBC-MAC implementation, or `NULL`.
|
||||
*/
|
||||
const br_block_ctrcbc_class *br_aes_pwr8_ctrcbc_get_vtable(void);
|
||||
|
||||
/**
|
||||
* \brief Aggregate structure large enough to be used as context for
|
||||
|
|
@ -2162,7 +1867,6 @@ typedef union {
|
|||
br_aes_ct_cbcenc_keys c_ct;
|
||||
br_aes_ct64_cbcenc_keys c_ct64;
|
||||
br_aes_x86ni_cbcenc_keys c_x86ni;
|
||||
br_aes_pwr8_cbcenc_keys c_pwr8;
|
||||
} br_aes_gen_cbcenc_keys;
|
||||
|
||||
/**
|
||||
|
|
@ -2176,7 +1880,6 @@ typedef union {
|
|||
br_aes_ct_cbcdec_keys c_ct;
|
||||
br_aes_ct64_cbcdec_keys c_ct64;
|
||||
br_aes_x86ni_cbcdec_keys c_x86ni;
|
||||
br_aes_pwr8_cbcdec_keys c_pwr8;
|
||||
} br_aes_gen_cbcdec_keys;
|
||||
|
||||
/**
|
||||
|
|
@ -2190,7 +1893,6 @@ typedef union {
|
|||
br_aes_ct_ctr_keys c_ct;
|
||||
br_aes_ct64_ctr_keys c_ct64;
|
||||
br_aes_x86ni_ctr_keys c_x86ni;
|
||||
br_aes_pwr8_ctr_keys c_pwr8;
|
||||
} br_aes_gen_ctr_keys;
|
||||
|
||||
/**
|
||||
|
|
@ -2204,7 +1906,6 @@ typedef union {
|
|||
br_aes_ct_ctrcbc_keys c_ct;
|
||||
br_aes_ct64_ctrcbc_keys c_ct64;
|
||||
br_aes_x86ni_ctrcbc_keys c_x86ni;
|
||||
br_aes_pwr8_ctrcbc_keys c_pwr8;
|
||||
} br_aes_gen_ctrcbc_keys;
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -1313,32 +1313,6 @@ void br_ghash_pclmul(void *y, const void *h, const void *data, size_t len);
|
|||
*/
|
||||
br_ghash br_ghash_pclmul_get(void);
|
||||
|
||||
/**
|
||||
* \brief GHASH implementation using the POWER8 opcodes.
|
||||
*
|
||||
* This implementation is available only on POWER8 platforms (and later).
|
||||
* To safely obtain a pointer to this function when supported (or 0
|
||||
* otherwise), use `br_ghash_pwr8_get()`.
|
||||
*
|
||||
* \param y the array to update.
|
||||
* \param h the GHASH key.
|
||||
* \param data the input data (may be `NULL` if `len` is zero).
|
||||
* \param len the input data length (in bytes).
|
||||
*/
|
||||
void br_ghash_pwr8(void *y, const void *h, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* \brief Obtain the `pwr8` GHASH implementation, if available.
|
||||
*
|
||||
* If the `pwr8` implementation was compiled in the library (depending
|
||||
* on the compiler abilities) _and_ the local CPU appears to support the
|
||||
* opcode, then this function will return a pointer to the
|
||||
* `br_ghash_pwr8()` function. Otherwise, it will return `0`.
|
||||
*
|
||||
* \return the `pwr8` GHASH implementation, or `0`.
|
||||
*/
|
||||
br_ghash br_ghash_pwr8_get(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1,445 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#define BR_POWER_ASM_MACROS 1
|
||||
#include "inner.h"
|
||||
|
||||
/*
|
||||
* This code contains the AES key schedule implementation using the
|
||||
* POWER8 opcodes.
|
||||
*/
|
||||
|
||||
#if BR_POWER8
|
||||
|
||||
static void
|
||||
key_schedule_128(unsigned char *sk, const unsigned char *key)
|
||||
{
|
||||
long cc;
|
||||
|
||||
static const uint32_t fmod[] = { 0x11B, 0x11B, 0x11B, 0x11B };
|
||||
#if BR_POWER8_LE
|
||||
static const uint32_t idx2be[] = {
|
||||
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
|
||||
};
|
||||
#endif
|
||||
|
||||
cc = 0;
|
||||
|
||||
/*
|
||||
* We use the VSX instructions for loading and storing the
|
||||
* key/subkeys, since they support unaligned accesses. The rest
|
||||
* of the computation is VMX only. VMX register 0 is VSX
|
||||
* register 32.
|
||||
*/
|
||||
asm volatile (
|
||||
|
||||
/*
|
||||
* v0 = all-zero word
|
||||
* v1 = constant -8 / +8, copied into four words
|
||||
* v2 = current subkey
|
||||
* v3 = Rcon (x4 words)
|
||||
* v6 = constant 8, copied into four words
|
||||
* v7 = constant 0x11B, copied into four words
|
||||
* v8 = constant for byteswapping words
|
||||
*/
|
||||
vspltisw(0, 0)
|
||||
#if BR_POWER8_LE
|
||||
vspltisw(1, -8)
|
||||
#else
|
||||
vspltisw(1, 8)
|
||||
#endif
|
||||
lxvw4x(34, 0, %[key])
|
||||
vspltisw(3, 1)
|
||||
vspltisw(6, 8)
|
||||
lxvw4x(39, 0, %[fmod])
|
||||
#if BR_POWER8_LE
|
||||
lxvw4x(40, 0, %[idx2be])
|
||||
#endif
|
||||
|
||||
/*
|
||||
* First subkey is a copy of the key itself.
|
||||
*/
|
||||
#if BR_POWER8_LE
|
||||
vperm(4, 2, 2, 8)
|
||||
stxvw4x(36, 0, %[sk])
|
||||
#else
|
||||
stxvw4x(34, 0, %[sk])
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Loop must run 10 times.
|
||||
*/
|
||||
li(%[cc], 10)
|
||||
mtctr(%[cc])
|
||||
label(loop)
|
||||
/* Increment subkey address */
|
||||
addi(%[sk], %[sk], 16)
|
||||
|
||||
/* Compute SubWord(RotWord(temp)) xor Rcon (into v4, splat) */
|
||||
vrlw(4, 2, 1)
|
||||
vsbox(4, 4)
|
||||
#if BR_POWER8_LE
|
||||
vxor(4, 4, 3)
|
||||
#else
|
||||
vsldoi(5, 3, 0, 3)
|
||||
vxor(4, 4, 5)
|
||||
#endif
|
||||
vspltw(4, 4, 3)
|
||||
|
||||
/* XOR words for next subkey */
|
||||
vsldoi(5, 0, 2, 12)
|
||||
vxor(2, 2, 5)
|
||||
vsldoi(5, 0, 2, 12)
|
||||
vxor(2, 2, 5)
|
||||
vsldoi(5, 0, 2, 12)
|
||||
vxor(2, 2, 5)
|
||||
vxor(2, 2, 4)
|
||||
|
||||
/* Store next subkey */
|
||||
#if BR_POWER8_LE
|
||||
vperm(4, 2, 2, 8)
|
||||
stxvw4x(36, 0, %[sk])
|
||||
#else
|
||||
stxvw4x(34, 0, %[sk])
|
||||
#endif
|
||||
|
||||
/* Update Rcon */
|
||||
vadduwm(3, 3, 3)
|
||||
vsrw(4, 3, 6)
|
||||
vsubuwm(4, 0, 4)
|
||||
vand(4, 4, 7)
|
||||
vxor(3, 3, 4)
|
||||
|
||||
bdnz(loop)
|
||||
|
||||
: [sk] "+b" (sk), [cc] "+b" (cc)
|
||||
: [key] "b" (key), [fmod] "b" (fmod)
|
||||
#if BR_POWER8_LE
|
||||
, [idx2be] "b" (idx2be)
|
||||
#endif
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "ctr", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
/*
 * Expand a 192-bit AES key into the subkey array at sk.
 *
 * The key is read with two 16-byte loads, at offsets 0 and 8 from key.
 * The counted loop runs 8 times; each iteration stores 32 bytes at sk and
 * then advances sk by 24, so consecutive stores overlap by 8 bytes. One
 * final 16-byte store follows the loop, for a total extent of 208 bytes
 * (52 words) written at sk.
 *
 * NOTE(review): v6 is loaded with the constant 8 below, but no other
 * instruction in this block reads it.
 */
static void
|
||||
key_schedule_192(unsigned char *sk, const unsigned char *key)
|
||||
{
|
||||
long cc;
|
||||
|
||||
#if BR_POWER8_LE
|
||||
static const uint32_t idx2be[] = {
|
||||
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
|
||||
};
|
||||
#endif
|
||||
|
||||
cc = 0;
|
||||
|
||||
/*
|
||||
* We use the VSX instructions for loading and storing the
|
||||
* key/subkeys, since they support unaligned accesses. The rest
|
||||
* of the computation is VMX only. VMX register 0 is VSX
|
||||
* register 32.
|
||||
*/
|
||||
asm volatile (
|
||||
|
||||
/*
|
||||
* v0 = all-zero word
|
||||
* v1 = constant -8 / +8, copied into four words
|
||||
* v2, v3 = current subkey
|
||||
* v5 = Rcon (x4 words) (already shifted on big-endian)
|
||||
* v6 = constant 8, copied into four words
|
||||
* v8 = constant for byteswapping words
|
||||
*
|
||||
* The left two words of v3 are ignored.
|
||||
*/
|
||||
vspltisw(0, 0)
|
||||
#if BR_POWER8_LE
|
||||
vspltisw(1, -8)
|
||||
#else
|
||||
vspltisw(1, 8)
|
||||
#endif
|
||||
li(%[cc], 8)
|
||||
lxvw4x(34, 0, %[key])
|
||||
lxvw4x(35, %[cc], %[key])
|
||||
vsldoi(3, 3, 0, 8)
|
||||
vspltisw(5, 1)
|
||||
#if !BR_POWER8_LE
|
||||
vsldoi(5, 5, 0, 3)
|
||||
#endif
|
||||
vspltisw(6, 8)
|
||||
#if BR_POWER8_LE
|
||||
lxvw4x(40, 0, %[idx2be])
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Loop must run 8 times. Each iteration produces 256
|
||||
* bits of subkeys, with a 64-bit overlap.
|
||||
*/
|
||||
li(%[cc], 8)
|
||||
mtctr(%[cc])
|
||||
li(%[cc], 16)
|
||||
label(loop)
|
||||
|
||||
/*
|
||||
* Last 6 words in v2:v3l. Compute next 6 words into
|
||||
* v3r:v4.
|
||||
*/
|
||||
vrlw(10, 3, 1)
|
||||
vsbox(10, 10)
|
||||
vxor(10, 10, 5)
|
||||
vspltw(10, 10, 1)
|
||||
vsldoi(11, 0, 10, 8)
|
||||
|
||||
vsldoi(12, 0, 2, 12)
|
||||
vxor(12, 2, 12)
|
||||
vsldoi(13, 0, 12, 12)
|
||||
vxor(12, 12, 13)
|
||||
vsldoi(13, 0, 12, 12)
|
||||
vxor(12, 12, 13)
|
||||
|
||||
vspltw(13, 12, 3)
|
||||
vxor(13, 13, 3)
|
||||
vsldoi(14, 0, 3, 12)
|
||||
vxor(13, 13, 14)
|
||||
|
||||
vsldoi(4, 12, 13, 8)
|
||||
vsldoi(14, 0, 3, 8)
|
||||
vsldoi(3, 14, 12, 8)
|
||||
|
||||
vxor(3, 3, 11)
|
||||
vxor(4, 4, 10)
|
||||
|
||||
/*
|
||||
* Update Rcon. Since for a 192-bit key, we use only 8
|
||||
* such constants, we will not hit the field modulus,
|
||||
* so a simple shift (addition) works well.
|
||||
*/
|
||||
vadduwm(5, 5, 5)
|
||||
|
||||
/*
|
||||
* Write out the two left 128-bit words
|
||||
*/
|
||||
#if BR_POWER8_LE
|
||||
vperm(10, 2, 2, 8)
|
||||
vperm(11, 3, 3, 8)
|
||||
stxvw4x(42, 0, %[sk])
|
||||
stxvw4x(43, %[cc], %[sk])
|
||||
#else
|
||||
stxvw4x(34, 0, %[sk])
|
||||
stxvw4x(35, %[cc], %[sk])
|
||||
#endif
|
||||
addi(%[sk], %[sk], 24)
|
||||
|
||||
/*
|
||||
* Shift words for next iteration.
|
||||
*/
|
||||
vsldoi(2, 3, 4, 8)
|
||||
vsldoi(3, 4, 0, 8)
|
||||
|
||||
bdnz(loop)
|
||||
|
||||
/*
|
||||
* The loop wrote the first 50 subkey words, but we need
|
||||
* to produce 52, so we must do one last write.
|
||||
*/
|
||||
#if BR_POWER8_LE
|
||||
vperm(10, 2, 2, 8)
|
||||
stxvw4x(42, 0, %[sk])
|
||||
#else
|
||||
stxvw4x(34, 0, %[sk])
|
||||
#endif
|
||||
|
||||
: [sk] "+b" (sk), [cc] "+b" (cc)
|
||||
: [key] "b" (key)
|
||||
#if BR_POWER8_LE
|
||||
, [idx2be] "b" (idx2be)
|
||||
#endif
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
|
||||
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "ctr", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
/*
 * Expand a 256-bit AES key into the subkey array at sk.
 *
 * The key is read with two 16-byte loads, at offsets 0 and 16 from key.
 * The counted loop runs 7 times; each iteration stores two 16-byte subkeys
 * and advances sk by 32. One final 16-byte store follows the loop, giving
 * 15 subkeys (240 bytes) in total.
 *
 * NOTE(review): v7 is loaded with the constant 8 below, but no other
 * instruction in this block reads it.
 */
static void
|
||||
key_schedule_256(unsigned char *sk, const unsigned char *key)
|
||||
{
|
||||
long cc;
|
||||
|
||||
#if BR_POWER8_LE
|
||||
static const uint32_t idx2be[] = {
|
||||
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
|
||||
};
|
||||
#endif
|
||||
|
||||
cc = 0;
|
||||
|
||||
/*
|
||||
* We use the VSX instructions for loading and storing the
|
||||
* key/subkeys, since they support unaligned accesses. The rest
|
||||
* of the computation is VMX only. VMX register 0 is VSX
|
||||
* register 32.
|
||||
*/
|
||||
asm volatile (
|
||||
|
||||
/*
|
||||
* v0 = all-zero word
|
||||
* v1 = constant -8 / +8, copied into four words
|
||||
* v2, v3 = current subkey
|
||||
* v6 = Rcon (x4 words) (already shifted on big-endian)
|
||||
* v7 = constant 8, copied into four words
|
||||
* v8 = constant for byteswapping words
|
||||
*
|
||||
* The left two words of v3 are ignored.
|
||||
*/
|
||||
vspltisw(0, 0)
|
||||
#if BR_POWER8_LE
|
||||
vspltisw(1, -8)
|
||||
#else
|
||||
vspltisw(1, 8)
|
||||
#endif
|
||||
li(%[cc], 16)
|
||||
lxvw4x(34, 0, %[key])
|
||||
lxvw4x(35, %[cc], %[key])
|
||||
vspltisw(6, 1)
|
||||
#if !BR_POWER8_LE
|
||||
vsldoi(6, 6, 0, 3)
|
||||
#endif
|
||||
vspltisw(7, 8)
|
||||
#if BR_POWER8_LE
|
||||
lxvw4x(40, 0, %[idx2be])
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Loop must run 7 times. Each iteration produces two
|
||||
* subkeys.
|
||||
*/
|
||||
li(%[cc], 7)
|
||||
mtctr(%[cc])
|
||||
li(%[cc], 16)
|
||||
label(loop)
|
||||
|
||||
/*
|
||||
* Current words are in v2:v3. Compute next word in v4.
|
||||
*/
|
||||
vrlw(10, 3, 1)
|
||||
vsbox(10, 10)
|
||||
vxor(10, 10, 6)
|
||||
vspltw(10, 10, 3)
|
||||
|
||||
vsldoi(4, 0, 2, 12)
|
||||
vxor(4, 2, 4)
|
||||
vsldoi(5, 0, 4, 12)
|
||||
vxor(4, 4, 5)
|
||||
vsldoi(5, 0, 4, 12)
|
||||
vxor(4, 4, 5)
|
||||
vxor(4, 4, 10)
|
||||
|
||||
/*
|
||||
* Then other word in v5.
|
||||
*/
|
||||
vsbox(10, 4)
|
||||
vspltw(10, 10, 3)
|
||||
|
||||
vsldoi(5, 0, 3, 12)
|
||||
vxor(5, 3, 5)
|
||||
vsldoi(11, 0, 5, 12)
|
||||
vxor(5, 5, 11)
|
||||
vsldoi(11, 0, 5, 12)
|
||||
vxor(5, 5, 11)
|
||||
vxor(5, 5, 10)
|
||||
|
||||
/*
|
||||
* Update Rcon. Since for a 256-bit key, we use only 7
|
||||
* such constants, we will not hit the field modulus,
|
||||
* so a simple shift (addition) works well.
|
||||
*/
|
||||
vadduwm(6, 6, 6)
|
||||
|
||||
/*
|
||||
* Write out the two left 128-bit words
|
||||
*/
|
||||
#if BR_POWER8_LE
|
||||
vperm(10, 2, 2, 8)
|
||||
vperm(11, 3, 3, 8)
|
||||
stxvw4x(42, 0, %[sk])
|
||||
stxvw4x(43, %[cc], %[sk])
|
||||
#else
|
||||
stxvw4x(34, 0, %[sk])
|
||||
stxvw4x(35, %[cc], %[sk])
|
||||
#endif
|
||||
addi(%[sk], %[sk], 32)
|
||||
|
||||
/*
|
||||
* Replace v2:v3 with v4:v5.
|
||||
*/
|
||||
vxor(2, 0, 4)
|
||||
vxor(3, 0, 5)
|
||||
|
||||
bdnz(loop)
|
||||
|
||||
/*
|
||||
* The loop wrote the first 14 subkeys, but we need 15,
|
||||
* so we must do an extra write.
|
||||
*/
|
||||
#if BR_POWER8_LE
|
||||
vperm(10, 2, 2, 8)
|
||||
stxvw4x(42, 0, %[sk])
|
||||
#else
|
||||
stxvw4x(34, 0, %[sk])
|
||||
#endif
|
||||
|
||||
: [sk] "+b" (sk), [cc] "+b" (cc)
|
||||
: [key] "b" (key)
|
||||
#if BR_POWER8_LE
|
||||
, [idx2be] "b" (idx2be)
|
||||
#endif
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
|
||||
"v8", "v9", "v10", "v11", "v12", "v13", "v14", "ctr", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
/* see inner.h */
|
||||
/*
 * Report whether the POWER8 AES code may be used. This definition sits
 * inside the "#if BR_POWER8" region of the file and unconditionally
 * returns 1 (no runtime probe is performed here).
 */
int
|
||||
br_aes_pwr8_supported(void)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* see inner.h */
|
||||
/*
 * Run the key schedule matching the key length and return the number of
 * AES rounds: 10 for a 16-byte key, 12 for a 24-byte key, and 14 for any
 * other length (which is processed as a 256-bit key).
 *
 * sk    destination buffer for the subkeys.
 * key   the AES key.
 * len   key length in bytes.
 */
unsigned
br_aes_pwr8_keysched(unsigned char *sk, const void *key, size_t len)
{
	if (len == 16) {
		key_schedule_128(sk, key);
		return 10;
	}
	if (len == 24) {
		key_schedule_192(sk, key);
		return 12;
	}
	key_schedule_256(sk, key);
	return 14;
}
|
||||
|
||||
#endif
|
||||
|
|
@ -1,670 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#define BR_POWER_ASM_MACROS 1
|
||||
#include "inner.h"
|
||||
|
||||
#if BR_POWER8
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_pwr8_cbcdec_init(br_aes_pwr8_cbcdec_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_pwr8_cbcdec_vtable;
|
||||
ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
|
||||
}
|
||||
|
||||
/*
 * AES-128 CBC decryption, in place, four 16-byte blocks per loop iteration.
 *
 * sk          subkeys; 11 of them are loaded (offsets 0..160) into v0..v10.
 * iv          16-byte IV; read once at entry, never written back here.
 * buf         data to decrypt in place; advanced by 64 bytes per iteration.
 * num_blocks  block count; the loop counter is set to num_blocks >> 2, so
 *             only whole groups of four blocks are processed.
 *
 * NOTE(review): a num_blocks below 4 would load 0 into CTR before the
 * bdnz loop; the caller visible in this file always passes at least 4.
 */
static void
|
||||
cbcdec_128(const unsigned char *sk,
|
||||
const unsigned char *iv, unsigned char *buf, size_t num_blocks)
|
||||
{
|
||||
long cc0, cc1, cc2, cc3;
|
||||
|
||||
#if BR_POWER8_LE
|
||||
static const uint32_t idx2be[] = {
|
||||
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
|
||||
};
|
||||
#endif
|
||||
|
||||
cc0 = 0;
|
||||
cc1 = 16;
|
||||
cc2 = 32;
|
||||
cc3 = 48;
|
||||
asm volatile (
|
||||
|
||||
/*
|
||||
* Load subkeys into v0..v10
|
||||
*/
|
||||
lxvw4x(32, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(33, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(34, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(35, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(36, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(37, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(38, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(39, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(40, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(41, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(42, %[cc0], %[sk])
|
||||
li(%[cc0], 0)
|
||||
|
||||
#if BR_POWER8_LE
|
||||
/*
|
||||
* v15 = constant for byteswapping words
|
||||
*/
|
||||
lxvw4x(47, 0, %[idx2be])
|
||||
#endif
|
||||
/*
|
||||
* Load IV into v24.
|
||||
*/
|
||||
lxvw4x(56, 0, %[iv])
|
||||
#if BR_POWER8_LE
|
||||
vperm(24, 24, 24, 15)
|
||||
#endif
|
||||
|
||||
mtctr(%[num_blocks])
|
||||
label(loop)
|
||||
/*
|
||||
* Load next ciphertext words in v16..v19. Also save them
|
||||
* in v20..v23.
|
||||
*/
|
||||
lxvw4x(48, %[cc0], %[buf])
|
||||
lxvw4x(49, %[cc1], %[buf])
|
||||
lxvw4x(50, %[cc2], %[buf])
|
||||
lxvw4x(51, %[cc3], %[buf])
|
||||
#if BR_POWER8_LE
|
||||
vperm(16, 16, 16, 15)
|
||||
vperm(17, 17, 17, 15)
|
||||
vperm(18, 18, 18, 15)
|
||||
vperm(19, 19, 19, 15)
|
||||
#endif
|
||||
vand(20, 16, 16)
|
||||
vand(21, 17, 17)
|
||||
vand(22, 18, 18)
|
||||
vand(23, 19, 19)
|
||||
|
||||
/*
|
||||
* Decrypt the blocks.
|
||||
*/
|
||||
vxor(16, 16, 10)
|
||||
vxor(17, 17, 10)
|
||||
vxor(18, 18, 10)
|
||||
vxor(19, 19, 10)
|
||||
vncipher(16, 16, 9)
|
||||
vncipher(17, 17, 9)
|
||||
vncipher(18, 18, 9)
|
||||
vncipher(19, 19, 9)
|
||||
vncipher(16, 16, 8)
|
||||
vncipher(17, 17, 8)
|
||||
vncipher(18, 18, 8)
|
||||
vncipher(19, 19, 8)
|
||||
vncipher(16, 16, 7)
|
||||
vncipher(17, 17, 7)
|
||||
vncipher(18, 18, 7)
|
||||
vncipher(19, 19, 7)
|
||||
vncipher(16, 16, 6)
|
||||
vncipher(17, 17, 6)
|
||||
vncipher(18, 18, 6)
|
||||
vncipher(19, 19, 6)
|
||||
vncipher(16, 16, 5)
|
||||
vncipher(17, 17, 5)
|
||||
vncipher(18, 18, 5)
|
||||
vncipher(19, 19, 5)
|
||||
vncipher(16, 16, 4)
|
||||
vncipher(17, 17, 4)
|
||||
vncipher(18, 18, 4)
|
||||
vncipher(19, 19, 4)
|
||||
vncipher(16, 16, 3)
|
||||
vncipher(17, 17, 3)
|
||||
vncipher(18, 18, 3)
|
||||
vncipher(19, 19, 3)
|
||||
vncipher(16, 16, 2)
|
||||
vncipher(17, 17, 2)
|
||||
vncipher(18, 18, 2)
|
||||
vncipher(19, 19, 2)
|
||||
vncipher(16, 16, 1)
|
||||
vncipher(17, 17, 1)
|
||||
vncipher(18, 18, 1)
|
||||
vncipher(19, 19, 1)
|
||||
vncipherlast(16, 16, 0)
|
||||
vncipherlast(17, 17, 0)
|
||||
vncipherlast(18, 18, 0)
|
||||
vncipherlast(19, 19, 0)
|
||||
|
||||
/*
|
||||
* XOR decrypted blocks with IV / previous block.
|
||||
*/
|
||||
vxor(16, 16, 24)
|
||||
vxor(17, 17, 20)
|
||||
vxor(18, 18, 21)
|
||||
vxor(19, 19, 22)
|
||||
|
||||
/*
|
||||
* Store back result (with byteswap)
|
||||
*/
|
||||
#if BR_POWER8_LE
|
||||
vperm(16, 16, 16, 15)
|
||||
vperm(17, 17, 17, 15)
|
||||
vperm(18, 18, 18, 15)
|
||||
vperm(19, 19, 19, 15)
|
||||
#endif
|
||||
stxvw4x(48, %[cc0], %[buf])
|
||||
stxvw4x(49, %[cc1], %[buf])
|
||||
stxvw4x(50, %[cc2], %[buf])
|
||||
stxvw4x(51, %[cc3], %[buf])
|
||||
|
||||
/*
|
||||
* Fourth encrypted block is IV for next run.
|
||||
*/
|
||||
vand(24, 23, 23)
|
||||
|
||||
addi(%[buf], %[buf], 64)
|
||||
|
||||
bdnz(loop)
|
||||
|
||||
: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
|
||||
[buf] "+b" (buf)
|
||||
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
|
||||
#if BR_POWER8_LE
|
||||
, [idx2be] "b" (idx2be)
|
||||
#endif
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
|
||||
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
|
||||
"ctr", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
/*
 * AES-192 CBC decryption, in place, four 16-byte blocks per loop iteration.
 *
 * sk          subkeys; 13 of them are loaded (offsets 0..192) into v0..v12.
 * iv          16-byte IV; read once at entry, never written back here.
 * buf         data to decrypt in place; advanced by 64 bytes per iteration.
 * num_blocks  block count; the loop counter is set to num_blocks >> 2, so
 *             only whole groups of four blocks are processed.
 *
 * NOTE(review): a num_blocks below 4 would load 0 into CTR before the
 * bdnz loop; the caller visible in this file always passes at least 4.
 */
static void
|
||||
cbcdec_192(const unsigned char *sk,
|
||||
const unsigned char *iv, unsigned char *buf, size_t num_blocks)
|
||||
{
|
||||
long cc0, cc1, cc2, cc3;
|
||||
|
||||
#if BR_POWER8_LE
|
||||
static const uint32_t idx2be[] = {
|
||||
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
|
||||
};
|
||||
#endif
|
||||
|
||||
cc0 = 0;
|
||||
cc1 = 16;
|
||||
cc2 = 32;
|
||||
cc3 = 48;
|
||||
asm volatile (
|
||||
|
||||
/*
|
||||
* Load subkeys into v0..v12
|
||||
*/
|
||||
lxvw4x(32, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(33, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(34, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(35, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(36, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(37, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(38, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(39, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(40, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(41, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(42, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(43, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(44, %[cc0], %[sk])
|
||||
li(%[cc0], 0)
|
||||
|
||||
#if BR_POWER8_LE
|
||||
/*
|
||||
* v15 = constant for byteswapping words
|
||||
*/
|
||||
lxvw4x(47, 0, %[idx2be])
|
||||
#endif
|
||||
/*
|
||||
* Load IV into v24.
|
||||
*/
|
||||
lxvw4x(56, 0, %[iv])
|
||||
#if BR_POWER8_LE
|
||||
vperm(24, 24, 24, 15)
|
||||
#endif
|
||||
|
||||
mtctr(%[num_blocks])
|
||||
label(loop)
|
||||
/*
|
||||
* Load next ciphertext words in v16..v19. Also save them
|
||||
* in v20..v23.
|
||||
*/
|
||||
lxvw4x(48, %[cc0], %[buf])
|
||||
lxvw4x(49, %[cc1], %[buf])
|
||||
lxvw4x(50, %[cc2], %[buf])
|
||||
lxvw4x(51, %[cc3], %[buf])
|
||||
#if BR_POWER8_LE
|
||||
vperm(16, 16, 16, 15)
|
||||
vperm(17, 17, 17, 15)
|
||||
vperm(18, 18, 18, 15)
|
||||
vperm(19, 19, 19, 15)
|
||||
#endif
|
||||
vand(20, 16, 16)
|
||||
vand(21, 17, 17)
|
||||
vand(22, 18, 18)
|
||||
vand(23, 19, 19)
|
||||
|
||||
/*
|
||||
* Decrypt the blocks.
|
||||
*/
|
||||
vxor(16, 16, 12)
|
||||
vxor(17, 17, 12)
|
||||
vxor(18, 18, 12)
|
||||
vxor(19, 19, 12)
|
||||
vncipher(16, 16, 11)
|
||||
vncipher(17, 17, 11)
|
||||
vncipher(18, 18, 11)
|
||||
vncipher(19, 19, 11)
|
||||
vncipher(16, 16, 10)
|
||||
vncipher(17, 17, 10)
|
||||
vncipher(18, 18, 10)
|
||||
vncipher(19, 19, 10)
|
||||
vncipher(16, 16, 9)
|
||||
vncipher(17, 17, 9)
|
||||
vncipher(18, 18, 9)
|
||||
vncipher(19, 19, 9)
|
||||
vncipher(16, 16, 8)
|
||||
vncipher(17, 17, 8)
|
||||
vncipher(18, 18, 8)
|
||||
vncipher(19, 19, 8)
|
||||
vncipher(16, 16, 7)
|
||||
vncipher(17, 17, 7)
|
||||
vncipher(18, 18, 7)
|
||||
vncipher(19, 19, 7)
|
||||
vncipher(16, 16, 6)
|
||||
vncipher(17, 17, 6)
|
||||
vncipher(18, 18, 6)
|
||||
vncipher(19, 19, 6)
|
||||
vncipher(16, 16, 5)
|
||||
vncipher(17, 17, 5)
|
||||
vncipher(18, 18, 5)
|
||||
vncipher(19, 19, 5)
|
||||
vncipher(16, 16, 4)
|
||||
vncipher(17, 17, 4)
|
||||
vncipher(18, 18, 4)
|
||||
vncipher(19, 19, 4)
|
||||
vncipher(16, 16, 3)
|
||||
vncipher(17, 17, 3)
|
||||
vncipher(18, 18, 3)
|
||||
vncipher(19, 19, 3)
|
||||
vncipher(16, 16, 2)
|
||||
vncipher(17, 17, 2)
|
||||
vncipher(18, 18, 2)
|
||||
vncipher(19, 19, 2)
|
||||
vncipher(16, 16, 1)
|
||||
vncipher(17, 17, 1)
|
||||
vncipher(18, 18, 1)
|
||||
vncipher(19, 19, 1)
|
||||
vncipherlast(16, 16, 0)
|
||||
vncipherlast(17, 17, 0)
|
||||
vncipherlast(18, 18, 0)
|
||||
vncipherlast(19, 19, 0)
|
||||
|
||||
/*
|
||||
* XOR decrypted blocks with IV / previous block.
|
||||
*/
|
||||
vxor(16, 16, 24)
|
||||
vxor(17, 17, 20)
|
||||
vxor(18, 18, 21)
|
||||
vxor(19, 19, 22)
|
||||
|
||||
/*
|
||||
* Store back result (with byteswap)
|
||||
*/
|
||||
#if BR_POWER8_LE
|
||||
vperm(16, 16, 16, 15)
|
||||
vperm(17, 17, 17, 15)
|
||||
vperm(18, 18, 18, 15)
|
||||
vperm(19, 19, 19, 15)
|
||||
#endif
|
||||
stxvw4x(48, %[cc0], %[buf])
|
||||
stxvw4x(49, %[cc1], %[buf])
|
||||
stxvw4x(50, %[cc2], %[buf])
|
||||
stxvw4x(51, %[cc3], %[buf])
|
||||
|
||||
/*
|
||||
* Fourth encrypted block is IV for next run.
|
||||
*/
|
||||
vand(24, 23, 23)
|
||||
|
||||
addi(%[buf], %[buf], 64)
|
||||
|
||||
bdnz(loop)
|
||||
|
||||
: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
|
||||
[buf] "+b" (buf)
|
||||
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
|
||||
#if BR_POWER8_LE
|
||||
, [idx2be] "b" (idx2be)
|
||||
#endif
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
|
||||
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
|
||||
"ctr", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
/*
 * AES-256 CBC decryption, in place, four 16-byte blocks per loop iteration.
 *
 * sk          subkeys; 15 of them are loaded (offsets 0..224) into v0..v14.
 * iv          16-byte IV; read once at entry, never written back here.
 * buf         data to decrypt in place; advanced by 64 bytes per iteration.
 * num_blocks  block count; the loop counter is set to num_blocks >> 2, so
 *             only whole groups of four blocks are processed.
 *
 * NOTE(review): a num_blocks below 4 would load 0 into CTR before the
 * bdnz loop; the caller visible in this file always passes at least 4.
 */
static void
|
||||
cbcdec_256(const unsigned char *sk,
|
||||
const unsigned char *iv, unsigned char *buf, size_t num_blocks)
|
||||
{
|
||||
long cc0, cc1, cc2, cc3;
|
||||
|
||||
#if BR_POWER8_LE
|
||||
static const uint32_t idx2be[] = {
|
||||
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
|
||||
};
|
||||
#endif
|
||||
|
||||
cc0 = 0;
|
||||
cc1 = 16;
|
||||
cc2 = 32;
|
||||
cc3 = 48;
|
||||
asm volatile (
|
||||
|
||||
/*
|
||||
* Load subkeys into v0..v14
|
||||
*/
|
||||
lxvw4x(32, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(33, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(34, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(35, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(36, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(37, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(38, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(39, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(40, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(41, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(42, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(43, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(44, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(45, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(46, %[cc0], %[sk])
|
||||
li(%[cc0], 0)
|
||||
|
||||
#if BR_POWER8_LE
|
||||
/*
|
||||
* v15 = constant for byteswapping words
|
||||
*/
|
||||
lxvw4x(47, 0, %[idx2be])
|
||||
#endif
|
||||
/*
|
||||
* Load IV into v24.
|
||||
*/
|
||||
lxvw4x(56, 0, %[iv])
|
||||
#if BR_POWER8_LE
|
||||
vperm(24, 24, 24, 15)
|
||||
#endif
|
||||
|
||||
mtctr(%[num_blocks])
|
||||
label(loop)
|
||||
/*
|
||||
* Load next ciphertext words in v16..v19. Also save them
|
||||
* in v20..v23.
|
||||
*/
|
||||
lxvw4x(48, %[cc0], %[buf])
|
||||
lxvw4x(49, %[cc1], %[buf])
|
||||
lxvw4x(50, %[cc2], %[buf])
|
||||
lxvw4x(51, %[cc3], %[buf])
|
||||
#if BR_POWER8_LE
|
||||
vperm(16, 16, 16, 15)
|
||||
vperm(17, 17, 17, 15)
|
||||
vperm(18, 18, 18, 15)
|
||||
vperm(19, 19, 19, 15)
|
||||
#endif
|
||||
vand(20, 16, 16)
|
||||
vand(21, 17, 17)
|
||||
vand(22, 18, 18)
|
||||
vand(23, 19, 19)
|
||||
|
||||
/*
|
||||
* Decrypt the blocks.
|
||||
*/
|
||||
vxor(16, 16, 14)
|
||||
vxor(17, 17, 14)
|
||||
vxor(18, 18, 14)
|
||||
vxor(19, 19, 14)
|
||||
vncipher(16, 16, 13)
|
||||
vncipher(17, 17, 13)
|
||||
vncipher(18, 18, 13)
|
||||
vncipher(19, 19, 13)
|
||||
vncipher(16, 16, 12)
|
||||
vncipher(17, 17, 12)
|
||||
vncipher(18, 18, 12)
|
||||
vncipher(19, 19, 12)
|
||||
vncipher(16, 16, 11)
|
||||
vncipher(17, 17, 11)
|
||||
vncipher(18, 18, 11)
|
||||
vncipher(19, 19, 11)
|
||||
vncipher(16, 16, 10)
|
||||
vncipher(17, 17, 10)
|
||||
vncipher(18, 18, 10)
|
||||
vncipher(19, 19, 10)
|
||||
vncipher(16, 16, 9)
|
||||
vncipher(17, 17, 9)
|
||||
vncipher(18, 18, 9)
|
||||
vncipher(19, 19, 9)
|
||||
vncipher(16, 16, 8)
|
||||
vncipher(17, 17, 8)
|
||||
vncipher(18, 18, 8)
|
||||
vncipher(19, 19, 8)
|
||||
vncipher(16, 16, 7)
|
||||
vncipher(17, 17, 7)
|
||||
vncipher(18, 18, 7)
|
||||
vncipher(19, 19, 7)
|
||||
vncipher(16, 16, 6)
|
||||
vncipher(17, 17, 6)
|
||||
vncipher(18, 18, 6)
|
||||
vncipher(19, 19, 6)
|
||||
vncipher(16, 16, 5)
|
||||
vncipher(17, 17, 5)
|
||||
vncipher(18, 18, 5)
|
||||
vncipher(19, 19, 5)
|
||||
vncipher(16, 16, 4)
|
||||
vncipher(17, 17, 4)
|
||||
vncipher(18, 18, 4)
|
||||
vncipher(19, 19, 4)
|
||||
vncipher(16, 16, 3)
|
||||
vncipher(17, 17, 3)
|
||||
vncipher(18, 18, 3)
|
||||
vncipher(19, 19, 3)
|
||||
vncipher(16, 16, 2)
|
||||
vncipher(17, 17, 2)
|
||||
vncipher(18, 18, 2)
|
||||
vncipher(19, 19, 2)
|
||||
vncipher(16, 16, 1)
|
||||
vncipher(17, 17, 1)
|
||||
vncipher(18, 18, 1)
|
||||
vncipher(19, 19, 1)
|
||||
vncipherlast(16, 16, 0)
|
||||
vncipherlast(17, 17, 0)
|
||||
vncipherlast(18, 18, 0)
|
||||
vncipherlast(19, 19, 0)
|
||||
|
||||
/*
|
||||
* XOR decrypted blocks with IV / previous block.
|
||||
*/
|
||||
vxor(16, 16, 24)
|
||||
vxor(17, 17, 20)
|
||||
vxor(18, 18, 21)
|
||||
vxor(19, 19, 22)
|
||||
|
||||
/*
|
||||
* Store back result (with byteswap)
|
||||
*/
|
||||
#if BR_POWER8_LE
|
||||
vperm(16, 16, 16, 15)
|
||||
vperm(17, 17, 17, 15)
|
||||
vperm(18, 18, 18, 15)
|
||||
vperm(19, 19, 19, 15)
|
||||
#endif
|
||||
stxvw4x(48, %[cc0], %[buf])
|
||||
stxvw4x(49, %[cc1], %[buf])
|
||||
stxvw4x(50, %[cc2], %[buf])
|
||||
stxvw4x(51, %[cc3], %[buf])
|
||||
|
||||
/*
|
||||
* Fourth encrypted block is IV for next run.
|
||||
*/
|
||||
vand(24, 23, 23)
|
||||
|
||||
addi(%[buf], %[buf], 64)
|
||||
|
||||
bdnz(loop)
|
||||
|
||||
: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
|
||||
[buf] "+b" (buf)
|
||||
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
|
||||
#if BR_POWER8_LE
|
||||
, [idx2be] "b" (idx2be)
|
||||
#endif
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
|
||||
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
|
||||
"ctr", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_pwr8_cbcdec_run(const br_aes_pwr8_cbcdec_keys *ctx,
|
||||
void *iv, void *data, size_t len)
|
||||
{
|
||||
unsigned char nextiv[16];
|
||||
unsigned char *buf;
|
||||
|
||||
if (len == 0) {
|
||||
return;
|
||||
}
|
||||
buf = data;
|
||||
memcpy(nextiv, buf + len - 16, 16);
|
||||
if (len >= 64) {
|
||||
size_t num_blocks;
|
||||
unsigned char tmp[16];
|
||||
|
||||
num_blocks = (len >> 4) & ~(size_t)3;
|
||||
memcpy(tmp, buf + (num_blocks << 4) - 16, 16);
|
||||
switch (ctx->num_rounds) {
|
||||
case 10:
|
||||
cbcdec_128(ctx->skey.skni, iv, buf, num_blocks);
|
||||
break;
|
||||
case 12:
|
||||
cbcdec_192(ctx->skey.skni, iv, buf, num_blocks);
|
||||
break;
|
||||
default:
|
||||
cbcdec_256(ctx->skey.skni, iv, buf, num_blocks);
|
||||
break;
|
||||
}
|
||||
buf += num_blocks << 4;
|
||||
len &= 63;
|
||||
memcpy(iv, tmp, 16);
|
||||
}
|
||||
if (len > 0) {
|
||||
unsigned char tmp[64];
|
||||
|
||||
memcpy(tmp, buf, len);
|
||||
memset(tmp + len, 0, (sizeof tmp) - len);
|
||||
switch (ctx->num_rounds) {
|
||||
case 10:
|
||||
cbcdec_128(ctx->skey.skni, iv, tmp, 4);
|
||||
break;
|
||||
case 12:
|
||||
cbcdec_192(ctx->skey.skni, iv, tmp, 4);
|
||||
break;
|
||||
default:
|
||||
cbcdec_256(ctx->skey.skni, iv, tmp, 4);
|
||||
break;
|
||||
}
|
||||
memcpy(buf, tmp, len);
|
||||
}
|
||||
memcpy(iv, nextiv, 16);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
/*
 * Constant descriptor for the POWER8 AES CBC-decryption implementation.
 * It holds the size of the key context, two integer constants, and the
 * init/run entry points cast to the generic function-pointer signatures.
 * NOTE(review): 16 and 4 are presumably the block size in bytes and its
 * base-2 logarithm; confirm against the br_block_cbcdec_class definition.
 */
const br_block_cbcdec_class br_aes_pwr8_cbcdec_vtable = {
|
||||
sizeof(br_aes_pwr8_cbcdec_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_cbcdec_class **, const void *, size_t))
|
||||
&br_aes_pwr8_cbcdec_init,
|
||||
(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
|
||||
&br_aes_pwr8_cbcdec_run
|
||||
};
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_cbcdec_class *
|
||||
br_aes_pwr8_cbcdec_get_vtable(void)
|
||||
{
|
||||
return br_aes_pwr8_supported() ? &br_aes_pwr8_cbcdec_vtable : NULL;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* see bearssl_block.h */
|
||||
/*
 * Fallback definition compiled when BR_POWER8 is not set: there is no
 * POWER8 implementation to offer, so the result is always NULL.
 */
const br_block_cbcdec_class *
|
||||
br_aes_pwr8_cbcdec_get_vtable(void)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -1,417 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#define BR_POWER_ASM_MACROS 1
|
||||
#include "inner.h"
|
||||
|
||||
#if BR_POWER8
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_pwr8_cbcenc_init(br_aes_pwr8_cbcenc_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_pwr8_cbcenc_vtable;
|
||||
ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
|
||||
}
|
||||
|
||||
/*
 * AES-128 CBC encryption core (eleven subkeys), operating in place.
 *
 *   sk    subkeys, read as eleven consecutive 16-byte values
 *   iv    16-byte IV; it is an input operand only and is never written here
 *   buf   data, overwritten block by block with the ciphertext
 *   len   data length in bytes; the loop count is len >> 4
 *
 * The lxvw4x/vcipher/mtctr/... names are not defined in this file; they
 * are presumably the assembly macros that inner.h provides when
 * BR_POWER_ASM_MACROS is set -- confirm there. The existing comments call
 * VSX registers 32..63 "v0..v31".
 * NOTE(review): the loop counts down on CTR (mtctr/bdnz), so a len below
 * 16 would start it at zero; callers are expected never to pass that.
 */
static void
|
||||
cbcenc_128(const unsigned char *sk,
|
||||
const unsigned char *iv, unsigned char *buf, size_t len)
|
||||
{
|
||||
long cc;
|
||||
|
||||
#if BR_POWER8_LE
|
||||
static const uint32_t idx2be[] = {
|
||||
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
|
||||
};
|
||||
#endif
|
||||
|
||||
cc = 0;
|
||||
asm volatile (
|
||||
|
||||
/*
|
||||
* Load subkeys into v0..v10
|
||||
*/
|
||||
lxvw4x(32, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(33, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(34, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(35, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(36, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(37, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(38, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(39, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(40, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(41, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(42, %[cc], %[sk])
|
||||
|
||||
#if BR_POWER8_LE
|
||||
/*
|
||||
* v15 = constant for byteswapping words
|
||||
*/
|
||||
lxvw4x(47, 0, %[idx2be])
|
||||
#endif
|
||||
/*
|
||||
* Load IV into v16.
|
||||
*/
|
||||
lxvw4x(48, 0, %[iv])
|
||||
#if BR_POWER8_LE
|
||||
vperm(16, 16, 16, 15)
|
||||
#endif
|
||||
|
||||
mtctr(%[num_blocks])
|
||||
label(loop)
|
||||
/*
|
||||
* Load next plaintext block and XOR with current IV.
|
||||
*/
|
||||
lxvw4x(49, 0, %[buf])
|
||||
#if BR_POWER8_LE
|
||||
vperm(17, 17, 17, 15)
|
||||
#endif
|
||||
vxor(16, 16, 17)
|
||||
|
||||
/*
|
||||
* Encrypt the block.
|
||||
*/
|
||||
vxor(16, 16, 0)
|
||||
vcipher(16, 16, 1)
|
||||
vcipher(16, 16, 2)
|
||||
vcipher(16, 16, 3)
|
||||
vcipher(16, 16, 4)
|
||||
vcipher(16, 16, 5)
|
||||
vcipher(16, 16, 6)
|
||||
vcipher(16, 16, 7)
|
||||
vcipher(16, 16, 8)
|
||||
vcipher(16, 16, 9)
|
||||
vcipherlast(16, 16, 10)
|
||||
|
||||
/*
|
||||
* Store back result (with byteswap); v16 stays as the next chaining value.
|
||||
*/
|
||||
#if BR_POWER8_LE
|
||||
vperm(17, 16, 16, 15)
|
||||
stxvw4x(49, 0, %[buf])
|
||||
#else
|
||||
stxvw4x(48, 0, %[buf])
|
||||
#endif
|
||||
addi(%[buf], %[buf], 16)
|
||||
|
||||
bdnz(loop)
|
||||
|
||||
: [cc] "+b" (cc), [buf] "+b" (buf)
|
||||
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
|
||||
#if BR_POWER8_LE
|
||||
, [idx2be] "b" (idx2be)
|
||||
#endif
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
|
||||
"ctr", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
/*
 * AES-192 CBC encryption core (thirteen subkeys), operating in place.
 *
 *   sk    subkeys, read as thirteen consecutive 16-byte values
 *   iv    16-byte IV; it is an input operand only and is never written here
 *   buf   data, overwritten block by block with the ciphertext
 *   len   data length in bytes; the loop count is len >> 4
 *
 * The lxvw4x/vcipher/mtctr/... names are not defined in this file; they
 * are presumably the assembly macros that inner.h provides when
 * BR_POWER_ASM_MACROS is set -- confirm there. The existing comments call
 * VSX registers 32..63 "v0..v31".
 * NOTE(review): the loop counts down on CTR (mtctr/bdnz), so a len below
 * 16 would start it at zero; callers are expected never to pass that.
 */
static void
|
||||
cbcenc_192(const unsigned char *sk,
|
||||
const unsigned char *iv, unsigned char *buf, size_t len)
|
||||
{
|
||||
long cc;
|
||||
|
||||
#if BR_POWER8_LE
|
||||
static const uint32_t idx2be[] = {
|
||||
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
|
||||
};
|
||||
#endif
|
||||
|
||||
cc = 0;
|
||||
asm volatile (
|
||||
|
||||
/*
|
||||
* Load subkeys into v0..v12
|
||||
*/
|
||||
lxvw4x(32, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(33, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(34, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(35, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(36, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(37, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(38, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(39, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(40, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(41, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(42, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(43, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(44, %[cc], %[sk])
|
||||
|
||||
#if BR_POWER8_LE
|
||||
/*
|
||||
* v15 = constant for byteswapping words
|
||||
*/
|
||||
lxvw4x(47, 0, %[idx2be])
|
||||
#endif
|
||||
/*
|
||||
* Load IV into v16.
|
||||
*/
|
||||
lxvw4x(48, 0, %[iv])
|
||||
#if BR_POWER8_LE
|
||||
vperm(16, 16, 16, 15)
|
||||
#endif
|
||||
|
||||
mtctr(%[num_blocks])
|
||||
label(loop)
|
||||
/*
|
||||
* Load next plaintext block and XOR with current IV.
|
||||
*/
|
||||
lxvw4x(49, 0, %[buf])
|
||||
#if BR_POWER8_LE
|
||||
vperm(17, 17, 17, 15)
|
||||
#endif
|
||||
vxor(16, 16, 17)
|
||||
|
||||
/*
|
||||
* Encrypt the block.
|
||||
*/
|
||||
vxor(16, 16, 0)
|
||||
vcipher(16, 16, 1)
|
||||
vcipher(16, 16, 2)
|
||||
vcipher(16, 16, 3)
|
||||
vcipher(16, 16, 4)
|
||||
vcipher(16, 16, 5)
|
||||
vcipher(16, 16, 6)
|
||||
vcipher(16, 16, 7)
|
||||
vcipher(16, 16, 8)
|
||||
vcipher(16, 16, 9)
|
||||
vcipher(16, 16, 10)
|
||||
vcipher(16, 16, 11)
|
||||
vcipherlast(16, 16, 12)
|
||||
|
||||
/*
|
||||
* Store back result (with byteswap); v16 stays as the next chaining value.
|
||||
*/
|
||||
#if BR_POWER8_LE
|
||||
vperm(17, 16, 16, 15)
|
||||
stxvw4x(49, 0, %[buf])
|
||||
#else
|
||||
stxvw4x(48, 0, %[buf])
|
||||
#endif
|
||||
addi(%[buf], %[buf], 16)
|
||||
|
||||
bdnz(loop)
|
||||
|
||||
: [cc] "+b" (cc), [buf] "+b" (buf)
|
||||
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
|
||||
#if BR_POWER8_LE
|
||||
, [idx2be] "b" (idx2be)
|
||||
#endif
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
|
||||
"ctr", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
/*
 * AES-256 CBC encryption core (fifteen subkeys), operating in place.
 *
 *   sk    subkeys, read as fifteen consecutive 16-byte values
 *   iv    16-byte IV; it is an input operand only and is never written here
 *   buf   data, overwritten block by block with the ciphertext
 *   len   data length in bytes; the loop count is len >> 4
 *
 * The lxvw4x/vcipher/mtctr/... names are not defined in this file; they
 * are presumably the assembly macros that inner.h provides when
 * BR_POWER_ASM_MACROS is set -- confirm there. The existing comments call
 * VSX registers 32..63 "v0..v31".
 * NOTE(review): the loop counts down on CTR (mtctr/bdnz), so a len below
 * 16 would start it at zero; callers are expected never to pass that.
 */
static void
|
||||
cbcenc_256(const unsigned char *sk,
|
||||
const unsigned char *iv, unsigned char *buf, size_t len)
|
||||
{
|
||||
long cc;
|
||||
|
||||
#if BR_POWER8_LE
|
||||
static const uint32_t idx2be[] = {
|
||||
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
|
||||
};
|
||||
#endif
|
||||
|
||||
cc = 0;
|
||||
asm volatile (
|
||||
|
||||
/*
|
||||
* Load subkeys into v0..v14
|
||||
*/
|
||||
lxvw4x(32, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(33, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(34, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(35, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(36, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(37, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(38, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(39, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(40, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(41, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(42, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(43, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(44, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(45, %[cc], %[sk])
|
||||
addi(%[cc], %[cc], 16)
|
||||
lxvw4x(46, %[cc], %[sk])
|
||||
|
||||
#if BR_POWER8_LE
|
||||
/*
|
||||
* v15 = constant for byteswapping words
|
||||
*/
|
||||
lxvw4x(47, 0, %[idx2be])
|
||||
#endif
|
||||
/*
|
||||
* Load IV into v16.
|
||||
*/
|
||||
lxvw4x(48, 0, %[iv])
|
||||
#if BR_POWER8_LE
|
||||
vperm(16, 16, 16, 15)
|
||||
#endif
|
||||
|
||||
mtctr(%[num_blocks])
|
||||
label(loop)
|
||||
/*
|
||||
* Load next plaintext block and XOR with current IV.
|
||||
*/
|
||||
lxvw4x(49, 0, %[buf])
|
||||
#if BR_POWER8_LE
|
||||
vperm(17, 17, 17, 15)
|
||||
#endif
|
||||
vxor(16, 16, 17)
|
||||
|
||||
/*
|
||||
* Encrypt the block.
|
||||
*/
|
||||
vxor(16, 16, 0)
|
||||
vcipher(16, 16, 1)
|
||||
vcipher(16, 16, 2)
|
||||
vcipher(16, 16, 3)
|
||||
vcipher(16, 16, 4)
|
||||
vcipher(16, 16, 5)
|
||||
vcipher(16, 16, 6)
|
||||
vcipher(16, 16, 7)
|
||||
vcipher(16, 16, 8)
|
||||
vcipher(16, 16, 9)
|
||||
vcipher(16, 16, 10)
|
||||
vcipher(16, 16, 11)
|
||||
vcipher(16, 16, 12)
|
||||
vcipher(16, 16, 13)
|
||||
vcipherlast(16, 16, 14)
|
||||
|
||||
/*
|
||||
* Store back result (with byteswap); v16 stays as the next chaining value.
|
||||
*/
|
||||
#if BR_POWER8_LE
|
||||
vperm(17, 16, 16, 15)
|
||||
stxvw4x(49, 0, %[buf])
|
||||
#else
|
||||
stxvw4x(48, 0, %[buf])
|
||||
#endif
|
||||
addi(%[buf], %[buf], 16)
|
||||
|
||||
bdnz(loop)
|
||||
|
||||
: [cc] "+b" (cc), [buf] "+b" (buf)
|
||||
: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
|
||||
#if BR_POWER8_LE
|
||||
, [idx2be] "b" (idx2be)
|
||||
#endif
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
|
||||
"ctr", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_pwr8_cbcenc_run(const br_aes_pwr8_cbcenc_keys *ctx,
|
||||
void *iv, void *data, size_t len)
|
||||
{
|
||||
if (len > 0) {
|
||||
switch (ctx->num_rounds) {
|
||||
case 10:
|
||||
cbcenc_128(ctx->skey.skni, iv, data, len);
|
||||
break;
|
||||
case 12:
|
||||
cbcenc_192(ctx->skey.skni, iv, data, len);
|
||||
break;
|
||||
default:
|
||||
cbcenc_256(ctx->skey.skni, iv, data, len);
|
||||
break;
|
||||
}
|
||||
memcpy(iv, (unsigned char *)data + (len - 16), 16);
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
/*
 * Vtable describing this CBC-encryption implementation: context size,
 * two numeric constants (16 and 4 -- presumably the block size in bytes
 * and its base-2 logarithm; confirm against the br_block_cbcenc_class
 * declaration), then the init and run entry points. The casts adapt the
 * concrete context pointer types to the generic class-pointer signatures.
 */
const br_block_cbcenc_class br_aes_pwr8_cbcenc_vtable = {
|
||||
sizeof(br_aes_pwr8_cbcenc_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_cbcenc_class **, const void *, size_t))
|
||||
&br_aes_pwr8_cbcenc_init,
|
||||
(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
|
||||
&br_aes_pwr8_cbcenc_run
|
||||
};
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_cbcenc_class *
|
||||
br_aes_pwr8_cbcenc_get_vtable(void)
|
||||
{
|
||||
return br_aes_pwr8_supported() ? &br_aes_pwr8_cbcenc_vtable : NULL;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* see bearssl_block.h */
|
||||
/*
 * Fallback used when BR_POWER8 is not set: the POWER8 implementation is
 * not compiled in, so there is no vtable to offer and NULL is returned.
 */
const br_block_cbcenc_class *
|
||||
br_aes_pwr8_cbcenc_get_vtable(void)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -1,717 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#define BR_POWER_ASM_MACROS 1
|
||||
#include "inner.h"
|
||||
|
||||
#if BR_POWER8
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_pwr8_ctr_init(br_aes_pwr8_ctr_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_pwr8_ctr_vtable;
|
||||
ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
|
||||
}
|
||||
|
||||
/*
 * AES-128 CTR core (eleven subkeys): handles four blocks (64 bytes) per
 * loop iteration, in place.
 *
 *   sk          subkeys, read as eleven consecutive 16-byte values
 *   ivbuf       four 16-byte counter blocks at offsets 0, 16, 32 and 48;
 *               input operand only, never written here
 *   buf         data, XORed in place with the encrypted counter blocks
 *   num_blocks  count of 16-byte blocks; the loop runs num_blocks >> 2 times
 *
 * After each iteration every counter block has ctrinc = {0, 0, 0, 4} added
 * word-wise (vadduwm), i.e. its fourth 32-bit word advances by 4.
 * The lxvw4x/vcipher/mtctr/... names are not defined in this file; they
 * are presumably the assembly macros that inner.h provides when
 * BR_POWER_ASM_MACROS is set -- confirm there. The existing comments call
 * VSX registers 32..63 "v0..v31".
 */
static void
|
||||
ctr_128(const unsigned char *sk, const unsigned char *ivbuf,
|
||||
unsigned char *buf, size_t num_blocks)
|
||||
{
|
||||
long cc0, cc1, cc2, cc3;
|
||||
|
||||
#if BR_POWER8_LE
|
||||
static const uint32_t idx2be[] = {
|
||||
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
|
||||
};
|
||||
#endif
|
||||
static const uint32_t ctrinc[] = {
|
||||
0, 0, 0, 4
|
||||
};
|
||||
|
||||
cc0 = 0;
|
||||
cc1 = 16;
|
||||
cc2 = 32;
|
||||
cc3 = 48;
|
||||
asm volatile (
|
||||
|
||||
/*
|
||||
* Load subkeys into v0..v10 (cc0 is the running offset, reset to 0 afterwards)
|
||||
*/
|
||||
lxvw4x(32, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(33, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(34, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(35, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(36, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(37, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(38, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(39, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(40, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(41, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(42, %[cc0], %[sk])
|
||||
li(%[cc0], 0)
|
||||
|
||||
#if BR_POWER8_LE
|
||||
/*
|
||||
* v15 = constant for byteswapping words
|
||||
*/
|
||||
lxvw4x(47, 0, %[idx2be])
|
||||
#endif
|
||||
/*
|
||||
* v28 = increment for IV counter.
|
||||
*/
|
||||
lxvw4x(60, 0, %[ctrinc])
|
||||
|
||||
/*
|
||||
* Load IV into v16..v19
|
||||
*/
|
||||
lxvw4x(48, %[cc0], %[ivbuf])
|
||||
lxvw4x(49, %[cc1], %[ivbuf])
|
||||
lxvw4x(50, %[cc2], %[ivbuf])
|
||||
lxvw4x(51, %[cc3], %[ivbuf])
|
||||
#if BR_POWER8_LE
|
||||
vperm(16, 16, 16, 15)
|
||||
vperm(17, 17, 17, 15)
|
||||
vperm(18, 18, 18, 15)
|
||||
vperm(19, 19, 19, 15)
|
||||
#endif
|
||||
|
||||
mtctr(%[num_blocks])
|
||||
label(loop)
|
||||
/*
|
||||
* Compute next IV into v24..v27
|
||||
*/
|
||||
vadduwm(24, 16, 28)
|
||||
vadduwm(25, 17, 28)
|
||||
vadduwm(26, 18, 28)
|
||||
vadduwm(27, 19, 28)
|
||||
|
||||
/*
|
||||
* Load next data blocks. We do this early on but we
|
||||
* won't need them until IV encryption is done.
|
||||
*/
|
||||
lxvw4x(52, %[cc0], %[buf])
|
||||
lxvw4x(53, %[cc1], %[buf])
|
||||
lxvw4x(54, %[cc2], %[buf])
|
||||
lxvw4x(55, %[cc3], %[buf])
|
||||
|
||||
/*
|
||||
* Encrypt the current IV.
|
||||
*/
|
||||
vxor(16, 16, 0)
|
||||
vxor(17, 17, 0)
|
||||
vxor(18, 18, 0)
|
||||
vxor(19, 19, 0)
|
||||
vcipher(16, 16, 1)
|
||||
vcipher(17, 17, 1)
|
||||
vcipher(18, 18, 1)
|
||||
vcipher(19, 19, 1)
|
||||
vcipher(16, 16, 2)
|
||||
vcipher(17, 17, 2)
|
||||
vcipher(18, 18, 2)
|
||||
vcipher(19, 19, 2)
|
||||
vcipher(16, 16, 3)
|
||||
vcipher(17, 17, 3)
|
||||
vcipher(18, 18, 3)
|
||||
vcipher(19, 19, 3)
|
||||
vcipher(16, 16, 4)
|
||||
vcipher(17, 17, 4)
|
||||
vcipher(18, 18, 4)
|
||||
vcipher(19, 19, 4)
|
||||
vcipher(16, 16, 5)
|
||||
vcipher(17, 17, 5)
|
||||
vcipher(18, 18, 5)
|
||||
vcipher(19, 19, 5)
|
||||
vcipher(16, 16, 6)
|
||||
vcipher(17, 17, 6)
|
||||
vcipher(18, 18, 6)
|
||||
vcipher(19, 19, 6)
|
||||
vcipher(16, 16, 7)
|
||||
vcipher(17, 17, 7)
|
||||
vcipher(18, 18, 7)
|
||||
vcipher(19, 19, 7)
|
||||
vcipher(16, 16, 8)
|
||||
vcipher(17, 17, 8)
|
||||
vcipher(18, 18, 8)
|
||||
vcipher(19, 19, 8)
|
||||
vcipher(16, 16, 9)
|
||||
vcipher(17, 17, 9)
|
||||
vcipher(18, 18, 9)
|
||||
vcipher(19, 19, 9)
|
||||
vcipherlast(16, 16, 10)
|
||||
vcipherlast(17, 17, 10)
|
||||
vcipherlast(18, 18, 10)
|
||||
vcipherlast(19, 19, 10)
|
||||
|
||||
#if BR_POWER8_LE
|
||||
vperm(16, 16, 16, 15)
|
||||
vperm(17, 17, 17, 15)
|
||||
vperm(18, 18, 18, 15)
|
||||
vperm(19, 19, 19, 15)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* XOR the data blocks loaded earlier (v20..v23) with the encrypted IV; store.
|
||||
*/
|
||||
vxor(16, 20, 16)
|
||||
vxor(17, 21, 17)
|
||||
vxor(18, 22, 18)
|
||||
vxor(19, 23, 19)
|
||||
stxvw4x(48, %[cc0], %[buf])
|
||||
stxvw4x(49, %[cc1], %[buf])
|
||||
stxvw4x(50, %[cc2], %[buf])
|
||||
stxvw4x(51, %[cc3], %[buf])
|
||||
|
||||
addi(%[buf], %[buf], 64)
|
||||
|
||||
/*
|
||||
* Update IV (vand of a register with itself is a plain copy).
|
||||
*/
|
||||
vand(16, 24, 24)
|
||||
vand(17, 25, 25)
|
||||
vand(18, 26, 26)
|
||||
vand(19, 27, 27)
|
||||
|
||||
bdnz(loop)
|
||||
|
||||
: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
|
||||
[buf] "+b" (buf)
|
||||
: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
|
||||
[ctrinc] "b" (ctrinc)
|
||||
#if BR_POWER8_LE
|
||||
, [idx2be] "b" (idx2be)
|
||||
#endif
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
|
||||
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
|
||||
"ctr", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
/*
 * AES-192 CTR core (thirteen subkeys): handles four blocks (64 bytes) per
 * loop iteration, in place.
 *
 *   sk          subkeys, read as thirteen consecutive 16-byte values
 *   ivbuf       four 16-byte counter blocks at offsets 0, 16, 32 and 48;
 *               input operand only, never written here
 *   buf         data, XORed in place with the encrypted counter blocks
 *   num_blocks  count of 16-byte blocks; the loop runs num_blocks >> 2 times
 *
 * After each iteration every counter block has ctrinc = {0, 0, 0, 4} added
 * word-wise (vadduwm), i.e. its fourth 32-bit word advances by 4.
 * The lxvw4x/vcipher/mtctr/... names are not defined in this file; they
 * are presumably the assembly macros that inner.h provides when
 * BR_POWER_ASM_MACROS is set -- confirm there. The existing comments call
 * VSX registers 32..63 "v0..v31".
 */
static void
|
||||
ctr_192(const unsigned char *sk, const unsigned char *ivbuf,
|
||||
unsigned char *buf, size_t num_blocks)
|
||||
{
|
||||
long cc0, cc1, cc2, cc3;
|
||||
|
||||
#if BR_POWER8_LE
|
||||
static const uint32_t idx2be[] = {
|
||||
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
|
||||
};
|
||||
#endif
|
||||
static const uint32_t ctrinc[] = {
|
||||
0, 0, 0, 4
|
||||
};
|
||||
|
||||
cc0 = 0;
|
||||
cc1 = 16;
|
||||
cc2 = 32;
|
||||
cc3 = 48;
|
||||
asm volatile (
|
||||
|
||||
/*
|
||||
* Load subkeys into v0..v12 (cc0 is the running offset, reset to 0 afterwards)
|
||||
*/
|
||||
lxvw4x(32, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(33, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(34, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(35, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(36, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(37, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(38, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(39, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(40, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(41, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(42, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(43, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(44, %[cc0], %[sk])
|
||||
li(%[cc0], 0)
|
||||
|
||||
#if BR_POWER8_LE
|
||||
/*
|
||||
* v15 = constant for byteswapping words
|
||||
*/
|
||||
lxvw4x(47, 0, %[idx2be])
|
||||
#endif
|
||||
/*
|
||||
* v28 = increment for IV counter.
|
||||
*/
|
||||
lxvw4x(60, 0, %[ctrinc])
|
||||
|
||||
/*
|
||||
* Load IV into v16..v19
|
||||
*/
|
||||
lxvw4x(48, %[cc0], %[ivbuf])
|
||||
lxvw4x(49, %[cc1], %[ivbuf])
|
||||
lxvw4x(50, %[cc2], %[ivbuf])
|
||||
lxvw4x(51, %[cc3], %[ivbuf])
|
||||
#if BR_POWER8_LE
|
||||
vperm(16, 16, 16, 15)
|
||||
vperm(17, 17, 17, 15)
|
||||
vperm(18, 18, 18, 15)
|
||||
vperm(19, 19, 19, 15)
|
||||
#endif
|
||||
|
||||
mtctr(%[num_blocks])
|
||||
label(loop)
|
||||
/*
|
||||
* Compute next IV into v24..v27
|
||||
*/
|
||||
vadduwm(24, 16, 28)
|
||||
vadduwm(25, 17, 28)
|
||||
vadduwm(26, 18, 28)
|
||||
vadduwm(27, 19, 28)
|
||||
|
||||
/*
|
||||
* Load next data blocks. We do this early on but we
|
||||
* won't need them until IV encryption is done.
|
||||
*/
|
||||
lxvw4x(52, %[cc0], %[buf])
|
||||
lxvw4x(53, %[cc1], %[buf])
|
||||
lxvw4x(54, %[cc2], %[buf])
|
||||
lxvw4x(55, %[cc3], %[buf])
|
||||
|
||||
/*
|
||||
* Encrypt the current IV.
|
||||
*/
|
||||
vxor(16, 16, 0)
|
||||
vxor(17, 17, 0)
|
||||
vxor(18, 18, 0)
|
||||
vxor(19, 19, 0)
|
||||
vcipher(16, 16, 1)
|
||||
vcipher(17, 17, 1)
|
||||
vcipher(18, 18, 1)
|
||||
vcipher(19, 19, 1)
|
||||
vcipher(16, 16, 2)
|
||||
vcipher(17, 17, 2)
|
||||
vcipher(18, 18, 2)
|
||||
vcipher(19, 19, 2)
|
||||
vcipher(16, 16, 3)
|
||||
vcipher(17, 17, 3)
|
||||
vcipher(18, 18, 3)
|
||||
vcipher(19, 19, 3)
|
||||
vcipher(16, 16, 4)
|
||||
vcipher(17, 17, 4)
|
||||
vcipher(18, 18, 4)
|
||||
vcipher(19, 19, 4)
|
||||
vcipher(16, 16, 5)
|
||||
vcipher(17, 17, 5)
|
||||
vcipher(18, 18, 5)
|
||||
vcipher(19, 19, 5)
|
||||
vcipher(16, 16, 6)
|
||||
vcipher(17, 17, 6)
|
||||
vcipher(18, 18, 6)
|
||||
vcipher(19, 19, 6)
|
||||
vcipher(16, 16, 7)
|
||||
vcipher(17, 17, 7)
|
||||
vcipher(18, 18, 7)
|
||||
vcipher(19, 19, 7)
|
||||
vcipher(16, 16, 8)
|
||||
vcipher(17, 17, 8)
|
||||
vcipher(18, 18, 8)
|
||||
vcipher(19, 19, 8)
|
||||
vcipher(16, 16, 9)
|
||||
vcipher(17, 17, 9)
|
||||
vcipher(18, 18, 9)
|
||||
vcipher(19, 19, 9)
|
||||
vcipher(16, 16, 10)
|
||||
vcipher(17, 17, 10)
|
||||
vcipher(18, 18, 10)
|
||||
vcipher(19, 19, 10)
|
||||
vcipher(16, 16, 11)
|
||||
vcipher(17, 17, 11)
|
||||
vcipher(18, 18, 11)
|
||||
vcipher(19, 19, 11)
|
||||
vcipherlast(16, 16, 12)
|
||||
vcipherlast(17, 17, 12)
|
||||
vcipherlast(18, 18, 12)
|
||||
vcipherlast(19, 19, 12)
|
||||
|
||||
#if BR_POWER8_LE
|
||||
vperm(16, 16, 16, 15)
|
||||
vperm(17, 17, 17, 15)
|
||||
vperm(18, 18, 18, 15)
|
||||
vperm(19, 19, 19, 15)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* XOR the data blocks loaded earlier (v20..v23) with the encrypted IV; store.
|
||||
*/
|
||||
vxor(16, 20, 16)
|
||||
vxor(17, 21, 17)
|
||||
vxor(18, 22, 18)
|
||||
vxor(19, 23, 19)
|
||||
stxvw4x(48, %[cc0], %[buf])
|
||||
stxvw4x(49, %[cc1], %[buf])
|
||||
stxvw4x(50, %[cc2], %[buf])
|
||||
stxvw4x(51, %[cc3], %[buf])
|
||||
|
||||
addi(%[buf], %[buf], 64)
|
||||
|
||||
/*
|
||||
* Update IV (vand of a register with itself is a plain copy).
|
||||
*/
|
||||
vand(16, 24, 24)
|
||||
vand(17, 25, 25)
|
||||
vand(18, 26, 26)
|
||||
vand(19, 27, 27)
|
||||
|
||||
bdnz(loop)
|
||||
|
||||
: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
|
||||
[buf] "+b" (buf)
|
||||
: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
|
||||
[ctrinc] "b" (ctrinc)
|
||||
#if BR_POWER8_LE
|
||||
, [idx2be] "b" (idx2be)
|
||||
#endif
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
|
||||
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
|
||||
"ctr", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
/*
 * AES-256 CTR core (fifteen subkeys): handles four blocks (64 bytes) per
 * loop iteration, in place.
 *
 *   sk          subkeys, read as fifteen consecutive 16-byte values
 *   ivbuf       four 16-byte counter blocks at offsets 0, 16, 32 and 48;
 *               input operand only, never written here
 *   buf         data, XORed in place with the encrypted counter blocks
 *   num_blocks  count of 16-byte blocks; the loop runs num_blocks >> 2 times
 *
 * After each iteration every counter block has ctrinc = {0, 0, 0, 4} added
 * word-wise (vadduwm), i.e. its fourth 32-bit word advances by 4.
 * The lxvw4x/vcipher/mtctr/... names are not defined in this file; they
 * are presumably the assembly macros that inner.h provides when
 * BR_POWER_ASM_MACROS is set -- confirm there. The existing comments call
 * VSX registers 32..63 "v0..v31".
 */
static void
|
||||
ctr_256(const unsigned char *sk, const unsigned char *ivbuf,
|
||||
unsigned char *buf, size_t num_blocks)
|
||||
{
|
||||
long cc0, cc1, cc2, cc3;
|
||||
|
||||
#if BR_POWER8_LE
|
||||
static const uint32_t idx2be[] = {
|
||||
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
|
||||
};
|
||||
#endif
|
||||
static const uint32_t ctrinc[] = {
|
||||
0, 0, 0, 4
|
||||
};
|
||||
|
||||
cc0 = 0;
|
||||
cc1 = 16;
|
||||
cc2 = 32;
|
||||
cc3 = 48;
|
||||
asm volatile (
|
||||
|
||||
/*
|
||||
* Load subkeys into v0..v14 (cc0 is the running offset, reset to 0 afterwards)
|
||||
*/
|
||||
lxvw4x(32, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(33, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(34, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(35, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(36, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(37, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(38, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(39, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(40, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(41, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(42, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(43, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(44, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(45, %[cc0], %[sk])
|
||||
addi(%[cc0], %[cc0], 16)
|
||||
lxvw4x(46, %[cc0], %[sk])
|
||||
li(%[cc0], 0)
|
||||
|
||||
#if BR_POWER8_LE
|
||||
/*
|
||||
* v15 = constant for byteswapping words
|
||||
*/
|
||||
lxvw4x(47, 0, %[idx2be])
|
||||
#endif
|
||||
/*
|
||||
* v28 = increment for IV counter.
|
||||
*/
|
||||
lxvw4x(60, 0, %[ctrinc])
|
||||
|
||||
/*
|
||||
* Load IV into v16..v19
|
||||
*/
|
||||
lxvw4x(48, %[cc0], %[ivbuf])
|
||||
lxvw4x(49, %[cc1], %[ivbuf])
|
||||
lxvw4x(50, %[cc2], %[ivbuf])
|
||||
lxvw4x(51, %[cc3], %[ivbuf])
|
||||
#if BR_POWER8_LE
|
||||
vperm(16, 16, 16, 15)
|
||||
vperm(17, 17, 17, 15)
|
||||
vperm(18, 18, 18, 15)
|
||||
vperm(19, 19, 19, 15)
|
||||
#endif
|
||||
|
||||
mtctr(%[num_blocks])
|
||||
label(loop)
|
||||
/*
|
||||
* Compute next IV into v24..v27
|
||||
*/
|
||||
vadduwm(24, 16, 28)
|
||||
vadduwm(25, 17, 28)
|
||||
vadduwm(26, 18, 28)
|
||||
vadduwm(27, 19, 28)
|
||||
|
||||
/*
|
||||
* Load next data blocks. We do this early on but we
|
||||
* won't need them until IV encryption is done.
|
||||
*/
|
||||
lxvw4x(52, %[cc0], %[buf])
|
||||
lxvw4x(53, %[cc1], %[buf])
|
||||
lxvw4x(54, %[cc2], %[buf])
|
||||
lxvw4x(55, %[cc3], %[buf])
|
||||
|
||||
/*
|
||||
* Encrypt the current IV.
|
||||
*/
|
||||
vxor(16, 16, 0)
|
||||
vxor(17, 17, 0)
|
||||
vxor(18, 18, 0)
|
||||
vxor(19, 19, 0)
|
||||
vcipher(16, 16, 1)
|
||||
vcipher(17, 17, 1)
|
||||
vcipher(18, 18, 1)
|
||||
vcipher(19, 19, 1)
|
||||
vcipher(16, 16, 2)
|
||||
vcipher(17, 17, 2)
|
||||
vcipher(18, 18, 2)
|
||||
vcipher(19, 19, 2)
|
||||
vcipher(16, 16, 3)
|
||||
vcipher(17, 17, 3)
|
||||
vcipher(18, 18, 3)
|
||||
vcipher(19, 19, 3)
|
||||
vcipher(16, 16, 4)
|
||||
vcipher(17, 17, 4)
|
||||
vcipher(18, 18, 4)
|
||||
vcipher(19, 19, 4)
|
||||
vcipher(16, 16, 5)
|
||||
vcipher(17, 17, 5)
|
||||
vcipher(18, 18, 5)
|
||||
vcipher(19, 19, 5)
|
||||
vcipher(16, 16, 6)
|
||||
vcipher(17, 17, 6)
|
||||
vcipher(18, 18, 6)
|
||||
vcipher(19, 19, 6)
|
||||
vcipher(16, 16, 7)
|
||||
vcipher(17, 17, 7)
|
||||
vcipher(18, 18, 7)
|
||||
vcipher(19, 19, 7)
|
||||
vcipher(16, 16, 8)
|
||||
vcipher(17, 17, 8)
|
||||
vcipher(18, 18, 8)
|
||||
vcipher(19, 19, 8)
|
||||
vcipher(16, 16, 9)
|
||||
vcipher(17, 17, 9)
|
||||
vcipher(18, 18, 9)
|
||||
vcipher(19, 19, 9)
|
||||
vcipher(16, 16, 10)
|
||||
vcipher(17, 17, 10)
|
||||
vcipher(18, 18, 10)
|
||||
vcipher(19, 19, 10)
|
||||
vcipher(16, 16, 11)
|
||||
vcipher(17, 17, 11)
|
||||
vcipher(18, 18, 11)
|
||||
vcipher(19, 19, 11)
|
||||
vcipher(16, 16, 12)
|
||||
vcipher(17, 17, 12)
|
||||
vcipher(18, 18, 12)
|
||||
vcipher(19, 19, 12)
|
||||
vcipher(16, 16, 13)
|
||||
vcipher(17, 17, 13)
|
||||
vcipher(18, 18, 13)
|
||||
vcipher(19, 19, 13)
|
||||
vcipherlast(16, 16, 14)
|
||||
vcipherlast(17, 17, 14)
|
||||
vcipherlast(18, 18, 14)
|
||||
vcipherlast(19, 19, 14)
|
||||
|
||||
#if BR_POWER8_LE
|
||||
vperm(16, 16, 16, 15)
|
||||
vperm(17, 17, 17, 15)
|
||||
vperm(18, 18, 18, 15)
|
||||
vperm(19, 19, 19, 15)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* XOR the data blocks loaded earlier (v20..v23) with the encrypted IV; store.
|
||||
*/
|
||||
vxor(16, 20, 16)
|
||||
vxor(17, 21, 17)
|
||||
vxor(18, 22, 18)
|
||||
vxor(19, 23, 19)
|
||||
stxvw4x(48, %[cc0], %[buf])
|
||||
stxvw4x(49, %[cc1], %[buf])
|
||||
stxvw4x(50, %[cc2], %[buf])
|
||||
stxvw4x(51, %[cc3], %[buf])
|
||||
|
||||
addi(%[buf], %[buf], 64)
|
||||
|
||||
/*
|
||||
* Update IV (vand of a register with itself is a plain copy).
|
||||
*/
|
||||
vand(16, 24, 24)
|
||||
vand(17, 25, 25)
|
||||
vand(18, 26, 26)
|
||||
vand(19, 27, 27)
|
||||
|
||||
bdnz(loop)
|
||||
|
||||
: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
|
||||
[buf] "+b" (buf)
|
||||
: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
|
||||
[ctrinc] "b" (ctrinc)
|
||||
#if BR_POWER8_LE
|
||||
, [idx2be] "b" (idx2be)
|
||||
#endif
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
|
||||
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
|
||||
"ctr", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
uint32_t
|
||||
br_aes_pwr8_ctr_run(const br_aes_pwr8_ctr_keys *ctx,
|
||||
const void *iv, uint32_t cc, void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
unsigned char ivbuf[64];
|
||||
|
||||
buf = data;
|
||||
memcpy(ivbuf + 0, iv, 12);
|
||||
memcpy(ivbuf + 16, iv, 12);
|
||||
memcpy(ivbuf + 32, iv, 12);
|
||||
memcpy(ivbuf + 48, iv, 12);
|
||||
if (len >= 64) {
|
||||
br_enc32be(ivbuf + 12, cc + 0);
|
||||
br_enc32be(ivbuf + 28, cc + 1);
|
||||
br_enc32be(ivbuf + 44, cc + 2);
|
||||
br_enc32be(ivbuf + 60, cc + 3);
|
||||
switch (ctx->num_rounds) {
|
||||
case 10:
|
||||
ctr_128(ctx->skey.skni, ivbuf, buf,
|
||||
(len >> 4) & ~(size_t)3);
|
||||
break;
|
||||
case 12:
|
||||
ctr_192(ctx->skey.skni, ivbuf, buf,
|
||||
(len >> 4) & ~(size_t)3);
|
||||
break;
|
||||
default:
|
||||
ctr_256(ctx->skey.skni, ivbuf, buf,
|
||||
(len >> 4) & ~(size_t)3);
|
||||
break;
|
||||
}
|
||||
cc += (len >> 4) & ~(size_t)3;
|
||||
buf += len & ~(size_t)63;
|
||||
len &= 63;
|
||||
}
|
||||
if (len > 0) {
|
||||
unsigned char tmp[64];
|
||||
|
||||
memcpy(tmp, buf, len);
|
||||
memset(tmp + len, 0, (sizeof tmp) - len);
|
||||
br_enc32be(ivbuf + 12, cc + 0);
|
||||
br_enc32be(ivbuf + 28, cc + 1);
|
||||
br_enc32be(ivbuf + 44, cc + 2);
|
||||
br_enc32be(ivbuf + 60, cc + 3);
|
||||
switch (ctx->num_rounds) {
|
||||
case 10:
|
||||
ctr_128(ctx->skey.skni, ivbuf, tmp, 4);
|
||||
break;
|
||||
case 12:
|
||||
ctr_192(ctx->skey.skni, ivbuf, tmp, 4);
|
||||
break;
|
||||
default:
|
||||
ctr_256(ctx->skey.skni, ivbuf, tmp, 4);
|
||||
break;
|
||||
}
|
||||
memcpy(buf, tmp, len);
|
||||
cc += (len + 15) >> 4;
|
||||
}
|
||||
return cc;
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
/*
 * Vtable describing this CTR implementation: context size, two numeric
 * constants (16 and 4 -- presumably the block size in bytes and its
 * base-2 logarithm; confirm against the br_block_ctr_class declaration),
 * then the init and run entry points. The casts adapt the concrete
 * context pointer types to the generic class-pointer signatures.
 */
const br_block_ctr_class br_aes_pwr8_ctr_vtable = {
|
||||
sizeof(br_aes_pwr8_ctr_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_ctr_class **, const void *, size_t))
|
||||
&br_aes_pwr8_ctr_init,
|
||||
(uint32_t (*)(const br_block_ctr_class *const *,
|
||||
const void *, uint32_t, void *, size_t))
|
||||
&br_aes_pwr8_ctr_run
|
||||
};
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_ctr_class *
|
||||
br_aes_pwr8_ctr_get_vtable(void)
|
||||
{
|
||||
return br_aes_pwr8_supported() ? &br_aes_pwr8_ctr_vtable : NULL;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* see bearssl_block.h */
|
||||
/*
 * Fallback used when BR_POWER8 is not set: the POWER8 implementation is
 * not compiled in, so there is no vtable to offer and NULL is returned.
 */
const br_block_ctr_class *
|
||||
br_aes_pwr8_ctr_get_vtable(void)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -1,946 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#define BR_POWER_ASM_MACROS 1
|
||||
#include "inner.h"
|
||||
|
||||
#if BR_POWER8
|
||||
|
||||
/* see bearssl_block.h */
|
||||
const br_block_ctrcbc_class *
|
||||
br_aes_pwr8_ctrcbc_get_vtable(void)
|
||||
{
|
||||
return br_aes_pwr8_supported() ? &br_aes_pwr8_ctrcbc_vtable : NULL;
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_pwr8_ctrcbc_init(br_aes_pwr8_ctrcbc_keys *ctx,
|
||||
const void *key, size_t len)
|
||||
{
|
||||
ctx->vtable = &br_aes_pwr8_ctrcbc_vtable;
|
||||
ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
|
||||
}
|
||||
|
||||
/*
|
||||
* Register conventions for CTR + CBC-MAC:
|
||||
*
|
||||
* AES subkeys are in registers 0 to 10/12/14 (depending on key size)
|
||||
* Register v15 contains the byteswap index register (little-endian only)
|
||||
* Register v16 contains the CTR counter value
|
||||
* Register v17 contains the CBC-MAC current value
|
||||
* Registers v18 to v27 are scratch
|
||||
* Counter increment uses v28, v29 and v30
|
||||
*
|
||||
* For CTR alone:
|
||||
*
|
||||
* AES subkeys are in registers 0 to 10/12/14 (depending on key size)
|
||||
* Register v15 contains the byteswap index register (little-endian only)
|
||||
* Registers v16 to v19 contain the CTR counter values (four blocks)
|
||||
* Registers v20 to v27 are scratch
|
||||
* Counter increment uses v28, v29 and v30
|
||||
*/
|
||||
|
||||
/*
 * Load the 11 subkeys of AES-128 from the buffer %[sk] into VSX
 * registers 32 to 42 (i.e. v0 to v10, per the register conventions
 * above). %[cc] is used as the running byte offset into %[sk]: it is
 * advanced by 16 between loads, so it must be 0 on entry and is left
 * at 160 on exit (callers reset it with li(%[cc], 0) afterwards).
 */
#define LOAD_SUBKEYS_128 \
|
||||
lxvw4x(32, %[cc], %[sk]) \
|
||||
addi(%[cc], %[cc], 16) \
|
||||
lxvw4x(33, %[cc], %[sk]) \
|
||||
addi(%[cc], %[cc], 16) \
|
||||
lxvw4x(34, %[cc], %[sk]) \
|
||||
addi(%[cc], %[cc], 16) \
|
||||
lxvw4x(35, %[cc], %[sk]) \
|
||||
addi(%[cc], %[cc], 16) \
|
||||
lxvw4x(36, %[cc], %[sk]) \
|
||||
addi(%[cc], %[cc], 16) \
|
||||
lxvw4x(37, %[cc], %[sk]) \
|
||||
addi(%[cc], %[cc], 16) \
|
||||
lxvw4x(38, %[cc], %[sk]) \
|
||||
addi(%[cc], %[cc], 16) \
|
||||
lxvw4x(39, %[cc], %[sk]) \
|
||||
addi(%[cc], %[cc], 16) \
|
||||
lxvw4x(40, %[cc], %[sk]) \
|
||||
addi(%[cc], %[cc], 16) \
|
||||
lxvw4x(41, %[cc], %[sk]) \
|
||||
addi(%[cc], %[cc], 16) \
|
||||
lxvw4x(42, %[cc], %[sk])
|
||||
|
||||
/*
 * Load the 13 subkeys of AES-192: the first 11 through
 * LOAD_SUBKEYS_128, then two more into VSX registers 43 and 44
 * (v11 and v12), continuing to advance the %[cc] offset by 16.
 */
#define LOAD_SUBKEYS_192 \
|
||||
LOAD_SUBKEYS_128 \
|
||||
addi(%[cc], %[cc], 16) \
|
||||
lxvw4x(43, %[cc], %[sk]) \
|
||||
addi(%[cc], %[cc], 16) \
|
||||
lxvw4x(44, %[cc], %[sk])
|
||||
|
||||
/*
 * Load the 15 subkeys of AES-256: the first 13 through
 * LOAD_SUBKEYS_192, then two more into VSX registers 45 and 46
 * (v13 and v14), continuing to advance the %[cc] offset by 16.
 */
#define LOAD_SUBKEYS_256 \
|
||||
LOAD_SUBKEYS_192 \
|
||||
addi(%[cc], %[cc], 16) \
|
||||
lxvw4x(45, %[cc], %[sk]) \
|
||||
addi(%[cc], %[cc], 16) \
|
||||
lxvw4x(46, %[cc], %[sk])
|
||||
|
||||
#define BLOCK_ENCRYPT_128(x) \
|
||||
vxor(x, x, 0) \
|
||||
vcipher(x, x, 1) \
|
||||
vcipher(x, x, 2) \
|
||||
vcipher(x, x, 3) \
|
||||
vcipher(x, x, 4) \
|
||||
vcipher(x, x, 5) \
|
||||
vcipher(x, x, 6) \
|
||||
vcipher(x, x, 7) \
|
||||
vcipher(x, x, 8) \
|
||||
vcipher(x, x, 9) \
|
||||
vcipherlast(x, x, 10)
|
||||
|
||||
#define BLOCK_ENCRYPT_192(x) \
|
||||
vxor(x, x, 0) \
|
||||
vcipher(x, x, 1) \
|
||||
vcipher(x, x, 2) \
|
||||
vcipher(x, x, 3) \
|
||||
vcipher(x, x, 4) \
|
||||
vcipher(x, x, 5) \
|
||||
vcipher(x, x, 6) \
|
||||
vcipher(x, x, 7) \
|
||||
vcipher(x, x, 8) \
|
||||
vcipher(x, x, 9) \
|
||||
vcipher(x, x, 10) \
|
||||
vcipher(x, x, 11) \
|
||||
vcipherlast(x, x, 12)
|
||||
|
||||
#define BLOCK_ENCRYPT_256(x) \
|
||||
vxor(x, x, 0) \
|
||||
vcipher(x, x, 1) \
|
||||
vcipher(x, x, 2) \
|
||||
vcipher(x, x, 3) \
|
||||
vcipher(x, x, 4) \
|
||||
vcipher(x, x, 5) \
|
||||
vcipher(x, x, 6) \
|
||||
vcipher(x, x, 7) \
|
||||
vcipher(x, x, 8) \
|
||||
vcipher(x, x, 9) \
|
||||
vcipher(x, x, 10) \
|
||||
vcipher(x, x, 11) \
|
||||
vcipher(x, x, 12) \
|
||||
vcipher(x, x, 13) \
|
||||
vcipherlast(x, x, 14)
|
||||
|
||||
#define BLOCK_ENCRYPT_X2_128(x, y) \
|
||||
vxor(x, x, 0) \
|
||||
vxor(y, y, 0) \
|
||||
vcipher(x, x, 1) \
|
||||
vcipher(y, y, 1) \
|
||||
vcipher(x, x, 2) \
|
||||
vcipher(y, y, 2) \
|
||||
vcipher(x, x, 3) \
|
||||
vcipher(y, y, 3) \
|
||||
vcipher(x, x, 4) \
|
||||
vcipher(y, y, 4) \
|
||||
vcipher(x, x, 5) \
|
||||
vcipher(y, y, 5) \
|
||||
vcipher(x, x, 6) \
|
||||
vcipher(y, y, 6) \
|
||||
vcipher(x, x, 7) \
|
||||
vcipher(y, y, 7) \
|
||||
vcipher(x, x, 8) \
|
||||
vcipher(y, y, 8) \
|
||||
vcipher(x, x, 9) \
|
||||
vcipher(y, y, 9) \
|
||||
vcipherlast(x, x, 10) \
|
||||
vcipherlast(y, y, 10)
|
||||
|
||||
#define BLOCK_ENCRYPT_X2_192(x, y) \
|
||||
vxor(x, x, 0) \
|
||||
vxor(y, y, 0) \
|
||||
vcipher(x, x, 1) \
|
||||
vcipher(y, y, 1) \
|
||||
vcipher(x, x, 2) \
|
||||
vcipher(y, y, 2) \
|
||||
vcipher(x, x, 3) \
|
||||
vcipher(y, y, 3) \
|
||||
vcipher(x, x, 4) \
|
||||
vcipher(y, y, 4) \
|
||||
vcipher(x, x, 5) \
|
||||
vcipher(y, y, 5) \
|
||||
vcipher(x, x, 6) \
|
||||
vcipher(y, y, 6) \
|
||||
vcipher(x, x, 7) \
|
||||
vcipher(y, y, 7) \
|
||||
vcipher(x, x, 8) \
|
||||
vcipher(y, y, 8) \
|
||||
vcipher(x, x, 9) \
|
||||
vcipher(y, y, 9) \
|
||||
vcipher(x, x, 10) \
|
||||
vcipher(y, y, 10) \
|
||||
vcipher(x, x, 11) \
|
||||
vcipher(y, y, 11) \
|
||||
vcipherlast(x, x, 12) \
|
||||
vcipherlast(y, y, 12)
|
||||
|
||||
#define BLOCK_ENCRYPT_X2_256(x, y) \
|
||||
vxor(x, x, 0) \
|
||||
vxor(y, y, 0) \
|
||||
vcipher(x, x, 1) \
|
||||
vcipher(y, y, 1) \
|
||||
vcipher(x, x, 2) \
|
||||
vcipher(y, y, 2) \
|
||||
vcipher(x, x, 3) \
|
||||
vcipher(y, y, 3) \
|
||||
vcipher(x, x, 4) \
|
||||
vcipher(y, y, 4) \
|
||||
vcipher(x, x, 5) \
|
||||
vcipher(y, y, 5) \
|
||||
vcipher(x, x, 6) \
|
||||
vcipher(y, y, 6) \
|
||||
vcipher(x, x, 7) \
|
||||
vcipher(y, y, 7) \
|
||||
vcipher(x, x, 8) \
|
||||
vcipher(y, y, 8) \
|
||||
vcipher(x, x, 9) \
|
||||
vcipher(y, y, 9) \
|
||||
vcipher(x, x, 10) \
|
||||
vcipher(y, y, 10) \
|
||||
vcipher(x, x, 11) \
|
||||
vcipher(y, y, 11) \
|
||||
vcipher(x, x, 12) \
|
||||
vcipher(y, y, 12) \
|
||||
vcipher(x, x, 13) \
|
||||
vcipher(y, y, 13) \
|
||||
vcipherlast(x, x, 14) \
|
||||
vcipherlast(y, y, 14)
|
||||
|
||||
#define BLOCK_ENCRYPT_X4_128(x0, x1, x2, x3) \
|
||||
vxor(x0, x0, 0) \
|
||||
vxor(x1, x1, 0) \
|
||||
vxor(x2, x2, 0) \
|
||||
vxor(x3, x3, 0) \
|
||||
vcipher(x0, x0, 1) \
|
||||
vcipher(x1, x1, 1) \
|
||||
vcipher(x2, x2, 1) \
|
||||
vcipher(x3, x3, 1) \
|
||||
vcipher(x0, x0, 2) \
|
||||
vcipher(x1, x1, 2) \
|
||||
vcipher(x2, x2, 2) \
|
||||
vcipher(x3, x3, 2) \
|
||||
vcipher(x0, x0, 3) \
|
||||
vcipher(x1, x1, 3) \
|
||||
vcipher(x2, x2, 3) \
|
||||
vcipher(x3, x3, 3) \
|
||||
vcipher(x0, x0, 4) \
|
||||
vcipher(x1, x1, 4) \
|
||||
vcipher(x2, x2, 4) \
|
||||
vcipher(x3, x3, 4) \
|
||||
vcipher(x0, x0, 5) \
|
||||
vcipher(x1, x1, 5) \
|
||||
vcipher(x2, x2, 5) \
|
||||
vcipher(x3, x3, 5) \
|
||||
vcipher(x0, x0, 6) \
|
||||
vcipher(x1, x1, 6) \
|
||||
vcipher(x2, x2, 6) \
|
||||
vcipher(x3, x3, 6) \
|
||||
vcipher(x0, x0, 7) \
|
||||
vcipher(x1, x1, 7) \
|
||||
vcipher(x2, x2, 7) \
|
||||
vcipher(x3, x3, 7) \
|
||||
vcipher(x0, x0, 8) \
|
||||
vcipher(x1, x1, 8) \
|
||||
vcipher(x2, x2, 8) \
|
||||
vcipher(x3, x3, 8) \
|
||||
vcipher(x0, x0, 9) \
|
||||
vcipher(x1, x1, 9) \
|
||||
vcipher(x2, x2, 9) \
|
||||
vcipher(x3, x3, 9) \
|
||||
vcipherlast(x0, x0, 10) \
|
||||
vcipherlast(x1, x1, 10) \
|
||||
vcipherlast(x2, x2, 10) \
|
||||
vcipherlast(x3, x3, 10)
|
||||
|
||||
#define BLOCK_ENCRYPT_X4_192(x0, x1, x2, x3) \
|
||||
vxor(x0, x0, 0) \
|
||||
vxor(x1, x1, 0) \
|
||||
vxor(x2, x2, 0) \
|
||||
vxor(x3, x3, 0) \
|
||||
vcipher(x0, x0, 1) \
|
||||
vcipher(x1, x1, 1) \
|
||||
vcipher(x2, x2, 1) \
|
||||
vcipher(x3, x3, 1) \
|
||||
vcipher(x0, x0, 2) \
|
||||
vcipher(x1, x1, 2) \
|
||||
vcipher(x2, x2, 2) \
|
||||
vcipher(x3, x3, 2) \
|
||||
vcipher(x0, x0, 3) \
|
||||
vcipher(x1, x1, 3) \
|
||||
vcipher(x2, x2, 3) \
|
||||
vcipher(x3, x3, 3) \
|
||||
vcipher(x0, x0, 4) \
|
||||
vcipher(x1, x1, 4) \
|
||||
vcipher(x2, x2, 4) \
|
||||
vcipher(x3, x3, 4) \
|
||||
vcipher(x0, x0, 5) \
|
||||
vcipher(x1, x1, 5) \
|
||||
vcipher(x2, x2, 5) \
|
||||
vcipher(x3, x3, 5) \
|
||||
vcipher(x0, x0, 6) \
|
||||
vcipher(x1, x1, 6) \
|
||||
vcipher(x2, x2, 6) \
|
||||
vcipher(x3, x3, 6) \
|
||||
vcipher(x0, x0, 7) \
|
||||
vcipher(x1, x1, 7) \
|
||||
vcipher(x2, x2, 7) \
|
||||
vcipher(x3, x3, 7) \
|
||||
vcipher(x0, x0, 8) \
|
||||
vcipher(x1, x1, 8) \
|
||||
vcipher(x2, x2, 8) \
|
||||
vcipher(x3, x3, 8) \
|
||||
vcipher(x0, x0, 9) \
|
||||
vcipher(x1, x1, 9) \
|
||||
vcipher(x2, x2, 9) \
|
||||
vcipher(x3, x3, 9) \
|
||||
vcipher(x0, x0, 10) \
|
||||
vcipher(x1, x1, 10) \
|
||||
vcipher(x2, x2, 10) \
|
||||
vcipher(x3, x3, 10) \
|
||||
vcipher(x0, x0, 11) \
|
||||
vcipher(x1, x1, 11) \
|
||||
vcipher(x2, x2, 11) \
|
||||
vcipher(x3, x3, 11) \
|
||||
vcipherlast(x0, x0, 12) \
|
||||
vcipherlast(x1, x1, 12) \
|
||||
vcipherlast(x2, x2, 12) \
|
||||
vcipherlast(x3, x3, 12)
|
||||
|
||||
#define BLOCK_ENCRYPT_X4_256(x0, x1, x2, x3) \
|
||||
vxor(x0, x0, 0) \
|
||||
vxor(x1, x1, 0) \
|
||||
vxor(x2, x2, 0) \
|
||||
vxor(x3, x3, 0) \
|
||||
vcipher(x0, x0, 1) \
|
||||
vcipher(x1, x1, 1) \
|
||||
vcipher(x2, x2, 1) \
|
||||
vcipher(x3, x3, 1) \
|
||||
vcipher(x0, x0, 2) \
|
||||
vcipher(x1, x1, 2) \
|
||||
vcipher(x2, x2, 2) \
|
||||
vcipher(x3, x3, 2) \
|
||||
vcipher(x0, x0, 3) \
|
||||
vcipher(x1, x1, 3) \
|
||||
vcipher(x2, x2, 3) \
|
||||
vcipher(x3, x3, 3) \
|
||||
vcipher(x0, x0, 4) \
|
||||
vcipher(x1, x1, 4) \
|
||||
vcipher(x2, x2, 4) \
|
||||
vcipher(x3, x3, 4) \
|
||||
vcipher(x0, x0, 5) \
|
||||
vcipher(x1, x1, 5) \
|
||||
vcipher(x2, x2, 5) \
|
||||
vcipher(x3, x3, 5) \
|
||||
vcipher(x0, x0, 6) \
|
||||
vcipher(x1, x1, 6) \
|
||||
vcipher(x2, x2, 6) \
|
||||
vcipher(x3, x3, 6) \
|
||||
vcipher(x0, x0, 7) \
|
||||
vcipher(x1, x1, 7) \
|
||||
vcipher(x2, x2, 7) \
|
||||
vcipher(x3, x3, 7) \
|
||||
vcipher(x0, x0, 8) \
|
||||
vcipher(x1, x1, 8) \
|
||||
vcipher(x2, x2, 8) \
|
||||
vcipher(x3, x3, 8) \
|
||||
vcipher(x0, x0, 9) \
|
||||
vcipher(x1, x1, 9) \
|
||||
vcipher(x2, x2, 9) \
|
||||
vcipher(x3, x3, 9) \
|
||||
vcipher(x0, x0, 10) \
|
||||
vcipher(x1, x1, 10) \
|
||||
vcipher(x2, x2, 10) \
|
||||
vcipher(x3, x3, 10) \
|
||||
vcipher(x0, x0, 11) \
|
||||
vcipher(x1, x1, 11) \
|
||||
vcipher(x2, x2, 11) \
|
||||
vcipher(x3, x3, 11) \
|
||||
vcipher(x0, x0, 12) \
|
||||
vcipher(x1, x1, 12) \
|
||||
vcipher(x2, x2, 12) \
|
||||
vcipher(x3, x3, 12) \
|
||||
vcipher(x0, x0, 13) \
|
||||
vcipher(x1, x1, 13) \
|
||||
vcipher(x2, x2, 13) \
|
||||
vcipher(x3, x3, 13) \
|
||||
vcipherlast(x0, x0, 14) \
|
||||
vcipherlast(x1, x1, 14) \
|
||||
vcipherlast(x2, x2, 14) \
|
||||
vcipherlast(x3, x3, 14)
|
||||
|
||||
/*
 * Byte-swapping helpers for the assembly blocks below.
 *
 * When BR_POWER8_LE is set:
 *   BYTESWAP_INIT    loads the idx2be permutation into VSX register 47
 *                    (v15, the byteswap index register)
 *   BYTESWAP(x)      permutes register x in place with v15
 *   BYTESWAPX(d, s)  permutes register s with v15, result in d
 *   BYTESWAP_REG     adds idx2be to the asm input operand list
 *
 * Otherwise no swap is emitted: BYTESWAP_INIT, BYTESWAP and
 * BYTESWAP_REG expand to nothing, and BYTESWAPX(d, s) is a plain
 * register copy (vand of s with itself into d).
 */
#if BR_POWER8_LE
|
||||
static const uint32_t idx2be[] = {
|
||||
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
|
||||
};
|
||||
#define BYTESWAP_INIT lxvw4x(47, 0, %[idx2be])
|
||||
#define BYTESWAP(x) vperm(x, x, x, 15)
|
||||
#define BYTESWAPX(d, s) vperm(d, s, s, 15)
|
||||
#define BYTESWAP_REG , [idx2be] "b" (idx2be)
|
||||
#else
|
||||
#define BYTESWAP_INIT
|
||||
#define BYTESWAP(x)
|
||||
#define BYTESWAPX(d, s) vand(d, s, s)
|
||||
#define BYTESWAP_REG
|
||||
#endif
|
||||
|
||||
/*
 * Counter increments, as four 32-bit words. ctrinc (increment of 1) is
 * loaded by INCR_128_INIT and used by the combined CTR + CBC-MAC
 * encrypt/decrypt code; ctrinc_x4 (increment of 4) is loaded by
 * INCR_128_X4_INIT and used by the CTR-only code, which processes four
 * counter blocks per iteration.
 */
static const uint32_t ctrinc[] = {
|
||||
0, 0, 0, 1
|
||||
};
|
||||
static const uint32_t ctrinc_x4[] = {
|
||||
0, 0, 0, 4
|
||||
};
|
||||
/*
 * 128-bit counter increment.
 *
 * INCR_128_INIT / INCR_128_X4_INIT load the increment (ctrinc or
 * ctrinc_x4) into VSX register 60, i.e. v28.
 *
 * INCR_128(d, s) sets register d to s + v28; s is left unmodified when
 * d and s are distinct registers. v29 and v30 are used as scratch.
 * The addition is done on 32-bit words (vadduwm); the per-word carries
 * (vaddcuw) are moved by one word (vsldoi by 4 bytes) and added again,
 * three times in total, so that a carry can travel across all four
 * words.
 * NOTE(review): this appears to rely on the increment in v28 being
 * confined to the least significant word, as is the case for ctrinc
 * and ctrinc_x4 -- confirm before using with another increment.
 */
#define INCR_128_INIT lxvw4x(60, 0, %[ctrinc])
|
||||
#define INCR_128_X4_INIT lxvw4x(60, 0, %[ctrinc_x4])
|
||||
#define INCR_128(d, s) \
|
||||
vaddcuw(29, s, 28) \
|
||||
vadduwm(d, s, 28) \
|
||||
vsldoi(30, 29, 29, 4) \
|
||||
vaddcuw(29, d, 30) \
|
||||
vadduwm(d, d, 30) \
|
||||
vsldoi(30, 29, 29, 4) \
|
||||
vaddcuw(29, d, 30) \
|
||||
vadduwm(d, d, 30) \
|
||||
vsldoi(30, 29, 29, 4) \
|
||||
vadduwm(d, d, 30)
|
||||
|
||||
#define MKCTR(size) \
|
||||
static void \
|
||||
ctr_ ## size(const unsigned char *sk, \
|
||||
unsigned char *ctrbuf, unsigned char *buf, size_t num_blocks_x4) \
|
||||
{ \
|
||||
long cc, cc0, cc1, cc2, cc3; \
|
||||
\
|
||||
cc = 0; \
|
||||
cc0 = 0; \
|
||||
cc1 = 16; \
|
||||
cc2 = 32; \
|
||||
cc3 = 48; \
|
||||
asm volatile ( \
|
||||
\
|
||||
/* \
|
||||
* Load subkeys into v0..v10 \
|
||||
*/ \
|
||||
LOAD_SUBKEYS_ ## size \
|
||||
li(%[cc], 0) \
|
||||
\
|
||||
BYTESWAP_INIT \
|
||||
INCR_128_X4_INIT \
|
||||
\
|
||||
/* \
|
||||
* Load current CTR counters into v16 to v19. \
|
||||
*/ \
|
||||
lxvw4x(48, %[cc0], %[ctrbuf]) \
|
||||
lxvw4x(49, %[cc1], %[ctrbuf]) \
|
||||
lxvw4x(50, %[cc2], %[ctrbuf]) \
|
||||
lxvw4x(51, %[cc3], %[ctrbuf]) \
|
||||
BYTESWAP(16) \
|
||||
BYTESWAP(17) \
|
||||
BYTESWAP(18) \
|
||||
BYTESWAP(19) \
|
||||
\
|
||||
mtctr(%[num_blocks_x4]) \
|
||||
\
|
||||
label(loop) \
|
||||
/* \
|
||||
* Compute next counter values into v20..v23. \
|
||||
*/ \
|
||||
INCR_128(20, 16) \
|
||||
INCR_128(21, 17) \
|
||||
INCR_128(22, 18) \
|
||||
INCR_128(23, 19) \
|
||||
\
|
||||
/* \
|
||||
* Encrypt counter values and XOR into next data blocks. \
|
||||
*/ \
|
||||
lxvw4x(56, %[cc0], %[buf]) \
|
||||
lxvw4x(57, %[cc1], %[buf]) \
|
||||
lxvw4x(58, %[cc2], %[buf]) \
|
||||
lxvw4x(59, %[cc3], %[buf]) \
|
||||
BYTESWAP(24) \
|
||||
BYTESWAP(25) \
|
||||
BYTESWAP(26) \
|
||||
BYTESWAP(27) \
|
||||
BLOCK_ENCRYPT_X4_ ## size(16, 17, 18, 19) \
|
||||
vxor(16, 16, 24) \
|
||||
vxor(17, 17, 25) \
|
||||
vxor(18, 18, 26) \
|
||||
vxor(19, 19, 27) \
|
||||
BYTESWAP(16) \
|
||||
BYTESWAP(17) \
|
||||
BYTESWAP(18) \
|
||||
BYTESWAP(19) \
|
||||
stxvw4x(48, %[cc0], %[buf]) \
|
||||
stxvw4x(49, %[cc1], %[buf]) \
|
||||
stxvw4x(50, %[cc2], %[buf]) \
|
||||
stxvw4x(51, %[cc3], %[buf]) \
|
||||
\
|
||||
/* \
|
||||
* Update counters and data pointer. \
|
||||
*/ \
|
||||
vand(16, 20, 20) \
|
||||
vand(17, 21, 21) \
|
||||
vand(18, 22, 22) \
|
||||
vand(19, 23, 23) \
|
||||
addi(%[buf], %[buf], 64) \
|
||||
\
|
||||
bdnz(loop) \
|
||||
\
|
||||
/* \
|
||||
* Write back new counter values. \
|
||||
*/ \
|
||||
BYTESWAP(16) \
|
||||
BYTESWAP(17) \
|
||||
BYTESWAP(18) \
|
||||
BYTESWAP(19) \
|
||||
stxvw4x(48, %[cc0], %[ctrbuf]) \
|
||||
stxvw4x(49, %[cc1], %[ctrbuf]) \
|
||||
stxvw4x(50, %[cc2], %[ctrbuf]) \
|
||||
stxvw4x(51, %[cc3], %[ctrbuf]) \
|
||||
\
|
||||
: [cc] "+b" (cc), [buf] "+b" (buf), \
|
||||
[cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3) \
|
||||
: [sk] "b" (sk), [ctrbuf] "b" (ctrbuf), \
|
||||
[num_blocks_x4] "b" (num_blocks_x4), [ctrinc_x4] "b" (ctrinc_x4) \
|
||||
BYTESWAP_REG \
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
|
||||
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
|
||||
"v30", "ctr", "memory" \
|
||||
); \
|
||||
}
|
||||
|
||||
MKCTR(128)
|
||||
MKCTR(192)
|
||||
MKCTR(256)
|
||||
|
||||
#define MKCBCMAC(size) \
|
||||
static void \
|
||||
cbcmac_ ## size(const unsigned char *sk, \
|
||||
unsigned char *cbcmac, const unsigned char *buf, size_t num_blocks) \
|
||||
{ \
|
||||
long cc; \
|
||||
\
|
||||
cc = 0; \
|
||||
asm volatile ( \
|
||||
\
|
||||
/* \
|
||||
* Load subkeys into v0..v10 \
|
||||
*/ \
|
||||
LOAD_SUBKEYS_ ## size \
|
||||
li(%[cc], 0) \
|
||||
\
|
||||
BYTESWAP_INIT \
|
||||
\
|
||||
/* \
|
||||
* Load current CBC-MAC value into v16. \
|
||||
*/ \
|
||||
lxvw4x(48, %[cc], %[cbcmac]) \
|
||||
BYTESWAP(16) \
|
||||
\
|
||||
mtctr(%[num_blocks]) \
|
||||
\
|
||||
label(loop) \
|
||||
/* \
|
||||
* Load next block, XOR into current CBC-MAC value, \
|
||||
* and then encrypt it. \
|
||||
*/ \
|
||||
lxvw4x(49, %[cc], %[buf]) \
|
||||
BYTESWAP(17) \
|
||||
vxor(16, 16, 17) \
|
||||
BLOCK_ENCRYPT_ ## size(16) \
|
||||
addi(%[buf], %[buf], 16) \
|
||||
\
|
||||
bdnz(loop) \
|
||||
\
|
||||
/* \
|
||||
* Write back new CBC-MAC value. \
|
||||
*/ \
|
||||
BYTESWAP(16) \
|
||||
stxvw4x(48, %[cc], %[cbcmac]) \
|
||||
\
|
||||
: [cc] "+b" (cc), [buf] "+b" (buf) \
|
||||
: [sk] "b" (sk), [cbcmac] "b" (cbcmac), [num_blocks] "b" (num_blocks) \
|
||||
BYTESWAP_REG \
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
|
||||
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
|
||||
"v30", "ctr", "memory" \
|
||||
); \
|
||||
}
|
||||
|
||||
MKCBCMAC(128)
|
||||
MKCBCMAC(192)
|
||||
MKCBCMAC(256)
|
||||
|
||||
#define MKENCRYPT(size) \
|
||||
static void \
|
||||
ctrcbc_ ## size ## _encrypt(const unsigned char *sk, \
|
||||
unsigned char *ctr, unsigned char *cbcmac, unsigned char *buf, \
|
||||
size_t num_blocks) \
|
||||
{ \
|
||||
long cc; \
|
||||
\
|
||||
cc = 0; \
|
||||
asm volatile ( \
|
||||
\
|
||||
/* \
|
||||
* Load subkeys into v0..v10 \
|
||||
*/ \
|
||||
LOAD_SUBKEYS_ ## size \
|
||||
li(%[cc], 0) \
|
||||
\
|
||||
BYTESWAP_INIT \
|
||||
INCR_128_INIT \
|
||||
\
|
||||
/* \
|
||||
* Load current CTR counter into v16, and current \
|
||||
* CBC-MAC IV into v17. \
|
||||
*/ \
|
||||
lxvw4x(48, %[cc], %[ctr]) \
|
||||
lxvw4x(49, %[cc], %[cbcmac]) \
|
||||
BYTESWAP(16) \
|
||||
BYTESWAP(17) \
|
||||
\
|
||||
/* \
|
||||
* At each iteration, we do two parallel encryption: \
|
||||
* - new counter value for encryption of the next block; \
|
||||
* - CBC-MAC over the previous encrypted block. \
|
||||
* Thus, each plaintext block implies two AES instances, \
|
||||
* over two successive iterations. This requires a single \
|
||||
* counter encryption before the loop, and a single \
|
||||
* CBC-MAC encryption after the loop. \
|
||||
*/ \
|
||||
\
|
||||
/* \
|
||||
* Encrypt first block (into v20). \
|
||||
*/ \
|
||||
lxvw4x(52, %[cc], %[buf]) \
|
||||
BYTESWAP(20) \
|
||||
INCR_128(22, 16) \
|
||||
BLOCK_ENCRYPT_ ## size(16) \
|
||||
vxor(20, 20, 16) \
|
||||
BYTESWAPX(21, 20) \
|
||||
stxvw4x(53, %[cc], %[buf]) \
|
||||
vand(16, 22, 22) \
|
||||
addi(%[buf], %[buf], 16) \
|
||||
\
|
||||
/* \
|
||||
* Load loop counter; skip the loop if there is only \
|
||||
* one block in total (already handled by the boundary \
|
||||
* conditions). \
|
||||
*/ \
|
||||
mtctr(%[num_blocks]) \
|
||||
bdz(fastexit) \
|
||||
\
|
||||
label(loop) \
|
||||
/* \
|
||||
* Upon loop entry: \
|
||||
* v16 counter value for next block \
|
||||
* v17 current CBC-MAC value \
|
||||
* v20 encrypted previous block \
|
||||
*/ \
|
||||
vxor(17, 17, 20) \
|
||||
INCR_128(22, 16) \
|
||||
lxvw4x(52, %[cc], %[buf]) \
|
||||
BYTESWAP(20) \
|
||||
BLOCK_ENCRYPT_X2_ ## size(16, 17) \
|
||||
vxor(20, 20, 16) \
|
||||
BYTESWAPX(21, 20) \
|
||||
stxvw4x(53, %[cc], %[buf]) \
|
||||
addi(%[buf], %[buf], 16) \
|
||||
vand(16, 22, 22) \
|
||||
\
|
||||
bdnz(loop) \
|
||||
\
|
||||
label(fastexit) \
|
||||
vxor(17, 17, 20) \
|
||||
BLOCK_ENCRYPT_ ## size(17) \
|
||||
BYTESWAP(16) \
|
||||
BYTESWAP(17) \
|
||||
stxvw4x(48, %[cc], %[ctr]) \
|
||||
stxvw4x(49, %[cc], %[cbcmac]) \
|
||||
\
|
||||
: [cc] "+b" (cc), [buf] "+b" (buf) \
|
||||
: [sk] "b" (sk), [ctr] "b" (ctr), [cbcmac] "b" (cbcmac), \
|
||||
[num_blocks] "b" (num_blocks), [ctrinc] "b" (ctrinc) \
|
||||
BYTESWAP_REG \
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
|
||||
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
|
||||
"v30", "ctr", "memory" \
|
||||
); \
|
||||
}
|
||||
|
||||
MKENCRYPT(128)
|
||||
MKENCRYPT(192)
|
||||
MKENCRYPT(256)
|
||||
|
||||
#define MKDECRYPT(size) \
|
||||
static void \
|
||||
ctrcbc_ ## size ## _decrypt(const unsigned char *sk, \
|
||||
unsigned char *ctr, unsigned char *cbcmac, unsigned char *buf, \
|
||||
size_t num_blocks) \
|
||||
{ \
|
||||
long cc; \
|
||||
\
|
||||
cc = 0; \
|
||||
asm volatile ( \
|
||||
\
|
||||
/* \
|
||||
* Load subkeys into v0..v10 \
|
||||
*/ \
|
||||
LOAD_SUBKEYS_ ## size \
|
||||
li(%[cc], 0) \
|
||||
\
|
||||
BYTESWAP_INIT \
|
||||
INCR_128_INIT \
|
||||
\
|
||||
/* \
|
||||
* Load current CTR counter into v16, and current \
|
||||
* CBC-MAC IV into v17. \
|
||||
*/ \
|
||||
lxvw4x(48, %[cc], %[ctr]) \
|
||||
lxvw4x(49, %[cc], %[cbcmac]) \
|
||||
BYTESWAP(16) \
|
||||
BYTESWAP(17) \
|
||||
\
|
||||
/* \
|
||||
* At each iteration, we do two parallel encryption: \
|
||||
* - new counter value for decryption of the next block; \
|
||||
* - CBC-MAC over the next encrypted block. \
|
||||
* Each iteration performs the two AES instances related \
|
||||
* to the current block; there is thus no need for some \
|
||||
* extra pre-loop and post-loop work as in encryption. \
|
||||
*/ \
|
||||
\
|
||||
mtctr(%[num_blocks]) \
|
||||
\
|
||||
label(loop) \
|
||||
/* \
|
||||
* Upon loop entry: \
|
||||
* v16 counter value for next block \
|
||||
* v17 current CBC-MAC value \
|
||||
*/ \
|
||||
lxvw4x(52, %[cc], %[buf]) \
|
||||
BYTESWAP(20) \
|
||||
vxor(17, 17, 20) \
|
||||
INCR_128(22, 16) \
|
||||
BLOCK_ENCRYPT_X2_ ## size(16, 17) \
|
||||
vxor(20, 20, 16) \
|
||||
BYTESWAPX(21, 20) \
|
||||
stxvw4x(53, %[cc], %[buf]) \
|
||||
addi(%[buf], %[buf], 16) \
|
||||
vand(16, 22, 22) \
|
||||
\
|
||||
bdnz(loop) \
|
||||
\
|
||||
/* \
|
||||
* Store back counter and CBC-MAC value. \
|
||||
*/ \
|
||||
BYTESWAP(16) \
|
||||
BYTESWAP(17) \
|
||||
stxvw4x(48, %[cc], %[ctr]) \
|
||||
stxvw4x(49, %[cc], %[cbcmac]) \
|
||||
\
|
||||
: [cc] "+b" (cc), [buf] "+b" (buf) \
|
||||
: [sk] "b" (sk), [ctr] "b" (ctr), [cbcmac] "b" (cbcmac), \
|
||||
[num_blocks] "b" (num_blocks), [ctrinc] "b" (ctrinc) \
|
||||
BYTESWAP_REG \
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
|
||||
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
|
||||
"v30", "ctr", "memory" \
|
||||
); \
|
||||
}
|
||||
|
||||
MKDECRYPT(128)
|
||||
MKDECRYPT(192)
|
||||
MKDECRYPT(256)
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_pwr8_ctrcbc_encrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
|
||||
void *ctr, void *cbcmac, void *data, size_t len)
|
||||
{
|
||||
if (len == 0) {
|
||||
return;
|
||||
}
|
||||
switch (ctx->num_rounds) {
|
||||
case 10:
|
||||
ctrcbc_128_encrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
|
||||
break;
|
||||
case 12:
|
||||
ctrcbc_192_encrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
|
||||
break;
|
||||
default:
|
||||
ctrcbc_256_encrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_pwr8_ctrcbc_decrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
|
||||
void *ctr, void *cbcmac, void *data, size_t len)
|
||||
{
|
||||
if (len == 0) {
|
||||
return;
|
||||
}
|
||||
switch (ctx->num_rounds) {
|
||||
case 10:
|
||||
ctrcbc_128_decrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
|
||||
break;
|
||||
case 12:
|
||||
ctrcbc_192_decrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
|
||||
break;
|
||||
default:
|
||||
ctrcbc_256_decrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Read a 128-bit big-endian integer from src (16 bytes), add one to it,
 * and write the result to dst (16 bytes), also in big-endian. Both
 * halves are read before anything is written. The carry from the low
 * half into the high half is derived with bit operations only (no
 * conditional branch): it is 1 exactly when the incremented low half
 * wrapped around to zero.
 */
static inline void
incr_ctr(void *dst, const void *src)
{
	const unsigned char *in;
	unsigned char *out;
	uint64_t upper, lower, nonzero;

	in = src;
	out = dst;
	upper = br_dec64be(in);
	lower = br_dec64be(in + 8) + 1;

	/* Top bit of (x | -x) is set if and only if x is non-zero. */
	nonzero = (lower | -lower) >> 63;
	upper += nonzero ^ (uint64_t)1;

	br_enc64be(out, upper);
	br_enc64be(out + 8, lower);
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_pwr8_ctrcbc_ctr(const br_aes_pwr8_ctrcbc_keys *ctx,
|
||||
void *ctr, void *data, size_t len)
|
||||
{
|
||||
unsigned char ctrbuf[64];
|
||||
|
||||
memcpy(ctrbuf, ctr, 16);
|
||||
incr_ctr(ctrbuf + 16, ctrbuf);
|
||||
incr_ctr(ctrbuf + 32, ctrbuf + 16);
|
||||
incr_ctr(ctrbuf + 48, ctrbuf + 32);
|
||||
if (len >= 64) {
|
||||
switch (ctx->num_rounds) {
|
||||
case 10:
|
||||
ctr_128(ctx->skey.skni, ctrbuf, data, len >> 6);
|
||||
break;
|
||||
case 12:
|
||||
ctr_192(ctx->skey.skni, ctrbuf, data, len >> 6);
|
||||
break;
|
||||
default:
|
||||
ctr_256(ctx->skey.skni, ctrbuf, data, len >> 6);
|
||||
break;
|
||||
}
|
||||
data = (unsigned char *)data + (len & ~(size_t)63);
|
||||
len &= 63;
|
||||
}
|
||||
if (len > 0) {
|
||||
unsigned char tmp[64];
|
||||
|
||||
if (len >= 32) {
|
||||
if (len >= 48) {
|
||||
memcpy(ctr, ctrbuf + 48, 16);
|
||||
} else {
|
||||
memcpy(ctr, ctrbuf + 32, 16);
|
||||
}
|
||||
} else {
|
||||
if (len >= 16) {
|
||||
memcpy(ctr, ctrbuf + 16, 16);
|
||||
}
|
||||
}
|
||||
memcpy(tmp, data, len);
|
||||
memset(tmp + len, 0, (sizeof tmp) - len);
|
||||
switch (ctx->num_rounds) {
|
||||
case 10:
|
||||
ctr_128(ctx->skey.skni, ctrbuf, tmp, 1);
|
||||
break;
|
||||
case 12:
|
||||
ctr_192(ctx->skey.skni, ctrbuf, tmp, 1);
|
||||
break;
|
||||
default:
|
||||
ctr_256(ctx->skey.skni, ctrbuf, tmp, 1);
|
||||
break;
|
||||
}
|
||||
memcpy(data, tmp, len);
|
||||
} else {
|
||||
memcpy(ctr, ctrbuf, 16);
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
void
|
||||
br_aes_pwr8_ctrcbc_mac(const br_aes_pwr8_ctrcbc_keys *ctx,
|
||||
void *cbcmac, const void *data, size_t len)
|
||||
{
|
||||
if (len > 0) {
|
||||
switch (ctx->num_rounds) {
|
||||
case 10:
|
||||
cbcmac_128(ctx->skey.skni, cbcmac, data, len >> 4);
|
||||
break;
|
||||
case 12:
|
||||
cbcmac_192(ctx->skey.skni, cbcmac, data, len >> 4);
|
||||
break;
|
||||
default:
|
||||
cbcmac_256(ctx->skey.skni, cbcmac, data, len >> 4);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* see bearssl_block.h */
|
||||
/*
 * Vtable for the POWER8 AES CTR + CBC-MAC implementation. The entries
 * are, in order: the size of the context structure, two numeric
 * constants (16 and 4 -- presumably the block size in bytes and its
 * base-2 logarithm; confirm against br_block_ctrcbc_class), and the
 * init, encrypt, decrypt, ctr and mac functions defined in this file.
 * The casts adapt the concrete context pointer type of each function
 * to the generic class-pointer type used by the vtable.
 */
const br_block_ctrcbc_class br_aes_pwr8_ctrcbc_vtable = {
|
||||
sizeof(br_aes_pwr8_ctrcbc_keys),
|
||||
16,
|
||||
4,
|
||||
(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
|
||||
&br_aes_pwr8_ctrcbc_init,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, void *, void *, size_t))
|
||||
&br_aes_pwr8_ctrcbc_encrypt,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, void *, void *, size_t))
|
||||
&br_aes_pwr8_ctrcbc_decrypt,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, void *, size_t))
|
||||
&br_aes_pwr8_ctrcbc_ctr,
|
||||
(void (*)(const br_block_ctrcbc_class *const *,
|
||||
void *, const void *, size_t))
|
||||
&br_aes_pwr8_ctrcbc_mac
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
/* see bearssl_block.h */
|
||||
/*
 * Fallback used when BR_POWER8 is not set: the POWER8 implementation
 * is not compiled in, so no vtable is available and NULL is returned.
 */
const br_block_ctrcbc_class *
|
||||
br_aes_pwr8_ctrcbc_get_vtable(void)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -1,411 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#define BR_POWER_ASM_MACROS 1
|
||||
#include "inner.h"
|
||||
|
||||
/*
|
||||
* This is the GHASH implementation that leverages the POWER8 opcodes.
|
||||
*/
|
||||
|
||||
#if BR_POWER8
|
||||
|
||||
/*
|
||||
* Some symbolic names for registers.
|
||||
* HB0 = 16 bytes of value 0
|
||||
* HB1 = 16 bytes of value 1
|
||||
* HB2 = 16 bytes of value 2
|
||||
* HB6 = 16 bytes of value 6
|
||||
* HB7 = 16 bytes of value 7
|
||||
* TT0, TT1 and TT2 are temporaries
|
||||
*
|
||||
* BSW holds the pattern for byteswapping 32-bit words; this is set only
|
||||
* on little-endian systems. XBSW is the same register with the +32 offset
|
||||
* for access with the VSX opcodes.
|
||||
*/
|
||||
#define HB0 0
|
||||
#define HB1 1
|
||||
#define HB2 2
|
||||
#define HB6 3
|
||||
#define HB7 4
|
||||
#define TT0 5
|
||||
#define TT1 6
|
||||
#define TT2 7
|
||||
|
||||
#define BSW 8
|
||||
#define XBSW 40
|
||||
|
||||
/*
|
||||
* Macro to initialise the constants.
|
||||
*/
|
||||
#define INIT \
|
||||
vxor(HB0, HB0, HB0) \
|
||||
vspltisb(HB1, 1) \
|
||||
vspltisb(HB2, 2) \
|
||||
vspltisb(HB6, 6) \
|
||||
vspltisb(HB7, 7) \
|
||||
INIT_BSW
|
||||
|
||||
/*
|
||||
* Fix endianness of a value after reading it or before writing it, if
|
||||
* necessary.
|
||||
*/
|
||||
#if BR_POWER8_LE
|
||||
#define INIT_BSW lxvw4x(XBSW, 0, %[idx2be])
|
||||
#define FIX_ENDIAN(xx) vperm(xx, xx, xx, BSW)
|
||||
#else
|
||||
#define INIT_BSW
|
||||
#define FIX_ENDIAN(xx)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Shift x0:x1 by one bit to the left. This is a corrective action
|
||||
* needed because GHASH is defined in full little-endian specification,
|
||||
* while the opcodes use full big-endian convention, so the 255-bit product
|
||||
* ends up one bit to the right.
|
||||
*/
|
||||
#define SL_256(x0, x1) \
|
||||
vsldoi(TT0, HB0, x1, 1) \
|
||||
vsl(x0, x0, HB1) \
|
||||
vsr(TT0, TT0, HB7) \
|
||||
vsl(x1, x1, HB1) \
|
||||
vxor(x0, x0, TT0)
|
||||
|
||||
/*
|
||||
* Reduce x0:x1 in GF(2^128), result in xd (register xd may be the same as
|
||||
* x0 or x1, or a different register). x0 and x1 are modified.
|
||||
*/
|
||||
#define REDUCE_F128(xd, x0, x1) \
|
||||
vxor(x0, x0, x1) \
|
||||
vsr(TT0, x1, HB1) \
|
||||
vsr(TT1, x1, HB2) \
|
||||
vsr(TT2, x1, HB7) \
|
||||
vxor(x0, x0, TT0) \
|
||||
vxor(TT1, TT1, TT2) \
|
||||
vxor(x0, x0, TT1) \
|
||||
vsldoi(x1, x1, HB0, 15) \
|
||||
vsl(TT1, x1, HB6) \
|
||||
vsl(TT2, x1, HB1) \
|
||||
vxor(x1, TT1, TT2) \
|
||||
vsr(TT0, x1, HB1) \
|
||||
vsr(TT1, x1, HB2) \
|
||||
vsr(TT2, x1, HB7) \
|
||||
vxor(x0, x0, x1) \
|
||||
vxor(x0, x0, TT0) \
|
||||
vxor(TT1, TT1, TT2) \
|
||||
vxor(xd, x0, TT1)
|
||||
|
||||
/* see bearssl_hash.h */
|
||||
void
|
||||
br_ghash_pwr8(void *y, const void *h, const void *data, size_t len)
|
||||
{
|
||||
const unsigned char *buf1, *buf2;
|
||||
size_t num4, num1;
|
||||
unsigned char tmp[64];
|
||||
long cc0, cc1, cc2, cc3;
|
||||
|
||||
#if BR_POWER8_LE
|
||||
static const uint32_t idx2be[] = {
|
||||
0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
|
||||
};
|
||||
#endif
|
||||
|
||||
buf1 = data;
|
||||
|
||||
/*
|
||||
* Assembly code requires data into two chunks; first chunk
|
||||
* must contain a number of blocks which is a multiple of 4.
|
||||
* Since the processing for the first chunk is faster, we want
|
||||
* to make it as big as possible.
|
||||
*
|
||||
* For the remainder, there are two possibilities:
|
||||
* -- if the remainder size is a multiple of 16, then use it
|
||||
* in place;
|
||||
* -- otherwise, copy it to the tmp[] array and pad it with
|
||||
* zeros.
|
||||
*/
|
||||
num4 = len >> 6;
|
||||
buf2 = buf1 + (num4 << 6);
|
||||
len &= 63;
|
||||
num1 = (len + 15) >> 4;
|
||||
if ((len & 15) != 0) {
|
||||
memcpy(tmp, buf2, len);
|
||||
memset(tmp + len, 0, (num1 << 4) - len);
|
||||
buf2 = tmp;
|
||||
}
|
||||
|
||||
cc0 = 0;
|
||||
cc1 = 16;
|
||||
cc2 = 32;
|
||||
cc3 = 48;
|
||||
asm volatile (
|
||||
INIT
|
||||
|
||||
/*
|
||||
* Load current h (denoted hereafter h1) in v9.
|
||||
*/
|
||||
lxvw4x(41, 0, %[h])
|
||||
FIX_ENDIAN(9)
|
||||
|
||||
/*
|
||||
* Load current y into v28.
|
||||
*/
|
||||
lxvw4x(60, 0, %[y])
|
||||
FIX_ENDIAN(28)
|
||||
|
||||
/*
|
||||
* Split h1 into three registers:
|
||||
* v17 = h1_1:h1_0
|
||||
* v18 = 0:h1_0
|
||||
* v19 = h1_1:0
|
||||
*/
|
||||
xxpermdi(49, 41, 41, 2)
|
||||
vsldoi(18, HB0, 9, 8)
|
||||
vsldoi(19, 9, HB0, 8)
|
||||
|
||||
/*
|
||||
* If num4 is 0, skip directly to the second chunk.
|
||||
*/
|
||||
cmpldi(%[num4], 0)
|
||||
beq(chunk1)
|
||||
|
||||
/*
|
||||
* Compute h2 = h*h in v10.
|
||||
*/
|
||||
vpmsumd(10, 18, 18)
|
||||
vpmsumd(11, 19, 19)
|
||||
SL_256(10, 11)
|
||||
REDUCE_F128(10, 10, 11)
|
||||
|
||||
/*
|
||||
* Compute h3 = h*h*h in v11.
|
||||
* We first split h2 into:
|
||||
* v10 = h2_0:h2_1
|
||||
* v11 = 0:h2_0
|
||||
* v12 = h2_1:0
|
||||
* Then we do the product with h1, and reduce into v11.
|
||||
*/
|
||||
vsldoi(11, HB0, 10, 8)
|
||||
vsldoi(12, 10, HB0, 8)
|
||||
vpmsumd(13, 10, 17)
|
||||
vpmsumd(11, 11, 18)
|
||||
vpmsumd(12, 12, 19)
|
||||
vsldoi(14, HB0, 13, 8)
|
||||
vsldoi(15, 13, HB0, 8)
|
||||
vxor(11, 11, 14)
|
||||
vxor(12, 12, 15)
|
||||
SL_256(11, 12)
|
||||
REDUCE_F128(11, 11, 12)
|
||||
|
||||
/*
|
||||
* Compute h4 = h*h*h*h in v12. This is done by squaring h2.
|
||||
*/
|
||||
vsldoi(12, HB0, 10, 8)
|
||||
vsldoi(13, 10, HB0, 8)
|
||||
vpmsumd(12, 12, 12)
|
||||
vpmsumd(13, 13, 13)
|
||||
SL_256(12, 13)
|
||||
REDUCE_F128(12, 12, 13)
|
||||
|
||||
/*
|
||||
* Repack h1, h2, h3 and h4:
|
||||
* v13 = h4_0:h3_0
|
||||
* v14 = h4_1:h3_1
|
||||
* v15 = h2_0:h1_0
|
||||
* v16 = h2_1:h1_1
|
||||
*/
|
||||
xxpermdi(45, 44, 43, 0)
|
||||
xxpermdi(46, 44, 43, 3)
|
||||
xxpermdi(47, 42, 41, 0)
|
||||
xxpermdi(48, 42, 41, 3)
|
||||
|
||||
/*
|
||||
* Loop for each group of four blocks.
|
||||
*/
|
||||
mtctr(%[num4])
|
||||
label(loop4)
|
||||
/*
|
||||
* Read the four next blocks.
|
||||
* v20 = y + a0 = b0
|
||||
* v21 = a1 = b1
|
||||
* v22 = a2 = b2
|
||||
* v23 = a3 = b3
|
||||
*/
|
||||
lxvw4x(52, %[cc0], %[buf1])
|
||||
lxvw4x(53, %[cc1], %[buf1])
|
||||
lxvw4x(54, %[cc2], %[buf1])
|
||||
lxvw4x(55, %[cc3], %[buf1])
|
||||
FIX_ENDIAN(20)
|
||||
FIX_ENDIAN(21)
|
||||
FIX_ENDIAN(22)
|
||||
FIX_ENDIAN(23)
|
||||
addi(%[buf1], %[buf1], 64)
|
||||
vxor(20, 20, 28)
|
||||
|
||||
/*
|
||||
* Repack the blocks into v9, v10, v11 and v12.
|
||||
* v9 = b0_0:b1_0
|
||||
* v10 = b0_1:b1_1
|
||||
* v11 = b2_0:b3_0
|
||||
* v12 = b2_1:b3_1
|
||||
*/
|
||||
xxpermdi(41, 52, 53, 0)
|
||||
xxpermdi(42, 52, 53, 3)
|
||||
xxpermdi(43, 54, 55, 0)
|
||||
xxpermdi(44, 54, 55, 3)
|
||||
|
||||
/*
|
||||
* Compute the products.
|
||||
* v20 = b0_0*h4_0 + b1_0*h3_0
|
||||
* v21 = b0_1*h4_0 + b1_1*h3_0
|
||||
* v22 = b0_0*h4_1 + b1_0*h3_1
|
||||
* v23 = b0_1*h4_1 + b1_1*h3_1
|
||||
* v24 = b2_0*h2_0 + b3_0*h1_0
|
||||
* v25 = b2_1*h2_0 + b3_1*h1_0
|
||||
* v26 = b2_0*h2_1 + b3_0*h1_1
|
||||
* v27 = b2_1*h2_1 + b3_1*h1_1
|
||||
*/
|
||||
vpmsumd(20, 13, 9)
|
||||
vpmsumd(21, 13, 10)
|
||||
vpmsumd(22, 14, 9)
|
||||
vpmsumd(23, 14, 10)
|
||||
vpmsumd(24, 15, 11)
|
||||
vpmsumd(25, 15, 12)
|
||||
vpmsumd(26, 16, 11)
|
||||
vpmsumd(27, 16, 12)
|
||||
|
||||
/*
|
||||
* Sum products into a single 256-bit result in v11:v12.
|
||||
*/
|
||||
vxor(11, 20, 24)
|
||||
vxor(12, 23, 27)
|
||||
vxor( 9, 21, 22)
|
||||
vxor(10, 25, 26)
|
||||
vxor(20, 9, 10)
|
||||
vsldoi( 9, HB0, 20, 8)
|
||||
vsldoi(10, 20, HB0, 8)
|
||||
vxor(11, 11, 9)
|
||||
vxor(12, 12, 10)
|
||||
|
||||
/*
|
||||
* Fix and reduce in GF(2^128); this is the new y (in v28).
|
||||
*/
|
||||
SL_256(11, 12)
|
||||
REDUCE_F128(28, 11, 12)
|
||||
|
||||
/*
|
||||
* Loop for next group of four blocks.
|
||||
*/
|
||||
bdnz(loop4)
|
||||
|
||||
/*
|
||||
* Process second chunk, one block at a time.
|
||||
*/
|
||||
label(chunk1)
|
||||
cmpldi(%[num1], 0)
|
||||
beq(done)
|
||||
|
||||
mtctr(%[num1])
|
||||
label(loop1)
|
||||
/*
|
||||
* Load next data block and XOR it into y.
|
||||
*/
|
||||
lxvw4x(41, 0, %[buf2])
|
||||
#if BR_POWER8_LE
|
||||
FIX_ENDIAN(9)
|
||||
#endif
|
||||
addi(%[buf2], %[buf2], 16)
|
||||
vxor(9, 28, 9)
|
||||
|
||||
/*
|
||||
* Split y into doublewords:
|
||||
* v9 = y_0:y_1
|
||||
* v10 = 0:y_0
|
||||
* v11 = y_1:0
|
||||
*/
|
||||
vsldoi(10, HB0, 9, 8)
|
||||
vsldoi(11, 9, HB0, 8)
|
||||
|
||||
/*
|
||||
* Compute products with h:
|
||||
* v12 = y_0 * h_0
|
||||
* v13 = y_1 * h_1
|
||||
* v14 = y_1 * h_0 + y_0 * h_1
|
||||
*/
|
||||
vpmsumd(14, 9, 17)
|
||||
vpmsumd(12, 10, 18)
|
||||
vpmsumd(13, 11, 19)
|
||||
|
||||
/*
|
||||
* Propagate v14 into v12:v13 to finalise product.
|
||||
*/
|
||||
vsldoi(10, HB0, 14, 8)
|
||||
vsldoi(11, 14, HB0, 8)
|
||||
vxor(12, 12, 10)
|
||||
vxor(13, 13, 11)
|
||||
|
||||
/*
|
||||
* Fix result and reduce into v28 (next value for y).
|
||||
*/
|
||||
SL_256(12, 13)
|
||||
REDUCE_F128(28, 12, 13)
|
||||
bdnz(loop1)
|
||||
|
||||
label(done)
|
||||
/*
|
||||
* Write back the new y.
|
||||
*/
|
||||
FIX_ENDIAN(28)
|
||||
stxvw4x(60, 0, %[y])
|
||||
|
||||
: [buf1] "+b" (buf1), [buf2] "+b" (buf2)
|
||||
: [y] "b" (y), [h] "b" (h), [num4] "b" (num4), [num1] "b" (num1),
|
||||
[cc0] "b" (cc0), [cc1] "b" (cc1), [cc2] "b" (cc2), [cc3] "b" (cc3)
|
||||
#if BR_POWER8_LE
|
||||
, [idx2be] "b" (idx2be)
|
||||
#endif
|
||||
: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
|
||||
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
|
||||
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
|
||||
"ctr", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
/*
 * see bearssl_hash.h
 *
 * Accessor for the POWER8 GHASH implementation: returns the address of
 * br_ghash_pwr8. This definition sits in the first branch of a
 * preprocessor conditional (the matching #else variant returns 0
 * instead); the controlling condition is not visible from here --
 * presumably the POWER8 support switch, to be confirmed.
 */
|
||||
br_ghash
|
||||
br_ghash_pwr8_get(void)
|
||||
{
|
||||
return &br_ghash_pwr8;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/*
 * see bearssl_hash.h
 *
 * Fallback variant of the accessor, compiled from the #else branch of
 * the enclosing preprocessor conditional: it always returns 0, i.e. no
 * implementation is provided. Callers should therefore test the
 * returned value against 0 before using it.
 */
|
||||
br_ghash
|
||||
br_ghash_pwr8_get(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -1913,19 +1913,6 @@ unsigned br_aes_x86ni_keysched_enc(unsigned char *skni,
|
|||
unsigned br_aes_x86ni_keysched_dec(unsigned char *skni,
|
||||
const void *key, size_t len);
|
||||
|
||||
/*
|
||||
* Test support for AES POWER8 opcodes.
|
||||
*/
|
||||
int br_aes_pwr8_supported(void);
|
||||
|
||||
/*
|
||||
* AES key schedule, using POWER8 instructions. This yields the
|
||||
* subkeys in the encryption direction. Number of rounds is returned.
|
||||
* Key size MUST be 16, 24 or 32 bytes; otherwise, 0 is returned.
|
||||
*/
|
||||
unsigned br_aes_pwr8_keysched(unsigned char *skni,
|
||||
const void *key, size_t len);
|
||||
|
||||
/* ==================================================================== */
|
||||
/*
|
||||
* RSA.
|
||||
|
|
|
|||
|
|
@ -229,13 +229,6 @@ static const br_config_option config[] = {
|
|||
1
|
||||
#else
|
||||
0
|
||||
#endif
|
||||
},
|
||||
{ "BR_POWER8",
|
||||
#if BR_POWER8
|
||||
1
|
||||
#else
|
||||
0
|
||||
#endif
|
||||
},
|
||||
{ "BR_RDRAND",
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@
|
|||
void
|
||||
br_ssl_engine_set_default_aes_cbc(br_ssl_engine_context *cc)
|
||||
{
|
||||
#if BR_AES_X86NI || BR_POWER8
|
||||
#if BR_AES_X86NI
|
||||
const br_block_cbcenc_class *ienc;
|
||||
const br_block_cbcdec_class *idec;
|
||||
#endif
|
||||
|
|
@ -44,14 +44,6 @@ br_ssl_engine_set_default_aes_cbc(br_ssl_engine_context *cc)
|
|||
return;
|
||||
}
|
||||
#endif
|
||||
#if BR_POWER8
|
||||
ienc = br_aes_pwr8_cbcenc_get_vtable();
|
||||
idec = br_aes_pwr8_cbcdec_get_vtable();
|
||||
if (ienc != NULL && idec != NULL) {
|
||||
br_ssl_engine_set_aes_cbc(cc, ienc, idec);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#if BR_64
|
||||
br_ssl_engine_set_aes_cbc(cc,
|
||||
&br_aes_ct64_cbcenc_vtable,
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@
|
|||
void
|
||||
br_ssl_engine_set_default_aes_ccm(br_ssl_engine_context *cc)
|
||||
{
|
||||
#if BR_AES_X86NI || BR_POWER8
|
||||
#if BR_AES_X86NI
|
||||
const br_block_ctrcbc_class *ictrcbc;
|
||||
#endif
|
||||
|
||||
|
|
@ -46,17 +46,6 @@ br_ssl_engine_set_default_aes_ccm(br_ssl_engine_context *cc)
|
|||
br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct_ctrcbc_vtable);
|
||||
#endif
|
||||
}
|
||||
#elif BR_POWER8
|
||||
ictrcbc = br_aes_pwr8_ctrcbc_get_vtable();
|
||||
if (ictrcbc != NULL) {
|
||||
br_ssl_engine_set_aes_ctrcbc(cc, ictrcbc);
|
||||
} else {
|
||||
#if BR_64
|
||||
br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct64_ctrcbc_vtable);
|
||||
#else
|
||||
br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct_ctrcbc_vtable);
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
#if BR_64
|
||||
br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct64_ctrcbc_vtable);
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@
|
|||
void
|
||||
br_ssl_engine_set_default_aes_gcm(br_ssl_engine_context *cc)
|
||||
{
|
||||
#if BR_AES_X86NI || BR_POWER8
|
||||
#if BR_AES_X86NI
|
||||
const br_block_ctr_class *ictr;
|
||||
br_ghash ighash;
|
||||
#endif
|
||||
|
|
@ -47,17 +47,6 @@ br_ssl_engine_set_default_aes_gcm(br_ssl_engine_context *cc)
|
|||
br_ssl_engine_set_aes_ctr(cc, &br_aes_ct_ctr_vtable);
|
||||
#endif
|
||||
}
|
||||
#elif BR_POWER8
|
||||
ictr = br_aes_pwr8_ctr_get_vtable();
|
||||
if (ictr != NULL) {
|
||||
br_ssl_engine_set_aes_ctr(cc, ictr);
|
||||
} else {
|
||||
#if BR_64
|
||||
br_ssl_engine_set_aes_ctr(cc, &br_aes_ct64_ctr_vtable);
|
||||
#else
|
||||
br_ssl_engine_set_aes_ctr(cc, &br_aes_ct_ctr_vtable);
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
#if BR_64
|
||||
br_ssl_engine_set_aes_ctr(cc, &br_aes_ct64_ctr_vtable);
|
||||
|
|
@ -72,13 +61,6 @@ br_ssl_engine_set_default_aes_gcm(br_ssl_engine_context *cc)
|
|||
return;
|
||||
}
|
||||
#endif
|
||||
#if BR_POWER8
|
||||
ighash = br_ghash_pwr8_get();
|
||||
if (ighash != 0) {
|
||||
br_ssl_engine_set_ghash(cc, ighash);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#if BR_LOMUL
|
||||
br_ssl_engine_set_ghash(cc, &br_ghash_ctmul32);
|
||||
#elif BR_64
|
||||
|
|
|
|||
Loading…
Reference in New Issue