mirror of
https://github.com/wiidev/usbloadergx.git
synced 2025-01-01 14:41:50 +01:00
399 lines
9.5 KiB
C
399 lines
9.5 KiB
C
|
/* Rijndael Block Cipher - rijndael.c

   Written by Mike Scott 21st April 1999
   mike@compapp.dcu.ie

   Permission for free direct or derivative use is granted subject
   to compliance with any conditions that the originators of the
   algorithm place on its exploitation.

*/
|
||
|
|
||
|
#include <stdio.h>
|
||
|
#include <string.h>
|
||
|
|
||
|
/* Shorthand integer types used throughout this file.
   Note that u32 expands to 'unsigned long', which is exactly 32 bits wide
   only on targets whose 'long' is 32 bits; it is wider on LP64 targets. */
#define u8  unsigned char        /* 8 bits */
#define u32 unsigned long        /* 32 bits */
#define u64 unsigned long long
|
||
|
|
||
|
/* Rotates the 8-bit value x one bit to the left.
   The result is masked to 8 bits so it is correct even before it is
   stored back into a u8 (x is promoted to int inside the expression). */

#define ROTL(x) ((((x)>>7)|((x)<<1))&0xFF)

/* Rotates 32-bit word left by 1, 2 or 3 byte.
   x must hold a 32-bit value in an unsigned type of at least 32 bits.
   The result is masked to 32 bits: u32 is 'unsigned long', which is wider
   than 32 bits on LP64 targets, and without the mask the bits shifted out
   of the top would survive above bit 31 instead of being discarded.
   Each macro evaluates x twice, so x must be free of side effects. */

#define ROTL8(x)  ((((x)<<8)|((x)>>24))&0xFFFFFFFFUL)
#define ROTL16(x) ((((x)<<16)|((x)>>16))&0xFFFFFFFFUL)
#define ROTL24(x) ((((x)<<24)|((x)>>8))&0xFFFFFFFFUL)
|
||
|
|
||
|
/* Fixed Data */
|
||
|
|
||
|
static u8 InCo[4]={0xB,0xD,0x9,0xE}; /* Inverse Coefficients */
|
||
|
|
||
|
static u8 fbsub[256];
|
||
|
static u8 rbsub[256];
|
||
|
static u8 ptab[256],ltab[256];
|
||
|
static u32 ftable[256];
|
||
|
static u32 rtable[256];
|
||
|
static u32 rco[30];
|
||
|
|
||
|
/* Parameter-dependent data */
|
||
|
|
||
|
int Nk,Nb,Nr;
|
||
|
u8 fi[24],ri[24];
|
||
|
u32 fkey[120];
|
||
|
u32 rkey[120];
|
||
|
|
||
|
static u32 pack(u8 *b)
{ /* build a 32-bit word from 4 bytes; b[0] becomes the least significant byte */
    u32 word = 0;
    int n;

    /* feed the bytes in from the most significant end */
    for (n = 3; n >= 0; n--)
        word = (word << 8) | (u32)b[n];
    return word;
}
|
||
|
|
||
|
static void unpack(u32 a, u8 *b)
{ /* split the low 32 bits of a into 4 bytes; b[0] receives the least significant byte */
    int n;

    for (n = 0; n < 4; n++)
    {
        b[n] = (u8)a;
        a >>= 8;
    }
}
|
||
|
|
||
|
static u8 xtime(u8 a)
{ /* shift a left by one bit; if the top bit was set, fold it back in by XORing 0x1B */
    u8 reduce = (a & 0x80) ? 0x1B : 0;

    return (u8)((a << 1) ^ reduce);
}
|
||
|
|
||
|
static u8 bmul(u8 x, u8 y)
{ /* table-driven multiply: x.y = AntiLog(Log(x) + Log(y)), and 0 if either factor is 0 */
    if (x == 0 || y == 0)
        return 0;

    return ptab[(ltab[x] + ltab[y]) % 255];
}
|
||
|
|
||
|
static u32 SubByte(u32 a)
{ /* run each of the 4 bytes of a through the forward substitution table */
    u8 bytes[4];
    int n;

    unpack(a, bytes);
    for (n = 0; n < 4; n++)
        bytes[n] = fbsub[bytes[n]];
    return pack(bytes);
}
|
||
|
|
||
|
static u8 product(u32 x, u32 y)
{ /* dot product of two 4-byte arrays: bmul() of matching bytes, combined with XOR */
    u8 xb[4], yb[4];
    u8 acc = 0;
    int n;

    unpack(x, xb);
    unpack(y, yb);
    for (n = 0; n < 4; n++)
        acc ^= bmul(xb[n], yb[n]);
    return acc;
}
|
||
|
|
||
|
static u32 InvMixCol(u32 x)
{ /* matrix multiplication of the column x by the rotations of InCo */
    u8 out[4];
    u32 coeff = pack(InCo);
    int n;

    /* out[3] uses the unrotated coefficients; each lower byte uses the
       coefficient word rotated by a further 24 bits */
    for (n = 3; n >= 0; n--)
    {
        out[n] = product(coeff, x);
        coeff = ROTL24(coeff);
    }
    return pack(out);
}
|
||
|
|
||
|
u8 ByteSub(u8 x)
{ /* S-box value for x: multiplicative inverse followed by the affine transform.
     Requires ptab[] and ltab[] to have been filled in (see gentables()). */
    u8 y, r;
    int n;

    /* multiplicative inverse; 0 has none and is mapped to 0, so that
       ByteSub(0) yields 0x63, the value gentables() stores in fbsub[0] */
    y = x ? ptab[255 - ltab[x]] : 0;

    /* affine transformation: XOR y with its four successive 1-bit left rotations */
    r = y;
    for (n = 0; n < 4; n++)
    {
        r = ROTL(r);
        y ^= r;
    }
    y ^= 0x63;
    return y;
}
|
||
|
|
||
|
void gentables(void)
|
||
|
{ /* generate tables */
|
||
|
int i;
|
||
|
u8 y,b[4];
|
||
|
|
||
|
/* use 3 as primitive root to generate power and log tables */
|
||
|
|
||
|
ltab[0]=0;
|
||
|
ptab[0]=1; ltab[1]=0;
|
||
|
ptab[1]=3; ltab[3]=1;
|
||
|
for (i=2;i<256;i++)
|
||
|
{
|
||
|
ptab[i]=ptab[i-1]^xtime(ptab[i-1]);
|
||
|
ltab[ptab[i]]=i;
|
||
|
}
|
||
|
|
||
|
/* affine transformation:- each bit is xored with itself shifted one bit */
|
||
|
|
||
|
fbsub[0]=0x63;
|
||
|
rbsub[0x63]=0;
|
||
|
for (i=1;i<256;i++)
|
||
|
{
|
||
|
y=ByteSub((u8)i);
|
||
|
fbsub[i]=y; rbsub[y]=i;
|
||
|
}
|
||
|
|
||
|
for (i=0,y=1;i<30;i++)
|
||
|
{
|
||
|
rco[i]=y;
|
||
|
y=xtime(y);
|
||
|
}
|
||
|
|
||
|
/* calculate forward and reverse tables */
|
||
|
for (i=0;i<256;i++)
|
||
|
{
|
||
|
y=fbsub[i];
|
||
|
b[3]=y^xtime(y); b[2]=y;
|
||
|
b[1]=y; b[0]=xtime(y);
|
||
|
ftable[i]=pack(b);
|
||
|
|
||
|
y=rbsub[i];
|
||
|
b[3]=bmul(InCo[0],y); b[2]=bmul(InCo[1],y);
|
||
|
b[1]=bmul(InCo[2],y); b[0]=bmul(InCo[3],y);
|
||
|
rtable[i]=pack(b);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void gkey(int nb,int nk,char *key)
|
||
|
{ /* blocksize=32*nb bits. Key=32*nk bits */
|
||
|
/* currently nb,bk = 4, 6 or 8 */
|
||
|
/* key comes as 4*Nk bytes */
|
||
|
/* Key Scheduler. Create expanded encryption key */
|
||
|
int i,j,k,m,N;
|
||
|
int C1,C2,C3;
|
||
|
u32 CipherKey[8];
|
||
|
|
||
|
Nb=nb; Nk=nk;
|
||
|
|
||
|
/* Nr is number of rounds */
|
||
|
if (Nb>=Nk) Nr=6+Nb;
|
||
|
else Nr=6+Nk;
|
||
|
|
||
|
C1=1;
|
||
|
if (Nb<8) { C2=2; C3=3; }
|
||
|
else { C2=3; C3=4; }
|
||
|
|
||
|
/* pre-calculate forward and reverse increments */
|
||
|
for (m=j=0;j<nb;j++,m+=3)
|
||
|
{
|
||
|
fi[m]=(j+C1)%nb;
|
||
|
fi[m+1]=(j+C2)%nb;
|
||
|
fi[m+2]=(j+C3)%nb;
|
||
|
ri[m]=(nb+j-C1)%nb;
|
||
|
ri[m+1]=(nb+j-C2)%nb;
|
||
|
ri[m+2]=(nb+j-C3)%nb;
|
||
|
}
|
||
|
|
||
|
N=Nb*(Nr+1);
|
||
|
|
||
|
for (i=j=0;i<Nk;i++,j+=4)
|
||
|
{
|
||
|
CipherKey[i]=pack((u8 *)&key[j]);
|
||
|
}
|
||
|
for (i=0;i<Nk;i++) fkey[i]=CipherKey[i];
|
||
|
for (j=Nk,k=0;j<N;j+=Nk,k++)
|
||
|
{
|
||
|
fkey[j]=fkey[j-Nk]^SubByte(ROTL24(fkey[j-1]))^rco[k];
|
||
|
if (Nk<=6)
|
||
|
{
|
||
|
for (i=1;i<Nk && (i+j)<N;i++)
|
||
|
fkey[i+j]=fkey[i+j-Nk]^fkey[i+j-1];
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for (i=1;i<4 &&(i+j)<N;i++)
|
||
|
fkey[i+j]=fkey[i+j-Nk]^fkey[i+j-1];
|
||
|
if ((j+4)<N) fkey[j+4]=fkey[j+4-Nk]^SubByte(fkey[j+3]);
|
||
|
for (i=5;i<Nk && (i+j)<N;i++)
|
||
|
fkey[i+j]=fkey[i+j-Nk]^fkey[i+j-1];
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
/* now for the expanded decrypt key in reverse order */
|
||
|
|
||
|
for (j=0;j<Nb;j++) rkey[j+N-Nb]=fkey[j];
|
||
|
for (i=Nb;i<N-Nb;i+=Nb)
|
||
|
{
|
||
|
k=N-Nb-i;
|
||
|
for (j=0;j<Nb;j++) rkey[k+j]=InvMixCol(fkey[i+j]);
|
||
|
}
|
||
|
for (j=N-Nb;j<N;j++) rkey[j-N+Nb]=fkey[j];
|
||
|
}
|
||
|
|
||
|
|
||
|
/* There is an obvious time/space trade-off possible here.     *
 * Instead of just one ftable[], I could have 4, the other     *
 * 3 pre-rotated to save the ROTL8, ROTL16 and ROTL24 overhead */
|
||
|
|
||
|
void encrypt(char *buff)
|
||
|
{
|
||
|
int i,j,k,m;
|
||
|
u32 a[8],b[8],*x,*y,*t;
|
||
|
|
||
|
for (i=j=0;i<Nb;i++,j+=4)
|
||
|
{
|
||
|
a[i]=pack((u8 *)&buff[j]);
|
||
|
a[i]^=fkey[i];
|
||
|
}
|
||
|
k=Nb;
|
||
|
x=a; y=b;
|
||
|
|
||
|
/* State alternates between a and b */
|
||
|
for (i=1;i<Nr;i++)
|
||
|
{ /* Nr is number of rounds. May be odd. */
|
||
|
|
||
|
/* if Nb is fixed - unroll this next
|
||
|
loop and hard-code in the values of fi[] */
|
||
|
|
||
|
for (m=j=0;j<Nb;j++,m+=3)
|
||
|
{ /* deal with each 32-bit element of the State */
|
||
|
/* This is the time-critical bit */
|
||
|
y[j]=fkey[k++]^ftable[(u8)x[j]]^
|
||
|
ROTL8(ftable[(u8)(x[fi[m]]>>8)])^
|
||
|
ROTL16(ftable[(u8)(x[fi[m+1]]>>16)])^
|
||
|
ROTL24(ftable[x[fi[m+2]]>>24]);
|
||
|
}
|
||
|
t=x; x=y; y=t; /* swap pointers */
|
||
|
}
|
||
|
|
||
|
/* Last Round - unroll if possible */
|
||
|
for (m=j=0;j<Nb;j++,m+=3)
|
||
|
{
|
||
|
y[j]=fkey[k++]^(u32)fbsub[(u8)x[j]]^
|
||
|
ROTL8((u32)fbsub[(u8)(x[fi[m]]>>8)])^
|
||
|
ROTL16((u32)fbsub[(u8)(x[fi[m+1]]>>16)])^
|
||
|
ROTL24((u32)fbsub[x[fi[m+2]]>>24]);
|
||
|
}
|
||
|
for (i=j=0;i<Nb;i++,j+=4)
|
||
|
{
|
||
|
unpack(y[i],(u8 *)&buff[j]);
|
||
|
x[i]=y[i]=0; /* clean up stack */
|
||
|
}
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
void decrypt(char *buff)
|
||
|
{
|
||
|
int i,j,k,m;
|
||
|
u32 a[8],b[8],*x,*y,*t;
|
||
|
|
||
|
for (i=j=0;i<Nb;i++,j+=4)
|
||
|
{
|
||
|
a[i]=pack((u8 *)&buff[j]);
|
||
|
a[i]^=rkey[i];
|
||
|
}
|
||
|
k=Nb;
|
||
|
x=a; y=b;
|
||
|
|
||
|
/* State alternates between a and b */
|
||
|
for (i=1;i<Nr;i++)
|
||
|
{ /* Nr is number of rounds. May be odd. */
|
||
|
|
||
|
/* if Nb is fixed - unroll this next
|
||
|
loop and hard-code in the values of ri[] */
|
||
|
|
||
|
for (m=j=0;j<Nb;j++,m+=3)
|
||
|
{ /* This is the time-critical bit */
|
||
|
y[j]=rkey[k++]^rtable[(u8)x[j]]^
|
||
|
ROTL8(rtable[(u8)(x[ri[m]]>>8)])^
|
||
|
ROTL16(rtable[(u8)(x[ri[m+1]]>>16)])^
|
||
|
ROTL24(rtable[x[ri[m+2]]>>24]);
|
||
|
}
|
||
|
t=x; x=y; y=t; /* swap pointers */
|
||
|
}
|
||
|
|
||
|
/* Last Round - unroll if possible */
|
||
|
for (m=j=0;j<Nb;j++,m+=3)
|
||
|
{
|
||
|
y[j]=rkey[k++]^(u32)rbsub[(u8)x[j]]^
|
||
|
ROTL8((u32)rbsub[(u8)(x[ri[m]]>>8)])^
|
||
|
ROTL16((u32)rbsub[(u8)(x[ri[m+1]]>>16)])^
|
||
|
ROTL24((u32)rbsub[x[ri[m+2]]>>24]);
|
||
|
}
|
||
|
for (i=j=0;i<Nb;i++,j+=4)
|
||
|
{
|
||
|
unpack(y[i],(u8 *)&buff[j]);
|
||
|
x[i]=y[i]=0; /* clean up stack */
|
||
|
}
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
/* Install a key for the CBC helpers below: rebuilds the lookup tables and
   runs the key schedule with a 4-word block and a 4-word key, so 'key'
   must point to 16 bytes. */
void aes_set_key(u8 *key)
{
    enum { BLOCK_WORDS = 4, KEY_WORDS = 4 };

    gentables();
    gkey(BLOCK_WORDS, KEY_WORDS, (char *)key);
}
|
||
|
|
||
|
// CBC mode decryption
|
||
|
void aes_decrypt(u8 *iv, u8 *inbuf, u8 *outbuf, unsigned long long len) {
|
||
|
u8 block[16];
|
||
|
unsigned int blockno = 0, i;
|
||
|
|
||
|
//printf("aes_decrypt(%p, %p, %p, %lld)\n", iv, inbuf, outbuf, len);
|
||
|
|
||
|
for (blockno = 0; blockno <= (len / sizeof(block)); blockno++) {
|
||
|
unsigned int fraction;
|
||
|
if (blockno == (len / sizeof(block))) { // last block
|
||
|
fraction = len % sizeof(block);
|
||
|
if (fraction == 0) break;
|
||
|
memset(block, 0, sizeof(block));
|
||
|
} else fraction = 16;
|
||
|
|
||
|
// debug_printf("block %d: fraction = %d\n", blockno, fraction);
|
||
|
memcpy(block, inbuf + blockno * sizeof(block), fraction);
|
||
|
decrypt((char*)block);
|
||
|
u8 *ctext_ptr;
|
||
|
if (blockno == 0) ctext_ptr = iv;
|
||
|
else ctext_ptr = inbuf + (blockno-1) * sizeof(block);
|
||
|
|
||
|
for(i=0; i < fraction; i++)
|
||
|
outbuf[blockno * sizeof(block) + i] =
|
||
|
ctext_ptr[i] ^ block[i];
|
||
|
// debug_printf("Block %d output: ", blockno);
|
||
|
// hexdump(outbuf + blockno*sizeof(block), 16);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// CBC mode encryption
|
||
|
void aes_encrypt(u8 *iv, u8 *inbuf, u8 *outbuf, unsigned long long len) {
|
||
|
u8 block[16];
|
||
|
unsigned int blockno = 0, i;
|
||
|
|
||
|
// debug_printf("aes_decrypt(%p, %p, %p, %lld)\n", iv, inbuf, outbuf, len);
|
||
|
|
||
|
for (blockno = 0; blockno <= (len / sizeof(block)); blockno++) {
|
||
|
unsigned int fraction;
|
||
|
if (blockno == (len / sizeof(block))) { // last block
|
||
|
fraction = len % sizeof(block);
|
||
|
if (fraction == 0) break;
|
||
|
memset(block, 0, sizeof(block));
|
||
|
} else fraction = 16;
|
||
|
|
||
|
// debug_printf("block %d: fraction = %d\n", blockno, fraction);
|
||
|
memcpy(block, inbuf + blockno * sizeof(block), fraction);
|
||
|
|
||
|
for(i=0; i < fraction; i++)
|
||
|
block[i] = inbuf[blockno * sizeof(block) + i] ^ iv[i];
|
||
|
|
||
|
encrypt((char*)block);
|
||
|
memcpy(iv, block, sizeof(block));
|
||
|
memcpy(outbuf + blockno * sizeof(block), block, sizeof(block));
|
||
|
// debug_printf("Block %d output: ", blockno);
|
||
|
// hexdump(outbuf + blockno*sizeof(block), 16);
|
||
|
}
|
||
|
}
|
||
|
|