mirror of
https://github.com/cemu-project/WiiU-BenchmarkTool.git
synced 2024-11-22 09:19:16 +01:00
v0.1
This commit is contained in:
parent
04a57a4851
commit
aa0d2bbde7
1
BenchmarkTool.pnproj
Normal file
1
BenchmarkTool.pnproj
Normal file
@ -0,0 +1 @@
|
||||
<Project name="BenchmarkTool"><Folder name="benchmark"><Folder name="test1"><File path="src\test1.c"></File></Folder><Folder name="test2"><File path="src\test2.c"></File><File path="src\aes.c"></File><File path="src\aes.h"></File></Folder><Folder name="test3"><File path="src\test3.c"></File></Folder><Folder name="test4"><File path="src\test4.c"></File></Folder><File path="src\benchmarkTests.h"></File><File path="src\drawing.c"></File><File path="src\entry.c"></File><File path="src\main.c"></File><File path="src\main.h"></File></Folder><File path="src\link.ld"></File></Project>
|
1
BenchmarkTool.pnps
Normal file
1
BenchmarkTool.pnps
Normal file
@ -0,0 +1 @@
|
||||
<pd><ViewState><e p="BenchmarkTool" x="true"></e><e p="BenchmarkTool\benchmark" x="true"></e><e p="TemplateWiiU\benchmark\test1" x="true"></e><e p="BenchmarkTool\benchmark\test3" x="true"></e><e p="BenchmarkTool\benchmark\test2" x="true"></e><e p="BenchmarkTool\benchmark\test4" x="true"></e></ViewState></pd>
|
211
Makefile
Normal file
211
Makefile
Normal file
@ -0,0 +1,211 @@
|
||||
#---------------------------------------------------------------------------------
|
||||
# Clear the implicit built in rules
|
||||
#---------------------------------------------------------------------------------
|
||||
.SUFFIXES:
|
||||
#---------------------------------------------------------------------------------
|
||||
ifeq ($(strip $(DEVKITPPC)),)
|
||||
$(error "Please set DEVKITPPC in your environment. export DEVKITPPC=<path to>devkitPPC")
|
||||
endif
|
||||
ifeq ($(strip $(DEVKITPRO)),)
|
||||
$(error "Please set DEVKITPRO in your environment. export DEVKITPRO=<path to>devkitPRO")
|
||||
endif
|
||||
export PATH := $(DEVKITPPC)/bin:$(PORTLIBS)/bin:$(PATH)
|
||||
export LIBOGC_INC := $(DEVKITPRO)/libogc/include
|
||||
export LIBOGC_LIB := $(DEVKITPRO)/libogc/lib/wii
|
||||
export PORTLIBS := $(DEVKITPRO)/portlibs/ppc
|
||||
|
||||
PREFIX := powerpc-eabi-
|
||||
|
||||
export AS := $(PREFIX)as
|
||||
export CC := $(PREFIX)gcc
|
||||
export CXX := $(PREFIX)g++
|
||||
export AR := $(PREFIX)ar
|
||||
export OBJCOPY := $(PREFIX)objcopy
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
# TARGET is the name of the output
|
||||
# BUILD is the directory where object files & intermediate files will be placed
|
||||
# SOURCES is a list of directories containing source code
|
||||
# INCLUDES is a list of directories containing extra header files
|
||||
#---------------------------------------------------------------------------------
|
||||
TARGET := benchmark
|
||||
BUILD := build
|
||||
BUILD_DBG := $(TARGET)_dbg
|
||||
SOURCES := src
|
||||
DATA :=
|
||||
|
||||
INCLUDES := src
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
# options for code generation
|
||||
#---------------------------------------------------------------------------------
|
||||
CFLAGS := -std=gnu11 -mrvl -mcpu=750 -meabi -mhard-float -ffast-math \
|
||||
-O3 -Wall -Wextra -Wno-unused-parameter -Wno-strict-aliasing $(INCLUDE)
|
||||
CXXFLAGS := -std=gnu++11 -mrvl -mcpu=750 -meabi -mhard-float -ffast-math \
|
||||
-O3 -Wall -Wextra -Wno-unused-parameter -Wno-strict-aliasing $(INCLUDE)
|
||||
ASFLAGS := -mregnames
|
||||
LDFLAGS := -nostartfiles -Wl,-Map,$(notdir $@).map,-wrap,malloc,-wrap,free,-wrap,memalign,-wrap,calloc,-wrap,realloc,-wrap,malloc_usable_size,-wrap,_malloc_r,-wrap,_free_r,-wrap,_realloc_r,-wrap,_calloc_r,-wrap,_memalign_r,-wrap,_malloc_usable_size_r,-wrap,valloc,-wrap,_valloc_r,-wrap,_pvalloc_r,--gc-sections
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
Q := @
|
||||
MAKEFLAGS += --no-print-directory
|
||||
#---------------------------------------------------------------------------------
|
||||
# any extra libraries we wish to link with the project
|
||||
#---------------------------------------------------------------------------------
|
||||
LIBS :=
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
# list of directories containing libraries, this must be the top level containing
|
||||
# include and lib
|
||||
#---------------------------------------------------------------------------------
|
||||
LIBDIRS := $(CURDIR) \
|
||||
$(DEVKITPPC)/lib \
|
||||
$(DEVKITPPC)/lib/gcc/powerpc-eabi/4.8.2
|
||||
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
# no real need to edit anything past this point unless you need to add additional
|
||||
# rules for different file extensions
|
||||
#---------------------------------------------------------------------------------
|
||||
ifneq ($(BUILD),$(notdir $(CURDIR)))
|
||||
#---------------------------------------------------------------------------------
|
||||
export PROJECTDIR := $(CURDIR)
|
||||
export OUTPUT := $(CURDIR)/$(TARGETDIR)/$(TARGET)
|
||||
export VPATH := $(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \
|
||||
$(foreach dir,$(DATA),$(CURDIR)/$(dir))
|
||||
export DEPSDIR := $(CURDIR)/$(BUILD)
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
# automatically build a list of object files for our project
|
||||
#---------------------------------------------------------------------------------
|
||||
CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c)))
|
||||
CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp)))
|
||||
sFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s)))
|
||||
SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.S)))
|
||||
BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*)))
|
||||
TTFFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.ttf)))
|
||||
PNGFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.png)))
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
# use CXX for linking C++ projects, CC for standard C
|
||||
#---------------------------------------------------------------------------------
|
||||
ifeq ($(strip $(CPPFILES)),)
|
||||
export LD := $(CC)
|
||||
else
|
||||
export LD := $(CXX)
|
||||
endif
|
||||
|
||||
export OFILES := $(CPPFILES:.cpp=.o) $(CFILES:.c=.o) \
|
||||
$(sFILES:.s=.o) $(SFILES:.S=.o) \
|
||||
$(PNGFILES:.png=.png.o) $(addsuffix .o,$(BINFILES))
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
# build a list of include paths
|
||||
#---------------------------------------------------------------------------------
|
||||
export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \
|
||||
$(foreach dir,$(LIBDIRS),-I$(dir)/include) \
|
||||
-I$(CURDIR)/$(BUILD) -I$(LIBOGC_INC) \
|
||||
-I$(PORTLIBS)/include -I$(PORTLIBS)/include/freetype2
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
# build a list of library paths
|
||||
#---------------------------------------------------------------------------------
|
||||
export LIBPATHS := $(foreach dir,$(LIBDIRS),-L$(dir)/lib) \
|
||||
-L$(LIBOGC_LIB) -L$(PORTLIBS)/lib
|
||||
|
||||
export OUTPUT := $(CURDIR)/$(TARGET)
|
||||
.PHONY: $(BUILD) clean install
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
$(BUILD):
|
||||
@[ -d $@ ] || mkdir -p $@
|
||||
@$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
clean:
|
||||
@echo clean ...
|
||||
@rm -fr $(BUILD) $(OUTPUT).elf $(OUTPUT).bin $(BUILD_DBG).elf
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
else
|
||||
|
||||
DEPENDS := $(OFILES:.o=.d)
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
# main targets
|
||||
#---------------------------------------------------------------------------------
|
||||
$(OUTPUT).elf: $(OFILES)
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
# This rule links in binary data with the .jpg extension
|
||||
#---------------------------------------------------------------------------------
|
||||
%.elf: link.ld $(OFILES)
|
||||
@echo "linking ... $(TARGET).elf"
|
||||
$(Q)$(LD) -n -T $^ $(LDFLAGS) -o ../$(BUILD_DBG).elf $(LIBPATHS) $(LIBS)
|
||||
$(Q)$(OBJCOPY) -S -R .comment -R .gnu.attributes ../$(BUILD_DBG).elf $@
|
||||
|
||||
../data/loader.bin:
|
||||
$(MAKE) -C ../loader clean
|
||||
$(MAKE) -C ../loader
|
||||
#---------------------------------------------------------------------------------
|
||||
%.a:
|
||||
#---------------------------------------------------------------------------------
|
||||
@echo $(notdir $@)
|
||||
@rm -f $@
|
||||
@$(AR) -rc $@ $^
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
%.o: %.cpp
|
||||
@echo $(notdir $<)
|
||||
@$(CXX) -MMD -MP -MF $(DEPSDIR)/$*.d $(CXXFLAGS) -c $< -o $@ $(ERROR_FILTER)
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
%.o: %.c
|
||||
@echo $(notdir $<)
|
||||
@$(CC) -MMD -MP -MF $(DEPSDIR)/$*.d $(CFLAGS) -c $< -o $@ $(ERROR_FILTER)
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
%.o: %.S
|
||||
@echo $(notdir $<)
|
||||
@$(CC) -MMD -MP -MF $(DEPSDIR)/$*.d -x assembler-with-cpp $(ASFLAGS) -c $< -o $@ $(ERROR_FILTER)
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
%.png.o : %.png
|
||||
@echo $(notdir $<)
|
||||
@bin2s -a 32 $< | $(AS) -o $(@)
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
%.jpg.o : %.jpg
|
||||
@echo $(notdir $<)
|
||||
@bin2s -a 32 $< | $(AS) -o $(@)
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
%.ttf.o : %.ttf
|
||||
@echo $(notdir $<)
|
||||
@bin2s -a 32 $< | $(AS) -o $(@)
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
%.bin.o : %.bin
|
||||
@echo $(notdir $<)
|
||||
@bin2s -a 32 $< | $(AS) -o $(@)
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
%.wav.o : %.wav
|
||||
@echo $(notdir $<)
|
||||
@bin2s -a 32 $< | $(AS) -o $(@)
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
%.mp3.o : %.mp3
|
||||
@echo $(notdir $<)
|
||||
@bin2s -a 32 $< | $(AS) -o $(@)
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
%.ogg.o : %.ogg
|
||||
@echo $(notdir $<)
|
||||
@bin2s -a 32 $< | $(AS) -o $(@)
|
||||
|
||||
-include $(DEPENDS)
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
endif
|
||||
#---------------------------------------------------------------------------------
|
570
src/aes.c
Normal file
570
src/aes.c
Normal file
@ -0,0 +1,570 @@
|
||||
/*
|
||||
|
||||
This is an implementation of the AES algorithm, specifically ECB, CTR and CBC mode.
|
||||
Block size can be chosen in aes.h - available choices are AES128, AES192, AES256.
|
||||
|
||||
The implementation is verified against the test vectors in:
|
||||
National Institute of Standards and Technology Special Publication 800-38A 2001 ED
|
||||
|
||||
ECB-AES128
|
||||
----------
|
||||
|
||||
plain-text:
|
||||
6bc1bee22e409f96e93d7e117393172a
|
||||
ae2d8a571e03ac9c9eb76fac45af8e51
|
||||
30c81c46a35ce411e5fbc1191a0a52ef
|
||||
f69f2445df4f9b17ad2b417be66c3710
|
||||
|
||||
key:
|
||||
2b7e151628aed2a6abf7158809cf4f3c
|
||||
|
||||
resulting cipher
|
||||
3ad77bb40d7a3660a89ecaf32466ef97
|
||||
f5d3d58503b9699de785895a96fdbaaf
|
||||
43b1cd7f598ece23881b00e3ed030688
|
||||
7b0c785e27e8ad3f8223207104725dd4
|
||||
|
||||
|
||||
NOTE: String length must be evenly divisible by 16byte (str_len % 16 == 0)
|
||||
You should pad the end of the string with zeros if this is not the case.
|
||||
For AES192/256 the key size is proportionally larger.
|
||||
|
||||
*/
|
||||
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Includes: */
|
||||
/*****************************************************************************/
|
||||
#include <stdint.h>
|
||||
#include <string.h> // CBC mode, for memset
|
||||
#include "aes.h"
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Defines: */
|
||||
/*****************************************************************************/
|
||||
// The number of columns comprising a state in AES. This is a constant in AES. Value=4
|
||||
#define Nb 4
|
||||
|
||||
#if defined(AES256) && (AES256 == 1)
|
||||
#define Nk 8
|
||||
#define Nr 14
|
||||
#elif defined(AES192) && (AES192 == 1)
|
||||
#define Nk 6
|
||||
#define Nr 12
|
||||
#else
|
||||
#define Nk 4 // The number of 32 bit words in a key.
|
||||
#define Nr 10 // The number of rounds in AES Cipher.
|
||||
#endif
|
||||
|
||||
// jcallan@github points out that declaring Multiply as a function
|
||||
// reduces code size considerably with the Keil ARM compiler.
|
||||
// See this link for more information: https://github.com/kokke/tiny-AES-C/pull/3
|
||||
#ifndef MULTIPLY_AS_A_FUNCTION
|
||||
#define MULTIPLY_AS_A_FUNCTION 0
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Private variables: */
|
||||
/*****************************************************************************/
|
||||
// state - array holding the intermediate results during decryption.
|
||||
typedef uint8_t state_t[4][4];
|
||||
|
||||
|
||||
|
||||
// The lookup-tables are marked const so they can be placed in read-only storage instead of RAM
|
||||
// The numbers below can be computed dynamically trading ROM for RAM -
|
||||
// This can be useful in (embedded) bootloader applications, where ROM is often limited.
|
||||
static const uint8_t sbox[256] = {
|
||||
//0 1 2 3 4 5 6 7 8 9 A B C D E F
|
||||
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
|
||||
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
|
||||
0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
|
||||
0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
|
||||
0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
|
||||
0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
|
||||
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
|
||||
0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
|
||||
0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
|
||||
0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
|
||||
0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
|
||||
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
|
||||
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
|
||||
0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
|
||||
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
|
||||
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 };
|
||||
|
||||
static const uint8_t rsbox[256] = {
|
||||
0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
|
||||
0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
|
||||
0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
|
||||
0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
|
||||
0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
|
||||
0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
|
||||
0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
|
||||
0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
|
||||
0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
|
||||
0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
|
||||
0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
|
||||
0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
|
||||
0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
|
||||
0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
|
||||
0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
|
||||
0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d };
|
||||
|
||||
// The round constant word array, Rcon[i], contains the values given by
|
||||
// x to the power (i-1) being powers of x (x is denoted as {02}) in the field GF(2^8)
|
||||
static const uint8_t Rcon[11] = {
|
||||
0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 };
|
||||
|
||||
/*
|
||||
* Jordan Goulder points out in PR #12 (https://github.com/kokke/tiny-AES-C/pull/12),
|
||||
* that you can remove most of the elements in the Rcon array, because they are unused.
|
||||
*
|
||||
* From Wikipedia's article on the Rijndael key schedule @ https://en.wikipedia.org/wiki/Rijndael_key_schedule#Rcon
|
||||
*
|
||||
* "Only the first some of these constants are actually used – up to rcon[10] for AES-128 (as 11 round keys are needed),
|
||||
* up to rcon[8] for AES-192, up to rcon[7] for AES-256. rcon[0] is not used in AES algorithm."
|
||||
*/
|
||||
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Private functions: */
|
||||
/*****************************************************************************/
|
||||
/*
|
||||
static uint8_t getSBoxValue(uint8_t num)
|
||||
{
|
||||
return sbox[num];
|
||||
}
|
||||
*/
|
||||
#define getSBoxValue(num) (sbox[(num)])
|
||||
/*
|
||||
static uint8_t getSBoxInvert(uint8_t num)
|
||||
{
|
||||
return rsbox[num];
|
||||
}
|
||||
*/
|
||||
#define getSBoxInvert(num) (rsbox[(num)])
|
||||
|
||||
// This function produces Nb(Nr+1) round keys. The round keys are used in each round to decrypt the states.
|
||||
static void KeyExpansion(uint8_t* RoundKey, const uint8_t* Key)
|
||||
{
|
||||
unsigned i, j, k;
|
||||
uint8_t tempa[4]; // Used for the column/row operations
|
||||
|
||||
// The first round key is the key itself.
|
||||
for (i = 0; i < Nk; ++i)
|
||||
{
|
||||
RoundKey[(i * 4) + 0] = Key[(i * 4) + 0];
|
||||
RoundKey[(i * 4) + 1] = Key[(i * 4) + 1];
|
||||
RoundKey[(i * 4) + 2] = Key[(i * 4) + 2];
|
||||
RoundKey[(i * 4) + 3] = Key[(i * 4) + 3];
|
||||
}
|
||||
|
||||
// All other round keys are found from the previous round keys.
|
||||
for (i = Nk; i < Nb * (Nr + 1); ++i)
|
||||
{
|
||||
{
|
||||
k = (i - 1) * 4;
|
||||
tempa[0]=RoundKey[k + 0];
|
||||
tempa[1]=RoundKey[k + 1];
|
||||
tempa[2]=RoundKey[k + 2];
|
||||
tempa[3]=RoundKey[k + 3];
|
||||
|
||||
}
|
||||
|
||||
if (i % Nk == 0)
|
||||
{
|
||||
// This function shifts the 4 bytes in a word to the left once.
|
||||
// [a0,a1,a2,a3] becomes [a1,a2,a3,a0]
|
||||
|
||||
// Function RotWord()
|
||||
{
|
||||
k = tempa[0];
|
||||
tempa[0] = tempa[1];
|
||||
tempa[1] = tempa[2];
|
||||
tempa[2] = tempa[3];
|
||||
tempa[3] = k;
|
||||
}
|
||||
|
||||
// SubWord() is a function that takes a four-byte input word and
|
||||
// applies the S-box to each of the four bytes to produce an output word.
|
||||
|
||||
// Function Subword()
|
||||
{
|
||||
tempa[0] = getSBoxValue(tempa[0]);
|
||||
tempa[1] = getSBoxValue(tempa[1]);
|
||||
tempa[2] = getSBoxValue(tempa[2]);
|
||||
tempa[3] = getSBoxValue(tempa[3]);
|
||||
}
|
||||
|
||||
tempa[0] = tempa[0] ^ Rcon[i/Nk];
|
||||
}
|
||||
#if defined(AES256) && (AES256 == 1)
|
||||
if (i % Nk == 4)
|
||||
{
|
||||
// Function Subword()
|
||||
{
|
||||
tempa[0] = getSBoxValue(tempa[0]);
|
||||
tempa[1] = getSBoxValue(tempa[1]);
|
||||
tempa[2] = getSBoxValue(tempa[2]);
|
||||
tempa[3] = getSBoxValue(tempa[3]);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
j = i * 4; k=(i - Nk) * 4;
|
||||
RoundKey[j + 0] = RoundKey[k + 0] ^ tempa[0];
|
||||
RoundKey[j + 1] = RoundKey[k + 1] ^ tempa[1];
|
||||
RoundKey[j + 2] = RoundKey[k + 2] ^ tempa[2];
|
||||
RoundKey[j + 3] = RoundKey[k + 3] ^ tempa[3];
|
||||
}
|
||||
}
|
||||
|
||||
void AES_init_ctx(struct AES_ctx* ctx, const uint8_t* key)
|
||||
{
|
||||
KeyExpansion(ctx->RoundKey, key);
|
||||
}
|
||||
#if (defined(CBC) && (CBC == 1)) || (defined(CTR) && (CTR == 1))
|
||||
void AES_init_ctx_iv(struct AES_ctx* ctx, const uint8_t* key, const uint8_t* iv)
|
||||
{
|
||||
KeyExpansion(ctx->RoundKey, key);
|
||||
memcpy (ctx->Iv, iv, AES_BLOCKLEN);
|
||||
}
|
||||
void AES_ctx_set_iv(struct AES_ctx* ctx, const uint8_t* iv)
|
||||
{
|
||||
memcpy (ctx->Iv, iv, AES_BLOCKLEN);
|
||||
}
|
||||
#endif
|
||||
|
||||
// This function adds the round key to state.
|
||||
// The round key is added to the state by an XOR function.
|
||||
static void AddRoundKey(uint8_t round,state_t* state,uint8_t* RoundKey)
|
||||
{
|
||||
uint8_t i,j;
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
for (j = 0; j < 4; ++j)
|
||||
{
|
||||
(*state)[i][j] ^= RoundKey[(round * Nb * 4) + (i * Nb) + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The SubBytes Function Substitutes the values in the
|
||||
// state matrix with values in an S-box.
|
||||
static void SubBytes(state_t* state)
|
||||
{
|
||||
uint8_t i, j;
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
for (j = 0; j < 4; ++j)
|
||||
{
|
||||
(*state)[j][i] = getSBoxValue((*state)[j][i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The ShiftRows() function shifts the rows in the state to the left.
|
||||
// Each row is shifted with different offset.
|
||||
// Offset = Row number. So the first row is not shifted.
|
||||
static void ShiftRows(state_t* state)
|
||||
{
|
||||
uint8_t temp;
|
||||
|
||||
// Rotate first row 1 columns to left
|
||||
temp = (*state)[0][1];
|
||||
(*state)[0][1] = (*state)[1][1];
|
||||
(*state)[1][1] = (*state)[2][1];
|
||||
(*state)[2][1] = (*state)[3][1];
|
||||
(*state)[3][1] = temp;
|
||||
|
||||
// Rotate second row 2 columns to left
|
||||
temp = (*state)[0][2];
|
||||
(*state)[0][2] = (*state)[2][2];
|
||||
(*state)[2][2] = temp;
|
||||
|
||||
temp = (*state)[1][2];
|
||||
(*state)[1][2] = (*state)[3][2];
|
||||
(*state)[3][2] = temp;
|
||||
|
||||
// Rotate third row 3 columns to left
|
||||
temp = (*state)[0][3];
|
||||
(*state)[0][3] = (*state)[3][3];
|
||||
(*state)[3][3] = (*state)[2][3];
|
||||
(*state)[2][3] = (*state)[1][3];
|
||||
(*state)[1][3] = temp;
|
||||
}
|
||||
|
||||
static uint8_t xtime(uint8_t x)
|
||||
{
|
||||
return ((x<<1) ^ (((x>>7) & 1) * 0x1b));
|
||||
}
|
||||
|
||||
// MixColumns function mixes the columns of the state matrix
|
||||
static void MixColumns(state_t* state)
|
||||
{
|
||||
uint8_t i;
|
||||
uint8_t Tmp, Tm, t;
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
t = (*state)[i][0];
|
||||
Tmp = (*state)[i][0] ^ (*state)[i][1] ^ (*state)[i][2] ^ (*state)[i][3] ;
|
||||
Tm = (*state)[i][0] ^ (*state)[i][1] ; Tm = xtime(Tm); (*state)[i][0] ^= Tm ^ Tmp ;
|
||||
Tm = (*state)[i][1] ^ (*state)[i][2] ; Tm = xtime(Tm); (*state)[i][1] ^= Tm ^ Tmp ;
|
||||
Tm = (*state)[i][2] ^ (*state)[i][3] ; Tm = xtime(Tm); (*state)[i][2] ^= Tm ^ Tmp ;
|
||||
Tm = (*state)[i][3] ^ t ; Tm = xtime(Tm); (*state)[i][3] ^= Tm ^ Tmp ;
|
||||
}
|
||||
}
|
||||
|
||||
// Multiply is used to multiply numbers in the field GF(2^8)
|
||||
// Note: The last call to xtime() is unneeded, but often ends up generating a smaller binary
|
||||
// The compiler seems to be able to vectorize the operation better this way.
|
||||
// See https://github.com/kokke/tiny-AES-c/pull/34
|
||||
#if MULTIPLY_AS_A_FUNCTION
|
||||
static uint8_t Multiply(uint8_t x, uint8_t y)
|
||||
{
|
||||
return (((y & 1) * x) ^
|
||||
((y>>1 & 1) * xtime(x)) ^
|
||||
((y>>2 & 1) * xtime(xtime(x))) ^
|
||||
((y>>3 & 1) * xtime(xtime(xtime(x)))) ^
|
||||
((y>>4 & 1) * xtime(xtime(xtime(xtime(x)))))); /* this last call to xtime() can be omitted */
|
||||
}
|
||||
#else
|
||||
#define Multiply(x, y) \
|
||||
( ((y & 1) * x) ^ \
|
||||
((y>>1 & 1) * xtime(x)) ^ \
|
||||
((y>>2 & 1) * xtime(xtime(x))) ^ \
|
||||
((y>>3 & 1) * xtime(xtime(xtime(x)))) ^ \
|
||||
((y>>4 & 1) * xtime(xtime(xtime(xtime(x)))))) \
|
||||
|
||||
#endif
|
||||
|
||||
// MixColumns function mixes the columns of the state matrix.
|
||||
// The method used to multiply may be difficult to understand for the inexperienced.
|
||||
// Please use the references to gain more information.
|
||||
static void InvMixColumns(state_t* state)
|
||||
{
|
||||
int i;
|
||||
uint8_t a, b, c, d;
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
a = (*state)[i][0];
|
||||
b = (*state)[i][1];
|
||||
c = (*state)[i][2];
|
||||
d = (*state)[i][3];
|
||||
|
||||
(*state)[i][0] = Multiply(a, 0x0e) ^ Multiply(b, 0x0b) ^ Multiply(c, 0x0d) ^ Multiply(d, 0x09);
|
||||
(*state)[i][1] = Multiply(a, 0x09) ^ Multiply(b, 0x0e) ^ Multiply(c, 0x0b) ^ Multiply(d, 0x0d);
|
||||
(*state)[i][2] = Multiply(a, 0x0d) ^ Multiply(b, 0x09) ^ Multiply(c, 0x0e) ^ Multiply(d, 0x0b);
|
||||
(*state)[i][3] = Multiply(a, 0x0b) ^ Multiply(b, 0x0d) ^ Multiply(c, 0x09) ^ Multiply(d, 0x0e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// The SubBytes Function Substitutes the values in the
|
||||
// state matrix with values in an S-box.
|
||||
static void InvSubBytes(state_t* state)
|
||||
{
|
||||
uint8_t i, j;
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
for (j = 0; j < 4; ++j)
|
||||
{
|
||||
(*state)[j][i] = getSBoxInvert((*state)[j][i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void InvShiftRows(state_t* state)
|
||||
{
|
||||
uint8_t temp;
|
||||
|
||||
// Rotate first row 1 columns to right
|
||||
temp = (*state)[3][1];
|
||||
(*state)[3][1] = (*state)[2][1];
|
||||
(*state)[2][1] = (*state)[1][1];
|
||||
(*state)[1][1] = (*state)[0][1];
|
||||
(*state)[0][1] = temp;
|
||||
|
||||
// Rotate second row 2 columns to right
|
||||
temp = (*state)[0][2];
|
||||
(*state)[0][2] = (*state)[2][2];
|
||||
(*state)[2][2] = temp;
|
||||
|
||||
temp = (*state)[1][2];
|
||||
(*state)[1][2] = (*state)[3][2];
|
||||
(*state)[3][2] = temp;
|
||||
|
||||
// Rotate third row 3 columns to right
|
||||
temp = (*state)[0][3];
|
||||
(*state)[0][3] = (*state)[1][3];
|
||||
(*state)[1][3] = (*state)[2][3];
|
||||
(*state)[2][3] = (*state)[3][3];
|
||||
(*state)[3][3] = temp;
|
||||
}
|
||||
|
||||
|
||||
// Cipher is the main function that encrypts the PlainText.
|
||||
static void Cipher(state_t* state, uint8_t* RoundKey)
|
||||
{
|
||||
uint8_t round = 0;
|
||||
|
||||
// Add the First round key to the state before starting the rounds.
|
||||
AddRoundKey(0, state, RoundKey);
|
||||
|
||||
// There will be Nr rounds.
|
||||
// The first Nr-1 rounds are identical.
|
||||
// These Nr-1 rounds are executed in the loop below.
|
||||
for (round = 1; round < Nr; ++round)
|
||||
{
|
||||
SubBytes(state);
|
||||
ShiftRows(state);
|
||||
MixColumns(state);
|
||||
AddRoundKey(round, state, RoundKey);
|
||||
}
|
||||
|
||||
// The last round is given below.
|
||||
// The MixColumns function is not here in the last round.
|
||||
SubBytes(state);
|
||||
ShiftRows(state);
|
||||
AddRoundKey(Nr, state, RoundKey);
|
||||
}
|
||||
|
||||
static void InvCipher(state_t* state,uint8_t* RoundKey)
|
||||
{
|
||||
uint8_t round = 0;
|
||||
|
||||
// Add the First round key to the state before starting the rounds.
|
||||
AddRoundKey(Nr, state, RoundKey);
|
||||
|
||||
// There will be Nr rounds.
|
||||
// The first Nr-1 rounds are identical.
|
||||
// These Nr-1 rounds are executed in the loop below.
|
||||
for (round = (Nr - 1); round > 0; --round)
|
||||
{
|
||||
InvShiftRows(state);
|
||||
InvSubBytes(state);
|
||||
AddRoundKey(round, state, RoundKey);
|
||||
InvMixColumns(state);
|
||||
}
|
||||
|
||||
// The last round is given below.
|
||||
// The MixColumns function is not here in the last round.
|
||||
InvShiftRows(state);
|
||||
InvSubBytes(state);
|
||||
AddRoundKey(0, state, RoundKey);
|
||||
}
|
||||
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Public functions: */
|
||||
/*****************************************************************************/
|
||||
#if defined(ECB) && (ECB == 1)
|
||||
|
||||
|
||||
void AES_ECB_encrypt(struct AES_ctx *ctx, uint8_t* buf)
|
||||
{
|
||||
// The next function call encrypts the PlainText with the Key using AES algorithm.
|
||||
Cipher((state_t*)buf, ctx->RoundKey);
|
||||
}
|
||||
|
||||
void AES_ECB_decrypt(struct AES_ctx* ctx, uint8_t* buf)
|
||||
{
|
||||
// The next function call decrypts the PlainText with the Key using AES algorithm.
|
||||
InvCipher((state_t*)buf, ctx->RoundKey);
|
||||
}
|
||||
|
||||
|
||||
#endif // #if defined(ECB) && (ECB == 1)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#if defined(CBC) && (CBC == 1)
|
||||
|
||||
|
||||
static void XorWithIv(uint8_t* buf, uint8_t* Iv)
|
||||
{
|
||||
uint8_t i;
|
||||
for (i = 0; i < AES_BLOCKLEN; ++i) // The block in AES is always 128bit no matter the key size
|
||||
{
|
||||
buf[i] ^= Iv[i];
|
||||
}
|
||||
}
|
||||
|
||||
void AES_CBC_encrypt_buffer(struct AES_ctx *ctx,uint8_t* buf, uint32_t length)
|
||||
{
|
||||
uintptr_t i;
|
||||
uint8_t *Iv = ctx->Iv;
|
||||
for (i = 0; i < length; i += AES_BLOCKLEN)
|
||||
{
|
||||
XorWithIv(buf, Iv);
|
||||
Cipher((state_t*)buf, ctx->RoundKey);
|
||||
Iv = buf;
|
||||
buf += AES_BLOCKLEN;
|
||||
//printf("Step %d - %d", i/16, i);
|
||||
}
|
||||
/* store Iv in ctx for next call */
|
||||
memcpy(ctx->Iv, Iv, AES_BLOCKLEN);
|
||||
}
|
||||
|
||||
void AES_CBC_decrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, uint32_t length)
|
||||
{
|
||||
uintptr_t i;
|
||||
uint8_t storeNextIv[AES_BLOCKLEN];
|
||||
for (i = 0; i < length; i += AES_BLOCKLEN)
|
||||
{
|
||||
memcpy(storeNextIv, buf, AES_BLOCKLEN);
|
||||
InvCipher((state_t*)buf, ctx->RoundKey);
|
||||
XorWithIv(buf, ctx->Iv);
|
||||
memcpy(ctx->Iv, storeNextIv, AES_BLOCKLEN);
|
||||
buf += AES_BLOCKLEN;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif // #if defined(CBC) && (CBC == 1)
|
||||
|
||||
|
||||
|
||||
#if defined(CTR) && (CTR == 1)
|
||||
|
||||
/* Symmetrical operation: same function for encrypting as for decrypting. Note any IV/nonce should never be reused with the same key */
|
||||
void AES_CTR_xcrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, uint32_t length)
|
||||
{
|
||||
uint8_t buffer[AES_BLOCKLEN];
|
||||
|
||||
unsigned i;
|
||||
int bi;
|
||||
for (i = 0, bi = AES_BLOCKLEN; i < length; ++i, ++bi)
|
||||
{
|
||||
if (bi == AES_BLOCKLEN) /* we need to regen xor compliment in buffer */
|
||||
{
|
||||
|
||||
memcpy(buffer, ctx->Iv, AES_BLOCKLEN);
|
||||
Cipher((state_t*)buffer,ctx->RoundKey);
|
||||
|
||||
/* Increment Iv and handle overflow */
|
||||
for (bi = (AES_BLOCKLEN - 1); bi >= 0; --bi)
|
||||
{
|
||||
/* inc will owerflow */
|
||||
if (ctx->Iv[bi] == 255)
|
||||
{
|
||||
ctx->Iv[bi] = 0;
|
||||
continue;
|
||||
}
|
||||
ctx->Iv[bi] += 1;
|
||||
break;
|
||||
}
|
||||
bi = 0;
|
||||
}
|
||||
|
||||
buf[i] = (buf[i] ^ buffer[bi]);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // #if defined(CTR) && (CTR == 1)
|
||||
|
90
src/aes.h
Normal file
90
src/aes.h
Normal file
@ -0,0 +1,90 @@
|
||||
#ifndef _AES_H_
|
||||
#define _AES_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
// #define the macros below to 1/0 to enable/disable the mode of operation.
|
||||
//
|
||||
// CBC enables AES encryption in CBC-mode of operation.
|
||||
// CTR enables encryption in counter-mode.
|
||||
// ECB enables the basic ECB 16-byte block algorithm. All can be enabled simultaneously.
|
||||
|
||||
// The #ifndef-guard allows it to be configured before #include'ing or at compile time.
|
||||
#ifndef CBC
|
||||
#define CBC 1
|
||||
#endif
|
||||
|
||||
#ifndef ECB
|
||||
#define ECB 1
|
||||
#endif
|
||||
|
||||
#ifndef CTR
|
||||
#define CTR 1
|
||||
#endif
|
||||
|
||||
|
||||
#define AES128 1
|
||||
//#define AES192 1
|
||||
//#define AES256 1
|
||||
|
||||
#define AES_BLOCKLEN 16 //Block length in bytes AES is 128b block only
|
||||
|
||||
#if defined(AES256) && (AES256 == 1)
|
||||
#define AES_KEYLEN 32
|
||||
#define AES_keyExpSize 240
|
||||
#elif defined(AES192) && (AES192 == 1)
|
||||
#define AES_KEYLEN 24
|
||||
#define AES_keyExpSize 208
|
||||
#else
|
||||
#define AES_KEYLEN 16 // Key length in bytes
|
||||
#define AES_keyExpSize 176
|
||||
#endif
|
||||
|
||||
struct AES_ctx
|
||||
{
|
||||
uint8_t RoundKey[AES_keyExpSize];
|
||||
#if (defined(CBC) && (CBC == 1)) || (defined(CTR) && (CTR == 1))
|
||||
uint8_t Iv[AES_BLOCKLEN];
|
||||
#endif
|
||||
};
|
||||
|
||||
void AES_init_ctx(struct AES_ctx* ctx, const uint8_t* key);
|
||||
#if (defined(CBC) && (CBC == 1)) || (defined(CTR) && (CTR == 1))
|
||||
void AES_init_ctx_iv(struct AES_ctx* ctx, const uint8_t* key, const uint8_t* iv);
|
||||
void AES_ctx_set_iv(struct AES_ctx* ctx, const uint8_t* iv);
|
||||
#endif
|
||||
|
||||
#if defined(ECB) && (ECB == 1)
|
||||
// buffer size is exactly AES_BLOCKLEN bytes;
|
||||
// you need only AES_init_ctx as IV is not used in ECB
|
||||
// NB: ECB is considered insecure for most uses
|
||||
void AES_ECB_encrypt(struct AES_ctx* ctx, uint8_t* buf);
|
||||
void AES_ECB_decrypt(struct AES_ctx* ctx, uint8_t* buf);
|
||||
|
||||
#endif // #if defined(ECB) && (ECB == !)
|
||||
|
||||
|
||||
#if defined(CBC) && (CBC == 1)
|
||||
// buffer size MUST be mutile of AES_BLOCKLEN;
|
||||
// Suggest https://en.wikipedia.org/wiki/Padding_(cryptography)#PKCS7 for padding scheme
|
||||
// NOTES: you need to set IV in ctx via AES_init_ctx_iv() or AES_ctx_set_iv()
|
||||
// no IV should ever be reused with the same key
|
||||
void AES_CBC_encrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, uint32_t length);
|
||||
void AES_CBC_decrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, uint32_t length);
|
||||
|
||||
#endif // #if defined(CBC) && (CBC == 1)
|
||||
|
||||
|
||||
#if defined(CTR) && (CTR == 1)
|
||||
|
||||
// Same function for encrypting as for decrypting.
|
||||
// IV is incremented for every block, and used after encryption as XOR-compliment for output
|
||||
// Suggesting https://en.wikipedia.org/wiki/Padding_(cryptography)#PKCS7 for padding scheme
|
||||
// NOTES: you need to set IV in ctx with AES_init_ctx_iv() or AES_ctx_set_iv()
|
||||
// no IV should ever be reused with the same key
|
||||
void AES_CTR_xcrypt_buffer(struct AES_ctx* ctx, uint8_t* buf, uint32_t length);
|
||||
|
||||
#endif // #if defined(CTR) && (CTR == 1)
|
||||
|
||||
|
||||
#endif //_AES_H_
|
4
src/benchmarkTests.h
Normal file
4
src/benchmarkTests.h
Normal file
@ -0,0 +1,4 @@
|
||||
int test1_run();
|
||||
int test2_run();
|
||||
int test3_run();
|
||||
int test4_run();
|
1694
src/drawing.c
Normal file
1694
src/drawing.c
Normal file
File diff suppressed because it is too large
Load Diff
71
src/entry.c
Normal file
71
src/entry.c
Normal file
@ -0,0 +1,71 @@
|
||||
#include <string.h>
|
||||
#include "main.h"
|
||||
|
||||
int (*OSDynLoad_Acquire)(const char* rpl, u32 *handle);
|
||||
int (*OSDynLoad_FindExport)(u32 handle, int isdata, const char *symbol, void *address);
|
||||
|
||||
// coreinit OSScreen
|
||||
void (*OSScreenInit)(void);
|
||||
unsigned int (*OSScreenGetBufferSizeEx)(unsigned int bufferNum);
|
||||
int (*OSScreenSetBufferEx)(unsigned int bufferNum, void * addr);
|
||||
int (*OSScreenClearBufferEx)(unsigned int bufferNum, unsigned int temp);
|
||||
int (*OSScreenFlipBuffersEx)(unsigned int bufferNum);
|
||||
int (*OSScreenPutFontEx)(unsigned int bufferNum, unsigned int posX, unsigned int posY, const char * buffer);
|
||||
void (*OSScreenPutPixelEx)(int bufferNum, uint32 posX, uint32 posY, uint32 color);
|
||||
int (*OSScreenEnableEx)(unsigned int bufferNum, int enable);
|
||||
// coreinit memory
|
||||
void* (*MEMAllocFromDefaultHeapEx)(uint32_t size, int alignment);
|
||||
void (*MEMFreeToDefaultHeap)(void* addr);
|
||||
|
||||
// coreinit time
|
||||
u64 (*OSGetTime)(void);
|
||||
void (*OSSleepTicks)(u64 ticks);
|
||||
|
||||
// misc
|
||||
int (*_snprintf)(char *buf, int n, const char *format, ... );
|
||||
|
||||
int __entry_menu(int argc, char **argv)
|
||||
{
|
||||
// function offsets setup by Homebrew Launcher:
|
||||
// 0x00801500 -> OSDynLoad_Acquire
|
||||
// 0x00801504 -> OSDynLoad_FindExport
|
||||
|
||||
OSDynLoad_Acquire = (int(*)(const char*, u32*))*(u32*)0x00801500;
|
||||
OSDynLoad_FindExport = (int (*)(u32, int, const char *, void*))*(u32*)0x00801504;
|
||||
u32 coreinitHandle;
|
||||
OSDynLoad_Acquire("coreinit", &coreinitHandle);
|
||||
// coreinit OSScreen
|
||||
OSDynLoad_FindExport(coreinitHandle, 0, "OSScreenInit", (void*)&OSScreenInit);
|
||||
OSDynLoad_FindExport(coreinitHandle, 0, "OSScreenGetBufferSizeEx", (void*)&OSScreenGetBufferSizeEx);
|
||||
OSDynLoad_FindExport(coreinitHandle, 0, "OSScreenSetBufferEx", (void*)&OSScreenSetBufferEx);
|
||||
OSDynLoad_FindExport(coreinitHandle, 0, "OSScreenClearBufferEx", (void*)&OSScreenClearBufferEx);
|
||||
OSDynLoad_FindExport(coreinitHandle, 0, "OSScreenFlipBuffersEx", (void*)&OSScreenFlipBuffersEx);
|
||||
OSDynLoad_FindExport(coreinitHandle, 0, "OSScreenPutFontEx", (void*)&OSScreenPutFontEx);
|
||||
OSDynLoad_FindExport(coreinitHandle, 0, "OSScreenPutPixelEx", (void*)&OSScreenPutPixelEx);
|
||||
OSDynLoad_FindExport(coreinitHandle, 0, "OSScreenEnableEx", (void*)&OSScreenEnableEx);
|
||||
// coreinit time
|
||||
OSDynLoad_FindExport(coreinitHandle, 0, "OSGetTime", (void*)&OSGetTime);
|
||||
// coreinit thread
|
||||
OSDynLoad_FindExport(coreinitHandle, 0, "OSSleepTicks", (void*)&OSSleepTicks);
|
||||
// coreinit memory
|
||||
void* temp;
|
||||
OSDynLoad_FindExport(coreinitHandle, 1, "MEMAllocFromDefaultHeapEx", (void*)&temp);
|
||||
*(void**)&MEMAllocFromDefaultHeapEx = *(void**)temp;
|
||||
OSDynLoad_FindExport(coreinitHandle, 1, "MEMFreeToDefaultHeap", (void*)&temp);
|
||||
*(void**)&MEMFreeToDefaultHeap = *(void**)temp;
|
||||
// coreinit misc
|
||||
OSDynLoad_FindExport(coreinitHandle, 0, "__os_snprintf", (void*)&_snprintf);
|
||||
|
||||
int ret = mainFunc();
|
||||
return ret;
|
||||
}
|
||||
|
||||
void* memAlloc(int size)
|
||||
{
|
||||
return MEMAllocFromDefaultHeapEx(size, 16);
|
||||
}
|
||||
|
||||
void memFree(void* ptr)
|
||||
{
|
||||
MEMFreeToDefaultHeap(ptr);
|
||||
}
|
40
src/link.ld
Normal file
40
src/link.ld
Normal file
@ -0,0 +1,40 @@
|
||||
OUTPUT(ftpiiu.elf);
|
||||
|
||||
/* Tell linker where our application entry is so the garbage collect can work correct */
|
||||
ENTRY(__entry_menu);
|
||||
|
||||
SECTIONS {
|
||||
. = 0x00802000;
|
||||
.text : {
|
||||
*(.text*);
|
||||
}
|
||||
.rodata : {
|
||||
*(.rodata*);
|
||||
}
|
||||
.data : {
|
||||
*(.data*);
|
||||
|
||||
__sdata_start = .;
|
||||
*(.sdata*);
|
||||
__sdata_end = .;
|
||||
|
||||
__sdata2_start = .;
|
||||
*(.sdata2*);
|
||||
__sdata2_end = .;
|
||||
}
|
||||
.bss : {
|
||||
__bss_start = .;
|
||||
*(.bss*);
|
||||
*(.sbss*);
|
||||
*(COMMON);
|
||||
__bss_end = .;
|
||||
}
|
||||
__CODE_END = .;
|
||||
|
||||
/DISCARD/ : {
|
||||
*(*);
|
||||
}
|
||||
}
|
||||
|
||||
/******************************************************** FS ********************************************************/
|
||||
/* coreinit.rpl difference in addresses 0xFE3C00 */
|
132
src/main.c
Normal file
132
src/main.c
Normal file
@ -0,0 +1,132 @@
|
||||
#include "main.h"
|
||||
#include "benchmarkTests.h"
|
||||
|
||||
#define TEST_STATE_QUEUED (0)
|
||||
#define TEST_STATE_RUNNING (1)
|
||||
#define TEST_STATE_COMPLETED (2)
|
||||
#define TEST_STATE_FAILED (3)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint64 tickStart;
|
||||
uint64 tickEnd;
|
||||
uint8 state;
|
||||
char* name;
|
||||
int(*testFunc)();
|
||||
}testResultState_t;
|
||||
|
||||
testResultState_t testResultList[32];
|
||||
sint32 testResultCount = 0;
|
||||
|
||||
void drawStatusScreen()
|
||||
{
|
||||
OSScreenClearBufferEx(0, 0);
|
||||
OSScreenClearBufferEx(1, 0);
|
||||
drawText(-1, 1, 1, 0xFF905000, "Wii U CPU benchmark tool v0.1 (single-core)");
|
||||
drawText(-1, 1, 3, 0xFFFFFFFF, "Status Duration Test-Name");
|
||||
drawHorizontalLineW2(-1, 1, 4, 50, 0xFFFFFFFF);
|
||||
char text[128];
|
||||
int allComplete = 1;
|
||||
sint32 totalTime = 0;
|
||||
for(sint32 i=0; i<testResultCount; i++)
|
||||
{
|
||||
char durationText[32];
|
||||
durationText[0] = '?';
|
||||
durationText[1] = '\0';
|
||||
if( testResultList[i].state == TEST_STATE_COMPLETED )
|
||||
{
|
||||
drawText(0, 1, 5+i, 0x80FF8000, "COMPLETED");
|
||||
// calculate duration
|
||||
uint64 durationInTicks = testResultList[i].tickEnd - testResultList[i].tickStart;
|
||||
uint64 durationinMS = durationInTicks * (uint64)1000 / (uint64)TICK_SPEED;
|
||||
_snprintf(durationText, sizeof(text), "%dms", (sint32)durationinMS);
|
||||
totalTime += (sint32)durationinMS;
|
||||
}
|
||||
else if( testResultList[i].state == TEST_STATE_FAILED )
|
||||
{
|
||||
allComplete = 0;
|
||||
drawText(-1, 1, 5+i, 0xFF808000, "FAILED");
|
||||
}
|
||||
else if( testResultList[i].state == TEST_STATE_QUEUED )
|
||||
{
|
||||
allComplete = 0;
|
||||
drawText(-1, 1, 5+i, 0x80808000, "QUEUED");
|
||||
}
|
||||
else
|
||||
{
|
||||
allComplete = 0;
|
||||
drawText(-1, 1, 5+i, 0x8080FFFF, "RUNNING");
|
||||
}
|
||||
_snprintf(text, sizeof(text), "%-11s %-11s", durationText, testResultList[i].name);
|
||||
drawText(-1, 1+12, 5+i, 0xFFFFFFFF, text);
|
||||
}
|
||||
// draw total time if all tests completed
|
||||
if( allComplete != 0 )
|
||||
{
|
||||
_snprintf(text, sizeof(text), "Total: %dms", totalTime);
|
||||
drawText(-1, 1, 5+testResultCount+2, 0xFFFFFFFF, text);
|
||||
}
|
||||
OSScreenFlipBuffersEx(0);
|
||||
OSScreenFlipBuffersEx(1);
|
||||
}
|
||||
|
||||
void drawResult()
|
||||
{
|
||||
drawStatusScreen();
|
||||
}
|
||||
|
||||
void queueTest(int(*testFunc)(), char* name)
|
||||
{
|
||||
sint32 testIndex = testResultCount;
|
||||
testResultList[testIndex].name = name;
|
||||
testResultList[testIndex].testFunc = testFunc;
|
||||
testResultList[testIndex].tickStart = 0;
|
||||
testResultList[testIndex].tickEnd = 0;
|
||||
testResultList[testIndex].state = TEST_STATE_QUEUED;
|
||||
testResultCount++;
|
||||
}
|
||||
|
||||
void runTest(int testIndex)
|
||||
{
|
||||
testResultList[testIndex].state = TEST_STATE_RUNNING;
|
||||
drawStatusScreen();
|
||||
uint64 tickStart = OSGetTime();
|
||||
testResultList[testIndex].testFunc();
|
||||
uint64 tickEnd = OSGetTime();
|
||||
testResultList[testIndex].tickStart = tickStart;
|
||||
testResultList[testIndex].tickEnd = tickEnd;
|
||||
testResultList[testIndex].state = TEST_STATE_COMPLETED;
|
||||
drawStatusScreen();
|
||||
}
|
||||
|
||||
int mainFunc(void)
|
||||
{
|
||||
OSScreenInit();
|
||||
OSScreenSetBufferEx(0, (void*)0xF4000000);
|
||||
OSScreenSetBufferEx(1, (void*)(0xF4000000 + OSScreenGetBufferSizeEx(0)));
|
||||
OSScreenEnableEx(0, 1);
|
||||
OSScreenEnableEx(1, 1);
|
||||
OSScreenClearBufferEx(0, 0);
|
||||
OSScreenClearBufferEx(1, 0);
|
||||
OSScreenFlipBuffersEx(0);
|
||||
|
||||
drawStatusScreen();
|
||||
|
||||
queueTest(test1_run, "ALU"); // arithmetic in a loop
|
||||
queueTest(test2_run, "AES128"); // AES128 compression
|
||||
queueTest(test3_run, "COPY"); // memory copy
|
||||
queueTest(test4_run, "RECUR"); // recursive functions
|
||||
|
||||
// run tests
|
||||
for(sint32 i=0; i<testResultCount; i++)
|
||||
{
|
||||
runTest(i);
|
||||
}
|
||||
while( 1 )
|
||||
{
|
||||
drawResult();
|
||||
OSSleepTicks(MILLISECS_TO_TICKS(1000/30)); // aim for roughly 30 FPS
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
52
src/main.h
Normal file
52
src/main.h
Normal file
@ -0,0 +1,52 @@
|
||||
#include <gctypes.h>
|
||||
|
||||
typedef unsigned long long uint64;
|
||||
typedef signed long long sint64;
|
||||
typedef unsigned int uint32;
|
||||
typedef signed int sint32;
|
||||
typedef unsigned short uint16;
|
||||
typedef signed short sint16;
|
||||
typedef unsigned char uint8;
|
||||
typedef signed char sint8;
|
||||
|
||||
void* memAlloc(int size);
|
||||
void memFree(void* ptr);
|
||||
|
||||
int mainFunc(void);
|
||||
|
||||
#define BUS_SPEED 248625000
|
||||
#define SECS_TO_TICKS(sec) (((unsigned long long)(sec)) * (BUS_SPEED/4))
|
||||
#define MILLISECS_TO_TICKS(msec) (SECS_TO_TICKS(msec) / 1000)
|
||||
#define MICROSECS_TO_TICKS(usec) (SECS_TO_TICKS(usec) / 1000000)
|
||||
|
||||
#define TICK_SPEED (BUS_SPEED/4)
|
||||
|
||||
// dynload functions
|
||||
extern int (*OSDynLoad_Acquire)(const char* rpl, u32 *handle);
|
||||
extern int (*OSDynLoad_FindExport)(u32 handle, int isdata, const char *symbol, void *address);
|
||||
|
||||
// coreinit OSScreen functions
|
||||
extern void (*OSScreenInit)(void);
|
||||
extern unsigned int (*OSScreenGetBufferSizeEx)(unsigned int bufferNum);
|
||||
extern int (*OSScreenSetBufferEx)(unsigned int bufferNum, void * addr);
|
||||
extern int (*OSScreenClearBufferEx)(unsigned int bufferNum, unsigned int temp);
|
||||
extern int (*OSScreenFlipBuffersEx)(unsigned int bufferNum);
|
||||
extern int (*OSScreenPutFontEx)(unsigned int bufferNum, unsigned int posX, unsigned int posY, const char * buffer);
|
||||
extern void (*OSScreenPutPixelEx)(int bufferNum, uint32 posX, uint32 posY, uint32 color);
|
||||
extern int (*OSScreenEnableEx)(unsigned int bufferNum, int enable);
|
||||
|
||||
// coreinit memory
|
||||
void* (*MEMAllocFromDefaultHeapEx)(uint32_t size, int alignment);
|
||||
|
||||
// coreinit time functions
|
||||
extern u64 (*OSGetTime)(void);
|
||||
|
||||
// coreinit thread
|
||||
extern void (*OSSleepTicks)(u64 ticks);
|
||||
|
||||
// coreinit misc
|
||||
extern int (*_snprintf)(char *buf, int n, const char *format, ... );
|
||||
|
||||
// drawing
|
||||
void drawText(sint32 bufferNum, sint32 x, sint32 y, uint32 color, char* str);
|
||||
void drawHorizontalLineW2(sint32 bufferNum, sint32 x, sint32 y, sint32 width, uint32 color);
|
44
src/test1.c
Normal file
44
src/test1.c
Normal file
@ -0,0 +1,44 @@
|
||||
#include "main.h"
|
||||
|
||||
__attribute__((used)) uint32 test1Result = 0;
|
||||
|
||||
int test1_run()
|
||||
{
|
||||
uint32 v0 = 0x11111111;
|
||||
uint32 v1 = 0x22222222;
|
||||
uint32 v2 = 0x33333333;
|
||||
uint32 v3 = 0x44444444;
|
||||
uint32 v4 = 0x55555555;
|
||||
uint32 v5 = 0x66666666;
|
||||
uint32 v6 = 0x77777777;
|
||||
uint32 v7 = 0x88888888;
|
||||
uint32 v8 = 0x99999999;
|
||||
uint32 v9 = 0xAAAAAAAA;
|
||||
uint32 v10 = 0xBBBBBBBB;
|
||||
uint32 v11 = 0xCCCCCCCC;
|
||||
uint32 v12 = 0xDDDDDDDD;
|
||||
uint32 v13 = 0xEEEEEEEE;
|
||||
uint32 v14 = 0xFFFFFFFF;
|
||||
uint32 v15 = 0x00000000;
|
||||
for(uint32 i=0; i<0x60000000; i++)
|
||||
{
|
||||
v0 += v1;
|
||||
v1 ^= v2;
|
||||
v2 -= (v3<<7)|(v3>>(32-7));
|
||||
v3 = (v2 - v4) ^ v3;
|
||||
v4 = ~v4 + v5;
|
||||
v5 = (v6<<7)|(v6>>(32-7));
|
||||
v6 = v7 - v4;
|
||||
v7 = (v7>>1) + ~v8;
|
||||
v8 = v9 + 0x111;
|
||||
v9 = v10 * v8 + v10;
|
||||
v10 = (v11 >> 1) + (v11 >> 20);
|
||||
v11 = v11 + v12;
|
||||
v12 = v12 + v13;
|
||||
v13 = v13 + v14;
|
||||
v14 = v14 + v15;
|
||||
v15 = v15 + v0;
|
||||
}
|
||||
test1Result = v0;
|
||||
return 0;
|
||||
}
|
24
src/test2.c
Normal file
24
src/test2.c
Normal file
@ -0,0 +1,24 @@
|
||||
#include "main.h"
|
||||
#include "aes.h"
|
||||
|
||||
#define AES_COMPRESS_SIZE (1024*1024*1)
|
||||
|
||||
int test2_run()
|
||||
{
|
||||
unsigned char key[16] = {1,2,3,4,5,6,7,8};
|
||||
struct AES_ctx aesCtx;
|
||||
AES_init_ctx(&aesCtx, key);
|
||||
|
||||
uint8* block = (uint8*)memAlloc(AES_COMPRESS_SIZE);
|
||||
for(uint32 i=0; i<AES_COMPRESS_SIZE; i++)
|
||||
{
|
||||
block[i] = i&0xFF;
|
||||
}
|
||||
|
||||
for(uint32 i=0; i<400; i++)
|
||||
AES_CBC_encrypt_buffer(&aesCtx, block, AES_COMPRESS_SIZE);
|
||||
|
||||
memFree(block);
|
||||
|
||||
return 0;
|
||||
}
|
24
src/test3.c
Normal file
24
src/test3.c
Normal file
@ -0,0 +1,24 @@
|
||||
#include "main.h"
|
||||
|
||||
#define BLOCK_SIZE (1024*1024*8) // 8MB
|
||||
|
||||
int test3_run()
|
||||
{
|
||||
uint8* block0 = (uint8*)memAlloc(BLOCK_SIZE);
|
||||
uint8* block1 = (uint8*)memAlloc(BLOCK_SIZE);
|
||||
for(uint32 i=0; i<BLOCK_SIZE; i++)
|
||||
{
|
||||
block0[i] = i&0xFF;
|
||||
}
|
||||
|
||||
for(uint32 i=0; i<2000; i++)
|
||||
{
|
||||
for(uint32 c=0; c<BLOCK_SIZE; c++)
|
||||
block1[c] = block0[c];
|
||||
}
|
||||
|
||||
memFree(block0);
|
||||
memFree(block1);
|
||||
|
||||
return 0;
|
||||
}
|
34
src/test4.c
Normal file
34
src/test4.c
Normal file
@ -0,0 +1,34 @@
|
||||
#include "main.h"
|
||||
|
||||
__attribute__((used)) uint32 test2Result = 0;
|
||||
|
||||
int t4_recursiveA(sint32 depth, sint32 v0, sint32 v1)
|
||||
{
|
||||
if( depth >= 200 )
|
||||
return v0 + v1;
|
||||
return t4_recursiveA(depth+1, ~v0, ~v1) + (v0>>3) + (v1>>1);
|
||||
}
|
||||
|
||||
int t4_recursiveB(sint32 depth, sint32 v0, sint32 v1)
|
||||
{
|
||||
if( depth >= 200 )
|
||||
return v0 ^ v1;
|
||||
return t4_recursiveB(depth+1, v1, v0) + (v0>>1) + (v1>>3);
|
||||
}
|
||||
|
||||
int t4_recursiveC(sint32 depth, sint32 v0, sint32 v1)
|
||||
{
|
||||
if( depth >= 200 )
|
||||
return v0 + v1;
|
||||
return t4_recursiveC(depth+1, v0 + t4_recursiveB(0, v0, v1) + t4_recursiveA(0, v1-v0, v0>>3), v1 + t4_recursiveA(0, v0, v1) + t4_recursiveB(0, v1^v0, ~v0));
|
||||
}
|
||||
|
||||
int test4_run()
|
||||
{
|
||||
sint32 v = 0;
|
||||
for(sint32 i=0; i<2000; i++)
|
||||
v += t4_recursiveC(0, 0x11111111 + i * 133, 0xFFFFFFFF - i * 1021);
|
||||
|
||||
test2Result = v;
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user