<tb@panthema.net>
<http://www.gnu.org/licenses/>
// C reference version of the 32-bit pointer-walking write scan.
//
// Stores the pattern 0xC0FFEEEE into consecutive 32-bit words starting at
// memarea and stopping once the cursor reaches memarea + size (size is in
// bytes), then repeats that whole pass until the repeat counter hits zero.
// Both loops test their condition after the body: every pass stores at least
// one word, and repeats == 0 wraps around instead of skipping the work.
void cScanWrite32PtrSimpleLoop(char* memarea, size_t size, size_t repeats)
{
    uint32_t* const first = reinterpret_cast<uint32_t*>(memarea);
    uint32_t* const last = first + size / sizeof(uint32_t);
    const uint32_t pattern = 0xC0FFEEEE;

    for (;;) {
        uint32_t* cursor = first;
        for (;;) {
            *cursor = pattern;
            ++cursor;
            if (cursor >= last) break;   // end of one pass over the area
        }
        if (--repeats == 0) break;       // all passes done
    }
}
// 32-bit pointer-walking write scan, ARM assembler version.
//
// Stores 0xC0FFEEEE with a post-incrementing `str` from memarea up to
// memarea + size (bytes), and repeats the whole pass `repeats` times. The
// loop conditions are tested after the body, so each pass stores at least one
// word and repeats must be non-zero (zero would wrap around).
void ScanWrite32PtrSimpleLoop(char* memarea, size_t size, size_t repeats)
{
    uint32_t value = 0xC0FFEEEE;

    // `repeats` is decremented inside the asm, so it must be a read-write
    // ("+r") operand rather than an input. "cc" covers the flags changed by
    // cmp/subs, "memory" covers the stores to the buffer. `volatile` is
    // required because an asm with an (unused) output may otherwise be removed.
    asm volatile(
        "1: \n"                                 // start of repeat loop
        "mov ip, %[memarea] \n"                 // ip = write cursor
        "2: \n"                                 // start of main loop
        "str %[value], [ip], #4 \n"             // store word, advance cursor
        "cmp ip, %[end] \n"
        "blo 2b \n"                             // until cursor reaches end
        "subs %[repeats], %[repeats], #1 \n"
        "bne 1b \n"                             // until repeats == 0
        : [repeats] "+r" (repeats)
        : [value] "r" (value), [memarea] "r" (memarea), [end] "r" (memarea+size)
        : "ip", "cc", "memory");
}
REGISTER(ScanWrite32PtrSimpleLoop, 4, 4);
// 32-bit pointer-walking write scan with a 16x unrolled body, ARM assembler
// version.
//
// Each inner iteration stores 0xC0FFEEEE to 16 consecutive words (64 bytes)
// at fixed offsets from the cursor, then advances the cursor by 64 bytes and
// compares it against memarea + size. Because the check happens only once per
// 64 bytes, a size that is not a multiple of 64 makes the last iteration
// store beyond memarea + size. The whole pass is repeated `repeats` times;
// repeats must be non-zero (zero would wrap around).
void ScanWrite32PtrUnrollLoop(char* memarea, size_t size, size_t repeats)
{
    uint32_t value = 0xC0FFEEEE;

    // `repeats` is decremented inside the asm, so it must be a read-write
    // ("+r") operand rather than an input. "cc" covers the flags changed by
    // cmp/subs, "memory" covers the stores to the buffer. `volatile` is
    // required because an asm with an (unused) output may otherwise be removed.
    asm volatile(
        "1: \n"                                 // start of repeat loop
        "mov ip, %[memarea] \n"                 // ip = write cursor
        "2: \n"                                 // start of main loop
        "str %[value], [ip,#0*4] \n"
        "str %[value], [ip,#1*4] \n"
        "str %[value], [ip,#2*4] \n"
        "str %[value], [ip,#3*4] \n"
        "str %[value], [ip,#4*4] \n"
        "str %[value], [ip,#5*4] \n"
        "str %[value], [ip,#6*4] \n"
        "str %[value], [ip,#7*4] \n"
        "str %[value], [ip,#8*4] \n"
        "str %[value], [ip,#9*4] \n"
        "str %[value], [ip,#10*4] \n"
        "str %[value], [ip,#11*4] \n"
        "str %[value], [ip,#12*4] \n"
        "str %[value], [ip,#13*4] \n"
        "str %[value], [ip,#14*4] \n"
        "str %[value], [ip,#15*4] \n"
        "add ip, ip, #16*4 \n"                  // advance cursor by 64 bytes
        "cmp ip, %[end] \n"
        "blo 2b \n"                             // until cursor reaches end
        "subs %[repeats], %[repeats], #1 \n"
        "bne 1b \n"                             // until repeats == 0
        : [repeats] "+r" (repeats)
        : [value] "r" (value), [memarea] "r" (memarea), [end] "r" (memarea+size)
        : "ip", "cc", "memory");
}
REGISTER(ScanWrite32PtrUnrollLoop, 4, 4);
// 32-bit write scan using store-multiple, ARM assembler version.
//
// Loads 0xC0FFEEEE into r4-r11 once, then each inner iteration writes those
// eight registers (32 bytes) with a single `stmia ip!` that also advances the
// cursor. The cursor is compared against memarea + size only after each
// 32-byte store, so a size that is not a multiple of 32 makes the last
// iteration store beyond memarea + size. The pass is repeated `repeats`
// times; repeats must be non-zero (zero would wrap around).
void ScanWrite32PtrMultiLoop(char* memarea, size_t size, size_t repeats)
{
    uint32_t value = 0xC0FFEEEE;

    // `repeats` is decremented inside the asm, so it must be a read-write
    // ("+r") operand rather than an input. "cc" covers the flags changed by
    // cmp/subs, "memory" covers the stores to the buffer. `volatile` is
    // required because an asm with an (unused) output may otherwise be removed.
    asm volatile(
        "ldr r4, %[value] \n"                   // fill r4-r11 with the pattern
        "ldr r5, %[value] \n"
        "ldr r6, %[value] \n"
        "ldr r7, %[value] \n"
        "ldr r8, %[value] \n"
        "ldr r9, %[value] \n"
        "ldr r10, %[value] \n"
        "ldr r11, %[value] \n"
        "1: \n"                                 // start of repeat loop
        "mov ip, %[memarea] \n"                 // ip = write cursor
        "2: \n"                                 // start of main loop
        "stmia ip!, {r4-r11} \n"                // store 8 words, advance cursor
        "cmp ip, %[end] \n"
        "blo 2b \n"                             // until cursor reaches end
        "subs %[repeats], %[repeats], #1 \n"
        "bne 1b \n"                             // until repeats == 0
        : [repeats] "+r" (repeats)
        : [value] "m" (value), [memarea] "r" (memarea), [end] "r" (memarea+size)
        : "ip", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc", "memory");
}
REGISTER(ScanWrite32PtrMultiLoop, 4, 4);
// 32-bit pointer-walking read scan, ARM assembler version.
//
// Loads consecutive words into r0 (the value is discarded) with a
// post-incrementing `ldr` from memarea up to memarea + size (bytes), and
// repeats the whole pass `repeats` times. The loop conditions are tested
// after the body, so each pass loads at least one word and repeats must be
// non-zero (zero would wrap around).
void ScanRead32PtrSimpleLoop(char* memarea, size_t size, size_t repeats)
{
    // `repeats` is decremented inside the asm, so it must be a read-write
    // ("+r") operand rather than an input. "cc" covers the flags changed by
    // cmp/subs, "memory" tells the compiler the asm reads the buffer.
    // `volatile` is required because an asm with an (unused) output may
    // otherwise be removed.
    asm volatile(
        "1: \n"                                 // start of repeat loop
        "mov ip, %[memarea] \n"                 // ip = read cursor
        "2: \n"                                 // start of main loop
        "ldr r0, [ip], #4 \n"                   // load word, advance cursor
        "cmp ip, %[end] \n"
        "blo 2b \n"                             // until cursor reaches end
        "subs %[repeats], %[repeats], #1 \n"
        "bne 1b \n"                             // until repeats == 0
        : [repeats] "+r" (repeats)
        : [memarea] "r" (memarea), [end] "r" (memarea+size)
        : "ip", "r0", "cc", "memory");
}
REGISTER(ScanRead32PtrSimpleLoop, 4, 4);
// 32-bit pointer-walking read scan with a 16x unrolled body, ARM assembler
// version.
//
// Each inner iteration loads 16 consecutive words (64 bytes) into r0 at fixed
// offsets from the cursor, then advances the cursor by 64 bytes and compares
// it against memarea + size. Because the check happens only once per 64
// bytes, a size that is not a multiple of 64 makes the last iteration read
// beyond memarea + size. The pass is repeated `repeats` times; repeats must
// be non-zero (zero would wrap around).
void ScanRead32PtrUnrollLoop(char* memarea, size_t size, size_t repeats)
{
    // `repeats` is decremented inside the asm, so it must be a read-write
    // ("+r") operand rather than an input. "cc" covers the flags changed by
    // cmp/subs, "memory" tells the compiler the asm reads the buffer.
    // `volatile` is required because an asm with an (unused) output may
    // otherwise be removed.
    asm volatile(
        "1: \n"                                 // start of repeat loop
        "mov ip, %[memarea] \n"                 // ip = read cursor
        "2: \n"                                 // start of main loop
        "ldr r0, [ip,#0*4] \n"
        "ldr r0, [ip,#1*4] \n"
        "ldr r0, [ip,#2*4] \n"
        "ldr r0, [ip,#3*4] \n"
        "ldr r0, [ip,#4*4] \n"
        "ldr r0, [ip,#5*4] \n"
        "ldr r0, [ip,#6*4] \n"
        "ldr r0, [ip,#7*4] \n"
        "ldr r0, [ip,#8*4] \n"
        "ldr r0, [ip,#9*4] \n"
        "ldr r0, [ip,#10*4] \n"
        "ldr r0, [ip,#11*4] \n"
        "ldr r0, [ip,#12*4] \n"
        "ldr r0, [ip,#13*4] \n"
        "ldr r0, [ip,#14*4] \n"
        "ldr r0, [ip,#15*4] \n"
        "add ip, ip, #16*4 \n"                  // advance cursor by 64 bytes
        "cmp ip, %[end] \n"
        "blo 2b \n"                             // until cursor reaches end
        "subs %[repeats], %[repeats], #1 \n"
        "bne 1b \n"                             // until repeats == 0
        : [repeats] "+r" (repeats)
        : [memarea] "r" (memarea), [end] "r" (memarea+size)
        : "ip", "r0", "cc", "memory");
}
REGISTER(ScanRead32PtrUnrollLoop, 4, 4);
// 32-bit read scan using load-multiple, ARM assembler version.
//
// Each inner iteration reads eight words (32 bytes) into r4-r11 with a single
// `ldmia ip!` that also advances the cursor; the loaded values are discarded.
// The cursor is compared against memarea + size only after each 32-byte load,
// so a size that is not a multiple of 32 makes the last iteration read beyond
// memarea + size. The pass is repeated `repeats` times; repeats must be
// non-zero (zero would wrap around).
void ScanRead32PtrMultiLoop(char* memarea, size_t size, size_t repeats)
{
    // `repeats` is decremented inside the asm, so it must be a read-write
    // ("+r") operand rather than an input. "cc" covers the flags changed by
    // cmp/subs, "memory" tells the compiler the asm reads the buffer.
    // `volatile` is required because an asm with an (unused) output may
    // otherwise be removed.
    asm volatile(
        "1: \n"                                 // start of repeat loop
        "mov ip, %[memarea] \n"                 // ip = read cursor
        "2: \n"                                 // start of main loop
        "ldmia ip!, {r4-r11} \n"                // load 8 words, advance cursor
        "cmp ip, %[end] \n"
        "blo 2b \n"                             // until cursor reaches end
        "subs %[repeats], %[repeats], #1 \n"
        "bne 1b \n"                             // until repeats == 0
        : [repeats] "+r" (repeats)
        : [memarea] "r" (memarea), [end] "r" (memarea+size)
        : "ip", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc", "memory");
}
REGISTER(ScanRead32PtrMultiLoop, 4, 4);
// C reference version of the 32-bit index-based write scan.
//
// Treats _memarea as an array of 32-bit words and stores 0xC0FFEEEE into the
// first _size / 4 of them by index, then repeats the whole pass until the
// repeat counter reaches zero. The outer loop tests after the body, so one
// pass always runs and repeats == 0 wraps around; the inner loop tests first,
// so an area smaller than one word is left untouched.
void cScanWrite32IndexSimpleLoop(char* _memarea, size_t _size, size_t repeats)
{
    uint32_t* const words = reinterpret_cast<uint32_t*>(_memarea);
    const uint32_t count = _size / sizeof(uint32_t);   // number of whole words
    const uint32_t pattern = 0xC0FFEEEE;

    for (;;) {
        size_t idx = 0;
        while (idx < count) {
            words[idx] = pattern;
            ++idx;
        }
        if (--repeats == 0) break;                     // all passes done
    }
}
// 32-bit index-based write scan, ARM assembler version.
//
// Keeps a byte offset in ip, starting at zero, and stores 0xC0FFEEEE to
// memarea + offset, stepping the offset by 4 until it reaches size (bytes).
// The whole pass is repeated `repeats` times. The loop conditions are tested
// after the body, so each pass stores at least one word and repeats must be
// non-zero (zero would wrap around).
void ScanWrite32IndexSimpleLoop(char* memarea, size_t size, size_t repeats)
{
    uint32_t value = 0xC0FFEEEE;

    // `repeats` is decremented inside the asm, so it must be a read-write
    // ("+r") operand rather than an input. "cc" covers the flags changed by
    // cmp/subs, "memory" covers the stores to the buffer. `volatile` is
    // required because an asm with an (unused) output may otherwise be removed.
    asm volatile(
        "1: \n"                                 // start of repeat loop
        "mov ip, #0 \n"                         // ip = byte offset into memarea
        "2: \n"                                 // start of main loop
        "str %[value], [%[memarea], ip] \n"     // store word at memarea + ip
        "add ip, #4 \n"
        "cmp ip, %[size] \n"
        "blo 2b \n"                             // until offset reaches size
        "subs %[repeats], %[repeats], #1 \n"
        "bne 1b \n"                             // until repeats == 0
        : [repeats] "+r" (repeats)
        : [value] "r" (value), [memarea] "r" (memarea), [size] "r" (size)
        : "ip", "cc", "memory");
}
REGISTER(ScanWrite32IndexSimpleLoop, 4, 4);
// 32-bit index-based read scan, ARM assembler version.
//
// Keeps a byte offset in ip, starting at zero, and loads the word at
// memarea + offset into r0 (the value is discarded), stepping the offset by 4
// until it reaches size (bytes). The whole pass is repeated `repeats` times.
// The loop conditions are tested after the body, so each pass loads at least
// one word and repeats must be non-zero (zero would wrap around).
void ScanRead32IndexSimpleLoop(char* memarea, size_t size, size_t repeats)
{
    // `repeats` is decremented inside the asm, so it must be a read-write
    // ("+r") operand rather than an input. "cc" covers the flags changed by
    // cmp/subs, "memory" tells the compiler the asm reads the buffer.
    // `volatile` is required because an asm with an (unused) output may
    // otherwise be removed.
    asm volatile(
        "1: \n"                                 // start of repeat loop
        "mov ip, #0 \n"                         // ip = byte offset into memarea
        "2: \n"                                 // start of main loop
        "ldr r0, [%[memarea], ip] \n"           // load word at memarea + ip
        "add ip, #4 \n"
        "cmp ip, %[size] \n"
        "blo 2b \n"                             // until offset reaches size
        "subs %[repeats], %[repeats], #1 \n"
        "bne 1b \n"                             // until repeats == 0
        : [repeats] "+r" (repeats)
        : [memarea] "r" (memarea), [size] "r" (size)
        : "ip", "r0", "cc", "memory");
}
REGISTER(ScanRead32IndexSimpleLoop, 4, 4);
// C reference version of the 64-bit pointer-walking write scan.
//
// A 64-bit element is modelled as a pair of two 32-bit words, both holding
// 0xC0FFEEEE. The pair is stored into consecutive elements starting at
// memarea until the cursor reaches memarea + size (bytes), and the whole pass
// is repeated until the repeat counter hits zero. Both loops test their
// condition after the body: every pass stores at least one pair, and
// repeats == 0 wraps around instead of skipping the work.
void cScanWrite64PtrSimpleLoop(char* memarea, size_t size, size_t repeats)
{
    typedef std::pair<uint32_t,uint32_t> word_pair;

    word_pair* const first = reinterpret_cast<word_pair*>(memarea);
    word_pair* const last = first + size / sizeof(word_pair);
    const uint32_t half = 0xC0FFEEEE;
    const word_pair pattern(half, half);

    for (;;) {
        word_pair* cursor = first;
        for (;;) {
            *cursor = pattern;
            ++cursor;
            if (cursor >= last) break;   // end of one pass over the area
        }
        if (--repeats == 0) break;       // all passes done
    }
}
// 64-bit pointer-walking write scan, ARM assembler version.
//
// Copies 0xC0FFEEEE into r4 and r5, then stores that register pair with a
// post-incrementing `strd` (8 bytes per store) from memarea up to
// memarea + size (bytes). The whole pass is repeated `repeats` times. The
// loop conditions are tested after the body, so each pass stores at least one
// doubleword and repeats must be non-zero (zero would wrap around).
void ScanWrite64PtrSimpleLoop(char* memarea, size_t size, size_t repeats)
{
    uint32_t value = 0xC0FFEEEE;

    // `repeats` is decremented inside the asm, so it must be a read-write
    // ("+r") operand rather than an input. "cc" covers the flags changed by
    // cmp/subs, "memory" covers the stores to the buffer. `volatile` is
    // required because an asm with an (unused) output may otherwise be removed.
    asm volatile(
        "mov r4, %[value] \n"                   // r4:r5 = 64-bit pattern
        "mov r5, %[value] \n"
        "1: \n"                                 // start of repeat loop
        "mov ip, %[memarea] \n"                 // ip = write cursor
        "2: \n"                                 // start of main loop
        "strd r4, [ip], #8 \n"                  // store r4:r5, advance cursor
        "cmp ip, %[end] \n"
        "blo 2b \n"                             // until cursor reaches end
        "subs %[repeats], %[repeats], #1 \n"
        "bne 1b \n"                             // until repeats == 0
        : [repeats] "+r" (repeats)
        : [value] "r" (value), [memarea] "r" (memarea), [end] "r" (memarea+size)
        : "ip", "r4", "r5", "cc", "memory");
}
REGISTER(ScanWrite64PtrSimpleLoop, 8, 8);
// 64-bit pointer-walking write scan with a 16x unrolled body, ARM assembler
// version.
//
// Copies 0xC0FFEEEE into r4 and r5, then each inner iteration stores that
// register pair with `strd` to 16 consecutive doublewords (128 bytes) at
// fixed offsets from the cursor, advances the cursor by 128 bytes and
// compares it against memarea + size. Because the check happens only once per
// 128 bytes, a size that is not a multiple of 128 makes the last iteration
// store beyond memarea + size. The pass is repeated `repeats` times; repeats
// must be non-zero (zero would wrap around).
void ScanWrite64PtrUnrollLoop(char* memarea, size_t size, size_t repeats)
{
    uint32_t value = 0xC0FFEEEE;

    // `repeats` is decremented inside the asm, so it must be a read-write
    // ("+r") operand rather than an input. "cc" covers the flags changed by
    // cmp/subs, "memory" covers the stores to the buffer. `volatile` is
    // required because an asm with an (unused) output may otherwise be removed.
    asm volatile(
        "mov r4, %[value] \n"                   // r4:r5 = 64-bit pattern
        "mov r5, %[value] \n"
        "1: \n"                                 // start of repeat loop
        "mov ip, %[memarea] \n"                 // ip = write cursor
        "2: \n"                                 // start of main loop
        "strd r4, [ip,#0*8] \n"
        "strd r4, [ip,#1*8] \n"
        "strd r4, [ip,#2*8] \n"
        "strd r4, [ip,#3*8] \n"
        "strd r4, [ip,#4*8] \n"
        "strd r4, [ip,#5*8] \n"
        "strd r4, [ip,#6*8] \n"
        "strd r4, [ip,#7*8] \n"
        "strd r4, [ip,#8*8] \n"
        "strd r4, [ip,#9*8] \n"
        "strd r4, [ip,#10*8] \n"
        "strd r4, [ip,#11*8] \n"
        "strd r4, [ip,#12*8] \n"
        "strd r4, [ip,#13*8] \n"
        "strd r4, [ip,#14*8] \n"
        "strd r4, [ip,#15*8] \n"
        "add ip, #16*8 \n"                      // advance cursor by 128 bytes
        "cmp ip, %[end] \n"
        "blo 2b \n"                             // until cursor reaches end
        "subs %[repeats], %[repeats], #1 \n"
        "bne 1b \n"                             // until repeats == 0
        : [repeats] "+r" (repeats)
        : [value] "r" (value), [memarea] "r" (memarea), [end] "r" (memarea+size)
        : "ip", "r4", "r5", "cc", "memory");
}
REGISTER(ScanWrite64PtrUnrollLoop, 8, 8);
// 64-bit pointer-walking read scan, ARM assembler version.
//
// Loads consecutive doublewords into the r4/r5 pair (the values are
// discarded) with a post-incrementing `ldrd` (8 bytes per load) from memarea
// up to memarea + size (bytes), and repeats the whole pass `repeats` times.
// The loop conditions are tested after the body, so each pass loads at least
// one doubleword and repeats must be non-zero (zero would wrap around).
void ScanRead64PtrSimpleLoop(char* memarea, size_t size, size_t repeats)
{
    // `repeats` is decremented inside the asm, so it must be a read-write
    // ("+r") operand rather than an input. "cc" covers the flags changed by
    // cmp/subs, "memory" tells the compiler the asm reads the buffer.
    // `volatile` is required because an asm with an (unused) output may
    // otherwise be removed.
    asm volatile(
        "1: \n"                                 // start of repeat loop
        "mov ip, %[memarea] \n"                 // ip = read cursor
        "2: \n"                                 // start of main loop
        "ldrd r4, [ip], #8 \n"                  // load into r4:r5, advance cursor
        "cmp ip, %[end] \n"
        "blo 2b \n"                             // until cursor reaches end
        "subs %[repeats], %[repeats], #1 \n"
        "bne 1b \n"                             // until repeats == 0
        : [repeats] "+r" (repeats)
        : [memarea] "r" (memarea), [end] "r" (memarea+size)
        : "ip", "r4", "r5", "cc", "memory");
}
REGISTER(ScanRead64PtrSimpleLoop, 8, 8);
// 64-bit pointer-walking read scan with a 16x unrolled body, ARM assembler
// version.
//
// Each inner iteration loads 16 consecutive doublewords (128 bytes) into the
// r4/r5 pair with `ldrd` at fixed offsets from the cursor, advances the
// cursor by 128 bytes and compares it against memarea + size. Because the
// check happens only once per 128 bytes, a size that is not a multiple of 128
// makes the last iteration read beyond memarea + size. The pass is repeated
// `repeats` times; repeats must be non-zero (zero would wrap around).
void ScanRead64PtrUnrollLoop(char* memarea, size_t size, size_t repeats)
{
    // `repeats` is decremented inside the asm, so it must be a read-write
    // ("+r") operand rather than an input. "cc" covers the flags changed by
    // cmp/subs, "memory" tells the compiler the asm reads the buffer.
    // `volatile` is required because an asm with an (unused) output may
    // otherwise be removed.
    asm volatile(
        "1: \n"                                 // start of repeat loop
        "mov ip, %[memarea] \n"                 // ip = read cursor
        "2: \n"                                 // start of main loop
        "ldrd r4, [ip,#0*8] \n"
        "ldrd r4, [ip,#1*8] \n"
        "ldrd r4, [ip,#2*8] \n"
        "ldrd r4, [ip,#3*8] \n"
        "ldrd r4, [ip,#4*8] \n"
        "ldrd r4, [ip,#5*8] \n"
        "ldrd r4, [ip,#6*8] \n"
        "ldrd r4, [ip,#7*8] \n"
        "ldrd r4, [ip,#8*8] \n"
        "ldrd r4, [ip,#9*8] \n"
        "ldrd r4, [ip,#10*8] \n"
        "ldrd r4, [ip,#11*8] \n"
        "ldrd r4, [ip,#12*8] \n"
        "ldrd r4, [ip,#13*8] \n"
        "ldrd r4, [ip,#14*8] \n"
        "ldrd r4, [ip,#15*8] \n"
        "add ip, #16*8 \n"                      // advance cursor by 128 bytes
        "cmp ip, %[end] \n"
        "blo 2b \n"                             // until cursor reaches end
        "subs %[repeats], %[repeats], #1 \n"
        "bne 1b \n"                             // until repeats == 0
        : [repeats] "+r" (repeats)
        : [memarea] "r" (memarea), [end] "r" (memarea+size)
        : "ip", "r4", "r5", "cc", "memory");
}
REGISTER(ScanRead64PtrUnrollLoop, 8, 8);
// C reference version of the 32-bit pointer-chasing read.
//
// Each 32-bit word visited is interpreted as the address of the next word to
// visit. Starting at memarea, the chain is followed until it leads back to
// memarea, and that walk is repeated until the repeat counter hits zero. The
// size argument is unused. Both loops test after the body, so at least one
// hop is always taken and repeats == 0 wraps around.
void cPermRead32SimpleLoop(char* memarea, size_t, size_t repeats)
{
    uint32_t* const head = (uint32_t*)memarea;

    for (;;) {
        uint32_t* node = head;
        for (;;) {
            node = (uint32_t*)*node;     // follow the stored address
            if (node == head) break;     // chain has returned to its start
        }
        if (--repeats == 0) break;       // all walks done
    }
}
// 32-bit pointer-chasing read, ARM assembler version.
//
// Starting at memarea, repeatedly replaces the cursor with the 32-bit value
// stored at the cursor (`ldr ip, [ip]`) until the cursor equals memarea
// again, and repeats that walk `repeats` times. The size argument is unused.
// The loop conditions are tested after the body, so at least one hop is
// always taken and repeats must be non-zero (zero would wrap around).
void PermRead32SimpleLoop(char* memarea, size_t, size_t repeats)
{
    // `repeats` is decremented inside the asm, so it must be a read-write
    // ("+r") operand rather than an input. "cc" covers the flags changed by
    // cmp/subs, "memory" tells the compiler the asm reads the buffer.
    // `volatile` is required because an asm with an (unused) output may
    // otherwise be removed.
    asm volatile(
        "1: \n"                                 // start of repeat loop
        "mov ip, %[memarea] \n"                 // ip = current chain position
        "2: \n"                                 // start of main loop
        "ldr ip, [ip] \n"                       // follow the stored address
        "cmp ip, %[memarea] \n"
        "bne 2b \n"                             // until back at the start
        "subs %[repeats], %[repeats], #1 \n"
        "bne 1b \n"                             // until repeats == 0
        : [repeats] "+r" (repeats)
        : [memarea] "r" (memarea)
        : "ip", "cc", "memory");
}
REGISTER_PERM(PermRead32SimpleLoop, 4);
// 32-bit pointer-chasing read with a 16x unrolled body, ARM assembler
// version.
//
// Starting at memarea, each inner iteration performs 16 dependent hops
// (`ldr ip, [ip]`), and only then compares the cursor against memarea, so the
// return-to-start check happens once per 16 hops. The walk is repeated
// `repeats` times. The size argument is unused. repeats must be non-zero
// (zero would wrap around).
void PermRead32UnrollLoop(char* memarea, size_t, size_t repeats)
{
    // `repeats` is decremented inside the asm, so it must be a read-write
    // ("+r") operand rather than an input. "cc" covers the flags changed by
    // cmp/subs, "memory" tells the compiler the asm reads the buffer.
    // `volatile` is required because an asm with an (unused) output may
    // otherwise be removed.
    asm volatile(
        "1: \n"                                 // start of repeat loop
        "mov ip, %[memarea] \n"                 // ip = current chain position
        "2: \n"                                 // start of main loop
        "ldr ip, [ip] \n"
        "ldr ip, [ip] \n"
        "ldr ip, [ip] \n"
        "ldr ip, [ip] \n"
        "ldr ip, [ip] \n"
        "ldr ip, [ip] \n"
        "ldr ip, [ip] \n"
        "ldr ip, [ip] \n"
        "ldr ip, [ip] \n"
        "ldr ip, [ip] \n"
        "ldr ip, [ip] \n"
        "ldr ip, [ip] \n"
        "ldr ip, [ip] \n"
        "ldr ip, [ip] \n"
        "ldr ip, [ip] \n"
        "ldr ip, [ip] \n"
        "cmp ip, %[memarea] \n"
        "bne 2b \n"                             // until back at the start
        "subs %[repeats], %[repeats], #1 \n"
        "bne 1b \n"                             // until repeats == 0
        : [repeats] "+r" (repeats)
        : [memarea] "r" (memarea)
        : "ip", "cc", "memory");
}
REGISTER_PERM(PermRead32UnrollLoop, 4);