Optimize consecutive increments

This commit is contained in:
drmortalwombat 2024-07-05 22:59:16 +02:00
parent 301431f12a
commit 470462fe8b
3 changed files with 208 additions and 8 deletions

View File

@ -150,6 +150,9 @@ rem @echo off
@call :test qsorttest.c
@if %errorlevel% neq 0 goto :error
@call :testn plasma.c
@if %errorlevel% neq 0 goto :error
@call :test loopdomtest.c
@if %errorlevel% neq 0 goto :error

145
autotest/plasma.c Normal file
View File

@ -0,0 +1,145 @@
#include <stdlib.h>
static const char sintab[] = {
128, 131, 134, 137, 140, 144, 147, 150, 153, 156, 159, 162, 165, 168, 171, 174, 177, 179, 182, 185, 188, 191, 193, 196, 199, 201, 204, 206, 209, 211, 213, 216, 218, 220, 222, 224, 226, 228, 230, 232, 234, 235, 237, 239, 240, 241, 243, 244, 245, 246, 248, 249, 250, 250, 251, 252, 253, 253, 254, 254, 254, 255, 255, 255,
255, 255, 255, 255, 254, 254, 254, 253, 253, 252, 251, 250, 250, 249, 248, 246, 245, 244, 243, 241, 240, 239, 237, 235, 234, 232, 230, 228, 226, 224, 222, 220, 218, 216, 213, 211, 209, 206, 204, 201, 199, 196, 193, 191, 188, 185, 182, 179, 177, 174, 171, 168, 165, 162, 159, 156, 153, 150, 147, 144, 140, 137, 134, 131,
128, 125, 122, 119, 116, 112, 109, 106, 103, 100, 97, 94, 91, 88, 85, 82, 79, 77, 74, 71, 68, 65, 63, 60, 57, 55, 52, 50, 47, 45, 43, 40, 38, 36, 34, 32, 30, 28, 26, 24, 22, 21, 19, 17, 16, 15, 13, 12, 11, 10, 8, 7, 6, 6, 5, 4, 3, 3, 2, 2, 2, 1, 1, 1,
1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5, 6, 6, 7, 8, 10, 11, 12, 13, 15, 16, 17, 19, 21, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 43, 45, 47, 50, 52, 55, 57, 60, 63, 65, 68, 71, 74, 77, 79, 82, 85, 88, 91, 94, 97, 100, 103, 106, 109, 112, 116, 119, 122, 125
};
char Screen0[1024], Screen1[1024];
char colormap0[256], colormap1[256];
char colors0[] = {0, 6, 14, 1, 13, 5, 0};
char colors1[] = {0, 9, 7, 1, 15, 12, 0};
unsigned c1A, c1B, c2A, c2B, c3A, c3B;
int d1A, d1B, d2A, d2B, d3A, d3B;
void inithires(void)
{
for(int i=0; i<256; i++)
{
colormap0[i] = colors0[i / 37];
colormap1[i] = colors1[i / 37] << 4;
}
}
inline void doplasma(char * scrn)
{
char xbuf0[40], xbuf1[40];
char ybuf0[25], ybuf1[25];
char c2a = c2A >> 8;
char c2b = c2B >> 8;
char c1a = c1A >> 8;
char c1b = c1B >> 8;
for (char i = 0; i < 25; i++) {
ybuf0[i] = sintab[(c1a + c2a) & 0xff] + sintab[c1b];
c1a += 13;
c1b -= 5;
}
for (char i = 0; i < 40; i++) {
xbuf0[i] = sintab[(c2a + c1b) & 0xff] + sintab[c2b];
c2a += 11;
c2b -= 7;
}
c2a = c2B >> 8;
c2b = c3A >> 8;
c1a = c1B >> 8;
c1b = c3B >> 8;
for (char i = 0; i < 25; i++) {
ybuf1[i] = sintab[(c1b + c2a) & 0xff] + sintab[c1a];
c1a += 4;
c1b -= 6;
}
for (char i = 0; i < 40; i++) {
xbuf1[i] = sintab[(c2b + c1a) & 0xff] + sintab[c2a];
c2a += 7;
c2b -= 9;
}
#pragma unroll(full)
for (char k=0; k<5; k++)
{
char tbuf0[5], tbuf1[5];
#pragma unroll(full)
for (char i = 0; i < 4; i++)
{
tbuf0[i] = ybuf0[5 * k + i + 1] - ybuf0[5 * k + i];
tbuf1[i] = ybuf1[5 * k + i + 1] - ybuf1[5 * k + i];
}
for (signed char i = 39; i >= 0; i--)
{
char t = xbuf0[i] + ybuf0[5 * k];
char u = xbuf1[i] + ybuf1[5 * k];
#pragma unroll(full)
for (char j = 0; j < 5; j++)
{
scrn[40 * j + 200 * k + i] = colormap0[u] | colormap1[u];
t += tbuf0[j];
u += tbuf1[j];
}
}
}
c1A += 8 * ((int)sintab[d1A] - 128);
c1B += 16 * ((int)sintab[d1B] - 128);
c2A += 8 * ((int)sintab[d2A] - 128);
c2B += 16 * ((int)sintab[d2B] - 128);
c3A += 6 * ((int)sintab[d3A] - 128);
c3B += 12 * ((int)sintab[d3B] - 128);
d1A += 3;
d1B += rand() & 3;
d2A += 5;
d2B += rand() & 3;
d3A += 2;
d3B += rand() & 3;
}
void doplasma0(void)
{
doplasma(Screen0);
}
void doplasma1(void)
{
doplasma(Screen1);
}
unsigned checksum(const char * scr)
{
unsigned s = 0x1234;
for(int i=0; i<1024; i++)
{
unsigned m = s & 1;
s >>= 1;
if (m)
s ^= 0x2152;
s ^= scr[i];
}
return s;
}
int main(void)
{
inithires();
doplasma0();
doplasma1();
doplasma0();
doplasma1();
doplasma0();
doplasma1();
return checksum(Screen0) + checksum(Screen1) - 16337;
}

View File

@ -16084,6 +16084,22 @@ bool NativeCodeBasicBlock::GlobalLoadStoreForwarding(bool zpage, const NativeCod
mIns[i + 1].mMode = ASMIM_IMPLIED;
changed = true;
}
else if (mXLSIns.mType != ASMIT_INV && ins.mType == ASMIT_LDA && i + 1 < mIns.Size() &&
mIns[i + 1].SameEffectiveAddress(mXLSIns) && mIns[i + 1].IsCommutative())
{
mIns[i + 1].CopyMode(ins);
ins.mType = ASMIT_TXA;
ins.mMode = ASMIM_IMPLIED;
changed = true;
}
else if (mYLSIns.mType != ASMIT_INV && ins.mType == ASMIT_LDA && i + 1 < mIns.Size() &&
mIns[i + 1].SameEffectiveAddress(mYLSIns) && mIns[i + 1].IsCommutative())
{
mIns[i + 1].CopyMode(ins);
ins.mType = ASMIT_TYA;
ins.mMode = ASMIM_IMPLIED;
changed = true;
}
if (ins.mType == ASMIT_STA)
{
@ -16195,6 +16211,22 @@ bool NativeCodeBasicBlock::GlobalLoadStoreForwarding(bool zpage, const NativeCod
mXLSIns.mType = ASMIT_INV;
}
}
else if (ins.mType == ASMIT_TAY)
{
mYLSIns = mALSIns;
}
else if (ins.mType == ASMIT_TAX)
{
mXLSIns = mALSIns;
}
else if (ins.mType == ASMIT_TYA)
{
mALSIns = mYLSIns;
}
else if (ins.mType == ASMIT_TXA)
{
mALSIns = mXLSIns;
}
else
{
if (ins.ChangesAccu())
@ -32879,13 +32911,33 @@ bool NativeCodeBasicBlock::MoveCLCLoadAddZPStoreUp(int at)
int j = at - 1;
while (j > 0)
{
if (mIns[j].mType == ASMIT_STA && mIns[j].mMode == ASMIM_ZERO_PAGE && mIns[j].mAddress == mIns[at + 1].mAddress)
if (mIns[j].mType == ASMIT_STA && mIns[j].mMode == ASMIM_ZERO_PAGE && (mIns[j].mAddress == mIns[at + 1].mAddress || mIns[j].mAddress == mIns[at + 2].mAddress))
{
if (mIns[j].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C))
return false;
bool flip = mIns[j].mAddress != mIns[at + 1].mAddress;
if (mIns[j].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C))
{
int k = j;
j++;
while (j < mIns.Size() && (mIns[j].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C)))
{
if (mIns[j].ChangesAccu())
return false;
j++;
}
while (k < j)
mIns[k++].mLive |= LIVE_CPU_REG_A;
}
if (mIns[at + 2].mLive & LIVE_CPU_REG_C)
{
for (int i = j + 1; i < at; i++)
mIns[i].mLive |= LIVE_CPU_REG_C;
}
if (flip)
mIns[at + 2].mAddress = mIns[at + 1].mAddress;
mIns[at + 0].mLive |= mIns[j].mLive | LIVE_CPU_REG_A;
mIns[at + 2].mLive |= mIns[j].mLive;
@ -32900,7 +32952,7 @@ bool NativeCodeBasicBlock::MoveCLCLoadAddZPStoreUp(int at)
mIns[at + 3].mType = ASMIT_NOP; mIns[at + 3].mMode = ASMIM_IMPLIED;
mIns[at + 4].mType = ASMIT_NOP; mIns[at + 4].mMode = ASMIM_IMPLIED;
mIns[at + 5].mType = ASMIT_NOP; mIns[at + 5].mMode = ASMIM_IMPLIED;
mIns[at + 6].mType = ASMIT_NOP; mIns[at + 6].mMode = ASMIM_IMPLIED;
mIns[at + 6].mType = ASMIT_LDA;
return true;
}
@ -32910,7 +32962,7 @@ bool NativeCodeBasicBlock::MoveCLCLoadAddZPStoreUp(int at)
return false;
if (mIns[j].UsesZeroPage(mIns[at + 3].mAddress))
return false;
if (mIns[j].ChangesCarry())
if ((mIns[at + 2].mLive & LIVE_CPU_REG_C) && mIns[j].ChangesCarry())
return false;
j--;
@ -41341,7 +41393,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizerShuffle(int pass)
mIns[i + 0].mType == ASMIT_CLC &&
mIns[i + 1].mType == ASMIT_LDA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE &&
mIns[i + 2].mType == ASMIT_ADC && mIns[i + 2].mMode == ASMIM_ZERO_PAGE &&
mIns[i + 3].mType == ASMIT_STA && mIns[i + 3].mMode == ASMIM_ZERO_PAGE && (mIns[i + 3].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z)) == 0)
mIns[i + 3].mType == ASMIT_STA && mIns[i + 3].mMode == ASMIM_ZERO_PAGE)
{
if (MoveCLCLoadAddZPStoreUp(i))
changed = true;