Fix same XY optimization

This commit is contained in:
drmortalwombat 2023-05-04 11:32:12 +02:00
parent df733d09a8
commit 7d12fd4c02
8 changed files with 358 additions and 8 deletions

View File

@ -5523,6 +5523,74 @@ bool InterCodeBasicBlock::PropagateConstTemps(const GrowingInstructionPtrArray&
return changed;
}
bool InterCodeBasicBlock::CombineIndirectAddressing(void)
{
bool changed = false;
if (!mVisited)
{
mVisited = true;
GrowingInstructionPtrArray tvalue(nullptr);
for (int i = 0; i < mInstructions.Size(); i++)
{
InterInstruction* lins = mInstructions[i];
InterInstruction* tins = nullptr;
if (lins->mCode == IC_LEA && lins->mSrc[0].mTemp >= 0 && lins->mSrc[1].mTemp < 0 &&
!lins->mSrc[0].IsUByte() &&
(lins->mSrc[1].mMemory == IM_ABSOLUTE || lins->mSrc[1].mMemory == IM_GLOBAL || lins->mSrc[1].mMemory == IM_LOCAL))
{
int j = 0;
while (j < tvalue.Size() &&
!(tvalue[j]->mSrc[0].mTemp == lins->mSrc[0].mTemp &&
tvalue[j]->mSrc[1].mTemp < 0 &&
tvalue[j]->mSrc[1].mMemory == lins->mSrc[1].mMemory &&
tvalue[j]->mSrc[1].mVarIndex == lins->mSrc[1].mVarIndex &&
tvalue[j]->mSrc[1].mIntConst <= lins->mSrc[1].mIntConst &&
tvalue[j]->mSrc[1].mIntConst + 256 > lins->mSrc[1].mIntConst))
j++;
if (j < tvalue.Size())
{
int offset = lins->mSrc[1].mIntConst - tvalue[j]->mSrc[1].mIntConst;
lins->mSrc[1] = tvalue[j]->mDst;
lins->mSrc[0].mTemp = -1;
lins->mSrc[0].mIntConst = offset;
changed = true;
}
else
tins = lins;
}
if (HasSideEffect(lins->mCode))
tvalue.SetSize(0);
else if (lins->mDst.mTemp >= 0)
{
int j = 0;
while (j < tvalue.Size())
{
if (tvalue[j]->ReferencesTemp(lins->mDst.mTemp))
tvalue.Remove(j);
else
j++;
}
}
if (tins)
tvalue.Push(tins);
}
if (mTrueJump && mTrueJump->CombineIndirectAddressing())
changed = true;
if (mFalseJump && mFalseJump->CombineIndirectAddressing())
changed = true;
}
return changed;
}
void InterCodeBasicBlock::SimplifyIntegerRangeRelops(void)
{
if (!mVisited)
@ -13505,6 +13573,17 @@ bool InterCodeBasicBlock::PeepholeReplaceOptimization(const GrowingVariableArray
mInstructions[i + 1]->mSrc[0] = mInstructions[i + 0]->mDst;
changed = true;
}
else if (
mInstructions[i + 0]->mCode == IC_LEA && mInstructions[i + 0]->mSrc[0].mTemp < 0 && mInstructions[i + 0]->mSrc[1].mTemp >= 0 &&
mInstructions[i + 1]->mCode == IC_LOAD && mInstructions[i + 1]->mSrc[0].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[0].mFinal &&
mInstructions[i + 1]->mSrc[0].mIntConst + mInstructions[i + 0]->mSrc[0].mIntConst < 256)
{
mInstructions[i + 1]->mSrc[0].mTemp = mInstructions[i + 0]->mSrc[1].mTemp;
mInstructions[i + 1]->mSrc[0].mIntConst += mInstructions[i + 0]->mSrc[0].mIntConst;
mInstructions[i + 0]->mCode = IC_NONE;
mInstructions[i + 0]->mNumOperands = 0;
changed = true;
}
else if (
mInstructions[i + 0]->mCode == IC_LEA && mInstructions[i + 0]->mSrc[1].mTemp < 0 &&
mInstructions[i + 1]->mCode == IC_STORE && mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[1].mFinal &&
@ -15302,6 +15381,14 @@ void InterCodeProcedure::LoadStoreForwarding(InterMemory paramMemory)
} while (changed);
}
// Procedure-level driver for the per-block CombineIndirectAddressing pass:
// clears the visited markers, runs the pass starting at the entry block and
// then rebuilds the data flow sets.  The block pass's "changed" result is not
// used here; the data flow sets are rebuilt unconditionally.
void InterCodeProcedure::CombineIndirectAddressing(void)
{
ResetVisited();
mEntryBlock->CombineIndirectAddressing();
BuildDataFlowSets();
}
void InterCodeProcedure::PropagateConstOperationsUp(void)
{
#if 1
@ -16022,6 +16109,8 @@ void InterCodeProcedure::Close(void)
PropagateConstOperationsUp();
CombineIndirectAddressing();
#if 1
for (int i = 0; i < 4; i++)
{

View File

@ -434,6 +434,8 @@ public:
bool BuildGlobalIntegerRangeSets(bool initial, const GrowingVariableArray& localVars, const GrowingVariableArray& paramVars);
void SimplifyIntegerRangeRelops(void);
bool CombineIndirectAddressing(void);
GrowingIntArray mEntryRenameTable;
GrowingIntArray mExitRenameTable;
@ -630,6 +632,7 @@ protected:
void ExpandSelect(void);
void PropagateConstOperationsUp(void);
void RebuildIntegerRangeSet(void);
void CombineIndirectAddressing(void);
void MergeBasicBlocks(void);
void CheckUsedDefinedTemps(void);

View File

@ -12831,7 +12831,7 @@ bool NativeCodeBasicBlock::ReplaceYRegWithXReg(int start, int end)
{
bool changed = false;
CheckLive();
// CheckLive();
for (int i = start; i < end; i++)
{
@ -12840,7 +12840,7 @@ bool NativeCodeBasicBlock::ReplaceYRegWithXReg(int start, int end)
changed = true;
}
CheckLive();
// CheckLive();
return changed;
}
@ -13061,6 +13061,89 @@ bool NativeCodeBasicBlock::ForwardAccuAddSub(void)
return changed;
}
// Replace "CLC; ADC #1" by "INX; TXA" (or "INY; TYA") when the accumulator is
// known to hold the same value as X (or Y).
//
// While scanning the block, xisa / yisa record that A currently equals X / Y,
// established by a TAX/TXA or TAY/TYA at index xoffset / yoffset.  The
// rewrite is applied only if the carry is not live after the ADC and the
// index register is not live after it either, since the register is
// incremented as a side effect.  The index register is then marked live from
// the establishing transfer up to the rewritten instruction.
//
// Processes this block once (guarded by mVisited) and recurses into both
// successors.  Returns true if any instruction was rewritten.
bool NativeCodeBasicBlock::ForwardAXYReg(void)
{
	bool	changed = false;

	if (!mVisited)
	{
		mVisited = true;

		bool	xisa = false, yisa = false;
		int		xoffset = -1, yoffset = -1;

		for (int i = 0; i < mIns.Size(); i++)
		{
			if (mIns[i].mType == ASMIT_TAX)
			{
				// X := A, A untouched, so a Y pairing (if any) stays valid
				xisa = true;
				xoffset = i;
			}
			else if (mIns[i].mType == ASMIT_TXA)
			{
				// A := X, any previous A == Y relation is gone
				xisa = true;
				yisa = false;
				xoffset = i;
			}
			else if (mIns[i].mType == ASMIT_TAY)
			{
				// Y := A, A untouched, so an X pairing (if any) stays valid
				yisa = true;
				yoffset = i;
			}
			else if (mIns[i].mType == ASMIT_TYA)
			{
				// A := Y, any previous A == X relation is gone
				yisa = true;
				xisa = false;
				yoffset = i;
			}
			else
			{
				// Check each register on its own, so that an instruction
				// reporting several register changes invalidates every
				// pairing it affects.
				const bool	changesX = mIns[i].ChangesXReg();
				const bool	changesY = mIns[i].ChangesYReg();
				const bool	changesA = mIns[i].ChangesAccu();

				if (changesX)
					xisa = false;

				// A change of Y invalidates the A == Y pairing.  This used to
				// clear xisa, leaving a stale yisa that allowed the INY/TYA
				// rewrite below to compute Y + 1 instead of A + 1.
				if (changesY)
					yisa = false;

				if (changesA)
				{
					xisa = false;
					yisa = false;
				}

				if (!changesX && !changesY && !changesA &&
					i + 1 < mIns.Size() && mIns[i].mType == ASMIT_CLC &&
					mIns[i + 1].mType == ASMIT_ADC && mIns[i + 1].mMode == ASMIM_IMMEDIATE && !(mIns[i + 1].mLive & LIVE_CPU_REG_C))
				{
					if (xisa && !(mIns[i + 1].mLive & LIVE_CPU_REG_X))
					{
						if (mIns[i + 1].mAddress == 1)
						{
							mIns[i + 0].mType = ASMIT_INX;
							mIns[i + 1].mType = ASMIT_TXA; mIns[i + 1].mMode = ASMIM_IMPLIED;

							// X now carries the value from the transfer up to the INX
							for (int j = xoffset; j < i + 1; j++)
								mIns[j].mLive |= LIVE_CPU_REG_X;

							// The new TXA at i + 1 re-establishes the pairing on the next iteration
							xisa = false;
							changed = true;
						}
					}
					else if (yisa && !(mIns[i + 1].mLive & LIVE_CPU_REG_Y))
					{
						if (mIns[i + 1].mAddress == 1)
						{
							mIns[i + 0].mType = ASMIT_INY;
							mIns[i + 1].mType = ASMIT_TYA; mIns[i + 1].mMode = ASMIM_IMPLIED;

							// Y now carries the value from the transfer up to the INY
							for (int j = yoffset; j < i + 1; j++)
								mIns[j].mLive |= LIVE_CPU_REG_Y;

							// The new TYA at i + 1 re-establishes the pairing on the next iteration
							yisa = false;
							changed = true;
						}
					}
				}
			}
		}

		if (mTrueJump && mTrueJump->ForwardAXYReg())
			changed = true;
		if (mFalseJump && mFalseJump->ForwardAXYReg())
			changed = true;
	}

	return changed;
}
bool NativeCodeBasicBlock::ForwardZpYIndex(bool full)
{
CheckLive();
@ -13487,21 +13570,41 @@ bool NativeCodeBasicBlock::CombineSameXY(void)
int xpos, ypos;
bool samexy = false;
CheckLive();
for (int i = 0; i < mIns.Size(); i++)
{
NativeCodeInstruction& ins(mIns[i]);
if (ins.ChangesXReg())
{
if (samexy && CombineSameXtoY(xpos, ypos, i))
if (samexy)
{
if (!ins.RequiresXReg() && CombineSameXtoY(xpos, ypos, i))
changed = true;
else if (!ins.RequiresYReg() && !(ins.mLive & LIVE_CPU_REG_Y) && CombineSameYtoX(xpos, ypos, i))
{
changed = true;
yreg = -1;
}
}
xreg = -1;
samexy = false;
}
if (ins.ChangesYReg())
{
if (samexy && CombineSameYtoX(ypos, xpos, i))
if (samexy)
{
if (!ins.RequiresYReg() && CombineSameYtoX(xpos, ypos, i))
changed = true;
else if (!ins.RequiresXReg() && !(ins.mLive & LIVE_CPU_REG_X) && CombineSameXtoY(xpos, ypos, i))
{
changed = true;
xreg = -1;
}
}
yreg = -1;
samexy = false;
}
@ -13565,6 +13668,8 @@ bool NativeCodeBasicBlock::CombineSameXY(void)
changed = true;
}
CheckLive();
if (mTrueJump && mTrueJump->CombineSameXY())
changed = true;
if (mFalseJump && mFalseJump->CombineSameXY())
@ -13854,14 +13959,14 @@ bool NativeCodeBasicBlock::ReplaceXRegWithYReg(int start, int end)
{
bool changed = false;
CheckLive();
//CheckLive();
for (int i = start; i < end; i++)
{
NativeCodeInstruction& ins(mIns[i]);
if (ins.ReplaceXRegWithYReg())
changed = true;
}
CheckLive();
//CheckLive();
return changed;
}
@ -39562,6 +39667,12 @@ void NativeCodeProcedure::Optimize(void)
#endif
if (step == 8)
{
ResetVisited();
if (mEntryBlock->ForwardAXYReg())
changed = true;
}
#if 1
if (step == 10)
{

View File

@ -409,6 +409,7 @@ public:
bool ForwardAccuAddSub(void);
bool ForwardZpYIndex(bool full);
bool ForwardZpXIndex(bool full);
bool ForwardAXYReg(void);
bool RegisterValueForwarding(void);
bool CanCombineSameXtoY(int start, int end);

View File

@ -268,6 +268,12 @@
}
"Entry"
{
"MsmKey" = "8:_414E74B8FB564A7EBE074E9464FAB0F5"
"OwnerKey" = "8:_UNDEFINED"
"MsmSig" = "8:_UNDEFINED"
}
"Entry"
{
"MsmKey" = "8:_41A6A127243E4EC2A1E1E2171993C5C1"
"OwnerKey" = "8:_UNDEFINED"
"MsmSig" = "8:_UNDEFINED"
@ -2033,6 +2039,26 @@
"IsDependency" = "11:FALSE"
"IsolateTo" = "8:"
}
"{1FB2D0AE-D3B9-43D4-B9DD-F88EC61E35DE}:_414E74B8FB564A7EBE074E9464FAB0F5"
{
"SourcePath" = "8:..\\samples\\hires\\qsort.prg"
"TargetName" = "8:qsort.prg"
"Tag" = "8:"
"Folder" = "8:_82FDD682B0334DE1B89A843D32862B85"
"Condition" = "8:"
"Transitive" = "11:FALSE"
"Vital" = "11:TRUE"
"ReadOnly" = "11:FALSE"
"Hidden" = "11:FALSE"
"System" = "11:FALSE"
"Permanent" = "11:FALSE"
"SharedLegacy" = "11:FALSE"
"PackageAs" = "3:1"
"Register" = "3:1"
"Exclude" = "11:FALSE"
"IsDependency" = "11:FALSE"
"IsolateTo" = "8:"
}
"{1FB2D0AE-D3B9-43D4-B9DD-F88EC61E35DE}:_41A6A127243E4EC2A1E1E2171993C5C1"
{
"SourcePath" = "8:..\\samples\\scrolling\\grid2d.c"

View File

@ -4,3 +4,5 @@
../../bin/oscar64 lines.c -n
../../bin/oscar64 polygon.c -n
../../bin/oscar64 bitblit.c -n
../../bin/oscar64 fractaltree.c -n
../../bin/oscar64 qsort.c -n

View File

@ -5,3 +5,4 @@ call ..\..\bin\oscar64 polygon.c -n
call ..\..\bin\oscar64 bitblit.c -n
call ..\..\bin\oscar64 cube3d.c -n
call ..\..\bin\oscar64 fractaltree.c -n
call ..\..\bin\oscar64 qsort.c -n

117
samples/hires/qsort.c Normal file
View File

@ -0,0 +1,117 @@
#include <c64/memmap.h>
#include <c64/vic.h>
#include <gfx/bitmap.h>
#include <string.h>
#include <conio.h>
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#define Color ((char *)0xd000)
#define Hires ((char *)0xe000)
Bitmap Screen;
/*
 * Set up the display for the demo: clear the Color (0xd000) and Hires (0xe000)
 * areas, switch the VIC to hires mode using them, set a white border and bind
 * the global Screen bitmap to the Hires area as a 40 x 25 cell bitmap.
 */
void init(void)
{
mmap_trampoline();
/* The two memset calls run with the MMAP_RAM mapping selected, presumably so
   that the writes to 0xd000 land in RAM rather than I/O; confirm in c64/memmap.h */
mmap_set(MMAP_RAM);
/* 1000 bytes of 0x01 for the Color area, 8000 zero bytes for the Hires area */
memset(Color, 0x01, 1000);
memset(Hires, 0x00, 8000);
mmap_set(MMAP_NO_ROM);
vic_setmode(VICM_HIRES, Color, Hires);
vic.color_border = VCOL_WHITE;
bm_init(&Screen, Hires, 40, 25);
}
/*
 * Leave the demo: select the MMAP_ROM mapping, wait for a key via getch()
 * and then switch the VIC back to text mode with the addresses 0x0400 and
 * 0x1000.
 */
void done(void)
{
mmap_set(MMAP_ROM);
/* Keeps the result on screen until a key is read */
getch();
vic_setmode(VICM_TEXT, (char *)0x0400, (char *)0x1000);
}
char field[160];
/* Store the ascending sequence 0..159 in field, one value per slot. */
void fill(void)
{
	int	slot = 0;

	while (slot < 160)
	{
		field[slot] = slot;
		slot++;
	}
}
/*
 * Scramble field: walk over all 160 slots and exchange each one with a slot
 * chosen by rand() % 160.
 */
void shuffle(void)
{
	int	pos = 0;

	while (pos < 160)
	{
		int		other = rand() % 160;

		/* Exchange field[pos] and field[other] */
		char	saved = field[other];
		field[other] = field[pos];
		field[pos] = saved;

		pos++;
	}
}
/*
 * Redraw the column for element i at x = 2 * i: one line from y = 0 to
 * y = field[i] with pattern 0x00, and one from y = field[i] to y = 160 with
 * pattern 0xff.
 */
void draw(unsigned i)
{
	unsigned	x = 2 * i;
	char		level = field[i];

	bmu_line(&Screen, x, 0, x, level, 0x00, LINOP_SET);
	bmu_line(&Screen, x, level, x, 160, 0xff, LINOP_SET);
}
/*
 * Quicksort field[l..r] in place, redrawing both columns after every
 * exchange.  Larger values are moved towards lower indices.  The pivot is
 * the middle element; the left part is handled by a recursive call, the
 * right part by continuing the outer loop.
 */
void partition(int l, int r)
{
	while (l < r)
	{
		int		up = l;
		int		down = r;
		char	pivot = field[(r + l) >> 1];

		while (up <= down)
		{
			/* Skip elements that are already on the correct side of the pivot */
			for (; field[up] > pivot; up++)
				;
			for (; field[down] < pivot; down--)
				;

			/* The scans crossed, so the range is split */
			if (up > down)
				break;

			char	held = field[up];
			field[up] = field[down];
			field[down] = held;

			draw(up);
			draw(down);

			up++;
			down--;
		}

		partition(l, down);
		l = up;
	}
}
/*
 * Demo entry point: set up the screen, fill and shuffle the field, draw it,
 * sort it while measuring the elapsed clock() ticks, print the time below
 * the graph and wait for a key before returning.
 */
int main(void)
{
	char	msg[20];
	clock_t	start, stop;

	init();
	fill();
	shuffle();

	/* Paint the shuffled field once before the timed sort begins */
	for (int col = 0; col < 160; col++)
		draw(col);

	start = clock();
	partition(0, 159);
	stop = clock();

	/* Tick difference divided by 60, presumably the clock rate; confirm against time.h */
	sprintf(msg, "TIME : %.1f SECS.", (float)(stop - start) / 60);
	bmu_put_chars(&Screen, 4, 170, msg, strlen(msg), BLTOP_COPY);

	done();

	return 0;
}