From 7d12fd4c02e127f489193c032c7d3311eda40542 Mon Sep 17 00:00:00 2001
From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com>
Date: Thu, 4 May 2023 11:32:12 +0200
Subject: [PATCH] Fix same XY optimization

---
Review notes (this section is ignored when the patch is applied):
 * NativeCodeBasicBlock::ForwardAXYReg: an instruction that changes Y now
   clears the "Y mirrors A" flag (it used to clear the X flag instead), and
   the A/X/Y clobber checks are evaluated independently of each other.
 * samples/hires/qsort.c: header names restored on the #include lines.

 oscar64/InterCode.cpp            |  89 ++++++++++++++++++++++
 oscar64/InterCode.h              |   3 +
 oscar64/NativeCodeGenerator.cpp  | 127 +++++++++++++++++++++++++++++--
 oscar64/NativeCodeGenerator.h    |   1 +
 oscar64setup/oscar64setup.vdproj |  26 +++++++
 samples/hires/build.sh           |   2 +
 samples/hires/make.bat           |   1 +
 samples/hires/qsort.c            | 117 ++++++++++++++++++++++++++++
 8 files changed, 358 insertions(+), 8 deletions(-)
 create mode 100644 samples/hires/qsort.c

diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp
index 1fbb52e..7bfefe5 100644
--- a/oscar64/InterCode.cpp
+++ b/oscar64/InterCode.cpp
@@ -5523,6 +5523,74 @@ bool InterCodeBasicBlock::PropagateConstTemps(const GrowingInstructionPtrArray&
 	return changed;
 }
 
+bool InterCodeBasicBlock::CombineIndirectAddressing(void)
+{
+	bool	changed = false;
+
+	if (!mVisited)
+	{
+		mVisited = true;
+
+		GrowingInstructionPtrArray	tvalue(nullptr);
+
+		for (int i = 0; i < mInstructions.Size(); i++)
+		{
+			InterInstruction* lins = mInstructions[i];
+			InterInstruction* tins = nullptr;
+
+			if (lins->mCode == IC_LEA && lins->mSrc[0].mTemp >= 0 && lins->mSrc[1].mTemp < 0 &&
+				!lins->mSrc[0].IsUByte() &&
+				(lins->mSrc[1].mMemory == IM_ABSOLUTE || lins->mSrc[1].mMemory == IM_GLOBAL || lins->mSrc[1].mMemory == IM_LOCAL))
+			{
+				int j = 0;
+				while (j < tvalue.Size() &&
+					!(tvalue[j]->mSrc[0].mTemp == lins->mSrc[0].mTemp &&
+						tvalue[j]->mSrc[1].mTemp < 0 &&
+						tvalue[j]->mSrc[1].mMemory == lins->mSrc[1].mMemory &&
+						tvalue[j]->mSrc[1].mVarIndex == lins->mSrc[1].mVarIndex &&
+						tvalue[j]->mSrc[1].mIntConst <= lins->mSrc[1].mIntConst &&
+						tvalue[j]->mSrc[1].mIntConst + 256 > lins->mSrc[1].mIntConst))
+					j++;
+
+				if (j < tvalue.Size())
+				{
+					int	offset = lins->mSrc[1].mIntConst - tvalue[j]->mSrc[1].mIntConst;
+					lins->mSrc[1] = tvalue[j]->mDst;
+					lins->mSrc[0].mTemp = -1;
+					lins->mSrc[0].mIntConst = offset;
+					changed = true;
+				}
+				else
+					tins = lins;
+			}
+
+			if (HasSideEffect(lins->mCode))
+				tvalue.SetSize(0);
+			else if (lins->mDst.mTemp >= 0)
+			{
+				int j = 0;
+				while (j < tvalue.Size())
+				{
+					if (tvalue[j]->ReferencesTemp(lins->mDst.mTemp))
+						tvalue.Remove(j);
+					else
+						j++;
+				}
+			}
+
+			if (tins)
+				tvalue.Push(tins);
+		}
+
+		if (mTrueJump && mTrueJump->CombineIndirectAddressing())
+			changed = true;
+		if (mFalseJump && mFalseJump->CombineIndirectAddressing())
+			changed = true;
+	}
+
+	return changed;
+}
+
 void InterCodeBasicBlock::SimplifyIntegerRangeRelops(void)
 {
 	if (!mVisited)
@@ -13505,6 +13573,17 @@ bool InterCodeBasicBlock::PeepholeReplaceOptimization(const GrowingVariableArray
 					mInstructions[i + 1]->mSrc[0] = mInstructions[i + 0]->mDst;
 					changed = true;
 				}
+				else if (
+					mInstructions[i + 0]->mCode == IC_LEA && mInstructions[i + 0]->mSrc[0].mTemp < 0 && mInstructions[i + 0]->mSrc[1].mTemp >= 0 &&
+					mInstructions[i + 1]->mCode == IC_LOAD && mInstructions[i + 1]->mSrc[0].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[0].mFinal &&
+					mInstructions[i + 1]->mSrc[0].mIntConst + mInstructions[i + 0]->mSrc[0].mIntConst < 256)
+				{
+					mInstructions[i + 1]->mSrc[0].mTemp = mInstructions[i + 0]->mSrc[1].mTemp;
+					mInstructions[i + 1]->mSrc[0].mIntConst += mInstructions[i + 0]->mSrc[0].mIntConst;
+					mInstructions[i + 0]->mCode = IC_NONE;
+					mInstructions[i + 0]->mNumOperands = 0;
+					changed = true;
+				}
 				else if (
 					mInstructions[i + 0]->mCode == IC_LEA && mInstructions[i + 0]->mSrc[1].mTemp < 0 &&
 					mInstructions[i + 1]->mCode == IC_STORE && mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[1].mFinal &&
@@ -15302,6 +15381,14 @@ void InterCodeProcedure::LoadStoreForwarding(InterMemory paramMemory)
 	} while (changed);
 }
 
+void InterCodeProcedure::CombineIndirectAddressing(void)
+{
+	ResetVisited();
+
+	mEntryBlock->CombineIndirectAddressing();
+	BuildDataFlowSets();
+}
+
 void InterCodeProcedure::PropagateConstOperationsUp(void)
 {
 #if 1
@@ -16022,6 +16109,8 @@ void InterCodeProcedure::Close(void)
 
 	PropagateConstOperationsUp();
 
+	CombineIndirectAddressing();
+
 #if 1
 	for (int i = 0; i < 4; i++)
 	{
diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h
index a4edf8c..fc43fe9 100644
--- a/oscar64/InterCode.h
+++ b/oscar64/InterCode.h
@@ -434,6 +434,8 @@ public:
 	bool BuildGlobalIntegerRangeSets(bool initial, const GrowingVariableArray& localVars, const GrowingVariableArray& paramVars);
 	void SimplifyIntegerRangeRelops(void);
 
+	bool CombineIndirectAddressing(void);
+
 	GrowingIntArray			mEntryRenameTable;
 	GrowingIntArray			mExitRenameTable;
 
@@ -630,6 +632,7 @@ protected:
 	void ExpandSelect(void);
 	void PropagateConstOperationsUp(void);
 	void RebuildIntegerRangeSet(void);
+	void CombineIndirectAddressing(void);
 
 	void MergeBasicBlocks(void);
 	void CheckUsedDefinedTemps(void);
diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp
index a3d4596..c739f2c 100644
--- a/oscar64/NativeCodeGenerator.cpp
+++ b/oscar64/NativeCodeGenerator.cpp
@@ -12831,7 +12831,7 @@ bool NativeCodeBasicBlock::ReplaceYRegWithXReg(int start, int end)
 {
 	bool	changed = false;
 
-	CheckLive();
+//	CheckLive();
 
 	for (int i = start; i < end; i++)
 	{
@@ -12840,7 +12840,7 @@ bool NativeCodeBasicBlock::ReplaceYRegWithXReg(int start, int end)
 			changed = true;
 	}
 
-	CheckLive();
+//	CheckLive();
 
 	return changed;
 }
@@ -13061,6 +13061,89 @@ bool NativeCodeBasicBlock::ForwardAccuAddSub(void)
 	return changed;
 }
 
+bool NativeCodeBasicBlock::ForwardAXYReg(void)
+{
+	bool	changed = false;
+
+	if (!mVisited)
+	{
+		mVisited = true;
+
+		bool	xisa = false, yisa = false;
+		int		xoffset = -1, yoffset = -1;
+
+		for (int i = 0; i < mIns.Size(); i++)
+		{
+			if (mIns[i].mType == ASMIT_TAX)
+			{
+				xisa = true;
+				xoffset = i;
+			}
+			else if (mIns[i].mType == ASMIT_TXA)
+			{
+				xisa = true;
+				yisa = false;
+				xoffset = i;
+			}
+			else if (mIns[i].mType == ASMIT_TAY)
+			{
+				yisa = true;
+				yoffset = i;
+			}
+			else if (mIns[i].mType == ASMIT_TYA)
+			{
+				yisa = true;
+				xisa = false;
+				yoffset = i;
+			}
+			else if (mIns[i].ChangesAccu() || mIns[i].ChangesXReg() || mIns[i].ChangesYReg())
+			{
+				// One instruction may clobber more than one register, so each mirror flag is
+				// tested on its own: X stops mirroring A when A or X changes, Y when A or Y changes
+				if (mIns[i].ChangesAccu() || mIns[i].ChangesXReg())
+					xisa = false;
+				if (mIns[i].ChangesAccu() || mIns[i].ChangesYReg())
+					yisa = false;
+			}
+			else if (i + 1 < mIns.Size() && mIns[i].mType == ASMIT_CLC &&
+				mIns[i + 1].mType == ASMIT_ADC && mIns[i + 1].mMode == ASMIM_IMMEDIATE && !(mIns[i + 1].mLive & LIVE_CPU_REG_C))
+			{
+				if (xisa && !(mIns[i + 1].mLive & LIVE_CPU_REG_X))
+				{
+					if (mIns[i + 1].mAddress == 1)
+					{
+						mIns[i + 0].mType = ASMIT_INX;
+						mIns[i + 1].mType = ASMIT_TXA; mIns[i + 1].mMode = ASMIM_IMPLIED;
+						for (int j = xoffset; j < i + 1; j++)
+							mIns[j].mLive |= LIVE_CPU_REG_X;
+						xisa = false;
+						changed = true;
+					}
+				}
+				else if (yisa && !(mIns[i + 1].mLive & LIVE_CPU_REG_Y))
+				{
+					if (mIns[i + 1].mAddress == 1)
+					{
+						mIns[i + 0].mType = ASMIT_INY;
+						mIns[i + 1].mType = ASMIT_TYA; mIns[i + 1].mMode = ASMIM_IMPLIED;
+						for (int j = yoffset; j < i + 1; j++)
+							mIns[j].mLive |= LIVE_CPU_REG_Y;
+						yisa = false;
+						changed = true;
+					}
+				}
+			}
+		}
+
+		if (mTrueJump && mTrueJump->ForwardAXYReg())
+			changed = true;
+		if (mFalseJump && mFalseJump->ForwardAXYReg())
+			changed = true;
+	}
+
+	return changed;
+}
+
 bool NativeCodeBasicBlock::ForwardZpYIndex(bool full)
 {
 	CheckLive();
@@ -13487,21 +13570,41 @@ bool NativeCodeBasicBlock::CombineSameXY(void)
 		int		xpos, ypos;
 		bool	samexy = false;
 
+		CheckLive();
+
 		for (int i = 0; i < mIns.Size(); i++)
 		{
 			NativeCodeInstruction& ins(mIns[i]);
 
 			if (ins.ChangesXReg())
 			{
-				if (samexy && CombineSameXtoY(xpos, ypos, i))
-					changed = true;
+				if (samexy)
+				{
+					if (!ins.RequiresXReg() && CombineSameXtoY(xpos, ypos, i))
+						changed = true;
+					else if (!ins.RequiresYReg() && !(ins.mLive & LIVE_CPU_REG_Y) && CombineSameYtoX(xpos, ypos, i))
+					{
+						changed = true;
+						yreg = -1;
+					}
+				}
+
 				xreg = -1;
 				samexy = false;
 			}
 			if (ins.ChangesYReg())
 			{
-				if (samexy && CombineSameYtoX(ypos, xpos, i))
-					changed = true;
+				if (samexy)
+				{
+					if (!ins.RequiresYReg() && CombineSameYtoX(xpos, ypos, i))
+						changed = true;
+					else if (!ins.RequiresXReg() && !(ins.mLive & LIVE_CPU_REG_X) && CombineSameXtoY(xpos, ypos, i))
+					{
+						changed = true;
+						xreg = -1;
+					}
+				}
+
 				yreg = -1;
 				samexy = false;
 			}
@@ -13565,6 +13668,8 @@ bool NativeCodeBasicBlock::CombineSameXY(void)
 				changed = true;
 		}
 
+		CheckLive();
+
 		if (mTrueJump && mTrueJump->CombineSameXY())
 			changed = true;
 		if (mFalseJump && mFalseJump->CombineSameXY())
@@ -13854,14 +13959,14 @@ bool NativeCodeBasicBlock::ReplaceXRegWithYReg(int start, int end)
 {
 	bool	changed = false;
 
-	CheckLive();
+	//CheckLive();
 	for (int i = start; i < end; i++)
 	{
 		NativeCodeInstruction& ins(mIns[i]);
 		if (ins.ReplaceXRegWithYReg())
 			changed = true;
 	}
-	CheckLive();
+	//CheckLive();
 
 	return changed;
 }
@@ -39562,6 +39667,12 @@ void NativeCodeProcedure::Optimize(void)
 
 #endif
 
+		if (step == 8)
+		{
+			ResetVisited();
+			if (mEntryBlock->ForwardAXYReg())
+				changed = true;
+		}
 #if 1
 		if (step == 10)
 		{
diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h
index c7cb77e..2c06441 100644
--- a/oscar64/NativeCodeGenerator.h
+++ b/oscar64/NativeCodeGenerator.h
@@ -409,6 +409,7 @@ public:
 	bool ForwardAccuAddSub(void);
 	bool ForwardZpYIndex(bool full);
 	bool ForwardZpXIndex(bool full);
+	bool ForwardAXYReg(void);
 
 	bool RegisterValueForwarding(void);
 	bool CanCombineSameXtoY(int start, int end);
diff --git a/oscar64setup/oscar64setup.vdproj b/oscar64setup/oscar64setup.vdproj
index 2490945..006c4f8 100644
--- a/oscar64setup/oscar64setup.vdproj
+++ b/oscar64setup/oscar64setup.vdproj
@@ -268,6 +268,12 @@
         }
         "Entry"
         {
+        "MsmKey" = "8:_414E74B8FB564A7EBE074E9464FAB0F5"
+        "OwnerKey" = "8:_UNDEFINED"
+        "MsmSig" = "8:_UNDEFINED"
+        }
+        "Entry"
+        {
         "MsmKey" = "8:_41A6A127243E4EC2A1E1E2171993C5C1"
         "OwnerKey" = "8:_UNDEFINED"
         "MsmSig" = "8:_UNDEFINED"
@@ -2033,6 +2039,26 @@
             "IsDependency" = "11:FALSE"
             "IsolateTo" = "8:"
             }
+            "{1FB2D0AE-D3B9-43D4-B9DD-F88EC61E35DE}:_414E74B8FB564A7EBE074E9464FAB0F5"
+            {
+            "SourcePath" = "8:..\\samples\\hires\\qsort.prg"
+            "TargetName" = "8:qsort.prg"
+            "Tag" = "8:"
+            "Folder" = "8:_82FDD682B0334DE1B89A843D32862B85"
+            "Condition" = "8:"
+            "Transitive" = "11:FALSE"
+            "Vital" = "11:TRUE"
+            "ReadOnly" = "11:FALSE"
+            "Hidden" = "11:FALSE"
+            "System" = "11:FALSE"
+            "Permanent" = "11:FALSE"
+            "SharedLegacy" = "11:FALSE"
+            "PackageAs" = "3:1"
+            "Register" = "3:1"
+            "Exclude" = "11:FALSE"
+            "IsDependency" = "11:FALSE"
+            "IsolateTo" = "8:"
+            }
             "{1FB2D0AE-D3B9-43D4-B9DD-F88EC61E35DE}:_41A6A127243E4EC2A1E1E2171993C5C1"
             {
             "SourcePath" = "8:..\\samples\\scrolling\\grid2d.c"
diff --git a/samples/hires/build.sh b/samples/hires/build.sh
index 9184d76..e1886b3 100644
--- a/samples/hires/build.sh
+++ b/samples/hires/build.sh
@@ -4,3 +4,5 @@
 ../../bin/oscar64 lines.c -n
 ../../bin/oscar64 polygon.c -n
 ../../bin/oscar64 bitblit.c -n
+../../bin/oscar64 fractaltree.c -n
+../../bin/oscar64 qsort.c -n
diff --git a/samples/hires/make.bat b/samples/hires/make.bat
index f41a25a..00a8fe6 100644
--- a/samples/hires/make.bat
+++ b/samples/hires/make.bat
@@ -5,3 +5,4 @@ call ..\..\bin\oscar64 polygon.c -n
 call ..\..\bin\oscar64 bitblit.c -n
 call ..\..\bin\oscar64 cube3d.c -n
 call ..\..\bin\oscar64 fractaltree.c -n
+call ..\..\bin\oscar64 qsort.c -n
diff --git a/samples/hires/qsort.c b/samples/hires/qsort.c
new file mode 100644
index 0000000..b97e526
--- /dev/null
+++ b/samples/hires/qsort.c
@@ -0,0 +1,117 @@
+#include <c64/vic.h>
+#include <c64/memmap.h>
+#include <gfx/bitmap.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <conio.h>
+#include <time.h>
+
+#define Color	((char *)0xd000)
+#define Hires	((char *)0xe000)
+
+Bitmap		Screen;
+
+void init(void)
+{
+	mmap_trampoline();
+	mmap_set(MMAP_RAM);
+
+	memset(Color, 0x01, 1000);
+	memset(Hires, 0x00, 8000);
+
+	mmap_set(MMAP_NO_ROM);
+
+	vic_setmode(VICM_HIRES, Color, Hires);
+
+	vic.color_border = VCOL_WHITE;
+
+	bm_init(&Screen, Hires, 40, 25);
+}
+
+void done(void)
+{
+	mmap_set(MMAP_ROM);
+
+	getch();
+
+	vic_setmode(VICM_TEXT, (char *)0x0400, (char *)0x1000);
+}
+
+char field[160];
+
+void fill(void)
+{
+	for(int i=0; i<160; i++)
+		field[i] = i;
+}
+
+void shuffle(void)
+{
+	for(int i=0; i<160; i++)
+	{
+		int j = rand() % 160;
+		char t = field[i];
+		field[i] = field[j];
+		field[j] = t;
+	}
+}
+
+void draw(unsigned i)
+{
+	bmu_line(&Screen, 2 * i, 0, 2 * i, field[i], 0x00, LINOP_SET);
+	bmu_line(&Screen, 2 * i, field[i], 2 * i, 160, 0xff, LINOP_SET);
+}
+
+void partition(int l, int r)
+{
+	while (l < r)
+	{
+		int i = l;
+		int j = r;
+		char pi = field[(r + l) >> 1];
+		while (i <= j)
+		{
+			while (field[i] > pi)
+				i++;
+			while (field[j] < pi)
+				j--;
+			if (i <= j)
+			{
+				char t = field[i];
+				field[i] = field[j];
+				field[j] = t;
+				draw(i);
+				draw(j);
+				i++;
+				j--;
+			}
+		}
+
+		partition(l, j);
+		l = i;
+	}
+}
+
+int main(void)
+{
+	init();
+
+	fill();
+	shuffle();
+
+	for(int i=0; i<160; i++)
+		draw(i);
+
+	clock_t t0 = clock();
+	partition(0, 159);
+	clock_t t1 = clock();
+
+	char t[20];
+	sprintf(t, "TIME : %.1f SECS.", (float)(t1 - t0) / 60);
+	bmu_put_chars(&Screen, 4, 170, t, strlen(t), BLTOP_COPY);
+
+	done();
+
+	return 0;
+}