From 728e70702450145901b8eac6bed702b6ea017472 Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Sat, 14 May 2022 17:57:32 +0200 Subject: [PATCH] Optimize single page 2D array access --- oscar64/NativeCodeGenerator.cpp | 145 +++++++++++++++++++++++++++++++ oscar64/NativeCodeGenerator.h | 2 + oscar64/oscar64.cpp | 2 +- oscar64/oscar64.rc | 8 +- oscar64setup/oscar64setup.vdproj | 6 +- 5 files changed, 155 insertions(+), 8 deletions(-) diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index ae2a5fe..70a13aa 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -9337,6 +9337,44 @@ void NativeCodeBasicBlock::RelationalOperator(InterCodeProcedure* proc, const In } } +void NativeCodeBasicBlock::LoadStoreOpAbsolute2D(InterCodeProcedure* proc, const InterInstruction* lins1, const InterInstruction* lins2, const InterInstruction* mins) +{ + mIns.Push(NativeCodeInstruction(ASMIT_CLC)); + + if (lins1->mSrc[0].mTemp < 0) + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, lins1->mSrc[0].mIntConst)); + else + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[lins1->mSrc[0].mTemp])); + + if (lins2->mSrc[0].mTemp < 0) + mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_IMMEDIATE, lins2->mSrc[0].mIntConst)); + else + mIns.Push(NativeCodeInstruction(ASMIT_ADC, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[lins2->mSrc[0].mTemp])); + + mIns.Push(NativeCodeInstruction(ASMIT_TAY)); + + if (mins->mCode == IC_STORE) + { + for (int i = 0; i < InterTypeSize[mins->mSrc[0].mType]; i++) + { + if (mins->mSrc[0].mTemp < 0) + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, (mins->mSrc[0].mIntConst >> (8 * i)) & 0xff)); + else + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[mins->mSrc[0].mTemp] + i)); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ABSOLUTE_Y, lins1->mSrc[1].mIntConst + i, lins1->mSrc[1].mLinkerObject)); + } + } + else + { + for (int i = 0; i < InterTypeSize[mins->mSrc[0].mType]; i++) + { + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ABSOLUTE_Y, lins1->mSrc[1].mIntConst + i, lins1->mSrc[1].mLinkerObject)); + mIns.Push(NativeCodeInstruction(ASMIT_STA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[mins->mDst.mTemp] + i)); + } + } +} + + void NativeCodeBasicBlock::LoadEffectiveAddress(InterCodeProcedure* proc, const InterInstruction * ins, const InterInstruction* sins1, const InterInstruction* sins0, bool addrvalid) { bool isub = false; @@ -11621,6 +11659,35 @@ bool NativeCodeBasicBlock::ExpandADCToBranch(NativeCodeProcedure* proc) } + if (mIns.Size() >= 3 && mFalseJump && (mBranch == ASMIT_BCC || mBranch == ASMIT_BCS)) + { + int sz = mIns.Size(); + + if (mIns[sz - 3].mType == ASMIT_STA && mIns[sz - 3].mMode == ASMIM_ZERO_PAGE && + mIns[sz - 2].mType == ASMIT_LDA && mIns[sz - 2].mMode == ASMIM_ZERO_PAGE && + mIns[sz - 1].mType == ASMIT_CMP && mIns[sz - 1].mMode == ASMIM_ZERO_PAGE && mIns[sz - 1].mAddress == mIns[sz - 3].mAddress && !(mIns[sz - 1].mLive & (LIVE_CPU_REG_A | LIVE_MEM))) + { + changed = true; + + NativeCodeBasicBlock* cblock = proc->AllocateBlock(); + cblock->Close(mFalseJump, mTrueJump, mBranch); + + mIns[sz - 1].mAddress = mIns[sz - 2].mAddress; mIns[sz - 1].mLive |= LIVE_CPU_REG_Z; + mIns[sz - 2].mType = ASMIT_NOP; mIns[sz - 2].mMode = ASMIM_IMPLIED; + + if (mBranch == ASMIT_BCC) + { + mBranch = ASMIT_BNE; + mTrueJump = cblock; + } + else + { + mBranch = ASMIT_BEQ; + mFalseJump = cblock; + } + } + } + if (mIns.Size() >= 8 && mFalseJump && (mBranch == ASMIT_BNE || mBranch == ASMIT_BEQ)) { int sz = mIns.Size(); @@ -22325,6 +22392,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass #endif + #if 0 if (i + 13 < mIns.Size()) { @@ -22366,6 +22434,60 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass } #endif + +#if 1 + if (i + 7 < mIns.Size() && pass > 0 && + mIns[i + 0].mType == ASMIT_CLC && + mIns[i + 1].mType == ASMIT_ADC && mIns[i + 1].mMode == ASMIM_ZERO_PAGE && + mIns[i + 2].mType == ASMIT_STA && mIns[i + 2].mMode == ASMIM_ZERO_PAGE && mIns[i + 1].mAddress == mIns[i + 2].mAddress && + mIns[i + 3].mType == ASMIT_LDA && mIns[i + 3].mMode == ASMIM_ZERO_PAGE && + mIns[i + 4].mType == ASMIT_ADC && mIns[i + 4].mMode == ASMIM_ZERO_PAGE && + mIns[i + 5].mType == ASMIT_STA && mIns[i + 5].mMode == ASMIM_ZERO_PAGE && mIns[i + 5].mAddress == mIns[i + 2].mAddress + 1 && + mIns[i + 6].mType == ASMIT_LDY && mIns[i + 6].mMode == ASMIM_IMMEDIATE && mIns[i + 6].mAddress == 0 && + mIns[i + 7].mMode == ASMIM_INDIRECT_Y && mIns[i + 7].mAddress == mIns[i + 2].mAddress && + !(mIns[i + 7].mLive & LIVE_MEM)) + { + if (mIns[i + 7].mLive & LIVE_CPU_REG_Y) + mIns.Insert(i + 8, mIns[i + 6]); + + mIns[i + 1] = NativeCodeInstruction(ASMIT_TAY); + mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; + mIns[i + 3].mLive |= LIVE_CPU_REG_Y; + mIns[i + 4].mLive |= LIVE_CPU_REG_Y; + mIns[i + 5].mLive |= LIVE_CPU_REG_Y; + mIns[i + 6].mType = ASMIT_NOP; mIns[i + 6].mMode = ASMIM_IMPLIED; + progress = true; + } +#endif + +#if 1 + if (i + 8 < mIns.Size() && pass > 0 && + mIns[i + 0].mType == ASMIT_CLC && + mIns[i + 1].mType == ASMIT_LDA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE && + mIns[i + 2].mType == ASMIT_ADC && mIns[i + 2].mMode == ASMIM_ZERO_PAGE && + mIns[i + 3].mType == ASMIT_STA && mIns[i + 3].mMode == ASMIM_ZERO_PAGE && mIns[i + 2].mAddress == mIns[i + 3].mAddress && + mIns[i + 4].mType == ASMIT_LDA && mIns[i + 4].mMode == ASMIM_ZERO_PAGE && + mIns[i + 5].mType == ASMIT_ADC && mIns[i + 5].mMode == ASMIM_ZERO_PAGE && + mIns[i + 6].mType == ASMIT_STA && mIns[i + 6].mMode == ASMIM_ZERO_PAGE && mIns[i + 6].mAddress == mIns[i + 3].mAddress + 1 && + mIns[i + 7].mType == ASMIT_LDY && mIns[i + 7].mMode == ASMIM_IMMEDIATE && mIns[i + 7].mAddress == 0 && + mIns[i + 8].mMode == ASMIM_INDIRECT_Y && mIns[i + 8].mAddress == mIns[i + 3].mAddress && + !(mIns[i + 8].mLive & LIVE_MEM)) + { + if (mIns[i + 8].mLive & LIVE_CPU_REG_Y) + mIns.Insert(i + 9, mIns[i + 7]); + + mIns[i + 1].mType = ASMIT_LDY; mIns[i + 1].mLive |= LIVE_CPU_REG_Y; + mIns[i + 2].mType = ASMIT_NOP; mIns[i + 2].mMode = ASMIM_IMPLIED; + mIns[i + 3].mType = ASMIT_NOP; mIns[i + 3].mMode = ASMIM_IMPLIED; + mIns[i + 4].mLive |= LIVE_CPU_REG_Y; + mIns[i + 5].mLive |= LIVE_CPU_REG_Y; + mIns[i + 6].mLive |= LIVE_CPU_REG_Y; + mIns[i + 7].mType = ASMIT_NOP; mIns[i + 7].mMode = ASMIM_IMPLIED; + progress = true; + } +#endif + CheckLive(); + #endif #if 1 if (pass > 1 && mIns[i].mMode == ASMIM_IMMEDIATE_ADDRESS && mIns[i].mLinkerObject && (mIns[i].mFlags & NCIF_LOWER) && !(mIns[i].mAddress & 0xff) && !(mIns[i].mLinkerObject->mAlignment & 0xff)) @@ -22382,6 +22504,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass } + if (progress) changed = true; @@ -24052,6 +24175,8 @@ void NativeCodeProcedure::CompileInterBlock(InterCodeProcedure* iproc, InterCode avalid = true; else if (iblock->mInstructions[i + 1]->mCode == IC_STORE && iblock->mInstructions[i + 1]->mSrc[1].mTemp == ins->mDst.mTemp) avalid = true; + else if (iblock->mInstructions[i + 1]->mCode == IC_COPY && (iblock->mInstructions[i + 1]->mSrc[1].mTemp == ins->mDst.mTemp || iblock->mInstructions[i + 1]->mSrc[0].mTemp == ins->mDst.mTemp)) + avalid = true; } #if 1 if (i + 1 < iblock->mInstructions.Size() && @@ -24071,6 +24196,26 @@ void NativeCodeProcedure::CompileInterBlock(InterCodeProcedure* iproc, InterCode block->StoreByteIndexedValue(iproc, ins, iblock->mInstructions[i + 1]); i++; } + else if (i + 2 < iblock->mInstructions.Size() && + ins->mSrc[1].mTemp < 0 && ins->mSrc[1].mMemory == IM_GLOBAL && ins->mSrc[1].mLinkerObject->mSize <= 256 && +// ins->mSrc[0].IsUByte() && + iblock->mInstructions[i + 1]->mCode == IC_LEA && iblock->mInstructions[i + 1]->mSrc[1].mTemp == ins->mDst.mTemp && iblock->mInstructions[i + 1]->mSrc[1].mFinal && +// iblock->mInstructions[i + 1]->mSrc[0].IsUByte() && + iblock->mInstructions[i + 2]->mCode == IC_LOAD && iblock->mInstructions[i + 2]->mSrc[0].mTemp == iblock->mInstructions[i + 1]->mDst.mTemp && iblock->mInstructions[i + 2]->mSrc[0].mFinal) + { + block->LoadStoreOpAbsolute2D(iproc, ins, iblock->mInstructions[i + 1], iblock->mInstructions[i + 2]); + i += 2; + } + else if (i + 2 < iblock->mInstructions.Size() && + ins->mSrc[1].mTemp < 0 && ins->mSrc[1].mMemory == IM_GLOBAL && ins->mSrc[1].mLinkerObject->mSize <= 256 && +// ins->mSrc[0].IsUByte() && + iblock->mInstructions[i + 1]->mCode == IC_LEA && iblock->mInstructions[i + 1]->mSrc[1].mTemp == ins->mDst.mTemp && iblock->mInstructions[i + 1]->mSrc[1].mFinal && +// iblock->mInstructions[i + 1]->mSrc[0].IsUByte() && + iblock->mInstructions[i + 2]->mCode == IC_STORE && iblock->mInstructions[i + 2]->mSrc[1].mTemp == iblock->mInstructions[i + 1]->mDst.mTemp && iblock->mInstructions[i + 2]->mSrc[1].mFinal) + { + block->LoadStoreOpAbsolute2D(iproc, ins, iblock->mInstructions[i + 1], iblock->mInstructions[i + 2]); + i += 2; + } else #endif { diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index fd06ae2..8f9cf5c 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -206,6 +206,8 @@ public: void UnaryOperator(InterCodeProcedure* proc, NativeCodeProcedure* nproc, const InterInstruction * ins); void RelationalOperator(InterCodeProcedure* proc, const InterInstruction * ins, NativeCodeProcedure * nproc, NativeCodeBasicBlock* trueJump, NativeCodeBasicBlock * falseJump); void LoadEffectiveAddress(InterCodeProcedure* proc, const InterInstruction * ins, const InterInstruction* sins1, const InterInstruction* sins0, bool addrvalid); + void LoadStoreOpAbsolute2D(InterCodeProcedure* proc, const InterInstruction* lins1, const InterInstruction* lins2, const InterInstruction* mins); + void NumericConversion(InterCodeProcedure* proc, NativeCodeProcedure* nproc, const InterInstruction * ins); NativeCodeBasicBlock * CopyValue(InterCodeProcedure* proc, const InterInstruction * ins, NativeCodeProcedure* nproc); NativeCodeBasicBlock * StrcpyValue(InterCodeProcedure* proc, const InterInstruction* ins, NativeCodeProcedure* nproc); diff --git a/oscar64/oscar64.cpp b/oscar64/oscar64.cpp index 13203d0..76b8fed 100644 --- a/oscar64/oscar64.cpp +++ b/oscar64/oscar64.cpp @@ -74,7 +74,7 @@ int main2(int argc, const char** argv) #else strcpy(strProductName, "oscar64"); - strcpy(strProductVersion, "1.7.127"); + strcpy(strProductVersion, "1.7.128"); #ifdef __APPLE__ uint32_t length = sizeof(basePath); diff --git a/oscar64/oscar64.rc b/oscar64/oscar64.rc index 23e238e..e9db041 100644 --- a/oscar64/oscar64.rc +++ b/oscar64/oscar64.rc @@ -25,8 +25,8 @@ LANGUAGE LANG_ENGLISH, SUBLANG_NEUTRAL // VS_VERSION_INFO VERSIONINFO - FILEVERSION 1,7,127,0 - PRODUCTVERSION 1,7,127,0 + FILEVERSION 1,7,128,0 + PRODUCTVERSION 1,7,128,0 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L @@ -43,12 +43,12 @@ BEGIN BEGIN VALUE "CompanyName", "oscar64" VALUE "FileDescription", "oscar64 compiler" - VALUE "FileVersion", "1.7.127.0" + VALUE "FileVersion", "1.7.128.0" VALUE "InternalName", "oscar64.exe" VALUE "LegalCopyright", "Copyright (C) 2021" VALUE "OriginalFilename", "oscar64.exe" VALUE "ProductName", "oscar64" - VALUE "ProductVersion", "1.7.127.0" + VALUE "ProductVersion", "1.7.128.0" END END BLOCK "VarFileInfo" diff --git a/oscar64setup/oscar64setup.vdproj b/oscar64setup/oscar64setup.vdproj index 8e13855..0aed7ef 100644 --- a/oscar64setup/oscar64setup.vdproj +++ b/oscar64setup/oscar64setup.vdproj @@ -4127,15 +4127,15 @@ { "Name" = "8:Microsoft Visual Studio" "ProductName" = "8:oscar64" - "ProductCode" = "8:{DF3630AB-F113-4BB0-8CDF-4EC257D6F55B}" - "PackageCode" = "8:{6ECF2FD0-FB99-4AF4-9F9D-D8B11EB77A29}" + "ProductCode" = "8:{A6AFEFCF-ACB6-4079-AE49-30A6B535F544}" + "PackageCode" = "8:{17D1E370-C1A3-48E9-8621-133A4A23437A}" "UpgradeCode" = "8:{9AB61EFF-ACAC-4079-9950-8D96615CD4EF}" "AspNetVersion" = "8:2.0.50727.0" "RestartWWWService" = "11:FALSE" "RemovePreviousVersions" = "11:TRUE" "DetectNewerInstalledVersion" = "11:TRUE" "InstallAllUsers" = "11:FALSE" - "ProductVersion" = "8:1.7.127" + "ProductVersion" = "8:1.7.128" "Manufacturer" = "8:oscar64" "ARPHELPTELEPHONE" = "8:" "ARPHELPLINK" = "8:"