From a2293a0ed15e4462b61f102bfa4a0d1783a1e2e7 Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Wed, 23 Feb 2022 18:48:31 +0100 Subject: [PATCH] Native code size reduction --- oscar64/InterCode.cpp | 3 +- oscar64/NativeCodeGenerator.cpp | 99 +++++++++++++++++++++++++-- oscar64/NativeCodeGenerator.h | 5 +- oscar64/oscar64.cpp | 2 +- oscar64/oscar64.rc | 8 +-- oscar64setup/oscar64setup.vdproj | 6 +- samples/games/hscrollshmup.c | 114 +++++++++++++++++-------------- samples/memmap/easyflash.crt | Bin 114976 -> 114976 bytes 8 files changed, 169 insertions(+), 68 deletions(-) diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index 16cb527..a1e80d1 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -8088,6 +8088,7 @@ void InterCodeBasicBlock::PeepholeOptimization(void) mInstructions[i + 2]->mSrc[1].mTemp == mInstructions[i + 1]->mDst.mTemp && mInstructions[i + 2]->mSrc[1].mFinal && (mInstructions[i + 2]->mSrc[0].mIntConst & 1) == 0) { + int shift = mInstructions[i + 0]->mSrc[0].mIntConst; int mshift = 1; while (!(mInstructions[i + 2]->mSrc[0].mIntConst & (1ULL << mshift))) @@ -8110,7 +8111,7 @@ void InterCodeBasicBlock::PeepholeOptimization(void) { mInstructions[i + 0]->mCode = IC_LOAD_TEMPORARY; mInstructions[i + 0]->mSrc[0] = mInstructions[i + 0]->mSrc[1]; - mInstructions[i + 0]->mSrc[0].mTemp = -1; + mInstructions[i + 0]->mSrc[1].mTemp = -1; mInstructions[i + 1]->mSrc[1].mIntConst = 255ULL >> shift << shift; mInstructions[i + 2]->mSrc[0].mIntConst >>= shift; diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index adc3dde..ad84821 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -9492,7 +9492,64 @@ void NativeCodeBasicBlock::RemEntryBlock(NativeCodeBasicBlock* block) mEntryBlocks.Remove(i); } -bool NativeCodeBasicBlock::JoinTailCodeSequences(void) +NativeCodeBasicBlock * NativeCodeBasicBlock::SplitMatchingTails(NativeCodeProcedure* proc) +{ + NativeCodeBasicBlock* nblock = nullptr; + + for (int i = 0; i < mEntryBlocks.Size() - 1; i++) + { + NativeCodeBasicBlock* bi(mEntryBlocks[i]); + + if (bi->mBranch == ASMIT_JMP && bi->mIns.Size() > 1) + { + for (int j = i + 1; j < mEntryBlocks.Size(); j++) + { + NativeCodeBasicBlock* bj(mEntryBlocks[j]); + + if (bj->mBranch == ASMIT_JMP && bj->mIns.Size() > 1) + { + if (bi->mIns[bi->mIns.Size() - 1].IsSame(bj->mIns[bj->mIns.Size() - 1]) && + bi->mIns[bi->mIns.Size() - 2].IsSame(bj->mIns[bj->mIns.Size() - 2])) + { + if (!nblock) + { + nblock = proc->AllocateBlock(); + nblock->mBranch = ASMIT_JMP; + nblock->mVisited = false; + nblock->mTrueJump = this; + + nblock->mEntryBlocks.Push(bi); + bi->mTrueJump = nblock; + mEntryBlocks[i] = nullptr; + } + + nblock->mEntryBlocks.Push(bj); + bj->mTrueJump = nblock; + mEntryBlocks[j] = nullptr; + } + } + } + + if (nblock) + { + int i = 0; + while (i < mEntryBlocks.Size()) + { + if (mEntryBlocks[i]) + i++; + else + mEntryBlocks.Remove(i); + } + + return nblock; + } + } + } + + return nullptr; +} + +bool NativeCodeBasicBlock::JoinTailCodeSequences(NativeCodeProcedure* proc) { bool changed = false; @@ -9529,6 +9586,16 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(void) break; } } + + if (mEntryBlocks.Size() > 2) + { + NativeCodeBasicBlock* nblock = SplitMatchingTails(proc); + if (nblock) + { + if (nblock->JoinTailCodeSequences(proc)) + changed = true; + } + } } #endif #if 1 @@ -9630,9 +9697,9 @@ bool NativeCodeBasicBlock::JoinTailCodeSequences(void) } } #endif - if (mTrueJump && mTrueJump->JoinTailCodeSequences()) + if (mTrueJump && mTrueJump->JoinTailCodeSequences(proc)) changed = true; - if (mFalseJump && mFalseJump->JoinTailCodeSequences()) + if (mFalseJump && mFalseJump->JoinTailCodeSequences(proc)) changed = true; } @@ -16168,6 +16235,26 @@ NativeCodeBasicBlock* NativeCodeBasicBlock::BypassEmptyBlocks(void) } } +int NativeCodeBasicBlock::LeadsInto(NativeCodeBasicBlock* block, int dist) +{ + if (mPlaced) + return 6; + else if (mTrueJump == block || mFalseJump == block) + return dist; + else if (dist < 5) + { + int d0 = mTrueJump ? mTrueJump->LeadsInto(block, dist + 1) : 6; + int d1 = mFalseJump ? mFalseJump->LeadsInto(block, dist + 1) : 6; + + if (d0 < d1) + return d0; + else + return d1; + } + + return 6; +} + void NativeCodeBasicBlock::BuildPlacement(GrowingArray& placement) { if (!mPlaced) @@ -16190,12 +16277,12 @@ void NativeCodeBasicBlock::BuildPlacement(GrowingArray& p mTrueJump->BuildPlacement(placement); } - else if (mTrueJump->mFalseJump == mFalseJump || mTrueJump->mTrueJump == mFalseJump) + else if (mTrueJump->LeadsInto(mFalseJump, 0) < mFalseJump->LeadsInto(mTrueJump, 0)) { mTrueJump->BuildPlacement(placement); mFalseJump->BuildPlacement(placement); } - else if (mFalseJump->mFalseJump == mTrueJump || mFalseJump->mTrueJump == mTrueJump) + else if (mTrueJump->LeadsInto(mFalseJump, 0) > mFalseJump->LeadsInto(mTrueJump, 0)) { mFalseJump->BuildPlacement(placement); mTrueJump->BuildPlacement(placement); @@ -17136,7 +17223,7 @@ void NativeCodeProcedure::Optimize(void) if (step > 2) { ResetVisited(); - if (mEntryBlock->JoinTailCodeSequences()) + if (mEntryBlock->JoinTailCodeSequences(this)) changed = true; } #endif diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index 698f1a7..8b40314 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -135,6 +135,7 @@ public: NativeCodeBasicBlock* BypassEmptyBlocks(void); + int LeadsInto(NativeCodeBasicBlock* block, int dist); void BuildPlacement(GrowingArray& placement); void InitialOffset(int& total); bool CalculateOffset(int& total); @@ -246,7 +247,9 @@ public: void AddEntryBlock(NativeCodeBasicBlock* block); void RemEntryBlock(NativeCodeBasicBlock* block); - bool JoinTailCodeSequences(void); + NativeCodeBasicBlock * SplitMatchingTails(NativeCodeProcedure* proc); + + bool JoinTailCodeSequences(NativeCodeProcedure* proc); bool SameTail(const NativeCodeInstruction& ins) const; NativeRegisterDataSet mEntryRegisterDataSet; diff --git a/oscar64/oscar64.cpp b/oscar64/oscar64.cpp index f66a1dc..2211454 100644 --- a/oscar64/oscar64.cpp +++ b/oscar64/oscar64.cpp @@ -73,7 +73,7 @@ int main2(int argc, const char** argv) #else strcpy(strProductName, "oscar64"); - strcpy(strProductVersion, "1.4.94"); + strcpy(strProductVersion, "1.4.95"); #ifdef __APPLE__ uint32_t length = sizeof(basePath); diff --git a/oscar64/oscar64.rc b/oscar64/oscar64.rc index 50a973a..ae25e2a 100644 --- a/oscar64/oscar64.rc +++ b/oscar64/oscar64.rc @@ -25,8 +25,8 @@ LANGUAGE LANG_ENGLISH, SUBLANG_NEUTRAL // VS_VERSION_INFO VERSIONINFO - FILEVERSION 1,4,94,0 - PRODUCTVERSION 1,4,94,0 + FILEVERSION 1,4,95,0 + PRODUCTVERSION 1,4,95,0 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L @@ -43,12 +43,12 @@ BEGIN BEGIN VALUE "CompanyName", "oscar64" VALUE "FileDescription", "oscar64 compiler" - VALUE "FileVersion", "1.4.94.0" + VALUE "FileVersion", "1.4.95.0" VALUE "InternalName", "oscar64.exe" VALUE "LegalCopyright", "Copyright (C) 2021" VALUE "OriginalFilename", "oscar64.exe" VALUE "ProductName", "oscar64" - VALUE "ProductVersion", "1.4.94.0" + VALUE "ProductVersion", "1.4.95.0" END END BLOCK "VarFileInfo" diff --git a/oscar64setup/oscar64setup.vdproj b/oscar64setup/oscar64setup.vdproj index 11dcc97..a487ed4 100644 --- a/oscar64setup/oscar64setup.vdproj +++ b/oscar64setup/oscar64setup.vdproj @@ -3752,15 +3752,15 @@ { "Name" = "8:Microsoft Visual Studio" "ProductName" = "8:oscar64" - "ProductCode" = "8:{DB788758-808B-4A56-B99C-F2BDBCD45FE3}" - "PackageCode" = "8:{9E6C1E34-6564-490E-B047-ED558001FF36}" + "ProductCode" = "8:{37C3F00F-2A58-4FAD-B5BA-5574475C7831}" + "PackageCode" = "8:{0F1773AB-6C73-4E53-B2D9-A794C417CED2}" "UpgradeCode" = "8:{9AB61EFF-ACAC-4079-9950-8D96615CD4EF}" "AspNetVersion" = "8:2.0.50727.0" "RestartWWWService" = "11:FALSE" "RemovePreviousVersions" = "11:TRUE" "DetectNewerInstalledVersion" = "11:TRUE" "InstallAllUsers" = "11:FALSE" - "ProductVersion" = "8:1.4.94" + "ProductVersion" = "8:1.4.95" "Manufacturer" = "8:oscar64" "ARPHELPTELEPHONE" = "8:" "ARPHELPLINK" = "8:" diff --git a/samples/games/hscrollshmup.c b/samples/games/hscrollshmup.c index b3e0c85..da1df1d 100644 --- a/samples/games/hscrollshmup.c +++ b/samples/games/hscrollshmup.c @@ -171,14 +171,15 @@ void tiles_draw1(char * dp, char * tm) struct Shot { - char x, y, dx, n; -} shots[5][4]; + byte ty, x, ry, n; + sbyte dx; +} shots[18]; + +Shot * firstShot; +Shot * lastShot; inline void shot_draw(char * dp, char i, char xp, char yp) { - __assume(i < 32); - __assume(yp < 8); - char c = dp[xp]; dp[xp] = i | 0xe0; @@ -189,7 +190,42 @@ inline void shot_draw(char * dp, char i, char xp, char yp) fdp[4] = fsp[4]; fdp[5] = fsp[5]; fdp[6] = fsp[6]; fdp[7] = fsp[7]; fdp[yp] = 0x00; +} +void shot_add(int dx, int sy) +{ + char py = sy - 6; + char gy = py >> 5; + char ey = (py >> 3) & 3; + char ry = py & 7; + + Shot * s = lastShot - 1; + while (s->ty > gy) + { + s[1] = s[0]; + s--; + } + s++; + + lastShot++; + lastShot->ty = 6; + + s->ty = gy; + s->ry = ry; + if (dx < 0) + { + s->dx = -1; + char x = (148 - 4 * dx) >> 3; + s->n = x - 1; + s->x = 40 * ey + x; + } + else + { + s->dx = 1; + char x = (156 - 4 * dx) >> 3; + s->x = 40 * ey + x; + s->n = 39 - x; + } } void tiles_draw(unsigned x) @@ -202,6 +238,8 @@ void tiles_draw(unsigned x) char yl = 0; char ci = 0; + Shot * ss = firstShot, * ts = firstShot; + for(int iy=0; iy<5; iy++) { char * dp = Screen + 80 + 160 * iy; @@ -262,22 +300,26 @@ void tiles_draw(unsigned x) dp[k] = 0xf8; } - - Shot * s = shots[iy]; - for(char si=0; si<4; si++) + while (ss->ty == iy) { - if (s->n) - { - s->x += s->dx; - s->n--; - shot_draw(dp, ci++, s->x, s->y); + ss->x += ss->dx; + ss->n--; + shot_draw(dp, ci++, ss->x, ss->ry); + if (ss->n) + { + if (ss != ts) + *ts = *ss; + ts++; } - s++; + ss++; } yl += 4; } + lastShot = ts; + lastShot->ty = 6; + Font[248 * 8 + 2] = ~(1 << xs); vic.ctrl2 = VIC_CTRL2_MCM + xs; @@ -321,14 +363,10 @@ int main(void) for(int i=0; i<24; i++) stars[i] = rand() % 40 + 40 * (i & 3); - for(int i=0; i<5; i++) - { - for(int j=0; j<8; j++) - { - shots[i][j].x = rand() % 160; - shots[i][j].y = rand() & 7; - } - } + shots[0].ty = 0; + firstShot = shots + 1; + lastShot = firstShot; + lastShot->ty = 6; spr_set(0, true, 160, 100, 64, VCOL_BLUE, true, false, false); spr_set(1, true, 160, 100, 64 + 16, VCOL_MED_GREY, true, false, false); @@ -394,36 +432,8 @@ int main(void) fdelay--; else if (joyb[0] && vpx != 0) { - char py = spy - 6; - char gy = py >> 5; - char ey = (py >> 3) & 3; - char ry = py & 7; - - Shot * s = shots[gy]; - - char i = 0; - while (i < 4 && s[i].n != 0) - i++; - - if (i < 4) - { - s[i].y = ry; - if (vpx < 0) - { - s[i].dx = -1; - char x = (148 - 4 * vpx) >> 3; - s[i].n = x - 1; - s[i].x = 40 * ey + x; - } - else if (vpx > 0) - { - s[i].dx = 1; - char x = (156 - 4 * vpx) >> 3; - s[i].x = 40 * ey + x; - s[i].n = 39 - x; - } - fdelay = 4; - } + shot_add(vpx, spy); + fdelay = 5; } spr_move(0, 172 - 4 * vpx, 50 + spy); diff --git a/samples/memmap/easyflash.crt b/samples/memmap/easyflash.crt index 9f6db6336d42c7615285fb4100feee2faf9e9062..e23be2fdeee17ba6a32206ec2534dd017da9dfbd 100644 GIT binary patch delta 454 zcmZ3`#J-@3eL}NRPOIcfo>nP~l{_zbKJc$v$*_Q7qU4DSzn@8+y70|nW%W(xJr|!uCqSS3?VPqsu Pp^77s3SAl*@7MzX@r|wl delta 468 zcmZ3`#J-@3eL}NRO{?Teo>nP~l{_zbKJc$v$*_Q7qU4DSzn@8+y70|nW&6vTiA#4f z)=Z9P)Dy35KT_$L`ZaIK7n@gU+7K368^GAXk;@w77Sbe=qoDT;CL6fFnK1k1>?ramzY&Jop>Zt`N69HGn)&P@VNRdah~Y0RCuDxGiRUYligVK8SOWh zv7D0L9H3yNpoDPYN{15@l~*=^+?v6?66Ogmp3O@f{@D|w?nnc1>Yf}RO5JuAMn>Wk OsyGs<(4~>_jy(X1IkNZw