From 57c72b17880de07c51e9cb597e0c1465cdf1c60f Mon Sep 17 00:00:00 2001 From: drmortalwombat <90205530+drmortalwombat@users.noreply.github.com> Date: Mon, 27 Dec 2021 10:49:18 +0100 Subject: [PATCH] Reduce y register pressure if x available --- autotest/autotest.bat | 188 ++++++++++++------------ oscar64/Compiler.cpp | 5 +- oscar64/Compiler.h | 2 +- oscar64/Emulator.cpp | 6 +- oscar64/Emulator.h | 1 + oscar64/NativeCodeGenerator.cpp | 249 ++++++++++++++++++++++++++++++-- oscar64/NativeCodeGenerator.h | 3 +- oscar64/Parser.cpp | 2 +- oscar64/Scanner.cpp | 1 + oscar64/oscar64.cpp | 6 +- samples/scrolling/bigfont.c | 115 +++++++++++++++ samples/scrolling/grid2d.c | 238 ++++++++++++++++++++++++++++++ samples/scrolling/make.bat | 3 + samples/scrolling/tunnel.c | 218 ++++++++++++++++++++++++++++ 14 files changed, 927 insertions(+), 110 deletions(-) create mode 100644 samples/scrolling/bigfont.c create mode 100644 samples/scrolling/grid2d.c create mode 100644 samples/scrolling/make.bat create mode 100644 samples/scrolling/tunnel.c diff --git a/autotest/autotest.bat b/autotest/autotest.bat index 0215a3d..26a9282 100644 --- a/autotest/autotest.bat +++ b/autotest/autotest.bat @@ -1,123 +1,123 @@ -@echo off +rem @echo off -call :test stdlibtest.c -if %errorlevel% neq 0 goto :error +@call :test stdlibtest.c +@if %errorlevel% neq 0 goto :error -call :test testint16.c -if %errorlevel% neq 0 goto :error +@call :test testint16.c +@if %errorlevel% neq 0 goto :error -call :test testint32.c -if %errorlevel% neq 0 goto :error +@call :test testint32.c +@if %errorlevel% neq 0 goto :error -call :test testint16mul.c -if %errorlevel% neq 0 goto :error +@call :test testint16mul.c +@if %errorlevel% neq 0 goto :error -call :test recursiontest.c -if %errorlevel% neq 0 goto :error +@call :test recursiontest.c +@if %errorlevel% neq 0 goto :error -call :test fastcalltest.c -if %errorlevel% neq 0 goto :error +@call :test fastcalltest.c +@if %errorlevel% neq 0 goto :error -call :test strcmptest.c 
-if %errorlevel% neq 0 goto :error +@call :test strcmptest.c +@if %errorlevel% neq 0 goto :error -call :test strcmptest2.c -if %errorlevel% neq 0 goto :error +@call :test strcmptest2.c +@if %errorlevel% neq 0 goto :error -call :test arraytest.c -if %errorlevel% neq 0 goto :error +@call :test arraytest.c +@if %errorlevel% neq 0 goto :error -call :test arraytestfloat.c -if %errorlevel% neq 0 goto :error +@call :test arraytestfloat.c +@if %errorlevel% neq 0 goto :error -call :test optiontest.c -if %errorlevel% neq 0 goto :error +@call :test optiontest.c +@if %errorlevel% neq 0 goto :error -call :test floatcmptest.c -if %errorlevel% neq 0 goto :error +@call :test floatcmptest.c +@if %errorlevel% neq 0 goto :error -call :test floatmultest.c -if %errorlevel% neq 0 goto :error +@call :test floatmultest.c +@if %errorlevel% neq 0 goto :error -call :test staticconsttest.c -if %errorlevel% neq 0 goto :error +@call :test staticconsttest.c +@if %errorlevel% neq 0 goto :error -call :test arrayinittest.c -if %errorlevel% neq 0 goto :error +@call :test arrayinittest.c +@if %errorlevel% neq 0 goto :error -call :test array2stringinittest.c -if %errorlevel% neq 0 goto :error +@call :test array2stringinittest.c +@if %errorlevel% neq 0 goto :error -call :test testint16cmp.c -if %errorlevel% neq 0 goto :error +@call :test testint16cmp.c +@if %errorlevel% neq 0 goto :error -call :test testint8cmp.c -if %errorlevel% neq 0 goto :error +@call :test testint8cmp.c +@if %errorlevel% neq 0 goto :error -call :test testint32cmp.c -if %errorlevel% neq 0 goto :error +@call :test testint32cmp.c +@if %errorlevel% neq 0 goto :error -call :test testinterval.c -if %errorlevel% neq 0 goto :error +@call :test testinterval.c +@if %errorlevel% neq 0 goto :error -call :test floatstringtest.c -if %errorlevel% neq 0 goto :error +@call :test floatstringtest.c +@if %errorlevel% neq 0 goto :error -call :test qsorttest.c -if %errorlevel% neq 0 goto :error +@call :test qsorttest.c +@if %errorlevel% neq 0 goto 
:error -call :test loopdomtest.c -if %errorlevel% neq 0 goto :error +@call :test loopdomtest.c +@if %errorlevel% neq 0 goto :error -call :test loopboundtest.c -if %errorlevel% neq 0 goto :error +@call :test loopboundtest.c +@if %errorlevel% neq 0 goto :error -call :test byteindextest.c -if %errorlevel% neq 0 goto :error +@call :test byteindextest.c +@if %errorlevel% neq 0 goto :error -call :test asmtest.c -if %errorlevel% neq 0 goto :error +@call :test asmtest.c +@if %errorlevel% neq 0 goto :error -call :testb bitshifttest.c -if %errorlevel% neq 0 goto :error +@call :testb bitshifttest.c +@if %errorlevel% neq 0 goto :error -call :test arrparam.c -if %errorlevel% neq 0 goto :error +@call :test arrparam.c +@if %errorlevel% neq 0 goto :error -call :test bsstest.c -if %errorlevel% neq 0 goto :error +@call :test bsstest.c +@if %errorlevel% neq 0 goto :error -call :test copyintvec.c -if %errorlevel% neq 0 goto :error +@call :test copyintvec.c +@if %errorlevel% neq 0 goto :error -call :test divmodtest.c -if %errorlevel% neq 0 goto :error +@call :test divmodtest.c +@if %errorlevel% neq 0 goto :error -call :test enumswitch.c -if %errorlevel% neq 0 goto :error +@call :test enumswitch.c +@if %errorlevel% neq 0 goto :error -call :test incvector.c -if %errorlevel% neq 0 goto :error +@call :test incvector.c +@if %errorlevel% neq 0 goto :error -call :test structoffsettest2.c -if %errorlevel% neq 0 goto :error +@call :test structoffsettest2.c +@if %errorlevel% neq 0 goto :error -call :test funcvartest.c -if %errorlevel% neq 0 goto :error +@call :test funcvartest.c +@if %errorlevel% neq 0 goto :error -call :test structassigntest.c -if %errorlevel% neq 0 goto :error +@call :test structassigntest.c +@if %errorlevel% neq 0 goto :error -call :test structmembertest.c -if %errorlevel% neq 0 goto :error +@call :test structmembertest.c +@if %errorlevel% neq 0 goto :error -call :test randsumtest.c -if %errorlevel% neq 0 goto :error +@call :test randsumtest.c +@if %errorlevel% neq 0 goto 
:error -call :test longcodetest.c -if %errorlevel% neq 0 goto :error +@call :test longcodetest.c +@if %errorlevel% neq 0 goto :error -exit /b 0 +@exit /b 0 :error echo Failed with error #%errorlevel%. @@ -125,42 +125,42 @@ exit /b %errorlevel% :test ..\release\oscar64 -e %~1 -if %errorlevel% neq 0 goto :error +@if %errorlevel% neq 0 goto :error ..\release\oscar64 -e -n %~1 -if %errorlevel% neq 0 goto :error +@if %errorlevel% neq 0 goto :error ..\release\oscar64 -e -O2 %~1 -if %errorlevel% neq 0 goto :error +@if %errorlevel% neq 0 goto :error ..\release\oscar64 -e -O2 -n %~1 -if %errorlevel% neq 0 goto :error +@if %errorlevel% neq 0 goto :error ..\release\oscar64 -e -O0 %~1 -if %errorlevel% neq 0 goto :error +@if %errorlevel% neq 0 goto :error ..\release\oscar64 -e -O0 -n %~1 -if %errorlevel% neq 0 goto :error +@if %errorlevel% neq 0 goto :error ..\release\oscar64 -e -O3 %~1 -if %errorlevel% neq 0 goto :error +@if %errorlevel% neq 0 goto :error ..\release\oscar64 -e -O3 -n %~1 -if %errorlevel% neq 0 goto :error +@if %errorlevel% neq 0 goto :error -exit /b 0 +@exit /b 0 :testb ..\release\oscar64 -e %~1 -if %errorlevel% neq 0 goto :error +@if %errorlevel% neq 0 goto :error ..\release\oscar64 -e -O2 %~1 -if %errorlevel% neq 0 goto :error +@if %errorlevel% neq 0 goto :error ..\release\oscar64 -e -O0 %~1 -if %errorlevel% neq 0 goto :error +@if %errorlevel% neq 0 goto :error ..\release\oscar64 -e -O3 %~1 -if %errorlevel% neq 0 goto :error +@if %errorlevel% neq 0 goto :error -exit /b 0 +@exit /b 0 diff --git a/oscar64/Compiler.cpp b/oscar64/Compiler.cpp index 4b53142..2483e58 100644 --- a/oscar64/Compiler.cpp +++ b/oscar64/Compiler.cpp @@ -404,7 +404,7 @@ bool Compiler::WriteOutputFile(const char* targetPath) return true; } -int Compiler::ExecuteCode(void) +int Compiler::ExecuteCode(bool profile) { Location loc; @@ -427,6 +427,9 @@ int Compiler::ExecuteCode(void) printf("Emulation result %d\n", ecode); + if (profile) + emu->DumpProfile(); + if (ecode != 0) { char sd[20]; 
diff --git a/oscar64/Compiler.h b/oscar64/Compiler.h index cb337ee..7001d6f 100644 --- a/oscar64/Compiler.h +++ b/oscar64/Compiler.h @@ -41,7 +41,7 @@ public: bool ParseSource(void); bool GenerateCode(void); bool WriteOutputFile(const char* targetPath); - int ExecuteCode(void); + int ExecuteCode(bool profile); void AddDefine(const Ident* ident, const char* value); diff --git a/oscar64/Emulator.cpp b/oscar64/Emulator.cpp index 2b27553..187b134 100644 --- a/oscar64/Emulator.cpp +++ b/oscar64/Emulator.cpp @@ -491,6 +491,11 @@ bool Emulator::EmulateInstruction(AsmInsType type, AsmInsMode mode, int addr, in return true; } +void Emulator::DumpProfile(void) +{ + DumpCycles(); +} + int Emulator::Emulate(int startIP) { int trace = 0; @@ -683,7 +688,6 @@ int Emulator::Emulate(int startIP) if (mMemory[i] != 0) printf("ZP %02x : %02x\n", i, mMemory[i]); #endif - DumpCycles(); return int16(mMemory[BC_REG_ACCU] + 256 * mMemory[BC_REG_ACCU + 1]); } diff --git a/oscar64/Emulator.h b/oscar64/Emulator.h index ae57920..a0fd4de 100644 --- a/oscar64/Emulator.h +++ b/oscar64/Emulator.h @@ -20,6 +20,7 @@ public: Linker* mLinker; int Emulate(int startIP); + void DumpProfile(void); bool EmulateInstruction(AsmInsType type, AsmInsMode mode, int addr, int & cycles); protected: void UpdateStatus(uint8 result); diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index f21e27a..a93e50e 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -8183,6 +8183,141 @@ void NativeCodeBasicBlock::GlobalRegisterYMap(int reg) } } +bool NativeCodeBasicBlock::ReduceLocalYPressure(void) +{ + bool changed = false; + + if (!mVisited) + { + mVisited = true; + + int loadYRegs[256]; + + int start = 0; + + while (start < mIns.Size()) + { + while (start < mIns.Size() && (mIns[start].mLive & LIVE_CPU_REG_X)) + start++; + + if (start < mIns.Size()) + { + int end = start + 1; + + for (int i = 0; i < 256; i++) + loadYRegs[i] = 0; + + int yreg = -1, areg = -1; + 
while (end < mIns.Size() && !mIns[end].ChangesXReg()) + { + const NativeCodeInstruction& ins(mIns[end]); + + if (ins.mType == ASMIT_LDY && ins.mMode == ASMIM_ZERO_PAGE) + { + yreg = ins.mAddress; + if (loadYRegs[yreg] >= 0) + loadYRegs[yreg]++; + } + else if (ins.mType == ASMIT_TAY) + { + yreg = areg; + if (yreg > 0 && loadYRegs[yreg] >= 0) + loadYRegs[yreg]++; + } + else if ((ins.mType == ASMIT_LDA || ins.mType == ASMIT_STA) && ins.mMode == ASMIM_ZERO_PAGE) /* track LDA as well, like the rewrite loop below */ + { + areg = ins.mAddress; + } + else if (ins.mMode == ASMIM_INDIRECT_Y) + { + if (yreg != -1) + loadYRegs[yreg] = -1; + } + else if (ins.ChangesYReg() && ins.mType != ASMIT_INY && ins.mType != ASMIT_DEY) /* after INY/DEY Y still derives from yreg */ + yreg = -1; + else if (ins.ChangesAccu()) + areg = -1; + + end++; + } + if (yreg > 0 && (mIns[end - 1].mLive & LIVE_CPU_REG_Y)) loadYRegs[yreg] = -1; /* Y is still read after the region, so it must stay in Y */ + if (end > start) + { + int maxy = 0, maxr = 0; + for (int i = 1; i < 256; i++) + { + if (loadYRegs[i] > maxy) + { + maxy = loadYRegs[i]; + maxr = i; + } + } + + if (maxy > 1) + { + bool active = false; + bool aactive = false; + for (int i = start + 1; i < end; i++) /* mIns[start] was not scanned above, so do not rewrite it */ + { + NativeCodeInstruction& ins(mIns[i]); + if (ins.mType == ASMIT_LDY && ins.mMode == ASMIM_ZERO_PAGE && ins.mAddress == maxr) + { + ins.mType = ASMIT_LDX; changed = true; + active = true; + aactive = false; + } + else if ((ins.mType == ASMIT_LDA || ins.mType == ASMIT_STA) && ins.mMode == ASMIM_ZERO_PAGE) + { + aactive = ins.mAddress == maxr; /* same attribution as areg in the scan */ + } + else if (active) + { + aactive = false; + if (ins.mType == ASMIT_INY) + ins.mType = ASMIT_INX; + else if (ins.mType == ASMIT_DEY) + ins.mType = ASMIT_DEX; + else if (ins.mType == ASMIT_CPY) + ins.mType = ASMIT_CPX; + else if (ins.mType == ASMIT_TYA) + ins.mType = ASMIT_TXA; + else if (ins.mType == ASMIT_STY) + ins.mType = ASMIT_STX; + else if (ins.mMode == ASMIM_ABSOLUTE_Y) + ins.mMode = ASMIM_ABSOLUTE_X; + else if (ins.mType == ASMIT_LDY || ins.mType == ASMIT_TAY) + active = false; + } + else if (aactive) + { + if (ins.mType == ASMIT_TAY) + { + ins.mType = ASMIT_TAX; changed = true; + active = true; + } + else if (ins.ChangesAccu()) + aactive = false; + } + } + + /* changed is raised only where an instruction is actually rewritten */ + } + } + + start = end; + } + } + + if (mTrueJump &&
mTrueJump->ReduceLocalYPressure()) + changed = true; + + if (mFalseJump && mFalseJump->ReduceLocalYPressure()) + changed = true; + } + + return changed; +} + bool NativeCodeBasicBlock::LocalRegisterXYMap(void) { bool changed = false; @@ -8678,7 +8813,7 @@ bool NativeCodeBasicBlock::FindGlobalAddress(int at, int reg, int& apos) return false; } -bool NativeCodeBasicBlock::FindGlobalAddressSumY(int at, int reg, bool direct, int& apos, const NativeCodeInstruction*& ains, const NativeCodeInstruction*& iins, uint32& flags) +bool NativeCodeBasicBlock::FindGlobalAddressSumY(int at, int reg, bool direct, int& apos, const NativeCodeInstruction*& ains, const NativeCodeInstruction*& iins, uint32& flags, int& addr) { flags = 0; @@ -8812,6 +8947,38 @@ bool NativeCodeBasicBlock::FindGlobalAddressSumY(int at, int reg, bool direct, i iins = &(mIns[j + 1]); apos = j + 0; + addr = mIns[j + 2].mAddress + 256 * (mIns[j + 4].mAddress + mIns[j + 5].mAddress); + + int ireg = iins->mAddress; + if (reg == ireg && !direct) + return false; + + int k = j + 7; + while (k < at) + { + if (mIns[k].mMode == ASMIM_ZERO_PAGE && mIns[k].mAddress == ireg && mIns[k].ChangesAddress()) + return false; + if (reg == ireg && mIns[k].ChangesYReg()) + return false; + k++; + } + + return true; + } + else if (mIns[j + 0].mType == ASMIT_CLC && + mIns[j + 1].mType == ASMIT_LDA && mIns[j + 1].mMode == ASMIM_IMMEDIATE && + mIns[j + 2].mType == ASMIT_ADC && mIns[j + 2].mMode == ASMIM_ZERO_PAGE && + mIns[j + 3].mType == ASMIT_STA && mIns[j + 3].mMode == ASMIM_ZERO_PAGE && mIns[j + 3].mAddress == reg && + mIns[j + 4].mType == ASMIT_LDA && mIns[j + 4].mMode == ASMIM_IMMEDIATE && + mIns[j + 5].mType == ASMIT_ADC && mIns[j + 5].mMode == ASMIM_IMMEDIATE && + mIns[j + 6].mType == ASMIT_STA && mIns[j + 6].mMode == ASMIM_ZERO_PAGE && mIns[j + 6].mAddress == reg + 1) + { + ains = &(mIns[j + 1]); + iins = &(mIns[j + 2]); + apos = j + 0; + + addr = mIns[j + 1].mAddress + 256 * (mIns[j + 4].mAddress + mIns[j + 5].mAddress); + int 
ireg = iins->mAddress; if (reg == ireg && !direct) return false; @@ -8840,6 +9007,8 @@ bool NativeCodeBasicBlock::FindGlobalAddressSumY(int at, int reg, bool direct, i iins = nullptr; apos = j + 1; + addr = mIns[j + 2].mAddress + 256 * (mIns[j + 4].mAddress + mIns[j + 5].mAddress); + if (!direct) return false; @@ -8910,7 +9079,7 @@ bool NativeCodeBasicBlock::FindGlobalAddressSumY(int at, int reg, bool direct, i return false; j--; } - if (mFromJump->FindGlobalAddressSumY(mFromJump->mIns.Size(), reg, false, apos, ains, iins, flags)) + if (mFromJump->FindGlobalAddressSumY(mFromJump->mIns.Size(), reg, false, apos, ains, iins, flags, addr)) { if (iins->mMode == ASMIM_ZERO_PAGE) { @@ -9028,6 +9197,12 @@ bool NativeCodeBasicBlock::PatchAddressSumY(int at, int reg, int apos, int breg, if (last == mIns.Size()) return false; + if (mIns[last].mLive & LIVE_CPU_REG_Y) + { + mIns.Insert(last + 1, NativeCodeInstruction(ASMIT_LDY, ASMIM_IMMEDIATE, yindex)); + mIns[last + 1].mLive |= LIVE_CPU_REG_Y; /* same liveness mask the guard above tests */ + } + for (int i = 0; i < 6; i++) { mIns[apos + i + 1].mType = ASMIT_NOP; @@ -11168,7 +11343,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(int pass) int sreg = mIns[i + 0].mAddress; - int apos, breg, ireg; + int apos, breg, ireg, addr; uint32 flags; if (FindAddressSumY(i, sreg, apos, breg, ireg)) @@ -11196,7 +11371,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(int pass) #endif } - else if (FindGlobalAddressSumY(i, sreg, true, apos, ains, iins, flags)) + else if (FindGlobalAddressSumY(i, sreg, true, apos, ains, iins, flags, addr)) { if (iins || (flags & LIVE_CPU_REG_Y) || (flags & LIVE_CPU_REG_X)) //!(mIns[i + 1].mLive & LIVE_CPU_REG_X)) { @@ -11219,8 +11394,18 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(int pass) { mIns[i + 0].mMode = ASMIM_ABSOLUTE_X; } - mIns[i + 0].mLinkerObject = ains->mLinkerObject; - mIns[i + 0].mAddress = ains->mAddress; + + if (ains->mMode == ASMIM_IMMEDIATE) + { + mIns[i + 0].mLinkerObject = nullptr; + mIns[i + 0].mAddress = addr; + } + else + { + mIns[i
+ 0].mLinkerObject = ains->mLinkerObject; + mIns[i + 0].mAddress = ains->mAddress; + } + mIns[i + 0].mFlags &= ~NCIF_YZERO; if (!iins) @@ -11564,10 +11749,10 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(int pass) const NativeCodeInstruction* ains, *iins; int sreg = mIns[i + 1].mAddress; - int apos; + int apos, addr; uint32 flags; - if (FindGlobalAddressSumY(i, sreg, true, apos, ains, iins, flags)) + if (FindGlobalAddressSumY(i, sreg, true, apos, ains, iins, flags, addr)) { if (iins || (flags & LIVE_CPU_REG_Y) || (flags & LIVE_CPU_REG_X)) //!(mIns[i + 1].mLive & LIVE_CPU_REG_X)) { @@ -11594,7 +11779,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(int pass) if (ains->mMode == ASMIM_IMMEDIATE) { mIns[i + 1].mLinkerObject = 0; - mIns[i + 1].mAddress = ains[0].mAddress + 256 * (ains[2].mAddress + ains[3].mAddress); + mIns[i + 1].mAddress = addr; } else { @@ -12154,6 +12339,15 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(int pass) progress = true; } #endif + else if ( + mIns[i + 0].mType == ASMIT_TAX && + mIns[i + 1].mType == ASMIT_TAY && + mIns[i + 2].mType != ASMIT_LDX && /* LDX has no absolute,X form */ mIns[i + 2].mMode == ASMIM_ABSOLUTE_Y && (mIns[i + 2].mLive & LIVE_CPU_REG_X) && !(mIns[i + 2].mLive & LIVE_CPU_REG_Y)) + { + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + mIns[i + 2].mMode = ASMIM_ABSOLUTE_X; + progress = true; + } #if 1 if ( mIns[i + 0].mType == ASMIT_LDY && mIns[i + 0].mMode == ASMIM_IMMEDIATE && mIns[i + 0].mAddress <= 1 && @@ -12411,6 +12605,39 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(int pass) progress = true; } #endif + else if ( + mIns[i + 0].mType == ASMIT_TYA && + mIns[i + 1].mType == ASMIT_CLC && + mIns[i + 2].mType == ASMIT_ADC && mIns[i + 2].mMode == ASMIM_IMMEDIATE && mIns[i + 2].mAddress >= 1 && mIns[i + 2].mAddress <= 2 && /* ADC #0 must not turn into INY */ + mIns[i + 3].mType == ASMIT_TAY && !(mIns[i + 3].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C))) + { + mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED; + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + mIns[i + 2].mType = ASMIT_INY; mIns[i +
2].mMode = ASMIM_IMPLIED; + if (mIns[i + 2].mAddress == 2) + mIns[i + 3].mType = ASMIT_INY; + else + mIns[i + 3].mType = ASMIT_NOP; + mIns[i + 3].mMode = ASMIM_IMPLIED; + progress = true; + } + else if ( + mIns[i + 0].mType == ASMIT_TYA && + mIns[i + 1].mType == ASMIT_SEC && + mIns[i + 2].mType == ASMIT_SBC && mIns[i + 2].mMode == ASMIM_IMMEDIATE && mIns[i + 2].mAddress >= 1 && mIns[i + 2].mAddress <= 2 && /* SBC #0 must not turn into DEY */ + mIns[i + 3].mType == ASMIT_TAY && !(mIns[i + 3].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_C))) + { + mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED; + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + mIns[i + 2].mType = ASMIT_DEY; mIns[i + 2].mMode = ASMIM_IMPLIED; + if (mIns[i + 2].mAddress == 2) + mIns[i + 3].mType = ASMIT_DEY; + else + mIns[i + 3].mType = ASMIT_NOP; + mIns[i + 3].mMode = ASMIM_IMPLIED; + progress = true; + } + else if ( mIns[i + 0].mType == ASMIT_LDA && mIns[i + 0].mMode == ASMIM_ZERO_PAGE && mIns[i + 1].IsShift() && mIns[i + 1].mMode == ASMIM_IMPLIED && @@ -13601,6 +13828,10 @@ void NativeCodeProcedure::Optimize(void) { ResetVisited(); changed = mEntryBlock->OptimizeInnerLoops(this); + + ResetVisited(); + if (mEntryBlock->ReduceLocalYPressure()) + changed = true; } #if 1 else if (step == 4) diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h index 6abe0d1..24ca0dd 100644 --- a/oscar64/NativeCodeGenerator.h +++ b/oscar64/NativeCodeGenerator.h @@ -204,7 +204,7 @@ public: bool FindAddressSumY(int at, int reg, int & apos, int& breg, int& ireg); bool PatchAddressSumY(int at, int reg, int apos, int breg, int ireg); bool FindGlobalAddress(int at, int reg, int& apos); - bool FindGlobalAddressSumY(int at, int reg, bool direct, int& apos, const NativeCodeInstruction * & ains, const NativeCodeInstruction*& iins, uint32 & flags); + bool FindGlobalAddressSumY(int at, int reg, bool direct, int& apos, const NativeCodeInstruction * & ains, const NativeCodeInstruction*& iins, uint32 & flags, int & addr); bool
FindPageStartAddress(int at, int reg, int& addr); bool MoveStoreXUp(int at); bool MoveStoreHighByteDown(int at); @@ -233,6 +233,7 @@ public: void GlobalRegisterXMap(int reg); void GlobalRegisterYMap(int reg); bool LocalRegisterXYMap(void); + bool ReduceLocalYPressure(void); }; class NativeCodeProcedure diff --git a/oscar64/Parser.cpp b/oscar64/Parser.cpp index c0708d9..46f6166 100644 --- a/oscar64/Parser.cpp +++ b/oscar64/Parser.cpp @@ -1135,7 +1135,7 @@ Expression* Parser::ParseSimpleExpression(void) while (mScanner->mToken == TK_STRING) { int s = strlen(mScanner->mTokenString); - uint8* d = new uint8[dec->mSize + s]; + uint8* d = new uint8[dec->mSize + s + 1]; memcpy(d, dec->mData, dec->mSize - 1); int i = 0; while (mScanner->mTokenString[i]) diff --git a/oscar64/Scanner.cpp b/oscar64/Scanner.cpp index 37a555c..95b3d45 100644 --- a/oscar64/Scanner.cpp +++ b/oscar64/Scanner.cpp @@ -593,6 +593,7 @@ void Scanner::NextToken(void) mPreprocessor->PushSource(); mPreprocessor->NextLine(); mOffset = 0; + NextChar(); } mPreprocessorMode = false; } diff --git a/oscar64/oscar64.cpp b/oscar64/oscar64.cpp index e202c12..a68600d 100644 --- a/oscar64/oscar64.cpp +++ b/oscar64/oscar64.cpp @@ -110,7 +110,7 @@ int main(int argc, const char** argv) strcpy_s(crtPath, includePath); strcat_s(crtPath, "crt.c"); - bool emulate = false; + bool emulate = false, profile = false; targetPath[0] = 0; @@ -159,6 +159,8 @@ int main(int argc, const char** argv) else if (arg[1] == 'e') { emulate = true; + if (arg[2] == 'p') + profile = true; } else if (arg[1] == 'd') { @@ -217,7 +219,7 @@ int main(int argc, const char** argv) compiler->WriteOutputFile(targetPath); if (emulate) - compiler->ExecuteCode(); + compiler->ExecuteCode(profile); } } diff --git a/samples/scrolling/bigfont.c b/samples/scrolling/bigfont.c new file mode 100644 index 0000000..0bede60 --- /dev/null +++ b/samples/scrolling/bigfont.c @@ -0,0 +1,115 @@ +#include +#include +#include + +byte font[2048]; + +void copyFont(void) +{ + 
mmap_set(MMAP_CHAR_ROM); + + memcpy(font, (byte *)0xd800, 2048); + + mmap_set(MMAP_ROM); +} + +#define screen ((byte *)0x0400) +#define color ((byte *)0xd800) +#define sline(x, y) (screen + 40 * (y) + (x)) +#define srow 5 + +void scrollLeft(void) +{ + for(char x=0; x<39; x++) + { +#assign y 0 +#repeat + sline(0, srow + y)[x] = sline(1, srow + y)[x]; +#assign y y + 1 +#until y == 16 + } +} + +void expand(char c, byte f) +{ + byte * fp = font + 8 * c; + +#assign y 0 +#repeat + sline(39, srow + 2 * y + 0)[0] = + sline(39, srow + 2 * y + 1)[0] = (fp[y] & f) ? 160 : 32; +#assign y y + 1 +#until y == 8 + +} + +const char * text = + s"Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt " + s"ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo " + s"dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit " + s"amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor " + s"invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam " + s"et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet." 
+ +inline void waitBottom(void) +{ + while (!(vic.ctrl1 & VIC_CTRL1_RST8)) + ; +} + +inline void waitTop(void) +{ + while ((vic.ctrl1 & VIC_CTRL1_RST8)) + ; +} + +int main(void) +{ + mmap_trampoline(); + + copyFont(); + + memset(screen, 0x20, 1000); + + for(int i=0; i<16; i++) + memset(color + 40 * (srow + i), i + 1, 40); + + vic.color_back = VCOL_BLACK; + vic.color_border = VCOL_BLACK; + + vic.ctrl2 = 0; + + int ci = 0; + for(;;) + { + byte cf = 0x80; + while (cf) + { + for(char i=0; i<2; i++) + { + waitBottom(); + vic.ctrl2 = 4; + waitTop(); + + waitBottom(); + vic.ctrl2 = 2; + waitTop(); + + waitBottom(); + vic.ctrl2 = 0; + waitTop(); + + waitBottom(); + vic.ctrl2 = 6; + + scrollLeft(); + expand(text[ci], cf); + } + + cf >>= 1; + } + ci++; + } + + return 0; +} diff --git a/samples/scrolling/grid2d.c b/samples/scrolling/grid2d.c new file mode 100644 index 0000000..42a6814 --- /dev/null +++ b/samples/scrolling/grid2d.c @@ -0,0 +1,238 @@ +#include +#include +#include +#include +#include + +#define screen ((byte *)0x0400) +#define color ((byte *)0xd800) +#define sline(x, y) (screen + 40 * (y) + (x)) + +static const char quad[4][4 * 4] = +{ + { + 0x20, 0x55, 0x6c, 0x4e, + 0x20, 0x5d, 0xe1, 0x65, + 0x20, 0x5d, 0xe1, 0x65, + 0x20, 0x4a, 0x7c, 0x4d, + }, + { + 0x20, 0x40, 0x62, 0x77, + 0x20, 0x20, 0xa0, 0x20, + 0x20, 0x20, 0xa0, 0x20, + 0x20, 0x40, 0xe2, 0x6f, + }, + { + 0x20, 0x40, 0x62, 0x77, + 0x20, 0x20, 0xa0, 0x20, + 0x20, 0x20, 0xa0, 0x20, + 0x20, 0x40, 0xe2, 0x6f, + }, + { + 0x20, 0x49, 0x7b, 0x4d, + 0x20, 0x5d, 0x61, 0x6a, + 0x20, 0x5d, 0x61, 0x6a, + 0x20, 0x4b, 0x7e, 0x4e, + } +}; + +#pragma align(quad, 256) + +void expandrow0(char * dp, const char * grid, char ly) +{ + char gi; +#assign gx 0 +#repeat + gi = grid[gx] | ly; + dp[4 * gx + 0] = quad[0][gi]; + dp[4 * gx + 1] = quad[1][gi]; + dp[4 * gx + 2] = quad[2][gi]; + dp[4 * gx + 3] = quad[3][gi]; +#assign gx gx + 1 +#until gx == 10 +} + +void expandrow1(char * dp, const char * grid, char ly) +{ + char gi; + 
gi = grid[0] | ly; + dp[0] = quad[1][gi]; + dp[1] = quad[2][gi]; + dp[2] = quad[3][gi]; +#assign gx 0 +#repeat + gi = grid[gx + 1] | ly; + dp[4 * gx + 3] = quad[0][gi]; + dp[4 * gx + 4] = quad[1][gi]; + dp[4 * gx + 5] = quad[2][gi]; + dp[4 * gx + 6] = quad[3][gi]; +#assign gx gx + 1 +#until gx == 9 + gi = grid[10] | ly; + dp[39] = quad[0][gi]; +} + +void expandrow2(char * dp, const char * grid, char ly) +{ + char gi; + gi = grid[0] | ly; + dp[0] = quad[2][gi]; + dp[1] = quad[3][gi]; +#assign gx 0 +#repeat + gi = grid[gx + 1] | ly; + dp[4 * gx + 2] = quad[0][gi]; + dp[4 * gx + 3] = quad[1][gi]; + dp[4 * gx + 4] = quad[2][gi]; + dp[4 * gx + 5] = quad[3][gi]; +#assign gx gx + 1 +#until gx == 9 + gi = grid[10] | ly; + dp[38] = quad[0][gi]; + dp[39] = quad[1][gi]; +} + +void expandrow3(char * dp, const char * grid, char ly) +{ + char gi; + gi = grid[0] | ly; + dp[0] = quad[3][gi]; +#assign gx 0 +#repeat + gi = grid[gx + 1] | ly; + dp[4 * gx + 1] = quad[0][gi]; + dp[4 * gx + 2] = quad[1][gi]; + dp[4 * gx + 3] = quad[2][gi]; + dp[4 * gx + 4] = quad[3][gi]; +#assign gx gx + 1 +#until gx == 9 + gi = grid[10] | ly; + dp[37] = quad[0][gi]; + dp[38] = quad[1][gi]; + dp[39] = quad[2][gi]; +} + +void expand(char * dp, const char * grid, char px, char py) +{ + char ry = 4 * (py & 3); + char rx = px & 3; + + char * cdp = dp; + const char * cgrid = grid + (px >> 2) + 16 * (py >> 2); + + for(char gy=0; gy<20; gy++) + { + switch (rx) + { + case 0: + expandrow0(cdp, cgrid, ry); + break; + case 1: + expandrow1(cdp, cgrid, ry); + break; + case 2: + expandrow2(cdp, cgrid, ry); + break; + default: + expandrow3(cdp, cgrid, ry); + break; + } + cdp += 40; + ry += 4; + if (ry == 16) + { + ry = 0; + cgrid += 16; + } + } +} + + +char grid[16][16]; + +#pragma align(grid, 256) + +RIRQCode blank, scroll, bottom; + +int main(void) +{ + for(char y=0; y<16; y++) + { + for(char x=0; x<16; x++) + { + grid[y][x] = rand() & 3; + } + } + + vic.color_border = 0; + + rirq_init(true); + + rirq_build(&blank, 
1); + rirq_write(&blank, 0, &vic.ctrl1, 0); + rirq_set(0, 46 + 5 * 8, &blank); + + rirq_build(&scroll, 3); + rirq_delay(&scroll, 10); + rirq_write(&scroll, 1, &vic.ctrl1, VIC_CTRL1_DEN); + rirq_write(&scroll, 2, &vic.ctrl2, 0); + rirq_set(1, 54 + 5 * 8, &scroll); + + rirq_build(&bottom, 2); + rirq_write(&bottom, 0, &vic.ctrl1, VIC_CTRL1_DEN | VIC_CTRL1_RSEL); + rirq_write(&bottom, 1, &vic.ctrl2, VIC_CTRL2_CSEL); + rirq_set(2, 250, &bottom); + + rirq_sort(); + + rirq_start(); + + int py = 40 * 32, px = 40 * 32, dy = 0, dx = 0, ax = 0, ay = 0; + for(;;) + { + int rx = px >> 5, ry = py >> 5; + + vic.color_border++; + rirq_wait(); + vic.color_border--; + + rirq_data(&blank, 0, ((7 - ry) & 7) | VIC_CTRL1_DEN | VIC_CTRL1_BMM | VIC_CTRL1_ECM); + if ((ry & 7) == 0) + rirq_data(&scroll, 0, 4); + else + rirq_data(&scroll, 0, 10); + rirq_data(&scroll, 1, ((7 - ry) & 7) | VIC_CTRL1_DEN); + rirq_data(&scroll, 2, (7 - rx) & 7); + + expand(screen + 200, &(grid[0][0]), rx >> 3, ry >> 3); + + dx += ax; + dy += ay; + + if ((rand() & 127) == 0) + { + ax = (rand() & 63) - 32; + ay = (rand() & 63) - 32; + } + + dx -= (dx + 8) >> 4; + dy -= (dy + 8) >> 4; + + py += dy; + if (py < 0 || py > 10 * 8 * 4 * 32) + { + dy = -dy; + py += dy; + } + + px += dx; + if (px < 0 || px > 6 * 8 * 4 * 32) + { + dx = -dx; + px += dx; + } + } + + + return 0; +} diff --git a/samples/scrolling/make.bat b/samples/scrolling/make.bat new file mode 100644 index 0000000..5d18a5d --- /dev/null +++ b/samples/scrolling/make.bat @@ -0,0 +1,3 @@ +..\..\bin\oscar64 bigfont.c -n +..\..\bin\oscar64 tunnel.c -n +..\..\bin\oscar64 grid2d.c -n diff --git a/samples/scrolling/tunnel.c b/samples/scrolling/tunnel.c new file mode 100644 index 0000000..d301fc6 --- /dev/null +++ b/samples/scrolling/tunnel.c @@ -0,0 +1,218 @@ +#include +#include +#include +#include + +#define screen ((byte *)0x0400) +#define color ((byte *)0xd800) +#define sline(x, y) (screen + 40 * (y) + (x)) + +char rbuff[25]; + +void expand(char x) +{ +#assign y 
0 +#repeat + sline(0, y)[x] = rbuff[y]; +#assign y y + 1 +#until y == 25 +} + +void scrollLeft(void) +{ + for(char x=0; x<39; x++) + { +#assign y 0 +#repeat + sline(0, y)[x] = sline(1, y)[x]; +#assign y y + 1 +#until y == 12 + } +#assign y 0 +#repeat + sline(0, y)[39] = rbuff[y]; +#assign y y + 1 +#until y == 12 + + for(char x=0; x<39; x++) + { +#assign y 12 +#repeat + sline(0, y)[x] = sline(1, y)[x]; +#assign y y + 1 +#until y == 25 + } +#assign y 12 +#repeat + sline(0, y)[39] = rbuff[y]; +#assign y y + 1 +#until y == 25 +} + +void scrollRight(void) +{ + for(char x=39; x>0; x--) + { +#assign y 0 +#repeat + sline(0, y)[x] = sline(-1, y)[x]; +#assign y y + 1 +#until y == 12 + } +#assign y 0 +#repeat + sline(0, y)[0] = rbuff[y]; +#assign y y + 1 +#until y == 12 + + for(char x=39; x>0; x--) + { +#assign y 12 +#repeat + sline(0, y)[x] = sline(-1, y)[x]; +#assign y y + 1 +#until y == 25 + } +#assign y 12 +#repeat + sline(0, y)[0] = rbuff[y]; +#assign y y + 1 +#until y == 25 +} + +inline void waitBottom(void) +{ + while (!(vic.ctrl1 & VIC_CTRL1_RST8)) + ; +} + +inline void waitTop(void) +{ + while ((vic.ctrl1 & VIC_CTRL1_RST8)) + ; +} + +char ytop[256], ybottom[256]; + +void prepcol(char xi) +{ + char yt, yb; + signed char dyt, dyb; + + yt = ytop[(char)(xi + 0)]; + yb = ybottom[(char)(xi + 0)]; + + dyt = yt - ytop[(char)(xi - 1)]; + dyb = yb - ybottom[(char)(xi - 1)]; + + for(char i=0; i 0) + rbuff[yt - 1] = 95; + + if (dyb < 0) + rbuff[yb] = 233; + else if (dyb > 0) + rbuff[yb - 1] = 223; + +} + +void buildTunnel(void) +{ + signed char yt = 1, yb = 24, dyt = 1, dyb = -1; + + for(int i=0; i<256; i++) + { + unsigned r = rand(); + + if (!(r & 0x00e0)) + dyt = -dyt; + if (!(r & 0xe000)) + dyb = -dyb; + + yt += dyt; + yb += dyb; + if (yt < 0) + { + yt = 0; + dyt = 1; + } + if (yb > 25) + { + yb = 25; + dyb = -1; + } + + ytop[i] = yt; + ybottom[i] = yb; + + if (yt + 5 > yb) + { + dyt = -1; + dyb = 1; + } + } + +} + +int main(void) +{ + memset(screen, 0x20, 1000); + 
memset(color, 7, 1000); + + vic.color_back = VCOL_BLACK; + vic.color_border = VCOL_BLACK; + + buildTunnel(); + + for(char i=0; i<40; i++) + { + prepcol(i); + expand(i); + } + + int xpos = 0, dx = 0, ax = 1; + int xi = 0, pxi = 0; + + for(;;) + { + unsigned r = rand(); + if ((r & 127) == 0) + ax = -ax; + + dx += ax; + if (dx > 32) + dx = 32; + else if (dx < -32) + dx = -32; + + xpos += dx; + pxi = xi; + xi = xpos >> 5; + + if (pxi < xi) + prepcol(xi + 39); + else if (pxi > xi) + prepcol(xi + 0); + + waitTop(); + waitBottom(); + + vic.ctrl2 = (7 - (xpos >> 2)) & 7; + + if (pxi < xi) + scrollLeft(); + else if (pxi > xi) + scrollRight(); + } + + return 0; + +}