diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp
index 9d40e47..670eb6a 100644
--- a/oscar64/InterCode.cpp
+++ b/oscar64/InterCode.cpp
@@ -869,6 +869,22 @@ void ValueSet::InsertValue(InterInstruction * ins)
mInstructions[mNum++] = ins;
}
+static bool HasSideEffect(InterCode code) // file-local predicate: true exactly for IC_CALL, IC_CALL_NATIVE and IC_ASSEMBLER
+{
+ return code == IC_CALL || code == IC_CALL_NATIVE || code == IC_ASSEMBLER;
+}
+
+static bool IsMoveable(InterCode code) // file-local predicate: false for the opcodes rejected below, true for every other opcode
+{
+ if (HasSideEffect(code) || code == IC_COPY || code == IC_STRCPY || code == IC_STORE || code == IC_BRANCH || code == IC_POP_FRAME || code == IC_PUSH_FRAME) // side-effect opcodes, copy/strcpy/store, branch and frame push/pop are not moveable
+ return false;
+ if (code == IC_RETURN || code == IC_RETURN_STRUCT || code == IC_RETURN_VALUE) // none of the three return opcodes is moveable either
+ return false;
+
+ return true;
+}
+
+
static bool CanBypassLoad(const InterInstruction* lins, const InterInstruction* bins)
{
// Check ambiguity
@@ -922,6 +938,9 @@ static bool CanBypassLoad(const InterInstruction* lins, const InterInstruction*
static bool CanBypass(const InterInstruction* lins, const InterInstruction* bins)
{
+ if (HasSideEffect(lins->mCode) && HasSideEffect(bins->mCode))
+ return false;
+
if (lins->mDst.mTemp >= 0)
{
if (lins->mDst.mTemp == bins->mDst.mTemp)
@@ -948,6 +967,9 @@ static bool CanBypass(const InterInstruction* lins, const InterInstruction* bins
static bool CanBypassUp(const InterInstruction* lins, const InterInstruction* bins)
{
+ if (HasSideEffect(lins->mCode) && HasSideEffect(bins->mCode))
+ return false;
+
if (lins->mDst.mTemp >= 0)
{
if (lins->mDst.mTemp == bins->mDst.mTemp)
@@ -2757,22 +2779,6 @@ void InterInstruction::PerformTempForwarding(TempForwardingTable& forwardingTabl
}
}
-bool HasSideEffect(InterCode code)
-{
- return code == IC_CALL || code == IC_CALL_NATIVE || code == IC_ASSEMBLER;
-}
-
-bool IsMoveable(InterCode code)
-{
- if (HasSideEffect(code) || code == IC_COPY || code == IC_STRCPY || code == IC_STORE || code == IC_BRANCH || code == IC_POP_FRAME || code == IC_PUSH_FRAME)
- return false;
- if (code == IC_RETURN || code == IC_RETURN_STRUCT || code == IC_RETURN_VALUE)
- return false;
-
- return true;
-}
-
-
bool InterInstruction::RemoveUnusedResultInstructions(InterInstruction* pre, NumberSet& requiredTemps)
{
bool changed = false;
@@ -6920,27 +6926,29 @@ bool InterCodeBasicBlock::MergeIndexedLoadStore(const GrowingInstructionPtrArra
if (lins->mSrc[0].mTemp >= 0)
{
- InterInstruction* bins = lins;
-
- for (int j = 0; j < ltvalue.Size(); j++)
+ if (lins->mSrc[1].mMemory != IM_ABSOLUTE || (lins->mSrc[0].mRange.mMaxState == IntegerValueRange::S_BOUND && lins->mSrc[0].mRange.mMaxValue >= 256))
{
- InterInstruction* cins = ltvalue[j];
- if (cins &&
- cins->mSrc[0].mTemp == bins->mSrc[0].mTemp &&
- cins->mSrc[1].mTemp < 0 && bins->mSrc[1].mTemp < 0 &&
- cins->mSrc[1].mMemory == bins->mSrc[1].mMemory &&
- cins->mSrc[1].mVarIndex == bins->mSrc[1].mVarIndex &&
- cins->mSrc[1].mIntConst < bins->mSrc[1].mIntConst &&
- cins->mSrc[1].mMemory != IM_ABSOLUTE)
+ InterInstruction* bins = lins;
+ for (int j = 0; j < ltvalue.Size(); j++)
+ {
+ InterInstruction* cins = ltvalue[j];
+ if (cins &&
+ cins->mSrc[0].mTemp == bins->mSrc[0].mTemp &&
+ cins->mSrc[1].mTemp < 0 && bins->mSrc[1].mTemp < 0 &&
+ cins->mSrc[1].mMemory == bins->mSrc[1].mMemory &&
+ cins->mSrc[1].mVarIndex == bins->mSrc[1].mVarIndex &&
+ cins->mSrc[1].mIntConst < bins->mSrc[1].mIntConst)
+ {
+ bins = cins;
+ }
+ }
- bins = cins;
- }
-
- if (bins != lins && ins->mSrc[pi].mIntConst + lins->mSrc[1].mIntConst - bins->mSrc[1].mIntConst < 252)
- {
- ins->mSrc[pi].mTemp = bins->mDst.mTemp;
- ins->mSrc[pi].mIntConst += lins->mSrc[1].mIntConst - bins->mSrc[1].mIntConst;
- changed = true;
+ if (bins != lins && ins->mSrc[pi].mIntConst + lins->mSrc[1].mIntConst - bins->mSrc[1].mIntConst < 252)
+ {
+ ins->mSrc[pi].mTemp = bins->mDst.mTemp;
+ ins->mSrc[pi].mIntConst += lins->mSrc[1].mIntConst - bins->mSrc[1].mIntConst;
+ changed = true;
+ }
}
}
}
@@ -10568,6 +10576,18 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati
mInstructions[i + 0]->mSrc[0].mIntConst = ~((1LL << shift) - 1);
changed = true;
}
+ else if ( // (x << s) * c with non-temp s and c: rewrite as x * (c << s) while the combined factor stays below 65536
+ mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 0]->mOperator == IA_SHL && mInstructions[i + 0]->mSrc[0].mTemp < 0 &&
+ mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 1]->mOperator == IA_MUL && mInstructions[i + 1]->mSrc[0].mTemp < 0 &&
+ mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[1].mFinal &&
+ (mInstructions[i + 1]->mSrc[0].mIntConst << mInstructions[i + 0]->mSrc[0].mIntConst) < 65536)
+ {
+ mInstructions[i + 1]->mSrc[0].mIntConst <<= mInstructions[i + 0]->mSrc[0].mIntConst; // pre-shift the multiplier by the shift count
+ mInstructions[i + 1]->mSrc[1] = mInstructions[i + 0]->mSrc[1]; // multiply the unshifted operand directly
+ mInstructions[i + 0]->mCode = IC_NONE; // the shift instruction is turned into a no-op
+ mInstructions[i + 0]->mNumOperands = 0;
+ changed = true;
+ }
#if 1
else if (
mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 0]->mOperator == IA_OR && mInstructions[i + 0]->mSrc[0].mTemp < 0 &&
@@ -10808,6 +10828,15 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati
mInstructions[i + 1]->mSrc[0] = mInstructions[i + 0]->mDst;
changed = true;
}
+ else if ( // LEA whose mSrc[1] is not a temp, followed by a STORE through the LEA result with a non-zero offset
+ mInstructions[i + 0]->mCode == IC_LEA && mInstructions[i + 0]->mSrc[1].mTemp < 0 &&
+ mInstructions[i + 1]->mCode == IC_STORE && mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[1].mFinal && // mFinal presumably marks the last use of the temp - confirm
+ mInstructions[i + 1]->mSrc[1].mIntConst != 0)
+ {
+ mInstructions[i + 0]->mSrc[1].mIntConst += mInstructions[i + 1]->mSrc[1].mIntConst; // move the store's offset into the LEA's constant
+ mInstructions[i + 1]->mSrc[1].mIntConst = 0; // the store now uses the LEA result with offset zero
+ changed = true;
+ }
#if 1
else if (
mInstructions[i + 0]->mCode == IC_LEA && mInstructions[i + 0]->mSrc[1].mMemory == IM_GLOBAL &&
@@ -10830,6 +10859,22 @@ void InterCodeBasicBlock::PeepholeOptimization(const GrowingVariableArray& stati
changed = true;
}
#endif
+ else if ( // (x + c) << s feeding mSrc[0] of a LEA whose mSrc[1] is not a temp: shift x directly and add c << s to the LEA constant
+ mInstructions[i + 0]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 0]->mOperator == IA_ADD &&
+ mInstructions[i + 0]->mSrc[0].mTemp < 0 &&
+ mInstructions[i + 1]->mCode == IC_BINARY_OPERATOR && mInstructions[i + 1]->mOperator == IA_SHL &&
+ mInstructions[i + 1]->mSrc[0].mTemp < 0 &&
+ mInstructions[i + 1]->mSrc[1].mTemp == mInstructions[i + 0]->mDst.mTemp && mInstructions[i + 1]->mSrc[1].mFinal &&
+ mInstructions[i + 2]->mCode == IC_LEA &&
+ mInstructions[i + 2]->mSrc[0].mTemp == mInstructions[i + 1]->mDst.mTemp && mInstructions[i + 2]->mSrc[0].mFinal &&
+ mInstructions[i + 2]->mSrc[1].mTemp < 0)
+ {
+ mInstructions[i + 1]->mSrc[1] = mInstructions[i + 0]->mSrc[1]; // the shift now takes the ADD's input operand
+ mInstructions[i + 2]->mSrc[1].mIntConst += mInstructions[i + 0]->mSrc[0].mIntConst << mInstructions[i + 1]->mSrc[0].mIntConst; // NOTE(review): assumes (x + c) << s did not wrap in its own integer type - confirm
+
+ mInstructions[i + 0]->mCode = IC_NONE; mInstructions[i + 0]->mNumOperands = 0; // the ADD is turned into a no-op
+ changed = true;
+ }
#if 1
// Postincrement artifact
@@ -12047,6 +12092,8 @@ void InterCodeProcedure::Close(void)
EliminateAliasValues();
+ MergeIndexedLoadStore();
+
#if 1
ResetVisited();
mEntryBlock->RestartLocalIntegerRangeSets(mLocalVars);
diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp
index 6dff860..091604d 100644
--- a/oscar64/NativeCodeGenerator.cpp
+++ b/oscar64/NativeCodeGenerator.cpp
@@ -2726,7 +2726,7 @@ bool NativeCodeInstruction::ValueForwarding(NativeRegisterDataSet& data, AsmInsT
#endif
#if 1
- if (mMode == ASMIM_ABSOLUTE_X && data.mRegs[CPU_REG_X].SameData(data.mRegs[CPU_REG_Y]) && HasAsmInstructionMode(mType, ASMIM_ABSOLUTE_Y))
+ if (mMode == ASMIM_ABSOLUTE_X && data.mRegs[CPU_REG_X].SameData(data.mRegs[CPU_REG_Y]) && HasAsmInstructionMode(mType, ASMIM_ABSOLUTE_Y) && !(mFlags & NICT_INDEXFLIPPED))
{
mMode = ASMIM_ABSOLUTE_Y;
changed = true;
@@ -20740,6 +20740,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
{
assert(HasAsmInstructionMode(mIns[j].mType, ASMIM_ABSOLUTE_X));
mIns[j].mMode = ASMIM_ABSOLUTE_X;
+ mIns[j].mFlags |= NICT_INDEXFLIPPED;
n = j;
changed = true;
}
@@ -20762,6 +20763,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
{
assert(HasAsmInstructionMode(mIns[j].mType, ASMIM_ABSOLUTE_Y));
mIns[j].mMode = ASMIM_ABSOLUTE_Y;
+ mIns[j].mFlags |= NICT_INDEXFLIPPED;
n = j;
changed = true;
}
@@ -21376,7 +21378,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED;
progress = true;
}
- else if (mIns[i].mType == ASMIT_SEC && mIns[i + 1].mType == ASMIT_ADC && mIns[i + 1].mMode == ASMIM_IMMEDIATE && !(mIns[i + 1].mLive & LIVE_CPU_REG_C))
+ else if (mIns[i].mType == ASMIT_SEC && mIns[i + 1].mType == ASMIT_ADC && mIns[i + 1].mMode == ASMIM_IMMEDIATE && !(mIns[i + 1].mLive & LIVE_CPU_REG_C))
{
mIns[i + 0].mType = ASMIT_CLC;
mIns[i + 1].mAddress++;
@@ -21401,7 +21403,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
progress = true;
}
else if (mIns[i + 0].mType == ASMIT_LDA && mIns[i + 0].mMode == ASMIM_IMMEDIATE && mIns[i + 0].mAddress == 0 &&
- mIns[i + 1].mType == ASMIT_CMP && !(mIns[i + 1].mLive & (LIVE_CPU_REG_C | LIVE_CPU_REG_A)))
+ mIns[i + 1].mType == ASMIT_CMP && !(mIns[i + 1].mLive & (LIVE_CPU_REG_C | LIVE_CPU_REG_A)))
{
mIns[i + 0].mType = ASMIT_NOP; mIns[i + 0].mMode = ASMIM_IMPLIED;
mIns[i + 1].mType = ASMIT_LDA;
@@ -21491,6 +21493,36 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
mIns[i + 1].mType = ASMIT_STA;
progress = true;
}
+#if 1 // swap a register transfer with the store of its source register that directly follows it
+ else if (mIns[i + 0].mType == ASMIT_TXA && mIns[i + 1].mType == ASMIT_STX) // TXA ; STX  ->  STX ; TXA
+ {
+ NativeCodeInstruction ins(mIns[i + 0]);
+ mIns[i + 0] = mIns[i + 1]; mIns[i + 0].mLive |= LIVE_CPU_REG_X; // X is still read by the TXA that now follows the store
+ mIns[i + 1] = ins;
+ progress = true;
+ }
+ else if (mIns[i + 0].mType == ASMIT_TYA && mIns[i + 1].mType == ASMIT_STY) // TYA ; STY  ->  STY ; TYA
+ {
+ NativeCodeInstruction ins(mIns[i + 0]);
+ mIns[i + 0] = mIns[i + 1]; mIns[i + 0].mLive |= LIVE_CPU_REG_Y; // Y is still read by the TYA that now follows the store
+ mIns[i + 1] = ins;
+ progress = true;
+ }
+ else if (mIns[i + 0].mType == ASMIT_TAX && mIns[i + 1].mType == ASMIT_STA && !mIns[i + 1].RequiresXReg()) // TAX ; STA  ->  STA ; TAX, only when the store does not need X (which TAX overwrites)
+ {
+ NativeCodeInstruction ins(mIns[i + 0]);
+ mIns[i + 0] = mIns[i + 1]; mIns[i + 0].mLive |= LIVE_CPU_REG_A; // A is still read by the TAX that now follows the store
+ mIns[i + 1] = ins;
+ progress = true;
+ }
+ else if (mIns[i + 0].mType == ASMIT_TAY && mIns[i + 1].mType == ASMIT_STA && !mIns[i + 1].RequiresYReg()) // TAY ; STA  ->  STA ; TAY, only when the store does not need Y (which TAY overwrites)
+ {
+ NativeCodeInstruction ins(mIns[i + 0]);
+ mIns[i + 0] = mIns[i + 1]; mIns[i + 0].mLive |= LIVE_CPU_REG_A; // A is still read by the TAY that now follows the store
+ mIns[i + 1] = ins;
+ progress = true;
+ }
+#endif
else if (
mIns[i + 0].mType == ASMIT_ROL && mIns[i + 0].mMode == ASMIM_IMPLIED &&
mIns[i + 1].mType == ASMIT_LSR && mIns[i + 1].mMode == ASMIM_IMPLIED && !(mIns[i + 1].mLive & (LIVE_CPU_REG_A | LIVE_CPU_REG_Z)))
@@ -23871,6 +23903,43 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
progress = true;
}
}
+#if 1 // LDA a ; STA z1 ; LDA b ; STA z2 ; LDA z1  ->  LDA b ; STA z2 ; LDA a ; STA z1 ; NOP
+ else if (
+ mIns[i + 0].mType == ASMIT_LDA &&
+ mIns[i + 1].mType == ASMIT_STA && mIns[i + 1].mMode == ASMIM_ZERO_PAGE &&
+ mIns[i + 2].mType == ASMIT_LDA &&
+ mIns[i + 3].mType == ASMIT_STA && mIns[i + 3].mMode == ASMIM_ZERO_PAGE && mIns[i + 3].mAddress != mIns[i + 1].mAddress &&
+ mIns[i + 4].mType == ASMIT_LDA && mIns[i + 4].mMode == ASMIM_ZERO_PAGE && mIns[i + 4].mAddress == mIns[i + 1].mAddress && // the final load re-reads the first store's target
+
+ !mIns[i + 0].ReferencesZeroPage(mIns[i + 3].mAddress) && // first load must not reference the second store's target, since it will run after that store
+ !mIns[i + 2].ReferencesZeroPage(mIns[i + 1].mAddress) && // second load must not reference the first store's target, since it will run before that store
+ !(mIns[i + 0].mFlags & NCIF_VOLATILE) && !(mIns[i + 2].mFlags & NCIF_VOLATILE)) // volatile loads are never reordered
+ {
+ NativeCodeInstruction ins(mIns[i + 0]);
+ mIns[i + 0] = mIns[i + 2]; // exchange the two loads ...
+ mIns[i + 2] = ins;
+ mIns[i + 1].mAddress = mIns[i + 3].mAddress; // ... and the two store targets, so each value still reaches its own location
+ mIns[i + 3].mAddress = mIns[i + 4].mAddress;
+
+ if (mIns[i + 2].RequiresYReg()) // an index register needed by the load now at i + 2 must stay live across the pair moved in front of it
+ {
+ mIns[i + 0].mLive |= LIVE_CPU_REG_Y;
+ mIns[i + 1].mLive |= LIVE_CPU_REG_Y;
+ }
+ if (mIns[i + 2].RequiresXReg())
+ {
+ mIns[i + 0].mLive |= LIVE_CPU_REG_X;
+ mIns[i + 1].mLive |= LIVE_CPU_REG_X;
+ }
+
+ mIns[i + 0].mLive |= mIns[i + 2].mLive;
+ mIns[i + 2].mLive |= mIns[i + 4].mLive; // whatever was live after the dropped reload is now live after the moved load and store
+ mIns[i + 3].mLive |= mIns[i + 4].mLive;
+
+ mIns[i + 4].mType = ASMIT_NOP; mIns[i + 4].mMode = ASMIM_IMPLIED; // the reload is replaced by a NOP
+ progress = true;
+ }
+#endif
}
#endif
CheckLive();
diff --git a/oscar64/NativeCodeGenerator.h b/oscar64/NativeCodeGenerator.h
index ce433c2..d55c170 100644
--- a/oscar64/NativeCodeGenerator.h
+++ b/oscar64/NativeCodeGenerator.h
@@ -64,6 +64,7 @@ static const uint32 NCIF_VOLATILE = 0x00000010;
static const uint32 NCIF_LONG = 0x00000020;
static const uint32 NCIF_FEXEC = 0x00000040;
static const uint32 NCIF_JSRFLAGS = 0x00000080;
+static const uint32 NICT_INDEXFLIPPED = 0x00000100; // set by PeepHoleOptimizer when it switches an instruction between ABSOLUTE_X and ABSOLUTE_Y; ValueForwarding then skips its own X->Y switch. NOTE(review): prefix differs from the NCIF_ flags around it
static const uint32 NCIF_USE_CPU_REG_A = 0x00001000;
static const uint32 NCIF_USE_CPU_REG_X = 0x00002000;
diff --git a/oscar64/oscar64.vcxproj b/oscar64/oscar64.vcxproj
index 023f311..ee4f784 100644
--- a/oscar64/oscar64.vcxproj
+++ b/oscar64/oscar64.vcxproj
@@ -93,6 +93,7 @@
Console
true
kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;version.lib;%(AdditionalDependencies)
+ 16000000
diff --git a/samples/scrolling/bigfont.c b/samples/scrolling/bigfont.c
index db5e0d6..c196a84 100644
--- a/samples/scrolling/bigfont.c
+++ b/samples/scrolling/bigfont.c
@@ -14,12 +14,12 @@ void copyFont(void)
mmap_set(MMAP_ROM);
}
-// Screen and color space
-#define screen ((byte *)0x0400)
-#define color ((byte *)0xd800)
+// Single row of screen has 40 characters
+typedef char ScreenRow[40];
-// Macro for easy access to screen space
-#define sline(x, y) (screen + 40 * (y) + (x))
+// Screen and color space
+ScreenRow * const screen = (ScreenRow *)0x0400;
+ScreenRow * const color = (ScreenRow *)0xd800;
// Start row for text
#define srow 5
@@ -30,12 +30,12 @@ void scrollLeft(void)
// Loop horizontaly
for(char x=0; x<39; x++)
{
- // Unroll vetical loop 16 times
-#assign y 0
-#repeat
- sline(0, srow + y)[x] = sline(1, srow + y)[x];
-#assign y y + 1
-#until y == 16
+ // Unroll vertical loop 16 times
+ #pragma unroll(full)
+ for(char y=0; y<16; y++)
+ {
+ screen[srow + y][x] = screen[srow + y][x + 1];
+ }
}
}
@@ -46,13 +46,13 @@ void expand(char c, byte f)
byte * fp = font + 8 * c;
// Unroll eight times for each byte in glyph data
-#assign y 0
-#repeat
- sline(39, srow + 2 * y + 0)[0] =
- sline(39, srow + 2 * y + 1)[0] = (fp[y] & f) ? 160 : 32;
-#assign y y + 1
-#until y == 8
-
+// #pragma unroll(full)
+ for(char y=0; y<8; y++)
+ {
+ char t = (fp[y] & f) ? 160 : 32;
+ screen[srow + 2 * y + 0][39] = t;
+ screen[srow + 2 * y + 1][39] = t;
+ }
}
const char * text =
@@ -77,7 +77,7 @@ int main(void)
// Color bars
for(int i=0; i<16; i++)
- memset(color + 40 * (srow + i), i + 1, 40);
+ memset(color[srow + i], i + 1, 40);
vic.color_back = VCOL_BLACK;
vic.color_border = VCOL_BLACK;