diff --git a/autotest/autotest.bat b/autotest/autotest.bat index 567bea6..37f4864 100644 --- a/autotest/autotest.bat +++ b/autotest/autotest.bat @@ -216,6 +216,9 @@ rem @echo off @call :testn stripedarraytest.c @if %errorlevel% neq 0 goto :error +@call :testn mmultest.c +@if %errorlevel% neq 0 goto :error + @exit /b 0 :error diff --git a/autotest/bitfields.cpp b/autotest/bitfields.cpp index 9a4d4e9..02031e7 100644 --- a/autotest/bitfields.cpp +++ b/autotest/bitfields.cpp @@ -334,7 +334,7 @@ void test_add_word_cross(void) } int main(void) { -#if 0 + test_char_fit(); test_char_cross(); test_word_fit(); @@ -347,7 +347,7 @@ int main(void) test_inc_char_fit(); test_inc_char_cross(); test_add_char_cross(); -#endif + test_add_word_fit(); test_add_word_cross(); diff --git a/autotest/mmultest.c b/autotest/mmultest.c new file mode 100644 index 0000000..edcb10c --- /dev/null +++ b/autotest/mmultest.c @@ -0,0 +1,43 @@ +#include +#include +#include + +Matrix4 ml, mr; + +int main(void) +{ + for(char i=0; i<16; i++) + { + for(char j=0; j<16; j++) + { + for(char k=0; k<16; k++) + { + ml.m[k] = (i == k) ? 1.0 : 0.0; + mr.m[k] = (j == k) ? 1.0 : 0.0; + } + + mat4_mmul(&ml, &mr); + +#if 0 + printf("%d, %d\n", i, j); + for(char k=0; k<16; k++) + printf("%f ", ml.m[k]); + printf("\n"); +#endif + + for(char k=0; k<16; k++) + { + char ix = i & 3, iy = i >> 2; + char jx = j & 3, jy = j >> 2; + char kx = k & 3, ky = k >> 2; + + if (ky == jy && kx == ix && jx == iy) + assert(ml.m[k] == 1.0); + else + assert(ml.m[k] == 0.0); + } + } + } + + return 0; +} diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index a5204d7..6c5ac7a 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -8007,6 +8007,26 @@ void InterCodeBasicBlock::UpdateLocalIntegerRangeSets(const GrowingVariableArray +void InterCodeBasicBlock::PruneUnusedIntegerRangeSets(void) +{ + if (!mVisited) + { + mVisited = true; + + if (mEntryValueRange.Size() > 0 && mEntryRequiredTemps.Size()) + { + for (int i = 0; i < mEntryValueRange.Size(); i++) + { + if (!mEntryRequiredTemps[i]) + mEntryValueRange[i].Reset(); + } + } + + if (mTrueJump) mTrueJump->PruneUnusedIntegerRangeSets(); + if (mFalseJump) mFalseJump->PruneUnusedIntegerRangeSets(); + } +} + void InterCodeBasicBlock::RestartLocalIntegerRangeSets(int num, const GrowingVariableArray& localVars, const GrowingVariableArray& paramVars) { if (!mVisited) @@ -10731,6 +10751,9 @@ void InterCodeBasicBlock::RenameValueRanges(const GrowingIntArray& renameTable, { if (renameTable[i] >= 0) { + assert(mLocalValueRange[i].mMinState == IntegerValueRange::S_UNKNOWN || mEntryValueRange[renameTable[i]].mMinState == IntegerValueRange::S_UNKNOWN); + assert(mLocalValueRange[i].mMaxState == IntegerValueRange::S_UNKNOWN || mEntryValueRange[renameTable[i]].mMaxState == IntegerValueRange::S_UNKNOWN); + mEntryValueRange[renameTable[i]].Limit(mLocalValueRange[i]); } } @@ -18017,7 +18040,7 @@ void InterCodeProcedure::Close(void) { GrowingTypeArray tstack(IT_NONE); - CheckFunc = !strcmp(mIdent->mString, "test_add_char_cross"); + CheckFunc = !strcmp(mIdent->mString, "qsort"); CheckCase = false; mEntryBlock = mBlocks[0]; @@ -18373,7 +18396,6 @@ void InterCodeProcedure::Close(void) BuildDataFlowSets(); DisassembleDebug("Followed Jumps 2"); - CheckCase = true; RebuildIntegerRangeSet(); @@ -18502,8 +18524,6 @@ void InterCodeProcedure::Close(void) Disassemble("gcp-"); #endif - CheckCase = true; - #if 1 RebuildIntegerRangeSet(); #endif @@ -18565,8 +18585,6 @@ void InterCodeProcedure::Close(void) LoadStoreForwarding(paramMemory); - CheckCase = true; - RebuildIntegerRangeSet(); #if 1 @@ -18777,6 +18795,9 @@ void InterCodeProcedure::Close(void) PeepholeOptimization(); DisassembleDebug("Peephole Temp Check"); + + if (i == 1) + CheckCase = true; RemoveUnusedInstructions(); @@ -19458,6 +19479,9 @@ void InterCodeProcedure::ReduceTemporaries(void) ResetVisited(); } while (mEntryBlock->BuildGlobalRequiredTempSet(totalRequired2)); + ResetVisited(); + mEntryBlock->PruneUnusedIntegerRangeSets(); + collisionSet = new NumberSet[numTemps]; for (i = 0; i < numTemps; i++) diff --git a/oscar64/InterCode.h b/oscar64/InterCode.h index 3169011..737417f 100644 --- a/oscar64/InterCode.h +++ b/oscar64/InterCode.h @@ -454,6 +454,7 @@ public: void SimplifyIntegerRangeRelops(void); void MarkIntegerRangeBoundUp(int temp, int64 value, GrowingIntegerValueRangeArray& range); void UnionIntegerRanges(const InterCodeBasicBlock* block); + void PruneUnusedIntegerRangeSets(void); bool CombineIndirectAddressing(void); diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index bfdd59e..bebe30f 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -26593,7 +26593,7 @@ bool NativeCodeBasicBlock::ForwardReplaceZeroPage(int at, int from, int to) if (mFalseJump && mFalseJump->ForwardReplaceZeroPage(0, from, to)) changed = true; - if (changed) + if (mEntryRequiredRegs[from]) mEntryRequiredRegs += to; } @@ -29635,7 +29635,8 @@ bool NativeCodeBasicBlock::OptimizeLoopCarryOver(void) mExitRequiredRegs += CPU_REG_Y; changed = true; } - else if (sz > 1 && hblock->mIns[0].mType == ASMIT_LDA && mIns[sz - 1].mType == ASMIT_CMP && mIns[sz - 2].mType == ASMIT_LDA && hblock->mIns[0].SameEffectiveAddress(mIns[sz - 2]) && !(hblock->mIns[0].mLive & LIVE_CPU_REG_Z)) + else if (sz > 1 && hblock->mIns[0].mType == ASMIT_LDA && mIns[sz - 1].mType == ASMIT_CMP && mIns[sz - 2].mType == ASMIT_LDA + && hblock->mIns[0].SameEffectiveAddress(mIns[sz - 2]) && !(hblock->mIns[0].mLive & LIVE_CPU_REG_Z) && !(hblock->mIns[0].mFlags & NCIF_VOLATILE)) { pblock->mIns.Push(hblock->mIns[0]); hblock->mIns.Remove(0); @@ -41791,7 +41792,7 @@ void NativeCodeProcedure::Compile(InterCodeProcedure* proc) { mInterProc = proc; - CheckFunc = !strcmp(mInterProc->mIdent->mString, "atoi"); + CheckFunc = !strcmp(mInterProc->mIdent->mString, "mat4_mmul"); int nblocks = proc->mBlocks.Size(); tblocks = new NativeCodeBasicBlock * [nblocks]; @@ -43090,7 +43091,7 @@ void NativeCodeProcedure::Optimize(void) #endif #if 1 - if (step == 10) + if (step == 10 && (mInterProc->mCompilerOptions & COPT_OPTIMIZE_BASIC)) { ResetVisited(); mEntryBlock->MarkLocalUsedLinkerObjects(); @@ -43126,7 +43127,6 @@ void NativeCodeProcedure::Optimize(void) } } #endif - #if _DEBUG ResetVisited(); mEntryBlock->CheckAsmCode(); @@ -43157,7 +43157,6 @@ void NativeCodeProcedure::Optimize(void) else cnt++; - } while (changed); #if 1 @@ -43226,6 +43225,7 @@ void NativeCodeProcedure::Optimize(void) changed = mEntryBlock->JoinTailCodeSequences(this, true); } + } while (changed); #endif diff --git a/oscar64/Scanner.cpp b/oscar64/Scanner.cpp index b761cd9..967d160 100644 --- a/oscar64/Scanner.cpp +++ b/oscar64/Scanner.cpp @@ -608,7 +608,7 @@ void Scanner::NextPreToken(void) strcpy_s(mPreprocessor->mSource->mLocationFileName, mTokenString); NextRawToken(); } - mPreprocessor->mLocation.mLine = v - 1; + mPreprocessor->mLocation.mLine = v + mLocation.mLine - l - 1; } else if (mToken == TK_PREP_FOR) {