diff --git a/README.md b/README.md index dd2d984..8d55a70 100644 --- a/README.md +++ b/README.md @@ -247,7 +247,28 @@ The following code scrolls the screen to the left, and completely unrolls the in for(char y=0; y<25; y++) screen[y][x] = screen[y][x + 1]; } - + +Sometimes it is better to unroll the loop not in the order it normally executes, but using page size chunks. When e.g. filling a C64 screen with a loop: + + for(int i=0; i<1000; i++) + Screen[i] = ' '; + +Unrolling this loop would not help; the index would still not fit into the 8 bit x or y register. Using a page level unroll, the compiler will unroll the loop into four stores, each 250 bytes apart, and use the y or x register for indexing: + + #pragma unroll(page) + for(int i=0; i<1000; i++) + Screen[i] = ' '; + + 0921 LDY #$00 + 0923 LDA #$20 + 0925 STA $0400,y + 0928 STA $04fa,y + 092b STA $05f4,y + 092e STA $06ee,y + 0931 INY + 0932 CPY #$fa + 0934 BCC $0925 + ### Marking functions as native diff --git a/oscar64/InterCode.cpp b/oscar64/InterCode.cpp index e5cef5d..1cd98f5 100644 --- a/oscar64/InterCode.cpp +++ b/oscar64/InterCode.cpp @@ -7343,15 +7343,41 @@ bool InterCodeBasicBlock::SimplifyIntegerNumeric(const GrowingInstructionPtrArra { InterInstruction* pins = ltvalue[ins->mSrc[0].mTemp]; - if (ins->mSrc[0].mMemory == IM_INDIRECT && pins->mCode == IC_LEA) - ins->mSrc[0].mLinkerObject = pins->mSrc[1].mLinkerObject; - - if (pins->mCode == IC_LEA && pins->mSrc[0].mTemp < 0 && ins->mSrc[0].mIntConst + pins->mSrc[0].mIntConst >= 0) + if (pins->mCode == IC_LEA) { - ins->mSrc[0].Forward(pins->mSrc[1]); - pins->mSrc[1].mFinal = false; - ins->mSrc[0].mIntConst += pins->mSrc[0].mIntConst; - changed = true; + if (ins->mSrc[0].mMemory == IM_INDIRECT) + ins->mSrc[0].mLinkerObject = pins->mSrc[1].mLinkerObject; + + if (pins->mSrc[0].mTemp < 0 && ins->mSrc[0].mIntConst + pins->mSrc[0].mIntConst >= 0) + { + ins->mSrc[0].Forward(pins->mSrc[1]); + pins->mSrc[1].mFinal = false; + ins->mSrc[0].mIntConst += 
pins->mSrc[0].mIntConst; + changed = true; + } + else if (pins->mSrc[1].mTemp < 0 && pins->mSrc[0].mTemp >= 0 && ins->mSrc[0].mIntConst && (ins->mSrc[0].mIntConst >= 256 || pins->mSrc[0].IsUByte())) + { + int k = mInstructions.IndexOf(pins); + if (k >= 0) + { + if (spareTemps + 2 >= ltvalue.Size()) + return true; + + InterInstruction* nins = new InterInstruction(); + nins->mCode = IC_LEA; + nins->mSrc[0].Forward(pins->mSrc[0]); + nins->mSrc[1].ForwardMem(pins->mSrc[1]); + nins->mSrc[1].mIntConst += ins->mSrc[0].mIntConst; + nins->mDst.mTemp = spareTemps++; + nins->mDst.mType = IT_POINTER; + nins->mDst.mRange = ins->mDst.mRange; + ins->mSrc[0].mIntConst = 0; + ins->mSrc[0].mTemp = nins->mDst.mTemp; + + mInstructions.Insert(k + 1, nins); + changed = true; + } + } } } break; @@ -10504,6 +10530,8 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa { if (cins->mCode == IC_LEA && cins->mSrc[1].mTemp == st && cins->mSrc[0].mTemp < 0) toffset += cins->mSrc[0].mIntConst; + else if (cins->mCode == IC_BINARY_OPERATOR && cins->mOperator == IA_ADD && cins->mSrc[1].mTemp == st && cins->mSrc[0].mTemp < 0) + toffset += cins->mSrc[0].mIntConst; else break; } @@ -10525,6 +10553,16 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa ins->mSrc[0].mType = IT_INT16; ins->mSrc[0].mIntConst = -toffset; } + else + { + ins->mCode = IC_BINARY_OPERATOR; + ins->mOperator = IA_ADD; + ins->mNumOperands = 2; + ins->mSrc[1] = ins->mSrc[0]; + ins->mSrc[0].mTemp = -1; + ins->mSrc[0].mType = IT_INT16; + ins->mSrc[0].mIntConst = -toffset; + } } tailBlock->mInstructions.Insert(0, ins); mInstructions.Remove(i); diff --git a/oscar64/NativeCodeGenerator.cpp b/oscar64/NativeCodeGenerator.cpp index 1836687..0d511c0 100644 --- a/oscar64/NativeCodeGenerator.cpp +++ b/oscar64/NativeCodeGenerator.cpp @@ -9778,6 +9778,19 @@ void NativeCodeBasicBlock::RelationalOperator(InterCodeProcedure* proc, const In } } +#endif +#if 1 + // Special case counting 
to 256 + else if (ins->mSrc[0].mTemp < 0 && ins->mSrc[0].mIntConst == 256 && op == IA_CMPLU && + ins->mSrc[1].mRange.mMinState == IntegerValueRange::S_BOUND && ins->mSrc[1].mRange.mMinValue > 0 && + ins->mSrc[1].mRange.mMaxState == IntegerValueRange::S_BOUND && ins->mSrc[1].mRange.mMaxValue == 256) + { + if (ins->mSrc[1].mTemp < 0) + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, ins->mSrc[1].mIntConst & 0xff)); + else + mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[1].mTemp])); + this->Close(falseJump, trueJump, ASMIT_BEQ); + } #endif else { @@ -19229,7 +19242,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc changed = true; } } - else if (mIns[ai].mType == ASMIT_LDY && mIns[ai].mMode == ASMIM_ZERO_PAGE) + else if (mIns[ai].mType == ASMIT_LDY && mIns[ai].mMode == ASMIM_ZERO_PAGE && !(mIns[ai].mLive & LIVE_CPU_REG_Z)) { int i = 0; while (i < mIns.Size() && (i == ai || !mIns[i].ChangesYReg())) @@ -21811,7 +21824,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass if (MoveStoreXUp(i)) changed = true; } - else if (mIns[i].mType == ASMIT_LDX && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & LIVE_MEM)) + else if (mIns[i].mType == ASMIT_LDX && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & (LIVE_MEM | LIVE_CPU_REG_Z))) { if (MoveLoadXUp(i)) changed = true; @@ -21824,7 +21837,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass changed = true; } } - else if (mIns[i].mType == ASMIT_LDY && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & LIVE_MEM)) + else if (mIns[i].mType == ASMIT_LDY && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & (LIVE_MEM | LIVE_CPU_REG_Z))) { if (MoveLoadYUp(i)) changed = true; @@ -22836,7 +22849,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 0].mType == ASMIT_TXA && mIns[i + 1].mType == ASMIT_TAX) { - mIns[i 
+ 0].mLive |= LIVE_CPU_REG_X; + mIns[i + 0].mLive |= mIns[i + 1].mLive; mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; progress = true; } @@ -22844,7 +22857,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 0].mType == ASMIT_TYA && mIns[i + 1].mType == ASMIT_TAY) { - mIns[i + 0].mLive |= LIVE_CPU_REG_Y; + mIns[i + 0].mLive |= mIns[i + 1].mLive; mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; progress = true; } @@ -22852,7 +22865,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 0].mType == ASMIT_TAX && mIns[i + 1].mType == ASMIT_TXA) { - mIns[i + 0].mLive |= LIVE_CPU_REG_A; + mIns[i + 0].mLive |= mIns[i + 1].mLive; mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; progress = true; } @@ -22860,7 +22873,23 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass mIns[i + 0].mType == ASMIT_TAY && mIns[i + 1].mType == ASMIT_TYA) { - mIns[i + 0].mLive |= LIVE_CPU_REG_A; + mIns[i + 0].mLive |= mIns[i + 1].mLive; + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + progress = true; + } + else if ( + mIns[i + 0].mType == ASMIT_INY && + mIns[i + 1].mType == ASMIT_TYA && !(mIns[i + 1].mLive & LIVE_CPU_REG_A)) + { + mIns[i + 0].mLive |= mIns[i + 1].mLive; + mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; + progress = true; + } + else if ( + mIns[i + 0].mType == ASMIT_INX && + mIns[i + 1].mType == ASMIT_TXA && !(mIns[i + 1].mLive & LIVE_CPU_REG_A)) + { + mIns[i + 0].mLive |= mIns[i + 1].mLive; mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED; progress = true; } diff --git a/oscar64/Parser.cpp b/oscar64/Parser.cpp index 5b39e73..34d44fd 100644 --- a/oscar64/Parser.cpp +++ b/oscar64/Parser.cpp @@ -14,6 +14,7 @@ Parser::Parser(Errors* errors, Scanner* scanner, CompilationUnits* compilationUn mBSSection = compilationUnits->mSectionBSS; mUnrollLoop = 0; + 
mUnrollLoopPage = false; for (int i = 0; i < 256; i++) mCharMap[i] = i; @@ -2125,8 +2126,10 @@ Expression* Parser::ParseStatement(void) mScanner->NextToken(); exp = new Expression(mScanner->mLocation, EX_FOR); - int unrollLoop = mUnrollLoop; + int unrollLoop = mUnrollLoop; + bool unrollPage = mUnrollLoopPage; mUnrollLoop = 0; + mUnrollLoopPage = false; Expression* initExp = nullptr, * iterateExp = nullptr, * conditionExp = nullptr, * bodyExp = nullptr, * finalExp = nullptr; @@ -2163,53 +2166,130 @@ Expression* Parser::ParseStatement(void) if (unrollLoop > 1 && initExp && iterateExp && conditionExp) { if ((initExp->mType == EX_ASSIGNMENT || initExp->mType == EX_INITIALIZATION) && initExp->mLeft->mType == EX_VARIABLE && initExp->mRight->mType == EX_CONSTANT && - (iterateExp->mType == EX_POSTINCDEC || iterateExp->mType == EX_PREINCDEC) && iterateExp->mLeft->IsSame(initExp->mLeft) && + (iterateExp->mType == EX_POSTINCDEC || iterateExp->mType == EX_PREINCDEC || iterateExp->mType == EX_ASSIGNMENT && iterateExp->mToken == TK_ASSIGN_ADD && iterateExp->mRight->mType == EX_CONSTANT) && + iterateExp->mLeft->IsSame(initExp->mLeft) && conditionExp->mType == EX_RELATIONAL && conditionExp->mToken == TK_LESS_THAN && conditionExp->mLeft->IsSame(initExp->mLeft) && conditionExp->mRight->mType == EX_CONSTANT) { if (initExp->mRight->mDecValue->mType == DT_CONST_INTEGER && conditionExp->mRight->mDecValue->mType == DT_CONST_INTEGER) { int startValue = initExp->mRight->mDecValue->mInteger; int endValue = conditionExp->mRight->mDecValue->mInteger; + int stepValue = 1; + if (iterateExp->mType == EX_ASSIGNMENT) + stepValue = iterateExp->mRight->mDecValue->mInteger; - int remain = (endValue - startValue) % unrollLoop; - endValue -= remain; - - conditionExp->mRight->mDecValue->mInteger = endValue; - - Expression* unrollBody = new Expression(mScanner->mLocation, EX_SEQUENCE); - unrollBody->mLeft = bodyExp; - Expression* bexp = unrollBody; - if (endValue > startValue) + if (unrollPage) { - for 
(int i = 1; i < unrollLoop; i++) + int numLoops = (endValue - startValue + 255) / 256; + int numIterations = (endValue - startValue) / numLoops; + int stride = (endValue - startValue + numLoops - 1) / numLoops; + int remain = (endValue - startValue) - numIterations * numLoops; + + Expression* unrollBody = new Expression(mScanner->mLocation, EX_SEQUENCE); + unrollBody->mLeft = bodyExp; + Expression* bexp = unrollBody; + + Expression* iexp = new Expression(mScanner->mLocation, EX_ASSIGNMENT); + iexp->mToken = TK_ASSIGN_ADD; + iexp->mLeft = iterateExp->mLeft; + iexp->mRight = new Expression(mScanner->mLocation, EX_CONSTANT); + + Declaration * idec = new Declaration(mScanner->mLocation, DT_CONST_INTEGER); + idec->mInteger = stride; + idec->mBase = TheSignedIntTypeDeclaration; + iexp->mRight = new Expression(mScanner->mLocation, EX_CONSTANT); + iexp->mRight->mDecValue = idec; + iexp->mRight->mDecType = idec->mBase; + + Expression* dexp = new Expression(mScanner->mLocation, EX_ASSIGNMENT); + dexp->mToken = TK_ASSIGN_SUB; + dexp->mLeft = iterateExp->mLeft; + dexp->mRight = new Expression(mScanner->mLocation, EX_CONSTANT); + + Declaration* ddec = new Declaration(mScanner->mLocation, DT_CONST_INTEGER); + ddec->mInteger = stride * (numLoops - 1); + ddec->mBase = TheSignedIntTypeDeclaration; + dexp->mRight = new Expression(mScanner->mLocation, EX_CONSTANT); + dexp->mRight->mDecValue = ddec; + dexp->mRight->mDecType = ddec->mBase; + + for (int i = 1; i < numLoops; i++) { bexp->mRight = new Expression(mScanner->mLocation, EX_SEQUENCE); bexp = bexp->mRight; - bexp->mLeft = iterateExp; + bexp->mLeft = iexp; bexp->mRight = new Expression(mScanner->mLocation, EX_SEQUENCE); bexp = bexp->mRight; bexp->mLeft = bodyExp; } - } + bexp->mRight = new Expression(mScanner->mLocation, EX_SEQUENCE); + bexp = bexp->mRight; + bexp->mLeft = dexp; - if (remain) - { - finalExp = new Expression(mScanner->mLocation, EX_SEQUENCE); - finalExp->mLeft = bodyExp; - Expression* bexp = finalExp; + 
conditionExp->mRight->mDecValue->mInteger = numIterations; - for (int i = 1; i < remain; i++) + if (remain) { - bexp->mRight = new Expression(mScanner->mLocation, EX_SEQUENCE); - bexp = bexp->mRight; - bexp->mLeft = iterateExp; - bexp->mRight = new Expression(mScanner->mLocation, EX_SEQUENCE); - bexp = bexp->mRight; - bexp->mLeft = bodyExp; - } - } + finalExp = new Expression(mScanner->mLocation, EX_SEQUENCE); + finalExp->mLeft = bodyExp; + Expression* bexp = finalExp; - bodyExp = unrollBody; + for (int i = 1; i < remain; i++) + { + bexp->mRight = new Expression(mScanner->mLocation, EX_SEQUENCE); + bexp = bexp->mRight; + bexp->mLeft = iexp; + bexp->mRight = new Expression(mScanner->mLocation, EX_SEQUENCE); + bexp = bexp->mRight; + bexp->mLeft = bodyExp; + } + } + + bodyExp = unrollBody; + } + else + { + int numSteps = (endValue - startValue) / stepValue; + int remain = numSteps % unrollLoop; + endValue -= remain * stepValue; + + conditionExp->mRight->mDecValue->mInteger = endValue; + + Expression* unrollBody = new Expression(mScanner->mLocation, EX_SEQUENCE); + unrollBody->mLeft = bodyExp; + Expression* bexp = unrollBody; + if (endValue > startValue) + { + for (int i = 1; i < unrollLoop; i++) + { + bexp->mRight = new Expression(mScanner->mLocation, EX_SEQUENCE); + bexp = bexp->mRight; + bexp->mLeft = iterateExp; + bexp->mRight = new Expression(mScanner->mLocation, EX_SEQUENCE); + bexp = bexp->mRight; + bexp->mLeft = bodyExp; + } + } + + if (remain) + { + finalExp = new Expression(mScanner->mLocation, EX_SEQUENCE); + finalExp->mLeft = bodyExp; + Expression* bexp = finalExp; + + for (int i = 1; i < remain; i++) + { + bexp->mRight = new Expression(mScanner->mLocation, EX_SEQUENCE); + bexp = bexp->mRight; + bexp->mLeft = iterateExp; + bexp->mRight = new Expression(mScanner->mLocation, EX_SEQUENCE); + bexp = bexp->mRight; + bexp->mLeft = bodyExp; + } + } + + bodyExp = unrollBody; + } } else mErrors->Error(exp->mLocation, EWARN_LOOP_UNROLL_IGNORED, "Loop unroll ignored, 
bounds and step not integer"); @@ -3672,6 +3752,8 @@ void Parser::ParsePragma(void) mScanner->NextToken(); ConsumeToken(TK_OPEN_PARENTHESIS); + mUnrollLoopPage = false; + if (mScanner->mToken == TK_INTEGER) { mUnrollLoop = mScanner->mTokenInteger; @@ -3682,6 +3764,12 @@ void Parser::ParsePragma(void) mUnrollLoop = 0x10000; mScanner->NextToken(); } + else if (mScanner->mToken == TK_IDENT && !strcmp(mScanner->mTokenIdent->mString, "page")) + { + mUnrollLoop = 0x10000; + mUnrollLoopPage = true; + mScanner->NextToken(); + } else mErrors->Error(mScanner->mLocation, EERR_PRAGMA_PARAMETER, "Integer literal expected"); diff --git a/oscar64/Parser.h b/oscar64/Parser.h index 288e33d..bcdc4a0 100644 --- a/oscar64/Parser.h +++ b/oscar64/Parser.h @@ -25,6 +25,7 @@ protected: char mCharMap[256]; int mUnrollLoop; + bool mUnrollLoopPage; uint8* ParseStringLiteral(int msize);