Add page level unrolling

This commit is contained in:
drmortalwombat 2022-08-07 22:29:33 +02:00
parent 4d274f46d7
commit b9396542f0
5 changed files with 222 additions and 45 deletions

View File

@ -248,6 +248,27 @@ The following code scrolls the screen to the left, and completely unrolls the in
screen[y][x] = screen[y][x + 1];
}
Sometimes it is better to unroll the loop not in the order it normally executes, but in page-sized chunks. For example, when filling a C64 screen with a loop:
for(int i=0; i<1000; i++)
Screen[i] = ' ';
Unrolling this loop would not help, because the index would still not fit into the 8 bit x or y register. Using a page level unroll, the compiler will unroll the loop into four stores, each 250 bytes apart, and use the y or x register for indexing:
#pragma unroll(page)
for(int i=0; i<1000; i++)
Screen[i] = ' ';
0921 LDY #$00
0923 LDA #$20
0925 STA $0400,y
0928 STA $04fa,y
092b STA $05f4,y
092e STA $06ee,y
0931 INY
0932 CPY #$fa
0934 BCC $0925
### Marking functions as native

View File

@ -7343,16 +7343,42 @@ bool InterCodeBasicBlock::SimplifyIntegerNumeric(const GrowingInstructionPtrArra
{
InterInstruction* pins = ltvalue[ins->mSrc[0].mTemp];
if (ins->mSrc[0].mMemory == IM_INDIRECT && pins->mCode == IC_LEA)
if (pins->mCode == IC_LEA)
{
if (ins->mSrc[0].mMemory == IM_INDIRECT)
ins->mSrc[0].mLinkerObject = pins->mSrc[1].mLinkerObject;
if (pins->mCode == IC_LEA && pins->mSrc[0].mTemp < 0 && ins->mSrc[0].mIntConst + pins->mSrc[0].mIntConst >= 0)
if (pins->mSrc[0].mTemp < 0 && ins->mSrc[0].mIntConst + pins->mSrc[0].mIntConst >= 0)
{
ins->mSrc[0].Forward(pins->mSrc[1]);
pins->mSrc[1].mFinal = false;
ins->mSrc[0].mIntConst += pins->mSrc[0].mIntConst;
changed = true;
}
else if (pins->mSrc[1].mTemp < 0 && pins->mSrc[0].mTemp >= 0 && ins->mSrc[0].mIntConst && (ins->mSrc[0].mIntConst >= 256 || pins->mSrc[0].IsUByte()))
{
int k = mInstructions.IndexOf(pins);
if (k >= 0)
{
if (spareTemps + 2 >= ltvalue.Size())
return true;
InterInstruction* nins = new InterInstruction();
nins->mCode = IC_LEA;
nins->mSrc[0].Forward(pins->mSrc[0]);
nins->mSrc[1].ForwardMem(pins->mSrc[1]);
nins->mSrc[1].mIntConst += ins->mSrc[0].mIntConst;
nins->mDst.mTemp = spareTemps++;
nins->mDst.mType = IT_POINTER;
nins->mDst.mRange = ins->mDst.mRange;
ins->mSrc[0].mIntConst = 0;
ins->mSrc[0].mTemp = nins->mDst.mTemp;
mInstructions.Insert(k + 1, nins);
changed = true;
}
}
}
}
break;
#endif
@ -10504,6 +10530,8 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa
{
if (cins->mCode == IC_LEA && cins->mSrc[1].mTemp == st && cins->mSrc[0].mTemp < 0)
toffset += cins->mSrc[0].mIntConst;
else if (cins->mCode == IC_BINARY_OPERATOR && cins->mOperator == IA_ADD && cins->mSrc[1].mTemp == st && cins->mSrc[0].mTemp < 0)
toffset += cins->mSrc[0].mIntConst;
else
break;
}
@ -10525,6 +10553,16 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa
ins->mSrc[0].mType = IT_INT16;
ins->mSrc[0].mIntConst = -toffset;
}
else
{
ins->mCode = IC_BINARY_OPERATOR;
ins->mOperator = IA_ADD;
ins->mNumOperands = 2;
ins->mSrc[1] = ins->mSrc[0];
ins->mSrc[0].mTemp = -1;
ins->mSrc[0].mType = IT_INT16;
ins->mSrc[0].mIntConst = -toffset;
}
}
tailBlock->mInstructions.Insert(0, ins);
mInstructions.Remove(i);

View File

@ -9778,6 +9778,19 @@ void NativeCodeBasicBlock::RelationalOperator(InterCodeProcedure* proc, const In
}
}
#endif
#if 1
// Special case counting to 256
else if (ins->mSrc[0].mTemp < 0 && ins->mSrc[0].mIntConst == 256 && op == IA_CMPLU &&
ins->mSrc[1].mRange.mMinState == IntegerValueRange::S_BOUND && ins->mSrc[1].mRange.mMinValue > 0 &&
ins->mSrc[1].mRange.mMaxState == IntegerValueRange::S_BOUND && ins->mSrc[1].mRange.mMaxValue == 256)
{
if (ins->mSrc[1].mTemp < 0)
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, ins->mSrc[1].mIntConst & 0xff));
else
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[1].mTemp]));
this->Close(falseJump, trueJump, ASMIT_BEQ);
}
#endif
else
{
@ -19229,7 +19242,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc
changed = true;
}
}
else if (mIns[ai].mType == ASMIT_LDY && mIns[ai].mMode == ASMIM_ZERO_PAGE)
else if (mIns[ai].mType == ASMIT_LDY && mIns[ai].mMode == ASMIM_ZERO_PAGE && !(mIns[ai].mLive & LIVE_CPU_REG_Z))
{
int i = 0;
while (i < mIns.Size() && (i == ai || !mIns[i].ChangesYReg()))
@ -21811,7 +21824,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
if (MoveStoreXUp(i))
changed = true;
}
else if (mIns[i].mType == ASMIT_LDX && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & LIVE_MEM))
else if (mIns[i].mType == ASMIT_LDX && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & (LIVE_MEM | LIVE_CPU_REG_Z)))
{
if (MoveLoadXUp(i))
changed = true;
@ -21824,7 +21837,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
changed = true;
}
}
else if (mIns[i].mType == ASMIT_LDY && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & LIVE_MEM))
else if (mIns[i].mType == ASMIT_LDY && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & (LIVE_MEM | LIVE_CPU_REG_Z)))
{
if (MoveLoadYUp(i))
changed = true;
@ -22836,7 +22849,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
mIns[i + 0].mType == ASMIT_TXA &&
mIns[i + 1].mType == ASMIT_TAX)
{
mIns[i + 0].mLive |= LIVE_CPU_REG_X;
mIns[i + 0].mLive |= mIns[i + 1].mLive;
mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED;
progress = true;
}
@ -22844,7 +22857,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
mIns[i + 0].mType == ASMIT_TYA &&
mIns[i + 1].mType == ASMIT_TAY)
{
mIns[i + 0].mLive |= LIVE_CPU_REG_Y;
mIns[i + 0].mLive |= mIns[i + 1].mLive;
mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED;
progress = true;
}
@ -22852,7 +22865,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
mIns[i + 0].mType == ASMIT_TAX &&
mIns[i + 1].mType == ASMIT_TXA)
{
mIns[i + 0].mLive |= LIVE_CPU_REG_A;
mIns[i + 0].mLive |= mIns[i + 1].mLive;
mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED;
progress = true;
}
@ -22860,7 +22873,23 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
mIns[i + 0].mType == ASMIT_TAY &&
mIns[i + 1].mType == ASMIT_TYA)
{
mIns[i + 0].mLive |= LIVE_CPU_REG_A;
mIns[i + 0].mLive |= mIns[i + 1].mLive;
mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED;
progress = true;
}
else if (
mIns[i + 0].mType == ASMIT_INY &&
mIns[i + 1].mType == ASMIT_TYA && !(mIns[i + 1].mLive & LIVE_CPU_REG_A))
{
mIns[i + 0].mLive |= mIns[i + 1].mLive;
mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED;
progress = true;
}
else if (
mIns[i + 0].mType == ASMIT_INX &&
mIns[i + 1].mType == ASMIT_TXA && !(mIns[i + 1].mLive & LIVE_CPU_REG_A))
{
mIns[i + 0].mLive |= mIns[i + 1].mLive;
mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED;
progress = true;
}

View File

@ -14,6 +14,7 @@ Parser::Parser(Errors* errors, Scanner* scanner, CompilationUnits* compilationUn
mBSSection = compilationUnits->mSectionBSS;
mUnrollLoop = 0;
mUnrollLoopPage = false;
for (int i = 0; i < 256; i++)
mCharMap[i] = i;
@ -2126,7 +2127,9 @@ Expression* Parser::ParseStatement(void)
exp = new Expression(mScanner->mLocation, EX_FOR);
int unrollLoop = mUnrollLoop;
bool unrollPage = mUnrollLoopPage;
mUnrollLoop = 0;
mUnrollLoopPage = false;
Expression* initExp = nullptr, * iterateExp = nullptr, * conditionExp = nullptr, * bodyExp = nullptr, * finalExp = nullptr;
@ -2163,16 +2166,92 @@ Expression* Parser::ParseStatement(void)
if (unrollLoop > 1 && initExp && iterateExp && conditionExp)
{
if ((initExp->mType == EX_ASSIGNMENT || initExp->mType == EX_INITIALIZATION) && initExp->mLeft->mType == EX_VARIABLE && initExp->mRight->mType == EX_CONSTANT &&
(iterateExp->mType == EX_POSTINCDEC || iterateExp->mType == EX_PREINCDEC) && iterateExp->mLeft->IsSame(initExp->mLeft) &&
(iterateExp->mType == EX_POSTINCDEC || iterateExp->mType == EX_PREINCDEC || iterateExp->mType == EX_ASSIGNMENT && iterateExp->mToken == TK_ASSIGN_ADD && iterateExp->mRight->mType == EX_CONSTANT) &&
iterateExp->mLeft->IsSame(initExp->mLeft) &&
conditionExp->mType == EX_RELATIONAL && conditionExp->mToken == TK_LESS_THAN && conditionExp->mLeft->IsSame(initExp->mLeft) && conditionExp->mRight->mType == EX_CONSTANT)
{
if (initExp->mRight->mDecValue->mType == DT_CONST_INTEGER && conditionExp->mRight->mDecValue->mType == DT_CONST_INTEGER)
{
int startValue = initExp->mRight->mDecValue->mInteger;
int endValue = conditionExp->mRight->mDecValue->mInteger;
int stepValue = 1;
if (iterateExp->mType == EX_ASSIGNMENT)
stepValue = iterateExp->mRight->mDecValue->mInteger;
int remain = (endValue - startValue) % unrollLoop;
endValue -= remain;
if (unrollPage)
{
int numLoops = (endValue - startValue + 255) / 256;
int numIterations = (endValue - startValue) / numLoops;
int stride = (endValue - startValue + numLoops - 1) / numLoops;
int remain = (endValue - startValue) - numIterations * numLoops;
Expression* unrollBody = new Expression(mScanner->mLocation, EX_SEQUENCE);
unrollBody->mLeft = bodyExp;
Expression* bexp = unrollBody;
Expression* iexp = new Expression(mScanner->mLocation, EX_ASSIGNMENT);
iexp->mToken = TK_ASSIGN_ADD;
iexp->mLeft = iterateExp->mLeft;
iexp->mRight = new Expression(mScanner->mLocation, EX_CONSTANT);
Declaration * idec = new Declaration(mScanner->mLocation, DT_CONST_INTEGER);
idec->mInteger = stride;
idec->mBase = TheSignedIntTypeDeclaration;
iexp->mRight = new Expression(mScanner->mLocation, EX_CONSTANT);
iexp->mRight->mDecValue = idec;
iexp->mRight->mDecType = idec->mBase;
Expression* dexp = new Expression(mScanner->mLocation, EX_ASSIGNMENT);
dexp->mToken = TK_ASSIGN_SUB;
dexp->mLeft = iterateExp->mLeft;
dexp->mRight = new Expression(mScanner->mLocation, EX_CONSTANT);
Declaration* ddec = new Declaration(mScanner->mLocation, DT_CONST_INTEGER);
ddec->mInteger = stride * (numLoops - 1);
ddec->mBase = TheSignedIntTypeDeclaration;
dexp->mRight = new Expression(mScanner->mLocation, EX_CONSTANT);
dexp->mRight->mDecValue = ddec;
dexp->mRight->mDecType = ddec->mBase;
for (int i = 1; i < numLoops; i++)
{
bexp->mRight = new Expression(mScanner->mLocation, EX_SEQUENCE);
bexp = bexp->mRight;
bexp->mLeft = iexp;
bexp->mRight = new Expression(mScanner->mLocation, EX_SEQUENCE);
bexp = bexp->mRight;
bexp->mLeft = bodyExp;
}
bexp->mRight = new Expression(mScanner->mLocation, EX_SEQUENCE);
bexp = bexp->mRight;
bexp->mLeft = dexp;
conditionExp->mRight->mDecValue->mInteger = numIterations;
if (remain)
{
finalExp = new Expression(mScanner->mLocation, EX_SEQUENCE);
finalExp->mLeft = bodyExp;
Expression* bexp = finalExp;
for (int i = 1; i < remain; i++)
{
bexp->mRight = new Expression(mScanner->mLocation, EX_SEQUENCE);
bexp = bexp->mRight;
bexp->mLeft = iexp;
bexp->mRight = new Expression(mScanner->mLocation, EX_SEQUENCE);
bexp = bexp->mRight;
bexp->mLeft = bodyExp;
}
}
bodyExp = unrollBody;
}
else
{
int numSteps = (endValue - startValue) / stepValue;
int remain = numSteps % unrollLoop;
endValue -= remain * stepValue;
conditionExp->mRight->mDecValue->mInteger = endValue;
@ -2211,6 +2290,7 @@ Expression* Parser::ParseStatement(void)
bodyExp = unrollBody;
}
}
else
mErrors->Error(exp->mLocation, EWARN_LOOP_UNROLL_IGNORED, "Loop unroll ignored, bounds and step not integer");
}
@ -3672,6 +3752,8 @@ void Parser::ParsePragma(void)
mScanner->NextToken();
ConsumeToken(TK_OPEN_PARENTHESIS);
mUnrollLoopPage = false;
if (mScanner->mToken == TK_INTEGER)
{
mUnrollLoop = mScanner->mTokenInteger;
@ -3682,6 +3764,12 @@ void Parser::ParsePragma(void)
mUnrollLoop = 0x10000;
mScanner->NextToken();
}
else if (mScanner->mToken == TK_IDENT && !strcmp(mScanner->mTokenIdent->mString, "page"))
{
mUnrollLoop = 0x10000;
mUnrollLoopPage = true;
mScanner->NextToken();
}
else
mErrors->Error(mScanner->mLocation, EERR_PRAGMA_PARAMETER, "Integer literal expected");

View File

@ -25,6 +25,7 @@ protected:
char mCharMap[256];
int mUnrollLoop;
bool mUnrollLoopPage;
uint8* ParseStringLiteral(int msize);