Add page level unrolling
This commit is contained in:
parent
4d274f46d7
commit
b9396542f0
21
README.md
21
README.md
|
@ -248,6 +248,27 @@ The following code scrolls the screen to the left, and completely unrolls the in
|
|||
screen[y][x] = screen[y][x + 1];
|
||||
}
|
||||
|
||||
Sometimes it is better to unroll a loop not in the order in which it normally executes, but in page-sized chunks. Consider, for example, filling a C64 screen with a loop:
|
||||
|
||||
for(int i=0; i<1000; i++)
|
||||
Screen[i] = ' ';
|
||||
|
||||
Unrolling this loop in the usual way would not help, because the index would still not fit into the 8-bit x or y register. With a page-level unroll, the compiler unrolls the loop into four stores, each 250 bytes apart, and uses the y or x register for indexing:
|
||||
|
||||
#pragma unroll(page)
|
||||
for(int i=0; i<1000; i++)
|
||||
Screen[i] = ' ';
|
||||
|
||||
0921 LDY #$00
|
||||
0923 LDA #$20
|
||||
0925 STA $0400,y
|
||||
0928 STA $04fa,y
|
||||
092b STA $05f4,y
|
||||
092e STA $06ee,y
|
||||
0931 INY
|
||||
0932 CPY #$fa
|
||||
0934 BCC $0925
|
||||
|
||||
|
||||
### Marking functions as native
|
||||
|
||||
|
|
|
@ -7343,16 +7343,42 @@ bool InterCodeBasicBlock::SimplifyIntegerNumeric(const GrowingInstructionPtrArra
|
|||
{
|
||||
InterInstruction* pins = ltvalue[ins->mSrc[0].mTemp];
|
||||
|
||||
if (ins->mSrc[0].mMemory == IM_INDIRECT && pins->mCode == IC_LEA)
|
||||
if (pins->mCode == IC_LEA)
|
||||
{
|
||||
if (ins->mSrc[0].mMemory == IM_INDIRECT)
|
||||
ins->mSrc[0].mLinkerObject = pins->mSrc[1].mLinkerObject;
|
||||
|
||||
if (pins->mCode == IC_LEA && pins->mSrc[0].mTemp < 0 && ins->mSrc[0].mIntConst + pins->mSrc[0].mIntConst >= 0)
|
||||
if (pins->mSrc[0].mTemp < 0 && ins->mSrc[0].mIntConst + pins->mSrc[0].mIntConst >= 0)
|
||||
{
|
||||
ins->mSrc[0].Forward(pins->mSrc[1]);
|
||||
pins->mSrc[1].mFinal = false;
|
||||
ins->mSrc[0].mIntConst += pins->mSrc[0].mIntConst;
|
||||
changed = true;
|
||||
}
|
||||
else if (pins->mSrc[1].mTemp < 0 && pins->mSrc[0].mTemp >= 0 && ins->mSrc[0].mIntConst && (ins->mSrc[0].mIntConst >= 256 || pins->mSrc[0].IsUByte()))
|
||||
{
|
||||
int k = mInstructions.IndexOf(pins);
|
||||
if (k >= 0)
|
||||
{
|
||||
if (spareTemps + 2 >= ltvalue.Size())
|
||||
return true;
|
||||
|
||||
InterInstruction* nins = new InterInstruction();
|
||||
nins->mCode = IC_LEA;
|
||||
nins->mSrc[0].Forward(pins->mSrc[0]);
|
||||
nins->mSrc[1].ForwardMem(pins->mSrc[1]);
|
||||
nins->mSrc[1].mIntConst += ins->mSrc[0].mIntConst;
|
||||
nins->mDst.mTemp = spareTemps++;
|
||||
nins->mDst.mType = IT_POINTER;
|
||||
nins->mDst.mRange = ins->mDst.mRange;
|
||||
ins->mSrc[0].mIntConst = 0;
|
||||
ins->mSrc[0].mTemp = nins->mDst.mTemp;
|
||||
|
||||
mInstructions.Insert(k + 1, nins);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
|
@ -10504,6 +10530,8 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa
|
|||
{
|
||||
if (cins->mCode == IC_LEA && cins->mSrc[1].mTemp == st && cins->mSrc[0].mTemp < 0)
|
||||
toffset += cins->mSrc[0].mIntConst;
|
||||
else if (cins->mCode == IC_BINARY_OPERATOR && cins->mOperator == IA_ADD && cins->mSrc[1].mTemp == st && cins->mSrc[0].mTemp < 0)
|
||||
toffset += cins->mSrc[0].mIntConst;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
@ -10525,6 +10553,16 @@ void InterCodeBasicBlock::SingleBlockLoopOptimisation(const NumberSet& aliasedPa
|
|||
ins->mSrc[0].mType = IT_INT16;
|
||||
ins->mSrc[0].mIntConst = -toffset;
|
||||
}
|
||||
else
|
||||
{
|
||||
ins->mCode = IC_BINARY_OPERATOR;
|
||||
ins->mOperator = IA_ADD;
|
||||
ins->mNumOperands = 2;
|
||||
ins->mSrc[1] = ins->mSrc[0];
|
||||
ins->mSrc[0].mTemp = -1;
|
||||
ins->mSrc[0].mType = IT_INT16;
|
||||
ins->mSrc[0].mIntConst = -toffset;
|
||||
}
|
||||
}
|
||||
tailBlock->mInstructions.Insert(0, ins);
|
||||
mInstructions.Remove(i);
|
||||
|
|
|
@ -9778,6 +9778,19 @@ void NativeCodeBasicBlock::RelationalOperator(InterCodeProcedure* proc, const In
|
|||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if 1
|
||||
// Special case counting to 256
|
||||
else if (ins->mSrc[0].mTemp < 0 && ins->mSrc[0].mIntConst == 256 && op == IA_CMPLU &&
|
||||
ins->mSrc[1].mRange.mMinState == IntegerValueRange::S_BOUND && ins->mSrc[1].mRange.mMinValue > 0 &&
|
||||
ins->mSrc[1].mRange.mMaxState == IntegerValueRange::S_BOUND && ins->mSrc[1].mRange.mMaxValue == 256)
|
||||
{
|
||||
if (ins->mSrc[1].mTemp < 0)
|
||||
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_IMMEDIATE, ins->mSrc[1].mIntConst & 0xff));
|
||||
else
|
||||
mIns.Push(NativeCodeInstruction(ASMIT_LDA, ASMIM_ZERO_PAGE, BC_REG_TMP + proc->mTempOffset[ins->mSrc[1].mTemp]));
|
||||
this->Close(falseJump, trueJump, ASMIT_BEQ);
|
||||
}
|
||||
#endif
|
||||
else
|
||||
{
|
||||
|
@ -19229,7 +19242,7 @@ bool NativeCodeBasicBlock::OptimizeSimpleLoopInvariant(NativeCodeProcedure* proc
|
|||
changed = true;
|
||||
}
|
||||
}
|
||||
else if (mIns[ai].mType == ASMIT_LDY && mIns[ai].mMode == ASMIM_ZERO_PAGE)
|
||||
else if (mIns[ai].mType == ASMIT_LDY && mIns[ai].mMode == ASMIM_ZERO_PAGE && !(mIns[ai].mLive & LIVE_CPU_REG_Z))
|
||||
{
|
||||
int i = 0;
|
||||
while (i < mIns.Size() && (i == ai || !mIns[i].ChangesYReg()))
|
||||
|
@ -21811,7 +21824,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
|
|||
if (MoveStoreXUp(i))
|
||||
changed = true;
|
||||
}
|
||||
else if (mIns[i].mType == ASMIT_LDX && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & LIVE_MEM))
|
||||
else if (mIns[i].mType == ASMIT_LDX && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & (LIVE_MEM | LIVE_CPU_REG_Z)))
|
||||
{
|
||||
if (MoveLoadXUp(i))
|
||||
changed = true;
|
||||
|
@ -21824,7 +21837,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
|
|||
changed = true;
|
||||
}
|
||||
}
|
||||
else if (mIns[i].mType == ASMIT_LDY && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & LIVE_MEM))
|
||||
else if (mIns[i].mType == ASMIT_LDY && mIns[i].mMode == ASMIM_ZERO_PAGE && !(mIns[i].mLive & (LIVE_MEM | LIVE_CPU_REG_Z)))
|
||||
{
|
||||
if (MoveLoadYUp(i))
|
||||
changed = true;
|
||||
|
@ -22836,7 +22849,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
|
|||
mIns[i + 0].mType == ASMIT_TXA &&
|
||||
mIns[i + 1].mType == ASMIT_TAX)
|
||||
{
|
||||
mIns[i + 0].mLive |= LIVE_CPU_REG_X;
|
||||
mIns[i + 0].mLive |= mIns[i + 1].mLive;
|
||||
mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED;
|
||||
progress = true;
|
||||
}
|
||||
|
@ -22844,7 +22857,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
|
|||
mIns[i + 0].mType == ASMIT_TYA &&
|
||||
mIns[i + 1].mType == ASMIT_TAY)
|
||||
{
|
||||
mIns[i + 0].mLive |= LIVE_CPU_REG_Y;
|
||||
mIns[i + 0].mLive |= mIns[i + 1].mLive;
|
||||
mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED;
|
||||
progress = true;
|
||||
}
|
||||
|
@ -22852,7 +22865,7 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
|
|||
mIns[i + 0].mType == ASMIT_TAX &&
|
||||
mIns[i + 1].mType == ASMIT_TXA)
|
||||
{
|
||||
mIns[i + 0].mLive |= LIVE_CPU_REG_A;
|
||||
mIns[i + 0].mLive |= mIns[i + 1].mLive;
|
||||
mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED;
|
||||
progress = true;
|
||||
}
|
||||
|
@ -22860,7 +22873,23 @@ bool NativeCodeBasicBlock::PeepHoleOptimizer(NativeCodeProcedure* proc, int pass
|
|||
mIns[i + 0].mType == ASMIT_TAY &&
|
||||
mIns[i + 1].mType == ASMIT_TYA)
|
||||
{
|
||||
mIns[i + 0].mLive |= LIVE_CPU_REG_A;
|
||||
mIns[i + 0].mLive |= mIns[i + 1].mLive;
|
||||
mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED;
|
||||
progress = true;
|
||||
}
|
||||
else if (
|
||||
mIns[i + 0].mType == ASMIT_INY &&
|
||||
mIns[i + 1].mType == ASMIT_TYA && !(mIns[i + 1].mLive & LIVE_CPU_REG_A))
|
||||
{
|
||||
mIns[i + 0].mLive |= mIns[i + 1].mLive;
|
||||
mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED;
|
||||
progress = true;
|
||||
}
|
||||
else if (
|
||||
mIns[i + 0].mType == ASMIT_INX &&
|
||||
mIns[i + 1].mType == ASMIT_TXA && !(mIns[i + 1].mLive & LIVE_CPU_REG_A))
|
||||
{
|
||||
mIns[i + 0].mLive |= mIns[i + 1].mLive;
|
||||
mIns[i + 1].mType = ASMIT_NOP; mIns[i + 1].mMode = ASMIM_IMPLIED;
|
||||
progress = true;
|
||||
}
|
||||
|
|
|
@ -14,6 +14,7 @@ Parser::Parser(Errors* errors, Scanner* scanner, CompilationUnits* compilationUn
|
|||
mBSSection = compilationUnits->mSectionBSS;
|
||||
|
||||
mUnrollLoop = 0;
|
||||
mUnrollLoopPage = false;
|
||||
|
||||
for (int i = 0; i < 256; i++)
|
||||
mCharMap[i] = i;
|
||||
|
@ -2126,7 +2127,9 @@ Expression* Parser::ParseStatement(void)
|
|||
exp = new Expression(mScanner->mLocation, EX_FOR);
|
||||
|
||||
int unrollLoop = mUnrollLoop;
|
||||
bool unrollPage = mUnrollLoopPage;
|
||||
mUnrollLoop = 0;
|
||||
mUnrollLoopPage = false;
|
||||
|
||||
Expression* initExp = nullptr, * iterateExp = nullptr, * conditionExp = nullptr, * bodyExp = nullptr, * finalExp = nullptr;
|
||||
|
||||
|
@ -2163,16 +2166,92 @@ Expression* Parser::ParseStatement(void)
|
|||
if (unrollLoop > 1 && initExp && iterateExp && conditionExp)
|
||||
{
|
||||
if ((initExp->mType == EX_ASSIGNMENT || initExp->mType == EX_INITIALIZATION) && initExp->mLeft->mType == EX_VARIABLE && initExp->mRight->mType == EX_CONSTANT &&
|
||||
(iterateExp->mType == EX_POSTINCDEC || iterateExp->mType == EX_PREINCDEC) && iterateExp->mLeft->IsSame(initExp->mLeft) &&
|
||||
(iterateExp->mType == EX_POSTINCDEC || iterateExp->mType == EX_PREINCDEC || iterateExp->mType == EX_ASSIGNMENT && iterateExp->mToken == TK_ASSIGN_ADD && iterateExp->mRight->mType == EX_CONSTANT) &&
|
||||
iterateExp->mLeft->IsSame(initExp->mLeft) &&
|
||||
conditionExp->mType == EX_RELATIONAL && conditionExp->mToken == TK_LESS_THAN && conditionExp->mLeft->IsSame(initExp->mLeft) && conditionExp->mRight->mType == EX_CONSTANT)
|
||||
{
|
||||
if (initExp->mRight->mDecValue->mType == DT_CONST_INTEGER && conditionExp->mRight->mDecValue->mType == DT_CONST_INTEGER)
|
||||
{
|
||||
int startValue = initExp->mRight->mDecValue->mInteger;
|
||||
int endValue = conditionExp->mRight->mDecValue->mInteger;
|
||||
int stepValue = 1;
|
||||
if (iterateExp->mType == EX_ASSIGNMENT)
|
||||
stepValue = iterateExp->mRight->mDecValue->mInteger;
|
||||
|
||||
int remain = (endValue - startValue) % unrollLoop;
|
||||
endValue -= remain;
|
||||
if (unrollPage)
|
||||
{
|
||||
int numLoops = (endValue - startValue + 255) / 256;
|
||||
int numIterations = (endValue - startValue) / numLoops;
|
||||
int stride = (endValue - startValue + numLoops - 1) / numLoops;
|
||||
int remain = (endValue - startValue) - numIterations * numLoops;
|
||||
|
||||
Expression* unrollBody = new Expression(mScanner->mLocation, EX_SEQUENCE);
|
||||
unrollBody->mLeft = bodyExp;
|
||||
Expression* bexp = unrollBody;
|
||||
|
||||
Expression* iexp = new Expression(mScanner->mLocation, EX_ASSIGNMENT);
|
||||
iexp->mToken = TK_ASSIGN_ADD;
|
||||
iexp->mLeft = iterateExp->mLeft;
|
||||
iexp->mRight = new Expression(mScanner->mLocation, EX_CONSTANT);
|
||||
|
||||
Declaration * idec = new Declaration(mScanner->mLocation, DT_CONST_INTEGER);
|
||||
idec->mInteger = stride;
|
||||
idec->mBase = TheSignedIntTypeDeclaration;
|
||||
iexp->mRight = new Expression(mScanner->mLocation, EX_CONSTANT);
|
||||
iexp->mRight->mDecValue = idec;
|
||||
iexp->mRight->mDecType = idec->mBase;
|
||||
|
||||
Expression* dexp = new Expression(mScanner->mLocation, EX_ASSIGNMENT);
|
||||
dexp->mToken = TK_ASSIGN_SUB;
|
||||
dexp->mLeft = iterateExp->mLeft;
|
||||
dexp->mRight = new Expression(mScanner->mLocation, EX_CONSTANT);
|
||||
|
||||
Declaration* ddec = new Declaration(mScanner->mLocation, DT_CONST_INTEGER);
|
||||
ddec->mInteger = stride * (numLoops - 1);
|
||||
ddec->mBase = TheSignedIntTypeDeclaration;
|
||||
dexp->mRight = new Expression(mScanner->mLocation, EX_CONSTANT);
|
||||
dexp->mRight->mDecValue = ddec;
|
||||
dexp->mRight->mDecType = ddec->mBase;
|
||||
|
||||
for (int i = 1; i < numLoops; i++)
|
||||
{
|
||||
bexp->mRight = new Expression(mScanner->mLocation, EX_SEQUENCE);
|
||||
bexp = bexp->mRight;
|
||||
bexp->mLeft = iexp;
|
||||
bexp->mRight = new Expression(mScanner->mLocation, EX_SEQUENCE);
|
||||
bexp = bexp->mRight;
|
||||
bexp->mLeft = bodyExp;
|
||||
}
|
||||
bexp->mRight = new Expression(mScanner->mLocation, EX_SEQUENCE);
|
||||
bexp = bexp->mRight;
|
||||
bexp->mLeft = dexp;
|
||||
|
||||
conditionExp->mRight->mDecValue->mInteger = numIterations;
|
||||
|
||||
if (remain)
|
||||
{
|
||||
finalExp = new Expression(mScanner->mLocation, EX_SEQUENCE);
|
||||
finalExp->mLeft = bodyExp;
|
||||
Expression* bexp = finalExp;
|
||||
|
||||
for (int i = 1; i < remain; i++)
|
||||
{
|
||||
bexp->mRight = new Expression(mScanner->mLocation, EX_SEQUENCE);
|
||||
bexp = bexp->mRight;
|
||||
bexp->mLeft = iexp;
|
||||
bexp->mRight = new Expression(mScanner->mLocation, EX_SEQUENCE);
|
||||
bexp = bexp->mRight;
|
||||
bexp->mLeft = bodyExp;
|
||||
}
|
||||
}
|
||||
|
||||
bodyExp = unrollBody;
|
||||
}
|
||||
else
|
||||
{
|
||||
int numSteps = (endValue - startValue) / stepValue;
|
||||
int remain = numSteps % unrollLoop;
|
||||
endValue -= remain * stepValue;
|
||||
|
||||
conditionExp->mRight->mDecValue->mInteger = endValue;
|
||||
|
||||
|
@ -2211,6 +2290,7 @@ Expression* Parser::ParseStatement(void)
|
|||
|
||||
bodyExp = unrollBody;
|
||||
}
|
||||
}
|
||||
else
|
||||
mErrors->Error(exp->mLocation, EWARN_LOOP_UNROLL_IGNORED, "Loop unroll ignored, bounds and step not integer");
|
||||
}
|
||||
|
@ -3672,6 +3752,8 @@ void Parser::ParsePragma(void)
|
|||
mScanner->NextToken();
|
||||
ConsumeToken(TK_OPEN_PARENTHESIS);
|
||||
|
||||
mUnrollLoopPage = false;
|
||||
|
||||
if (mScanner->mToken == TK_INTEGER)
|
||||
{
|
||||
mUnrollLoop = mScanner->mTokenInteger;
|
||||
|
@ -3682,6 +3764,12 @@ void Parser::ParsePragma(void)
|
|||
mUnrollLoop = 0x10000;
|
||||
mScanner->NextToken();
|
||||
}
|
||||
else if (mScanner->mToken == TK_IDENT && !strcmp(mScanner->mTokenIdent->mString, "page"))
|
||||
{
|
||||
mUnrollLoop = 0x10000;
|
||||
mUnrollLoopPage = true;
|
||||
mScanner->NextToken();
|
||||
}
|
||||
else
|
||||
mErrors->Error(mScanner->mLocation, EERR_PRAGMA_PARAMETER, "Integer literal expected");
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@ protected:
|
|||
|
||||
char mCharMap[256];
|
||||
int mUnrollLoop;
|
||||
bool mUnrollLoopPage;
|
||||
|
||||
uint8* ParseStringLiteral(int msize);
|
||||
|
||||
|
|
Loading…
Reference in New Issue