Improve strength reduction in loops
This commit is contained in:
parent
d3734a66e0
commit
3460c06508
|
@ -228,6 +228,9 @@ rem @echo off
|
||||||
@call :testn mmultest.c
|
@call :testn mmultest.c
|
||||||
@if %errorlevel% neq 0 goto :error
|
@if %errorlevel% neq 0 goto :error
|
||||||
|
|
||||||
|
@call :test tileexpand.cpp
|
||||||
|
@if %errorlevel% neq 0 goto :error
|
||||||
|
|
||||||
@exit /b 0
|
@exit /b 0
|
||||||
|
|
||||||
:error
|
:error
|
||||||
|
|
|
@ -0,0 +1,83 @@
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
// Logo size measured in tiles: RenderLogo and VerifyLogo process
// MAP_WIDTH tiles per tile row and MAP_HEIGHT tile rows.
#define MAP_WIDTH 10
|
||||||
|
#define MAP_HEIGHT 2
|
||||||
|
|
||||||
|
// Size of a single tile measured in screen cells; each tile therefore
// occupies TITLE_TILE_WIDTH * TITLE_TILE_HEIGHT (16) consecutive bytes
// of TitleTiles.
#define TITLE_TILE_WIDTH 4
|
||||||
|
#define TITLE_TILE_HEIGHT 4
|
||||||
|
|
||||||
|
// Fixed absolute addresses used by the test. Only PTR_SCREEN and PTR_COLOR
// are referenced in this file (through Screen and Color below); PTR_BUFFER
// and PTR_FONTCHARSET are unused here.
// NOTE(review): PTR_FONTCHARSET has the same value as PTR_COLOR (0xd800) -
// confirm whether that is intentional.
#define PTR_SCREEN ((char *)0xc000)
|
||||||
|
#define PTR_BUFFER ((char *)0xc400)
|
||||||
|
#define PTR_COLOR ((char *)0xd800)
|
||||||
|
#define PTR_FONTCHARSET ((char *)0xd800)
|
||||||
|
|
||||||
|
// Tile index for each map cell. The functions in this file only read the
// first MAP_WIDTH * MAP_HEIGHT (20) entries.
// NOTE(review): "#for" is not a standard C/C++ preprocessor directive;
// presumably a compiler-specific repeat construct that emits "i * 17," for
// i = 0..1023 - confirm against the compiler documentation. Values that do
// not fit into a char are narrowed by the conversion to the element type.
const char TitleMap[1024] = {
|
||||||
|
#for(i, 1024) i * 17,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Tile pixel/character data, 16 bytes per tile; 4096 bytes hold 256 tiles,
// which covers every possible tile index assuming an 8-bit char.
// Filled with the pattern i * 31 via the same "#for" construct as TitleMap.
const char TitleTiles[4096] = {
|
||||||
|
#for(i, 4096) i * 31,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Custom screen address: destination RenderLogo writes tile bytes to and
// VerifyLogo reads back from
|
||||||
|
extern char* const Screen = PTR_SCREEN;
|
||||||
|
|
||||||
|
// Color memory address: RenderLogo stores the constant 1 here for every
// cell it writes on the screen
|
||||||
|
extern char* const Color = PTR_COLOR;
|
||||||
|
|
||||||
|
// Expands the tile map into the screen: for each of the MAP_WIDTH x
// MAP_HEIGHT map cells the referenced 4x4 tile from TitleTiles is copied to
// Screen, and the matching Color cells are set to 1.
//
// screenY - row offset (in screen rows of 40 cells) added to every
//           destination row.
//
// The nested loops and the "40 * (y + screenY) + x" index expressions are
// kept in exactly this form on purpose: this function is the input for the
// compiler's loop strength-reduction test, so do not simplify it by hand.
void RenderLogo(char screenY)
|
||||||
|
{
|
||||||
|
// Running destination pointers (top-left cell of the current tile,
// before the screenY offset is applied) and the current map row.
char * sp = Screen;
|
||||||
|
char * cp = Color;
|
||||||
|
const char * mp = TitleMap;
|
||||||
|
|
||||||
|
for(char ty=0; ty < MAP_HEIGHT; ty++)
|
||||||
|
{
|
||||||
|
for(char tx=0; tx< MAP_WIDTH; tx++)
|
||||||
|
{
|
||||||
|
// Look up the tile index and the start of that tile's 16 bytes.
char ti = mp[tx];
|
||||||
|
const char* tp = TitleTiles + (TITLE_TILE_WIDTH * TITLE_TILE_HEIGHT) * ti;
|
||||||
|
|
||||||
|
// Copy the tile row by row; 40 is the screen row stride.
for(char y=0; y<TITLE_TILE_HEIGHT; y++)
|
||||||
|
{
|
||||||
|
for(char x=0; x<TITLE_TILE_WIDTH; x++)
|
||||||
|
{
|
||||||
|
char c = tp[TITLE_TILE_WIDTH * y + x];
|
||||||
|
sp[40 * (y + screenY) + x] = c;
|
||||||
|
cp[40 * (y + screenY) + x] = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Step right to the next tile column.
sp += TITLE_TILE_WIDTH;
|
||||||
|
cp += TITLE_TILE_WIDTH;
|
||||||
|
}
|
||||||
|
// The inner loop already advanced by MAP_WIDTH * TITLE_TILE_WIDTH = 40
// cells (one screen row); adding 120 more gives 160 = 40 *
// TITLE_TILE_HEIGHT, i.e. the start of the next tile row.
sp += 120;
|
||||||
|
cp += 120;
|
||||||
|
// Advance to the next row of the tile map.
mp += MAP_WIDTH;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Checks the result of RenderLogo: walks every destination cell of the logo
// area and asserts that Screen holds the byte of TitleTiles selected by
// TitleMap for that cell.
//
// screenY - row offset that was passed to RenderLogo.
//
// The source and destination indices are recomputed from scratch with
// division/modulo instead of running pointers, so this acts as an
// independent reference for the pointer arithmetic in RenderLogo.
// Only Screen is verified; the values written to Color are not checked.
void VerifyLogo(char screenY)
|
||||||
|
{
|
||||||
|
// dy/dx: cell coordinates inside the logo area (8 rows x 40 columns).
for(char dy=0; dy<MAP_HEIGHT * TITLE_TILE_HEIGHT; dy++)
|
||||||
|
{
|
||||||
|
for(char dx=0; dx<MAP_WIDTH * TITLE_TILE_WIDTH; dx++)
|
||||||
|
{
|
||||||
|
// Split the cell coordinate into tile coordinate (ty, tx) and the
// position inside that tile (iy, ix).
char ty = dy / TITLE_TILE_HEIGHT, iy = dy % TITLE_TILE_HEIGHT;
|
||||||
|
char tx = dx / TITLE_TILE_WIDTH, ix = dx % TITLE_TILE_WIDTH;
|
||||||
|
|
||||||
|
// si: offset of the expected byte in TitleTiles (tile index * 16 plus
// the offset inside the tile).
int si = TitleMap[MAP_WIDTH * ty + tx] * TITLE_TILE_WIDTH * TITLE_TILE_HEIGHT + TITLE_TILE_WIDTH * iy + ix;
|
||||||
|
// di: offset of the cell on the screen, using the row stride of 40.
int di = 40 * (dy + screenY) + dx;
|
||||||
|
|
||||||
|
assert(Screen[di] == TitleTiles[si]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(void)
|
||||||
|
{
|
||||||
|
RenderLogo(1);
|
||||||
|
VerifyLogo(1);
|
||||||
|
return 0;
|
||||||
|
}
|
|
@ -14316,7 +14316,7 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
|
||||||
|
|
||||||
ai->mSrc[1].mRange.SetLimit(-num, -1);
|
ai->mSrc[1].mRange.SetLimit(-num, -1);
|
||||||
ai->mDst.mRange.SetLimit(-num + 1, 0);
|
ai->mDst.mRange.SetLimit(-num + 1, 0);
|
||||||
ci->mSrc[1].mRange.SetLimit(- num + 1, 0);
|
ci->mSrc[1].mRange.SetLimit(-num + 1, 0);
|
||||||
}
|
}
|
||||||
else if (num > 0)
|
else if (num > 0)
|
||||||
{
|
{
|
||||||
|
@ -14396,16 +14396,17 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
|
||||||
if (!modified)
|
if (!modified)
|
||||||
{
|
{
|
||||||
int tz = tail->mInstructions.Size();
|
int tz = tail->mInstructions.Size();
|
||||||
if (tz > 2)
|
for (int i = 0; i < tz; i++)
|
||||||
{
|
{
|
||||||
InterInstruction* ai = tail->mInstructions[tz - 3];
|
InterInstruction* ai = tail->mInstructions[i];
|
||||||
if (ai->mCode == IC_BINARY_OPERATOR && ai->mOperator == IA_ADD && ai->mSrc[0].mTemp < 0 && ai->mDst.mTemp == ai->mSrc[1].mTemp && ai->mSrc[0].mIntConst > 0 && IsIntegerType(ai->mDst.mType) &&
|
if (ai->mCode == IC_BINARY_OPERATOR && ai->mOperator == IA_ADD && ai->mSrc[0].mTemp < 0 && ai->mDst.mTemp == ai->mSrc[1].mTemp && ai->mSrc[0].mIntConst > 0 && IsIntegerType(ai->mDst.mType) &&
|
||||||
!tail->IsTempModifiedInRange(tz - 1, tz, ai->mDst.mTemp) && !tail->IsTempModifiedInRange(0, tz - 3, ai->mDst.mTemp))
|
!tail->IsTempModifiedInRange(i + 1, tz, ai->mDst.mTemp) &&
|
||||||
|
!tail->IsTempModifiedInRange(0, i - 1, ai->mDst.mTemp))
|
||||||
{
|
{
|
||||||
int i = 0;
|
int j = 0;
|
||||||
while (i + 1 < body.Size() && !body[i]->IsTempModified(ai->mDst.mTemp))
|
while (j + 1 < body.Size() && !body[j]->IsTempModified(ai->mDst.mTemp))
|
||||||
i++;
|
j++;
|
||||||
if (i + 1 == body.Size())
|
if (j + 1 == body.Size())
|
||||||
{
|
{
|
||||||
indexScale[ai->mDst.mTemp] = (int)ai->mSrc[0].mIntConst;
|
indexScale[ai->mDst.mTemp] = (int)ai->mSrc[0].mIntConst;
|
||||||
}
|
}
|
||||||
|
@ -14454,6 +14455,36 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
|
||||||
modified = true;
|
modified = true;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
else if (lins->mOperator == IA_ADD && lins->mSrc[0].mTemp >= 0 && indexScale[lins->mSrc[0].mTemp] != 0 && IsSingleLoopAssign(i, this, body))
|
||||||
|
{
|
||||||
|
if (i + 1 < mInstructions.Size() && mInstructions[i + 1]->mCode == IC_LEA && mInstructions[i + 1]->mSrc[0].mTemp == lins->mDst.mTemp)
|
||||||
|
;
|
||||||
|
else if (lins->mSrc[1].mTemp < 0 || IsLoopInvariantTemp(lins->mSrc[1].mTemp, body))
|
||||||
|
{
|
||||||
|
int s = indexScale[lins->mSrc[0].mTemp];
|
||||||
|
|
||||||
|
mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, lins);
|
||||||
|
mLoopPrefix->mExitRequiredTemps += lins->mDst.mTemp;
|
||||||
|
mEntryRequiredTemps += lins->mDst.mTemp;
|
||||||
|
tail->mExitRequiredTemps += lins->mDst.mTemp;
|
||||||
|
tail->mEntryRequiredTemps += lins->mDst.mTemp;
|
||||||
|
mInstructions.Remove(i);
|
||||||
|
|
||||||
|
InterInstruction* ains = new InterInstruction(lins->mLocation, IC_BINARY_OPERATOR);
|
||||||
|
ains->mOperator = IA_ADD;
|
||||||
|
ains->mDst = lins->mDst;
|
||||||
|
ains->mSrc[1] = lins->mDst;
|
||||||
|
ains->mSrc[0].mType = lins->mDst.mType;
|
||||||
|
ains->mSrc[0].mTemp = -1;
|
||||||
|
ains->mSrc[0].mIntConst = s;
|
||||||
|
tail->mInstructions.Insert(tail->mInstructions.Size() - 3, ains);
|
||||||
|
|
||||||
|
indexScale[ains->mDst.mTemp] = s;
|
||||||
|
|
||||||
|
modified = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (lins->mCode == IC_CONVERSION_OPERATOR && lins->mOperator == IA_EXT8TO16U && i + 1 < mInstructions.Size() && indexScale[lins->mSrc[0].mTemp] != 0 && IsSingleLoopAssign(i, this, body))
|
else if (lins->mCode == IC_CONVERSION_OPERATOR && lins->mOperator == IA_EXT8TO16U && i + 1 < mInstructions.Size() && indexScale[lins->mSrc[0].mTemp] != 0 && IsSingleLoopAssign(i, this, body))
|
||||||
{
|
{
|
||||||
|
@ -14461,7 +14492,7 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
|
||||||
|
|
||||||
if (nins->mCode == IC_BINARY_OPERATOR)
|
if (nins->mCode == IC_BINARY_OPERATOR)
|
||||||
{
|
{
|
||||||
if (nins->mOperator == IA_MUL && nins->mSrc[0].mTemp < 0 && (nins->mDst.IsNotUByte() || !IsSimpleFactor(nins->mSrc[0].mIntConst)) && nins->mSrc[1].mTemp >= 0 && nins->mSrc[1].mFinal && nins->mDst.mTemp && IsSingleLoopAssign(i + 1, this, body))
|
if (nins->mOperator == IA_MUL && nins->mSrc[0].mTemp < 0 && (nins->mDst.IsNotUByte() || !IsSimpleFactor(nins->mSrc[0].mIntConst)) && nins->mSrc[1].mTemp == lins->mDst.mTemp && nins->mSrc[1].mFinal && IsSingleLoopAssign(i + 1, this, body))
|
||||||
{
|
{
|
||||||
mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, lins);
|
mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, lins);
|
||||||
mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, nins);
|
mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, nins);
|
||||||
|
@ -14486,6 +14517,40 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#if 1
|
||||||
|
if (nins->mCode == IC_LEA && nins->mSrc[0].mTemp == lins->mDst.mTemp)
|
||||||
|
{
|
||||||
|
|
||||||
|
}
|
||||||
|
else if (nins->mCode == IC_BINARY_OPERATOR && nins->mOperator == IA_ADD && nins->mSrc[0].mTemp == lins->mDst.mTemp && !nins->mDst.IsNotUByte())
|
||||||
|
{
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int s = indexScale[lins->mSrc[0].mTemp];
|
||||||
|
|
||||||
|
mLoopPrefix->mInstructions.Insert(mLoopPrefix->mInstructions.Size() - 1, lins);
|
||||||
|
mLoopPrefix->mExitRequiredTemps += lins->mDst.mTemp;
|
||||||
|
mEntryRequiredTemps += lins->mDst.mTemp;
|
||||||
|
tail->mExitRequiredTemps += lins->mDst.mTemp;
|
||||||
|
tail->mEntryRequiredTemps += lins->mDst.mTemp;
|
||||||
|
mInstructions.Remove(i);
|
||||||
|
|
||||||
|
InterInstruction* ains = new InterInstruction(lins->mLocation, IC_BINARY_OPERATOR);
|
||||||
|
ains->mOperator = IA_ADD;
|
||||||
|
ains->mDst = lins->mDst;
|
||||||
|
ains->mSrc[1] = lins->mDst;
|
||||||
|
ains->mSrc[0].mType = lins->mDst.mType;
|
||||||
|
ains->mSrc[0].mTemp = -1;
|
||||||
|
ains->mSrc[0].mIntConst = s;
|
||||||
|
tail->mInstructions.Insert(tail->mInstructions.Size() - 3, ains);
|
||||||
|
|
||||||
|
indexScale[ains->mDst.mTemp] = s;
|
||||||
|
|
||||||
|
modified = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
else if (lins->mCode == IC_LEA)
|
else if (lins->mCode == IC_LEA)
|
||||||
{
|
{
|
||||||
|
@ -14629,6 +14694,31 @@ bool InterCodeBasicBlock::SingleTailLoopOptimization(const NumberSet& aliasedPar
|
||||||
|
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (modified)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < indexScale.Size(); j++)
|
||||||
|
{
|
||||||
|
if (indexScale[j] != 0 && !post->mEntryRequiredTemps[j])
|
||||||
|
{
|
||||||
|
int k = 0;
|
||||||
|
int tz = tail->mInstructions.Size();
|
||||||
|
while (k < tz && tail->mInstructions[k]->mDst.mTemp != j)
|
||||||
|
k++;
|
||||||
|
|
||||||
|
if (k < tz && !tail->IsTempReferencedInRange(0, k - 1, j) && !tail->IsTempReferencedInRange(k + 1, tz, j))
|
||||||
|
{
|
||||||
|
int bi = 0;
|
||||||
|
while (bi + 1 < body.Size() && !body[bi]->IsTempReferenced(j))
|
||||||
|
bi++;
|
||||||
|
if (bi + 1 == body.Size())
|
||||||
|
{
|
||||||
|
tail->mInstructions.Remove(k);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -17539,8 +17629,13 @@ bool InterCodeBasicBlock::PeepholeReplaceOptimization(const GrowingVariableArray
|
||||||
|
|
||||||
if (mshift < shift)
|
if (mshift < shift)
|
||||||
{
|
{
|
||||||
|
mInstructions[i + 0]->mDst.mRange.mMaxValue <<= mshift;
|
||||||
mInstructions[i + 0]->mSrc[0].mIntConst = shift - mshift;
|
mInstructions[i + 0]->mSrc[0].mIntConst = shift - mshift;
|
||||||
|
|
||||||
|
mInstructions[i + 1]->mSrc[0].mRange.mMaxValue = mInstructions[i + 0]->mDst.mRange.mMaxValue;
|
||||||
|
mInstructions[i + 1]->mDst.mRange.mMaxValue = mInstructions[i + 0]->mDst.mRange.mMaxValue;
|
||||||
mInstructions[i + 1]->mSrc[1].mIntConst = 255ULL >> shift << mshift;
|
mInstructions[i + 1]->mSrc[1].mIntConst = 255ULL >> shift << mshift;
|
||||||
|
mInstructions[i + 2]->mSrc[1].mRange.mMaxValue = mInstructions[i + 0]->mDst.mRange.mMaxValue;
|
||||||
mInstructions[i + 2]->mSrc[0].mIntConst >>= mshift;
|
mInstructions[i + 2]->mSrc[0].mIntConst >>= mshift;
|
||||||
}
|
}
|
||||||
else if (mshift >= shift)
|
else if (mshift >= shift)
|
||||||
|
@ -20685,7 +20780,7 @@ void InterCodeProcedure::Close(void)
|
||||||
{
|
{
|
||||||
GrowingTypeArray tstack(IT_NONE);
|
GrowingTypeArray tstack(IT_NONE);
|
||||||
|
|
||||||
CheckFunc = !strcmp(mIdent->mString, "RenderLogo");
|
CheckFunc = !strcmp(mIdent->mString, "VerifyLogo");
|
||||||
CheckCase = false;
|
CheckCase = false;
|
||||||
|
|
||||||
mEntryBlock = mBlocks[0];
|
mEntryBlock = mBlocks[0];
|
||||||
|
|
Loading…
Reference in New Issue