Karol Herbst
2018-12-07 08:57:06 UTC
opnd() might delete the passed in instruction, but it's used through
i->srcExists() later in visit
Signed-off-by: Karol Herbst <***@redhat.com>
---
.../nouveau/codegen/nv50_ir_peephole.cpp | 71 +++++++++++--------
1 file changed, 43 insertions(+), 28 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 9524ba63654..d69ceaafd73 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -370,7 +370,8 @@ private:
void expr(Instruction *, ImmediateValue&, ImmediateValue&);
void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&);
- void opnd(Instruction *, ImmediateValue&, int s);
+ /* true if i was deleted */
+ bool opnd(Instruction *i, ImmediateValue&, int s);
void opnd3(Instruction *, ImmediateValue&);
void unary(Instruction *, const ImmediateValue&);
@@ -421,11 +422,13 @@ ConstantFolding::visit(BasicBlock *bb)
i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1))
expr(i, src0, src1);
else
- if (i->srcExists(0) && i->src(0).getImmediate(src0))
- opnd(i, src0, 0);
- else
+ if (i->srcExists(0) && i->src(0).getImmediate(src0)) {
+ if (opnd(i, src0, 0))
+ return true;
+ } else
if (i->srcExists(1) && i->src(1).getImmediate(src1))
- opnd(i, src1, 1);
+ if (opnd(i, src1, 1))
+ return true;
if (i->srcExists(2) && i->src(2).getImmediate(src2))
opnd3(i, src2);
}
@@ -1011,12 +1014,13 @@ ConstantFolding::createMul(DataType ty, Value *def, Value *a, int64_t b, Value *
return false;
}
-void
+bool
ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
{
const int t = !s;
const operation op = i->op;
Instruction *newi = i;
+ bool deleted = false;
switch (i->op) {
case OP_SPLIT: {
@@ -1036,6 +1040,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
val >>= bitsize;
}
delete_Instruction(prog, i);
+ deleted = true;
break;
}
case OP_MUL:
@@ -1050,6 +1055,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
newi = bld.mkCmp(OP_SET, CC_LT, TYPE_S32, i->getDef(0),
TYPE_S32, i->getSrc(t), bld.mkImm(0));
delete_Instruction(prog, i);
+ deleted = true;
} else if (imm0.isInteger(0) || imm0.isInteger(1)) {
// The high bits can't be set in this case (either mul by 0 or
// unsigned by 1)
@@ -1101,8 +1107,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
if (!isFloatType(i->dType) && !i->src(t).mod) {
bld.setPosition(i, false);
int64_t b = typeSizeof(i->dType) == 8 ? imm0.reg.data.s64 : imm0.reg.data.s32;
- if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, NULL))
+ if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, NULL)) {
delete_Instruction(prog, i);
+ deleted = true;
+ }
} else
if (i->postFactor && i->sType == TYPE_F32) {
/* Can't emit a postfactor with an immediate, have to fold it in */
@@ -1139,8 +1147,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
if (!isFloatType(i->dType) && !i->subOp && !i->src(t).mod && !i->src(2).mod) {
bld.setPosition(i, false);
int64_t b = typeSizeof(i->dType) == 8 ? imm0.reg.data.s64 : imm0.reg.data.s32;
- if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, i->getSrc(2)))
+ if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, i->getSrc(2))) {
delete_Instruction(prog, i);
+ deleted = true;
+ }
}
break;
case OP_SUB:
@@ -1210,6 +1220,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s));
delete_Instruction(prog, i);
+ deleted = true;
} else
if (imm0.reg.data.s32 == -1) {
i->op = OP_NEG;
@@ -1242,6 +1253,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB);
delete_Instruction(prog, i);
+ deleted = true;
}
break;
@@ -1273,6 +1285,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
newi = bld.mkOp2(OP_UNION, TYPE_S32, i->getDef(0), v1, v2);
delete_Instruction(prog, i);
+ deleted = true;
}
} else if (s == 1) {
// In this case, we still want the optimized lowering that we get
@@ -1289,6 +1302,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
newi->src(1).mod = Modifier(NV50_IR_MOD_NEG);
delete_Instruction(prog, i);
+ deleted = true;
}
break;
@@ -1301,7 +1315,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t));
CondCode cc, ccZ;
if (imm0.reg.data.u32 != 0 || !si)
- return;
+ return false;
cc = si->setCond;
ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U);
// We do everything assuming var (cmp) 0, reverse the condition if 0 is
@@ -1327,7 +1341,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case CC_GT: break; // bool > 0 -- bool
case CC_NE: break; // bool != 0 -- bool
default:
- return;
+ return false;
}
// Update the condition of this SET to be identical to the origin set,
@@ -1362,13 +1376,13 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
} else if (src->asCmp()) {
CmpInstruction *cmp = src->asCmp();
if (!cmp || cmp->op == OP_SLCT || cmp->getDef(0)->refCount() > 1)
- return;
+ return false;
if (!prog->getTarget()->isOpSupported(cmp->op, TYPE_F32))
- return;
+ return false;
if (imm0.reg.data.f32 != 1.0)
- return;
+ return false;
if (cmp->dType != TYPE_U32)
- return;
+ return false;
cmp->dType = TYPE_F32;
if (i->src(t).mod != Modifier(0)) {
@@ -1435,13 +1449,13 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case OP_MUL:
int muls;
if (isFloatType(si->dType))
- return;
+ return false;
if (si->src(1).getImmediate(imm1))
muls = 1;
else if (si->src(0).getImmediate(imm1))
muls = 0;
else
- return;
+ return false;
bld.setPosition(i, false);
i->op = OP_MUL;
@@ -1452,15 +1466,15 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case OP_ADD:
int adds;
if (isFloatType(si->dType))
- return;
+ return false;
if (si->op != OP_SUB && si->src(0).getImmediate(imm1))
adds = 0;
else if (si->src(1).getImmediate(imm1))
adds = 1;
else
- return;
+ return false;
if (si->src(!adds).mod != Modifier(0))
- return;
+ return false;
// SHL(ADD(x, y), z) = ADD(SHL(x, z), SHL(y, z))
// This is more operations, but if one of x, y is an immediate, then
@@ -1475,7 +1489,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
bld.mkImm(imm0.reg.data.u32)));
break;
default:
- return;
+ return false;
}
}
break;
@@ -1500,7 +1514,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case TYPE_S32: res = util_last_bit_signed(imm0.reg.data.s32) - 1; break;
case TYPE_U32: res = util_last_bit(imm0.reg.data.u32) - 1; break;
default:
- return;
+ return false;
}
if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT && res >= 0)
res = 31 - res;
@@ -1526,11 +1540,11 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
// TODO: handle 64-bit values properly
if (typeSizeof(i->dType) == 8 || typeSizeof(i->sType) == 8)
- return;
+ return false;
// TODO: handle single byte/word extractions
if (i->subOp)
- return;
+ return false;
bld.setPosition(i, true); /* make sure bld is init'ed */
@@ -1567,7 +1581,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
CLAMP(imm0.reg.data.u16, umin, umax) : \
imm0.reg.data.u16; \
break; \
- default: return; \
+ default: return false; \
} \
i->setSrc(0, bld.mkImm(res.data.dst)); \
break
@@ -1594,7 +1608,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case TYPE_S16: res.data.f32 = (float) imm0.reg.data.s16; break;
case TYPE_S32: res.data.f32 = (float) imm0.reg.data.s32; break;
default:
- return;
+ return false;
}
i->setSrc(0, bld.mkImm(res.data.f32));
break;
@@ -1615,12 +1629,12 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case TYPE_S16: res.data.f64 = (double) imm0.reg.data.s16; break;
case TYPE_S32: res.data.f64 = (double) imm0.reg.data.s32; break;
default:
- return;
+ return false;
}
i->setSrc(0, bld.mkImm(res.data.f64));
break;
default:
- return;
+ return false;
}
#undef CASE
@@ -1631,7 +1645,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
break;
}
default:
- return;
+ return false;
}
// This can get left behind some of the optimizations which simplify
@@ -1646,6 +1660,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
if (newi->op != op)
foldCount++;
+ return deleted;
}
// =============================================================================
i->srcExists() later in visit
Signed-off-by: Karol Herbst <***@redhat.com>
---
.../nouveau/codegen/nv50_ir_peephole.cpp | 71 +++++++++++--------
1 file changed, 43 insertions(+), 28 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 9524ba63654..d69ceaafd73 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -370,7 +370,8 @@ private:
void expr(Instruction *, ImmediateValue&, ImmediateValue&);
void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&);
- void opnd(Instruction *, ImmediateValue&, int s);
+ /* true if i was deleted */
+ bool opnd(Instruction *i, ImmediateValue&, int s);
void opnd3(Instruction *, ImmediateValue&);
void unary(Instruction *, const ImmediateValue&);
@@ -421,11 +422,13 @@ ConstantFolding::visit(BasicBlock *bb)
i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1))
expr(i, src0, src1);
else
- if (i->srcExists(0) && i->src(0).getImmediate(src0))
- opnd(i, src0, 0);
- else
+ if (i->srcExists(0) && i->src(0).getImmediate(src0)) {
+ if (opnd(i, src0, 0))
+ return true;
+ } else
if (i->srcExists(1) && i->src(1).getImmediate(src1))
- opnd(i, src1, 1);
+ if (opnd(i, src1, 1))
+ return true;
if (i->srcExists(2) && i->src(2).getImmediate(src2))
opnd3(i, src2);
}
@@ -1011,12 +1014,13 @@ ConstantFolding::createMul(DataType ty, Value *def, Value *a, int64_t b, Value *
return false;
}
-void
+bool
ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
{
const int t = !s;
const operation op = i->op;
Instruction *newi = i;
+ bool deleted = false;
switch (i->op) {
case OP_SPLIT: {
@@ -1036,6 +1040,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
val >>= bitsize;
}
delete_Instruction(prog, i);
+ deleted = true;
break;
}
case OP_MUL:
@@ -1050,6 +1055,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
newi = bld.mkCmp(OP_SET, CC_LT, TYPE_S32, i->getDef(0),
TYPE_S32, i->getSrc(t), bld.mkImm(0));
delete_Instruction(prog, i);
+ deleted = true;
} else if (imm0.isInteger(0) || imm0.isInteger(1)) {
// The high bits can't be set in this case (either mul by 0 or
// unsigned by 1)
@@ -1101,8 +1107,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
if (!isFloatType(i->dType) && !i->src(t).mod) {
bld.setPosition(i, false);
int64_t b = typeSizeof(i->dType) == 8 ? imm0.reg.data.s64 : imm0.reg.data.s32;
- if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, NULL))
+ if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, NULL)) {
delete_Instruction(prog, i);
+ deleted = true;
+ }
} else
if (i->postFactor && i->sType == TYPE_F32) {
/* Can't emit a postfactor with an immediate, have to fold it in */
@@ -1139,8 +1147,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
if (!isFloatType(i->dType) && !i->subOp && !i->src(t).mod && !i->src(2).mod) {
bld.setPosition(i, false);
int64_t b = typeSizeof(i->dType) == 8 ? imm0.reg.data.s64 : imm0.reg.data.s32;
- if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, i->getSrc(2)))
+ if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, i->getSrc(2))) {
delete_Instruction(prog, i);
+ deleted = true;
+ }
}
break;
case OP_SUB:
@@ -1210,6 +1220,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s));
delete_Instruction(prog, i);
+ deleted = true;
} else
if (imm0.reg.data.s32 == -1) {
i->op = OP_NEG;
@@ -1242,6 +1253,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB);
delete_Instruction(prog, i);
+ deleted = true;
}
break;
@@ -1273,6 +1285,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
newi = bld.mkOp2(OP_UNION, TYPE_S32, i->getDef(0), v1, v2);
delete_Instruction(prog, i);
+ deleted = true;
}
} else if (s == 1) {
// In this case, we still want the optimized lowering that we get
@@ -1289,6 +1302,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
newi->src(1).mod = Modifier(NV50_IR_MOD_NEG);
delete_Instruction(prog, i);
+ deleted = true;
}
break;
@@ -1301,7 +1315,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t));
CondCode cc, ccZ;
if (imm0.reg.data.u32 != 0 || !si)
- return;
+ return false;
cc = si->setCond;
ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U);
// We do everything assuming var (cmp) 0, reverse the condition if 0 is
@@ -1327,7 +1341,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case CC_GT: break; // bool > 0 -- bool
case CC_NE: break; // bool != 0 -- bool
default:
- return;
+ return false;
}
// Update the condition of this SET to be identical to the origin set,
@@ -1362,13 +1376,13 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
} else if (src->asCmp()) {
CmpInstruction *cmp = src->asCmp();
if (!cmp || cmp->op == OP_SLCT || cmp->getDef(0)->refCount() > 1)
- return;
+ return false;
if (!prog->getTarget()->isOpSupported(cmp->op, TYPE_F32))
- return;
+ return false;
if (imm0.reg.data.f32 != 1.0)
- return;
+ return false;
if (cmp->dType != TYPE_U32)
- return;
+ return false;
cmp->dType = TYPE_F32;
if (i->src(t).mod != Modifier(0)) {
@@ -1435,13 +1449,13 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case OP_MUL:
int muls;
if (isFloatType(si->dType))
- return;
+ return false;
if (si->src(1).getImmediate(imm1))
muls = 1;
else if (si->src(0).getImmediate(imm1))
muls = 0;
else
- return;
+ return false;
bld.setPosition(i, false);
i->op = OP_MUL;
@@ -1452,15 +1466,15 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case OP_ADD:
int adds;
if (isFloatType(si->dType))
- return;
+ return false;
if (si->op != OP_SUB && si->src(0).getImmediate(imm1))
adds = 0;
else if (si->src(1).getImmediate(imm1))
adds = 1;
else
- return;
+ return false;
if (si->src(!adds).mod != Modifier(0))
- return;
+ return false;
// SHL(ADD(x, y), z) = ADD(SHL(x, z), SHL(y, z))
// This is more operations, but if one of x, y is an immediate, then
@@ -1475,7 +1489,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
bld.mkImm(imm0.reg.data.u32)));
break;
default:
- return;
+ return false;
}
}
break;
@@ -1500,7 +1514,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case TYPE_S32: res = util_last_bit_signed(imm0.reg.data.s32) - 1; break;
case TYPE_U32: res = util_last_bit(imm0.reg.data.u32) - 1; break;
default:
- return;
+ return false;
}
if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT && res >= 0)
res = 31 - res;
@@ -1526,11 +1540,11 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
// TODO: handle 64-bit values properly
if (typeSizeof(i->dType) == 8 || typeSizeof(i->sType) == 8)
- return;
+ return false;
// TODO: handle single byte/word extractions
if (i->subOp)
- return;
+ return false;
bld.setPosition(i, true); /* make sure bld is init'ed */
@@ -1567,7 +1581,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
CLAMP(imm0.reg.data.u16, umin, umax) : \
imm0.reg.data.u16; \
break; \
- default: return; \
+ default: return false; \
} \
i->setSrc(0, bld.mkImm(res.data.dst)); \
break
@@ -1594,7 +1608,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case TYPE_S16: res.data.f32 = (float) imm0.reg.data.s16; break;
case TYPE_S32: res.data.f32 = (float) imm0.reg.data.s32; break;
default:
- return;
+ return false;
}
i->setSrc(0, bld.mkImm(res.data.f32));
break;
@@ -1615,12 +1629,12 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case TYPE_S16: res.data.f64 = (double) imm0.reg.data.s16; break;
case TYPE_S32: res.data.f64 = (double) imm0.reg.data.s32; break;
default:
- return;
+ return false;
}
i->setSrc(0, bld.mkImm(res.data.f64));
break;
default:
- return;
+ return false;
}
#undef CASE
@@ -1631,7 +1645,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
break;
}
default:
- return;
+ return false;
}
// This can get left behind some of the optimizations which simplify
@@ -1646,6 +1660,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
if (newi->op != op)
foldCount++;
+ return deleted;
}
// =============================================================================
--
2.19.2
2.19.2