Discussion:
[Mesa-dev] [PATCH 1/2] nv50/ir: fix use-after-free in
Karol Herbst
2018-12-07 08:57:06 UTC
Permalink
opnd() might delete the passed in instruction, but it's used through
i->srcExists() later in visit

Signed-off-by: Karol Herbst <***@redhat.com>
---
.../nouveau/codegen/nv50_ir_peephole.cpp | 71 +++++++++++--------
1 file changed, 43 insertions(+), 28 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 9524ba63654..d69ceaafd73 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -370,7 +370,8 @@ private:

void expr(Instruction *, ImmediateValue&, ImmediateValue&);
void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&);
- void opnd(Instruction *, ImmediateValue&, int s);
+ /* true if i was deleted */
+ bool opnd(Instruction *i, ImmediateValue&, int s);
void opnd3(Instruction *, ImmediateValue&);

void unary(Instruction *, const ImmediateValue&);
@@ -421,11 +422,13 @@ ConstantFolding::visit(BasicBlock *bb)
i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1))
expr(i, src0, src1);
else
- if (i->srcExists(0) && i->src(0).getImmediate(src0))
- opnd(i, src0, 0);
- else
+ if (i->srcExists(0) && i->src(0).getImmediate(src0)) {
+ if (opnd(i, src0, 0))
+ return true;
+ } else
if (i->srcExists(1) && i->src(1).getImmediate(src1))
- opnd(i, src1, 1);
+ if (opnd(i, src1, 1))
+ return true;
if (i->srcExists(2) && i->src(2).getImmediate(src2))
opnd3(i, src2);
}
@@ -1011,12 +1014,13 @@ ConstantFolding::createMul(DataType ty, Value *def, Value *a, int64_t b, Value *
return false;
}

-void
+bool
ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
{
const int t = !s;
const operation op = i->op;
Instruction *newi = i;
+ bool deleted = false;

switch (i->op) {
case OP_SPLIT: {
@@ -1036,6 +1040,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
val >>= bitsize;
}
delete_Instruction(prog, i);
+ deleted = true;
break;
}
case OP_MUL:
@@ -1050,6 +1055,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
newi = bld.mkCmp(OP_SET, CC_LT, TYPE_S32, i->getDef(0),
TYPE_S32, i->getSrc(t), bld.mkImm(0));
delete_Instruction(prog, i);
+ deleted = true;
} else if (imm0.isInteger(0) || imm0.isInteger(1)) {
// The high bits can't be set in this case (either mul by 0 or
// unsigned by 1)
@@ -1101,8 +1107,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
if (!isFloatType(i->dType) && !i->src(t).mod) {
bld.setPosition(i, false);
int64_t b = typeSizeof(i->dType) == 8 ? imm0.reg.data.s64 : imm0.reg.data.s32;
- if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, NULL))
+ if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, NULL)) {
delete_Instruction(prog, i);
+ deleted = true;
+ }
} else
if (i->postFactor && i->sType == TYPE_F32) {
/* Can't emit a postfactor with an immediate, have to fold it in */
@@ -1139,8 +1147,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
if (!isFloatType(i->dType) && !i->subOp && !i->src(t).mod && !i->src(2).mod) {
bld.setPosition(i, false);
int64_t b = typeSizeof(i->dType) == 8 ? imm0.reg.data.s64 : imm0.reg.data.s32;
- if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, i->getSrc(2)))
+ if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, i->getSrc(2))) {
delete_Instruction(prog, i);
+ deleted = true;
+ }
}
break;
case OP_SUB:
@@ -1210,6 +1220,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s));

delete_Instruction(prog, i);
+ deleted = true;
} else
if (imm0.reg.data.s32 == -1) {
i->op = OP_NEG;
@@ -1242,6 +1253,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB);

delete_Instruction(prog, i);
+ deleted = true;
}
break;

@@ -1273,6 +1285,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
newi = bld.mkOp2(OP_UNION, TYPE_S32, i->getDef(0), v1, v2);

delete_Instruction(prog, i);
+ deleted = true;
}
} else if (s == 1) {
// In this case, we still want the optimized lowering that we get
@@ -1289,6 +1302,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
newi->src(1).mod = Modifier(NV50_IR_MOD_NEG);

delete_Instruction(prog, i);
+ deleted = true;
}
break;

@@ -1301,7 +1315,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t));
CondCode cc, ccZ;
if (imm0.reg.data.u32 != 0 || !si)
- return;
+ return false;
cc = si->setCond;
ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U);
// We do everything assuming var (cmp) 0, reverse the condition if 0 is
@@ -1327,7 +1341,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case CC_GT: break; // bool > 0 -- bool
case CC_NE: break; // bool != 0 -- bool
default:
- return;
+ return false;
}

// Update the condition of this SET to be identical to the origin set,
@@ -1362,13 +1376,13 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
} else if (src->asCmp()) {
CmpInstruction *cmp = src->asCmp();
if (!cmp || cmp->op == OP_SLCT || cmp->getDef(0)->refCount() > 1)
- return;
+ return false;
if (!prog->getTarget()->isOpSupported(cmp->op, TYPE_F32))
- return;
+ return false;
if (imm0.reg.data.f32 != 1.0)
- return;
+ return false;
if (cmp->dType != TYPE_U32)
- return;
+ return false;

cmp->dType = TYPE_F32;
if (i->src(t).mod != Modifier(0)) {
@@ -1435,13 +1449,13 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case OP_MUL:
int muls;
if (isFloatType(si->dType))
- return;
+ return false;
if (si->src(1).getImmediate(imm1))
muls = 1;
else if (si->src(0).getImmediate(imm1))
muls = 0;
else
- return;
+ return false;

bld.setPosition(i, false);
i->op = OP_MUL;
@@ -1452,15 +1466,15 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case OP_ADD:
int adds;
if (isFloatType(si->dType))
- return;
+ return false;
if (si->op != OP_SUB && si->src(0).getImmediate(imm1))
adds = 0;
else if (si->src(1).getImmediate(imm1))
adds = 1;
else
- return;
+ return false;
if (si->src(!adds).mod != Modifier(0))
- return;
+ return false;
// SHL(ADD(x, y), z) = ADD(SHL(x, z), SHL(y, z))

// This is more operations, but if one of x, y is an immediate, then
@@ -1475,7 +1489,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
bld.mkImm(imm0.reg.data.u32)));
break;
default:
- return;
+ return false;
}
}
break;
@@ -1500,7 +1514,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case TYPE_S32: res = util_last_bit_signed(imm0.reg.data.s32) - 1; break;
case TYPE_U32: res = util_last_bit(imm0.reg.data.u32) - 1; break;
default:
- return;
+ return false;
}
if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT && res >= 0)
res = 31 - res;
@@ -1526,11 +1540,11 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)

// TODO: handle 64-bit values properly
if (typeSizeof(i->dType) == 8 || typeSizeof(i->sType) == 8)
- return;
+ return false;

// TODO: handle single byte/word extractions
if (i->subOp)
- return;
+ return false;

bld.setPosition(i, true); /* make sure bld is init'ed */

@@ -1567,7 +1581,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
CLAMP(imm0.reg.data.u16, umin, umax) : \
imm0.reg.data.u16; \
break; \
- default: return; \
+ default: return false; \
} \
i->setSrc(0, bld.mkImm(res.data.dst)); \
break
@@ -1594,7 +1608,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case TYPE_S16: res.data.f32 = (float) imm0.reg.data.s16; break;
case TYPE_S32: res.data.f32 = (float) imm0.reg.data.s32; break;
default:
- return;
+ return false;
}
i->setSrc(0, bld.mkImm(res.data.f32));
break;
@@ -1615,12 +1629,12 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case TYPE_S16: res.data.f64 = (double) imm0.reg.data.s16; break;
case TYPE_S32: res.data.f64 = (double) imm0.reg.data.s32; break;
default:
- return;
+ return false;
}
i->setSrc(0, bld.mkImm(res.data.f64));
break;
default:
- return;
+ return false;
}
#undef CASE

@@ -1631,7 +1645,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
break;
}
default:
- return;
+ return false;
}

// This can get left behind some of the optimizations which simplify
@@ -1646,6 +1660,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)

if (newi->op != op)
foldCount++;
+ return deleted;
}

// =============================================================================
--
2.19.2
Karol Herbst
2018-12-07 08:57:07 UTC
Permalink
this race condition is pretty harmless, but also pretty trivial to fix

Signed-off-by: Karol Herbst <***@redhat.com>
---
.../drivers/nouveau/codegen/nv50_ir_ra.cpp | 23 +++++++++++++------
1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 23bd07af33a..5d1a96f8d71 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -803,7 +803,21 @@ private:
Function *func;
Program *prog;

- static uint8_t relDegree[17][17];
+ struct RelDegree {
+ uint8_t data[17][17];
+
+ RelDegree() {
+ for (int i = 1; i <= 16; ++i)
+ for (int j = 1; j <= 16; ++j)
+ data[i][j] = j * ((i + j - 1) / j);
+ }
+
+ const uint8_t* operator[](std::size_t i) const {
+ return data[i];
+ }
+ };
+
+ static const RelDegree relDegree;

RegisterSet regs;

@@ -815,7 +829,7 @@ private:
std::list<ValuePair> mustSpill;
};

-uint8_t GCRA::relDegree[17][17];
+const GCRA::RelDegree GCRA::relDegree;

GCRA::RIG_Node::RIG_Node() : Node(NULL), next(this), prev(this)
{
@@ -1172,11 +1186,6 @@ GCRA::GCRA(Function *fn, SpillCodeInserter& spill) :
spill(spill)
{
prog = func->getProgram();
-
- // initialize relative degrees array - i takes away from j
- for (int i = 1; i <= 16; ++i)
- for (int j = 1; j <= 16; ++j)
- relDegree[i][j] = j * ((i + j - 1) / j);
}

GCRA::~GCRA()
--
2.19.2
Ilia Mirkin
2018-12-07 12:55:03 UTC
Permalink
Post by Karol Herbst
this race condition is pretty harmless, but also pretty trivial to fix
---
.../drivers/nouveau/codegen/nv50_ir_ra.cpp | 23 +++++++++++++------
1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 23bd07af33a..5d1a96f8d71 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
Function *func;
Program *prog;
- static uint8_t relDegree[17][17];
+ struct RelDegree {
+ uint8_t data[17][17];
+
+ RelDegree() {
+ for (int i = 1; i <= 16; ++i)
+ for (int j = 1; j <= 16; ++j)
+ data[i][j] = j * ((i + j - 1) / j);
+ }
+
+ const uint8_t* operator[](std::size_t i) const {
+ return data[i];
+ }
+ };
+
+ static const RelDegree relDegree;
RegisterSet regs;
std::list<ValuePair> mustSpill;
};
-uint8_t GCRA::relDegree[17][17];
+const GCRA::RelDegree GCRA::relDegree;
GCRA::RIG_Node::RIG_Node() : Node(NULL), next(this), prev(this)
{
spill(spill)
{
prog = func->getProgram();
-
- // initialize relative degrees array - i takes away from j
- for (int i = 1; i <= 16; ++i)
- for (int j = 1; j <= 16; ++j)
- relDegree[i][j] = j * ((i + j - 1) / j);
}
GCRA::~GCRA()
--
2.19.2
Ilia Mirkin
2018-12-07 12:58:35 UTC
Permalink
Post by Karol Herbst
opnd() might delete the passed in instruction, but it's used through
i->srcExists() later in visit
---
.../nouveau/codegen/nv50_ir_peephole.cpp | 71 +++++++++++--------
1 file changed, 43 insertions(+), 28 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 9524ba63654..d69ceaafd73 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
void expr(Instruction *, ImmediateValue&, ImmediateValue&);
void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&);
- void opnd(Instruction *, ImmediateValue&, int s);
+ /* true if i was deleted */
+ bool opnd(Instruction *i, ImmediateValue&, int s);
void opnd3(Instruction *, ImmediateValue&);
void unary(Instruction *, const ImmediateValue&);
@@ -421,11 +422,13 @@ ConstantFolding::visit(BasicBlock *bb)
i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1))
expr(i, src0, src1);
else
- if (i->srcExists(0) && i->src(0).getImmediate(src0))
- opnd(i, src0, 0);
- else
+ if (i->srcExists(0) && i->src(0).getImmediate(src0)) {
+ if (opnd(i, src0, 0))
+ return true;
+ } else
if (i->srcExists(1) && i->src(1).getImmediate(src1))
- opnd(i, src1, 1);
+ if (opnd(i, src1, 1))
+ return true;
if (i->srcExists(2) && i->src(2).getImmediate(src2))
opnd3(i, src2);
Is there a reason not to do this under an else instead? You're
aborting the whole BB loop here whenever we delete an instruction,
which presumably happens with some frequency.
Post by Karol Herbst
}
@@ -1011,12 +1014,13 @@ ConstantFolding::createMul(DataType ty, Value *def, Value *a, int64_t b, Value *
return false;
}
-void
+bool
ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
{
const int t = !s;
const operation op = i->op;
Instruction *newi = i;
+ bool deleted = false;
switch (i->op) {
case OP_SPLIT: {
@@ -1036,6 +1040,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
val >>= bitsize;
}
delete_Instruction(prog, i);
+ deleted = true;
break;
}
@@ -1050,6 +1055,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
newi = bld.mkCmp(OP_SET, CC_LT, TYPE_S32, i->getDef(0),
TYPE_S32, i->getSrc(t), bld.mkImm(0));
delete_Instruction(prog, i);
+ deleted = true;
} else if (imm0.isInteger(0) || imm0.isInteger(1)) {
// The high bits can't be set in this case (either mul by 0 or
// unsigned by 1)
@@ -1101,8 +1107,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
if (!isFloatType(i->dType) && !i->src(t).mod) {
bld.setPosition(i, false);
int64_t b = typeSizeof(i->dType) == 8 ? imm0.reg.data.s64 : imm0.reg.data.s32;
- if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, NULL))
+ if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, NULL)) {
delete_Instruction(prog, i);
+ deleted = true;
+ }
} else
if (i->postFactor && i->sType == TYPE_F32) {
/* Can't emit a postfactor with an immediate, have to fold it in */
@@ -1139,8 +1147,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
if (!isFloatType(i->dType) && !i->subOp && !i->src(t).mod && !i->src(2).mod) {
bld.setPosition(i, false);
int64_t b = typeSizeof(i->dType) == 8 ? imm0.reg.data.s64 : imm0.reg.data.s32;
- if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, i->getSrc(2)))
+ if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, i->getSrc(2))) {
delete_Instruction(prog, i);
+ deleted = true;
+ }
}
break;
@@ -1210,6 +1220,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s));
delete_Instruction(prog, i);
+ deleted = true;
} else
if (imm0.reg.data.s32 == -1) {
i->op = OP_NEG;
@@ -1242,6 +1253,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB);
delete_Instruction(prog, i);
+ deleted = true;
}
break;
@@ -1273,6 +1285,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
newi = bld.mkOp2(OP_UNION, TYPE_S32, i->getDef(0), v1, v2);
delete_Instruction(prog, i);
+ deleted = true;
}
} else if (s == 1) {
// In this case, we still want the optimized lowering that we get
@@ -1289,6 +1302,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
newi->src(1).mod = Modifier(NV50_IR_MOD_NEG);
delete_Instruction(prog, i);
+ deleted = true;
}
break;
@@ -1301,7 +1315,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t));
CondCode cc, ccZ;
if (imm0.reg.data.u32 != 0 || !si)
- return;
+ return false;
cc = si->setCond;
ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U);
// We do everything assuming var (cmp) 0, reverse the condition if 0 is
@@ -1327,7 +1341,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case CC_GT: break; // bool > 0 -- bool
case CC_NE: break; // bool != 0 -- bool
- return;
+ return false;
}
// Update the condition of this SET to be identical to the origin set,
@@ -1362,13 +1376,13 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
} else if (src->asCmp()) {
CmpInstruction *cmp = src->asCmp();
if (!cmp || cmp->op == OP_SLCT || cmp->getDef(0)->refCount() > 1)
- return;
+ return false;
if (!prog->getTarget()->isOpSupported(cmp->op, TYPE_F32))
- return;
+ return false;
if (imm0.reg.data.f32 != 1.0)
- return;
+ return false;
if (cmp->dType != TYPE_U32)
- return;
+ return false;
cmp->dType = TYPE_F32;
if (i->src(t).mod != Modifier(0)) {
@@ -1435,13 +1449,13 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
int muls;
if (isFloatType(si->dType))
- return;
+ return false;
if (si->src(1).getImmediate(imm1))
muls = 1;
else if (si->src(0).getImmediate(imm1))
muls = 0;
else
- return;
+ return false;
bld.setPosition(i, false);
i->op = OP_MUL;
@@ -1452,15 +1466,15 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
int adds;
if (isFloatType(si->dType))
- return;
+ return false;
if (si->op != OP_SUB && si->src(0).getImmediate(imm1))
adds = 0;
else if (si->src(1).getImmediate(imm1))
adds = 1;
else
- return;
+ return false;
if (si->src(!adds).mod != Modifier(0))
- return;
+ return false;
// SHL(ADD(x, y), z) = ADD(SHL(x, z), SHL(y, z))
// This is more operations, but if one of x, y is an immediate, then
@@ -1475,7 +1489,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
bld.mkImm(imm0.reg.data.u32)));
break;
- return;
+ return false;
}
}
break;
@@ -1500,7 +1514,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case TYPE_S32: res = util_last_bit_signed(imm0.reg.data.s32) - 1; break;
case TYPE_U32: res = util_last_bit(imm0.reg.data.u32) - 1; break;
- return;
+ return false;
}
if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT && res >= 0)
res = 31 - res;
@@ -1526,11 +1540,11 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
// TODO: handle 64-bit values properly
if (typeSizeof(i->dType) == 8 || typeSizeof(i->sType) == 8)
- return;
+ return false;
// TODO: handle single byte/word extractions
if (i->subOp)
- return;
+ return false;
bld.setPosition(i, true); /* make sure bld is init'ed */
@@ -1567,7 +1581,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
CLAMP(imm0.reg.data.u16, umin, umax) : \
imm0.reg.data.u16; \
break; \
- default: return; \
+ default: return false; \
} \
i->setSrc(0, bld.mkImm(res.data.dst)); \
break
@@ -1594,7 +1608,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case TYPE_S16: res.data.f32 = (float) imm0.reg.data.s16; break;
case TYPE_S32: res.data.f32 = (float) imm0.reg.data.s32; break;
- return;
+ return false;
}
i->setSrc(0, bld.mkImm(res.data.f32));
break;
@@ -1615,12 +1629,12 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case TYPE_S16: res.data.f64 = (double) imm0.reg.data.s16; break;
case TYPE_S32: res.data.f64 = (double) imm0.reg.data.s32; break;
- return;
+ return false;
}
i->setSrc(0, bld.mkImm(res.data.f64));
break;
- return;
+ return false;
}
#undef CASE
@@ -1631,7 +1645,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
break;
}
- return;
+ return false;
}
// This can get left behind some of the optimizations which simplify
@@ -1646,6 +1660,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
if (newi->op != op)
foldCount++;
+ return deleted;
}
// =============================================================================
--
2.19.2
Karol Herbst
2018-12-07 14:11:52 UTC
Permalink
Post by Ilia Mirkin
Post by Karol Herbst
opnd() might delete the passed in instruction, but it's used through
i->srcExists() later in visit
---
.../nouveau/codegen/nv50_ir_peephole.cpp | 71 +++++++++++--------
1 file changed, 43 insertions(+), 28 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 9524ba63654..d69ceaafd73 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
void expr(Instruction *, ImmediateValue&, ImmediateValue&);
void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&);
- void opnd(Instruction *, ImmediateValue&, int s);
+ /* true if i was deleted */
+ bool opnd(Instruction *i, ImmediateValue&, int s);
void opnd3(Instruction *, ImmediateValue&);
void unary(Instruction *, const ImmediateValue&);
@@ -421,11 +422,13 @@ ConstantFolding::visit(BasicBlock *bb)
i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1))
expr(i, src0, src1);
else
- if (i->srcExists(0) && i->src(0).getImmediate(src0))
- opnd(i, src0, 0);
- else
+ if (i->srcExists(0) && i->src(0).getImmediate(src0)) {
+ if (opnd(i, src0, 0))
+ return true;
+ } else
if (i->srcExists(1) && i->src(1).getImmediate(src1))
- opnd(i, src1, 1);
+ if (opnd(i, src1, 1))
+ return true;
if (i->srcExists(2) && i->src(2).getImmediate(src2))
opnd3(i, src2);
Is there a reason not to do this under an else instead? You're
aborting the whole BB loop here whenever we delete an instruction,
which presumably happens with some frequency.
I think using continue instead should be better.. I simply didn't see
the loop at all
Post by Ilia Mirkin
Post by Karol Herbst
}
@@ -1011,12 +1014,13 @@ ConstantFolding::createMul(DataType ty, Value *def, Value *a, int64_t b, Value *
return false;
}
-void
+bool
ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
{
const int t = !s;
const operation op = i->op;
Instruction *newi = i;
+ bool deleted = false;
switch (i->op) {
case OP_SPLIT: {
@@ -1036,6 +1040,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
val >>= bitsize;
}
delete_Instruction(prog, i);
+ deleted = true;
break;
}
@@ -1050,6 +1055,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
newi = bld.mkCmp(OP_SET, CC_LT, TYPE_S32, i->getDef(0),
TYPE_S32, i->getSrc(t), bld.mkImm(0));
delete_Instruction(prog, i);
+ deleted = true;
} else if (imm0.isInteger(0) || imm0.isInteger(1)) {
// The high bits can't be set in this case (either mul by 0 or
// unsigned by 1)
@@ -1101,8 +1107,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
if (!isFloatType(i->dType) && !i->src(t).mod) {
bld.setPosition(i, false);
int64_t b = typeSizeof(i->dType) == 8 ? imm0.reg.data.s64 : imm0.reg.data.s32;
- if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, NULL))
+ if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, NULL)) {
delete_Instruction(prog, i);
+ deleted = true;
+ }
} else
if (i->postFactor && i->sType == TYPE_F32) {
/* Can't emit a postfactor with an immediate, have to fold it in */
@@ -1139,8 +1147,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
if (!isFloatType(i->dType) && !i->subOp && !i->src(t).mod && !i->src(2).mod) {
bld.setPosition(i, false);
int64_t b = typeSizeof(i->dType) == 8 ? imm0.reg.data.s64 : imm0.reg.data.s32;
- if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, i->getSrc(2)))
+ if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, i->getSrc(2))) {
delete_Instruction(prog, i);
+ deleted = true;
+ }
}
break;
@@ -1210,6 +1220,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s));
delete_Instruction(prog, i);
+ deleted = true;
} else
if (imm0.reg.data.s32 == -1) {
i->op = OP_NEG;
@@ -1242,6 +1253,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB);
delete_Instruction(prog, i);
+ deleted = true;
}
break;
@@ -1273,6 +1285,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
newi = bld.mkOp2(OP_UNION, TYPE_S32, i->getDef(0), v1, v2);
delete_Instruction(prog, i);
+ deleted = true;
}
} else if (s == 1) {
// In this case, we still want the optimized lowering that we get
@@ -1289,6 +1302,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
newi->src(1).mod = Modifier(NV50_IR_MOD_NEG);
delete_Instruction(prog, i);
+ deleted = true;
}
break;
@@ -1301,7 +1315,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t));
CondCode cc, ccZ;
if (imm0.reg.data.u32 != 0 || !si)
- return;
+ return false;
cc = si->setCond;
ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U);
// We do everything assuming var (cmp) 0, reverse the condition if 0 is
@@ -1327,7 +1341,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case CC_GT: break; // bool > 0 -- bool
case CC_NE: break; // bool != 0 -- bool
- return;
+ return false;
}
// Update the condition of this SET to be identical to the origin set,
@@ -1362,13 +1376,13 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
} else if (src->asCmp()) {
CmpInstruction *cmp = src->asCmp();
if (!cmp || cmp->op == OP_SLCT || cmp->getDef(0)->refCount() > 1)
- return;
+ return false;
if (!prog->getTarget()->isOpSupported(cmp->op, TYPE_F32))
- return;
+ return false;
if (imm0.reg.data.f32 != 1.0)
- return;
+ return false;
if (cmp->dType != TYPE_U32)
- return;
+ return false;
cmp->dType = TYPE_F32;
if (i->src(t).mod != Modifier(0)) {
@@ -1435,13 +1449,13 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
int muls;
if (isFloatType(si->dType))
- return;
+ return false;
if (si->src(1).getImmediate(imm1))
muls = 1;
else if (si->src(0).getImmediate(imm1))
muls = 0;
else
- return;
+ return false;
bld.setPosition(i, false);
i->op = OP_MUL;
@@ -1452,15 +1466,15 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
int adds;
if (isFloatType(si->dType))
- return;
+ return false;
if (si->op != OP_SUB && si->src(0).getImmediate(imm1))
adds = 0;
else if (si->src(1).getImmediate(imm1))
adds = 1;
else
- return;
+ return false;
if (si->src(!adds).mod != Modifier(0))
- return;
+ return false;
// SHL(ADD(x, y), z) = ADD(SHL(x, z), SHL(y, z))
// This is more operations, but if one of x, y is an immediate, then
@@ -1475,7 +1489,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
bld.mkImm(imm0.reg.data.u32)));
break;
- return;
+ return false;
}
}
break;
@@ -1500,7 +1514,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case TYPE_S32: res = util_last_bit_signed(imm0.reg.data.s32) - 1; break;
case TYPE_U32: res = util_last_bit(imm0.reg.data.u32) - 1; break;
- return;
+ return false;
}
if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT && res >= 0)
res = 31 - res;
@@ -1526,11 +1540,11 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
// TODO: handle 64-bit values properly
if (typeSizeof(i->dType) == 8 || typeSizeof(i->sType) == 8)
- return;
+ return false;
// TODO: handle single byte/word extractions
if (i->subOp)
- return;
+ return false;
bld.setPosition(i, true); /* make sure bld is init'ed */
@@ -1567,7 +1581,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
CLAMP(imm0.reg.data.u16, umin, umax) : \
imm0.reg.data.u16; \
break; \
- default: return; \
+ default: return false; \
} \
i->setSrc(0, bld.mkImm(res.data.dst)); \
break
@@ -1594,7 +1608,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case TYPE_S16: res.data.f32 = (float) imm0.reg.data.s16; break;
case TYPE_S32: res.data.f32 = (float) imm0.reg.data.s32; break;
- return;
+ return false;
}
i->setSrc(0, bld.mkImm(res.data.f32));
break;
@@ -1615,12 +1629,12 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
case TYPE_S16: res.data.f64 = (double) imm0.reg.data.s16; break;
case TYPE_S32: res.data.f64 = (double) imm0.reg.data.s32; break;
- return;
+ return false;
}
i->setSrc(0, bld.mkImm(res.data.f64));
break;
- return;
+ return false;
}
#undef CASE
@@ -1631,7 +1645,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
break;
}
- return;
+ return false;
}
// This can get left behind some of the optimizations which simplify
@@ -1646,6 +1660,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
if (newi->op != op)
foldCount++;
+ return deleted;
}
// =============================================================================
--
2.19.2
Continue reading on narkive:
Loading...