/* * Copyright 2013 Vadim Girlin * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * on the rights to use, copy, modify, merge, publish, distribute, sub * license, and/or sell copies of the Software, and to permit persons to whom * the Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Vadim Girlin */ #define PPH_DEBUG 0 #if PPH_DEBUG #define PPH_DUMP(q) do { q } while (0) #else #define PPH_DUMP(q) #endif #include "sb_shader.h" #include "sb_pass.h" namespace r600_sb { int peephole::run() { run_on(sh.root); return 0; } void peephole::run_on(container_node* c) { for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { node *n = *I; if (n->is_container()) run_on(static_cast(n)); else { if (n->is_alu_inst()) { alu_node *a = static_cast(n); if (a->bc.op_ptr->flags & (AF_PRED | AF_SET | AF_CMOV | AF_KILL)) { optimize_cc_op(a); } else if (a->bc.op == ALU_OP1_FLT_TO_INT) { alu_node *s = a; if (get_bool_flt_to_int_source(s)) { convert_float_setcc(a, s); } } } } } } void peephole::optimize_cc_op(alu_node* a) { unsigned aflags = a->bc.op_ptr->flags; if (aflags & (AF_PRED | AF_SET | AF_KILL)) { optimize_cc_op2(a); } else if (aflags & AF_CMOV) { optimize_CNDcc_op(a); } } void peephole::convert_float_setcc(alu_node *f2i, alu_node *s) { alu_node *ns = sh.clone(s); ns->dst[0] = f2i->dst[0]; ns->dst[0]->def = ns; ns->bc.set_op(ns->bc.op + (ALU_OP2_SETE_DX10 - ALU_OP2_SETE)); f2i->insert_after(ns); f2i->remove(); } void peephole::optimize_cc_op2(alu_node* a) { unsigned flags = a->bc.op_ptr->flags; unsigned cc = flags & AF_CC_MASK; if ((cc != AF_CC_E && cc != AF_CC_NE) || a->pred) return; unsigned cmp_type = flags & AF_CMP_TYPE_MASK; unsigned dst_type = flags & AF_DST_TYPE_MASK; int op_kind = (flags & AF_PRED) ? 1 : (flags & AF_SET) ? 2 : (flags & AF_KILL) ? 3 : 0; bool swapped = false; if (a->src[0]->is_const() && a->src[0]->literal_value == literal(0)) { std::swap(a->src[0],a->src[1]); swapped = true; // clear modifiers memset(&a->bc.src[0], 0, sizeof(bc_alu_src)); memset(&a->bc.src[1], 0, sizeof(bc_alu_src)); } if (swapped || (a->src[1]->is_const() && a->src[1]->literal_value == literal(0))) { value *s = a->src[0]; bool_op_info bop = {}; PPH_DUMP( sblog << "cc_op2: "; dump::dump_op(a); sblog << "\n"; ); if (!get_bool_op_info(s, bop)) return; if (cc == AF_CC_E) bop.invert = !bop.invert; bool swap_args = false; cc = bop.n->bc.op_ptr->flags & AF_CC_MASK; if (bop.invert) cc = invert_setcc_condition(cc, swap_args); if (bop.int_cvt) { assert(cmp_type != AF_FLOAT_CMP); cmp_type = AF_FLOAT_CMP; } PPH_DUMP( sblog << "boi node: "; dump::dump_op(bop.n); sblog << " invert: " << bop.invert << " int_cvt: " << bop.int_cvt; sblog <<"\n"; ); unsigned newop; switch(op_kind) { case 1: newop = get_predsetcc_op(cc, cmp_type); break; case 2: newop = get_setcc_op(cc, cmp_type, dst_type != AF_FLOAT_DST); break; case 3: newop = get_killcc_op(cc, cmp_type); break; default: newop = ALU_OP0_NOP; assert(!"invalid op kind"); break; } a->bc.set_op(newop); if (swap_args) { a->src[0] = bop.n->src[1]; a->src[1] = bop.n->src[0]; a->bc.src[0] = bop.n->bc.src[1]; a->bc.src[1] = bop.n->bc.src[0]; } else { a->src[0] = bop.n->src[0]; a->src[1] = bop.n->src[1]; a->bc.src[0] = bop.n->bc.src[0]; a->bc.src[1] = bop.n->bc.src[1]; } } } void peephole::optimize_CNDcc_op(alu_node* a) { unsigned flags = a->bc.op_ptr->flags; unsigned cc = flags & AF_CC_MASK; unsigned cmp_type = flags & AF_CMP_TYPE_MASK; bool swap = false; if (cc == AF_CC_E) { swap = !swap; cc = AF_CC_NE; } else if (cc != AF_CC_NE) return; value *s = a->src[0]; bool_op_info bop = {}; PPH_DUMP( sblog << "cndcc: "; dump::dump_op(a); sblog << "\n"; ); if (!get_bool_op_info(s, bop)) return; alu_node *d = bop.n; if (d->bc.omod) return; PPH_DUMP( sblog << "cndcc def: "; dump::dump_op(d); sblog << "\n"; ); unsigned dflags = d->bc.op_ptr->flags; unsigned dcc = dflags & AF_CC_MASK; unsigned dcmp_type = dflags & AF_CMP_TYPE_MASK; unsigned ddst_type = dflags & AF_DST_TYPE_MASK; int nds; // TODO we can handle some of these cases, // though probably this shouldn't happen if (cmp_type != AF_FLOAT_CMP && ddst_type == AF_FLOAT_DST) return; if (d->src[0]->is_const() && d->src[0]->literal_value == literal(0)) nds = 1; else if ((d->src[1]->is_const() && d->src[1]->literal_value == literal(0))) nds = 0; else return; // can't propagate ABS modifier to CNDcc because it's OP3 if (d->bc.src[nds].abs) return; // TODO we can handle some cases for uint comparison if (dcmp_type == AF_UINT_CMP) return; if (dcc == AF_CC_NE) { dcc = AF_CC_E; swap = !swap; } if (nds == 1) { switch (dcc) { case AF_CC_GT: dcc = AF_CC_GE; swap = !swap; break; case AF_CC_GE: dcc = AF_CC_GT; swap = !swap; break; default: break; } } a->src[0] = d->src[nds]; a->bc.src[0] = d->bc.src[nds]; if (swap) { std::swap(a->src[1], a->src[2]); std::swap(a->bc.src[1], a->bc.src[2]); } a->bc.set_op(get_cndcc_op(dcc, dcmp_type)); } bool peephole::get_bool_flt_to_int_source(alu_node* &a) { if (a->bc.op == ALU_OP1_FLT_TO_INT) { if (a->bc.src[0].neg || a->bc.src[0].abs || a->bc.src[0].rel) return false; value *s = a->src[0]; if (!s || !s->def || !s->def->is_alu_inst()) return false; alu_node *dn = static_cast(s->def); if (dn->is_alu_op(ALU_OP1_TRUNC)) { s = dn->src[0]; if (!s || !s->def || !s->def->is_alu_inst()) return false; if (dn->bc.src[0].neg != 1 || dn->bc.src[0].abs != 0 || dn->bc.src[0].rel != 0) { return false; } dn = static_cast(s->def); } if (dn->bc.op_ptr->flags & AF_SET) { a = dn; return true; } } return false; } bool peephole::get_bool_op_info(value* b, bool_op_info& bop) { node *d = b->def; if (!d || !d->is_alu_inst()) return false; alu_node *dn = static_cast(d); if (dn->bc.op_ptr->flags & AF_SET) { bop.n = dn; if (dn->bc.op_ptr->flags & AF_DX10) bop.int_cvt = true; return true; } if (get_bool_flt_to_int_source(dn)) { bop.n = dn; bop.int_cvt = true; return true; } return false; } } // namespace r600_sb