/*
 * Copyright (C) 2009 Nicolai Haehnle.
 * Copyright 2010 Tom Stellard
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "radeon_dataflow.h"

#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_list.h"
#include "radeon_swizzle.h"
#include "radeon_variable.h"

/**
 * User data for src_clobbered_reads_cb(): describes one register write
 * (File/Index/Mask) and carries the reader data that must be flagged when
 * the writer instruction reads that register.
 */
struct src_clobbered_reads_cb_data {
	rc_register_file File;
	unsigned int Index;
	unsigned int Mask;
	struct rc_reader_data * ReaderData;
};

/**
 * Callback used by presub_helper() to rewrite one source of a reader.
 * Arguments are: the ADD instruction, the reader instruction and the index
 * of the reader's source register that is to be replaced.
 */
typedef void (*rc_presub_replace_fn)(struct rc_instruction *,
					struct rc_instruction *,
					unsigned int);

/**
 * Compose two source registers into one.
 *
 * The result reads inner's register (File, Index, RelAddr) with inner's
 * swizzle combined with outer's.  If outer takes the absolute value, the
 * result has Abs set and only outer's negate bits; otherwise inner's Abs is
 * kept and inner's negate bits are routed through outer's swizzle and then
 * XORed with outer's negate bits.
 */
static struct rc_src_register chain_srcregs(struct rc_src_register outer,
						struct rc_src_register inner)
{
	struct rc_src_register combine;
	combine.File = inner.File;
	combine.Index = inner.Index;
	combine.RelAddr = inner.RelAddr;
	if (outer.Abs) {
		combine.Abs = 1;
		combine.Negate = outer.Negate;
	} else {
		combine.Abs = inner.Abs;
		combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
		combine.Negate ^= outer.Negate;
	}
	combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
	return combine;
}

/**
 * Reader callback handed to rc_get_readers() by copy_propagate().
 *
 * Sets reader_data->Abort when the MOV (reader_data->Writer) must not be
 * propagated into this reader.
 *
 * @param data Pointer to the struct rc_reader_data being filled in.
 * @param inst The instruction that reads the MOV's destination.
 * @param src  The source register of inst that performs the read.
 */
static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
						struct rc_src_register * src)
{
	rc_register_file file = src->File;
	struct rc_reader_data * reader_data = data;

	/* The reader would inherit the writer's presubtract operation, so it
	 * has to be able to use it. */
	if(!rc_inst_can_use_presub(inst,
				reader_data->Writer->U.I.PreSub.Opcode,
				rc_swizzle_to_writemask(src->Swizzle),
				src,
				&reader_data->Writer->U.I.PreSub.SrcReg[0],
				&reader_data->Writer->U.I.PreSub.SrcReg[1])) {
		reader_data->Abort = 1;
		return;
	}

	/* XXX This could probably be handled better. */
	if (file == RC_FILE_ADDRESS) {
		reader_data->Abort = 1;
		return;
	}

	/* These instructions cannot read from the constants file.
	 * see radeonTransformTEX()
	 */
	if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
			reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
				(inst->U.I.Opcode == RC_OPCODE_TEX ||
				inst->U.I.Opcode == RC_OPCODE_TXB ||
				inst->U.I.Opcode == RC_OPCODE_TXP ||
				inst->U.I.Opcode == RC_OPCODE_TXD ||
				inst->U.I.Opcode == RC_OPCODE_TXL ||
				inst->U.I.Opcode == RC_OPCODE_KIL)){
		reader_data->Abort = 1;
		return;
	}
}

/**
 * Callback for rc_for_all_reads_src(), invoked for every source of the
 * writer instruction.
 *
 * Sets AbortOnRead to RC_MASK_XYZW on the reader data if the source reads
 * the register described by the callback data (same file and index, and
 * overlapping components), or if the source is relatively addressed and the
 * described register belongs to the address file.
 */
static void src_clobbered_reads_cb(
	void * data,
	struct rc_instruction * inst,
	struct rc_src_register * src)
{
	struct src_clobbered_reads_cb_data * sc_data = data;

	if (src->File == sc_data->File
	    && src->Index == sc_data->Index
	    && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {

		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
	}

	if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
	}
}

/**
 * Write callback handed to rc_get_readers().
 *
 * For a register write (file, index, mask) performed by inst, checks every
 * source of reader_data->Writer against that register through
 * src_clobbered_reads_cb().
 */
static void is_src_clobbered_scan_write(
	void * data,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct src_clobbered_reads_cb_data sc_data;
	struct rc_reader_data * reader_data = data;
	sc_data.File = file;
	sc_data.Index = index;
	sc_data.Mask = mask;
	sc_data.ReaderData = reader_data;
	rc_for_all_reads_src(reader_data->Writer,
					src_clobbered_reads_cb, &sc_data);
}

/**
 * Try to remove a MOV by substituting its source directly into every
 * instruction that reads its destination.
 *
 * Nothing is changed unless the destination is a temporary, the MOV does not
 * write the ALU result, rc_get_readers() found at least one reader and no
 * callback requested an abort.  On success inst_mov is removed from the
 * program.
 */
static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
{
	struct rc_reader_data reader_data;
	unsigned int i;

	if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
	    inst_mov->U.I.WriteALUResult)
		return;

	/* Get a list of all the readers of this MOV instruction. */
	reader_data.ExitOnAbort = 1;
	rc_get_readers(c, inst_mov, &reader_data,
		       copy_propagate_scan_read, NULL,
		       is_src_clobbered_scan_write);

	if (reader_data.Abort || reader_data.ReaderCount == 0)
		return;

	/* We can propagate SaturateMode if all the readers are MOV instructions
	 * without a presubtract operation, source negation and absolute.
	 * In that case, we just move SaturateMode to all readers.
	 */
	if (inst_mov->U.I.SaturateMode) {
		for (i = 0; i < reader_data.ReaderCount; i++) {
			struct rc_instruction * inst = reader_data.Readers[i].Inst;

			if (inst->U.I.Opcode != RC_OPCODE_MOV ||
			    inst->U.I.SrcReg[0].File == RC_FILE_PRESUB ||
			    inst->U.I.SrcReg[0].Abs ||
			    inst->U.I.SrcReg[0].Negate) {
				return;
			}
		}
	}

	/* Propagate the MOV instruction. */
	for (i = 0; i < reader_data.ReaderCount; i++) {
		struct rc_instruction * inst = reader_data.Readers[i].Inst;
		*reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);

		if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
			inst->U.I.PreSub = inst_mov->U.I.PreSub;
		if (!inst->U.I.SaturateMode)
			inst->U.I.SaturateMode = inst_mov->U.I.SaturateMode;
	}

	/* Finally, remove the original MOV instruction */
	rc_remove_instruction(inst_mov);
}

/**
 * Check if a source register is actually always the same
 * swizzle constant.
 *
 * @param src     The source register to inspect.
 * @param pswz    Receives the common constant swizzle on success and 0 on
 *                failure.
 * @param pnegate Receives the negate bit shared by all used channels.  It is
 *                only written once a used channel has been seen.
 * @return 1 if src.File is RC_FILE_NONE and every channel that is not
 *         RC_SWIZZLE_UNUSED selects the same constant swizzle with the same
 *         negate bit, 0 otherwise.
 */
static int is_src_uniform_constant(struct rc_src_register src,
		rc_swizzle * pswz, unsigned int * pnegate)
{
	int have_used = 0;

	if (src.File != RC_FILE_NONE) {
		*pswz = 0;
		return 0;
	}

	for(unsigned int chan = 0; chan < 4; ++chan) {
		unsigned int swz = GET_SWZ(src.Swizzle, chan);
		/* Swizzle values below 4 select a register component. */
		if (swz < 4) {
			*pswz = 0;
			return 0;
		}
		if (swz == RC_SWIZZLE_UNUSED)
			continue;

		if (!have_used) {
			*pswz = swz;
			*pnegate = GET_BIT(src.Negate, chan);
			have_used = 1;
		} else {
			if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
				*pswz = 0;
				return 0;
			}
		}
	}

	return 1;
}

/**
 * Simplify MAD dst, a, b, c when one of its sources is a uniform constant:
 *   c == 0           -> MUL dst, a, b
 *   b == 1 or a == 1 -> ADD dst, (other factor), c; the other factor is
 *                       negated when the constant 1 was negated
 *   b == 0 or a == 0 -> MOV dst, c
 * The now-unused trailing sources are not cleared here; constant_folding()
 * does that after the call.
 */
static void constant_folding_mad(struct rc_instruction * inst)
{
	rc_swizzle swz = 0;
	unsigned int negate= 0;

	if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
		if (swz == RC_SWIZZLE_ZERO) {
			inst->U.I.Opcode = RC_OPCODE_MUL;
			return;
		}
	}

	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
		if (swz == RC_SWIZZLE_ONE) {
			inst->U.I.Opcode = RC_OPCODE_ADD;
			if (negate)
				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
			inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
			return;
		} else if (swz == RC_SWIZZLE_ZERO) {
			inst->U.I.Opcode = RC_OPCODE_MOV;
			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
			return;
		}
	}

	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
		if (swz == RC_SWIZZLE_ONE) {
			inst->U.I.Opcode = RC_OPCODE_ADD;
			if (negate)
				inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
			return;
		} else if (swz == RC_SWIZZLE_ZERO) {
			inst->U.I.Opcode = RC_OPCODE_MOV;
			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
			return;
		}
	}
}

/**
 * Simplify MUL dst, a, b when one of its sources is a uniform constant:
 *   a == 1 or b == 1 -> MOV dst, (other factor), negated when the constant 1
 *                       was negated
 *   a == 0 or b == 0 -> MOV dst, none.0000
 * The now-unused second source is not cleared here; constant_folding() does
 * that after the call.
 */
static void constant_folding_mul(struct rc_instruction * inst)
{
	rc_swizzle swz = 0;
	unsigned int negate = 0;

	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
		if (swz == RC_SWIZZLE_ONE) {
			inst->U.I.Opcode = RC_OPCODE_MOV;
			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
			if (negate)
				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
			return;
		} else if (swz == RC_SWIZZLE_ZERO) {
			/* SrcReg[0].File is already RC_FILE_NONE here, as
			 * required by is_src_uniform_constant(). */
			inst->U.I.Opcode = RC_OPCODE_MOV;
			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
			return;
		}
	}

	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
		if (swz == RC_SWIZZLE_ONE) {
			inst->U.I.Opcode = RC_OPCODE_MOV;
			if (negate)
				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
			return;
		} else if (swz == RC_SWIZZLE_ZERO) {
			/* SrcReg[0] still names the other factor's register.
			 * The source no longer reads any register component,
			 * so drop the file as well: this matches how
			 * constant_folding() marks all-constant sources and
			 * keeps file-based checks such as the TEX/KIL guard
			 * in copy_propagate_scan_read() from mistaking the
			 * resulting MOV for a real register copy. */
			inst->U.I.Opcode = RC_OPCODE_MOV;
			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
			inst->U.I.SrcReg[0].File = RC_FILE_NONE;
			return;
		}
	}
}

/**
 * Simplify ADD dst, a, b into MOV dst, (other operand) when a or b is the
 * uniform constant 0.  The now-unused second source is not cleared here;
 * constant_folding() does that after the call.
 */
static void constant_folding_add(struct rc_instruction * inst)
{
	rc_swizzle swz = 0;
	unsigned int negate = 0;

	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
		if (swz == RC_SWIZZLE_ZERO) {
			inst->U.I.Opcode = RC_OPCODE_MOV;
			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
			return;
		}
	}

	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
		if (swz == RC_SWIZZLE_ZERO) {
			inst->U.I.Opcode = RC_OPCODE_MOV;
			return;
		}
	}
}

/**
 * Replace 0.0, 1.0 and 0.5 immediate constants by their
 * respective swizzles. Simplify instructions like ADD dst, src, 0;
 *
 * @param c    The compiler; supplies the constant table, has_half_swizzles
 *             and the SwizzleCaps used to reject non-native swizzles.
 * @param inst The instruction to fold; modified in place.
 */
static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
{
	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
	unsigned int i;

	/* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
		struct rc_constant * constant;
		struct rc_src_register newsrc;
		int have_real_reference;
		unsigned int chan;

		/* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
		for (chan = 0; chan < 4; ++chan)
			if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
				break;
		if (chan == 4) {
			inst->U.I.SrcReg[src].File = RC_FILE_NONE;
			continue;
		}

		/* Convert immediates to swizzles. */
		if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
		    inst->U.I.SrcReg[src].RelAddr ||
		    inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
			continue;

		constant =
			&c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];

		if (constant->Type != RC_CONSTANT_IMMEDIATE)
			continue;

		newsrc = inst->U.I.SrcReg[src];
		have_real_reference = 0;
		for (chan = 0; chan < 4; ++chan) {
			unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
			unsigned int newswz;
			float imm;
			float baseimm;

			/* Channel already uses a constant or unused swizzle. */
			if (swz >= 4)
				continue;

			/* Compare on the magnitude; the sign is folded into
			 * the negate bit below. */
			imm = constant->u.Immediate[swz];
			baseimm = imm;
			if (imm < 0.0)
				baseimm = -baseimm;

			if (baseimm == 0.0) {
				newswz = RC_SWIZZLE_ZERO;
			} else if (baseimm == 1.0) {
				newswz = RC_SWIZZLE_ONE;
			} else if (baseimm == 0.5 && c->has_half_swizzles) {
				newswz = RC_SWIZZLE_HALF;
			} else {
				have_real_reference = 1;
				continue;
			}

			SET_SWZ(newsrc.Swizzle, chan, newswz);
			if (imm < 0.0 && !newsrc.Abs)
				newsrc.Negate ^= 1 << chan;
		}

		/* No channel reads the constant register any more. */
		if (!have_real_reference) {
			newsrc.File = RC_FILE_NONE;
			newsrc.Index = 0;
		}

		/* don't make the swizzle worse */
		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
		    c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
			continue;

		inst->U.I.SrcReg[src] = newsrc;
	}

	/* Simplify instructions based on constants */
	if (inst->U.I.Opcode == RC_OPCODE_MAD)
		constant_folding_mad(inst);

	/* note: MAD can simplify to MUL or ADD */
	if (inst->U.I.Opcode == RC_OPCODE_MUL)
		constant_folding_mul(inst);
	else if (inst->U.I.Opcode == RC_OPCODE_ADD)
		constant_folding_add(inst);

	/* In case this instruction has been converted, make sure all of the
	 * registers that are no longer used are empty. */
	opcode = rc_get_opcode_info(inst->U.I.Opcode);
	for(i = opcode->NumSrcRegs; i < 3; i++) {
		memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
	}
}

/**
 * If src and dst use the same register, this function returns a writemask that
 * indicates which components are read by src.  Otherwise zero is returned.
 */
static unsigned int src_reads_dst_mask(struct rc_src_register src,
						struct rc_dst_register dst)
{
	if (dst.File != src.File || dst.Index != src.Index) {
		return 0;
	}
	return rc_swizzle_to_writemask(src.Swizzle);
}

/* Return 1 if the source register has a constant swizzle (e.g. 0, 0.5, 1.0)
 * in any of its channels.  Return 0 otherwise.
 */
static int src_has_const_swz(struct rc_src_register src) {
	int chan;
	for(chan = 0; chan < 4; chan++) {
		unsigned int swz = GET_SWZ(src.Swizzle, chan);
		if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
						|| swz == RC_SWIZZLE_ONE) {
			return 1;
		}
	}
	return 0;
}

/**
 * Reader callback handed to rc_get_readers() by presub_helper().
 *
 * reader_data->CbData points at the presubtract opcode being considered.
 * Sets reader_data->Abort if the reader cannot use that presubtract
 * operation with the writer's two sources.
 */
static void presub_scan_read(
	void * data,
	struct rc_instruction * inst,
	struct rc_src_register * src)
{
	struct rc_reader_data * reader_data = data;
	rc_presubtract_op * presub_opcode = reader_data->CbData;

	if (!rc_inst_can_use_presub(inst, *presub_opcode,
			reader_data->Writer->U.I.DstReg.WriteMask,
			src,
			&reader_data->Writer->U.I.SrcReg[0],
			&reader_data->Writer->U.I.SrcReg[1])) {
		reader_data->Abort = 1;
		return;
	}
}

/**
 * Replace every read of inst_add's result by a presubtract operation.
 *
 * @param c              The compiler.
 * @param inst_add       The ADD instruction whose readers are rewritten.
 * @param presub_opcode  Presubtract opcode every reader must be able to use.
 * @param presub_replace Called once for each reader source that reads
 *                       inst_add's destination.
 * @return 1 if all readers were rewritten, 0 if nothing was changed (no
 *         readers, or a callback requested an abort).  inst_add itself is
 *         not removed here.
 */
static int presub_helper(
	struct radeon_compiler * c,
	struct rc_instruction * inst_add,
	rc_presubtract_op presub_opcode,
	rc_presub_replace_fn presub_replace)
{
	struct rc_reader_data reader_data;
	unsigned int i;
	rc_presubtract_op cb_op = presub_opcode;

	reader_data.CbData = &cb_op;
	reader_data.ExitOnAbort = 1;
	rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
						is_src_clobbered_scan_write);

	if (reader_data.Abort || reader_data.ReaderCount == 0)
		return 0;

	for(i = 0; i < reader_data.ReaderCount; i++) {
		unsigned int src_index;
		struct rc_reader reader = reader_data.Readers[i];
		const struct rc_opcode_info * info =
				rc_get_opcode_info(reader.Inst->U.I.Opcode);

		/* Map the recorded source pointer back to its index. */
		for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
			if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src)
				presub_replace(inst_add, reader.Inst, src_index);
		}
	}
	return 1;
}

/* This function assumes that inst_add->U.I.SrcReg[0] and
 * inst_add->U.I.SrcReg[1] aren't both negative.
 *
 * Uses RC_PRESUB_SUB if either source of inst_add is negated and
 * RC_PRESUB_ADD otherwise.  When SrcReg[1] is the negated one the two
 * operands are stored swapped in the reader's PreSub.  The negate bits of
 * both presubtract sources are cleared, and the reader's source is pointed
 * at RC_FILE_PRESUB with the chosen opcode as its index.
 */
static void presub_replace_add(
	struct rc_instruction * inst_add,
	struct rc_instruction * inst_reader,
	unsigned int src_index)
{
	rc_presubtract_op presub_opcode;
	if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
		presub_opcode = RC_PRESUB_SUB;
	else
		presub_opcode = RC_PRESUB_ADD;

	if (inst_add->U.I.SrcReg[1].Negate) {
		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
	} else {
		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
	}
	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
	inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
	inst_reader->U.I.PreSub.Opcode = presub_opcode;
	inst_reader->U.I.SrcReg[src_index] =
			chain_srcregs(inst_reader->U.I.SrcReg[src_index],
					inst_reader->U.I.PreSub.SrcReg[0]);
	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
	inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
}

/**
 * Check whether an ADD instruction may be turned into a presubtract
 * operation.
 *
 * @return 0 if the instruction already uses presubtract, saturates, writes
 *         the ALU result or uses an output modifier; if both sources contain
 *         a constant swizzle; if a source reads the instruction's own
 *         destination register; or if a source would not have a native
 *         swizzle once its file is RC_FILE_PRESUB.  1 otherwise.
 */
static int is_presub_candidate(
	struct radeon_compiler * c,
	struct rc_instruction * inst)
{
	const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
	unsigned int i;
	unsigned int is_constant[2] = {0, 0};

	assert(inst->U.I.Opcode == RC_OPCODE_ADD);

	if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
			|| inst->U.I.SaturateMode
			|| inst->U.I.WriteALUResult
			|| inst->U.I.Omod) {
		return 0;
	}

	/* If both sources use a constant swizzle, then we can't convert it to
	 * a presubtract operation.  In fact for the ADD and SUB presubtract
	 * operations neither source can contain a constant swizzle.  This
	 * specific case is checked in peephole_add_presub_add() when
	 * we make sure the swizzles for both sources are equal, so we
	 * don't need to worry about it here.
	 */
	for (i = 0; i < 2; i++) {
		int chan;
		for (chan = 0; chan < 4; chan++) {
			rc_swizzle swz =
				get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
			if (swz == RC_SWIZZLE_ONE
					|| swz == RC_SWIZZLE_ZERO
					|| swz == RC_SWIZZLE_HALF) {
				is_constant[i] = 1;
			}
		}
	}
	if (is_constant[0] && is_constant[1])
		return 0;

	for(i = 0; i < info->NumSrcRegs; i++) {
		/* Work on a copy so that the instruction is left untouched. */
		struct rc_src_register src = inst->U.I.SrcReg[i];
		if (src_reads_dst_mask(src, inst->U.I.DstReg))
			return 0;

		src.File = RC_FILE_PRESUB;
		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
			return 0;
	}
	return 1;
}

/**
 * Try to replace an ADD by a presubtract operation in all of its readers
 * (see presub_replace_add()).
 * @return
 * 	0 if the ADD instruction is still part of the program.
 * 	1 if the ADD instruction is no longer part of the program.
 */
static int peephole_add_presub_add(
	struct radeon_compiler * c,
	struct rc_instruction * inst_add)
{
	unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
	unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
	unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;

	if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
		return 0;

	/* src0 and src1 can't have absolute values */
	if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
		return 0;

	/* presub_replace_add() assumes only one is negative */
	if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
		return 0;

	/* if src0 is negative, at least all bits of dstmask have to be set */
	if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
		return 0;

	/* if src1 is negative, at least all bits of dstmask have to be set */
	if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
		return 0;

	if (!is_presub_candidate(c, inst_add))
		return 0;

	if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
		rc_remove_instruction(inst_add);
		return 1;
	}
	return 0;
}

/**
 * presub_helper() callback for RC_PRESUB_INV: stores inst_add's second
 * source, with its negate bits cleared, as the reader's presubtract source
 * and points the reader's source at RC_FILE_PRESUB / RC_PRESUB_INV.
 */
static void presub_replace_inv(
	struct rc_instruction * inst_add,
	struct rc_instruction * inst_reader,
	unsigned int src_index)
{
	/* We must be careful not to modify inst_add, since it
	 * is possible it will remain part of the program.*/
	inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
	inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
	inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
						inst_reader->U.I.PreSub.SrcReg[0]);

	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
	inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
}

/**
 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
 * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source
 * of the add instruction must have the constant 1 swizzle.  This function
 * does not check const registers to see if their value is 1.0, so it should
 * be called after the constant_folding optimization.
 * @return
 * 	0 if the ADD instruction is still part of the program.
 * 	1 if the ADD instruction is no longer part of the program.
 */
static int peephole_add_presub_inv(
	struct radeon_compiler * c,
	struct rc_instruction * inst_add)
{
	unsigned int i, swz;

	if (!is_presub_candidate(c, inst_add))
		return 0;

	/* Check if src0 is 1. */
	/* XXX It would be nice to use is_src_uniform_constant here, but that
	 * function only works if the register's file is RC_FILE_NONE */
	for(i = 0; i < 4; i++ ) {
		swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
		if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
						&& swz != RC_SWIZZLE_ONE) {
			return 0;
		}
	}

	/* Check src1: it has to be negated in every written channel, must not
	 * take the absolute value, has to come from a temporary or a constant
	 * and must not use any constant swizzle. */
	if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
						inst_add->U.I.DstReg.WriteMask
		|| inst_add->U.I.SrcReg[1].Abs
		|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
			&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
		|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {

		return 0;
	}

	if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
		rc_remove_instruction(inst_add);
		return 1;
	}
	return 0;
}

/**
 * User data for the omod_filter_*_cb() callbacks: Writer is the destination
 * register of the MUL being folded, Clobbered is set to 1 when an instruction
 * touches that register.
 */
struct peephole_mul_cb_data {
	struct rc_dst_register * Writer;
	unsigned int Clobbered;
};

/**
 * Callback for rc_for_all_reads_mask(): sets Clobbered if the read
 * (file, index, mask) reads the register described by d->Writer according to
 * rc_src_reads_dst_mask().
 */
static void omod_filter_reader_cb(
	void * userdata,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct peephole_mul_cb_data * d = userdata;
	if (rc_src_reads_dst_mask(file, mask, index,
		d->Writer->File, d->Writer->Index, d->Writer->WriteMask)) {

		d->Clobbered = 1;
	}
}

/**
 * Callback for rc_for_all_writes_mask(): sets Clobbered if the write
 * (file, index, mask) targets the same register as d->Writer with an
 * overlapping writemask.
 */
static void omod_filter_writer_cb(
	void * userdata,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct peephole_mul_cb_data * d = userdata;
	if (file == d->Writer->File && index == d->Writer->Index &&
					(mask & d->Writer->WriteMask)) {
		d->Clobbered = 1;
	}
}

/**
 * Try to fold a MUL by an immediate 2, 4, 8, 1/2, 1/4 or 1/8 into the output
 * modifier of the instruction(s) that write the MUL's temporary source.
 *
 * The MUL must have exactly one temporary source and one un-negated constant
 * source whose used channels all select the same component.  The writers of
 * the temporary must not be texture instructions, must not saturate, and the
 * MUL's destination register must be neither read nor written between each
 * writer and the MUL.
 *
 * @param c        The compiler.
 * @param inst_mul The MUL instruction to fold.
 * @param var_list Variable list as returned by rc_get_variables().
 * @return
 * 	0 if inst_mul is still part of the program.
 * 	1 if inst_mul is no longer part of the program.
 */
static int peephole_mul_omod(
	struct radeon_compiler * c,
	struct rc_instruction * inst_mul,
	struct rc_list * var_list)
{
	unsigned int chan = 0, swz, i;
	int const_index = -1;
	int temp_index = -1;
	float const_value;
	rc_omod_op omod_op = RC_OMOD_DISABLE;
	struct rc_list * writer_list;
	struct rc_variable * var;
	struct peephole_mul_cb_data cb_data;
	unsigned writemask_sum;

	for (i = 0; i < 2; i++) {
		unsigned int j;
		if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT
			&& inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) {
			return 0;
		}
		if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
			if (temp_index != -1) {
				/* The instruction has two temp sources */
				return 0;
			} else {
				temp_index = i;
				continue;
			}
		}
		/* If we get this far Src[i] must be a constant src */
		if (inst_mul->U.I.SrcReg[i].Negate) {
			return 0;
		}
		/* The constant src needs to read from the same swizzle */
		swz = RC_SWIZZLE_UNUSED;
		chan = 0;
		for (j = 0; j < 4; j++) {
			unsigned int j_swz =
				GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j);
			if (j_swz == RC_SWIZZLE_UNUSED) {
				continue;
			}
			if (swz == RC_SWIZZLE_UNUSED) {
				swz = j_swz;
				chan = j;
			} else if (j_swz != swz) {
				return 0;
			}
		}

		if (const_index != -1) {
			/* The instruction has two constant sources */
			return 0;
		} else {
			const_index = i;
		}
	}

	/* Both sources passed the loop, so exactly one of them is the
	 * temporary and the other one the constant. */
	if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File,
				inst_mul->U.I.SrcReg[const_index].Index)) {
		return 0;
	}
	const_value = rc_get_constant_value(c,
			inst_mul->U.I.SrcReg[const_index].Index,
			inst_mul->U.I.SrcReg[const_index].Swizzle,
			inst_mul->U.I.SrcReg[const_index].Negate,
			chan);

	if (const_value == 2.0f) {
		omod_op = RC_OMOD_MUL_2;
	} else if (const_value == 4.0f) {
		omod_op = RC_OMOD_MUL_4;
	} else if (const_value == 8.0f) {
		omod_op = RC_OMOD_MUL_8;
	} else if (const_value == (1.0f / 2.0f)) {
		omod_op = RC_OMOD_DIV_2;
	} else if (const_value == (1.0f / 4.0f)) {
		omod_op = RC_OMOD_DIV_4;
	} else if (const_value == (1.0f / 8.0f)) {
		omod_op = RC_OMOD_DIV_8;
	} else {
		return 0;
	}

	writer_list = rc_variable_list_get_writers_one_reader(var_list,
		RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]);

	if (!writer_list) {
		return 0;
	}

	cb_data.Clobbered = 0;
	cb_data.Writer = &inst_mul->U.I.DstReg;
	for (var = writer_list->Item; var; var = var->Friend) {
		struct rc_instruction * inst;
		const struct rc_opcode_info * info = rc_get_opcode_info(
				var->Inst->U.I.Opcode);
		if (info->HasTexture) {
			return 0;
		}
		if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) {
			return 0;
		}
		/* Walk backwards from the MUL to this writer and look for
		 * anything that touches the MUL's destination register. */
		for (inst = inst_mul->Prev; inst != var->Inst;
							inst = inst->Prev) {
			rc_for_all_reads_mask(inst, omod_filter_reader_cb,
								&cb_data);
			rc_for_all_writes_mask(inst, omod_filter_writer_cb,
								&cb_data);
			if (cb_data.Clobbered) {
				break;
			}
		}
	}

	if (cb_data.Clobbered) {
		return 0;
	}

	/* Rewrite the instructions */
	writemask_sum = rc_variable_writemask_sum(writer_list->Item);
	for (var = writer_list->Item; var; var = var->Friend) {
		struct rc_variable * writer = var;
		unsigned conversion_swizzle = rc_make_conversion_swizzle(
					writemask_sum,
					inst_mul->U.I.DstReg.WriteMask);
		writer->Inst->U.I.Omod = omod_op;
		writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File;
		writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index;
		rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle);
		writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode;
	}

	rc_remove_instruction(inst_mul);

	return 1;
}

/**
 * Run the presubtract peephole optimizations on an ADD instruction when the
 * compiler reports presubtract support (c->has_presub).  Other opcodes are
 * left alone.
 * @return
 * 	0 if inst is still part of the program.
 * 	1 if inst is no longer part of the program.
 */
static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
{
	switch(inst->U.I.Opcode){
	case RC_OPCODE_ADD:
		if (c->has_presub) {
			if(peephole_add_presub_inv(c, inst))
				return 1;
			if(peephole_add_presub_add(c, inst))
				return 1;
		}
		break;
	default:
		break;
	}
	return 0;
}

/**
 * Entry point of the optimization pass.
 *
 * The first walk over the program applies constant folding, the presubtract
 * peepholes and, for instructions that are (or have become) MOVs, copy
 * propagation.  If the compiler reports output modifier support
 * (c->has_omod), a second walk tries to fold every MUL into an output
 * modifier.
 *
 * @param c    The compiler whose program is optimized in place.
 * @param user Unused.
 */
void rc_optimize(struct radeon_compiler * c, void *user)
{
	struct rc_instruction * inst = c->Program.Instructions.Next;
	struct rc_list * var_list;
	while(inst != &c->Program.Instructions) {
		struct rc_instruction * cur = inst;
		/* Advance first: cur may be removed by the passes below. */
		inst = inst->Next;

		constant_folding(c, cur);

		if(peephole(c, cur))
			continue;

		if (cur->U.I.Opcode == RC_OPCODE_MOV) {
			copy_propagate(c, cur);
			/* cur may no longer be part of the program */
		}
	}

	if (!c->has_omod) {
		return;
	}

	inst = c->Program.Instructions.Next;
	while(inst != &c->Program.Instructions) {
		struct rc_instruction * cur = inst;
		inst = inst->Next;
		if (cur->U.I.Opcode == RC_OPCODE_MUL) {
			/* The variable list is fetched anew for every MUL. */
			var_list = rc_get_variables(c);
			peephole_mul_omod(c, cur, var_list);
		}
	}
}