head 1.1; branch 1.1.1; access; symbols netbsd-11-0-RC5:1.1.1.3 netbsd-11-0-RC4:1.1.1.3 netbsd-11-0-RC3:1.1.1.3 netbsd-11-0-RC2:1.1.1.3 netbsd-11-0-RC1:1.1.1.3 netbsd-11:1.1.1.3.0.4 netbsd-11-base:1.1.1.3 netbsd-10-1-RELEASE:1.1.1.3 netbsd-9-4-RELEASE:1.1.1.1 netbsd-10-0-RELEASE:1.1.1.3 netbsd-10-0-RC6:1.1.1.3 netbsd-10-0-RC5:1.1.1.3 netbsd-10-0-RC4:1.1.1.3 netbsd-10-0-RC3:1.1.1.3 netbsd-10-0-RC2:1.1.1.3 netbsd-10-0-RC1:1.1.1.3 netbsd-10:1.1.1.3.0.2 netbsd-10-base:1.1.1.3 netbsd-9-3-RELEASE:1.1.1.1 mesa-21-3-7:1.1.1.3 netbsd-9-2-RELEASE:1.1.1.1 netbsd-9-1-RELEASE:1.1.1.1 netbsd-9-0-RELEASE:1.1.1.1 netbsd-9-0-RC2:1.1.1.1 netbsd-9-0-RC1:1.1.1.1 mesalib-19-1-7:1.1.1.2 netbsd-9:1.1.1.1.0.2 netbsd-9-base:1.1.1.1 mesa-18-3-6:1.1.1.1 mesa-18-3-4:1.1.1.1 xorg:1.1.1; locks; strict; comment @// @; 1.1 date 2019.03.10.03.42.41; author mrg; state Exp; branches 1.1.1.1; next ; commitid r12jo1Nf3ebQKLeB; 1.1.1.1 date 2019.03.10.03.42.41; author mrg; state Exp; branches; next 1.1.1.2; commitid r12jo1Nf3ebQKLeB; 1.1.1.2 date 2019.09.24.17.40.29; author maya; state Exp; branches; next 1.1.1.3; commitid KJXusGl8fi9AAhEB; 1.1.1.3 date 2022.05.09.01.23.37; author mrg; state Exp; branches; next ; commitid UEBs6hNk81DdQjDD; desc @@ 1.1 log @Initial revision @ text @/* * Copyright © 2010 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include "brw_cfg.h" #include "brw_eu.h" #include "brw_fs.h" #include "brw_nir.h" #include "brw_vec4_tes.h" #include "common/gen_debug.h" #include "main/uniforms.h" #include "util/macros.h" enum brw_reg_type brw_type_for_base_type(const struct glsl_type *type) { switch (type->base_type) { case GLSL_TYPE_FLOAT16: return BRW_REGISTER_TYPE_HF; case GLSL_TYPE_FLOAT: return BRW_REGISTER_TYPE_F; case GLSL_TYPE_INT: case GLSL_TYPE_BOOL: case GLSL_TYPE_SUBROUTINE: return BRW_REGISTER_TYPE_D; case GLSL_TYPE_INT16: return BRW_REGISTER_TYPE_W; case GLSL_TYPE_INT8: return BRW_REGISTER_TYPE_B; case GLSL_TYPE_UINT: return BRW_REGISTER_TYPE_UD; case GLSL_TYPE_UINT16: return BRW_REGISTER_TYPE_UW; case GLSL_TYPE_UINT8: return BRW_REGISTER_TYPE_UB; case GLSL_TYPE_ARRAY: return brw_type_for_base_type(type->fields.array); case GLSL_TYPE_STRUCT: case GLSL_TYPE_SAMPLER: case GLSL_TYPE_ATOMIC_UINT: /* These should be overridden with the type of the member when * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely * way to trip up if we don't. */ return BRW_REGISTER_TYPE_UD; case GLSL_TYPE_IMAGE: return BRW_REGISTER_TYPE_UD; case GLSL_TYPE_DOUBLE: return BRW_REGISTER_TYPE_DF; case GLSL_TYPE_UINT64: return BRW_REGISTER_TYPE_UQ; case GLSL_TYPE_INT64: return BRW_REGISTER_TYPE_Q; case GLSL_TYPE_VOID: case GLSL_TYPE_ERROR: case GLSL_TYPE_INTERFACE: case GLSL_TYPE_FUNCTION: unreachable("not reached"); } return BRW_REGISTER_TYPE_F; } enum brw_conditional_mod brw_conditional_for_comparison(unsigned int op) { switch (op) { case ir_binop_less: return BRW_CONDITIONAL_L; case ir_binop_gequal: return BRW_CONDITIONAL_GE; case ir_binop_equal: case ir_binop_all_equal: /* same as equal for scalars */ return BRW_CONDITIONAL_Z; case ir_binop_nequal: case ir_binop_any_nequal: /* same as nequal for scalars */ return BRW_CONDITIONAL_NZ; default: unreachable("not reached: bad operation for comparison"); } } uint32_t brw_math_function(enum opcode op) { switch (op) { case SHADER_OPCODE_RCP: return BRW_MATH_FUNCTION_INV; case SHADER_OPCODE_RSQ: return BRW_MATH_FUNCTION_RSQ; case SHADER_OPCODE_SQRT: return BRW_MATH_FUNCTION_SQRT; case SHADER_OPCODE_EXP2: return BRW_MATH_FUNCTION_EXP; case SHADER_OPCODE_LOG2: return BRW_MATH_FUNCTION_LOG; case SHADER_OPCODE_POW: return BRW_MATH_FUNCTION_POW; case SHADER_OPCODE_SIN: return BRW_MATH_FUNCTION_SIN; case SHADER_OPCODE_COS: return BRW_MATH_FUNCTION_COS; case SHADER_OPCODE_INT_QUOTIENT: return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT; case SHADER_OPCODE_INT_REMAINDER: return BRW_MATH_FUNCTION_INT_DIV_REMAINDER; default: unreachable("not reached: unknown math function"); } } bool brw_texture_offset(int *offsets, unsigned num_components, uint32_t *offset_bits) { if (!offsets) return false; /* nonconstant offset; caller will handle it. */ /* offset out of bounds; caller will handle it. */ for (unsigned i = 0; i < num_components; i++) if (offsets[i] > 7 || offsets[i] < -8) return false; /* Combine all three offsets into a single unsigned dword: * * bits 11:8 - U Offset (X component) * bits 7:4 - V Offset (Y component) * bits 3:0 - R Offset (Z component) */ *offset_bits = 0; for (unsigned i = 0; i < num_components; i++) { const unsigned shift = 4 * (2 - i); *offset_bits |= (offsets[i] << shift) & (0xF << shift); } return true; } const char * brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) { switch (op) { case BRW_OPCODE_ILLEGAL ... BRW_OPCODE_NOP: /* The DO instruction doesn't exist on Gen6+, but we use it to mark the * start of a loop in the IR. */ if (devinfo->gen >= 6 && op == BRW_OPCODE_DO) return "do"; /* The following conversion opcodes doesn't exist on Gen8+, but we use * then to mark that we want to do the conversion. */ if (devinfo->gen > 7 && op == BRW_OPCODE_F32TO16) return "f32to16"; if (devinfo->gen > 7 && op == BRW_OPCODE_F16TO32) return "f16to32"; assert(brw_opcode_desc(devinfo, op)->name); return brw_opcode_desc(devinfo, op)->name; case FS_OPCODE_FB_WRITE: return "fb_write"; case FS_OPCODE_FB_WRITE_LOGICAL: return "fb_write_logical"; case FS_OPCODE_REP_FB_WRITE: return "rep_fb_write"; case FS_OPCODE_FB_READ: return "fb_read"; case FS_OPCODE_FB_READ_LOGICAL: return "fb_read_logical"; case SHADER_OPCODE_RCP: return "rcp"; case SHADER_OPCODE_RSQ: return "rsq"; case SHADER_OPCODE_SQRT: return "sqrt"; case SHADER_OPCODE_EXP2: return "exp2"; case SHADER_OPCODE_LOG2: return "log2"; case SHADER_OPCODE_POW: return "pow"; case SHADER_OPCODE_INT_QUOTIENT: return "int_quot"; case SHADER_OPCODE_INT_REMAINDER: return "int_rem"; case SHADER_OPCODE_SIN: return "sin"; case SHADER_OPCODE_COS: return "cos"; case SHADER_OPCODE_TEX: return "tex"; case SHADER_OPCODE_TEX_LOGICAL: return "tex_logical"; case SHADER_OPCODE_TXD: return "txd"; case SHADER_OPCODE_TXD_LOGICAL: return "txd_logical"; case SHADER_OPCODE_TXF: return "txf"; case SHADER_OPCODE_TXF_LOGICAL: return "txf_logical"; case SHADER_OPCODE_TXF_LZ: return "txf_lz"; case SHADER_OPCODE_TXL: return "txl"; case SHADER_OPCODE_TXL_LOGICAL: return "txl_logical"; case SHADER_OPCODE_TXL_LZ: return "txl_lz"; case SHADER_OPCODE_TXS: return "txs"; case SHADER_OPCODE_TXS_LOGICAL: return "txs_logical"; case FS_OPCODE_TXB: return "txb"; case FS_OPCODE_TXB_LOGICAL: return "txb_logical"; case SHADER_OPCODE_TXF_CMS: return "txf_cms"; case SHADER_OPCODE_TXF_CMS_LOGICAL: return "txf_cms_logical"; case SHADER_OPCODE_TXF_CMS_W: return "txf_cms_w"; case SHADER_OPCODE_TXF_CMS_W_LOGICAL: return "txf_cms_w_logical"; case SHADER_OPCODE_TXF_UMS: return "txf_ums"; case SHADER_OPCODE_TXF_UMS_LOGICAL: return "txf_ums_logical"; case SHADER_OPCODE_TXF_MCS: return "txf_mcs"; case SHADER_OPCODE_TXF_MCS_LOGICAL: return "txf_mcs_logical"; case SHADER_OPCODE_LOD: return "lod"; case SHADER_OPCODE_LOD_LOGICAL: return "lod_logical"; case SHADER_OPCODE_TG4: return "tg4"; case SHADER_OPCODE_TG4_LOGICAL: return "tg4_logical"; case SHADER_OPCODE_TG4_OFFSET: return "tg4_offset"; case SHADER_OPCODE_TG4_OFFSET_LOGICAL: return "tg4_offset_logical"; case SHADER_OPCODE_SAMPLEINFO: return "sampleinfo"; case SHADER_OPCODE_SAMPLEINFO_LOGICAL: return "sampleinfo_logical"; case SHADER_OPCODE_IMAGE_SIZE: return "image_size"; case SHADER_OPCODE_SHADER_TIME_ADD: return "shader_time_add"; case SHADER_OPCODE_UNTYPED_ATOMIC: return "untyped_atomic"; case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: return "untyped_atomic_logical"; case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: return "untyped_atomic_float"; case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: return "untyped_atomic_float_logical"; case SHADER_OPCODE_UNTYPED_SURFACE_READ: return "untyped_surface_read"; case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: return "untyped_surface_read_logical"; case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: return "untyped_surface_write"; case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: return "untyped_surface_write_logical"; case SHADER_OPCODE_TYPED_ATOMIC: return "typed_atomic"; case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: return "typed_atomic_logical"; case SHADER_OPCODE_TYPED_SURFACE_READ: return "typed_surface_read"; case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: return "typed_surface_read_logical"; case SHADER_OPCODE_TYPED_SURFACE_WRITE: return "typed_surface_write"; case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: return "typed_surface_write_logical"; case SHADER_OPCODE_MEMORY_FENCE: return "memory_fence"; case SHADER_OPCODE_INTERLOCK: /* For an interlock we actually issue a memory fence via sendc. */ return "interlock"; case SHADER_OPCODE_BYTE_SCATTERED_READ: return "byte_scattered_read"; case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: return "byte_scattered_read_logical"; case SHADER_OPCODE_BYTE_SCATTERED_WRITE: return "byte_scattered_write"; case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: return "byte_scattered_write_logical"; case SHADER_OPCODE_LOAD_PAYLOAD: return "load_payload"; case FS_OPCODE_PACK: return "pack"; case SHADER_OPCODE_GEN4_SCRATCH_READ: return "gen4_scratch_read"; case SHADER_OPCODE_GEN4_SCRATCH_WRITE: return "gen4_scratch_write"; case SHADER_OPCODE_GEN7_SCRATCH_READ: return "gen7_scratch_read"; case SHADER_OPCODE_URB_WRITE_SIMD8: return "gen8_urb_write_simd8"; case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: return "gen8_urb_write_simd8_per_slot"; case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: return "gen8_urb_write_simd8_masked"; case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: return "gen8_urb_write_simd8_masked_per_slot"; case SHADER_OPCODE_URB_READ_SIMD8: return "urb_read_simd8"; case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: return "urb_read_simd8_per_slot"; case SHADER_OPCODE_FIND_LIVE_CHANNEL: return "find_live_channel"; case SHADER_OPCODE_BROADCAST: return "broadcast"; case SHADER_OPCODE_SHUFFLE: return "shuffle"; case SHADER_OPCODE_SEL_EXEC: return "sel_exec"; case SHADER_OPCODE_QUAD_SWIZZLE: return "quad_swizzle"; case SHADER_OPCODE_CLUSTER_BROADCAST: return "cluster_broadcast"; case SHADER_OPCODE_GET_BUFFER_SIZE: return "get_buffer_size"; case VEC4_OPCODE_MOV_BYTES: return "mov_bytes"; case VEC4_OPCODE_PACK_BYTES: return "pack_bytes"; case VEC4_OPCODE_UNPACK_UNIFORM: return "unpack_uniform"; case VEC4_OPCODE_DOUBLE_TO_F32: return "double_to_f32"; case VEC4_OPCODE_DOUBLE_TO_D32: return "double_to_d32"; case VEC4_OPCODE_DOUBLE_TO_U32: return "double_to_u32"; case VEC4_OPCODE_TO_DOUBLE: return "single_to_double"; case VEC4_OPCODE_PICK_LOW_32BIT: return "pick_low_32bit"; case VEC4_OPCODE_PICK_HIGH_32BIT: return "pick_high_32bit"; case VEC4_OPCODE_SET_LOW_32BIT: return "set_low_32bit"; case VEC4_OPCODE_SET_HIGH_32BIT: return "set_high_32bit"; case FS_OPCODE_DDX_COARSE: return "ddx_coarse"; case FS_OPCODE_DDX_FINE: return "ddx_fine"; case FS_OPCODE_DDY_COARSE: return "ddy_coarse"; case FS_OPCODE_DDY_FINE: return "ddy_fine"; case FS_OPCODE_LINTERP: return "linterp"; case FS_OPCODE_PIXEL_X: return "pixel_x"; case FS_OPCODE_PIXEL_Y: return "pixel_y"; case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: return "uniform_pull_const"; case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7: return "uniform_pull_const_gen7"; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4: return "varying_pull_const_gen4"; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: return "varying_pull_const_gen7"; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: return "varying_pull_const_logical"; case FS_OPCODE_DISCARD_JUMP: return "discard_jump"; case FS_OPCODE_SET_SAMPLE_ID: return "set_sample_id"; case FS_OPCODE_PACK_HALF_2x16_SPLIT: return "pack_half_2x16_split"; case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X: return "unpack_half_2x16_split_x"; case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y: return "unpack_half_2x16_split_y"; case FS_OPCODE_PLACEHOLDER_HALT: return "placeholder_halt"; case FS_OPCODE_INTERPOLATE_AT_SAMPLE: return "interp_sample"; case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: return "interp_shared_offset"; case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: return "interp_per_slot_offset"; case VS_OPCODE_URB_WRITE: return "vs_urb_write"; case VS_OPCODE_PULL_CONSTANT_LOAD: return "pull_constant_load"; case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: return "pull_constant_load_gen7"; case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9: return "set_simd4x2_header_gen9"; case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: return "unpack_flags_simd4x2"; case GS_OPCODE_URB_WRITE: return "gs_urb_write"; case GS_OPCODE_URB_WRITE_ALLOCATE: return "gs_urb_write_allocate"; case GS_OPCODE_THREAD_END: return "gs_thread_end"; case GS_OPCODE_SET_WRITE_OFFSET: return "set_write_offset"; case GS_OPCODE_SET_VERTEX_COUNT: return "set_vertex_count"; case GS_OPCODE_SET_DWORD_2: return "set_dword_2"; case GS_OPCODE_PREPARE_CHANNEL_MASKS: return "prepare_channel_masks"; case GS_OPCODE_SET_CHANNEL_MASKS: return "set_channel_masks"; case GS_OPCODE_GET_INSTANCE_ID: return "get_instance_id"; case GS_OPCODE_FF_SYNC: return "ff_sync"; case GS_OPCODE_SET_PRIMITIVE_ID: return "set_primitive_id"; case GS_OPCODE_SVB_WRITE: return "gs_svb_write"; case GS_OPCODE_SVB_SET_DST_INDEX: return "gs_svb_set_dst_index"; case GS_OPCODE_FF_SYNC_SET_PRIMITIVES: return "gs_ff_sync_set_primitives"; case CS_OPCODE_CS_TERMINATE: return "cs_terminate"; case SHADER_OPCODE_BARRIER: return "barrier"; case SHADER_OPCODE_MULH: return "mulh"; case SHADER_OPCODE_MOV_INDIRECT: return "mov_indirect"; case VEC4_OPCODE_URB_READ: return "urb_read"; case TCS_OPCODE_GET_INSTANCE_ID: return "tcs_get_instance_id"; case TCS_OPCODE_URB_WRITE: return "tcs_urb_write"; case TCS_OPCODE_SET_INPUT_URB_OFFSETS: return "tcs_set_input_urb_offsets"; case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS: return "tcs_set_output_urb_offsets"; case TCS_OPCODE_GET_PRIMITIVE_ID: return "tcs_get_primitive_id"; case TCS_OPCODE_CREATE_BARRIER_HEADER: return "tcs_create_barrier_header"; case TCS_OPCODE_SRC0_010_IS_ZERO: return "tcs_src0<0,1,0>_is_zero"; case TCS_OPCODE_RELEASE_INPUT: return "tcs_release_input"; case TCS_OPCODE_THREAD_END: return "tcs_thread_end"; case TES_OPCODE_CREATE_INPUT_READ_HEADER: return "tes_create_input_read_header"; case TES_OPCODE_ADD_INDIRECT_URB_OFFSET: return "tes_add_indirect_urb_offset"; case TES_OPCODE_GET_PRIMITIVE_ID: return "tes_get_primitive_id"; case SHADER_OPCODE_RND_MODE: return "rnd_mode"; } unreachable("not reached"); } bool brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg) { union { unsigned ud; int d; float f; double df; } imm, sat_imm = { 0 }; const unsigned size = type_sz(type); /* We want to either do a 32-bit or 64-bit data copy, the type is otherwise * irrelevant, so just check the size of the type and copy from/to an * appropriately sized field. */ if (size < 8) imm.ud = reg->ud; else imm.df = reg->df; switch (type) { case BRW_REGISTER_TYPE_UD: case BRW_REGISTER_TYPE_D: case BRW_REGISTER_TYPE_UW: case BRW_REGISTER_TYPE_W: case BRW_REGISTER_TYPE_UQ: case BRW_REGISTER_TYPE_Q: /* Nothing to do. */ return false; case BRW_REGISTER_TYPE_F: sat_imm.f = CLAMP(imm.f, 0.0f, 1.0f); break; case BRW_REGISTER_TYPE_DF: sat_imm.df = CLAMP(imm.df, 0.0, 1.0); break; case BRW_REGISTER_TYPE_UB: case BRW_REGISTER_TYPE_B: unreachable("no UB/B immediates"); case BRW_REGISTER_TYPE_V: case BRW_REGISTER_TYPE_UV: case BRW_REGISTER_TYPE_VF: unreachable("unimplemented: saturate vector immediate"); case BRW_REGISTER_TYPE_HF: unreachable("unimplemented: saturate HF immediate"); case BRW_REGISTER_TYPE_NF: unreachable("no NF immediates"); } if (size < 8) { if (imm.ud != sat_imm.ud) { reg->ud = sat_imm.ud; return true; } } else { if (imm.df != sat_imm.df) { reg->df = sat_imm.df; return true; } } return false; } bool brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg) { switch (type) { case BRW_REGISTER_TYPE_D: case BRW_REGISTER_TYPE_UD: reg->d = -reg->d; return true; case BRW_REGISTER_TYPE_W: case BRW_REGISTER_TYPE_UW: { uint16_t value = -(int16_t)reg->ud; reg->ud = value | (uint32_t)value << 16; return true; } case BRW_REGISTER_TYPE_F: reg->f = -reg->f; return true; case BRW_REGISTER_TYPE_VF: reg->ud ^= 0x80808080; return true; case BRW_REGISTER_TYPE_DF: reg->df = -reg->df; return true; case BRW_REGISTER_TYPE_UQ: case BRW_REGISTER_TYPE_Q: reg->d64 = -reg->d64; return true; case BRW_REGISTER_TYPE_UB: case BRW_REGISTER_TYPE_B: unreachable("no UB/B immediates"); case BRW_REGISTER_TYPE_UV: case BRW_REGISTER_TYPE_V: assert(!"unimplemented: negate UV/V immediate"); case BRW_REGISTER_TYPE_HF: reg->ud ^= 0x80008000; return true; case BRW_REGISTER_TYPE_NF: unreachable("no NF immediates"); } return false; } bool brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg) { switch (type) { case BRW_REGISTER_TYPE_D: reg->d = abs(reg->d); return true; case BRW_REGISTER_TYPE_W: { uint16_t value = abs((int16_t)reg->ud); reg->ud = value | (uint32_t)value << 16; return true; } case BRW_REGISTER_TYPE_F: reg->f = fabsf(reg->f); return true; case BRW_REGISTER_TYPE_DF: reg->df = fabs(reg->df); return true; case BRW_REGISTER_TYPE_VF: reg->ud &= ~0x80808080; return true; case BRW_REGISTER_TYPE_Q: reg->d64 = imaxabs(reg->d64); return true; case BRW_REGISTER_TYPE_UB: case BRW_REGISTER_TYPE_B: unreachable("no UB/B immediates"); case BRW_REGISTER_TYPE_UQ: case BRW_REGISTER_TYPE_UD: case BRW_REGISTER_TYPE_UW: case BRW_REGISTER_TYPE_UV: /* Presumably the absolute value modifier on an unsigned source is a * nop, but it would be nice to confirm. */ assert(!"unimplemented: abs unsigned immediate"); case BRW_REGISTER_TYPE_V: assert(!"unimplemented: abs V immediate"); case BRW_REGISTER_TYPE_HF: reg->ud &= ~0x80008000; return true; case BRW_REGISTER_TYPE_NF: unreachable("no NF immediates"); } return false; } backend_shader::backend_shader(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, const nir_shader *shader, struct brw_stage_prog_data *stage_prog_data) : compiler(compiler), log_data(log_data), devinfo(compiler->devinfo), nir(shader), stage_prog_data(stage_prog_data), mem_ctx(mem_ctx), cfg(NULL), stage(shader->info.stage) { debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage); stage_name = _mesa_shader_stage_to_string(stage); stage_abbrev = _mesa_shader_stage_to_abbrev(stage); } backend_shader::~backend_shader() { } bool backend_reg::equals(const backend_reg &r) const { return brw_regs_equal(this, &r) && offset == r.offset; } bool backend_reg::negative_equals(const backend_reg &r) const { return brw_regs_negative_equal(this, &r) && offset == r.offset; } bool backend_reg::is_zero() const { if (file != IMM) return false; switch (type) { case BRW_REGISTER_TYPE_F: return f == 0; case BRW_REGISTER_TYPE_DF: return df == 0; case BRW_REGISTER_TYPE_D: case BRW_REGISTER_TYPE_UD: return d == 0; case BRW_REGISTER_TYPE_UQ: case BRW_REGISTER_TYPE_Q: return u64 == 0; default: return false; } } bool backend_reg::is_one() const { if (file != IMM) return false; switch (type) { case BRW_REGISTER_TYPE_F: return f == 1.0f; case BRW_REGISTER_TYPE_DF: return df == 1.0; case BRW_REGISTER_TYPE_D: case BRW_REGISTER_TYPE_UD: return d == 1; case BRW_REGISTER_TYPE_UQ: case BRW_REGISTER_TYPE_Q: return u64 == 1; default: return false; } } bool backend_reg::is_negative_one() const { if (file != IMM) return false; switch (type) { case BRW_REGISTER_TYPE_F: return f == -1.0; case BRW_REGISTER_TYPE_DF: return df == -1.0; case BRW_REGISTER_TYPE_D: return d == -1; case BRW_REGISTER_TYPE_Q: return d64 == -1; default: return false; } } bool backend_reg::is_null() const { return file == ARF && nr == BRW_ARF_NULL; } bool backend_reg::is_accumulator() const { return file == ARF && nr == BRW_ARF_ACCUMULATOR; } bool backend_instruction::is_commutative() const { switch (opcode) { case BRW_OPCODE_AND: case BRW_OPCODE_OR: case BRW_OPCODE_XOR: case BRW_OPCODE_ADD: case BRW_OPCODE_MUL: case SHADER_OPCODE_MULH: return true; case BRW_OPCODE_SEL: /* MIN and MAX are commutative. */ if (conditional_mod == BRW_CONDITIONAL_GE || conditional_mod == BRW_CONDITIONAL_L) { return true; } /* fallthrough */ default: return false; } } bool backend_instruction::is_3src(const struct gen_device_info *devinfo) const { return ::is_3src(devinfo, opcode); } bool backend_instruction::is_tex() const { return (opcode == SHADER_OPCODE_TEX || opcode == FS_OPCODE_TXB || opcode == SHADER_OPCODE_TXD || opcode == SHADER_OPCODE_TXF || opcode == SHADER_OPCODE_TXF_LZ || opcode == SHADER_OPCODE_TXF_CMS || opcode == SHADER_OPCODE_TXF_CMS_W || opcode == SHADER_OPCODE_TXF_UMS || opcode == SHADER_OPCODE_TXF_MCS || opcode == SHADER_OPCODE_TXL || opcode == SHADER_OPCODE_TXL_LZ || opcode == SHADER_OPCODE_TXS || opcode == SHADER_OPCODE_LOD || opcode == SHADER_OPCODE_TG4 || opcode == SHADER_OPCODE_TG4_OFFSET || opcode == SHADER_OPCODE_SAMPLEINFO); } bool backend_instruction::is_math() const { return (opcode == SHADER_OPCODE_RCP || opcode == SHADER_OPCODE_RSQ || opcode == SHADER_OPCODE_SQRT || opcode == SHADER_OPCODE_EXP2 || opcode == SHADER_OPCODE_LOG2 || opcode == SHADER_OPCODE_SIN || opcode == SHADER_OPCODE_COS || opcode == SHADER_OPCODE_INT_QUOTIENT || opcode == SHADER_OPCODE_INT_REMAINDER || opcode == SHADER_OPCODE_POW); } bool backend_instruction::is_control_flow() const { switch (opcode) { case BRW_OPCODE_DO: case BRW_OPCODE_WHILE: case BRW_OPCODE_IF: case BRW_OPCODE_ELSE: case BRW_OPCODE_ENDIF: case BRW_OPCODE_BREAK: case BRW_OPCODE_CONTINUE: return true; default: return false; } } bool backend_instruction::can_do_source_mods() const { switch (opcode) { case BRW_OPCODE_ADDC: case BRW_OPCODE_BFE: case BRW_OPCODE_BFI1: case BRW_OPCODE_BFI2: case BRW_OPCODE_BFREV: case BRW_OPCODE_CBIT: case BRW_OPCODE_FBH: case BRW_OPCODE_FBL: case BRW_OPCODE_SUBB: case SHADER_OPCODE_BROADCAST: case SHADER_OPCODE_CLUSTER_BROADCAST: case SHADER_OPCODE_MOV_INDIRECT: return false; default: return true; } } bool backend_instruction::can_do_saturate() const { switch (opcode) { case BRW_OPCODE_ADD: case BRW_OPCODE_ASR: case BRW_OPCODE_AVG: case BRW_OPCODE_DP2: case BRW_OPCODE_DP3: case BRW_OPCODE_DP4: case BRW_OPCODE_DPH: case BRW_OPCODE_F16TO32: case BRW_OPCODE_F32TO16: case BRW_OPCODE_LINE: case BRW_OPCODE_LRP: case BRW_OPCODE_MAC: case BRW_OPCODE_MAD: case BRW_OPCODE_MATH: case BRW_OPCODE_MOV: case BRW_OPCODE_MUL: case SHADER_OPCODE_MULH: case BRW_OPCODE_PLN: case BRW_OPCODE_RNDD: case BRW_OPCODE_RNDE: case BRW_OPCODE_RNDU: case BRW_OPCODE_RNDZ: case BRW_OPCODE_SEL: case BRW_OPCODE_SHL: case BRW_OPCODE_SHR: case FS_OPCODE_LINTERP: case SHADER_OPCODE_COS: case SHADER_OPCODE_EXP2: case SHADER_OPCODE_LOG2: case SHADER_OPCODE_POW: case SHADER_OPCODE_RCP: case SHADER_OPCODE_RSQ: case SHADER_OPCODE_SIN: case SHADER_OPCODE_SQRT: return true; default: return false; } } bool backend_instruction::can_do_cmod() const { switch (opcode) { case BRW_OPCODE_ADD: case BRW_OPCODE_ADDC: case BRW_OPCODE_AND: case BRW_OPCODE_ASR: case BRW_OPCODE_AVG: case BRW_OPCODE_CMP: case BRW_OPCODE_CMPN: case BRW_OPCODE_DP2: case BRW_OPCODE_DP3: case BRW_OPCODE_DP4: case BRW_OPCODE_DPH: case BRW_OPCODE_F16TO32: case BRW_OPCODE_F32TO16: case BRW_OPCODE_FRC: case BRW_OPCODE_LINE: case BRW_OPCODE_LRP: case BRW_OPCODE_LZD: case BRW_OPCODE_MAC: case BRW_OPCODE_MACH: case BRW_OPCODE_MAD: case BRW_OPCODE_MOV: case BRW_OPCODE_MUL: case BRW_OPCODE_NOT: case BRW_OPCODE_OR: case BRW_OPCODE_PLN: case BRW_OPCODE_RNDD: case BRW_OPCODE_RNDE: case BRW_OPCODE_RNDU: case BRW_OPCODE_RNDZ: case BRW_OPCODE_SAD2: case BRW_OPCODE_SADA2: case BRW_OPCODE_SHL: case BRW_OPCODE_SHR: case BRW_OPCODE_SUBB: case BRW_OPCODE_XOR: case FS_OPCODE_LINTERP: return true; default: return false; } } bool backend_instruction::reads_accumulator_implicitly() const { switch (opcode) { case BRW_OPCODE_MAC: case BRW_OPCODE_MACH: case BRW_OPCODE_SADA2: return true; default: return false; } } bool backend_instruction::writes_accumulator_implicitly(const struct gen_device_info *devinfo) const { return writes_accumulator || (devinfo->gen < 6 && ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) || (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP))) || (opcode == FS_OPCODE_LINTERP && (!devinfo->has_pln || devinfo->gen <= 6)); } bool backend_instruction::has_side_effects() const { switch (opcode) { case SHADER_OPCODE_UNTYPED_ATOMIC: case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: case SHADER_OPCODE_GEN4_SCRATCH_WRITE: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: case SHADER_OPCODE_BYTE_SCATTERED_WRITE: case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: case SHADER_OPCODE_TYPED_ATOMIC: case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: case SHADER_OPCODE_TYPED_SURFACE_WRITE: case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: case SHADER_OPCODE_MEMORY_FENCE: case SHADER_OPCODE_INTERLOCK: case SHADER_OPCODE_URB_WRITE_SIMD8: case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: case FS_OPCODE_FB_WRITE: case FS_OPCODE_FB_WRITE_LOGICAL: case FS_OPCODE_REP_FB_WRITE: case SHADER_OPCODE_BARRIER: case TCS_OPCODE_URB_WRITE: case TCS_OPCODE_RELEASE_INPUT: case SHADER_OPCODE_RND_MODE: return true; default: return eot; } } bool backend_instruction::is_volatile() const { switch (opcode) { case SHADER_OPCODE_UNTYPED_SURFACE_READ: case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: case SHADER_OPCODE_TYPED_SURFACE_READ: case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: case SHADER_OPCODE_BYTE_SCATTERED_READ: case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: case SHADER_OPCODE_URB_READ_SIMD8: case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: case VEC4_OPCODE_URB_READ: return true; default: return false; } } #ifndef NDEBUG static bool inst_is_in_block(const bblock_t *block, const backend_instruction *inst) { bool found = false; foreach_inst_in_block (backend_instruction, i, block) { if (inst == i) { found = true; } } return found; } #endif static void adjust_later_block_ips(bblock_t *start_block, int ip_adjustment) { for (bblock_t *block_iter = start_block->next(); block_iter; block_iter = block_iter->next()) { block_iter->start_ip += ip_adjustment; block_iter->end_ip += ip_adjustment; } } void backend_instruction::insert_after(bblock_t *block, backend_instruction *inst) { assert(this != inst); if (!this->is_head_sentinel()) assert(inst_is_in_block(block, this) || !"Instruction not in block"); block->end_ip++; adjust_later_block_ips(block, 1); exec_node::insert_after(inst); } void backend_instruction::insert_before(bblock_t *block, backend_instruction *inst) { assert(this != inst); if (!this->is_tail_sentinel()) assert(inst_is_in_block(block, this) || !"Instruction not in block"); block->end_ip++; adjust_later_block_ips(block, 1); exec_node::insert_before(inst); } void backend_instruction::insert_before(bblock_t *block, exec_list *list) { assert(inst_is_in_block(block, this) || !"Instruction not in block"); unsigned num_inst = list->length(); block->end_ip += num_inst; adjust_later_block_ips(block, num_inst); exec_node::insert_before(list); } void backend_instruction::remove(bblock_t *block) { assert(inst_is_in_block(block, this) || !"Instruction not in block"); adjust_later_block_ips(block, -1); if (block->start_ip == block->end_ip) { block->cfg->remove_block(block); } else { block->end_ip--; } exec_node::remove(); } void backend_shader::dump_instructions() { dump_instructions(NULL); } void backend_shader::dump_instructions(const char *name) { FILE *file = stderr; if (name && geteuid() != 0) { file = fopen(name, "w"); if (!file) file = stderr; } if (cfg) { int ip = 0; foreach_block_and_inst(block, backend_instruction, inst, cfg) { if (!unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) fprintf(file, "%4d: ", ip++); dump_instruction(inst, file); } } else { int ip = 0; foreach_in_list(backend_instruction, inst, &instructions) { if (!unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) fprintf(file, "%4d: ", ip++); dump_instruction(inst, file); } } if (file != stderr) { fclose(file); } } void backend_shader::calculate_cfg() { if (this->cfg) return; cfg = new(mem_ctx) cfg_t(&this->instructions); } extern "C" const unsigned * brw_compile_tes(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, const struct brw_tes_prog_key *key, const struct brw_vue_map *input_vue_map, struct brw_tes_prog_data *prog_data, const nir_shader *src_shader, struct gl_program *prog, int shader_time_index, char **error_str) { const struct gen_device_info *devinfo = compiler->devinfo; const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL]; const unsigned *assembly; nir_shader *nir = nir_shader_clone(mem_ctx, src_shader); nir->info.inputs_read = key->inputs_read; nir->info.patch_inputs_read = key->patch_inputs_read; nir = brw_nir_apply_sampler_key(nir, compiler, &key->tex, is_scalar); brw_nir_lower_tes_inputs(nir, input_vue_map); brw_nir_lower_vue_outputs(nir); nir = brw_postprocess_nir(nir, compiler, is_scalar); brw_compute_vue_map(devinfo, &prog_data->base.vue_map, nir->info.outputs_written, nir->info.separate_shader); unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4; assert(output_size_bytes >= 1); if (output_size_bytes > GEN7_MAX_DS_URB_ENTRY_SIZE_BYTES) { if (error_str) *error_str = ralloc_strdup(mem_ctx, "DS outputs exceed maximum size"); return NULL; } prog_data->base.clip_distance_mask = ((1 << nir->info.clip_distance_array_size) - 1); prog_data->base.cull_distance_mask = ((1 << nir->info.cull_distance_array_size) - 1) << nir->info.clip_distance_array_size; /* URB entry sizes are stored as a multiple of 64 bytes. */ prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; /* On Cannonlake software shall not program an allocation size that * specifies a size that is a multiple of 3 64B (512-bit) cachelines. */ if (devinfo->gen == 10 && prog_data->base.urb_entry_size % 3 == 0) prog_data->base.urb_entry_size++; prog_data->base.urb_read_length = 0; STATIC_ASSERT(BRW_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1); STATIC_ASSERT(BRW_TESS_PARTITIONING_ODD_FRACTIONAL == TESS_SPACING_FRACTIONAL_ODD - 1); STATIC_ASSERT(BRW_TESS_PARTITIONING_EVEN_FRACTIONAL == TESS_SPACING_FRACTIONAL_EVEN - 1); prog_data->partitioning = (enum brw_tess_partitioning) (nir->info.tess.spacing - 1); switch (nir->info.tess.primitive_mode) { case GL_QUADS: prog_data->domain = BRW_TESS_DOMAIN_QUAD; break; case GL_TRIANGLES: prog_data->domain = BRW_TESS_DOMAIN_TRI; break; case GL_ISOLINES: prog_data->domain = BRW_TESS_DOMAIN_ISOLINE; break; default: unreachable("invalid domain shader primitive mode"); } if (nir->info.tess.point_mode) { prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_POINT; } else if (nir->info.tess.primitive_mode == GL_ISOLINES) { prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_LINE; } else { /* Hardware winding order is backwards from OpenGL */ prog_data->output_topology = nir->info.tess.ccw ? BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW : BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW; } if (unlikely(INTEL_DEBUG & DEBUG_TES)) { fprintf(stderr, "TES Input "); brw_print_vue_map(stderr, input_vue_map); fprintf(stderr, "TES Output "); brw_print_vue_map(stderr, &prog_data->base.vue_map); } if (is_scalar) { fs_visitor v(compiler, log_data, mem_ctx, (void *) key, &prog_data->base.base, NULL, nir, 8, shader_time_index, input_vue_map); if (!v.run_tes()) { if (error_str) *error_str = ralloc_strdup(mem_ctx, v.fail_msg); return NULL; } prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs; prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; fs_generator g(compiler, log_data, mem_ctx, &prog_data->base.base, v.promoted_constants, false, MESA_SHADER_TESS_EVAL); if (unlikely(INTEL_DEBUG & DEBUG_TES)) { g.enable_debug(ralloc_asprintf(mem_ctx, "%s tessellation evaluation shader %s", nir->info.label ? nir->info.label : "unnamed", nir->info.name)); } g.generate_code(v.cfg, 8); assembly = g.get_assembly(); } else { brw::vec4_tes_visitor v(compiler, log_data, key, prog_data, nir, mem_ctx, shader_time_index); if (!v.run()) { if (error_str) *error_str = ralloc_strdup(mem_ctx, v.fail_msg); return NULL; } if (unlikely(INTEL_DEBUG & DEBUG_TES)) v.dump_instructions(); assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir, &prog_data->base, v.cfg); } return assembly; } @ 1.1.1.1 log @from maya: Import mesa 18.3.4. Mesa 18.3.4 implements the OpenGL 4.5 API. Some drivers don't support all the features required in OpenGL 4.5. @ text @@ 1.1.1.2 log @Import mesa 19.1.7 New features in mesa 19.1.0: GL_ARB_parallel_shader_compile on all drivers. GL_EXT_gpu_shader4 on all GL 3.1 drivers. GL_EXT_shader_image_load_formatted on radeonsi. GL_EXT_texture_buffer_object on all GL 3.1 drivers. GL_EXT_texture_compression_s3tc_srgb on Gallium drivers and i965 (ES extension). GL_NV_compute_shader_derivatives on iris and i965. GL_KHR_parallel_shader_compile on all drivers. VK_EXT_buffer_device_address on Intel and RADV. VK_EXT_depth_clip_enable on Intel and RADV. VK_KHR_ycbcr_image_arrays on Intel. VK_EXT_inline_uniform_block on Intel and RADV. VK_EXT_external_memory_host on Intel. VK_EXT_host_query_reset on Intel and RADV. VK_KHR_surface_protected_capabilities on Intel and RADV. VK_EXT_pipeline_creation_feedback on Intel and RADV. VK_KHR_8bit_storage on RADV. VK_AMD_gpu_shader_int16 on RADV. VK_AMD_gpu_shader_half_float on RADV. VK_NV_compute_shader_derivatives on Intel. VK_KHR_shader_float16_int8 on Intel and RADV (RADV only supports int8). VK_KHR_shader_atomic_int64 on Intel. VK_EXT_descriptor_indexing on Intel. VK_KHR_shader_float16_int8 on Intel and RADV. GL_INTEL_conservative_rasterization on iris. VK_EXT_memory_budget on Intel. New features in mesa 19.0.0: GL_AMD_texture_texture4 on all GL 4.0 drivers. GL_EXT_shader_implicit_conversions on all drivers (ES extension). GL_EXT_texture_compression_bptc on all GL 4.0 drivers (ES extension). GL_EXT_texture_compression_rgtc on all GL 3.0 drivers (ES extension). GL_EXT_render_snorm on gallium drivers (ES extension). GL_EXT_texture_view on drivers supporting texture views (ES extension). GL_OES_texture_view on drivers supporting texture views (ES extension). GL_NV_shader_atomic_float on nvc0 (Fermi/Kepler only). Shader-based software implementations of GL_ARB_gpu_shader_fp64, GL_ARB_gpu_shader_int64, GL_ARB_vertex_attrib_64bit, and GL_ARB_shader_ballot on i965. VK_ANDROID_external_memory_android_hardware_buffer on Intel Fixed and re-exposed VK_EXT_pci_bus_info on Intel and RADV VK_EXT_scalar_block_layout on Intel and RADV VK_KHR_depth_stencil_resolve on Intel VK_KHR_draw_indirect_count on Intel VK_EXT_conditional_rendering on Intel VK_EXT_memory_budget on RADV Also, bug fixes. @ text @d29 1 a29 1 #include "dev/gen_debug.h" a57 1 case GLSL_TYPE_INTERFACE: d75 1 d132 1 a132 2 brw_texture_offset(const nir_tex_instr *tex, unsigned src, uint32_t *offset_bits_out) d134 1 a134 2 if (!nir_src_is_const(tex->src[src].src)) return false; d136 4 a139 1 const unsigned num_components = nir_tex_instr_src_size(tex, src); d147 1 a147 1 uint32_t offset_bits = 0; a148 6 int offset = nir_src_comp_as_int(tex->src[src].src, i); /* offset out of bounds; caller will handle it. */ if (offset > 7 || offset < -8) return false; d150 1 a150 1 offset_bits |= (offset << shift) & (0xF << shift); a151 3 *offset_bits_out = offset_bits; a208 3 case SHADER_OPCODE_SEND: return "send"; d270 2 a271 2 case SHADER_OPCODE_IMAGE_SIZE_LOGICAL: return "image_size_logical"; d276 1 a276 1 case VEC4_OPCODE_UNTYPED_ATOMIC: d280 2 d284 1 a284 1 case VEC4_OPCODE_UNTYPED_SURFACE_READ: d288 1 a288 1 case VEC4_OPCODE_UNTYPED_SURFACE_WRITE: d292 2 a293 14 case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL: return "a64_untyped_read_logical"; case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: return "a64_untyped_write_logical"; case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: return "a64_byte_scattered_read_logical"; case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: return "a64_byte_scattered_write_logical"; case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: return "a64_untyped_atomic_logical"; case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: return "a64_untyped_atomic_int64_logical"; case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: return "a64_untyped_atomic_float_logical"; d296 2 d300 2 d310 2 d314 2 d405 2 d418 4 a710 2 assert(type_sz(type) > 1); a711 3 case BRW_REGISTER_TYPE_HF: assert((d & 0xffff) == ((d >> 16) & 0xffff)); return (d & 0xffff) == 0 || (d & 0xffff) == 0x8000; a715 4 case BRW_REGISTER_TYPE_W: case BRW_REGISTER_TYPE_UW: assert((d & 0xffff) == ((d >> 16) & 0xffff)); return (d & 0xffff) == 0; a732 2 assert(type_sz(type) > 1); a733 3 case BRW_REGISTER_TYPE_HF: assert((d & 0xffff) == ((d >> 16) & 0xffff)); return (d & 0xffff) == 0x3c00; a737 4 case BRW_REGISTER_TYPE_W: case BRW_REGISTER_TYPE_UW: assert((d & 0xffff) == ((d >> 16) & 0xffff)); return (d & 0xffff) == 1; a754 2 assert(type_sz(type) > 1); a755 3 case BRW_REGISTER_TYPE_HF: assert((d & 0xffff) == ((d >> 16) & 0xffff)); return (d & 0xffff) == 0xbc00; a759 3 case BRW_REGISTER_TYPE_W: assert((d & 0xffff) == ((d >> 16) & 0xffff)); return (d & 0xffff) == 0xffff; d1004 1 a1004 4 case SHADER_OPCODE_SEND: return send_has_side_effects; case VEC4_OPCODE_UNTYPED_ATOMIC: d1006 1 d1009 1 a1009 1 case VEC4_OPCODE_UNTYPED_SURFACE_WRITE: d1011 1 a1011 5 case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: d1013 1 d1015 1 d1040 1 a1040 4 case SHADER_OPCODE_SEND: return send_is_volatile; case VEC4_OPCODE_UNTYPED_SURFACE_READ: d1042 1 d1044 1 a1045 2 case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL: case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: d1192 1 a1192 1 nir_shader *nir, d1201 1 @ 1.1.1.3 log @initial import of mesa 21.3.7 main changes since 19.1.7 include: - more support for Vulkan functions - better supported for newer radeonsi (both amdgpu and radeon backends) - various bug fixes in many drivers - many fixes and enhancements for intel drivers - some fixes for nvidia - OpenGL 4.6 for some drivers (intel, radeonsi) - intel Tigerlake and Rocketlake support - Vulkan 1.2 for some drivers - OpenGL 4.5, GLES 3.2, and more on llvmpipe - working Panfrost and Midgard drivers - fix warnings in radeonsi vs newer llvm @ text @d29 1 a29 1 #include "dev/intel_debug.h" d164 1 a164 1 brw_instruction_name(const struct intel_device_info *devinfo, enum opcode op) d167 2 a168 2 case 0 ... NUM_BRW_OPCODES - 1: /* The DO instruction doesn't exist on Gfx6+, but we use it to mark the d171 1 a171 1 if (devinfo->ver >= 6 && op == BRW_OPCODE_DO) d174 1 a174 1 /* The following conversion opcodes doesn't exist on Gfx8+, but we use d177 1 a177 1 if (devinfo->ver > 7 && op == BRW_OPCODE_F32TO16) d180 1 a180 1 if (devinfo->ver > 7 && op == BRW_OPCODE_F16TO32) a219 3 case SHADER_OPCODE_UNDEF: return "undef"; a300 6 case SHADER_OPCODE_OWORD_BLOCK_READ_LOGICAL: return "oword_block_read_logical"; case SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL: return "unaligned_oword_block_read_logical"; case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL: return "oword_block_write_logical"; a302 6 case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL: return "a64_oword_block_read_logical"; case SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL: return "a64_unaligned_oword_block_read_logical"; case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL: return "a64_oword_block_write_logical"; a310 2 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL: return "a64_untyped_atomic_int16_logical"; d313 2 a314 6 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL: return "a64_untyped_atomic_float16_logical"; case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL: return "a64_untyped_atomic_float32_logical"; case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT64_LOGICAL: return "a64_untyped_atomic_float64_logical"; a322 2 case FS_OPCODE_SCHEDULING_FENCE: return "scheduling_fence"; a330 4 case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL: return "dword_scattered_read_logical"; case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL: return "dword_scattered_write_logical"; d337 6 a342 8 case SHADER_OPCODE_GFX4_SCRATCH_READ: return "gfx4_scratch_read"; case SHADER_OPCODE_GFX4_SCRATCH_WRITE: return "gfx4_scratch_write"; case SHADER_OPCODE_GFX7_SCRATCH_READ: return "gfx7_scratch_read"; case SHADER_OPCODE_SCRATCH_HEADER: return "scratch_header"; d344 1 a344 1 return "gfx8_urb_write_simd8"; d346 1 a346 1 return "gfx8_urb_write_simd8_per_slot"; d348 1 a348 1 return "gfx8_urb_write_simd8_masked"; d350 1 a350 1 return "gfx8_urb_write_simd8_masked_per_slot"; a357 3 case FS_OPCODE_LOAD_LIVE_CHANNELS: return "load_live_channels"; a393 4 case VEC4_OPCODE_MOV_FOR_SCRATCH: return "mov_for_scratch"; case VEC4_OPCODE_ZERO_OOB_PUSH_REGS: return "zero_oob_push_regs"; d414 4 a417 4 case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GFX7: return "uniform_pull_const_gfx7"; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4: return "varying_pull_const_gfx4"; d421 3 d430 2 a431 2 case SHADER_OPCODE_HALT_TARGET: return "halt_target"; d444 5 a448 2 case VS_OPCODE_PULL_CONSTANT_LOAD_GFX7: return "pull_constant_load_gfx7"; a486 4 case SHADER_OPCODE_ISUB_SAT: return "isub_sat"; case SHADER_OPCODE_USUB_SAT: return "usub_sat"; a488 2 case SHADER_OPCODE_MOV_RELOC_IMM: return "mov_reloc_imm"; a516 3 case RT_OPCODE_TRACE_RAY_LOGICAL: return "rt_trace_ray_logical"; a518 8 case SHADER_OPCODE_FLOAT_CONTROL_MODE: return "float_control_mode"; case SHADER_OPCODE_GET_DSS_ID: return "get_dss_id"; case SHADER_OPCODE_BTD_SPAWN_LOGICAL: return "btd_spawn_logical"; case SHADER_OPCODE_BTD_RETIRE_LOGICAL: return "btd_retire_logical"; d555 1 a555 1 sat_imm.f = SATURATE(imm.f); d558 1 a558 1 sat_imm.df = SATURATE(imm.df); d681 1 a681 2 struct brw_stage_prog_data *stage_prog_data, bool debug_enabled) d688 2 a689 3 cfg(NULL), idom_analysis(this), stage(shader->info.stage), debug_enabled(debug_enabled) d691 1 a822 1 case BRW_OPCODE_ADD3: d832 1 a832 1 FALLTHROUGH; d839 1 a839 1 backend_instruction::is_3src(const struct intel_device_info *devinfo) const a897 13 backend_instruction::uses_indirect_addressing() const { switch (opcode) { case SHADER_OPCODE_BROADCAST: case SHADER_OPCODE_CLUSTER_BROADCAST: case SHADER_OPCODE_MOV_INDIRECT: return true; default: return false; } } bool a908 2 case BRW_OPCODE_ROL: case BRW_OPCODE_ROR: a909 1 case BRW_OPCODE_DP4A: a912 3 case SHADER_OPCODE_SHUFFLE: case SHADER_OPCODE_INT_QUOTIENT: case SHADER_OPCODE_INT_REMAINDER: a923 1 case BRW_OPCODE_ADD3: a925 1 case BRW_OPCODE_CSEL: a929 1 case BRW_OPCODE_DP4A: a967 1 case BRW_OPCODE_ADD3: d1023 1 a1023 1 backend_instruction::writes_accumulator_implicitly(const struct intel_device_info *devinfo) const d1026 1 a1026 1 (devinfo->ver < 6 && d1030 1 a1030 2 (!devinfo->has_pln || devinfo->ver <= 6)) || (eot && devinfo->ver >= 12); /* See Wa_14010017096. */ a1039 1 case BRW_OPCODE_SYNC: d1043 1 a1043 1 case SHADER_OPCODE_GFX4_SCRATCH_WRITE: a1048 1 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL: d1050 1 a1050 3 case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL: case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL: case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT64_LOGICAL: a1051 1 case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL: a1066 8 case SHADER_OPCODE_FLOAT_CONTROL_MODE: case FS_OPCODE_SCHEDULING_FENCE: case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL: case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL: case SHADER_OPCODE_BTD_SPAWN_LOGICAL: case SHADER_OPCODE_BTD_RETIRE_LOGICAL: case RT_OPCODE_TRACE_RAY_LOGICAL: case VEC4_OPCODE_ZERO_OOB_PUSH_REGS: a1083 1 case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL: d1099 1 d1101 3 a1103 2 if (inst == i) return true; d1105 1 a1105 1 return false; a1123 1 assert(block->end_ip_delta == 0); a1138 1 assert(block->end_ip_delta == 0); a1153 1 assert(block->end_ip_delta == 0); d1165 1 a1165 1 backend_instruction::remove(bblock_t *block, bool defer_later_block_ip_updates) d1169 1 a1169 6 if (defer_later_block_ip_updates) { block->end_ip_delta--; } else { assert(block->end_ip_delta == 0); adjust_later_block_ips(block, -1); } a1171 5 if (block->end_ip_delta != 0) { adjust_later_block_ips(block, block->end_ip_delta); block->end_ip_delta = 0; } d1181 1 a1181 1 backend_shader::dump_instructions() const d1187 1 a1187 1 backend_shader::dump_instructions(const char *name) const d1199 1 a1199 1 if (!INTEL_DEBUG(DEBUG_OPTIMIZER)) d1206 1 a1206 1 if (!INTEL_DEBUG(DEBUG_OPTIMIZER)) d1222 1 a1222 7 cfg = new(mem_ctx) cfg_t(this, &this->instructions); } void backend_shader::invalidate_analysis(brw::analysis_dependency_class c) { idom_analysis.invalidate(c); d1233 1 a1234 1 struct brw_compile_stats *stats, d1237 1 a1237 1 const struct intel_device_info *devinfo = compiler->devinfo; a1238 1 const bool debug_enabled = INTEL_DEBUG(DEBUG_TES); a1240 2 prog_data->base.base.stage = MESA_SHADER_TESS_EVAL; d1244 1 a1244 1 brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar); d1247 1 a1247 2 brw_postprocess_nir(nir, compiler, is_scalar, debug_enabled, key->base.robust_buffer_access); d1251 1 a1251 1 nir->info.separate_shader, 1); d1256 1 a1256 1 if (output_size_bytes > GFX7_MAX_DS_URB_ENTRY_SIZE_BYTES) { d1271 7 d1314 1 a1314 1 if (unlikely(debug_enabled)) { d1316 1 a1316 1 brw_print_vue_map(stderr, input_vue_map, MESA_SHADER_TESS_EVAL); d1318 1 a1318 2 brw_print_vue_map(stderr, &prog_data->base.vue_map, MESA_SHADER_TESS_EVAL); d1322 3 a1324 3 fs_visitor v(compiler, log_data, mem_ctx, &key->base, &prog_data->base.base, nir, 8, shader_time_index, debug_enabled); d1335 3 a1337 2 &prog_data->base.base, false, MESA_SHADER_TESS_EVAL); if (unlikely(debug_enabled)) { d1345 1 a1345 4 g.generate_code(v.cfg, 8, v.shader_stats, v.performance_analysis.require(), stats); g.add_const_data(nir->constant_data, nir->constant_data_size); d1350 1 a1350 1 nir, mem_ctx, shader_time_index, debug_enabled); d1357 1 a1357 1 if (unlikely(debug_enabled)) d1361 1 a1361 3 &prog_data->base, v.cfg, v.performance_analysis.require(), stats, debug_enabled); @