diff options
author | Wladimir J. van der Laan <laanwj@gmail.com> | 2013-05-02 19:50:50 +0200 |
---|---|---|
committer | Lucas Stach <l.stach@pengutronix.de> | 2014-07-28 12:32:37 +0200 |
commit | a0b500800e114c931344bdbe07818ee2b142dc9a (patch) | |
tree | dab7232aab023ec0a8cae957f7853e87751c0864 | |
parent | efe8cb1e5374e17fdb04e7cf2025093cfa2d4845 (diff) | |
download | mesa-a0b500800e114c931344bdbe07818ee2b142dc9a.tar.gz mesa-a0b500800e114c931344bdbe07818ee2b142dc9a.tar.xz |
add etna driver for Vivante GCxxxx embedded GPUs
57 files changed, 8814 insertions, 80 deletions
diff --git a/README.md b/README.md new file mode 100644 index 0000000000..b4d288125d --- /dev/null +++ b/README.md @@ -0,0 +1,179 @@ +Etnaviv Mesa fork +================= + +Open source GLES1/2 driver for Vivante GPU hardware. It is capable of running most of the glmark tests +and some games. +This driver has been used to run glquake and d2x and other GLES games so it should be fairly stable. +There may still be quite a few rendering bugs, specific bug reports are very welcome. + +This is being maintained as a set of patches on top of the Mesa main repository. Expect frequent rebases +while in this phase of development. + +Build instructions +------------------- + +To be written. + +My configure script for cubox: +```bash +#!/bin/bash +DIR=... # path to target headers and libraries +ETNAVIV_BASE="${HOME}/projects/etna_viv" +ETNAVIV_LIB="${ETNAVIV_BASE}/native/etnaviv" +ETNAVIV_INC="${ETNAVIV_BASE}/native" + +export TARGET="arm-linux-gnueabihf" +export CFLAGS="-I${DIR}/cubox/include -I${ETNAVIV_INC}" +export CXXFLAGS="-I${DIR}/cubox/include -I${ETNAVIV_INC}" +export LDFLAGS="-L${DIR}/cubox/lib -L${ETNAVIV_LIB}" +export LIBDRM_LIBS="-L${DIR}/cubox/lib -ldrm" + +export ETNA_LIBS="-letnaviv" # important! +export LIBTOOL_FOR_BUILD="/usr/bin/libtool" # important! + +./configure --target=${TARGET} --host=${TARGET} \ + --enable-gles2 --enable-gles1 --disable-glx --enable-egl --enable-dri \ + --with-gallium-drivers=swrast,etna --with-egl-platforms=fbdev \ + --enable-gallium-egl --enable-debug --with-dri-drivers= +``` + +- The etna gallium driver uses `libetnaviv.a` and its headers from the + `etna_viv` project (https://github.com/laanwj/etna_viv) for access to the kernel driver and register descriptions. + *You only need to build libetnaviv by running `make` in `native/etnaviv`*. The rest is part of the test + and reverse engineering framework, and not needed for the driver. + +```bash +export GCABI=v2/v4/dove/imx6/... 
+# rest of cross-compile target settings +cd native/etnaviv +make +``` + +Mesa cross compiling +--------------------- +- libexpat and libdrm need to be available on the target (neither is used at the moment, but they are +dependencies for Mesa). +In many cases these can be copied from the device, after installing the appropriate development package. + +Setup +=================== + +I use this script to set up the framebuffer console for (double or single buffered) rendering, +as well as prevent blanking and avoid screen corruption by hiding the cursor. + + #!/bin/bash + # Set to usable resolution (double buffered) + fbset 1280x1024-60 -vyres 2048 -depth 32 + # Set to usable resolution (single buffered) + #fbset 1280x1024-60 -vyres 1024 -depth 32 + + # Disable automatic blanking + echo -e '\033[9;0]' > /dev/tty1 + echo 0 > /sys/class/graphics/fb0/blank + + # Disable blinking cursor + echo -e '\033[?17;0;0c' > /dev/tty1 + +Switching between Etna and Swrast +-------------------------------- +Frequently it is useful to compare the rendering from etna to the software rasterizer; +this can be done with the environment variable `EGL_FBDEV_DRIVER`, e.g. + + # Run with etna driver + export EGL_FBDEV_DRIVER=etna + (run EGL demo...) + + # Run with software rasterizer + export EGL_FBDEV_DRIVER=swrast + (run EGL demo...) + +Force single buffering +----------------------- + +To force single buffering (without waiting for vsync) use the following: + + export EGL_FBDEV_BUFFERS=1 + +This can be useful for testing or benchmarking. + +Testing +==================== + +This section lists some tests and demos that can be used to exercise the driver. 
+ +Mesatest +------------- +A few testcases that I made especially for this driver, based on the samples from the OpenGL ES 2.0 programming +guide (http://www.opengles-book.com/) can be found here: + +https://github.com/laanwj/mesatest_gles + +Glmark2 +-------------- +Some of the Glmark2 demos already run with this driver, but this is a work in progress. + +This requires a special glmark2 build with fbdev support, which can be obtained here: + +https://code.launchpad.net/~laanwj/glmark2/fbdev + +Fetch: + + bzr branch lp:~laanwj/glmark2/fbdev + +Build: + + ./waf configure --with-flavors=fbdev-glesv2 --data-path=${PWD}/data + ./waf + +Run: + + cd build/src + ./glmark2-es2 -b shading -s 1280x1024 --visual-config alpha=0 + +Support matrix (cubox, v2, gc600): + + OK: + [Scene] build -> renders + [Scene] shading -> renders + [Scene] texture -> renders + [Scene] effect2d -> renders + [Scene] bump -> renders + [Scene] desktop -> renders + [Scene] clear -> shows nothing (is not supposed to, either) + [Scene] pulsar -> renders + [Scene] conditionals -> renders + [Scene] function -> renders + [Scene] jellyfish -> renders on GPUs with SQRT_TRIG + + Corrupted: + [Scene] shadow -> rendering corrupted + [Scene] buffer -> renders, but lines are not right + + Shader assertion: + [Scene] ideas -> missing instruction KILP + [Scene] loop -> missing loops support + + Crash: + [Scene] refract -> memory full + [Scene] terrain -> memory full / shader too long + +Mesa demos +------------- + +Mesa also comes with a few OpenGL ES 1 and 2 demos. These can be found in the following repository: + + git://anongit.freedesktop.org/mesa/demos + +All the demos in `src/egl/opengles1` and `src/egl/opengles2` with fbdev and screen backend work. 
+ +OpenGL ES 1: + +- eglfbdev +- drawtex_screen +- gears_screen +- torus_screen +- tri_screen + +OpenGL ES 2: + +- es2gears_screen diff --git a/configure.ac b/configure.ac index 2aaba6169c..d725f05771 100644 --- a/configure.ac +++ b/configure.ac @@ -132,6 +132,8 @@ AX_GCC_BUILTIN([__builtin_bswap64]) AM_CONDITIONAL([GEN_ASM_OFFSETS], test "x$GEN_ASM_OFFSETS" = xyes) +AC_ARG_VAR(LIBTOOL_FOR_BUILD, Path to the libtool instance to be used for building native executables during cross compilation.) + dnl Make sure the pkg-config macros are defined m4_ifndef([PKG_PROG_PKG_CONFIG], [m4_fatal([Could not locate the pkg-config autoconf macros. @@ -1433,6 +1435,9 @@ AC_SUBST([LLVM_INCLUDEDIR]) AC_SUBST([LLVM_VERSION]) AC_SUBST([CLANG_RESOURCE_DIR]) +AC_SUBST([ETNA_LIBS]) +AC_SUBST([ETNA_CFLAGS]) + case "x$enable_opengl$enable_gles1$enable_gles2" in x*yes*) EGL_CLIENT_APIS="$EGL_CLIENT_APIS "'$(GL_LIB)' @@ -1885,6 +1890,13 @@ if test -n "$with_gallium_drivers"; then gallium_check_st "freedreno/drm" "dri-freedreno" "" "" "" DRICOMMON_NEED_LIBDRM=yes ;; + xetna) + HAVE_GALLIUM_ETNA=yes + #PKG_CHECK_MODULES([FREEDRENO], [libdrm_freedreno >= $LIBDRM_FREEDRENO_REQUIRED]) + #gallium_require_drm_loader + GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS etna" + #gallium_check_st "freedreno/drm" "dri-freedreno" "" "" "" "" + ;; xswrast) HAVE_GALLIUM_SOFTPIPE=yes GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS softpipe" @@ -1954,6 +1966,7 @@ AM_CONDITIONAL(HAVE_GALLIUM_R600, test "x$HAVE_GALLIUM_R600" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_RADEONSI, test "x$HAVE_GALLIUM_RADEONSI" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_NOUVEAU, test "x$HAVE_GALLIUM_NOUVEAU" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_FREEDRENO, test "x$HAVE_GALLIUM_FREEDRENO" = xyes) +AM_CONDITIONAL(HAVE_GALLIUM_ETNA, test "x$HAVE_GALLIUM_ETNA" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_SOFTPIPE, test "x$HAVE_GALLIUM_SOFTPIPE" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_LLVMPIPE, test "x$HAVE_GALLIUM_LLVMPIPE" = xyes) @@ -2091,6 +2104,7 @@ 
AC_CONFIG_FILES([Makefile src/gallium/drivers/softpipe/Makefile src/gallium/drivers/svga/Makefile src/gallium/drivers/trace/Makefile + src/gallium/drivers/etna/Makefile src/gallium/state_trackers/Makefile src/gallium/state_trackers/clover/Makefile src/gallium/state_trackers/dri/Makefile @@ -2135,6 +2149,8 @@ AC_CONFIG_FILES([Makefile src/gallium/tests/trivial/Makefile src/gallium/tests/unit/Makefile src/gallium/winsys/Makefile + src/gallium/winsys/etna/drm/Makefile + src/gallium/winsys/etna/fbdev/Makefile src/gallium/winsys/freedreno/drm/Makefile src/gallium/winsys/i915/drm/Makefile src/gallium/winsys/i915/sw/Makefile diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c index 0c9c349e00..49cc6683d7 100644 --- a/src/gallium/auxiliary/util/u_vbuf.c +++ b/src/gallium/auxiliary/util/u_vbuf.c @@ -184,6 +184,10 @@ struct u_vbuf { uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */ /* Which buffer has a non-zero stride. */ uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */ + /* Which buffers are allowed (supported by hardware). 
*/ + uint32_t allowed_vb_mask; + /* Incompatible index buffer */ + uint32_t incompatible_ib_mask; }; static void * @@ -218,6 +222,10 @@ void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps) screen->is_format_supported(screen, PIPE_FORMAT_R32_SSCALED, PIPE_BUFFER, 0, PIPE_BIND_VERTEX_BUFFER); + caps->index_uint32 = + screen->is_format_supported(screen, PIPE_FORMAT_I32_UINT, PIPE_BUFFER, + 0, PIPE_BIND_INDEX_BUFFER); + caps->buffer_offset_unaligned = !screen->get_param(screen, PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY); @@ -232,6 +240,9 @@ void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps) caps->user_vertex_buffers = screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS); + + caps->max_vertex_buffers = + screen->get_param(screen, PIPE_CAP_MAX_VERTEX_BUFFERS); } struct u_vbuf * @@ -246,6 +257,7 @@ u_vbuf_create(struct pipe_context *pipe, mgr->cso_cache = cso_cache_create(); mgr->translate_cache = translate_cache_create(); memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs)); + mgr->allowed_vb_mask = (1 << mgr->caps.max_vertex_buffers) - 1; mgr->uploader = u_upload_create(pipe, 1024 * 1024, 4, PIPE_BIND_VERTEX_BUFFER); @@ -455,14 +467,15 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key, static boolean u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr, - unsigned mask[VB_NUM]) + unsigned mask[VB_NUM], + uint32_t extra_free_vb_mask) { unsigned type; unsigned fallback_vbs[VB_NUM]; /* Set the bit for each buffer which is incompatible, or isn't set. 
*/ uint32_t unused_vb_mask = - mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask | - ~mgr->enabled_vb_mask; + (mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask | + ~mgr->enabled_vb_mask | extra_free_vb_mask) & mgr->allowed_vb_mask; memset(fallback_vbs, ~0, sizeof(fallback_vbs)); @@ -504,7 +517,7 @@ u_vbuf_translate_begin(struct u_vbuf *mgr, unsigned i, type; unsigned incompatible_vb_mask = mgr->incompatible_vb_mask & mgr->ve->used_vb_mask; - + uint32_t extra_free_vb_mask = 0; int start[VB_NUM] = { start_vertex, /* VERTEX */ start_instance, /* INSTANCE */ @@ -546,11 +559,17 @@ u_vbuf_translate_begin(struct u_vbuf *mgr, mask[VB_VERTEX] |= 1 << vb_index; } } - assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]); + /* In the case of unroll_indices, we can regard all non-constant + * vertex buffers with only non-instance vertex elements as incompatible + * and thus free. + */ + if(unroll_indices) + extra_free_vb_mask = mask[VB_VERTEX] & ~mask[VB_INSTANCE]; + /* Find free vertex buffer slots. */ - if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) { + if (!u_vbuf_translate_find_free_vb_slots(mgr, mask, extra_free_vb_mask)) { return FALSE; } @@ -767,6 +786,18 @@ u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count, } } + if(used_buffers & ~mgr->allowed_vb_mask) + { + /* More vertex buffers are used than the hardware supports. In + * principle, we only need to make sure that less vertex buffers are + * used, and mark some of the latter vertex buffers as incompatible. + * For now, mark all vertex buffers as incompatible. 
+ */ + ve->incompatible_vb_mask_any = used_buffers; + ve->compatible_vb_mask_any = 0; + ve->incompatible_elem_mask = (1<<count)-1; + } + ve->used_vb_mask = used_buffers; ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers; ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers; @@ -778,8 +809,12 @@ u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count, } } - ve->driver_cso = - pipe->create_vertex_elements_state(pipe, count, driver_attribs); + /* Only create driver CSO if no incompatible elements */ + if(!ve->incompatible_elem_mask) + { + ve->driver_cso = + pipe->create_vertex_elements_state(pipe, count, driver_attribs); + } return ve; } @@ -890,8 +925,10 @@ void u_vbuf_set_index_buffer(struct u_vbuf *mgr, assert(ib->offset % ib->index_size == 0); pipe_resource_reference(&mgr->index_buffer.buffer, ib->buffer); memcpy(&mgr->index_buffer, ib, sizeof(*ib)); + mgr->incompatible_ib_mask = !mgr->caps.index_uint32 && ib->index_size==4; } else { pipe_resource_reference(&mgr->index_buffer.buffer, NULL); + mgr->incompatible_ib_mask = 0; } pipe->set_index_buffer(pipe, ib); @@ -996,7 +1033,8 @@ static boolean u_vbuf_need_minmax_index(struct u_vbuf *mgr) return (mgr->ve->used_vb_mask & ((mgr->user_vb_mask | mgr->incompatible_vb_mask | mgr->ve->incompatible_vb_mask_any) & - mgr->ve->noninstance_vb_mask_any & mgr->nonzero_stride_vb_mask)) != 0; + mgr->ve->noninstance_vb_mask_any & mgr->nonzero_stride_vb_mask)) != 0 || + mgr->incompatible_ib_mask; } static boolean u_vbuf_mapping_vertex_buffer_blocks(struct u_vbuf *mgr) @@ -1168,11 +1206,13 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) /* Primitive restart doesn't work when unrolling indices. * We would have to break this drawing operation into several ones. */ /* Use some heuristic to see if unrolling indices improves - * performance. */ - if (!info->primitive_restart && + * performance. 
Force unroll indices always if the index format is + * incompatible (don't support primitive restart in this case...). */ + if ((!info->primitive_restart && num_vertices > info->count*2 && num_vertices-info->count > 32 && - !u_vbuf_mapping_vertex_buffer_blocks(mgr)) { + !u_vbuf_mapping_vertex_buffer_blocks(mgr)) + || mgr->incompatible_ib_mask) { /*printf("num_vertices=%i count=%i\n", num_vertices, info->count);*/ unroll_indices = TRUE; user_vb_mask &= ~(mgr->nonzero_stride_vb_mask & diff --git a/src/gallium/auxiliary/util/u_vbuf.h b/src/gallium/auxiliary/util/u_vbuf.h index a608184e59..1a878c4d8b 100644 --- a/src/gallium/auxiliary/util/u_vbuf.h +++ b/src/gallium/auxiliary/util/u_vbuf.h @@ -49,6 +49,9 @@ struct u_vbuf_caps { unsigned format_norm32:1; /* PIPE_FORMAT_*32*NORM */ unsigned format_scaled32:1; /* PIPE_FORMAT_*32*SCALED */ + /* Index format CAPs. */ + unsigned index_uint32:1; /* PIPE_FORMAT_I32_UINT */ + /* Whether vertex fetches don't have to be 4-byte-aligned. */ /* TRUE if hardware supports it. */ unsigned buffer_offset_unaligned:1; @@ -57,6 +60,9 @@ struct u_vbuf_caps { /* Whether the driver supports user vertex buffers. */ unsigned user_vertex_buffers:1; + + /* Maximum number of vertex buffers */ + unsigned max_vertex_buffers:7; }; diff --git a/src/gallium/drivers/Makefile.am b/src/gallium/drivers/Makefile.am index f8baa3cf92..c2cd60e7ca 100644 --- a/src/gallium/drivers/Makefile.am +++ b/src/gallium/drivers/Makefile.am @@ -85,6 +85,14 @@ endif ################################################################################ +if HAVE_GALLIUM_ETNA + +SUBDIRS += etna + +endif + +################################################################################ + if NEED_GALLIUM_SOFTPIPE_DRIVER SUBDIRS += softpipe diff --git a/src/gallium/drivers/etna/Makefile.am b/src/gallium/drivers/etna/Makefile.am new file mode 100644 index 0000000000..9777938a82 --- /dev/null +++ b/src/gallium/drivers/etna/Makefile.am @@ -0,0 +1,34 @@ +# Copyright © 2013 W.J. 
van der Laan +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +include Makefile.sources +include $(top_srcdir)/src/gallium/Automake.inc + +noinst_LTLIBRARIES = libetna.la + +AM_CPPFLAGS = \ + -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/include \ + $(GALLIUM_CFLAGS) \ + $(NOUVEAU_CFLAGS) + +libetna_la_SOURCES = $(C_SOURCES) $(CPP_SOURCES) diff --git a/src/gallium/drivers/etna/Makefile.sources b/src/gallium/drivers/etna/Makefile.sources new file mode 100644 index 0000000000..a74ae86847 --- /dev/null +++ b/src/gallium/drivers/etna/Makefile.sources @@ -0,0 +1,16 @@ +C_SOURCES := etna_asm.c \ + etna_blend.c \ + etna_clear_blit.c \ + etna_compiler.c \ + etna_fence.c \ + etna_pipe.c \ + etna_rasterizer.c \ + etna_resource.c \ + etna_screen.c \ + etna_shader.c \ + etna_surface.c \ + etna_texture.c \ + etna_transfer.c \ + etna_zsa.c + +CPP_SOURCES := diff --git a/src/gallium/drivers/etna/etna_asm.c b/src/gallium/drivers/etna/etna_asm.c new file mode 100644 index 0000000000..624da4e551 --- /dev/null +++ b/src/gallium/drivers/etna/etna_asm.c @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include "etna_asm.h" +#include "etna_debug.h" + +#include <etnaviv/isa.xml.h> + +/* Return whether the rgroup is one of the uniforms */ +static inline int rgroup_is_uniform(unsigned rgroup) +{ + return rgroup == INST_RGROUP_UNIFORM_0 || + rgroup == INST_RGROUP_UNIFORM_1; +} + +/** An instruction can only read from one distinct uniform. + * This function verifies this property and returns true if the instruction + * is deemed correct and false otherwise. + */ +static bool check_uniforms(const struct etna_inst *inst) +{ + unsigned uni_rgroup = -1; + unsigned uni_reg = -1; + bool conflict = false; + for(int src=0; src<3; ++src) + { + if(rgroup_is_uniform(inst->src[src].rgroup)) + { + if(uni_reg == -1) /* first uniform used */ + { + uni_rgroup = inst->src[src].rgroup; + uni_reg = inst->src[src].reg; + } else { /* second or later; check that it is a re-use */ + if(uni_rgroup != inst->src[src].rgroup || + uni_reg != inst->src[src].reg) + { + conflict = true; + } + } + } + } + return !conflict; +} + +int etna_assemble(uint32_t *out, const struct etna_inst *inst) +{ + if(inst->imm && inst->src[2].use) + return 1; /* cannot have both src2 and imm */ + + if(!check_uniforms(inst)) + { + DBG("warning: generating instruction that accesses two different uniforms"); + } + + out[0] = VIV_ISA_WORD_0_OPCODE(inst->opcode) | + VIV_ISA_WORD_0_COND(inst->cond) | + (inst->sat ? VIV_ISA_WORD_0_SAT : 0) | + (inst->dst.use ? 
VIV_ISA_WORD_0_DST_USE : 0) | + VIV_ISA_WORD_0_DST_AMODE(inst->dst.amode) | + VIV_ISA_WORD_0_DST_REG(inst->dst.reg) | + VIV_ISA_WORD_0_DST_COMPS(inst->dst.comps) | + VIV_ISA_WORD_0_TEX_ID(inst->tex.id); + out[1] = VIV_ISA_WORD_1_TEX_AMODE(inst->tex.amode) | + VIV_ISA_WORD_1_TEX_SWIZ(inst->tex.swiz) | + (inst->src[0].use ? VIV_ISA_WORD_1_SRC0_USE : 0) | + VIV_ISA_WORD_1_SRC0_REG(inst->src[0].reg) | + VIV_ISA_WORD_1_SRC0_SWIZ(inst->src[0].swiz) | + (inst->src[0].neg ? VIV_ISA_WORD_1_SRC0_NEG : 0) | + (inst->src[0].abs ? VIV_ISA_WORD_1_SRC0_ABS : 0); + out[2] = VIV_ISA_WORD_2_SRC0_AMODE(inst->src[0].amode) | + VIV_ISA_WORD_2_SRC0_RGROUP(inst->src[0].rgroup) | + (inst->src[1].use ? VIV_ISA_WORD_2_SRC1_USE : 0) | + VIV_ISA_WORD_2_SRC1_REG(inst->src[1].reg) | + VIV_ISA_WORD_2_SRC1_SWIZ(inst->src[1].swiz) | + (inst->src[1].neg ? VIV_ISA_WORD_2_SRC1_NEG : 0) | + (inst->src[1].abs ? VIV_ISA_WORD_2_SRC1_ABS : 0) | + VIV_ISA_WORD_2_SRC1_AMODE(inst->src[1].amode); + out[3] = VIV_ISA_WORD_3_SRC1_RGROUP(inst->src[1].rgroup) | + (inst->src[2].use ? VIV_ISA_WORD_3_SRC2_USE : 0) | + VIV_ISA_WORD_3_SRC2_REG(inst->src[2].reg) | + VIV_ISA_WORD_3_SRC2_SWIZ(inst->src[2].swiz) | + (inst->src[2].neg ? VIV_ISA_WORD_3_SRC2_NEG : 0) | + (inst->src[2].abs ? 
VIV_ISA_WORD_3_SRC2_ABS : 0) | + VIV_ISA_WORD_3_SRC2_AMODE(inst->src[2].amode) | + VIV_ISA_WORD_3_SRC2_RGROUP(inst->src[2].rgroup); + out[3] |= VIV_ISA_WORD_3_SRC2_IMM(inst->imm); + return 0; +} + +int etna_assemble_set_imm(uint32_t *out, uint32_t imm) +{ + out[3] |= VIV_ISA_WORD_3_SRC2_IMM(imm); + return 0; +} diff --git a/src/gallium/drivers/etna/etna_asm.h b/src/gallium/drivers/etna/etna_asm.h new file mode 100644 index 0000000000..3a4d101404 --- /dev/null +++ b/src/gallium/drivers/etna/etna_asm.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +/* Utilities for generating low-level ISA instructions */ +#ifndef H_ETNA_ASM +#define H_ETNA_ASM +#include <stdint.h> + +/* Size of an instruction in 32-bit words */ +#define ETNA_INST_SIZE (4) +/* Number of source operands per instruction */ +#define ETNA_NUM_SRC (3) + +/*** operands ***/ + +/* destination operand */ +struct etna_inst_dst +{ + unsigned use:1; /* 0: not in use, 1: in use */ + unsigned amode:3; /* INST_AMODE_* */ + unsigned reg:7; /* register number 0..127 */ + unsigned comps:4; /* INST_COMPS_* */ +}; + +/* texture operand */ +struct etna_inst_tex +{ + unsigned id:5; /* sampler id */ + unsigned amode:3; /* INST_AMODE_* */ + unsigned swiz:8; /* INST_SWIZ */ +}; + +/* source operand */ +struct etna_inst_src +{ + unsigned use:1; /* 0: not in use, 1: in use */ + unsigned reg:9; /* register or uniform number 0..511 */ + unsigned swiz:8; /* INST_SWIZ */ + unsigned neg:1; /* negate (flip sign) if set */ + unsigned abs:1; /* absolute (remove sign) if set */ + unsigned amode:3; /* INST_AMODE_* */ + unsigned rgroup:3; /* INST_RGROUP_* */ +}; + +/*** instruction ***/ +struct etna_inst +{ + uint8_t opcode; /* INST_OPCODE_* */ + unsigned cond:5; /* INST_CONDITION_* */ + unsigned sat:1; /* saturate result between 0..1 */ + struct etna_inst_dst dst; /* destination operand */ + struct etna_inst_tex tex; /* texture operand */ + struct etna_inst_src src[ETNA_NUM_SRC]; /* source operand */ + unsigned imm; /* takes place of src[2] for BRANCH/CALL */ +}; + +/** + * Build vivante instruction from structure with + * opcode, cond, sat, dst_use, dst_amode, + * dst_reg, dst_comps, tex_id, tex_amode, tex_swiz, + * src[0-2]_reg, use, swiz, neg, abs, amode, rgroup, + * imm + * + * Return 0 if successful, and a non-zero + * value otherwise. + */ +int etna_assemble(uint32_t *out, const struct etna_inst *inst); + +/** + * Set field imm of already-assembled instruction. + * This is used for filling in jump destinations in a separate pass. 
+ */ +int etna_assemble_set_imm(uint32_t *out, uint32_t imm); + +#endif diff --git a/src/gallium/drivers/etna/etna_blend.c b/src/gallium/drivers/etna/etna_blend.c new file mode 100644 index 0000000000..4387bba37b --- /dev/null +++ b/src/gallium/drivers/etna/etna_blend.c @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +/* Blending CSOs */ +#include "etna_blend.h" + +#include "etna_internal.h" +#include "etna_pipe.h" +#include "etna_translate.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_memory.h" + +#include <etnaviv/common.xml.h> +#include <etnaviv/state.xml.h> +#include <etnaviv/state_3d.xml.h> + +static void *etna_pipe_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *bs) +{ + //struct etna_pipe_context *priv = etna_pipe_context(pipe); + struct compiled_blend_state *cs = CALLOC_STRUCT(compiled_blend_state); + const struct pipe_rt_blend_state *rt0 = &bs->rt[0]; + bool enable = rt0->blend_enable && !(rt0->rgb_src_factor == PIPE_BLENDFACTOR_ONE && rt0->rgb_dst_factor == PIPE_BLENDFACTOR_ZERO && + rt0->alpha_src_factor == PIPE_BLENDFACTOR_ONE && rt0->alpha_dst_factor == PIPE_BLENDFACTOR_ZERO); + bool separate_alpha = enable && !(rt0->rgb_src_factor == rt0->alpha_src_factor && + rt0->rgb_dst_factor == rt0->alpha_dst_factor); + bool full_overwrite = (rt0->colormask == 15) && !enable; + if(enable) + { + cs->PE_ALPHA_CONFIG = + VIVS_PE_ALPHA_CONFIG_BLEND_ENABLE_COLOR | + (separate_alpha ? VIVS_PE_ALPHA_CONFIG_BLEND_SEPARATE_ALPHA : 0) | + VIVS_PE_ALPHA_CONFIG_SRC_FUNC_COLOR(translate_blend_factor(rt0->rgb_src_factor)) | + VIVS_PE_ALPHA_CONFIG_SRC_FUNC_ALPHA(translate_blend_factor(rt0->alpha_src_factor)) | + VIVS_PE_ALPHA_CONFIG_DST_FUNC_COLOR(translate_blend_factor(rt0->rgb_dst_factor)) | + VIVS_PE_ALPHA_CONFIG_DST_FUNC_ALPHA(translate_blend_factor(rt0->alpha_dst_factor)) | + VIVS_PE_ALPHA_CONFIG_EQ_COLOR(translate_blend(rt0->rgb_func)) | + VIVS_PE_ALPHA_CONFIG_EQ_ALPHA(translate_blend(rt0->alpha_func)); + } else { + cs->PE_ALPHA_CONFIG = 0; + } + /* XXX should colormask be used if enable==false? */ + cs->PE_COLOR_FORMAT = + VIVS_PE_COLOR_FORMAT_COMPONENTS(rt0->colormask) | + (full_overwrite ? VIVS_PE_COLOR_FORMAT_OVERWRITE : 0); + cs->PE_LOGIC_OP = + VIVS_PE_LOGIC_OP_OP(bs->logicop_enable ? 
bs->logicop_func : LOGIC_OP_COPY) /* 1-to-1 mapping */ | + 0x000E4000 /* ??? */; + /* independent_blend_enable not needed: only one rt supported */ + /* XXX alpha_to_coverage / alpha_to_one? */ + /* XXX dither? VIVS_PE_DITHER(...) and/or VIVS_RS_DITHER(...) on resolve */ + if(bs->dither) + { + cs->PE_DITHER[0] = 0x6e4ca280; + cs->PE_DITHER[1] = 0x5d7f91b3; + } else { + cs->PE_DITHER[0] = 0xffffffff; + cs->PE_DITHER[1] = 0xffffffff; + } + return cs; +} + +static void etna_pipe_bind_blend_state(struct pipe_context *pipe, void *bs) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + priv->dirty_bits |= ETNA_STATE_BLEND; + priv->blend_p = bs; + if(bs) + priv->blend = *(struct compiled_blend_state*)bs; +} + +static void etna_pipe_delete_blend_state(struct pipe_context *pipe, void *bs) +{ + //struct etna_pipe_context *priv = etna_pipe_context(pipe); + FREE(bs); +} + +void etna_pipe_blend_init(struct pipe_context *pc) +{ + pc->create_blend_state = etna_pipe_create_blend_state; + pc->bind_blend_state = etna_pipe_bind_blend_state; + pc->delete_blend_state = etna_pipe_delete_blend_state; +} diff --git a/src/gallium/drivers/etna/etna_blend.h b/src/gallium/drivers/etna/etna_blend.h new file mode 100644 index 0000000000..9be9e9c6f7 --- /dev/null +++ b/src/gallium/drivers/etna/etna_blend.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of 
the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +/* Blending CSOs */ +#ifndef H_ETNA_BLEND +#define H_ETNA_BLEND + +struct pipe_context; + +void etna_pipe_blend_init(struct pipe_context *pipe); + +#endif diff --git a/src/gallium/drivers/etna/etna_clear_blit.c b/src/gallium/drivers/etna/etna_clear_blit.c new file mode 100644 index 0000000000..0b0793a1e0 --- /dev/null +++ b/src/gallium/drivers/etna/etna_clear_blit.c @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +/* Clearing and blitting functionality */ +#include "etna_clear_blit.h" + +#include "etna_internal.h" +#include "etna_pipe.h" +#include "etna_resource.h" +#include "etna_translate.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_blitter.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_surface.h" + +#include <etnaviv/common.xml.h> +#include <etnaviv/state.xml.h> +#include <etnaviv/state_3d.xml.h> + +/* Save current state for blitter operation */ +static void etna_pipe_blit_save_state(struct pipe_context *pipe) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + util_blitter_save_vertex_buffer_slot(priv->blitter, &priv->vertex_buffer_s[0]); + util_blitter_save_vertex_elements(priv->blitter, priv->vertex_elements_p); + util_blitter_save_vertex_shader(priv->blitter, priv->vs); + util_blitter_save_rasterizer(priv->blitter, priv->rasterizer_p); + util_blitter_save_viewport(priv->blitter, &priv->viewport_s); + util_blitter_save_scissor(priv->blitter, &priv->scissor_s); + util_blitter_save_fragment_shader(priv->blitter, priv->fs); + util_blitter_save_blend(priv->blitter, priv->blend_p); + util_blitter_save_depth_stencil_alpha(priv->blitter, priv->depth_stencil_alpha_p); + util_blitter_save_stencil_ref(priv->blitter, &priv->stencil_ref_s); + util_blitter_save_sample_mask(priv->blitter, priv->sample_mask_s); + util_blitter_save_framebuffer(priv->blitter, &priv->framebuffer_s); + util_blitter_save_fragment_sampler_states(priv->blitter, + priv->num_fragment_samplers, + (void **)priv->sampler); + util_blitter_save_fragment_sampler_views(priv->blitter, + priv->num_fragment_sampler_views, priv->sampler_view_s); +} + +/* Generate clear 
command for a surface (non-TS case) */ +void etna_rs_gen_clear_surface(struct compiled_rs_state *rs_state, struct etna_surface *surf, uint32_t clear_value) +{ + uint bs = util_format_get_blocksize(surf->base.format); + uint format = 0; + switch(bs) + { + case 2: format = RS_FORMAT_A1R5G5B5; break; + case 4: format = RS_FORMAT_A8R8G8B8; break; + default: printf("etna_rs_gen_clear_surface: Unhandled clear blocksize: %i (fmt %i)\n", bs, surf->base.format); + format = RS_FORMAT_A8R8G8B8; + assert(0); + } + /* use tiled clear if width is multiple of 16 */ + bool tiled_clear = (surf->surf.padded_width & ETNA_RS_WIDTH_MASK) == 0 && + (surf->surf.padded_height & ETNA_RS_HEIGHT_MASK) == 0; + etna_compile_rs_state(rs_state, &(struct rs_state){ + .source_format = format, + .dest_format = format, + .dest_addr = surf->surf.address, + .dest_stride = surf->surf.stride, + .dest_tiling = tiled_clear ? surf->layout : ETNA_LAYOUT_LINEAR, + .dither = {0xffffffff, 0xffffffff}, + .width = surf->surf.padded_width, /* These must be padded to 16x4 if !LINEAR, otherwise RS will hang */ + .height = surf->surf.padded_height, + .clear_value = {clear_value}, + .clear_mode = VIVS_RS_CLEAR_CONTROL_MODE_ENABLED1, + .clear_bits = 0xffff + }); +} + +/* Clear the bound framebuffer attachments selected by `buffers` (PIPE_CLEAR_COLOR and/or PIPE_CLEAR_DEPTHSTENCIL). + * Surfaces with a TS address only get the matching TS clear value updated (and ETNA_STATE_TS marked dirty); + * other surfaces get their RS clear command regenerated when the clear value changed. + * In both cases the surface's clear command is then submitted and the new clear value is remembered. + */ +static void etna_pipe_clear(struct pipe_context *pipe, + unsigned buffers, + const union pipe_color_union *color, + double depth, + unsigned stencil) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + /* Flush color and depth cache before clearing anything. + * This is especially important when coming from another surface, as otherwise it may clear + * part of the old surface instead. + */ + etna_set_state(priv->ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH); + etna_stall(priv->ctx, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE); + /* Flush the TS. This must be done after flushing color and depth, otherwise it can result in crashes + * at least on cubox. 
*/ + etna_set_state(priv->ctx, VIVS_TS_FLUSH_CACHE, VIVS_TS_FLUSH_CACHE_FLUSH); /* XXX only needed if cbuf or zbuf has TS */ + /* No need to set up the TS here with sync_context. + * RS clear operations (in contrast to resolve and copy) do not require the TS state. + */ + /* Need to update clear command in non-TS (fast clear) case *if* + * clear value is different from previous time. + */ + if(buffers & PIPE_CLEAR_COLOR) + { + for(int idx=0; idx<priv->framebuffer_s.nr_cbufs; ++idx) + { + struct etna_surface *surf = etna_surface(priv->framebuffer_s.cbufs[idx]); + uint32_t new_clear_value = translate_clear_color(surf->base.format, &color[idx]); + if(surf->surf.ts_address) /* TS: use precompiled clear command */ + { + if(unlikely(priv->framebuffer.TS_COLOR_CLEAR_VALUE != new_clear_value)) + { + priv->framebuffer.TS_COLOR_CLEAR_VALUE = new_clear_value; + priv->dirty_bits |= ETNA_STATE_TS; + } + } + else if(unlikely(new_clear_value != surf->level->clear_value)) /* Queue normal RS clear for non-TS surfaces */ + { + etna_rs_gen_clear_surface(&surf->clear_command, surf, new_clear_value); + } + etna_submit_rs_state(priv->ctx, &surf->clear_command); + surf->level->clear_value = new_clear_value; + } + } + if((buffers & PIPE_CLEAR_DEPTHSTENCIL) && priv->framebuffer_s.zsbuf != NULL) + { + struct etna_surface *surf = etna_surface(priv->framebuffer_s.zsbuf); + uint32_t new_clear_value = translate_clear_depth_stencil(surf->base.format, depth, stencil); + if(surf->surf.ts_address) /* TS: use precompiled clear command */ + { + if(unlikely(priv->framebuffer.TS_DEPTH_CLEAR_VALUE != new_clear_value)) /* compare against the cached depth (not color) clear value */ + { + priv->framebuffer.TS_DEPTH_CLEAR_VALUE = new_clear_value; + priv->dirty_bits |= ETNA_STATE_TS; + } + } else if(unlikely(new_clear_value != surf->level->clear_value)) /* Queue normal RS clear for non-TS surfaces */ + { + etna_rs_gen_clear_surface(&surf->clear_command, surf, new_clear_value); + } + etna_submit_rs_state(priv->ctx, &surf->clear_command); + surf->level->clear_value = 
new_clear_value; + } + etna_stall(priv->ctx, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE); +} + +static void etna_pipe_clear_render_target(struct pipe_context *pipe, + struct pipe_surface *dst, + const union pipe_color_union *color, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + /* XXX could fall back to RS when target area is full screen / resolveable and no TS. */ + etna_pipe_blit_save_state(pipe); + util_blitter_clear_render_target(priv->blitter, dst, color, dstx, dsty, width, height); +} + +static void etna_pipe_clear_depth_stencil(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned clear_flags, + double depth, + unsigned stencil, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + /* XXX could fall back to RS when target area is full screen / resolveable and no TS. */ + etna_pipe_blit_save_state(pipe); + util_blitter_clear_depth_stencil(priv->blitter, dst, clear_flags, depth, stencil, dstx, dsty, width, height); +} + +static void etna_pipe_resource_copy_region(struct pipe_context *pipe, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + /* The resource must be of the same format. */ + assert(src->format == dst->format); + /* Resources with nr_samples > 1 are not allowed. */ + assert(src->nr_samples == 1 && dst->nr_samples == 1); + /* XXX we can use the RS as a literal copy engine here + * the only complexity is tiling; the size of the boxes needs to be aligned to the tile size + * how to handle the case where a resource is copied from/to a non-aligned position? + * from non-aligned: can fall back to rendering-based copy? + * to non-aligned: can fall back to rendering-based copy? 
+ * XXX this goes wrong when source surface is supertiled. + */ + etna_pipe_blit_save_state(pipe); + util_blitter_copy_texture(priv->blitter, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box, + PIPE_MASK_RGBA, false); + etna_resource_touch(pipe, dst); + etna_resource_touch(pipe, src); +} + +static void etna_pipe_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info) +{ + /* This is a more extended version of resource_copy_region */ + /* TODO Some cases can be handled by RS; if not, fall back to rendering */ + /* copy block of pixels from info->src to info->dst (resource, level, box, format); + * function is used for scaling, flipping in x and y direction (negative width/height), format conversion, mask and filter + * and even a scissor rectangle + * + * What can the RS do for us: + * convert between tiling formats (layouts) + * downsample 2x in x and y + * convert between a limited number of pixel formats + * + * For the rest, fall back to util_blitter + * XXX this goes wrong when source surface is supertiled. 
+ */ + struct pipe_blit_info info = *blit_info; + struct etna_pipe_context *priv = etna_pipe_context(pipe); + if (info.src.resource->nr_samples > 1 && + info.dst.resource->nr_samples <= 1 && + !util_format_is_depth_or_stencil(info.src.resource->format) && + !util_format_is_pure_integer(info.src.resource->format)) { + DBG("color resolve unimplemented"); + return; + } + if (util_try_blit_via_copy_region(pipe, blit_info)) { + return; /* done */ + } + if (info.mask & PIPE_MASK_S) { + DBG("cannot blit stencil, skipping"); + info.mask &= ~PIPE_MASK_S; + } + + if (!util_blitter_is_blit_supported(priv->blitter, &info)) { + DBG("blit unsupported %s -> %s", + util_format_short_name(info.src.resource->format), + util_format_short_name(info.dst.resource->format)); + return; + } + + etna_pipe_blit_save_state(pipe); + util_blitter_blit(priv->blitter, &info); + etna_resource_touch(pipe, info.src.resource); + etna_resource_touch(pipe, info.dst.resource); +} + + +void etna_pipe_clear_blit_init(struct pipe_context *pc) +{ + struct etna_pipe_context *priv = etna_pipe_context(pc); + pc->clear = etna_pipe_clear; + pc->clear_render_target = etna_pipe_clear_render_target; + pc->clear_depth_stencil = etna_pipe_clear_depth_stencil; + pc->resource_copy_region = etna_pipe_resource_copy_region; + pc->blit = etna_pipe_blit; + + priv->blitter = util_blitter_create(pc); +} + +void etna_pipe_clear_blit_destroy(struct pipe_context *pc) +{ + struct etna_pipe_context *priv = etna_pipe_context(pc); + if (priv->blitter) + util_blitter_destroy(priv->blitter); +} + diff --git a/src/gallium/drivers/etna/etna_clear_blit.h b/src/gallium/drivers/etna/etna_clear_blit.h new file mode 100644 index 0000000000..097c307b5b --- /dev/null +++ b/src/gallium/drivers/etna/etna_clear_blit.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to 
deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +/* Clearing and blitting functionality */ +#ifndef H_ETNA_CLEAR_BLIT +#define H_ETNA_CLEAR_BLIT + +#include <stdint.h> + +struct pipe_context; +struct etna_surface; +struct compiled_rs_state; + +void etna_rs_gen_clear_surface(struct compiled_rs_state *rs_state, struct etna_surface *surf, uint32_t clear_value); + +void etna_pipe_clear_blit_init(struct pipe_context *pipe); + +void etna_pipe_clear_blit_destroy(struct pipe_context *pipe); + +#endif + + diff --git a/src/gallium/drivers/etna/etna_compiler.c b/src/gallium/drivers/etna/etna_compiler.c new file mode 100644 index 0000000000..d37e5515ff --- /dev/null +++ b/src/gallium/drivers/etna/etna_compiler.c @@ -0,0 +1,1825 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, 
sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* TGSI->Vivante shader ISA conversion */ + +/* What does the compiler return (see etna_shader_object)? + * 1) instruction data + * 2) input-to-temporary mapping (fixed for ps) + * *) in case of ps, semantic -> varying id mapping + * *) for each varying: number of components used (r, rg, rgb, rgba) + * 3) temporary-to-output mapping (in case of vs, fixed for ps) + * 4) for each input/output: possible semantic (position, color, glpointcoord, ...) + * 5) immediates base offset, immediates data + * 6) used texture units (and possibly the TGSI_TEXTURE_* type); not needed to configure the hw, but useful + * for error checking + * 7) enough information to add the z=(z+w)/2.0 necessary for older chips (output reg id is enough) + * + * Empty shaders are not allowed, should always at least generate a NOP. Also if there is a label + * at the end of the shader, an extra NOP should be generated as jump target. 
+ * + * TODO + * * Allow loops + * * Use an instruction scheduler + * * Avoid using more than one uniform in one instruction (can be used in multiple arguments) + */ +#include "etna_compiler.h" +#include "etna_asm.h" +#include "etna_internal.h" +#include "etna_debug.h" + +#include "tgsi/tgsi_iterate.h" +#include "tgsi/tgsi_strings.h" +#include "tgsi/tgsi_util.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_memory.h" +#include "util/u_math.h" + +#include <etnaviv/etna.h> +#include <etnaviv/etna_util.h> +#include <etnaviv/isa.xml.h> +#include <etnaviv/state_3d.xml.h> + +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +/* Broadcast swizzle to all four components */ +#define INST_SWIZ_BROADCAST(x) \ + (INST_SWIZ_X(x) | INST_SWIZ_Y(x) | INST_SWIZ_Z(x) | INST_SWIZ_W(x)) +/* Identity (NOP) swizzle */ +#define INST_SWIZ_IDENTITY \ + (INST_SWIZ_X(0) | INST_SWIZ_Y(1) | INST_SWIZ_Z(2) | INST_SWIZ_W(3)) + +/* Compose two swizzles (computes swz1.swz2) */ +static inline uint32_t inst_swiz_compose(uint32_t swz1, uint32_t swz2) +{ + return INST_SWIZ_X((swz1 >> (((swz2 >> 0)&3)*2))&3) | + INST_SWIZ_Y((swz1 >> (((swz2 >> 2)&3)*2))&3) | + INST_SWIZ_Z((swz1 >> (((swz2 >> 4)&3)*2))&3) | + INST_SWIZ_W((swz1 >> (((swz2 >> 6)&3)*2))&3); +} + +/* Native register description structure */ +struct etna_native_reg +{ + unsigned valid:1; + unsigned is_tex:1; /* is texture unit, overrides rgroup */ + unsigned rgroup:3; + unsigned id:9; +}; + +struct etna_reg_desc +{ + enum tgsi_file_type file; /* IN, OUT, TEMP, ... 
*/ + int idx; /* index into file */ + bool active; /* used in program */ + int first_use; /* instruction id of first use (scope begin) */ + int last_use; /* instruction id of last use (scope end, inclusive) */ + + struct etna_native_reg native; /* native register to map to */ + unsigned usage_mask:4; /* usage, per channel */ + bool has_semantic; /* register has associated TGSI semantic */ + struct tgsi_declaration_semantic semantic; /* TGSI semantic */ + struct tgsi_declaration_interp interp; /* Interpolation type */ +}; + +/* Label information structure */ +struct etna_compile_label +{ + int inst_idx; /* Instruction id that label points to */ +}; + +enum etna_compile_frame_type { + ETNA_COMPILE_FRAME_IF, /* IF/ELSE/ENDIF */ +}; + +/* nesting scope frame (LOOP, IF, ...) during compilation + */ +struct etna_compile_frame +{ + enum etna_compile_frame_type type; + struct etna_compile_label *lbl_else; + struct etna_compile_label *lbl_endif; +}; + +/* scratch area for compiling shader, freed after compilation finishes */ +struct etna_compile_data +{ + uint processor; /* TGSI_PROCESSOR_... */ + + /* Register descriptions, per TGSI file, per register index */ + struct etna_reg_desc *file[TGSI_FILE_COUNT]; + + /* Number of registers in each TGSI file (max register+1) */ + uint file_size[TGSI_FILE_COUNT]; + + /* Keep track of TGSI register declarations */ + struct etna_reg_desc decl[ETNA_MAX_DECL]; + uint total_decls; + + /* Bitmap of dead instructions which are removed in a separate pass */ + bool dead_inst[ETNA_MAX_TOKENS]; /* mark dead input instructions */ + + /* Immediate data */ + uint32_t imm_data[ETNA_MAX_IMM]; + uint32_t imm_base; /* base of immediates (in 32 bit units) */ + uint32_t imm_size; /* size of immediates (in 32 bit units) */ + + /* Next free native register, for register allocation */ + uint32_t next_free_native; + + /* Temporary register for use within translated TGSI instruction, + * only allocated when needed. 
+ */ + struct etna_native_reg inner_temp; + + /* Fields for handling nested conditionals */ + struct etna_compile_frame frame_stack[ETNA_MAX_DEPTH]; + int frame_sp; + struct etna_compile_label *lbl_usage[ETNA_MAX_INSTRUCTIONS]; /* label usage reference, per instruction */ + struct etna_compile_label labels[ETNA_MAX_LABELS]; /* XXX use subheap allocation */ + int num_labels; + + /* Code generation */ + int inst_ptr; /* current instruction pointer */ + uint32_t code[ETNA_MAX_INSTRUCTIONS*4]; + + /* I/O */ + + /* Number of varyings (PS only) */ + int num_varyings; + + /* GPU hardware specs */ + const struct etna_pipe_specs *specs; +}; + +/** Register allocation **/ +enum reg_sort_order +{ + FIRST_USE_ASC, + FIRST_USE_DESC, + LAST_USE_ASC, + LAST_USE_DESC +}; + +/* Augmented register description for sorting */ +struct sort_rec +{ + struct etna_reg_desc *ptr; + int key; +}; + +static int sort_rec_compar(const struct sort_rec *a, const struct sort_rec *b) +{ + if(a->key < b->key) return -1; + if(a->key > b->key) return 1; + return 0; +} + +/* create an index on a register set based on certain criteria. 
*/ +static int sort_registers( + struct sort_rec *sorted, + struct etna_reg_desc *regs, + int count, + enum reg_sort_order so) +{ + /* pre-populate keys from active registers */ + int ptr = 0; + for(int idx=0; idx<count; ++idx) + { + /* only interested in active registers now; will only assign inactive ones if no + * space in active ones */ + if(regs[idx].active) + { + sorted[ptr].ptr = ®s[idx]; + switch(so) + { + case FIRST_USE_ASC: sorted[ptr].key = regs[idx].first_use; break; + case LAST_USE_ASC: sorted[ptr].key = regs[idx].last_use; break; + case FIRST_USE_DESC: sorted[ptr].key = -regs[idx].first_use; break; + case LAST_USE_DESC: sorted[ptr].key = -regs[idx].last_use; break; + } + ptr++; + } + } + /* sort index by key */ + qsort(sorted, ptr, sizeof(struct sort_rec), (int (*) (const void *, const void *))sort_rec_compar); + return ptr; +} + +/* Allocate a new, unused, native temp register */ +static struct etna_native_reg alloc_new_native_reg(struct etna_compile_data *cd) +{ + assert(cd->next_free_native < ETNA_MAX_TEMPS); + int rv = cd->next_free_native; + cd->next_free_native++; + return (struct etna_native_reg){ .valid=1, .rgroup=INST_RGROUP_TEMP, .id=rv }; +} + +/* assign TEMPs to native registers */ +static void assign_temporaries_to_native(struct etna_compile_data *cd, struct etna_reg_desc *temps, int num_temps) +{ + for(int idx=0; idx<num_temps; ++idx) + { + temps[idx].native = alloc_new_native_reg(cd); + } +} + +/* assign inputs and outputs to temporaries + * Gallium assumes that the hardware has separate registers for taking input and output, + * however Vivante GPUs use temporaries both for passing in inputs and passing back outputs. + * Try to re-use temporary registers where possible. 
+ */ +static void assign_inouts_to_temporaries(struct etna_compile_data *cd, uint file) +{ + bool mode_inputs = (file == TGSI_FILE_INPUT); + int inout_ptr = 0, num_inouts; + int temp_ptr = 0, num_temps; + struct sort_rec inout_order[ETNA_MAX_TEMPS]; + struct sort_rec temps_order[ETNA_MAX_TEMPS]; + num_inouts = sort_registers(inout_order, + cd->file[file], cd->file_size[file], + mode_inputs ? LAST_USE_ASC : FIRST_USE_ASC); + num_temps = sort_registers(temps_order, + cd->file[TGSI_FILE_TEMPORARY], cd->file_size[TGSI_FILE_TEMPORARY], + mode_inputs ? FIRST_USE_ASC : LAST_USE_ASC); + + while(inout_ptr < num_inouts && temp_ptr < num_temps) + { + struct etna_reg_desc *inout = inout_order[inout_ptr].ptr; + struct etna_reg_desc *temp = temps_order[temp_ptr].ptr; + if(!inout->active || inout->native.valid) /* Skip if already a native register assigned */ + { + inout_ptr++; + continue; + } + /* last usage of this input is before or in same instruction of first use of temporary? */ + if(mode_inputs ? (inout->last_use <= temp->first_use) : + (inout->first_use >= temp->last_use)) + { + /* assign it and advance to next input */ + inout->native = temp->native; + inout_ptr++; + } + temp_ptr++; + } + /* if we couldn't reuse current ones, allocate new temporaries + */ + for(inout_ptr=0; inout_ptr<num_inouts; ++inout_ptr) + { + struct etna_reg_desc *inout = inout_order[inout_ptr].ptr; + if(inout->active && !inout->native.valid) + { + inout->native = alloc_new_native_reg(cd); + } + } +} + +/* Allocate an immediate with a certain value and return the index. If + * there is already an immediate with that value, return that. 
+ */ +static struct etna_inst_src alloc_imm_u32(struct etna_compile_data *cd, uint32_t value) +{ + int idx; + /* Could use a hash table to speed this up */ + for(idx = 0; idx<cd->imm_size; ++idx) + { + if(cd->imm_data[idx] == value) + break; + } + if(idx == cd->imm_size) /* allocate new immediate */ + { + assert(cd->imm_size < ETNA_MAX_IMM); + idx = cd->imm_size++; + cd->imm_data[idx] = value; + } + + /* swizzle so that component with value is returned in all components */ + idx += cd->imm_base; + struct etna_inst_src imm_src = { + .use = 1, + .rgroup = INST_RGROUP_UNIFORM_0, + .reg = idx/4, + .swiz = INST_SWIZ_BROADCAST(idx & 3) + }; + return imm_src; +} + +/* Allocate immediate with a certain float value. If there is already an + * immediate with that value, return that. + */ +static struct etna_inst_src alloc_imm_f32(struct etna_compile_data *cd, float value) +{ + return alloc_imm_u32(cd, etna_f32_to_u32(value)); +} + +/* Pass -- check register file declarations and immediates */ +static void etna_compile_parse_declarations(struct etna_compile_data *cd, const struct tgsi_token *tokens) +{ + struct tgsi_parse_context ctx = {}; + unsigned status = TGSI_PARSE_OK; + status = tgsi_parse_init(&ctx, tokens); + assert(status == TGSI_PARSE_OK); + + cd->processor = ctx.FullHeader.Processor.Processor; + + while(!tgsi_parse_end_of_tokens(&ctx)) + { + tgsi_parse_token(&ctx); + switch(ctx.FullToken.Token.Type) + { + case TGSI_TOKEN_TYPE_DECLARATION: { + const struct tgsi_full_declaration *decl = &ctx.FullToken.FullDeclaration; + cd->file_size[decl->Declaration.File] = MAX2(cd->file_size[decl->Declaration.File], decl->Range.Last+1); + } break; + case TGSI_TOKEN_TYPE_IMMEDIATE: { /* immediates are handled differently from other files; they are not declared + explicitly, and always add four components */ + const struct tgsi_full_immediate *imm = &ctx.FullToken.FullImmediate; + assert(cd->imm_size <= (ETNA_MAX_IMM-4)); + for(int i=0; i<4; ++i) + { + cd->imm_data[cd->imm_size++] = 
imm->u[i].Uint; + } + cd->file_size[TGSI_FILE_IMMEDIATE] = cd->imm_size / 4; + } break; + } + } + tgsi_parse_free(&ctx); +} + +static void etna_assign_decls(struct etna_compile_data *cd) +{ + uint idx=0; + for(int x=0; x<TGSI_FILE_COUNT; ++x) + { + cd->file[x] = &cd->decl[idx]; + for(int sub=0; sub<cd->file_size[x]; ++sub) + { + cd->decl[idx].file = x; + cd->decl[idx].idx = sub; + idx++; + } + } + cd->total_decls = idx; +} + +/* Pass -- check usage of temporaries, inputs, outputs */ +static void etna_compile_pass_check_usage(struct etna_compile_data *cd, const struct tgsi_token *tokens) +{ + struct tgsi_parse_context ctx = {}; + unsigned status = TGSI_PARSE_OK; + status = tgsi_parse_init(&ctx, tokens); + assert(status == TGSI_PARSE_OK); + + for(int idx=0; idx<cd->total_decls; ++idx) + { + cd->decl[idx].active = false; + cd->decl[idx].first_use = cd->decl[idx].last_use = -1; + } + + int inst_idx = 0; + while(!tgsi_parse_end_of_tokens(&ctx)) + { + tgsi_parse_token(&ctx); + /* find out max register #s used + * For every register mark first and last instruction index where it's + * used this allows finding ranges where the temporary can be borrowed + * as input and/or output register + * + * XXX in the case of loops this needs special care, as the last usage of a register + * inside a loop means it can still be used on next loop iteration (execution is no longer + * chronological). The register can only be declared "free" after the loop finishes. + * + * Same for inputs: the first usage of a register inside a loop doesn't mean that the register + * won't have been overwritten in previous iteration. The register can only be declared free before the loop + * starts. + * The proper way would be to do full dominator / post-dominator analysis (especially with more complicated + * control flow such as direct branch instructions) but not for now... 
+ */ + switch(ctx.FullToken.Token.Type) + { + case TGSI_TOKEN_TYPE_DECLARATION: { + /* Declaration: fill in file details */ + const struct tgsi_full_declaration *decl = &ctx.FullToken.FullDeclaration; + for(int idx=decl->Range.First; idx<=decl->Range.Last; ++idx) + { + cd->file[decl->Declaration.File][idx].usage_mask = 0; // we'll compute this ourselves + cd->file[decl->Declaration.File][idx].has_semantic = decl->Declaration.Semantic; + cd->file[decl->Declaration.File][idx].semantic = decl->Semantic; + cd->file[decl->Declaration.File][idx].interp = decl->Interp; + } + } break; + case TGSI_TOKEN_TYPE_INSTRUCTION: { + /* Instruction: iterate over operands of instruction */ + const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction; + //printf("instruction: opcode=%i num_src=%i num_dest=%i\n", inst->Instruction.Opcode, + // inst->Instruction.NumSrcRegs, inst->Instruction.NumDstRegs); + /* iterate over destination registers */ + for(int idx=0; idx<inst->Instruction.NumDstRegs; ++idx) + { + struct etna_reg_desc *reg_desc = &cd->file[inst->Dst[idx].Register.File][inst->Dst[idx].Register.Index]; + if(reg_desc->first_use == -1) + reg_desc->first_use = inst_idx; + reg_desc->last_use = inst_idx; + reg_desc->active = true; + } + /* iterate over source registers */ + for(int idx=0; idx<inst->Instruction.NumSrcRegs; ++idx) + { + struct etna_reg_desc *reg_desc = &cd->file[inst->Src[idx].Register.File][inst->Src[idx].Register.Index]; + if(reg_desc->first_use == -1) + reg_desc->first_use = inst_idx; + reg_desc->last_use = inst_idx; + reg_desc->active = true; + /* accumulate usage mask for register, this is used to determine how many slots for varyings + * should be allocated */ + reg_desc->usage_mask |= tgsi_util_get_inst_usage_mask(inst, idx); + } + inst_idx += 1; + } break; + default: + break; + } + } + tgsi_parse_free(&ctx); +} + +/* assign inputs that need to be assigned to specific registers */ +static void assign_special_inputs(struct etna_compile_data *cd) 
+{ + if(cd->processor == TGSI_PROCESSOR_FRAGMENT) + { + /* never assign t0; writing to it causes fragment to be discarded? */ + cd->next_free_native = 1; + /* hardwire TGSI_SEMANTIC_POSITION (input and output) to t0 */ + for(int idx=0; idx<cd->total_decls; ++idx) + { + struct etna_reg_desc *reg = &cd->decl[idx]; + if(reg->active && reg->semantic.Name == TGSI_SEMANTIC_POSITION) + { + reg->native.valid = 1; + reg->native.rgroup = INST_RGROUP_TEMP; + reg->native.id = 0; + } + } + } +} + +/* Check that a move instruction does not swizzle any of the components + * that it writes. + */ +static bool etna_mov_check_no_swizzle(const struct tgsi_dst_register dst, const struct tgsi_src_register src) +{ + return (!(dst.WriteMask & TGSI_WRITEMASK_X) || src.SwizzleX == TGSI_SWIZZLE_X) && + (!(dst.WriteMask & TGSI_WRITEMASK_Y) || src.SwizzleY == TGSI_SWIZZLE_Y) && + (!(dst.WriteMask & TGSI_WRITEMASK_Z) || src.SwizzleZ == TGSI_SWIZZLE_Z) && + (!(dst.WriteMask & TGSI_WRITEMASK_W) || src.SwizzleW == TGSI_SWIZZLE_W); + +} + +/* Pass -- optimize outputs + * Mesa tends to generate code like this at the end if their shaders + * MOV OUT[1], TEMP[2] + * MOV OUT[0], TEMP[0] + * MOV OUT[2], TEMP[1] + * Recognize if + * a) there is only a single assignment to an output register and + * b) the temporary is not used after that + * Also recognize direct assignment of IN to OUT (passthrough) + **/ +static void etna_compile_pass_optimize_outputs(struct etna_compile_data *cd, const struct tgsi_token *tokens) +{ + struct tgsi_parse_context ctx = {}; + unsigned status = TGSI_PARSE_OK; + status = tgsi_parse_init(&ctx, tokens); + assert(status == TGSI_PARSE_OK); + + int inst_idx = 0; + while(!tgsi_parse_end_of_tokens(&ctx)) + { + tgsi_parse_token(&ctx); + switch(ctx.FullToken.Token.Type) + { + case TGSI_TOKEN_TYPE_INSTRUCTION: { + const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction; + /* iterate over operands */ + switch(inst->Instruction.Opcode) + { + case TGSI_OPCODE_MOV: { + 
uint out_idx = inst->Dst[0].Register.Index; + uint in_idx = inst->Src[0].Register.Index; + /* assignment of temporary to output -- + * and the output doesn't yet have a native register assigned + * and the last use of the temporary is this instruction + */ + if(inst->Dst[0].Register.File == TGSI_FILE_OUTPUT && + inst->Src[0].Register.File == TGSI_FILE_TEMPORARY && + !cd->file[TGSI_FILE_OUTPUT][out_idx].native.valid && + cd->file[TGSI_FILE_TEMPORARY][in_idx].last_use == inst_idx) + { + cd->file[TGSI_FILE_OUTPUT][out_idx].native = cd->file[TGSI_FILE_TEMPORARY][in_idx].native; + /* prevent temp from being re-used for the rest of the shader */ + cd->file[TGSI_FILE_TEMPORARY][in_idx].last_use = ETNA_MAX_TOKENS; + /* mark this MOV instruction as a no-op */ + cd->dead_inst[inst_idx] = true; + } + /* direct assignment of input to output -- + * and the input or output doesn't yet have a native register assigned + * and the output is only used in this instruction, + * allocate a new register, and associate both input and output to it */ + if(inst->Dst[0].Register.File == TGSI_FILE_OUTPUT && + inst->Src[0].Register.File == TGSI_FILE_INPUT && + !cd->file[TGSI_FILE_INPUT][in_idx].native.valid && + !cd->file[TGSI_FILE_OUTPUT][out_idx].native.valid && + cd->file[TGSI_FILE_OUTPUT][out_idx].last_use == inst_idx && + cd->file[TGSI_FILE_OUTPUT][out_idx].first_use == inst_idx && + etna_mov_check_no_swizzle(inst->Dst[0].Register, inst->Src[0].Register)) + { + cd->file[TGSI_FILE_OUTPUT][out_idx].native = cd->file[TGSI_FILE_INPUT][in_idx].native = + alloc_new_native_reg(cd); + /* mark this MOV instruction as a no-op */ + cd->dead_inst[inst_idx] = true; + } + } break; + default: ; + } + inst_idx += 1; + } break; + } + } + tgsi_parse_free(&ctx); +} + +/* emit instruction and append to program: assembles `inst` into the four code words at the current instruction pointer, then advances the pointer */ +static void emit_inst(struct etna_compile_data *cd, const struct etna_inst *inst) +{ + assert(cd->inst_ptr < ETNA_MAX_INSTRUCTIONS); /* code[] holds ETNA_MAX_INSTRUCTIONS*4 words, so the index must stay strictly below the limit */ + etna_assemble(&cd->code[cd->inst_ptr*4], inst); + 
cd->inst_ptr ++; +} + +/* convert destination operand */ +static struct etna_inst_dst convert_dst(struct etna_compile_data *cd, const struct tgsi_full_dst_register *in) +{ + struct etna_inst_dst rv = { + /// XXX .amode + .use = 1, + .comps = in->Register.WriteMask, + }; + struct etna_native_reg native_reg = cd->file[in->Register.File][in->Register.Index].native; + assert(native_reg.valid && !native_reg.is_tex && native_reg.rgroup == INST_RGROUP_TEMP); /* can only assign to temporaries */ + rv.reg = native_reg.id; + return rv; +} + +/* convert texture operand */ +static struct etna_inst_tex convert_tex(struct etna_compile_data *cd, const struct tgsi_full_src_register *in, const struct tgsi_instruction_texture *tex) +{ + struct etna_inst_tex rv = { + // XXX .amode (to allow for an array of samplers?) + .swiz = INST_SWIZ_IDENTITY + }; + struct etna_native_reg native_reg = cd->file[in->Register.File][in->Register.Index].native; + assert(native_reg.is_tex && native_reg.valid); + rv.id = native_reg.id; + return rv; +} + +/* convert source operand */ +static struct etna_inst_src convert_src(struct etna_compile_data *cd, const struct tgsi_full_src_register *in, uint32_t swizzle) +{ + struct etna_inst_src rv = { + .use = 1, + .swiz = inst_swiz_compose( + INST_SWIZ_X(in->Register.SwizzleX) | INST_SWIZ_Y(in->Register.SwizzleY) | + INST_SWIZ_Z(in->Register.SwizzleZ) | INST_SWIZ_W(in->Register.SwizzleW), + swizzle), + .neg = in->Register.Negate, + .abs = in->Register.Absolute, + // XXX .amode + }; + struct etna_native_reg native_reg = cd->file[in->Register.File][in->Register.Index].native; + assert(native_reg.valid && !native_reg.is_tex); + rv.rgroup = native_reg.rgroup; + rv.reg = native_reg.id; + return rv; +} + +/* convert destination to source operand (for operation in place) + * i.e, + * MUL dst0.x__w, src0.xyzw, 2/PI + * SIN dst0.x__w, dst0.xyzw + */ +static struct etna_inst_src convert_dst_to_src(struct etna_compile_data *cd, const struct tgsi_full_dst_register *in) +{ + 
struct etna_inst_src rv = { + .use = 1, + .swiz = INST_SWIZ_IDENTITY, /* no swizzle needed, destination does selection */ + .neg = 0, + .abs = 0, + }; + struct etna_native_reg native_reg = cd->file[in->Register.File][in->Register.Index].native; + assert(native_reg.valid && !native_reg.is_tex); + rv.rgroup = native_reg.rgroup; + rv.reg = native_reg.id; + return rv; +} + +/* create a new label: takes the next free slot of cd->labels and returns a pointer to it */ +static struct etna_compile_label *alloc_new_label(struct etna_compile_data *cd) +{ + assert(cd->num_labels < ETNA_MAX_LABELS); + struct etna_compile_label *rv = &cd->labels[cd->num_labels++]; + rv->inst_idx = -1; /* start by pointing to no specific instruction (-1 until label_place is called) */ + return rv; +} + +/* place label at current instruction pointer, i.e. at the next instruction that will be emitted */ +static void label_place(struct etna_compile_data *cd, struct etna_compile_label *label) +{ + label->inst_idx = cd->inst_ptr; +} + +/* mark label use at current instruction. + * target of the label will be filled in in the marked instruction's src2.imm slot as soon + * as the value becomes known. + */ +static void label_mark_use(struct etna_compile_data *cd, struct etna_compile_label *label) +{ + assert(cd->inst_ptr < ETNA_MAX_INSTRUCTIONS); + cd->lbl_usage[cd->inst_ptr] = label; +} + +/* Get temporary to be used within one TGSI instruction. + * The first time that this function is called the temporary will be allocated. + * Each call to this function will return the same temporary. 
+ */ +static struct etna_native_reg etna_compile_get_inner_temp(struct etna_compile_data *cd) +{ + if(!cd->inner_temp.valid) + cd->inner_temp = alloc_new_native_reg(cd); + return cd->inner_temp; +} + +/* Pass -- compile instructions */ +static void etna_compile_pass_generate_code(struct etna_compile_data *cd, const struct tgsi_token *tokens) +{ + struct tgsi_parse_context ctx = {}; + unsigned status = TGSI_PARSE_OK; + status = tgsi_parse_init(&ctx, tokens); + assert(status == TGSI_PARSE_OK); + + int inst_idx = 0; + while(!tgsi_parse_end_of_tokens(&ctx)) + { + tgsi_parse_token(&ctx); + const struct tgsi_full_instruction *inst = 0; + switch(ctx.FullToken.Token.Type) + { + case TGSI_TOKEN_TYPE_INSTRUCTION: + /* iterate over operands */ + inst = &ctx.FullToken.FullInstruction; + if(cd->dead_inst[inst_idx]) /* skip dead instructions */ + { + inst_idx++; + continue; + } + assert(inst->Instruction.Saturate != TGSI_SAT_MINUS_PLUS_ONE); + int sat = (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); + /* Use a naive switch statement to get up and running, later on when we have more experience with + * Vivante instructions generation, this may be shortened greatly by using lookup in a table with patterns. 
*/ + switch(inst->Instruction.Opcode) + { + case TGSI_OPCODE_MOV: + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_MOV, + .sat = sat, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + }); + break; + case TGSI_OPCODE_LIT: { + /* + LOG tmp.x, void, void, src.yyyy + MUL tmp.x, tmp.xxxx, src.wwww, void + LITP dst, src.xxyy, src.xxxx, tmp.xxxx + */ + struct etna_native_reg inner_temp = etna_compile_get_inner_temp(cd); + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_LOG, + .sat = 0, + .dst.use = 1, + .dst.comps = INST_COMPS_X, /* tmp.x */ + .dst.reg = inner_temp.id, + .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_BROADCAST(1)), /* src.yyyy */ + }); + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_MUL, + .sat = 0, + .dst.use = 1, + .dst.comps = INST_COMPS_X, + .dst.reg = inner_temp.id, + .src[0].use = 1, + .src[0].swiz = INST_SWIZ_BROADCAST(0), /* tmp.xxxx */ + .src[0].neg = 0, + .src[0].abs = 0, + .src[0].rgroup = inner_temp.rgroup, + .src[0].reg = inner_temp.id, + .src[1] = convert_src(cd, &inst->Src[0], INST_SWIZ_BROADCAST(3)), /* src.wwww */ + }); + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_LITP, + .sat = 0, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[0] = convert_src(cd, &inst->Src[0], + (INST_SWIZ_X(0) | INST_SWIZ_Y(0) | INST_SWIZ_Z(1) | INST_SWIZ_W(1))), /* src.xxyy */ + .src[1] = convert_src(cd, &inst->Src[0], INST_SWIZ_BROADCAST(0)), /* src.xxxx */ + .src[2].use = 1, + .src[2].swiz = INST_SWIZ_BROADCAST(0), /* tmp.xxxx */ + .src[2].neg = 0, + .src[2].abs = 0, + .src[2].rgroup = inner_temp.rgroup, + .src[2].reg = inner_temp.id, + }); + } break; + case TGSI_OPCODE_RCP: + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_RCP, + .sat = sat, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + }); + break; + case TGSI_OPCODE_RSQ: + emit_inst(cd, &(struct etna_inst) { + .opcode = 
INST_OPCODE_RSQ, + .sat = sat, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + }); + break; + case TGSI_OPCODE_EXP: assert(0); break; + case TGSI_OPCODE_LOG: assert(0); break; + case TGSI_OPCODE_MUL: + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_MUL, + .sat = sat, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + .src[1] = convert_src(cd, &inst->Src[1], INST_SWIZ_IDENTITY), + }); + break; + case TGSI_OPCODE_ADD: + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_ADD, + .sat = sat, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + .src[2] = convert_src(cd, &inst->Src[1], INST_SWIZ_IDENTITY), + }); + break; + case TGSI_OPCODE_DP2: assert(0); break; /* Either MUL+MAD or DP3 with a zeroed channel, but we don't have a 'zero' swizzle */ + case TGSI_OPCODE_DP3: + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_DP3, + .sat = sat, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + .src[1] = convert_src(cd, &inst->Src[1], INST_SWIZ_IDENTITY), + }); + break; + case TGSI_OPCODE_DP4: + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_DP4, + .sat = sat, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + .src[1] = convert_src(cd, &inst->Src[1], INST_SWIZ_IDENTITY), + }); + break; + case TGSI_OPCODE_DST: assert(0); break; /* XXX INST_OPCODE_DST */ + case TGSI_OPCODE_MIN: + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_SELECT, + .cond = INST_CONDITION_GT, + .sat = sat, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + .src[1] = convert_src(cd, &inst->Src[1], INST_SWIZ_IDENTITY), + .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + }); + break; + case TGSI_OPCODE_MAX: + 
emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_SELECT, + .cond = INST_CONDITION_LT, + .sat = sat, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + .src[1] = convert_src(cd, &inst->Src[1], INST_SWIZ_IDENTITY), + .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + }); + break; + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SGE: + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SGT: + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SNE: + case TGSI_OPCODE_STR: { + uint cond = 0; + switch(inst->Instruction.Opcode) + { + case TGSI_OPCODE_SLT: cond = INST_CONDITION_LT; break; + case TGSI_OPCODE_SGE: cond = INST_CONDITION_GE; break; + case TGSI_OPCODE_SEQ: cond = INST_CONDITION_EQ; break; + case TGSI_OPCODE_SGT: cond = INST_CONDITION_GT; break; + case TGSI_OPCODE_SLE: cond = INST_CONDITION_LE; break; + case TGSI_OPCODE_SNE: cond = INST_CONDITION_NE; break; + case TGSI_OPCODE_STR: cond = INST_CONDITION_TRUE; break; + } + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_SET, + .cond = cond, + .sat = sat, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + .src[1] = convert_src(cd, &inst->Src[1], INST_SWIZ_IDENTITY), + }); + } break; + case TGSI_OPCODE_MAD: + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_MAD, + .sat = sat, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + .src[1] = convert_src(cd, &inst->Src[1], INST_SWIZ_IDENTITY), + .src[2] = convert_src(cd, &inst->Src[2], INST_SWIZ_IDENTITY), + }); + break; + case TGSI_OPCODE_SFL: assert(0); break; /* SET to 0 */ + case TGSI_OPCODE_SUB: { /* ADD with negated SRC1 */ + struct etna_inst inst_out = { + .opcode = INST_OPCODE_ADD, + .sat = sat, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + .src[2] = convert_src(cd, &inst->Src[1], INST_SWIZ_IDENTITY), + }; + 
inst_out.src[2].neg = !inst_out.src[2].neg; + emit_inst(cd, &inst_out); + } break; + case TGSI_OPCODE_LRP: assert(0); break; /* lowered by mesa to (op2 * (1.0f - op0)) + (op1 * op0) */ + case TGSI_OPCODE_CND: assert(0); break; + case TGSI_OPCODE_SQRT: /* only generated if HAS_SQRT_TRIG */ + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_SQRT, + .sat = sat, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + }); + break; + case TGSI_OPCODE_DP2A: assert(0); break; + case TGSI_OPCODE_FRC: + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_FRC, + .sat = sat, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + }); + break; + case TGSI_OPCODE_CLAMP: assert(0); break; /* XXX MIN(MAX(...)) */ + case TGSI_OPCODE_FLR: /* XXX HAS_SIGN_FLOOR_CEIL */ + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_FLOOR, + .sat = sat, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + }); + break; + case TGSI_OPCODE_CEIL: /* XXX HAS_SIGN_FLOOR_CEIL */ + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_CEIL, + .sat = sat, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + }); + break; + case TGSI_OPCODE_SSG: /* XXX HAS_SIGN_FLOOR_CEIL */ + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_SIGN, + .sat = sat, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + }); + break; + case TGSI_OPCODE_ROUND: assert(0); break; + case TGSI_OPCODE_EX2: + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_EXP, + .sat = sat, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + }); + break; + case TGSI_OPCODE_LG2: + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_LOG, + .sat = sat, + .dst = convert_dst(cd, 
&inst->Dst[0]), + .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + }); + break; + case TGSI_OPCODE_POW: assert(0); break; /* lowered by mesa to ex2(y*lg2(x)) */ + case TGSI_OPCODE_XPD: assert(0); break; + case TGSI_OPCODE_ABS: { /* XXX can be propagated into uses of destination operand */ + /* MOV with the absolute-value source modifier. The instruction is built in a named local and the + * modifier is set afterwards (same pattern as TGSI_OPCODE_SUB): designating .src[2].abs after .src[2] + * inside one initializer list may discard the convert_src() result on some compilers. */ + struct etna_inst inst_out = { + .opcode = INST_OPCODE_MOV, + .sat = sat, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + }; + inst_out.src[2].abs = 1; + emit_inst(cd, &inst_out); + } break; + case TGSI_OPCODE_RCC: assert(0); break; + case TGSI_OPCODE_DPH: assert(0); break; /* src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w */ + case TGSI_OPCODE_COS: /* fall through */ + case TGSI_OPCODE_SIN: + if(cd->specs->has_sin_cos_sqrt) + { + /* multiply by 2/PI (i.e. divide by PI/2), re-use dest register, this works even in src=dst case + * because second instruction only uses output of first. */ + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_MUL, + .sat = 0, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), /* any swizzling happens here */ + .src[1] = alloc_imm_f32(cd, 2.0f/M_PI), /* float immediate; the value is fractional, so it is passed through the f32 variant */ + }); + emit_inst(cd, &(struct etna_inst) { + .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS ? INST_OPCODE_COS : INST_OPCODE_SIN, + .sat = sat, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[2] = convert_dst_to_src(cd, &inst->Dst[0]), + }); + } else { + /* XXX fall back to Taylor series if not HAS_SQRT_TRIG, + * see i915_fragprog.c for a good example. + */ + assert(0); + } + break; + case TGSI_OPCODE_DDX: + case TGSI_OPCODE_DDY: + emit_inst(cd, &(struct etna_inst) { + .opcode = inst->Instruction.Opcode == TGSI_OPCODE_DDX ? 
INST_OPCODE_DSX : INST_OPCODE_DSY, + .sat = sat, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + }); + break; + case TGSI_OPCODE_KILL_IF: + /* discard if (src.x < 0 || src.y < 0 || src.z < 0 || src.w < 0) */ + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_TEXKILL, + .cond = INST_CONDITION_LZ, + .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY) + }); + break; + case TGSI_OPCODE_KILL: + /* discard always */ + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_TEXKILL, + .cond = INST_CONDITION_TRUE + }); + break; + case TGSI_OPCODE_PK2H: assert(0); break; + case TGSI_OPCODE_PK2US: assert(0); break; + case TGSI_OPCODE_PK4B: assert(0); break; + case TGSI_OPCODE_PK4UB: assert(0); break; + case TGSI_OPCODE_RFL: assert(0); break; + case TGSI_OPCODE_TEX: + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_TEXLD, + .sat = (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE), + .dst = convert_dst(cd, &inst->Dst[0]), + .tex = convert_tex(cd, &inst->Src[1], &inst->Texture), + .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + }); + break; + case TGSI_OPCODE_TXP: { /* divide src.xyz by src.w */ + struct etna_native_reg temp = etna_compile_get_inner_temp(cd); + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_RCP, + .sat = 0, + .dst.use = 1, + .dst.comps = INST_COMPS_W, /* tmp.w */ + .dst.reg = temp.id, + .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_BROADCAST(3)), + }); + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_MUL, + .sat = 0, + .dst.use = 1, + .dst.comps = INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z, /* tmp.xyz */ + .dst.reg = temp.id, + .src[0].use = 1, /* tmp.wwww */ + .src[0].swiz = INST_SWIZ_BROADCAST(3), + .src[0].neg = 0, + .src[0].abs = 0, + .src[0].rgroup = temp.rgroup, + .src[0].reg = temp.id, + .src[1] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), /* 
src.xyzw */ + }); + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_TEXLD, + .sat = (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE), + .dst = convert_dst(cd, &inst->Dst[0]), + .tex = convert_tex(cd, &inst->Src[1], &inst->Texture), + .src[0].use = 1, /* tmp.xyzw */ + .src[0].swiz = INST_SWIZ_IDENTITY, + .src[0].neg = 0, + .src[0].abs = 0, + .src[0].rgroup = temp.rgroup, + .src[0].reg = temp.id, + }); + } break; + case TGSI_OPCODE_TXB: assert(0); break; /* TEXLDB */ + case TGSI_OPCODE_TXL: assert(0); break; /* TEXLDL */ + case TGSI_OPCODE_UP2H: assert(0); break; + case TGSI_OPCODE_UP2US: assert(0); break; + case TGSI_OPCODE_UP4B: assert(0); break; + case TGSI_OPCODE_UP4UB: assert(0); break; + case TGSI_OPCODE_X2D: assert(0); break; + case TGSI_OPCODE_ARL: assert(0); break; /* floor */ + case TGSI_OPCODE_ARR: assert(0); break; /* round */ + case TGSI_OPCODE_ARA: assert(0); break; /* to be removed according to doc */ + case TGSI_OPCODE_BRA: assert(0); break; /* to be removed according to doc */ + case TGSI_OPCODE_CAL: assert(0); break; /* CALL */ + case TGSI_OPCODE_RET: assert(0); break; + case TGSI_OPCODE_CMP: /* componentwise dst = (src0 < 0) ? 
src1 : src2 */ + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_SELECT, + .cond = INST_CONDITION_LZ, + .sat = sat, + .dst = convert_dst(cd, &inst->Dst[0]), + .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + .src[1] = convert_src(cd, &inst->Src[1], INST_SWIZ_IDENTITY), + .src[2] = convert_src(cd, &inst->Src[2], INST_SWIZ_IDENTITY), + }); + break; + case TGSI_OPCODE_SCS: assert(0); break; + case TGSI_OPCODE_NRM: assert(0); break; + case TGSI_OPCODE_DIV: assert(0); break; + case TGSI_OPCODE_BRK: assert(0); break; /* break from loop */ + case TGSI_OPCODE_IF: { + struct etna_compile_frame *f = &cd->frame_stack[cd->frame_sp++]; + /* push IF to stack */ + f->type = ETNA_COMPILE_FRAME_IF; + /* create "else" label */ + f->lbl_else = alloc_new_label(cd); + f->lbl_endif = NULL; + /* mark position in instruction stream of label reference so that it can be filled in in next pass */ + label_mark_use(cd, f->lbl_else); + /* create conditional branch to label if src0 EQ 0 */ + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_BRANCH, + .cond = INST_CONDITION_EQ, + .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), + .src[1] = alloc_imm_f32(cd, 0.0f), + /* imm is filled in later */ + }); + } break; + case TGSI_OPCODE_ELSE: { + assert(cd->frame_sp>0); + struct etna_compile_frame *f = &cd->frame_stack[cd->frame_sp-1]; + assert(f->type == ETNA_COMPILE_FRAME_IF); + /* create "endif" label, and branch to endif label */ + f->lbl_endif = alloc_new_label(cd); + label_mark_use(cd, f->lbl_endif); + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_BRANCH, + .cond = INST_CONDITION_TRUE, + /* imm is filled in later */ + }); + /* mark "else" label at this position in instruction stream */ + label_place(cd, f->lbl_else); + } break; + case TGSI_OPCODE_ENDIF: { + assert(cd->frame_sp>0); + struct etna_compile_frame *f = &cd->frame_stack[--cd->frame_sp]; + assert(f->type == ETNA_COMPILE_FRAME_IF); + /* assign "endif" or "else" (if no ELSE) 
label to current position in instruction stream, pop IF */ + if(f->lbl_endif != NULL) + label_place(cd, f->lbl_endif); + else + label_place(cd, f->lbl_else); + } break; + case TGSI_OPCODE_PUSHA: assert(0); break; + case TGSI_OPCODE_POPA: assert(0); break; + case TGSI_OPCODE_I2F: assert(0); break; + case TGSI_OPCODE_NOT: assert(0); break; + case TGSI_OPCODE_TRUNC: assert(0); break; + case TGSI_OPCODE_SHL: assert(0); break; + case TGSI_OPCODE_AND: assert(0); break; + case TGSI_OPCODE_OR: assert(0); break; + case TGSI_OPCODE_MOD: assert(0); break; + case TGSI_OPCODE_XOR: assert(0); break; + case TGSI_OPCODE_SAD: assert(0); break; + case TGSI_OPCODE_TXF: assert(0); break; + case TGSI_OPCODE_TXQ: assert(0); break; + case TGSI_OPCODE_CONT: assert(0); break; + case TGSI_OPCODE_EMIT: assert(0); break; + case TGSI_OPCODE_ENDPRIM: assert(0); break; + case TGSI_OPCODE_BGNLOOP: assert(0); break; + case TGSI_OPCODE_BGNSUB: assert(0); break; + case TGSI_OPCODE_ENDLOOP: assert(0); break; + case TGSI_OPCODE_ENDSUB: assert(0); break; + case TGSI_OPCODE_TXQ_LZ: assert(0); break; + case TGSI_OPCODE_NOP: break; + case TGSI_OPCODE_NRM4: assert(0); break; + case TGSI_OPCODE_CALLNZ: assert(0); break; + case TGSI_OPCODE_BREAKC: assert(0); break; + case TGSI_OPCODE_END: /* Nothing to do */ break; + case TGSI_OPCODE_F2I: assert(0); break; + case TGSI_OPCODE_IDIV: assert(0); break; + case TGSI_OPCODE_IMAX: assert(0); break; + case TGSI_OPCODE_IMIN: assert(0); break; + case TGSI_OPCODE_INEG: assert(0); break; + case TGSI_OPCODE_ISGE: assert(0); break; + case TGSI_OPCODE_ISHR: assert(0); break; + case TGSI_OPCODE_ISLT: assert(0); break; + case TGSI_OPCODE_F2U: assert(0); break; + case TGSI_OPCODE_U2F: assert(0); break; + case TGSI_OPCODE_UADD: assert(0); break; + case TGSI_OPCODE_UDIV: assert(0); break; + case TGSI_OPCODE_UMAD: assert(0); break; + case TGSI_OPCODE_UMAX: assert(0); break; + case TGSI_OPCODE_UMIN: assert(0); break; + case TGSI_OPCODE_UMOD: assert(0); break; + case 
TGSI_OPCODE_UMUL: assert(0); break; + case TGSI_OPCODE_USEQ: assert(0); break; + case TGSI_OPCODE_USGE: assert(0); break; + case TGSI_OPCODE_USHR: assert(0); break; + case TGSI_OPCODE_USLT: assert(0); break; + case TGSI_OPCODE_USNE: assert(0); break; + case TGSI_OPCODE_SWITCH: assert(0); break; + case TGSI_OPCODE_CASE: assert(0); break; + case TGSI_OPCODE_DEFAULT: assert(0); break; + case TGSI_OPCODE_ENDSWITCH: assert(0); break; + case TGSI_OPCODE_SAMPLE: assert(0); break; + case TGSI_OPCODE_SAMPLE_I: assert(0); break; + case TGSI_OPCODE_SAMPLE_I_MS: assert(0); break; + case TGSI_OPCODE_SAMPLE_B: assert(0); break; + case TGSI_OPCODE_SAMPLE_C: assert(0); break; + case TGSI_OPCODE_SAMPLE_C_LZ: assert(0); break; + case TGSI_OPCODE_SAMPLE_D: assert(0); break; + case TGSI_OPCODE_SAMPLE_L: assert(0); break; + case TGSI_OPCODE_GATHER4: assert(0); break; + case TGSI_OPCODE_SVIEWINFO: assert(0); break; + case TGSI_OPCODE_SAMPLE_POS: assert(0); break; + case TGSI_OPCODE_SAMPLE_INFO: assert(0); break; + case TGSI_OPCODE_UARL: assert(0); break; + case TGSI_OPCODE_UCMP: assert(0); break; + case TGSI_OPCODE_IABS: assert(0); break; + case TGSI_OPCODE_ISSG: assert(0); break; + case TGSI_OPCODE_LOAD: assert(0); break; + case TGSI_OPCODE_STORE: assert(0); break; + case TGSI_OPCODE_MFENCE: assert(0); break; + case TGSI_OPCODE_LFENCE: assert(0); break; + case TGSI_OPCODE_SFENCE: assert(0); break; + case TGSI_OPCODE_BARRIER: assert(0); break; + case TGSI_OPCODE_ATOMUADD: assert(0); break; + case TGSI_OPCODE_ATOMXCHG: assert(0); break; + case TGSI_OPCODE_ATOMCAS: assert(0); break; + case TGSI_OPCODE_ATOMAND: assert(0); break; + case TGSI_OPCODE_ATOMOR: assert(0); break; + case TGSI_OPCODE_ATOMXOR: assert(0); break; + case TGSI_OPCODE_ATOMUMIN: assert(0); break; + case TGSI_OPCODE_ATOMUMAX: assert(0); break; + case TGSI_OPCODE_ATOMIMIN: assert(0); break; + case TGSI_OPCODE_ATOMIMAX: assert(0); break; + case TGSI_OPCODE_TEX2: assert(0); break; + case TGSI_OPCODE_TXB2: assert(0); break; + 
case TGSI_OPCODE_TXL2: assert(0); break; + default: + printf("Unhandled instruction %i\n", inst->Instruction.Opcode); + } + inst_idx += 1; + break; + } + } + tgsi_parse_free(&ctx); +} + +/* Look up register by semantic: returns the first register in `file` whose semantic name and index match, or NULL */ +static struct etna_reg_desc *find_decl_by_semantic(struct etna_compile_data *cd, uint file, uint name, uint index) +{ + for(int idx=0; idx<cd->file_size[file]; ++idx) + { + struct etna_reg_desc *reg = &cd->file[file][idx]; + if(reg->semantic.Name == name && reg->semantic.Index == index) + { + return reg; + } + } + return NULL; /* not found */ +} + +/** Add ADD and MUL instructions that rewrite the position output Z as z = (z + w) * 0.5, if needed: + * - this is a vertex shader + * - and this is an older GPU (specs->vs_need_z_div is set) + */ +static void etna_compile_add_z_div_if_needed(struct etna_compile_data *cd) +{ + if(cd->processor == TGSI_PROCESSOR_VERTEX && cd->specs->vs_need_z_div) + { + /* find position out */ + struct etna_reg_desc *pos_reg = find_decl_by_semantic(cd, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_POSITION, 0); + if(pos_reg != NULL) + { + /* + * ADD tX.__z_, tX.zzzz, void, tX.wwww + * MUL tX.__z_, tX.zzzz, 0.5, void + */ + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_ADD, + .dst.use = 1, + .dst.reg = pos_reg->native.id, + .dst.comps = INST_COMPS_Z, + .src[0].use = 1, + .src[0].reg = pos_reg->native.id, + .src[0].swiz = INST_SWIZ_BROADCAST(INST_SWIZ_COMP_Z), + .src[2].use = 1, + .src[2].reg = pos_reg->native.id, + .src[2].swiz = INST_SWIZ_BROADCAST(INST_SWIZ_COMP_W), + }); + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_MUL, + .dst.use = 1, + .dst.reg = pos_reg->native.id, + .dst.comps = INST_COMPS_Z, + .src[0].use = 1, + .src[0].reg = pos_reg->native.id, + .src[0].swiz = INST_SWIZ_BROADCAST(INST_SWIZ_COMP_Z), + .src[1] = alloc_imm_f32(cd, 0.5f), + }); + } + } +} + +/** add a NOP to the shader if + * a) the shader is empty + * or + * b) there is a label at the end of the shader + */ +static void etna_compile_add_nop_if_needed(struct etna_compile_data *cd) +{ + 
bool label_at_last_inst = false; + for(int idx=0; idx<cd->num_labels; ++idx) + { + if(cd->labels[idx].inst_idx == (cd->inst_ptr-1)) + { + label_at_last_inst = true; + } + } + if(cd->inst_ptr == 0 || label_at_last_inst) + { + emit_inst(cd, &(struct etna_inst) { + .opcode = INST_OPCODE_NOP + }); + } +} + +/* Allocate CONST and IMM to native ETNA_RGROUP_UNIFORM(x). + * CONST must be consecutive as const buffers are supposed to be consecutive, and before IMM, as this is + * more convenient because it is possible for the compilation process itself to generate extra + * immediates for constants such as pi, one, zero. + */ +static void assign_constants_and_immediates(struct etna_compile_data *cd) +{ + for(int idx=0; idx<cd->file_size[TGSI_FILE_CONSTANT]; ++idx) + { + cd->file[TGSI_FILE_CONSTANT][idx].native.valid = 1; + cd->file[TGSI_FILE_CONSTANT][idx].native.rgroup = INST_RGROUP_UNIFORM_0; + cd->file[TGSI_FILE_CONSTANT][idx].native.id = idx; + } + /* immediates start after the constants; imm_base is counted in scalar components (four per CONST register) */ + cd->imm_base = cd->file_size[TGSI_FILE_CONSTANT] * 4; + for(int idx=0; idx<cd->file_size[TGSI_FILE_IMMEDIATE]; ++idx) + { + cd->file[TGSI_FILE_IMMEDIATE][idx].native.valid = 1; + cd->file[TGSI_FILE_IMMEDIATE][idx].native.rgroup = INST_RGROUP_UNIFORM_0; + cd->file[TGSI_FILE_IMMEDIATE][idx].native.id = cd->imm_base/4 + idx; + } + DBG_F(ETNA_DBG_COMPILER_MSGS, "imm base: %i size: %i", cd->imm_base, cd->imm_size); +} + +/* Assign declared samplers to native texture units; vertex shaders start at specs->vertex_sampler_offset, other shaders at 0 */ +static void assign_texture_units(struct etna_compile_data *cd) +{ + uint tex_base = 0; + if(cd->processor == TGSI_PROCESSOR_VERTEX) + { + tex_base = cd->specs->vertex_sampler_offset; + } + for(int idx=0; idx<cd->file_size[TGSI_FILE_SAMPLER]; ++idx) + { + cd->file[TGSI_FILE_SAMPLER][idx].native.valid = 1; + cd->file[TGSI_FILE_SAMPLER][idx].native.is_tex = 1; // overrides rgroup + cd->file[TGSI_FILE_SAMPLER][idx].native.id = tex_base + idx; + } +} + +/* additional pass to fill in branch targets */ +static void 
etna_compile_fill_in_labels(struct etna_compile_data *cd) +{ + for(int idx=0; idx<cd->inst_ptr ; ++idx) + { + if(cd->lbl_usage[idx]) + { + etna_assemble_set_imm(&cd->code[idx * 4], cd->lbl_usage[idx]->inst_idx); + } + } +} + +/* compare two etna_native_reg structures, return true if equal */ +static bool cmp_etna_native_reg(const struct etna_native_reg to, const struct etna_native_reg from) +{ + return to.valid == from.valid && to.is_tex == from.is_tex && to.rgroup == from.rgroup && + to.id == from.id; +} + +/* go through all declarations and swap native registers *to* and *from* */ +static void swap_native_registers(struct etna_compile_data *cd, const struct etna_native_reg to, const struct etna_native_reg from) +{ + if(cmp_etna_native_reg(from, to)) + return; /* Nothing to do */ + for(int idx=0; idx<cd->total_decls; ++idx) + { + if(cmp_etna_native_reg(cd->decl[idx].native, from)) + { + cd->decl[idx].native = to; + } else if(cmp_etna_native_reg(cd->decl[idx].native, to)) + { + cd->decl[idx].native = from; + } + } +} + +/* For PS we need to permute so that inputs are always in temporary 0..N-1. + * Semantic POS is always t0. If that semantic is not used, avoid t0. 
+ */ +static void permute_ps_inputs(struct etna_compile_data *cd) +{ + /* Special inputs: + * gl_FragCoord VARYING_SLOT_POS TGSI_SEMANTIC_POSITION + * gl_PointCoord VARYING_SLOT_PNTC TGSI_SEMANTIC_PCOORD + */ + uint native_idx = 1; + for(int idx=0; idx<cd->file_size[TGSI_FILE_INPUT]; ++idx) + { + struct etna_reg_desc *reg = &cd->file[TGSI_FILE_INPUT][idx]; + uint input_id; + assert(reg->has_semantic); + if(!reg->active || reg->semantic.Name == TGSI_SEMANTIC_POSITION) + continue; + input_id = native_idx++; + swap_native_registers(cd, (struct etna_native_reg) { + .valid = 1, + .rgroup = INST_RGROUP_TEMP, + .id = input_id + }, cd->file[TGSI_FILE_INPUT][idx].native); + } + cd->num_varyings = native_idx-1; + if(native_idx > cd->next_free_native) + cd->next_free_native = native_idx; +} + +/* fill in ps inputs into shader object */ +static void fill_in_ps_inputs(struct etna_shader_object *sobj, struct etna_compile_data *cd) +{ + sobj->num_inputs = cd->num_varyings; + assert(sobj->num_inputs < ETNA_NUM_INPUTS); + for(int idx=0; idx<cd->file_size[TGSI_FILE_INPUT]; ++idx) + { + struct etna_reg_desc *reg = &cd->file[TGSI_FILE_INPUT][idx]; + if(reg->native.id > 0) + { + int input_id = reg->native.id - 1; + sobj->inputs[input_id].reg = reg->native.id; + sobj->inputs[input_id].semantic = reg->semantic; + if(reg->semantic.Name == TGSI_SEMANTIC_COLOR) /* colors affected by flat shading */ + sobj->inputs[input_id].pa_attributes = 0x200; + else /* texture coord or other bypasses flat shading */ + sobj->inputs[input_id].pa_attributes = 0x2f1; + /* convert usage mask to number of components (*=wildcard) + * .r (0..1) -> 1 component + * .*g (2..3) -> 2 component + * .**b (4..7) -> 3 components + * .***a (8..15) -> 4 components + */ + sobj->inputs[input_id].num_components = util_last_bit(reg->usage_mask); + } + } + sobj->input_count_unk8 = 31; /* XXX what is this */ +} + +/* fill in output mapping for ps into shader object */ +static void fill_in_ps_outputs(struct etna_shader_object 
*sobj, struct etna_compile_data *cd) +{ + sobj->num_outputs = 0; + for(int idx=0; idx<cd->file_size[TGSI_FILE_OUTPUT]; ++idx) + { + struct etna_reg_desc *reg = &cd->file[TGSI_FILE_OUTPUT][idx]; + switch(reg->semantic.Name) + { + case TGSI_SEMANTIC_COLOR: /* FRAG_RESULT_COLOR */ + sobj->ps_color_out_reg = reg->native.id; + break; + case TGSI_SEMANTIC_POSITION: /* FRAG_RESULT_DEPTH */ + sobj->ps_depth_out_reg = reg->native.id; /* =always native reg 0, only z component should be assigned */ + break; + default: + assert(0); /* only outputs supported are COLOR and POSITION at the moment */ + } + } +} + +/* fill in inputs for vs into shader object */ +static void fill_in_vs_inputs(struct etna_shader_object *sobj, struct etna_compile_data *cd) +{ + sobj->num_inputs = 0; + for(int idx=0; idx<cd->file_size[TGSI_FILE_INPUT]; ++idx) + { + struct etna_reg_desc *reg = &cd->file[TGSI_FILE_INPUT][idx]; + assert(sobj->num_inputs < ETNA_NUM_INPUTS); + /* XXX exclude inputs with special semantics such as gl_frontFacing */ + sobj->inputs[sobj->num_inputs].reg = reg->native.id; + sobj->inputs[sobj->num_inputs].semantic = reg->semantic; + sobj->inputs[sobj->num_inputs].num_components = util_last_bit(reg->usage_mask); + sobj->num_inputs++; + } + sobj->input_count_unk8 = (sobj->num_inputs + 19)/16; /* XXX what is this */ +} + +/* build two-level output index [Semantic][Index] for fast linking */ +static void build_output_index(struct etna_shader_object *sobj) +{ + int total = 0; + int offset = 0; + for(int name=0; name<TGSI_SEMANTIC_COUNT; ++name) + { + total += sobj->output_count_per_semantic[name]; + } + sobj->output_per_semantic_list = CALLOC(total, sizeof(struct etna_shader_inout *)); + for(int name=0; name<TGSI_SEMANTIC_COUNT; ++name) + { + sobj->output_per_semantic[name] = &sobj->output_per_semantic_list[offset]; + offset += sobj->output_count_per_semantic[name]; + } + for(int idx=0; idx<sobj->num_outputs; ++idx) + { + sobj->output_per_semantic[sobj->outputs[idx].semantic.Name] + 
[sobj->outputs[idx].semantic.Index] = &sobj->outputs[idx]; + } +} + +/* fill in outputs for vs into shader object: POSITION and PSIZE go to dedicated registers, every other output is appended to outputs[] */ +static void fill_in_vs_outputs(struct etna_shader_object *sobj, struct etna_compile_data *cd) +{ + sobj->num_outputs = 0; + for(int idx=0; idx<cd->file_size[TGSI_FILE_OUTPUT]; ++idx) + { + struct etna_reg_desc *reg = &cd->file[TGSI_FILE_OUTPUT][idx]; + assert(sobj->num_inputs < ETNA_NUM_INPUTS); /* NOTE(review): checks num_inputs although this loop fills outputs[] through num_outputs; presumably copied from fill_in_vs_inputs -- confirm the intended bound */ + switch(reg->semantic.Name) + { + case TGSI_SEMANTIC_POSITION: + sobj->vs_pos_out_reg = reg->native.id; + break; + case TGSI_SEMANTIC_PSIZE: + sobj->vs_pointsize_out_reg = reg->native.id; + break; + default: + sobj->outputs[sobj->num_outputs].reg = reg->native.id; + sobj->outputs[sobj->num_outputs].semantic = reg->semantic; + sobj->outputs[sobj->num_outputs].num_components = 4; // XXX reg->num_components; + sobj->num_outputs++; + sobj->output_count_per_semantic[reg->semantic.Name] = MAX2( + reg->semantic.Index + 1, + sobj->output_count_per_semantic[reg->semantic.Name]); + } + } + /* build two-level index for linking */ + build_output_index(sobj); + + /* fill in "mystery meat" load balancing value. This value determines how work is scheduled between VS and PS + * in the unified shader architecture. More precisely, it is determined from the number of VS outputs, as well as chip-specific + * vertex output buffer size, vertex cache size, and the number of shader cores. + * + * XXX this is a conservative estimate, the "optimal" value is only known for sure at link time because some + * outputs may be unused and thus unmapped. Then again, in the general use case with GLSL the vertex and fragment + * shaders are linked already before submitting to Gallium, thus all outputs are used. 
+ */ + int half_out = (cd->file_size[TGSI_FILE_OUTPUT] + 1) / 2; + assert(half_out); + uint32_t b = ((20480/(cd->specs->vertex_output_buffer_size-2*half_out*cd->specs->vertex_cache_size))+9)/10; + uint32_t a = (b+256/(cd->specs->shader_core_count*half_out))/2; + sobj->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a,255)) | + VIVS_VS_LOAD_BALANCING_B(MIN2(b,255)) | + VIVS_VS_LOAD_BALANCING_C(0x3f) | + VIVS_VS_LOAD_BALANCING_D(0x0f); +} + +static bool etna_compile_check_limits(struct etna_compile_data *cd) +{ + int max_uniforms = (cd->processor == TGSI_PROCESSOR_VERTEX) ? + cd->specs->max_vs_uniforms : + cd->specs->max_ps_uniforms; + /* round up number of uniforms, including immediates, in units of four */ + int num_uniforms = cd->imm_base/4 + (cd->imm_size+3)/4; + if(cd->inst_ptr > cd->specs->max_instructions) + { + DBG("Number of instructions (%d) exceeds maximum %d", cd->inst_ptr, cd->specs->max_instructions); + return false; + } + if(cd->next_free_native > cd->specs->max_registers) + { + DBG("Number of registers (%d) exceeds maximum %d", cd->next_free_native, cd->specs->max_registers); + return false; + } + if(num_uniforms > max_uniforms) + { + DBG("Number of uniforms (%d) exceeds maximum %d", num_uniforms, max_uniforms); + return false; + } + if(cd->num_varyings > cd->specs->max_varyings) + { + DBG("Number of varyings (%d) exceeds maximum %d", cd->num_varyings, cd->specs->max_varyings); + return false; + } + return true; +} + +int etna_compile_shader_object(const struct etna_pipe_specs *specs, const struct tgsi_token *tokens, + struct etna_shader_object **out) +{ + /* Create scratch space that may be too large to fit on stack + * XXX don't forget to free this on all exit paths. + */ + struct etna_compile_data *cd = CALLOC_STRUCT(etna_compile_data); + cd->specs = specs; + + /* Build a map from gallium register to native registers for files + * CONST, SAMP, IMM, OUT, IN, TEMP. 
+ * SAMP will map as-is for fragment shaders, there will be a +8 offset for vertex shaders. + */ + /* Pass one -- check register file declarations and immediates */ + etna_compile_parse_declarations(cd, tokens); + + etna_assign_decls(cd); + + /* Pass two -- check usage of temporaries, inputs, outputs */ + etna_compile_pass_check_usage(cd, tokens); + + assign_special_inputs(cd); + + /* Assign native temp register to TEMPs */ + assign_temporaries_to_native(cd, cd->file[TGSI_FILE_TEMPORARY], cd->file_size[TGSI_FILE_TEMPORARY]); + + /* optimize outputs */ + etna_compile_pass_optimize_outputs(cd, tokens); + + /* XXX assign special inputs: gl_FrontFacing (VARYING_SLOT_FACE) + * this is part of RGROUP_INTERNAL + */ + + /* assign inputs: last usage of input should be <= first usage of temp */ + /* potential optimization case: + * if single MOV TEMP[y], IN[x] before which temp y is not used, and after which IN[x] + * is not read, temp[y] can be used as input register as-is + */ + /* sort temporaries by first use + * sort inputs by last usage + * iterate over inputs, temporaries + * if last usage of input <= first usage of temp: + * assign input to temp + * advance input, temporary pointer + * else + * advance temporary pointer + * + * potential problem: instruction with multiple inputs of which one is the temp and the other is the input; + * however, as the temp is not used before this, how would this make sense? 
uninitialized temporaries have an undefined + * value, so this would be ok + */ + assign_inouts_to_temporaries(cd, TGSI_FILE_INPUT); + + /* assign outputs: first usage of output should be >= last usage of temp */ + /* potential optimization case: + * if single MOV OUT[x], TEMP[y] (with full write mask, or at least writing all components that are used in + * the shader) after which temp y is no longer used temp[y] can be used as output register as-is + * + * potential problem: instruction with multiple outputs of which one is the temp and the other is the output; + * however, as the temp is not used after this, how would this make sense? could just discard the output value + */ + /* sort temporaries by last use + * sort outputs by first usage + * iterate over outputs, temporaries + * if first usage of output >= last usage of temp: + * assign output to temp + * advance output, temporary pointer + * else + * advance temporary pointer + */ + assign_inouts_to_temporaries(cd, TGSI_FILE_OUTPUT); + + assign_constants_and_immediates(cd); + assign_texture_units(cd); + + /* list declarations */ + for(int x=0; x<cd->total_decls; ++x) + { + DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i last_use=%i native=%i usage_mask=%x has_semantic=%i", x, tgsi_file_name(cd->decl[x].file), cd->decl[x].idx, + cd->decl[x].active, + cd->decl[x].first_use, cd->decl[x].last_use, cd->decl[x].native.valid?cd->decl[x].native.id:-1, + cd->decl[x].usage_mask, + cd->decl[x].has_semantic); + if(cd->decl[x].has_semantic) + DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i", + tgsi_semantic_names[cd->decl[x].semantic.Name], cd->decl[x].semantic.Index); + } + /* XXX for PS we need to permute so that inputs are always in temporary 0..N-1. + * There is no "switchboard" for varyings (AFAIK!). The output color, however, can be routed + * from an arbitrary temporary. 
+ */ + if(cd->processor == TGSI_PROCESSOR_FRAGMENT) + { + permute_ps_inputs(cd); + } + + /* list declarations */ + for(int x=0; x<cd->total_decls; ++x) + { + DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i last_use=%i native=%i usage_mask=%x has_semantic=%i", x, tgsi_file_name(cd->decl[x].file), cd->decl[x].idx, + cd->decl[x].active, + cd->decl[x].first_use, cd->decl[x].last_use, cd->decl[x].native.valid?cd->decl[x].native.id:-1, + cd->decl[x].usage_mask, + cd->decl[x].has_semantic); + if(cd->decl[x].has_semantic) + DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i", + tgsi_semantic_names[cd->decl[x].semantic.Name], cd->decl[x].semantic.Index); + } + + /* pass 3: generate instructions + */ + etna_compile_pass_generate_code(cd, tokens); + etna_compile_add_z_div_if_needed(cd); + etna_compile_add_nop_if_needed(cd); + etna_compile_fill_in_labels(cd); + + if(!etna_compile_check_limits(cd)) + { + FREE(cd); + *out = NULL; + return -1; + } + + /* fill in output structure */ + struct etna_shader_object *sobj = CALLOC_STRUCT(etna_shader_object); + sobj->processor = cd->processor; + sobj->code_size = cd->inst_ptr * 4; + sobj->code = mem_dup(cd->code, cd->inst_ptr * 16); + sobj->num_temps = cd->next_free_native; + sobj->const_base = 0; + sobj->const_size = cd->imm_base; + sobj->imm_base = cd->imm_base; + sobj->imm_size = cd->imm_size; + sobj->imm_data = mem_dup(cd->imm_data, cd->imm_size * 4); + sobj->vs_pos_out_reg = -1; + sobj->vs_pointsize_out_reg = -1; + sobj->ps_color_out_reg = -1; + sobj->ps_depth_out_reg = -1; + if(cd->processor == TGSI_PROCESSOR_VERTEX) + { + fill_in_vs_inputs(sobj, cd); + fill_in_vs_outputs(sobj, cd); + } else if(cd->processor == TGSI_PROCESSOR_FRAGMENT) { + fill_in_ps_inputs(sobj, cd); + fill_in_ps_outputs(sobj, cd); + } + *out = sobj; + FREE(cd); + return 0; +} + +extern const char *tgsi_swizzle_names[]; +void etna_dump_shader_object(const struct etna_shader_object *sobj) +{ + if(sobj->processor == 
TGSI_PROCESSOR_VERTEX) + { + printf("VERT\n"); + } else { + printf("FRAG\n"); + } + for(int x=0; x<sobj->code_size/4; ++x) + { + printf("| %08x %08x %08x %08x\n", sobj->code[x*4+0], sobj->code[x*4+1], sobj->code[x*4+2], sobj->code[x*4+3]); + } + printf("num temps: %i\n", sobj->num_temps); + printf("num const: %i\n", sobj->const_size); + printf("immediates:\n"); + for(int idx=0; idx<sobj->imm_size; ++idx) + { + printf(" [%i].%s = %f (0x%08x)\n", (idx+sobj->imm_base)/4, tgsi_swizzle_names[idx%4], + *((float*)&sobj->imm_data[idx]), sobj->imm_data[idx]); + } + printf("inputs:\n"); + for(int idx=0; idx<sobj->num_inputs; ++idx) + { + printf(" [%i] name=%s index=%i pa=%08x comps=%i\n", + sobj->inputs[idx].reg, + tgsi_semantic_names[sobj->inputs[idx].semantic.Name], sobj->inputs[idx].semantic.Index, + sobj->inputs[idx].pa_attributes, sobj->inputs[idx].num_components); + } + printf("outputs:\n"); + for(int idx=0; idx<sobj->num_outputs; ++idx) + { + printf(" [%i] name=%s index=%i pa=%08x comps=%i\n", + sobj->outputs[idx].reg, + tgsi_semantic_names[sobj->outputs[idx].semantic.Name], sobj->outputs[idx].semantic.Index, + sobj->outputs[idx].pa_attributes, sobj->outputs[idx].num_components); + } + printf("special:\n"); + if(sobj->processor == TGSI_PROCESSOR_VERTEX) + { + printf(" vs_pos_out_reg=%i\n", sobj->vs_pos_out_reg); + printf(" vs_pointsize_out_reg=%i\n", sobj->vs_pointsize_out_reg); + printf(" vs_load_balancing=0x%08x\n", sobj->vs_load_balancing); + } else { + printf(" ps_color_out_reg=%i\n", sobj->ps_color_out_reg); + printf(" ps_depth_out_reg=%i\n", sobj->ps_depth_out_reg); + } + printf(" input_count_unk8=0x%08x\n", sobj->input_count_unk8); +} + +void etna_destroy_shader_object(struct etna_shader_object *sobj) +{ + if(sobj != NULL) + { + FREE(sobj->code); + FREE(sobj->imm_data); + FREE(sobj->output_per_semantic_list); + FREE(sobj); + } +} + +int etna_link_shader_objects(struct etna_shader_link_info *info, const struct etna_shader_object *vs, const struct 
etna_shader_object *fs) +{ + /* For each fs input we need to find the associated ps input, which can be found by matching on + * semantic name and index. + * A binary search can be used because the vs outputs are sorted by semantic in fill_in_vs_outputs. + */ + assert(fs->num_inputs < ETNA_NUM_INPUTS); + for(int idx=0; idx<fs->num_inputs; ++idx) + { + struct tgsi_declaration_semantic semantic = fs->inputs[idx].semantic; + if(semantic.Name == TGSI_SEMANTIC_PCOORD) + { + info->varyings_vs_reg[idx] = 0; /* replaced by point coord -- doesn't matter */ + continue; + } + struct etna_shader_inout *match = NULL; + if(semantic.Index < vs->output_count_per_semantic[semantic.Name]) + { + match = vs->output_per_semantic[semantic.Name][semantic.Index]; + } + if(match == NULL) + return 1; /* not found -- link error */ + info->varyings_vs_reg[idx] = match->reg; + } + return 0; +} diff --git a/src/gallium/drivers/etna/etna_compiler.h b/src/gallium/drivers/etna/etna_compiler.h new file mode 100644 index 0000000000..1b2d00fae7 --- /dev/null +++ b/src/gallium/drivers/etna/etna_compiler.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +/* Interface to shader compiler */ +#ifndef H_ETNA_COMPILER +#define H_ETNA_COMPILER +#include "etna_internal.h" + +#include <stdint.h> +#include "pipe/p_compiler.h" +#include "pipe/p_shader_tokens.h" + +/* XXX some of these such as ETNA_MAX_LABELS are pretty arbitrary limits, may be better to switch + * to dynamic allocation at some point. + */ +#define ETNA_MAX_TEMPS (64) /* max temp register count of all Vivante hw */ +#define ETNA_MAX_TOKENS (2048) +#define ETNA_MAX_IMM (1024) /* max const+imm in 32-bit words */ +#define ETNA_MAX_DECL (2048) /* max declarations */ +#define ETNA_MAX_DEPTH (32) +#define ETNA_MAX_LABELS (64) +#define ETNA_MAX_INSTRUCTIONS (2048) + +struct etna_pipe_specs; + +/* compiler output per input/output */ +struct etna_shader_inout +{ + int reg; /* native register */ + struct tgsi_declaration_semantic semantic; /* tgsi semantic name and index */ + int num_components; + /* varyings */ + uint32_t pa_attributes; /* PA_SHADER_ATTRIBUTES */ +}; + +/* shader object, for linking */ +struct etna_shader_object +{ + uint processor; /* TGSI_PROCESSOR_... 
*/ + uint32_t code_size; /* code size in uint32 words */ + uint32_t *code; + unsigned num_temps; + + uint32_t const_base; /* base of constants (in 32 bit units) */ + uint32_t const_size; /* size of constants, also base of immediates (in 32 bit units) */ + uint32_t imm_base; /* base of immediates (in 32 bit units) */ + uint32_t imm_size; /* size of immediates (in 32 bit units) */ + uint32_t *imm_data; + + /* inputs (for linking) + * for fs, the inputs must be in register 1..N */ + unsigned num_inputs; + struct etna_shader_inout inputs[ETNA_NUM_INPUTS]; + + /* outputs (for linking) */ + unsigned num_outputs; + struct etna_shader_inout outputs[ETNA_NUM_INPUTS]; + /* index into outputs (for linking) */ + int output_count_per_semantic[TGSI_SEMANTIC_COUNT]; + struct etna_shader_inout **output_per_semantic_list; /* list of pointers to outputs */ + struct etna_shader_inout **output_per_semantic[TGSI_SEMANTIC_COUNT]; + + /* special outputs (vs only) */ + int vs_pos_out_reg; /* VS position output */ + int vs_pointsize_out_reg; /* VS point size output */ + uint32_t vs_load_balancing; + + /* special outputs (ps only) */ + int ps_color_out_reg; /* color output register */ + int ps_depth_out_reg; /* depth output register */ + + /* unknown input property (XX_INPUT_COUNT, field UNK8) */ + uint32_t input_count_unk8; +}; + +struct etna_shader_link_info +{ + /* each PS input is annotated with the VS output reg */ + unsigned varyings_vs_reg[ETNA_NUM_INPUTS]; +}; + +/* Entry point to compiler. + * Returns non-zero if compilation fails. + */ +int etna_compile_shader_object(const struct etna_pipe_specs *specs, const struct tgsi_token *tokens, + struct etna_shader_object **out); + +/* Debug dump of shader object */ +void etna_dump_shader_object(const struct etna_shader_object *sobj); + +/* Link two shader objects together, annotates each PS input with the VS + * output register. Returns non-zero if the linking fails. 
+ */ +int etna_link_shader_objects(struct etna_shader_link_info *info, const struct etna_shader_object *vs, const struct etna_shader_object *fs); + +/* Destroy a previously allocated shader object */ +void etna_destroy_shader_object(struct etna_shader_object *obj); + +#endif diff --git a/src/gallium/drivers/etna/etna_debug.h b/src/gallium/drivers/etna/etna_debug.h new file mode 100644 index 0000000000..90d30b3b08 --- /dev/null +++ b/src/gallium/drivers/etna/etna_debug.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +/* Common debug stuffl */ +#ifndef H_ETNA_DEBUG +#define H_ETNA_DEBUG + +#include <stdint.h> +#include <stdlib.h> +#include "util/u_debug.h" + +/* Logging */ +#define ETNA_DBG_MSGS 0x1 /* Warnings and non-fatal errors */ +#define ETNA_DBG_FRAME_MSGS 0x2 +#define ETNA_DBG_RESOURCE_MSGS 0x4 +#define ETNA_DBG_COMPILER_MSGS 0x8 +#define ETNA_DBG_LINKER_MSGS 0x10 +#define ETNA_DBG_DUMP_SHADERS 0x20 + +/* Bypasses */ +#define ETNA_DBG_NO_TS 0x1000 /* Disable TS */ +#define ETNA_DBG_CFLUSH_ALL 0x2000 /* Flush before every state update + draw call */ +#define ETNA_DBG_MSAA_2X 0x4000 /* Force 2X MSAA for screen */ +#define ETNA_DBG_MSAA_4X 0x8000 /* Force 4X MSAA for screen */ +#define ETNA_DBG_FINISH_ALL 0x10000 /* Finish on every flush */ +#define ETNA_DBG_FLUSH_ALL 0x20000 /* Flush after every rendered primitive */ + +extern uint32_t etna_mesa_debug; /* set in etna_screen.c from ETNA_DEBUG */ + +#define DBG_ENABLED(flag) (etna_mesa_debug & (flag)) + +#define DBG_F(flag, fmt, ...) \ + do { if (etna_mesa_debug & (flag)) \ + debug_printf("%s:%d: "fmt "\n", \ + __FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0) + +#define DBG(fmt, ...) 
\ + do { if (etna_mesa_debug & ETNA_DBG_MSGS) \ + debug_printf("%s:%d: "fmt "\n", \ + __FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0) + + +#endif diff --git a/src/gallium/drivers/etna/etna_fence.c b/src/gallium/drivers/etna/etna_fence.c new file mode 100644 index 0000000000..3660b6bc19 --- /dev/null +++ b/src/gallium/drivers/etna/etna_fence.c @@ -0,0 +1,174 @@ +#include "etna_fence.h" +#include "etna_debug.h" +#include "etna_screen.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_string.h" + +#include <etnaviv/viv.h> +#include <etnaviv/etna.h> +#include <etnaviv/etna_queue.h> + +static void etna_screen_fence_reference(struct pipe_screen *screen_h, + struct pipe_fence_handle **ptr_h, + struct pipe_fence_handle *fence_h ); +static boolean etna_screen_fence_signalled(struct pipe_screen *screen_h, + struct pipe_fence_handle *fence_h); +static boolean etna_screen_fence_finish(struct pipe_screen *screen_h, + struct pipe_fence_handle *fence_h, + uint64_t timeout ); + +int etna_fence_new(struct pipe_screen *screen_h, struct etna_ctx *ctx, struct pipe_fence_handle **fence_p) +{ + struct etna_fence *fence = NULL; + struct etna_screen *screen = etna_screen(screen_h); + int rv; + + /* XXX we do not release the fence_p reference here -- neither do the other drivers, + * and clients don't seem to rely on this. 
*/ + if(fence_p == NULL) + return ETNA_INVALID_ADDR; + assert(*fence_p == NULL); + + /* re-use old fence, if available, and reset it first */ + pipe_mutex_lock(screen->fence_mutex); + if(screen->fence_freelist != NULL) + { + fence = screen->fence_freelist; + screen->fence_freelist = fence->next_free; + fence->next_free = NULL; + } + pipe_mutex_unlock(screen->fence_mutex); + + if(fence != NULL) + { + if((rv = viv_user_signal_signal(ctx->conn, fence->signal, 0)) != VIV_STATUS_OK) + { + printf("Error: could not reset signal %i\n", fence->signal); + etna_screen_destroy_fence(screen_h, fence); + return rv; + } + fence->signalled = false; + } else { + fence = CALLOC_STRUCT(etna_fence); + /* Create signal with manual reset; we want to be able to probe it + * or wait for it without resetting it. + */ + if((rv = viv_user_signal_create(ctx->conn, /* manualReset */ true, &fence->signal)) != VIV_STATUS_OK) + { + FREE(fence); + return rv; + } + } + if((rv = etna_queue_signal(ctx->queue, fence->signal, VIV_WHERE_PIXEL)) != ETNA_OK) + { + printf("%s: error queueing signal %i\n", __func__, fence->signal); + viv_user_signal_destroy(ctx->conn, fence->signal); + FREE(fence); + return rv; + } + pipe_reference_init(&fence->reference, 1); + *fence_p = (struct pipe_fence_handle*)fence; + return ETNA_OK; +} + +static void +debug_describe_fence(char* buf, const struct etna_fence *fence) +{ + util_sprintf(buf, "etna_fence<%i>", fence->signal); +} + +/** + * Reference or unreference a fence. Once the reference count falls to zero, + * the fence will be destroyed or put in the free list to be reused. 
+ */ +static void etna_screen_fence_reference(struct pipe_screen *screen_h, + struct pipe_fence_handle **ptr_h, + struct pipe_fence_handle *fence_h ) +{ + struct etna_screen *screen = etna_screen(screen_h); + struct etna_fence *fence = etna_fence(fence_h); + struct etna_fence **ptr = (struct etna_fence **) ptr_h; + struct etna_fence *old_fence = *ptr; + if (pipe_reference_described(&(*ptr)->reference, &fence->reference, + (debug_reference_descriptor)debug_describe_fence)) + { + if(etna_screen_fence_signalled(screen_h, (struct pipe_fence_handle*)old_fence)) + { + /* If signalled, add old fence to free list, as it can be reused */ + pipe_mutex_lock(screen->fence_mutex); + old_fence->next_free = screen->fence_freelist; + screen->fence_freelist = old_fence; + pipe_mutex_unlock(screen->fence_mutex); + } else { + /* If fence is still to be signalled, destroy it, to prevent it from being + * reused. */ + etna_screen_destroy_fence(screen_h, old_fence); + } + } + *ptr_h = fence_h; +} + +/** + * Poll whether the fence has been signalled. + */ +static boolean etna_screen_fence_signalled(struct pipe_screen *screen_h, + struct pipe_fence_handle *fence_h) +{ + return etna_screen_fence_finish(screen_h, fence_h, 0); +} + +/** + * Wait until the fence has been signalled for the specified timeout in nanoseconds, + * or PIPE_TIMEOUT_INFINITE. + */ +static boolean etna_screen_fence_finish(struct pipe_screen *screen_h, + struct pipe_fence_handle *fence_h, + uint64_t timeout ) +{ + struct etna_screen *screen = etna_screen(screen_h); + struct etna_fence *fence = etna_fence(fence_h); + int rv; + if(fence->signalled) /* avoid a kernel roundtrip */ + return true; + /* nanoseconds to milliseconds */ + rv = viv_user_signal_wait(screen->dev, fence->signal, + timeout == PIPE_TIMEOUT_INFINITE ? 
VIV_WAIT_INDEFINITE : (timeout / 1000000ULL)); + if(rv != VIV_STATUS_OK && rv != VIV_STATUS_TIMEOUT) + { + printf("%s: error waiting for signal %i", __func__, fence->signal); + } + fence->signalled = (rv != VIV_STATUS_TIMEOUT); + return fence->signalled; +} + +void etna_screen_destroy_fence(struct pipe_screen *screen_h, struct etna_fence *fence) +{ + struct etna_screen *screen = etna_screen(screen_h); + if(viv_user_signal_destroy(screen->dev, fence->signal) != VIV_STATUS_OK) + { + printf("%s: cannot destroy signal %i\n", __func__, fence->signal); + } + FREE(fence); +} + +void etna_screen_destroy_fences(struct pipe_screen *screen_h) +{ + struct etna_screen *screen = etna_screen(screen_h); + struct etna_fence *fence, *next; + pipe_mutex_lock(screen->fence_mutex); + for(fence = screen->fence_freelist; fence != NULL; fence = next) + { + next = fence->next_free; + etna_screen_destroy_fence(screen_h, fence); + } + screen->fence_freelist = NULL; + pipe_mutex_unlock(screen->fence_mutex); +} + +void etna_screen_fence_init(struct pipe_screen *pscreen) +{ + pscreen->fence_reference = etna_screen_fence_reference; + pscreen->fence_signalled = etna_screen_fence_signalled; + pscreen->fence_finish = etna_screen_fence_finish; +} diff --git a/src/gallium/drivers/etna/etna_fence.h b/src/gallium/drivers/etna/etna_fence.h new file mode 100644 index 0000000000..2240528d02 --- /dev/null +++ b/src/gallium/drivers/etna/etna_fence.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and 
this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +/* Functions dealing with fences */ +#ifndef ETNA_FENCE_H_ +#define ETNA_FENCE_H_ + +#include "pipe/p_state.h" + +struct pipe_screen; +struct pipe_fence_handle; +struct etna_ctx; + +struct etna_fence +{ + struct pipe_reference reference; + int signal; /* signal id from kernel */ + bool signalled; /* cached value of signalled */ + struct etna_fence *next_free; /* if in free list, reference to next free fence */ +}; + +/** Convert generic pipe_fence_handle pointer to etna_fence */ +static INLINE struct etna_fence * +etna_fence(struct pipe_fence_handle *pfence) +{ + return (struct etna_fence *)pfence; +} + +/** + * Create a new fence that will be signalled after GPU completes rendering + * after the next flush. + */ +int etna_fence_new(struct pipe_screen *screen, + struct etna_ctx *ctx, + struct pipe_fence_handle **fence); + +/** + * Destroy a fence. In general, you should call etna_screen_fence_reference instead, + * if there may be other references. + */ +void etna_screen_destroy_fence(struct pipe_screen *screen_h, struct etna_fence *fence); + +/** + * Destroy all fences kept around for re-use in the free list. 
+ */ +void etna_screen_destroy_fences(struct pipe_screen *screen_h); + +void etna_screen_fence_init(struct pipe_screen *screen); + +#endif diff --git a/src/gallium/drivers/etna/etna_internal.h b/src/gallium/drivers/etna/etna_internal.h new file mode 100644 index 0000000000..017da2e37d --- /dev/null +++ b/src/gallium/drivers/etna/etna_internal.h @@ -0,0 +1,411 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +/* internal definitions */ +#ifndef H_ETNA_INTERNAL +#define H_ETNA_INTERNAL + +#include <stdint.h> +#include <stdbool.h> +#include <etnaviv/state.xml.h> +#include <etnaviv/state_3d.xml.h> + +#define ETNA_NUM_INPUTS (16) +#define ETNA_NUM_VARYINGS (16) +#define ETNA_NUM_LOD (14) +#define ETNA_NUM_LAYERS (6) +#define ETNA_MAX_UNIFORMS (256) + +/* RS tiled operations must have width%16 = 0 */ +#define ETNA_RS_WIDTH_MASK (16-1) +/* RS tiled operations must have height%4 = 0 */ +#define ETNA_RS_HEIGHT_MASK (3) + +/* GPU chip 3D specs */ +struct etna_pipe_specs +{ + /* supports SUPERTILE (64x64) tiling? */ + bool can_supertile; + /* number of bits per TS tile */ + unsigned bits_per_tile; + /* clear value for TS (dependent on bits_per_tile) */ + uint32_t ts_clear_value; + /* base of vertex texture units */ + unsigned vertex_sampler_offset; + /* number of fragment sampler units */ + unsigned fragment_sampler_count; + /* number of vertex sampler units */ + unsigned vertex_sampler_count; + /* needs z=(z+w)/2, for older GCxxx */ + bool vs_need_z_div; + /* size of vertex shader output buffer */ + unsigned vertex_output_buffer_size; + /* size of a cached vertex (?) 
*/ + unsigned vertex_cache_size; + /* number of shader cores */ + unsigned shader_core_count; + /* number of vertex streams */ + unsigned stream_count; + /* supports trigonometric instructions */ + bool has_sin_cos_sqrt; + /* can use VS_RANGE, PS_RANGE registers*/ + bool has_shader_range_registers; + /* vertex shader memory address*/ + uint32_t vs_offset; + /* pixel shader memory address*/ + uint32_t ps_offset; + /* vertex/fragment shader max instructions */ + uint32_t max_instructions; + /* maximum number of varyings */ + unsigned max_varyings; + /* maximum number of registers */ + unsigned max_registers; + /* maximum vertex uniforms */ + unsigned max_vs_uniforms; + /* maximum pixel uniforms */ + unsigned max_ps_uniforms; + /* maximum texture size */ + unsigned max_texture_size; + /* maximum render target size */ + unsigned max_rendertarget_size; +}; + +/** Compiled Gallium state. All the different compiled state atoms are woven together and uploaded + * only when it is necessary to synchronize the state, for example before rendering. 
*/ + +/* Compiled pipe_rasterizer_state */ +struct compiled_rasterizer_state +{ + uint32_t PA_CONFIG; + uint32_t PA_LINE_WIDTH; + uint32_t PA_POINT_SIZE; + uint32_t PA_SYSTEM_MODE; + uint32_t SE_DEPTH_SCALE; + uint32_t SE_DEPTH_BIAS; + uint32_t SE_CONFIG; + bool point_size_per_vertex; + bool scissor; +}; + +/* Compiled pipe_depth_stencil_alpha_state */ +struct compiled_depth_stencil_alpha_state +{ + uint32_t PE_DEPTH_CONFIG; + uint32_t PE_ALPHA_OP; + uint32_t PE_STENCIL_OP; + uint32_t PE_STENCIL_CONFIG; +}; + +/* Compiled pipe_blend_state */ +struct compiled_blend_state +{ + uint32_t PE_ALPHA_CONFIG; + uint32_t PE_COLOR_FORMAT; + uint32_t PE_LOGIC_OP; + uint32_t PE_DITHER[2]; +}; + +/* Compiled pipe_blend_color */ +struct compiled_blend_color +{ + uint32_t PE_ALPHA_BLEND_COLOR; +}; + +/* Compiled pipe_stencil_ref */ +struct compiled_stencil_ref +{ + uint32_t PE_STENCIL_CONFIG; + uint32_t PE_STENCIL_CONFIG_EXT; +}; + +/* Compiled pipe_scissor_state */ +struct compiled_scissor_state +{ + uint32_t SE_SCISSOR_LEFT; // fixp + uint32_t SE_SCISSOR_TOP; // fixp + uint32_t SE_SCISSOR_RIGHT; // fixp + uint32_t SE_SCISSOR_BOTTOM; // fixp +}; + +/* Compiled pipe_viewport_state */ +struct compiled_viewport_state +{ + uint32_t PA_VIEWPORT_SCALE_X; + uint32_t PA_VIEWPORT_SCALE_Y; + uint32_t PA_VIEWPORT_SCALE_Z; + uint32_t PA_VIEWPORT_OFFSET_X; + uint32_t PA_VIEWPORT_OFFSET_Y; + uint32_t PA_VIEWPORT_OFFSET_Z; + uint32_t SE_SCISSOR_LEFT; // fixp + uint32_t SE_SCISSOR_TOP; // fixp + uint32_t SE_SCISSOR_RIGHT; // fixp + uint32_t SE_SCISSOR_BOTTOM; // fixp + uint32_t PE_DEPTH_NEAR; + uint32_t PE_DEPTH_FAR; +}; + +/* Compiled sample mask (context->set_sample_mask) */ +struct compiled_sample_mask +{ + uint32_t GL_MULTI_SAMPLE_CONFIG; +}; + +/* Compiled pipe_sampler_state */ +struct compiled_sampler_state +{ + /* sampler offset +4*sampler, interleave when committing state */ + uint32_t TE_SAMPLER_CONFIG0; + uint32_t TE_SAMPLER_CONFIG1; + uint32_t TE_SAMPLER_LOD_CONFIG; + unsigned 
min_lod, max_lod; +}; + +/* Compiled pipe_sampler_view */ +struct compiled_sampler_view +{ + /* sampler offset +4*sampler, interleave when committing state */ + uint32_t TE_SAMPLER_CONFIG0; + uint32_t TE_SAMPLER_CONFIG1; + uint32_t TE_SAMPLER_SIZE; + uint32_t TE_SAMPLER_LOG_SIZE; + uint32_t TE_SAMPLER_LOD_ADDR[VIVS_TE_SAMPLER_LOD_ADDR__LEN]; + unsigned min_lod, max_lod; /* 5.5 fixp */ +}; + +/* Compiled pipe_framebuffer_state */ +struct compiled_framebuffer_state +{ + struct pipe_surface *cbuf, *zsbuf; /* keep reference to surfaces */ + uint32_t GL_MULTI_SAMPLE_CONFIG; + uint32_t PE_COLOR_FORMAT; + uint32_t PE_DEPTH_CONFIG; + uint32_t PE_DEPTH_ADDR; + uint32_t PE_PIPE_DEPTH_ADDR[2]; + uint32_t PE_DEPTH_STRIDE; + uint32_t PE_HDEPTH_CONTROL; + uint32_t PE_DEPTH_NORMALIZE; + uint32_t PE_COLOR_ADDR; + uint32_t PE_PIPE_COLOR_ADDR[2]; + uint32_t PE_COLOR_STRIDE; + uint32_t SE_SCISSOR_LEFT; // fixp, restricted by scissor state *if* enabled in rasterizer state + uint32_t SE_SCISSOR_TOP; // fixp + uint32_t SE_SCISSOR_RIGHT; // fixp + uint32_t SE_SCISSOR_BOTTOM; // fixp + uint32_t RA_MULTISAMPLE_UNK00E04; + uint32_t RA_MULTISAMPLE_UNK00E10[VIVS_RA_MULTISAMPLE_UNK00E10__LEN]; + uint32_t RA_CENTROID_TABLE[VIVS_RA_CENTROID_TABLE__LEN]; + uint32_t TS_MEM_CONFIG; + uint32_t TS_DEPTH_CLEAR_VALUE; + uint32_t TS_DEPTH_STATUS_BASE; + uint32_t TS_DEPTH_SURFACE_BASE; + uint32_t TS_COLOR_CLEAR_VALUE; + uint32_t TS_COLOR_STATUS_BASE; + uint32_t TS_COLOR_SURFACE_BASE; + bool msaa_mode; // adds input (and possible temp) to PS +}; + +/* Compiled context->create_vertex_elements_state */ +struct compiled_vertex_elements_state +{ + unsigned num_elements; + uint32_t FE_VERTEX_ELEMENT_CONFIG[VIVS_FE_VERTEX_ELEMENT_CONFIG__LEN]; +}; + +/* Compiled context->set_vertex_buffer result */ +struct compiled_set_vertex_buffer +{ + void *logical; /* CPU address of vertex buffer base */ + uint32_t FE_VERTEX_STREAM_CONTROL; + uint32_t FE_VERTEX_STREAM_BASE_ADDR; +}; + +/* Compiled context->set_index_buffer 
result */ +struct compiled_set_index_buffer +{ + void *logical; + uint32_t FE_INDEX_STREAM_CONTROL; + uint32_t FE_INDEX_STREAM_BASE_ADDR; +}; + +/* Compiled linked VS+PS shader state. The fields match what sync_context() reads from e->shader_state after etna_link_shaders() has filled it in (presumably an instance of this struct -- confirm against etna_pipe_context). */ +struct compiled_shader_state +{ + uint32_t RA_CONTROL; + uint32_t PA_ATTRIBUTE_ELEMENT_COUNT; + uint32_t PA_CONFIG; + uint32_t PA_SHADER_ATTRIBUTES[VIVS_PA_SHADER_ATTRIBUTES__LEN]; + uint32_t VS_END_PC; + uint32_t VS_OUTPUT_COUNT; /* number of outputs if point size per vertex is disabled */ + uint32_t VS_OUTPUT_COUNT_PSIZE; /* number of outputs if point size per vertex is enabled; sync_context() picks this one when rasterizer.point_size_per_vertex is set */ + uint32_t VS_INPUT_COUNT; + uint32_t VS_TEMP_REGISTER_CONTROL; + uint32_t VS_OUTPUT[4]; + uint32_t VS_INPUT[4]; + uint32_t VS_LOAD_BALANCING; + uint32_t VS_START_PC; + uint32_t PS_END_PC; + uint32_t PS_OUTPUT_REG; + uint32_t PS_INPUT_COUNT; + uint32_t PS_INPUT_COUNT_MSAA; /* Adds an input; used instead of PS_INPUT_COUNT when framebuffer.msaa_mode is set */ + uint32_t PS_TEMP_REGISTER_CONTROL; + uint32_t PS_TEMP_REGISTER_CONTROL_MSAA; /* Adds a temporary if needed to make space for extra input; used instead of PS_TEMP_REGISTER_CONTROL when framebuffer.msaa_mode is set */ + uint32_t PS_CONTROL; + uint32_t PS_START_PC; + uint32_t GL_VARYING_TOTAL_COMPONENTS; + uint32_t GL_VARYING_NUM_COMPONENTS; + uint32_t GL_VARYING_COMPONENT_USE[2]; + unsigned vs_inst_mem_size; /* passed as the count argument of etna_set_state_multi() together with VS_INST_MEM; presumably a number of 32-bit words -- TODO confirm */ + unsigned vs_uniforms_size; /* count argument used when uploading VS_UNIFORMS */ + unsigned ps_inst_mem_size; /* count argument used when uploading PS_INST_MEM */ + unsigned ps_uniforms_size; /* count argument used when uploading PS_UNIFORMS */ + uint32_t *VS_INST_MEM; + uint32_t VS_UNIFORMS[ETNA_MAX_UNIFORMS*4]; + uint32_t *PS_INST_MEM; + uint32_t PS_UNIFORMS[ETNA_MAX_UNIFORMS*4]; +}; + +/* state of all 3d and common registers relevant to etna driver */ +struct etna_3d_state +{ + unsigned num_vertex_elements; /* number of elements in FE_VERTEX_ELEMENT_CONFIG */ + unsigned vs_uniforms_size; + unsigned ps_uniforms_size; + unsigned vs_inst_mem_size; + unsigned ps_inst_mem_size; + + uint32_t /*00600*/ FE_VERTEX_ELEMENT_CONFIG[VIVS_FE_VERTEX_ELEMENT_CONFIG__LEN]; + uint32_t /*00644*/ FE_INDEX_STREAM_BASE_ADDR; + uint32_t /*00648*/ FE_INDEX_STREAM_CONTROL; + uint32_t /*0064C*/ FE_VERTEX_STREAM_BASE_ADDR; + uint32_t /*00650*/ FE_VERTEX_STREAM_CONTROL; 
+ uint32_t /*00680*/ FE_VERTEX_STREAMS_BASE_ADDR[VIVS_FE_VERTEX_STREAMS__LEN]; + uint32_t /*006A0*/ FE_VERTEX_STREAMS_CONTROL[VIVS_FE_VERTEX_STREAMS__LEN]; + + uint32_t /*00800*/ VS_END_PC; + uint32_t /*00804*/ VS_OUTPUT_COUNT; + uint32_t /*00808*/ VS_INPUT_COUNT; + uint32_t /*0080C*/ VS_TEMP_REGISTER_CONTROL; + uint32_t /*00810*/ VS_OUTPUT[VIVS_VS_OUTPUT__LEN]; + uint32_t /*00820*/ VS_INPUT[VIVS_VS_INPUT__LEN]; + uint32_t /*00830*/ VS_LOAD_BALANCING; + uint32_t /*00838*/ VS_START_PC; + uint32_t /*0085C*/ VS_RANGE; + + uint32_t /*00A00*/ PA_VIEWPORT_SCALE_X; + uint32_t /*00A04*/ PA_VIEWPORT_SCALE_Y; + uint32_t /*00A08*/ PA_VIEWPORT_SCALE_Z; + uint32_t /*00A0C*/ PA_VIEWPORT_OFFSET_X; + uint32_t /*00A10*/ PA_VIEWPORT_OFFSET_Y; + uint32_t /*00A14*/ PA_VIEWPORT_OFFSET_Z; + uint32_t /*00A18*/ PA_LINE_WIDTH; + uint32_t /*00A1C*/ PA_POINT_SIZE; + uint32_t /*00A28*/ PA_SYSTEM_MODE; + uint32_t /*00A2C*/ PA_W_CLIP_LIMIT; + uint32_t /*00A30*/ PA_ATTRIBUTE_ELEMENT_COUNT; + uint32_t /*00A34*/ PA_CONFIG; + uint32_t /*00A40*/ PA_SHADER_ATTRIBUTES[VIVS_PA_SHADER_ATTRIBUTES__LEN]; + + uint32_t /*00C00*/ SE_SCISSOR_LEFT; // fixp + uint32_t /*00C04*/ SE_SCISSOR_TOP; // fixp + uint32_t /*00C08*/ SE_SCISSOR_RIGHT; // fixp + uint32_t /*00C0C*/ SE_SCISSOR_BOTTOM; // fixp + uint32_t /*00C10*/ SE_DEPTH_SCALE; + uint32_t /*00C14*/ SE_DEPTH_BIAS; + uint32_t /*00C18*/ SE_CONFIG; + + uint32_t /*00E00*/ RA_CONTROL; + uint32_t /*00E04*/ RA_MULTISAMPLE_UNK00E04; + uint32_t /*00E08*/ RA_EARLY_DEPTH; + uint32_t /*00E10*/ RA_MULTISAMPLE_UNK00E10[VIVS_RA_MULTISAMPLE_UNK00E10__LEN]; + uint32_t /*00E40*/ RA_CENTROID_TABLE[VIVS_RA_CENTROID_TABLE__LEN]; + + uint32_t /*01000*/ PS_END_PC; + uint32_t /*01004*/ PS_OUTPUT_REG; + uint32_t /*01008*/ PS_INPUT_COUNT; + uint32_t /*0100C*/ PS_TEMP_REGISTER_CONTROL; + uint32_t /*01010*/ PS_CONTROL; + uint32_t /*01018*/ PS_START_PC; + uint32_t /*0101C*/ PS_RANGE; + + uint32_t /*01400*/ PE_DEPTH_CONFIG; + uint32_t /*01404*/ PE_DEPTH_NEAR; + uint32_t /*01408*/ 
PE_DEPTH_FAR; + uint32_t /*0140C*/ PE_DEPTH_NORMALIZE; + uint32_t /*01410*/ PE_DEPTH_ADDR; + uint32_t /*01414*/ PE_DEPTH_STRIDE; + uint32_t /*01418*/ PE_STENCIL_OP; + uint32_t /*0141C*/ PE_STENCIL_CONFIG; + uint32_t /*01420*/ PE_ALPHA_OP; + uint32_t /*01424*/ PE_ALPHA_BLEND_COLOR; + uint32_t /*01428*/ PE_ALPHA_CONFIG; + uint32_t /*0142C*/ PE_COLOR_FORMAT; + uint32_t /*01430*/ PE_COLOR_ADDR; + uint32_t /*01434*/ PE_COLOR_STRIDE; + uint32_t /*01454*/ PE_HDEPTH_CONTROL; + uint32_t /*014A0*/ PE_STENCIL_CONFIG_EXT; + uint32_t /*014A4*/ PE_LOGIC_OP; + uint32_t /*014A8*/ PE_DITHER[2]; + uint32_t /*01460*/ PE_PIPE_COLOR_ADDR[VIVS_PE_PIPE__LEN]; + uint32_t /*01480*/ PE_PIPE_DEPTH_ADDR[VIVS_PE_PIPE__LEN]; + + uint32_t /*01604*/ RS_CONFIG; + uint32_t /*01608*/ RS_SOURCE_ADDR; + uint32_t /*0160C*/ RS_SOURCE_STRIDE; + uint32_t /*01610*/ RS_DEST_ADDR; + uint32_t /*01614*/ RS_DEST_STRIDE; + uint32_t /*01620*/ RS_WINDOW_SIZE; + uint32_t /*01630*/ RS_DITHER[2]; + uint32_t /*0163C*/ RS_CLEAR_CONTROL; + uint32_t /*01640*/ RS_FILL_VALUE[4]; + + uint32_t /*01654*/ TS_MEM_CONFIG; + uint32_t /*01658*/ TS_COLOR_STATUS_BASE; + uint32_t /*0165C*/ TS_COLOR_SURFACE_BASE; + uint32_t /*01660*/ TS_COLOR_CLEAR_VALUE; + uint32_t /*01664*/ TS_DEPTH_STATUS_BASE; + uint32_t /*01668*/ TS_DEPTH_SURFACE_BASE; + uint32_t /*0166C*/ TS_DEPTH_CLEAR_VALUE; + + uint32_t /*016A0*/ RS_EXTRA_CONFIG; + + uint32_t /*02000*/ TE_SAMPLER_CONFIG0[VIVS_TE_SAMPLER__LEN]; + uint32_t /*02040*/ TE_SAMPLER_SIZE[VIVS_TE_SAMPLER__LEN]; + uint32_t /*02080*/ TE_SAMPLER_LOG_SIZE[VIVS_TE_SAMPLER__LEN]; + uint32_t /*020C0*/ TE_SAMPLER_LOD_CONFIG[VIVS_TE_SAMPLER__LEN]; + uint32_t /*021C0*/ TE_SAMPLER_CONFIG1[VIVS_TE_SAMPLER__LEN]; + uint32_t /*02400*/ TE_SAMPLER_LOD_ADDR[VIVS_TE_SAMPLER_LOD_ADDR__LEN][VIVS_TE_SAMPLER__LEN]; + + uint32_t /*03814*/ GL_VERTEX_ELEMENT_CONFIG; + uint32_t /*03818*/ GL_MULTI_SAMPLE_CONFIG; + uint32_t /*0381C*/ GL_VARYING_TOTAL_COMPONENTS; + uint32_t /*03820*/ GL_VARYING_NUM_COMPONENTS; + uint32_t 
/*03828*/ GL_VARYING_COMPONENT_USE[VIVS_GL_VARYING_COMPONENT_USE__LEN]; + uint32_t /*0384C*/ GL_API_MODE; + + uint32_t /*04000*/ VS_INST_MEM[VIVS_VS_INST_MEM__LEN]; + uint32_t /*05000*/ VS_UNIFORMS[VIVS_VS_UNIFORMS__LEN]; + uint32_t /*06000*/ PS_INST_MEM[VIVS_PS_INST_MEM__LEN]; + uint32_t /*07000*/ PS_UNIFORMS[VIVS_PS_UNIFORMS__LEN]; +}; + + +#endif diff --git a/src/gallium/drivers/etna/etna_pipe.c b/src/gallium/drivers/etna/etna_pipe.c new file mode 100644 index 0000000000..01a9aa1744 --- /dev/null +++ b/src/gallium/drivers/etna/etna_pipe.c @@ -0,0 +1,1333 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +/* Gallium pipe driver + */ +#include "etna_pipe.h" +#include "etna_translate.h" + +#include <etnaviv/common.xml.h> +#include <etnaviv/state.xml.h> +#include <etnaviv/state_3d.xml.h> +#include <etnaviv/cmdstream.xml.h> +#include <etnaviv/viv.h> +#include <etnaviv/etna.h> + +#include "etna_blend.h" +#include "etna_clear_blit.h" +#include "etna_compiler.h" +#include "etna_debug.h" +#include "etna_fence.h" +#include "etna_rasterizer.h" +#include "etna_resource.h" +#include "etna_shader.h" +#include "etna_surface.h" +#include "etna_texture.h" +#include "etna_transfer.h" +#include "etna_zsa.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_math.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" +#include "util/u_prim.h" + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdbool.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <stdarg.h> +#include <assert.h> +#include <math.h> +#include <errno.h> + +/*********************************************************************/ +/* Context handling */ + +#define ETNA_3D_CONTEXT_SIZE (400) /* keep this number above "Total state updates (fixed)" from gen_weave_state tool */ + +/* Create bit field that specifies which samplers are active and thus need to be programmed + * 32 bits is enough for 32 samplers. As far as I know this is the upper bound supported on any Vivante hw + * up to GC4000. 
+ */ +static uint32_t active_samplers_bits(struct pipe_context *pipe) +{ /* A sampler only counts as active when both a sampler state and a sampler view are bound for it, hence the MIN2 of the two counts per shader stage. */ + struct etna_pipe_context *restrict e = etna_pipe_context(pipe); + unsigned num_fragment_samplers = MIN2(e->num_fragment_samplers, e->num_fragment_sampler_views); + unsigned num_vertex_samplers = MIN2(e->num_vertex_samplers, e->num_vertex_sampler_views); /* Fragment samplers take the low bits; the vertex sampler bits are shifted up by specs.vertex_sampler_offset. The shift binds tighter than the bitwise or, so only the vertex part is shifted. */ + uint32_t active_samplers = etna_bits_ones(num_fragment_samplers) | + etna_bits_ones(num_vertex_samplers) << e->specs.vertex_sampler_offset; + return active_samplers; +} + +/* Reset / re-upload context. + * + * This pushes the current register state cached in e->gpu3d (the + * etna_3d_state of this pipe context) to the GPU. + * The function is used to initialize the GPU in a predictable state + * at the beginning of rendering, as well as to create a context + * buffer for the kernel driver. + */ +static void reset_context(struct pipe_context *restrict pipe) +{ + struct etna_pipe_context *restrict e = etna_pipe_context(pipe); + struct etna_ctx *restrict ctx = e->ctx; + /* Both helpers emit the cached e->gpu3d value unconditionally; they differ only in the last argument handed to ETNA_COALESCE_STATE_UPDATE (0 versus 1, presumably the fixed-point flag -- confirm in the macro definition). */ +#define EMIT_STATE(state_name, dest_field) \ + ETNA_COALESCE_STATE_UPDATE(state_name, e->gpu3d.dest_field, 0) + +#define EMIT_STATE_FIXP(state_name, dest_field) \ + ETNA_COALESCE_STATE_UPDATE(state_name, e->gpu3d.dest_field, 1) + + uint32_t last_reg, last_fixp, span_start; + ETNA_COALESCE_STATE_OPEN(ETNA_3D_CONTEXT_SIZE); + /* multi sample config is set first, and outside of the normal sorting + * order, as changing the multisample state clobbers PS.INPUT_COUNT (and + * possibly PS.TEMP_REGISTER_CONTROL). + */ + /*03818*/ EMIT_STATE(GL_MULTI_SAMPLE_CONFIG, GL_MULTI_SAMPLE_CONFIG); + /* below code generated by gen_weave_state.py, keep this in sync with sync_context! 
*/ + /* begin only EMIT_STATE -- make sure no new etna_reserve calls are done here directly + * or indirectly */ + for(int x=0; x<e->gpu3d.num_vertex_elements; ++x) + { + /*00600*/ EMIT_STATE(FE_VERTEX_ELEMENT_CONFIG(x), FE_VERTEX_ELEMENT_CONFIG[x]); + } + /*00644*/ EMIT_STATE(FE_INDEX_STREAM_BASE_ADDR, FE_INDEX_STREAM_BASE_ADDR); + /*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL, FE_INDEX_STREAM_CONTROL); + /*0064C*/ EMIT_STATE(FE_VERTEX_STREAM_BASE_ADDR, FE_VERTEX_STREAM_BASE_ADDR); + /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, FE_VERTEX_STREAM_CONTROL); + for(int x=0; x<8; ++x) + { + /*00680*/ EMIT_STATE(FE_VERTEX_STREAMS_BASE_ADDR(x), FE_VERTEX_STREAMS_BASE_ADDR[x]); + } + for(int x=0; x<8; ++x) + { + /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), FE_VERTEX_STREAMS_CONTROL[x]); + } + /*00800*/ EMIT_STATE(VS_END_PC, VS_END_PC); + /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT, VS_OUTPUT_COUNT); + /*00808*/ EMIT_STATE(VS_INPUT_COUNT, VS_INPUT_COUNT); + /*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL, VS_TEMP_REGISTER_CONTROL); + for(int x=0; x<4; ++x) + { + /*00810*/ EMIT_STATE(VS_OUTPUT(x), VS_OUTPUT[x]); + } + for(int x=0; x<4; ++x) + { + /*00820*/ EMIT_STATE(VS_INPUT(x), VS_INPUT[x]); + } + /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, VS_LOAD_BALANCING); + /*00838*/ EMIT_STATE(VS_START_PC, VS_START_PC); + if (e->specs.has_shader_range_registers) + { + /*0085C*/ EMIT_STATE(VS_RANGE, VS_RANGE); + } + /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, PA_VIEWPORT_SCALE_X); + /*00A04*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_Y, PA_VIEWPORT_SCALE_Y); + /*00A08*/ EMIT_STATE(PA_VIEWPORT_SCALE_Z, PA_VIEWPORT_SCALE_Z); + /*00A0C*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_X, PA_VIEWPORT_OFFSET_X); + /*00A10*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_Y, PA_VIEWPORT_OFFSET_Y); + /*00A14*/ EMIT_STATE(PA_VIEWPORT_OFFSET_Z, PA_VIEWPORT_OFFSET_Z); + /*00A18*/ EMIT_STATE(PA_LINE_WIDTH, PA_LINE_WIDTH); + /*00A1C*/ EMIT_STATE(PA_POINT_SIZE, PA_POINT_SIZE); + /*00A28*/ EMIT_STATE(PA_SYSTEM_MODE, PA_SYSTEM_MODE); + 
/*00A2C*/ EMIT_STATE(PA_W_CLIP_LIMIT, PA_W_CLIP_LIMIT); + /*00A30*/ EMIT_STATE(PA_ATTRIBUTE_ELEMENT_COUNT, PA_ATTRIBUTE_ELEMENT_COUNT); + /*00A34*/ EMIT_STATE(PA_CONFIG, PA_CONFIG); + for(int x=0; x<10; ++x) + { + /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), PA_SHADER_ATTRIBUTES[x]); + } + /*00C00*/ EMIT_STATE_FIXP(SE_SCISSOR_LEFT, SE_SCISSOR_LEFT); + /*00C04*/ EMIT_STATE_FIXP(SE_SCISSOR_TOP, SE_SCISSOR_TOP); + /*00C08*/ EMIT_STATE_FIXP(SE_SCISSOR_RIGHT, SE_SCISSOR_RIGHT); + /*00C0C*/ EMIT_STATE_FIXP(SE_SCISSOR_BOTTOM, SE_SCISSOR_BOTTOM); + /*00C10*/ EMIT_STATE(SE_DEPTH_SCALE, SE_DEPTH_SCALE); + /*00C14*/ EMIT_STATE(SE_DEPTH_BIAS, SE_DEPTH_BIAS); + /*00C18*/ EMIT_STATE(SE_CONFIG, SE_CONFIG); + /*00E00*/ EMIT_STATE(RA_CONTROL, RA_CONTROL); + /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, RA_MULTISAMPLE_UNK00E04); + /*00E08*/ EMIT_STATE(RA_EARLY_DEPTH, RA_EARLY_DEPTH); + for(int x=0; x<4; ++x) + { + /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), RA_MULTISAMPLE_UNK00E10[x]); + } + for(int x=0; x<16; ++x) + { + /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), RA_CENTROID_TABLE[x]); + } + /*01000*/ EMIT_STATE(PS_END_PC, PS_END_PC); + /*01004*/ EMIT_STATE(PS_OUTPUT_REG, PS_OUTPUT_REG); + /*01008*/ EMIT_STATE(PS_INPUT_COUNT, PS_INPUT_COUNT); + /*0100C*/ EMIT_STATE(PS_TEMP_REGISTER_CONTROL, PS_TEMP_REGISTER_CONTROL); + /*01010*/ EMIT_STATE(PS_CONTROL, PS_CONTROL); + /*01018*/ EMIT_STATE(PS_START_PC, PS_START_PC); + if (e->specs.has_shader_range_registers) + { + /*0101C*/ EMIT_STATE(PS_RANGE, PS_RANGE); + } + /*01400*/ EMIT_STATE(PE_DEPTH_CONFIG, PE_DEPTH_CONFIG); + /*01404*/ EMIT_STATE(PE_DEPTH_NEAR, PE_DEPTH_NEAR); + /*01408*/ EMIT_STATE(PE_DEPTH_FAR, PE_DEPTH_FAR); + /*0140C*/ EMIT_STATE(PE_DEPTH_NORMALIZE, PE_DEPTH_NORMALIZE); + /*01410*/ EMIT_STATE(PE_DEPTH_ADDR, PE_DEPTH_ADDR); + /*01414*/ EMIT_STATE(PE_DEPTH_STRIDE, PE_DEPTH_STRIDE); + /*01418*/ EMIT_STATE(PE_STENCIL_OP, PE_STENCIL_OP); + /*0141C*/ EMIT_STATE(PE_STENCIL_CONFIG, PE_STENCIL_CONFIG); + /*01420*/ 
EMIT_STATE(PE_ALPHA_OP, PE_ALPHA_OP); + /*01424*/ EMIT_STATE(PE_ALPHA_BLEND_COLOR, PE_ALPHA_BLEND_COLOR); + /*01428*/ EMIT_STATE(PE_ALPHA_CONFIG, PE_ALPHA_CONFIG); + /*0142C*/ EMIT_STATE(PE_COLOR_FORMAT, PE_COLOR_FORMAT); + /*01430*/ EMIT_STATE(PE_COLOR_ADDR, PE_COLOR_ADDR); + /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, PE_COLOR_STRIDE); + /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, PE_HDEPTH_CONTROL); + for(int x=0; x<8; ++x) + { + /*01460*/ EMIT_STATE(PE_PIPE_COLOR_ADDR(x), PE_PIPE_COLOR_ADDR[x]); + } + for(int x=0; x<8; ++x) + { + /*01480*/ EMIT_STATE(PE_PIPE_DEPTH_ADDR(x), PE_PIPE_DEPTH_ADDR[x]); + } + /*014A0*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT, PE_STENCIL_CONFIG_EXT); + /*014A4*/ EMIT_STATE(PE_LOGIC_OP, PE_LOGIC_OP); + for(int x=0; x<2; ++x) + { + /*014A8*/ EMIT_STATE(PE_DITHER(x), PE_DITHER[x]); + } + /*01654*/ EMIT_STATE(TS_MEM_CONFIG, TS_MEM_CONFIG); + /*01658*/ EMIT_STATE(TS_COLOR_STATUS_BASE, TS_COLOR_STATUS_BASE); + /*0165C*/ EMIT_STATE(TS_COLOR_SURFACE_BASE, TS_COLOR_SURFACE_BASE); + /*01660*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE, TS_COLOR_CLEAR_VALUE); + /*01664*/ EMIT_STATE(TS_DEPTH_STATUS_BASE, TS_DEPTH_STATUS_BASE); + /*01668*/ EMIT_STATE(TS_DEPTH_SURFACE_BASE, TS_DEPTH_SURFACE_BASE); + /*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE, TS_DEPTH_CLEAR_VALUE); + for(int x=0; x<12; ++x) + { + /*02000*/ EMIT_STATE(TE_SAMPLER_CONFIG0(x), TE_SAMPLER_CONFIG0[x]); + } + for(int x=0; x<12; ++x) + { + /*02040*/ EMIT_STATE(TE_SAMPLER_SIZE(x), TE_SAMPLER_SIZE[x]); + } + for(int x=0; x<12; ++x) + { + /*02080*/ EMIT_STATE(TE_SAMPLER_LOG_SIZE(x), TE_SAMPLER_LOG_SIZE[x]); + } + for(int x=0; x<12; ++x) + { + /*020C0*/ EMIT_STATE(TE_SAMPLER_LOD_CONFIG(x), TE_SAMPLER_LOD_CONFIG[x]); + } + for(int x=0; x<12; ++x) + { + /*021C0*/ EMIT_STATE(TE_SAMPLER_CONFIG1(x), TE_SAMPLER_CONFIG1[x]); + } + for(int y=0; y<14; ++y) + { + for(int x=0; x<12; ++x) + { + /*02400*/ EMIT_STATE(TE_SAMPLER_LOD_ADDR(x, y), TE_SAMPLER_LOD_ADDR[y][x]); + } + } + /*03814*/ EMIT_STATE(GL_VERTEX_ELEMENT_CONFIG, 
GL_VERTEX_ELEMENT_CONFIG); + /*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS, GL_VARYING_TOTAL_COMPONENTS); + /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, GL_VARYING_NUM_COMPONENTS); + for(int x=0; x<2; ++x) + { + /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), GL_VARYING_COMPONENT_USE[x]); + } + /*0384C*/ EMIT_STATE(GL_API_MODE, GL_API_MODE); + ETNA_COALESCE_STATE_CLOSE(); + /* end only EMIT_STATE */ +#undef EMIT_STATE +#undef EMIT_STATE_FIXP + /* re-submit current shader program and uniforms */ + /*04000 or 0C000*/ + etna_set_state_multi(ctx, e->specs.vs_offset, e->gpu3d.vs_inst_mem_size, e->gpu3d.VS_INST_MEM); + /*06000 or 0D000*/ + etna_set_state_multi(ctx, e->specs.ps_offset, e->gpu3d.ps_inst_mem_size, e->gpu3d.PS_INST_MEM); + /*05000*/ etna_set_state_multi(ctx, VIVS_VS_UNIFORMS(0), e->gpu3d.vs_uniforms_size, e->gpu3d.VS_UNIFORMS); + /*07000*/ etna_set_state_multi(ctx, VIVS_PS_UNIFORMS(0), e->gpu3d.ps_uniforms_size, e->gpu3d.PS_UNIFORMS); +} + +/* Weave state before draw operation. This function merges all the compiled state blocks under + * the context into one device register state. Parts of this state that are changed since + * last call (dirty) will be uploaded as state changes in the command buffer. + */ +static void sync_context(struct pipe_context *restrict pipe) +{ + struct etna_pipe_context *restrict e = etna_pipe_context(pipe); + struct etna_ctx *restrict ctx = e->ctx; + uint32_t active_samplers = active_samplers_bits(pipe); + uint32_t dirty = e->dirty_bits; + + /* CSOs must be bound before calling this */ + assert(e->blend_p && e->rasterizer_p && e->depth_stencil_alpha_p && e->vertex_elements_p); + + /* Pre-processing: re-link shader if needed. 
+ */ + if(unlikely((dirty & ETNA_STATE_SHADER)) && e->vs && e->fs) + { + /* re-link vs and fs if needed */ + etna_link_shaders(pipe, &e->shader_state, e->vs, e->fs); + } + assert(!e->vs || e->vertex_elements.num_elements == e->vs->num_inputs); + + /* Pre-processing: see what caches we need to flush before making state + * changes. + */ + uint32_t to_flush = 0; + if(unlikely(dirty & (ETNA_STATE_BLEND))) + { + /* Need flush COLOR when changing PE.COLOR_FORMAT.OVERWRITE. + */ + if((e->gpu3d.PE_COLOR_FORMAT & VIVS_PE_COLOR_FORMAT_OVERWRITE) != + (e->blend.PE_COLOR_FORMAT & VIVS_PE_COLOR_FORMAT_OVERWRITE)) + to_flush |= VIVS_GL_FLUSH_CACHE_COLOR; + } + if(unlikely(dirty & (ETNA_STATE_TEXTURE_CACHES))) + to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE; + if(DBG_ENABLED(ETNA_DBG_CFLUSH_ALL)) + to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE | VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH; + if(to_flush) + { + etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, to_flush); + etna_stall(ctx, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE); + } + + /* If MULTI_SAMPLE_CONFIG.MSAA_SAMPLES changed, clobber affected shader + * state to make sure it is always rewritten. */ + if(unlikely(dirty & (ETNA_STATE_FRAMEBUFFER))) + { + if((e->gpu3d.GL_MULTI_SAMPLE_CONFIG & VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__MASK) != + (e->framebuffer.GL_MULTI_SAMPLE_CONFIG & VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__MASK)) + { + /* XXX what does the GPU set these states to on MSAA samples change? Does it do the right thing? + * (increase/decrease as necessary) or something else? Just set some invalid value until we know for + * sure. */ + e->gpu3d.PS_INPUT_COUNT = 0xffffffff; + e->gpu3d.PS_TEMP_REGISTER_CONTROL = 0xffffffff; + } + } + + /* + * Cached state update emission. + * The etna_3d_state structure e->gpu3d is used to keep the current context. + * State is only emitted if the new value of the register is different from the cached value + * in the context. Update the state afterwards. 
+ */ +#define EMIT_STATE(state_name, dest_field, src_value) \ + if(e->gpu3d.dest_field != (src_value)) { \ + ETNA_COALESCE_STATE_UPDATE(state_name, src_value, 0) \ + e->gpu3d.dest_field = (src_value); \ + } + +#define EMIT_STATE_FIXP(state_name, dest_field, src_value) \ + if(e->gpu3d.dest_field != (src_value)) { \ + ETNA_COALESCE_STATE_UPDATE(state_name, src_value, 1) \ + e->gpu3d.dest_field = (src_value); \ + } + + /* Update vertex elements. This is different from any of the other states, in that + * a) the number of vertex elements written matters: so write only active ones + * b) the vertex element states must all be written: do not skip entries that stay the same + */ + if(dirty & (ETNA_STATE_VERTEX_ELEMENTS)) + { + if(e->gpu3d.num_vertex_elements != e->vertex_elements.num_elements || + memcmp(e->gpu3d.FE_VERTEX_ELEMENT_CONFIG, e->vertex_elements.FE_VERTEX_ELEMENT_CONFIG, e->gpu3d.num_vertex_elements * 4)) + { + /* Special case: vertex elements must always be sent in full if changed */ + /*00600*/ etna_set_state_multi(ctx, VIVS_FE_VERTEX_ELEMENT_CONFIG(0), e->vertex_elements.num_elements, e->vertex_elements.FE_VERTEX_ELEMENT_CONFIG); + memcpy(e->gpu3d.FE_VERTEX_ELEMENT_CONFIG, e->vertex_elements.FE_VERTEX_ELEMENT_CONFIG, e->vertex_elements.num_elements * 4); + + e->gpu3d.num_vertex_elements = e->vertex_elements.num_elements; + } + } + + /* The following code is originally generated by gen_merge_state.py, to + * emit state in increasing order of address (this makes it possible to merge + * consecutive register updates into one SET_STATE command) + * + * There have been some manual changes, where the weaving operation is not + * simply bitwise or: + * - scissor fixp + * - num vertex elements + * - scissor handling + * - num samplers + * - texture lod + * - ETNA_STATE_TS + * - removed ETNA_STATE_BASE_SETUP statements -- these are guaranteed to not change anyway + * - PS / framebuffer interaction for MSAA + * - move update of GL_MULTI_SAMPLE_CONFIG first + * - add 
unlikely()/likely() + */ + uint32_t last_reg, last_fixp, span_start; + ETNA_COALESCE_STATE_OPEN(ETNA_3D_CONTEXT_SIZE); + /* begin only EMIT_STATE -- make sure no new etna_reserve calls are done here directly + * or indirectly */ + /* multi sample config is set first, and outside of the normal sorting + * order, as changing the multisample state clobbers PS.INPUT_COUNT (and + * possibly PS.TEMP_REGISTER_CONTROL). + */ + if(unlikely(dirty & (ETNA_STATE_FRAMEBUFFER | ETNA_STATE_SAMPLE_MASK))) + { + /*03818*/ EMIT_STATE(GL_MULTI_SAMPLE_CONFIG, GL_MULTI_SAMPLE_CONFIG, e->sample_mask.GL_MULTI_SAMPLE_CONFIG | e->framebuffer.GL_MULTI_SAMPLE_CONFIG); + } + if(likely(dirty & (ETNA_STATE_INDEX_BUFFER))) + { + /*00644*/ EMIT_STATE(FE_INDEX_STREAM_BASE_ADDR, FE_INDEX_STREAM_BASE_ADDR, e->index_buffer.FE_INDEX_STREAM_BASE_ADDR); + /*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL, FE_INDEX_STREAM_CONTROL, e->index_buffer.FE_INDEX_STREAM_CONTROL); + } + if(likely(dirty & (ETNA_STATE_VERTEX_BUFFERS))) + { + /*0064C*/ EMIT_STATE(FE_VERTEX_STREAM_BASE_ADDR, FE_VERTEX_STREAM_BASE_ADDR, e->vertex_buffer[0].FE_VERTEX_STREAM_BASE_ADDR); + /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, FE_VERTEX_STREAM_CONTROL, e->vertex_buffer[0].FE_VERTEX_STREAM_CONTROL); + if (e->specs.has_shader_range_registers) + { + for(int x=0; x<8; ++x) + { + /*00680*/ EMIT_STATE(FE_VERTEX_STREAMS_BASE_ADDR(x), FE_VERTEX_STREAMS_BASE_ADDR[x], e->vertex_buffer[x].FE_VERTEX_STREAM_BASE_ADDR); + } + for(int x=0; x<8; ++x) + { + /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), FE_VERTEX_STREAMS_CONTROL[x], e->vertex_buffer[x].FE_VERTEX_STREAM_CONTROL); + } + } + } + if(unlikely(dirty & (ETNA_STATE_SHADER))) + { + /*00800*/ EMIT_STATE(VS_END_PC, VS_END_PC, e->shader_state.VS_END_PC); + } + if(unlikely(dirty & (ETNA_STATE_SHADER | ETNA_STATE_RASTERIZER))) + { + /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT, VS_OUTPUT_COUNT, + e->rasterizer.point_size_per_vertex ? 
e->shader_state.VS_OUTPUT_COUNT_PSIZE : e->shader_state.VS_OUTPUT_COUNT); + } + if(unlikely(dirty & (ETNA_STATE_VERTEX_ELEMENTS | ETNA_STATE_SHADER))) + { + /*00808*/ EMIT_STATE(VS_INPUT_COUNT, VS_INPUT_COUNT, VIVS_VS_INPUT_COUNT_COUNT(e->vertex_elements.num_elements) | e->shader_state.VS_INPUT_COUNT); + } + if(unlikely(dirty & (ETNA_STATE_SHADER))) + { + /*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL, VS_TEMP_REGISTER_CONTROL, e->shader_state.VS_TEMP_REGISTER_CONTROL); + for(int x=0; x<4; ++x) + { + /*00810*/ EMIT_STATE(VS_OUTPUT(x), VS_OUTPUT[x], e->shader_state.VS_OUTPUT[x]); + } + for(int x=0; x<4; ++x) + { + /*00820*/ EMIT_STATE(VS_INPUT(x), VS_INPUT[x], e->shader_state.VS_INPUT[x]); + } + /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, VS_LOAD_BALANCING, e->shader_state.VS_LOAD_BALANCING); + /*00838*/ EMIT_STATE(VS_START_PC, VS_START_PC, e->shader_state.VS_START_PC); + if (e->specs.has_shader_range_registers) + { + /*0085C*/ EMIT_STATE(VS_RANGE, VS_RANGE, (e->shader_state.vs_inst_mem_size/4-1)<<16); + } + } + if(unlikely(dirty & (ETNA_STATE_VIEWPORT))) + { + /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, PA_VIEWPORT_SCALE_X, e->viewport.PA_VIEWPORT_SCALE_X); + /*00A04*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_Y, PA_VIEWPORT_SCALE_Y, e->viewport.PA_VIEWPORT_SCALE_Y); + /*00A08*/ EMIT_STATE(PA_VIEWPORT_SCALE_Z, PA_VIEWPORT_SCALE_Z, e->viewport.PA_VIEWPORT_SCALE_Z); + /*00A0C*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_X, PA_VIEWPORT_OFFSET_X, e->viewport.PA_VIEWPORT_OFFSET_X); + /*00A10*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_Y, PA_VIEWPORT_OFFSET_Y, e->viewport.PA_VIEWPORT_OFFSET_Y); + /*00A14*/ EMIT_STATE(PA_VIEWPORT_OFFSET_Z, PA_VIEWPORT_OFFSET_Z, e->viewport.PA_VIEWPORT_OFFSET_Z); + } + if(unlikely(dirty & (ETNA_STATE_RASTERIZER))) + { + /*00A18*/ EMIT_STATE(PA_LINE_WIDTH, PA_LINE_WIDTH, e->rasterizer.PA_LINE_WIDTH); + /*00A1C*/ EMIT_STATE(PA_POINT_SIZE, PA_POINT_SIZE, e->rasterizer.PA_POINT_SIZE); + /*00A28*/ EMIT_STATE(PA_SYSTEM_MODE, PA_SYSTEM_MODE, 
e->rasterizer.PA_SYSTEM_MODE); + } + if(unlikely(dirty & (ETNA_STATE_SHADER))) + { + /*00A30*/ EMIT_STATE(PA_ATTRIBUTE_ELEMENT_COUNT, PA_ATTRIBUTE_ELEMENT_COUNT, e->shader_state.PA_ATTRIBUTE_ELEMENT_COUNT); + } + if(unlikely(dirty & (ETNA_STATE_RASTERIZER | ETNA_STATE_SHADER))) + { + /*00A34*/ EMIT_STATE(PA_CONFIG, PA_CONFIG, e->rasterizer.PA_CONFIG & e->shader_state.PA_CONFIG); + } + if(unlikely(dirty & (ETNA_STATE_SHADER))) + { + for(int x=0; x<10; ++x) + { + /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), PA_SHADER_ATTRIBUTES[x], e->shader_state.PA_SHADER_ATTRIBUTES[x]); + } + } + if(unlikely(dirty & (ETNA_STATE_SCISSOR | ETNA_STATE_FRAMEBUFFER | ETNA_STATE_RASTERIZER | ETNA_STATE_VIEWPORT))) + { + /* this is a bit of a mess: rasterizer.scissor determines whether to use only the + * framebuffer scissor, or specific scissor state, and the viewport clips too so the logic + * spans four CSOs + */ + uint32_t scissor_left = MAX2(e->framebuffer.SE_SCISSOR_LEFT, e->viewport.SE_SCISSOR_LEFT); + uint32_t scissor_top = MAX2(e->framebuffer.SE_SCISSOR_TOP, e->viewport.SE_SCISSOR_TOP); + uint32_t scissor_right = MIN2(e->framebuffer.SE_SCISSOR_RIGHT, e->viewport.SE_SCISSOR_RIGHT); + uint32_t scissor_bottom = MIN2(e->framebuffer.SE_SCISSOR_BOTTOM, e->viewport.SE_SCISSOR_BOTTOM); + if(e->rasterizer.scissor) + { + scissor_left = MAX2(e->scissor.SE_SCISSOR_LEFT, scissor_left); + scissor_top = MAX2(e->scissor.SE_SCISSOR_TOP, scissor_top); + scissor_right = MIN2(e->scissor.SE_SCISSOR_RIGHT, scissor_right); + scissor_bottom = MIN2(e->scissor.SE_SCISSOR_BOTTOM, scissor_bottom); + } + /*00C00*/ EMIT_STATE_FIXP(SE_SCISSOR_LEFT, SE_SCISSOR_LEFT, scissor_left); + /*00C04*/ EMIT_STATE_FIXP(SE_SCISSOR_TOP, SE_SCISSOR_TOP, scissor_top); + /*00C08*/ EMIT_STATE_FIXP(SE_SCISSOR_RIGHT, SE_SCISSOR_RIGHT, scissor_right); + /*00C0C*/ EMIT_STATE_FIXP(SE_SCISSOR_BOTTOM, SE_SCISSOR_BOTTOM, scissor_bottom); + } + if(unlikely(dirty & (ETNA_STATE_RASTERIZER))) + { + /*00C10*/ EMIT_STATE(SE_DEPTH_SCALE, 
SE_DEPTH_SCALE, e->rasterizer.SE_DEPTH_SCALE); + /*00C14*/ EMIT_STATE(SE_DEPTH_BIAS, SE_DEPTH_BIAS, e->rasterizer.SE_DEPTH_BIAS); + /*00C18*/ EMIT_STATE(SE_CONFIG, SE_CONFIG, e->rasterizer.SE_CONFIG); + } + if(unlikely(dirty & (ETNA_STATE_SHADER))) + { + /*00E00*/ EMIT_STATE(RA_CONTROL, RA_CONTROL, e->shader_state.RA_CONTROL); + } + if(unlikely(dirty & (ETNA_STATE_FRAMEBUFFER))) + { + /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, RA_MULTISAMPLE_UNK00E04, e->framebuffer.RA_MULTISAMPLE_UNK00E04); + for(int x=0; x<4; ++x) + { + /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), RA_MULTISAMPLE_UNK00E10[x], e->framebuffer.RA_MULTISAMPLE_UNK00E10[x]); + } + for(int x=0; x<16; ++x) + { + /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), RA_CENTROID_TABLE[x], e->framebuffer.RA_CENTROID_TABLE[x]); + } + } + if(unlikely(dirty & (ETNA_STATE_SHADER | ETNA_STATE_FRAMEBUFFER))) + { + /*01000*/ EMIT_STATE(PS_END_PC, PS_END_PC, e->shader_state.PS_END_PC); + /*01004*/ EMIT_STATE(PS_OUTPUT_REG, PS_OUTPUT_REG, e->shader_state.PS_OUTPUT_REG); + /*01008*/ EMIT_STATE(PS_INPUT_COUNT, PS_INPUT_COUNT, + e->framebuffer.msaa_mode ? + e->shader_state.PS_INPUT_COUNT_MSAA : + e->shader_state.PS_INPUT_COUNT); + /*0100C*/ EMIT_STATE(PS_TEMP_REGISTER_CONTROL, PS_TEMP_REGISTER_CONTROL, + e->framebuffer.msaa_mode ? 
+ e->shader_state.PS_TEMP_REGISTER_CONTROL_MSAA : + e->shader_state.PS_TEMP_REGISTER_CONTROL); + /*01010*/ EMIT_STATE(PS_CONTROL, PS_CONTROL, e->shader_state.PS_CONTROL); + /*01018*/ EMIT_STATE(PS_START_PC, PS_START_PC, e->shader_state.PS_START_PC); + if (e->specs.has_shader_range_registers) + { + /*0101C*/ EMIT_STATE(PS_RANGE, PS_RANGE, ((e->shader_state.ps_inst_mem_size/4-1+0x100)<<16) | 0x100); + } + } + if(unlikely(dirty & (ETNA_STATE_DSA | ETNA_STATE_FRAMEBUFFER))) + { + /*01400*/ EMIT_STATE(PE_DEPTH_CONFIG, PE_DEPTH_CONFIG, e->depth_stencil_alpha.PE_DEPTH_CONFIG | e->framebuffer.PE_DEPTH_CONFIG); + } + if(unlikely(dirty & (ETNA_STATE_VIEWPORT))) + { + /*01404*/ EMIT_STATE(PE_DEPTH_NEAR, PE_DEPTH_NEAR, e->viewport.PE_DEPTH_NEAR); + /*01408*/ EMIT_STATE(PE_DEPTH_FAR, PE_DEPTH_FAR, e->viewport.PE_DEPTH_FAR); + } + if(unlikely(dirty & (ETNA_STATE_FRAMEBUFFER))) + { + /*0140C*/ EMIT_STATE(PE_DEPTH_NORMALIZE, PE_DEPTH_NORMALIZE, e->framebuffer.PE_DEPTH_NORMALIZE); + + if (ctx->conn->chip.pixel_pipes == 1) + { + /*01410*/ EMIT_STATE(PE_DEPTH_ADDR, PE_DEPTH_ADDR, e->framebuffer.PE_DEPTH_ADDR); + } + + /*01414*/ EMIT_STATE(PE_DEPTH_STRIDE, PE_DEPTH_STRIDE, e->framebuffer.PE_DEPTH_STRIDE); + } + if(unlikely(dirty & (ETNA_STATE_DSA))) + { + /*01418*/ EMIT_STATE(PE_STENCIL_OP, PE_STENCIL_OP, e->depth_stencil_alpha.PE_STENCIL_OP); + } + if(unlikely(dirty & (ETNA_STATE_DSA | ETNA_STATE_STENCIL_REF))) + { + /*0141C*/ EMIT_STATE(PE_STENCIL_CONFIG, PE_STENCIL_CONFIG, e->depth_stencil_alpha.PE_STENCIL_CONFIG | e->stencil_ref.PE_STENCIL_CONFIG); + } + if(unlikely(dirty & (ETNA_STATE_DSA))) + { + /*01420*/ EMIT_STATE(PE_ALPHA_OP, PE_ALPHA_OP, e->depth_stencil_alpha.PE_ALPHA_OP); + } + if(unlikely(dirty & (ETNA_STATE_BLEND_COLOR))) + { + /*01424*/ EMIT_STATE(PE_ALPHA_BLEND_COLOR, PE_ALPHA_BLEND_COLOR, e->blend_color.PE_ALPHA_BLEND_COLOR); + } + if(unlikely(dirty & (ETNA_STATE_BLEND))) + { + /*01428*/ EMIT_STATE(PE_ALPHA_CONFIG, PE_ALPHA_CONFIG, e->blend.PE_ALPHA_CONFIG); + } + 
if(unlikely(dirty & (ETNA_STATE_BLEND | ETNA_STATE_FRAMEBUFFER))) + { + /*0142C*/ EMIT_STATE(PE_COLOR_FORMAT, PE_COLOR_FORMAT, e->blend.PE_COLOR_FORMAT | e->framebuffer.PE_COLOR_FORMAT); + } + if(unlikely(dirty & (ETNA_STATE_FRAMEBUFFER))) + { + if (ctx->conn->chip.pixel_pipes == 1) + { + /*01430*/ EMIT_STATE(PE_COLOR_ADDR, PE_COLOR_ADDR, e->framebuffer.PE_COLOR_ADDR); + /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, PE_COLOR_STRIDE, e->framebuffer.PE_COLOR_STRIDE); + /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, PE_HDEPTH_CONTROL, e->framebuffer.PE_HDEPTH_CONTROL); + } + else if (ctx->conn->chip.pixel_pipes == 2) + { + /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, PE_COLOR_STRIDE, e->framebuffer.PE_COLOR_STRIDE); + /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, PE_HDEPTH_CONTROL, e->framebuffer.PE_HDEPTH_CONTROL); + /*01460*/ EMIT_STATE(PE_PIPE_COLOR_ADDR(0), PE_PIPE_COLOR_ADDR[0], e->framebuffer.PE_PIPE_COLOR_ADDR[0]); + /*01464*/ EMIT_STATE(PE_PIPE_COLOR_ADDR(1), PE_PIPE_COLOR_ADDR[1], e->framebuffer.PE_PIPE_COLOR_ADDR[1]); + /*01480*/ EMIT_STATE(PE_PIPE_DEPTH_ADDR(0), PE_PIPE_DEPTH_ADDR[0], e->framebuffer.PE_PIPE_DEPTH_ADDR[0]); + /*01484*/ EMIT_STATE(PE_PIPE_DEPTH_ADDR(1), PE_PIPE_DEPTH_ADDR[1], e->framebuffer.PE_PIPE_DEPTH_ADDR[1]); + } + } + if(unlikely(dirty & (ETNA_STATE_STENCIL_REF))) + { + /*014A0*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT, PE_STENCIL_CONFIG_EXT, e->stencil_ref.PE_STENCIL_CONFIG_EXT); + } + if(unlikely(dirty & (ETNA_STATE_BLEND))) + { + /*014A4*/ EMIT_STATE(PE_LOGIC_OP, PE_LOGIC_OP, e->blend.PE_LOGIC_OP); + for(int x=0; x<2; ++x) + { + /*014A8*/ EMIT_STATE(PE_DITHER(x), PE_DITHER[x], e->blend.PE_DITHER[x]); + } + } + if(unlikely(dirty & (ETNA_STATE_FRAMEBUFFER | ETNA_STATE_TS))) + { + /*01654*/ EMIT_STATE(TS_MEM_CONFIG, TS_MEM_CONFIG, e->framebuffer.TS_MEM_CONFIG); + /*01658*/ EMIT_STATE(TS_COLOR_STATUS_BASE, TS_COLOR_STATUS_BASE, e->framebuffer.TS_COLOR_STATUS_BASE); + /*0165C*/ EMIT_STATE(TS_COLOR_SURFACE_BASE, TS_COLOR_SURFACE_BASE, e->framebuffer.TS_COLOR_SURFACE_BASE); 
+ /*01660*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE, TS_COLOR_CLEAR_VALUE, e->framebuffer.TS_COLOR_CLEAR_VALUE); + /*01664*/ EMIT_STATE(TS_DEPTH_STATUS_BASE, TS_DEPTH_STATUS_BASE, e->framebuffer.TS_DEPTH_STATUS_BASE); + /*01668*/ EMIT_STATE(TS_DEPTH_SURFACE_BASE, TS_DEPTH_SURFACE_BASE, e->framebuffer.TS_DEPTH_SURFACE_BASE); + /*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE, TS_DEPTH_CLEAR_VALUE, e->framebuffer.TS_DEPTH_CLEAR_VALUE); + } + if(unlikely(dirty & (ETNA_STATE_SAMPLER_VIEWS | ETNA_STATE_SAMPLERS))) + { + for(int x=0; x<VIVS_TE_SAMPLER__LEN; ++x) + { + /* set active samplers to their configuration value (determined by both the sampler state and sampler view), + * set inactive sampler config to 0 */ + /*02000*/ EMIT_STATE(TE_SAMPLER_CONFIG0(x), TE_SAMPLER_CONFIG0[x], + ((1<<x) & active_samplers)?(e->sampler[x].TE_SAMPLER_CONFIG0 | e->sampler_view[x].TE_SAMPLER_CONFIG0):0); + } + } + if(unlikely(dirty & (ETNA_STATE_SAMPLER_VIEWS))) + { + for(int x=0; x<VIVS_TE_SAMPLER__LEN; ++x) + { + if((1<<x) & active_samplers) + { + /*02040*/ EMIT_STATE(TE_SAMPLER_SIZE(x), TE_SAMPLER_SIZE[x], e->sampler_view[x].TE_SAMPLER_SIZE); + } + } + for(int x=0; x<VIVS_TE_SAMPLER__LEN; ++x) + { + if((1<<x) & active_samplers) + { + /*02080*/ EMIT_STATE(TE_SAMPLER_LOG_SIZE(x), TE_SAMPLER_LOG_SIZE[x], e->sampler_view[x].TE_SAMPLER_LOG_SIZE); + } + } + } + if(unlikely(dirty & (ETNA_STATE_SAMPLER_VIEWS | ETNA_STATE_SAMPLERS))) + { + for(int x=0; x<VIVS_TE_SAMPLER__LEN; ++x) + { + if((1<<x) & active_samplers) + { + /* min and max lod is determined both by the sampler and the view */ + /*020C0*/ EMIT_STATE(TE_SAMPLER_LOD_CONFIG(x), TE_SAMPLER_LOD_CONFIG[x], + e->sampler[x].TE_SAMPLER_LOD_CONFIG | + VIVS_TE_SAMPLER_LOD_CONFIG_MAX(MIN2(e->sampler[x].max_lod, e->sampler_view[x].max_lod)) | + VIVS_TE_SAMPLER_LOD_CONFIG_MIN(MAX2(e->sampler[x].min_lod, e->sampler_view[x].min_lod))); + } + } + for(int x=0; x<VIVS_TE_SAMPLER__LEN; ++x) + { + if((1<<x) & active_samplers) + { + /*021C0*/ 
EMIT_STATE(TE_SAMPLER_CONFIG1(x), TE_SAMPLER_CONFIG1[x], + e->sampler[x].TE_SAMPLER_CONFIG1 | e->sampler_view[x].TE_SAMPLER_CONFIG1); + } + } + } + if(unlikely(dirty & (ETNA_STATE_SAMPLER_VIEWS))) + { + for(int y=0; y<VIVS_TE_SAMPLER_LOD_ADDR__LEN; ++y) + { + for(int x=0; x<VIVS_TE_SAMPLER__LEN; ++x) + { + if((1<<x) & active_samplers) + { + /*02400*/ EMIT_STATE(TE_SAMPLER_LOD_ADDR(x, y), TE_SAMPLER_LOD_ADDR[y][x], e->sampler_view[x].TE_SAMPLER_LOD_ADDR[y]); + } + } + } + } + if(unlikely(dirty & (ETNA_STATE_SHADER))) + { + /*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS, GL_VARYING_TOTAL_COMPONENTS, e->shader_state.GL_VARYING_TOTAL_COMPONENTS); + /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, GL_VARYING_NUM_COMPONENTS, e->shader_state.GL_VARYING_NUM_COMPONENTS); + for(int x=0; x<2; ++x) + { + /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), GL_VARYING_COMPONENT_USE[x], e->shader_state.GL_VARYING_COMPONENT_USE[x]); + } + } + ETNA_COALESCE_STATE_CLOSE(); + /* end only EMIT_STATE */ + /**** Large dynamically-sized state ****/ + if(dirty & (ETNA_STATE_SHADER)) + { + /* Special case: a new shader was loaded; simply re-load all uniforms and shader code at once */ + /*04000 or 0C000*/ + etna_set_state_multi(ctx, e->specs.vs_offset, e->shader_state.vs_inst_mem_size, e->shader_state.VS_INST_MEM); + /*06000 or 0D000*/ + etna_set_state_multi(ctx, e->specs.ps_offset, e->shader_state.ps_inst_mem_size, e->shader_state.PS_INST_MEM); + /*05000*/ etna_set_state_multi(ctx, VIVS_VS_UNIFORMS(0), e->shader_state.vs_uniforms_size, e->shader_state.VS_UNIFORMS); + /*07000*/ etna_set_state_multi(ctx, VIVS_PS_UNIFORMS(0), e->shader_state.ps_uniforms_size, e->shader_state.PS_UNIFORMS); + + /* Copy uniforms to gpu3d, so that incremental updates to uniforms are possible as long as the + * same shader remains bound */ + e->gpu3d.vs_uniforms_size = e->shader_state.vs_uniforms_size; + e->gpu3d.ps_uniforms_size = e->shader_state.ps_uniforms_size; + e->gpu3d.vs_inst_mem_size = 
e->shader_state.vs_inst_mem_size; + e->gpu3d.ps_inst_mem_size = e->shader_state.ps_inst_mem_size; + memcpy(e->gpu3d.VS_UNIFORMS, e->shader_state.VS_UNIFORMS, e->shader_state.vs_uniforms_size * 4); + memcpy(e->gpu3d.PS_UNIFORMS, e->shader_state.PS_UNIFORMS, e->shader_state.ps_uniforms_size * 4); + memcpy(e->gpu3d.VS_INST_MEM, e->shader_state.VS_INST_MEM, e->shader_state.vs_inst_mem_size * 4); + memcpy(e->gpu3d.PS_INST_MEM, e->shader_state.PS_INST_MEM, e->shader_state.ps_inst_mem_size * 4); + } + else + { + /* If new uniforms loaded with current shader, only submit what changed */ + if(dirty & (ETNA_STATE_VS_UNIFORMS)) + { + ETNA_COALESCE_STATE_OPEN(e->shader_state.vs_uniforms_size); /* worst case */ + for(int x=0; x<e->shader_state.vs_uniforms_size; ++x) + { + /*05000*/ EMIT_STATE(VS_UNIFORMS(x), VS_UNIFORMS[x], e->shader_state.VS_UNIFORMS[x]); + } + ETNA_COALESCE_STATE_CLOSE(); + } + if(dirty & (ETNA_STATE_PS_UNIFORMS)) + { + ETNA_COALESCE_STATE_OPEN(e->shader_state.ps_uniforms_size); /* worst case */ + for(int x=0; x<e->shader_state.ps_uniforms_size; ++x) + { + /*07000*/ EMIT_STATE(PS_UNIFORMS(x), PS_UNIFORMS[x], e->shader_state.PS_UNIFORMS[x]); + } + ETNA_COALESCE_STATE_CLOSE(); + } + } + /**** End of state update ****/ +#undef EMIT_STATE +#undef EMIT_STATE_FIXP + /**** Post processing ****/ + if(unlikely(dirty & (ETNA_STATE_FRAMEBUFFER | ETNA_STATE_TS))) + { + /* Wait rasterizer until RS (PE) finished configuration. */ + etna_stall(ctx, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE); + } + + e->dirty_bits = 0; +} + +/** Build new explicit context for etna. This is a command buffer that contains + * all commands needed to set up the GPU to current state, to be used after a context + * switch (when multiple processes are using the GPU at once). 
+ * + * This function is called as callback by etna_flush for kernel drivers + * that require an explicit context) + */ +static int update_context(void *pipe, struct etna_ctx *ctx, enum etna_pipe *initial_pipe, enum etna_pipe *final_pipe) +{ + reset_context((struct pipe_context*) pipe); + *initial_pipe = ETNA_PIPE_3D; /* both the initial and the final pipe are reported as 3D */ + *final_pipe = ETNA_PIPE_3D; + return ETNA_OK; +} + +/*********************************************************************/ + +/** Destroy etna pipe: runs the clear/blit and transfer destroy hooks, frees the etna context + * and finally the pipe object itself. After calling this the pipe object must never be used again. + */ +static void etna_pipe_destroy(struct pipe_context *pipe) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + etna_pipe_clear_blit_destroy(pipe); + etna_pipe_transfer_destroy(pipe); + etna_free(priv->ctx); + FREE(pipe); +} + +/** Main draw function. Draw primitives from a vertex buffer object, optionally using an index buffer. + * Returns without drawing when no vertex elements are bound or when mode/count yield no primitives. + */ +static void etna_pipe_draw_vbo(struct pipe_context *pipe, + const struct pipe_draw_info *info) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + if(priv->vertex_elements_p == NULL || priv->vertex_elements.num_elements == 0) + return; /* Nothing to do */ + int prims = u_decomposed_prims_for_vertices(info->mode, info->count); + if(unlikely(prims <= 0)) + { + DBG("Invalid draw primitive mode=%i or no primitives to be drawn", info->mode); + return; + } + /* First, sync state, then emit DRAW_PRIMITIVES or DRAW_INDEXED_PRIMITIVES */ + sync_context(pipe); + if(info->indexed) + { + etna_draw_indexed_primitives(priv->ctx, translate_draw_mode(info->mode), + info->start, prims, info->index_bias); + } else + { + etna_draw_primitives(priv->ctx, translate_draw_mode(info->mode), + info->start, prims); + } + if(DBG_ENABLED(ETNA_DBG_FLUSH_ALL)) /* when the ETNA_DBG_FLUSH_ALL debug flag is enabled, flush after the draw */ + { + pipe->flush(pipe, NULL, 0); + } +} + +/** Create vertex element states, which define a layout for fetching + * vertices for rendering. 
+ */ +static void *etna_pipe_create_vertex_elements_state(struct pipe_context *pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + struct compiled_vertex_elements_state *cs = CALLOC_STRUCT(compiled_vertex_elements_state); + /* XXX could minimize number of consecutive stretches here by sorting, and + * permuting the inputs in shader or does Mesa do this already? */ + + /* Check that vertex element binding is compatible with hardware; thus + * elements[idx].vertex_buffer_index are < stream_count. If not, the binding + * uses more streams than is supported, and u_vbuf should have done some reorganization + * for compatibility. + */ + bool incompatible = false; + for(unsigned idx=0; idx<num_elements; ++idx) + { + if(elements[idx].vertex_buffer_index >= priv->specs.stream_count || + elements[idx].instance_divisor > 0) + incompatible = true; + } + cs->num_elements = num_elements; + if(incompatible || num_elements == 0) + { + DBG("Error: zero vertex elements, or more vertex buffers used than supported"); + FREE(cs); + return NULL; + } + unsigned start_offset = 0; /* start of current consecutive stretch */ + bool nonconsecutive = true; /* previous value of nonconsecutive */ + for(unsigned idx=0; idx<num_elements; ++idx) + { + unsigned element_size = util_format_get_blocksize(elements[idx].src_format); + unsigned end_offset = elements[idx].src_offset + element_size; + if(nonconsecutive) + start_offset = elements[idx].src_offset; + assert(element_size != 0 && end_offset <= 256); /* maximum vertex size is 256 bytes */ + /* check whether next element is consecutive to this one */ + nonconsecutive = (idx == (num_elements-1)) || + elements[idx+1].vertex_buffer_index != elements[idx].vertex_buffer_index || + end_offset != elements[idx+1].src_offset; + cs->FE_VERTEX_ELEMENT_CONFIG[idx] = + (nonconsecutive ? 
VIVS_FE_VERTEX_ELEMENT_CONFIG_NONCONSECUTIVE : 0) | + translate_vertex_format_type(elements[idx].src_format, false) | + VIVS_FE_VERTEX_ELEMENT_CONFIG_NUM(util_format_get_nr_components(elements[idx].src_format)) | + translate_vertex_format_normalize(elements[idx].src_format) | + VIVS_FE_VERTEX_ELEMENT_CONFIG_ENDIAN(ENDIAN_MODE_NO_SWAP) | + VIVS_FE_VERTEX_ELEMENT_CONFIG_STREAM(elements[idx].vertex_buffer_index) | + VIVS_FE_VERTEX_ELEMENT_CONFIG_START(elements[idx].src_offset) | + VIVS_FE_VERTEX_ELEMENT_CONFIG_END(end_offset - start_offset); + } + return cs; +} + +static void etna_pipe_bind_vertex_elements_state(struct pipe_context *pipe, void *ve) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + priv->dirty_bits |= ETNA_STATE_VERTEX_ELEMENTS; + priv->vertex_elements_p = ve; + if(ve) + priv->vertex_elements = *(struct compiled_vertex_elements_state*)ve; +} + +static void etna_pipe_delete_vertex_elements_state(struct pipe_context *pipe, void *ve) +{ + struct compiled_vertex_elements_state *cs = (struct compiled_vertex_elements_state*)ve; + //struct etna_pipe_context *priv = etna_pipe_context(pipe); + FREE(cs); +} + +static void etna_pipe_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bc) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + struct compiled_blend_color *cs = &priv->blend_color; + cs->PE_ALPHA_BLEND_COLOR = + VIVS_PE_ALPHA_BLEND_COLOR_R(etna_cfloat_to_uint8(bc->color[0])) | + VIVS_PE_ALPHA_BLEND_COLOR_G(etna_cfloat_to_uint8(bc->color[1])) | + VIVS_PE_ALPHA_BLEND_COLOR_B(etna_cfloat_to_uint8(bc->color[2])) | + VIVS_PE_ALPHA_BLEND_COLOR_A(etna_cfloat_to_uint8(bc->color[3])); + priv->dirty_bits |= ETNA_STATE_BLEND_COLOR; +} + +static void etna_pipe_set_stencil_ref(struct pipe_context *pipe, + const struct pipe_stencil_ref *sr) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + struct compiled_stencil_ref *cs = &priv->stencil_ref; + + priv->stencil_ref_s = *sr; + + cs->PE_STENCIL_CONFIG = 
+ VIVS_PE_STENCIL_CONFIG_REF_FRONT(sr->ref_value[0]); + /* rest of bits weaved in from depth_stencil_alpha */ + cs->PE_STENCIL_CONFIG_EXT = + VIVS_PE_STENCIL_CONFIG_EXT_REF_BACK(sr->ref_value[0]); /* NOTE(review): the back-face reference is also taken from ref_value[0]; Gallium's pipe_stencil_ref presumably keeps the back-face value in ref_value[1] - confirm whether sharing the front value is intended */ + priv->dirty_bits |= ETNA_STATE_STENCIL_REF; +} + +static void etna_pipe_set_sample_mask(struct pipe_context *pipe, + unsigned sample_mask) +{ /* Save the sample mask, compile it into the MSAA_ENABLES field and set ETNA_STATE_SAMPLE_MASK dirty */ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + struct compiled_sample_mask *cs = &priv->sample_mask; + + priv->sample_mask_s = sample_mask; + + cs->GL_MULTI_SAMPLE_CONFIG = + /* to be merged with render target state */ + VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES(sample_mask); + priv->dirty_bits |= ETNA_STATE_SAMPLE_MASK; +} + +static void etna_pipe_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *sv) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + struct compiled_framebuffer_state *cs = &priv->framebuffer; + int nr_samples_color = -1; /* stays -1 when no color buffer is bound */ + int nr_samples_depth = -1; /* stays -1 when no depth/stencil buffer is bound */ + + + /* Set up TS as well. Warning: this state is used by both the RS and PE */ + uint32_t ts_mem_config = 0; + if(sv->nr_cbufs > 0) /* at least one color buffer? */ + { + struct etna_surface *cbuf = etna_surface(sv->cbufs[0]); + bool color_supertiled = (cbuf->layout & 2)!=0; + assert(cbuf->layout & 1); /* Cannot render to linear surfaces */ + pipe_surface_reference(&cs->cbuf, &cbuf->base); + cs->PE_COLOR_FORMAT = + VIVS_PE_COLOR_FORMAT_FORMAT(translate_rt_format(cbuf->base.format, false)) | + (color_supertiled ? 
VIVS_PE_COLOR_FORMAT_SUPER_TILED : 0); + /* XXX VIVS_PE_COLOR_FORMAT_OVERWRITE and the rest comes from blend_state / depth_stencil_alpha */ + /* merged with depth_stencil_alpha */ + if (priv->ctx->conn->chip.pixel_pipes == 1) + { + cs->PE_COLOR_ADDR = cbuf->surf.address; + } + else if (priv->ctx->conn->chip.pixel_pipes == 2) + { + cs->PE_PIPE_COLOR_ADDR[0] = cbuf->surf.address; + cs->PE_PIPE_COLOR_ADDR[1] = cbuf->surf.address; /* TODO */ + } + cs->PE_COLOR_STRIDE = cbuf->surf.stride; + if(cbuf->surf.ts_address) + { + ts_mem_config |= VIVS_TS_MEM_CONFIG_COLOR_FAST_CLEAR; + cs->TS_COLOR_CLEAR_VALUE = cbuf->level->clear_value; + cs->TS_COLOR_STATUS_BASE = cbuf->surf.ts_address; + cs->TS_COLOR_SURFACE_BASE = cbuf->surf.address; + } + /* MSAA */ + if(cbuf->base.texture->nr_samples > 1) + ts_mem_config |= VIVS_TS_MEM_CONFIG_MSAA | translate_msaa_format(cbuf->base.format, false); + nr_samples_color = cbuf->base.texture->nr_samples; + } else { + pipe_surface_reference(&cs->cbuf, NULL); + cs->PE_COLOR_FORMAT = 0; /* Is this enough to render without color? */ + } + + if(sv->zsbuf != NULL) + { + struct etna_surface *zsbuf = etna_surface(sv->zsbuf); + pipe_surface_reference(&cs->zsbuf, &zsbuf->base); + assert(zsbuf->layout & 1); /* Cannot render to linear surfaces */ + uint32_t depth_format = translate_depth_format(zsbuf->base.format, false); + unsigned depth_bits = depth_format == VIVS_PE_DEPTH_CONFIG_DEPTH_FORMAT_D16 ? 16 : 24; + bool depth_supertiled = (zsbuf->layout & 2)!=0; + cs->PE_DEPTH_CONFIG = + depth_format | + (depth_supertiled ? 
VIVS_PE_DEPTH_CONFIG_SUPER_TILED : 0) | + VIVS_PE_DEPTH_CONFIG_DEPTH_MODE_Z; + /* VIVS_PE_DEPTH_CONFIG_ONLY_DEPTH */ + /* merged with depth_stencil_alpha */ + if (priv->ctx->conn->chip.pixel_pipes == 1) + { + cs->PE_DEPTH_ADDR = zsbuf->surf.address; + } + else if (priv->ctx->conn->chip.pixel_pipes == 2) + { + cs->PE_PIPE_DEPTH_ADDR[0] = zsbuf->surf.address; + cs->PE_PIPE_DEPTH_ADDR[1] = zsbuf->surf.address; /* TODO */ + } + cs->PE_DEPTH_STRIDE = zsbuf->surf.stride; + cs->PE_HDEPTH_CONTROL = VIVS_PE_HDEPTH_CONTROL_FORMAT_DISABLED; + cs->PE_DEPTH_NORMALIZE = etna_f32_to_u32(exp2f(depth_bits) - 1.0f); + if(zsbuf->surf.ts_address) + { + ts_mem_config |= VIVS_TS_MEM_CONFIG_DEPTH_FAST_CLEAR; + cs->TS_DEPTH_CLEAR_VALUE = zsbuf->level->clear_value; + cs->TS_DEPTH_STATUS_BASE = zsbuf->surf.ts_address; + cs->TS_DEPTH_SURFACE_BASE = zsbuf->surf.address; + } + ts_mem_config |= (depth_bits == 16 ? VIVS_TS_MEM_CONFIG_DEPTH_16BPP : 0); + /* MSAA */ + if(zsbuf->base.texture->nr_samples > 1) + /* XXX VIVS_TS_MEM_CONFIG_DEPTH_COMPRESSION; + * Disable without MSAA for now, as it causes corruption in glquake. */ + ts_mem_config |= VIVS_TS_MEM_CONFIG_DEPTH_COMPRESSION; + nr_samples_depth = zsbuf->base.texture->nr_samples; + } else { + pipe_surface_reference(&cs->zsbuf, NULL); + cs->PE_DEPTH_CONFIG = VIVS_PE_DEPTH_CONFIG_DEPTH_MODE_NONE; + } + + /* MSAA setup */ + if(nr_samples_depth != -1 && nr_samples_color != -1 && + nr_samples_depth != nr_samples_color) + { + printf("%s: Number of samples in color and depth texture must match (%i and %i respectively)\n", __func__, + nr_samples_color, nr_samples_depth); + } + switch(MAX2(nr_samples_depth, nr_samples_color)) + { + case 0: + case 1: /* Are 0 and 1 samples allowed? 
*/ + cs->GL_MULTI_SAMPLE_CONFIG = VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES_NONE; + cs->msaa_mode = false; + break; + case 2: + cs->GL_MULTI_SAMPLE_CONFIG = VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES_2X; + cs->msaa_mode = true; /* Add input to PS */ + cs->RA_MULTISAMPLE_UNK00E04 = 0x0; + cs->RA_MULTISAMPLE_UNK00E10[0] = 0x0000aa22; + cs->RA_CENTROID_TABLE[0] = 0x66aa2288; + cs->RA_CENTROID_TABLE[1] = 0x88558800; + cs->RA_CENTROID_TABLE[2] = 0x88881100; + cs->RA_CENTROID_TABLE[3] = 0x33888800; + break; + case 4: + cs->GL_MULTI_SAMPLE_CONFIG = VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES_4X; + cs->msaa_mode = true; /* Add input to PS */ + cs->RA_MULTISAMPLE_UNK00E04 = 0x0; + cs->RA_MULTISAMPLE_UNK00E10[0] = 0xeaa26e26; + cs->RA_MULTISAMPLE_UNK00E10[1] = 0xe6ae622a; + cs->RA_MULTISAMPLE_UNK00E10[2] = 0xaaa22a22; + cs->RA_CENTROID_TABLE[0] = 0x4a6e2688; + cs->RA_CENTROID_TABLE[1] = 0x888888a2; + cs->RA_CENTROID_TABLE[2] = 0x888888ea; + cs->RA_CENTROID_TABLE[3] = 0x888888c6; + cs->RA_CENTROID_TABLE[4] = 0x46622a88; + cs->RA_CENTROID_TABLE[5] = 0x888888ae; + cs->RA_CENTROID_TABLE[6] = 0x888888e6; + cs->RA_CENTROID_TABLE[7] = 0x888888ca; + cs->RA_CENTROID_TABLE[8] = 0x262a2288; + cs->RA_CENTROID_TABLE[9] = 0x886688a2; + cs->RA_CENTROID_TABLE[10] = 0x888866aa; + cs->RA_CENTROID_TABLE[11] = 0x668888a6; + break; + } + + /* Scissor setup */ + cs->SE_SCISSOR_LEFT = 0; /* affected by rasterizer and scissor state as well */ + cs->SE_SCISSOR_TOP = 0; + cs->SE_SCISSOR_RIGHT = (sv->width << 16)-1; + cs->SE_SCISSOR_BOTTOM = (sv->height << 16)-1; + + cs->TS_MEM_CONFIG = ts_mem_config; + + priv->dirty_bits |= ETNA_STATE_FRAMEBUFFER; + priv->framebuffer_s = *sv; /* keep copy of original structure */ +} + +static void etna_pipe_set_scissor_states( struct pipe_context *pipe, + unsigned start_slot, + unsigned num_scissors, + const struct pipe_scissor_state *ss) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + struct compiled_scissor_state *cs = &priv->scissor; + priv->scissor_s = 
*ss; + cs->SE_SCISSOR_LEFT = (ss->minx << 16); + cs->SE_SCISSOR_TOP = (ss->miny << 16); + cs->SE_SCISSOR_RIGHT = (ss->maxx << 16)-1; + cs->SE_SCISSOR_BOTTOM = (ss->maxy << 16)-1; + /* note that this state is only used when rasterizer_state->scissor is on */ + priv->dirty_bits |= ETNA_STATE_SCISSOR; +} + +static void etna_pipe_set_viewport_states( struct pipe_context *pipe, + unsigned start_slot, + unsigned num_scissors, + const struct pipe_viewport_state *vs) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + struct compiled_viewport_state *cs = &priv->viewport; + priv->viewport_s = *vs; + /** + * For Vivante GPU, viewport z transformation is 0..1 to 0..1 instead of -1..1 to 0..1. + * scaling and translation to 0..1 already happened, so remove that + * + * z' = (z * 2 - 1) * scale + translate + * = z * (2 * scale) + (translate - scale) + * + * scale' = 2 * scale + * translate' = translate - scale + */ + cs->PA_VIEWPORT_SCALE_X = etna_f32_to_fixp16(vs->scale[0]); /* must be fixp as v4 state deltas assume it is */ + cs->PA_VIEWPORT_SCALE_Y = etna_f32_to_fixp16(vs->scale[1]); + cs->PA_VIEWPORT_SCALE_Z = etna_f32_to_u32(vs->scale[2] * 2.0f); + cs->PA_VIEWPORT_OFFSET_X = etna_f32_to_fixp16(vs->translate[0]); + cs->PA_VIEWPORT_OFFSET_Y = etna_f32_to_fixp16(vs->translate[1]); + cs->PA_VIEWPORT_OFFSET_Z = etna_f32_to_u32(vs->translate[2] - vs->scale[2]); + + /* Compute scissor rectangle (fixp) from viewport. + * Make sure left is always < right and top always < bottom. 
+ */ + cs->SE_SCISSOR_LEFT = etna_f32_to_fixp16(MAX2(vs->translate[0] - vs->scale[0], 0.0f)); + cs->SE_SCISSOR_TOP = etna_f32_to_fixp16(MAX2(vs->translate[1] - vs->scale[1], 0.0f)); + cs->SE_SCISSOR_RIGHT = etna_f32_to_fixp16(MAX2(vs->translate[0] + vs->scale[0], 0.0f)); + cs->SE_SCISSOR_BOTTOM = etna_f32_to_fixp16(MAX2(vs->translate[1] + vs->scale[1], 0.0f)); + if(cs->SE_SCISSOR_LEFT > cs->SE_SCISSOR_RIGHT) + { + uint32_t tmp = cs->SE_SCISSOR_RIGHT; + cs->SE_SCISSOR_RIGHT = cs->SE_SCISSOR_LEFT; + cs->SE_SCISSOR_LEFT = tmp; + } + if(cs->SE_SCISSOR_TOP > cs->SE_SCISSOR_BOTTOM) + { + uint32_t tmp = cs->SE_SCISSOR_BOTTOM; + cs->SE_SCISSOR_BOTTOM = cs->SE_SCISSOR_TOP; + cs->SE_SCISSOR_TOP = tmp; + } + + cs->PE_DEPTH_NEAR = etna_f32_to_u32(0.0); /* not affected if depth mode is Z (as in GL) */ + cs->PE_DEPTH_FAR = etna_f32_to_u32(1.0); + priv->dirty_bits |= ETNA_STATE_VIEWPORT; +} + +static void etna_pipe_set_vertex_buffers( struct pipe_context *pipe, + unsigned start_slot, + unsigned num_buffers, + const struct pipe_vertex_buffer *vb) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + assert((start_slot + num_buffers) <= PIPE_MAX_ATTRIBS); + struct pipe_vertex_buffer zero_vb = {}; + for(unsigned idx=0; idx<num_buffers; ++idx) + { + unsigned slot = start_slot + idx; /* copy from vb[idx] to priv->...[slot] */ + const struct pipe_vertex_buffer *vbi = vb ? 
&vb[idx] : &zero_vb; + struct compiled_set_vertex_buffer *cs = &priv->vertex_buffer[slot]; + assert(!vbi->user_buffer); /* XXX support user_buffer using etna_usermem_map */ + /* copy pipe_vertex_buffer structure and take reference */ + priv->vertex_buffer_s[slot].stride = vbi->stride; + priv->vertex_buffer_s[slot].buffer_offset = vbi->buffer_offset; + pipe_resource_reference(&priv->vertex_buffer_s[slot].buffer, vbi->buffer); + priv->vertex_buffer_s[slot].user_buffer = vbi->user_buffer; + /* determine addresses */ + viv_addr_t gpu_addr = 0; + cs->logical = 0; + if(vbi->buffer) /* GPU buffer */ + { + gpu_addr = etna_resource(vbi->buffer)->levels[0].address + vbi->buffer_offset; + cs->logical = etna_resource(vbi->buffer)->levels[0].logical + vbi->buffer_offset; + } + /* compiled state */ + cs->FE_VERTEX_STREAM_CONTROL = FE_VERTEX_STREAM_CONTROL_VERTEX_STRIDE(vbi->stride); + cs->FE_VERTEX_STREAM_BASE_ADDR = gpu_addr; + + etna_resource_touch(pipe, vbi->buffer); + } + + priv->dirty_bits |= ETNA_STATE_VERTEX_BUFFERS; +} + +static void etna_pipe_set_index_buffer( struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + struct compiled_set_index_buffer *cs = &priv->index_buffer; + if(ib == NULL) + { + pipe_resource_reference(&priv->index_buffer_s.buffer, NULL); /* update reference to buffer */ + cs->logical = NULL; + cs->FE_INDEX_STREAM_CONTROL = 0; + cs->FE_INDEX_STREAM_BASE_ADDR = 0; + } else + { + assert(ib->buffer); /* XXX user_buffer using etna_usermem_map */ + pipe_resource_reference(&priv->index_buffer_s.buffer, ib->buffer); /* update reference to buffer */ + priv->index_buffer_s.index_size = ib->index_size; + priv->index_buffer_s.offset = ib->offset; + priv->index_buffer_s.user_buffer = ib->user_buffer; + + cs->FE_INDEX_STREAM_CONTROL = + translate_index_size(ib->index_size); + cs->FE_INDEX_STREAM_BASE_ADDR = etna_resource(ib->buffer)->levels[0].address + ib->offset; + cs->logical = 
etna_resource(ib->buffer)->levels[0].logical + ib->offset; + + etna_resource_touch(pipe, ib->buffer); + } + priv->dirty_bits |= ETNA_STATE_INDEX_BUFFER; +} + +static void etna_pipe_flush(struct pipe_context *pipe, + struct pipe_fence_handle **fence, + enum pipe_flush_flags flags) +{ /* Optionally create a fence, then call etna_flush. Fence and flush errors are only printed; `flags` is not used. */ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + if(fence) + { + if(etna_fence_new(pipe->screen, priv->ctx, fence) != ETNA_OK) + { + printf("Error: %s: could not create fence\n", __func__); + } + } + if(etna_flush(priv->ctx) != ETNA_OK) + { + printf("Error: %s: etna_flush failed, GPU may be in unpredictable state\n", __func__); + } + if(DBG_ENABLED(ETNA_DBG_FINISH_ALL)) /* when the ETNA_DBG_FINISH_ALL debug flag is enabled, also call etna_finish; a failure here aborts */ + { + if(etna_finish(priv->ctx) != ETNA_OK) + { + printf("Error: %s: etna_finish failed, GPU may be in unpredictable state\n", __func__); + abort(); + } + } +} + +static void etna_pipe_set_clip_state(struct pipe_context *pipe, const struct pipe_clip_state *pcs) +{ + /* NOOP */ +} + +static void etna_pipe_set_polygon_stipple(struct pipe_context *pctx, + const struct pipe_poly_stipple *stipple) +{ + /* NOP */ +} + +struct pipe_context *etna_new_pipe_context(struct viv_conn *dev, const struct etna_pipe_specs *specs, struct pipe_screen *screen, void *priv) +{ + struct etna_pipe_context *ectx = CALLOC_STRUCT(etna_pipe_context); + if(ectx == NULL) + return NULL; + struct pipe_context *pc = &ectx->base; + + pc->priv = priv; + pc->screen = screen; + + if(etna_create(dev, &ectx->ctx) < 0) + { + FREE(pc); /* pc is &ectx->base, the first member of etna_pipe_context, so this releases ectx */ + return NULL; + } + etna_set_context_cb(ectx->ctx, update_context, ectx); + + /* context state setup: start with every dirty bit set */ + ectx->dirty_bits = 0xffffffff; + ectx->conn = dev; + ectx->specs = *specs; + + /* Set sensible defaults for state */ + ectx->gpu3d.PA_W_CLIP_LIMIT = 0x34000001; + ectx->gpu3d.GL_VERTEX_ELEMENT_CONFIG = 0x1; + ectx->gpu3d.GL_API_MODE = VIVS_GL_API_MODE_OPENGL; + ectx->gpu3d.RA_EARLY_DEPTH = 0x00000031; /* enable */ + + /* fill in vtable entries one by one */ + pc->destroy = etna_pipe_destroy; + pc->draw_vbo = 
etna_pipe_draw_vbo; + /* XXX render_condition */ + /* XXX create_query */ + /* XXX destroy_query */ + /* XXX begin_query */ + /* XXX end_query */ + /* XXX get_query_result */ + pc->create_vertex_elements_state = etna_pipe_create_vertex_elements_state; + pc->bind_vertex_elements_state = etna_pipe_bind_vertex_elements_state; + pc->delete_vertex_elements_state = etna_pipe_delete_vertex_elements_state; + pc->set_blend_color = etna_pipe_set_blend_color; + pc->set_stencil_ref = etna_pipe_set_stencil_ref; + pc->set_sample_mask = etna_pipe_set_sample_mask; + pc->set_clip_state = etna_pipe_set_clip_state; + pc->set_framebuffer_state = etna_pipe_set_framebuffer_state; + pc->set_polygon_stipple = etna_pipe_set_polygon_stipple; + pc->set_scissor_states = etna_pipe_set_scissor_states; + pc->set_viewport_states = etna_pipe_set_viewport_states; + pc->set_vertex_buffers = etna_pipe_set_vertex_buffers; + pc->set_index_buffer = etna_pipe_set_index_buffer; + /* XXX create_stream_output_target */ + /* XXX stream_output_target_destroy */ + /* XXX set_stream_output_targets */ + pc->flush = etna_pipe_flush; + /* XXX create_video_decoder */ + /* XXX create_video_buffer */ + /* XXX create_compute_state */ + /* XXX bind_compute_state */ + /* XXX delete_compute_state */ + /* XXX set_compute_resources */ + /* XXX set_global_binding */ + /* XXX launch_grid */ + + etna_pipe_blend_init(pc); + etna_pipe_rasterizer_init(pc); + etna_pipe_shader_init(pc); + etna_pipe_surface_init(pc); + etna_pipe_texture_init(pc); + etna_pipe_transfer_init(pc); + etna_pipe_zsa_init(pc); + etna_pipe_clear_blit_init(pc); + + /* Reset GPU to initial state */ + reset_context(pc); + return pc; +} diff --git a/src/gallium/drivers/etna/etna_pipe.h b/src/gallium/drivers/etna/etna_pipe.h new file mode 100644 index 0000000000..3e9074a1e6 --- /dev/null +++ b/src/gallium/drivers/etna/etna_pipe.h @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person 
obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +/* Gallium driver main header file + */ +#ifndef H_ETNA_PIPE +#define H_ETNA_PIPE + +#include <stdint.h> +#include <etnaviv/etna.h> +#include <etnaviv/etna_mem.h> +#include <etnaviv/etna_rs.h> +#include <etnaviv/etna_tex.h> + +#include "etna_internal.h" +#include "pipe/p_defines.h" +#include "pipe/p_format.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "util/u_slab.h" + +struct pipe_screen; + +struct etna_shader_input +{ + int vs_reg; /* VS input register */ +}; + +enum etna_varying_special { + ETNA_VARYING_VSOUT = 0, /* from VS */ + ETNA_VARYING_POINTCOORD, /* point texture coord */ +}; + +struct etna_shader_varying +{ + int num_components; + enum etna_varying_special special; + int pa_attributes; + int vs_reg; /* VS output register */ +}; + +struct etna_resource_level +{ + unsigned width, padded_width; + unsigned height, padded_height; + unsigned offset; /* offset into memory area */ + unsigned size; /* size of memory area */ + + uint32_t address; /* cached GPU pointers to LODs */ + void *logical; /* cached CPU pointer */ + uint32_t ts_address; + uint32_t ts_size; + uint32_t clear_value; /* clear value of resource level (mainly for TS) */ + uint32_t stride; /* VIVS_PE_(COLOR|DEPTH)_STRIDE */ + uint32_t layer_stride; +}; + +struct etna_resource +{ + struct pipe_resource base; + + /* only lod 0 used for non-texture buffers */ + /* Layout for surface (tiled, multitiled, split tiled, ...) 
*/ + enum etna_surface_layout layout; + /* Horizontal alignment for texture unit (TEXTURE_HALIGN_*) */ + unsigned halign; + struct etna_vidmem *surface; /* Surface video memory */ + struct etna_vidmem *ts; /* Tile status video memory */ + + struct etna_resource_level levels[ETNA_NUM_LOD]; + struct etna_pipe_context *last_ctx; /* Last bound context */ +}; + +struct etna_surface +{ + struct pipe_surface base; + + enum etna_surface_layout layout; + struct etna_resource_level surf; + struct compiled_rs_state clear_command; + /* Keep pointer to resource level, for fast clear */ + struct etna_resource_level *level; +}; + +struct etna_sampler_view +{ + struct pipe_sampler_view base; + + struct compiled_sampler_view *internal; +}; + +struct etna_transfer +{ + struct pipe_transfer base; + + /* Pointer to buffer (same pointer as returned by transfer_map) */ + void *buffer; + /* If true, transfer happens in-place. buffer is not allocated separately but + * points into the actual resource, and thus does not need to be copied or freed. 
+     */
+    bool in_place;
+};
+
+/* group all current CSOs, for dirty bits.
+ * Every value is a distinct single-bit mask, so several can be OR-ed together
+ * into etna_pipe_context.dirty_bits. Numbering starts at bit 1; bit 0 is not
+ * assigned in this list.
+ */
+enum
+{
+    ETNA_STATE_BLEND = (1<<1),
+    ETNA_STATE_SAMPLERS = (1<<2),
+    ETNA_STATE_RASTERIZER = (1<<3),
+    ETNA_STATE_DSA = (1<<4),
+    ETNA_STATE_VERTEX_ELEMENTS = (1<<5),
+    ETNA_STATE_BLEND_COLOR = (1<<6),
+    ETNA_STATE_STENCIL_REF = (1<<7),
+    ETNA_STATE_SAMPLE_MASK = (1<<8),
+    ETNA_STATE_VIEWPORT = (1<<9),
+    ETNA_STATE_FRAMEBUFFER = (1<<10),
+    ETNA_STATE_SCISSOR = (1<<11),
+    ETNA_STATE_SAMPLER_VIEWS = (1<<12),
+    ETNA_STATE_VERTEX_BUFFERS = (1<<13),
+    ETNA_STATE_INDEX_BUFFER = (1<<14),
+    ETNA_STATE_SHADER = (1<<15),
+    ETNA_STATE_VS_UNIFORMS = (1<<16),
+    ETNA_STATE_PS_UNIFORMS = (1<<17),
+    ETNA_STATE_TS = (1<<18), /* set after clear and when RS blit operations from other surface affect TS */
+    ETNA_STATE_TEXTURE_CACHES = (1<<19) /* set when texture has been modified/uploaded */
+};
+
+/* private opaque context structure.
+ * `base` has to stay the first member: etna_pipe_context() below converts a
+ * struct pipe_context pointer to this type with a plain cast.
+ */
+struct etna_pipe_context
+{
+    struct pipe_context base;
+    struct viv_conn *conn;
+    struct etna_ctx *ctx;
+    unsigned dirty_bits; /* OR of ETNA_STATE_* bits */
+    struct etna_pipe_specs specs;
+    struct util_slab_mempool transfer_pool;
+    struct blitter_context *blitter;
+
+    /* compiled bindable state */
+    struct compiled_blend_state blend;
+    unsigned num_vertex_samplers;
+    unsigned num_fragment_samplers;
+    struct compiled_sampler_state sampler[PIPE_MAX_SAMPLERS];
+    struct compiled_rasterizer_state rasterizer;
+    struct compiled_depth_stencil_alpha_state depth_stencil_alpha;
+    struct compiled_vertex_elements_state vertex_elements;
+    struct compiled_shader_state shader_state;
+
+    /* compiled parameter-like state */
+    struct compiled_blend_color blend_color;
+    struct compiled_stencil_ref stencil_ref;
+    struct compiled_sample_mask sample_mask;
+    struct compiled_framebuffer_state framebuffer;
+    struct compiled_scissor_state scissor;
+    struct compiled_viewport_state viewport;
+    unsigned num_fragment_sampler_views;
+    unsigned num_vertex_sampler_views;
+    struct compiled_sampler_view sampler_view[PIPE_MAX_SAMPLERS];
+    struct compiled_set_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+    struct compiled_set_index_buffer index_buffer;
+
+    /* pointers to the bound state. these are mainly kept around for the blitter. */
+    struct compiled_blend_state *blend_p;
+    struct compiled_sampler_state *sampler_p[PIPE_MAX_SAMPLERS];
+    struct compiled_rasterizer_state *rasterizer_p;
+    struct compiled_depth_stencil_alpha_state *depth_stencil_alpha_p;
+    struct compiled_vertex_elements_state *vertex_elements_p;
+    struct etna_shader_object *vs;
+    struct etna_shader_object *fs;
+
+    /* saved parameter-like state. these are mainly kept around for the blitter. */
+    struct pipe_framebuffer_state framebuffer_s;
+    unsigned sample_mask_s;
+    struct pipe_stencil_ref stencil_ref_s;
+    struct pipe_viewport_state viewport_s;
+    struct pipe_scissor_state scissor_s;
+    struct pipe_sampler_view *sampler_view_s[PIPE_MAX_SAMPLERS];
+    struct pipe_vertex_buffer vertex_buffer_s[PIPE_MAX_ATTRIBS];
+    struct pipe_index_buffer index_buffer_s;
+    struct pipe_constant_buffer vs_cbuf_s;
+    struct pipe_constant_buffer fs_cbuf_s;
+
+    /* cached state of entire GPU */
+    struct etna_3d_state gpu3d;
+};
+
+/* Cast helpers: each one reinterprets a generic gallium object pointer as the
+ * corresponding etna type. They are unchecked pointer casts, so a NULL input
+ * yields a NULL result and a pointer of the wrong kind is not detected.
+ */
+static INLINE struct etna_pipe_context *
+etna_pipe_context(struct pipe_context *p)
+{
+    return (struct etna_pipe_context *)p;
+}
+
+static INLINE struct etna_resource *
+etna_resource(struct pipe_resource *p)
+{
+    return (struct etna_resource *)p;
+}
+
+static INLINE struct etna_surface *
+etna_surface(struct pipe_surface *p)
+{
+    return (struct etna_surface *)p;
+}
+
+static INLINE struct etna_sampler_view *
+etna_sampler_view(struct pipe_sampler_view *p)
+{
+    return (struct etna_sampler_view *)p;
+}
+
+static INLINE struct etna_transfer *
+etna_transfer(struct pipe_transfer *p)
+{
+    return (struct etna_transfer *)p;
+}
+
+/* Create a pipe context for screen `scr`; the definition is not in this header. */
+struct pipe_context *etna_new_pipe_context(struct viv_conn *dev, const struct etna_pipe_specs *specs, struct pipe_screen *scr, void *priv);
+
+#endif
diff --git a/src/gallium/drivers/etna/etna_rasterizer.c
b/src/gallium/drivers/etna/etna_rasterizer.c new file mode 100644 index 0000000000..e7f757a4ae --- /dev/null +++ b/src/gallium/drivers/etna/etna_rasterizer.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+/* Rasterizer CSOs */
+#include "etna_rasterizer.h"
+
+#include "etna_internal.h"
+#include "etna_pipe.h"
+#include "etna_translate.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_memory.h"
+
+#include <etnaviv/common.xml.h>
+#include <etnaviv/state.xml.h>
+#include <etnaviv/state_3d.xml.h>
+
+/* Compile a gallium rasterizer state into a newly allocated
+ * compiled_rasterizer_state holding the register words derived from it.
+ *
+ * pipe: context the state is created for (not read here).
+ * rs:   rasterizer state to translate; must not be NULL.
+ *
+ * Returns the compiled state, to be released with
+ * etna_pipe_delete_rasterizer_state(), or NULL when allocation fails.
+ */
+static void *etna_pipe_create_rasterizer_state(struct pipe_context *pipe,
+                                 const struct pipe_rasterizer_state *rs)
+{
+    struct compiled_rasterizer_state *cs = CALLOC_STRUCT(compiled_rasterizer_state);
+    if(cs == NULL)
+        return NULL;
+    if(rs->fill_front != rs->fill_back)
+    {
+        /* only one polygon mode is emitted; the front mode is the one used below */
+        printf("Different front and back fill mode not supported\n");
+    }
+    cs->PA_CONFIG =
+        (rs->flatshade ? VIVS_PA_CONFIG_SHADE_MODEL_FLAT : VIVS_PA_CONFIG_SHADE_MODEL_SMOOTH) |
+        translate_cull_face(rs->cull_face, rs->front_ccw) |
+        translate_polygon_mode(rs->fill_front) |
+        (rs->point_quad_rasterization ? VIVS_PA_CONFIG_POINT_SPRITE_ENABLE : 0) |
+        (rs->point_size_per_vertex ? VIVS_PA_CONFIG_POINT_SIZE_ENABLE : 0);
+    cs->PA_LINE_WIDTH = etna_f32_to_u32(rs->line_width / 2.0f);
+    cs->PA_POINT_SIZE = etna_f32_to_u32(rs->point_size);
+    cs->SE_DEPTH_SCALE = etna_f32_to_u32(rs->offset_scale);
+    /* Scale the float first and convert afterwards, as PA_LINE_WIDTH does.
+     * Dividing the value returned by etna_f32_to_u32() would scale the
+     * converted word rather than the depth bias itself.
+     */
+    cs->SE_DEPTH_BIAS = etna_f32_to_u32(rs->offset_units / 65535.0f);
+    cs->SE_CONFIG =
+        (rs->line_last_pixel ? VIVS_SE_CONFIG_LAST_PIXEL_ENABLE : 0);
+    /* XXX anything else? */
+    /* XXX bottom_edge_rule */
+    cs->PA_SYSTEM_MODE =
+        (rs->half_pixel_center ? (VIVS_PA_SYSTEM_MODE_UNK0 | VIVS_PA_SYSTEM_MODE_UNK4) : 0);
+    /* rs->scissor overrides the scissor, defaulting to the whole framebuffer, with the scissor state */
+    cs->scissor = rs->scissor;
+    /* point size per vertex adds a vertex shader output */
+    cs->point_size_per_vertex = rs->point_size_per_vertex;
+
+    assert(!rs->clip_halfz); /* could be supported with shader magic, actually D3D z is default on older gc */
+    return cs;
+}
+
+/* Make `rs` the current rasterizer state of the context.
+ * The pointer is remembered in rasterizer_p and, when it is non-NULL, the
+ * compiled state is also copied by value into the context. The rasterizer
+ * dirty bit is raised in both cases.
+ */
+static void etna_pipe_bind_rasterizer_state(struct pipe_context *pipe, void *rs)
+{
+    struct etna_pipe_context *priv = etna_pipe_context(pipe);
+    priv->dirty_bits |= ETNA_STATE_RASTERIZER;
+    priv->rasterizer_p = rs;
+    if(rs)
+        priv->rasterizer = *(struct compiled_rasterizer_state*)rs;
+}
+
+/* Release a state object returned by etna_pipe_create_rasterizer_state(). */
+static void etna_pipe_delete_rasterizer_state(struct pipe_context *pipe, void *rs)
+{
+    FREE(rs);
+}
+
+/* Install the rasterizer CSO callbacks into the pipe context. */
+void etna_pipe_rasterizer_init(struct pipe_context *pc)
+{
+    pc->create_rasterizer_state = etna_pipe_create_rasterizer_state;
+    pc->bind_rasterizer_state = etna_pipe_bind_rasterizer_state;
+    pc->delete_rasterizer_state = etna_pipe_delete_rasterizer_state;
+}
diff --git a/src/gallium/drivers/etna/etna_rasterizer.h b/src/gallium/drivers/etna/etna_rasterizer.h
new file mode 100644
index 0000000000..c6452c524a
--- /dev/null
+++ b/src/gallium/drivers/etna/etna_rasterizer.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2012-2013 Etnaviv Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall
be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +/* Rasterizer CSOs */ +#ifndef H_ETNA_RASTERIZER +#define H_ETNA_RASTERIZER + +struct pipe_context; + +void etna_pipe_rasterizer_init(struct pipe_context *pipe); + +#endif diff --git a/src/gallium/drivers/etna/etna_resource.c b/src/gallium/drivers/etna/etna_resource.c new file mode 100644 index 0000000000..5a5e736a57 --- /dev/null +++ b/src/gallium/drivers/etna/etna_resource.c @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+/* Resource handling.
+ */
+#include "etna_resource.h"
+
+#include "etna_pipe.h"
+#include "etna_screen.h"
+#include "etna_debug.h"
+#include "etna_translate.h"
+
+#include "util/u_format.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_inlines.h"
+#include "util/u_transfer.h" /* u_default_resource_get_handle */
+
+/* Associate a resource with this context when it is bound in any way
+ * (vertex buffer, index buffer, texture, surface, blit).
+ * A NULL resource is ignored. Only the most recent context is remembered.
+ */
+void etna_resource_touch(struct pipe_context *pipe, struct pipe_resource *resource_)
+{
+    struct etna_pipe_context *ectx = etna_pipe_context(pipe);
+    struct etna_resource *resource = etna_resource(resource_);
+    if(resource == NULL)
+        return;
+    resource->last_ctx = ectx;
+}
+
+/* Allocate and zero the tile status buffer for level 0 of `resource`.
+ * The resource must not have a tile status yet.
+ * Returns true on success, and also when the computed size is zero (nothing
+ * is allocated then); returns false when the video memory allocation fails.
+ */
+bool etna_screen_resource_alloc_ts(struct pipe_screen *screen, struct etna_resource *resource)
+{
+    struct etna_screen *priv = etna_screen(screen);
+    size_t rt_ts_size;
+    assert(!resource->ts);
+    /* TS only for level 0 -- XXX is this formula correct? */
+    rt_ts_size = align(resource->levels[0].size*priv->specs.bits_per_tile/0x80, 0x100);
+    if(rt_ts_size == 0)
+        return true;
+
+    /* size_t does not match %i; cast explicitly like the other size prints in this file */
+    DBG_F(ETNA_DBG_RESOURCE_MSGS, "%p: Allocating tile status of size %i", resource, (int)rt_ts_size);
+    struct etna_vidmem *rt_ts = 0;
+    if(unlikely(etna_vidmem_alloc_linear(priv->dev, &rt_ts, rt_ts_size, VIV_SURF_TILE_STATUS, VIV_POOL_DEFAULT, true)!=ETNA_OK))
+    {
+        printf("Problem allocating tile status for resource\n");
+        return false;
+    }
+    resource->ts = rt_ts;
+    resource->levels[0].ts_address = resource->ts->address;
+    resource->levels[0].ts_size = resource->ts->size;
+    /* It is important to initialize the TS to zero, as random pattern
+     * can result in crashes. Do this on the CPU as this only happens once
+     * per surface anyway and it's a small area, so it may not be worth
+     * queuing this to the GPU.
+     */
+    memset(rt_ts->logical, 0, rt_ts_size);
+    return true;
+}
+
+
+/* Report whether a resource described by `templat` could be created:
+ * the sample count must be translatable, and for render target, depth/stencil
+ * and sampler view bindings the width and height must fit the matching size
+ * limit from the screen specs.
+ */
+static boolean etna_screen_can_create_resource(struct pipe_screen *pscreen,
+                              const struct pipe_resource *templat)
+{
+    struct etna_screen *screen = etna_screen(pscreen);
+    if(!translate_samples_to_xyscale(templat->nr_samples, NULL, NULL, NULL))
+        return false;
+    if(templat->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_SAMPLER_VIEW))
+    {
+        uint max_size = (templat->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) ?
+                            screen->specs.max_rendertarget_size :
+                            screen->specs.max_texture_size;
+        if(templat->width0 > max_size || templat->height0 > max_size)
+            return false;
+    }
+    return true;
+}
+
+/* Importing resources from a winsys handle is not implemented; always returns NULL. */
+static struct pipe_resource * etna_screen_resource_from_handle(struct pipe_screen *screen,
+                                              const struct pipe_resource *templat,
+                                              struct winsys_handle *handle)
+{
+    DBG("unimplemented etna_screen_resource_from_handle");
+    return NULL;
+}
+
+/* Allocate 2D texture or render target resource
+ *
+ * Works out layout, MSAA scaling, padding and the mipmap chain from `templat`,
+ * then allocates one block of video memory that holds all levels.
+ *
+ * Returns the new resource with a reference count of 1, or NULL when the
+ * format has no block size, the sample count is unsupported, or an
+ * allocation fails. Nothing is leaked on the failure paths.
+ */
+static struct pipe_resource * etna_screen_resource_create(struct pipe_screen *screen,
+                                         const struct pipe_resource *templat)
+{
+    struct etna_screen *priv = etna_screen(screen);
+    assert(templat);
+    unsigned element_size = util_format_get_blocksize(templat->format);
+    if(!element_size)
+        return NULL;
+
+    /* Check input */
+    if(templat->target == PIPE_TEXTURE_CUBE)
+    {
+        assert(templat->array_size == 6);
+    } else if (templat->target == PIPE_BUFFER)
+    {
+        assert(templat->format == PIPE_FORMAT_R8_UNORM); /* bytes; want TYPELESS or similar */
+        assert(templat->array_size == 1);
+        assert(templat->height0 == 1);
+        assert(templat->depth0 == 1);
+        assert(templat->last_level == 0);
+    } else
+    {
+        assert(templat->array_size == 1);
+    }
+    assert(templat->width0 != 0);
+    assert(templat->height0 != 0);
+    assert(templat->depth0 != 0);
+    assert(templat->array_size != 0);
+
+    /* Figure out what tiling to use -- for now, assume that textures cannot be supertiled, and cannot be linear.
+     * There is a feature flag SUPERTILED_TEXTURE (not supported on any known hw) that may allow this, as well
+     * as LINEAR_TEXTURE_SUPPORT (supported on gc880 and gc2000 at least), but not sure how it works.
+     * Buffers always have LINEAR layout.
+     */
+    unsigned layout = ETNA_LAYOUT_LINEAR;
+    if(templat->target != PIPE_BUFFER)
+    {
+        if(!(templat->bind & PIPE_BIND_SAMPLER_VIEW) && priv->specs.can_supertile)
+            layout = ETNA_LAYOUT_SUPER_TILED;
+        else
+            layout = ETNA_LAYOUT_TILED;
+    }
+    /* XXX multi tiled formats */
+
+    /* Determine scaling for antialiasing, allow override using debug flag */
+    int nr_samples = templat->nr_samples;
+    if((templat->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) &&
+       !(templat->bind & PIPE_BIND_SAMPLER_VIEW))
+    {
+        if(DBG_ENABLED(ETNA_DBG_MSAA_2X))
+            nr_samples = 2;
+        if(DBG_ENABLED(ETNA_DBG_MSAA_4X))
+            nr_samples = 4;
+    }
+    int msaa_xscale = 1, msaa_yscale = 1;
+    if(!translate_samples_to_xyscale(nr_samples, &msaa_xscale, &msaa_yscale, NULL))
+    {
+        /* Number of samples not supported */
+        assert(0);
+        /* with asserts compiled out, refuse instead of building a mis-sized surface */
+        return NULL;
+    }
+
+    /* Determine needed padding (alignment of height/width) */
+    unsigned paddingX = 0, paddingY = 0;
+    unsigned halign = TEXTURE_HALIGN_FOUR;
+    etna_layout_multiple(layout,
+            priv->dev->chip.pixel_pipes,
+            (templat->bind & PIPE_BIND_SAMPLER_VIEW) && !VIV_FEATURE(priv->dev, chipMinorFeatures1, TEXTURE_HALIGN),
+            &paddingX, &paddingY, &halign);
+    assert(paddingX && paddingY);
+
+    /* determine mipmap levels */
+    struct etna_resource *resource = CALLOC_STRUCT(etna_resource);
+    if(resource == NULL)
+        return NULL;
+    int max_mip_level = templat->last_level;
+    if(unlikely(max_mip_level >= ETNA_NUM_LOD)) /* max LOD supported by hw */
+        max_mip_level = ETNA_NUM_LOD - 1;
+
+    /* take care about DXTx formats, which have a divSize of non-1x1
+     * also: lower mipmaps are still 4x4 due to tiling. In a sense, compressed formats are already tiled.
+     * XXX UYVY formats?
+     */
+    unsigned divSizeX = util_format_get_blockwidth(templat->format);
+    unsigned divSizeY = util_format_get_blockheight(templat->format);
+    unsigned ix = 0;
+    unsigned x = templat->width0, y = templat->height0;
+    unsigned offset = 0;
+    while(true)
+    {
+        struct etna_resource_level *mip = &resource->levels[ix];
+        mip->width = x;
+        mip->height = y;
+        mip->padded_width = align(x * msaa_xscale, paddingX);
+        mip->padded_height = align(y * msaa_yscale, paddingY);
+        mip->stride = align(mip->padded_width, divSizeX)/divSizeX * element_size;
+        mip->offset = offset;
+        mip->layer_stride = align(mip->padded_width, divSizeX)/divSizeX *
+                      align(mip->padded_height, divSizeY)/divSizeY * element_size;
+        mip->size = templat->array_size * mip->layer_stride;
+        offset += mip->size;
+        if(ix == max_mip_level || (x == 1 && y == 1))
+            break; // stop at last level
+        x = (x+1)>>1;
+        y = (y+1)>>1;
+        ix += 1;
+    }
+
+    /* Determine memory size, and whether to create a tile status */
+    size_t rt_size = offset;
+
+    /* determine memory type */
+    enum viv_surf_type memtype = VIV_SURF_UNKNOWN;
+    if(templat->bind & PIPE_BIND_SAMPLER_VIEW)
+        memtype = VIV_SURF_TEXTURE;
+    else if(templat->bind & PIPE_BIND_RENDER_TARGET)
+        memtype = VIV_SURF_RENDER_TARGET;
+    else if(templat->bind & PIPE_BIND_DEPTH_STENCIL)
+        memtype = VIV_SURF_DEPTH;
+    else if(templat->bind & PIPE_BIND_INDEX_BUFFER)
+        memtype = VIV_SURF_INDEX;
+    else if(templat->bind & PIPE_BIND_VERTEX_BUFFER)
+        memtype = VIV_SURF_VERTEX;
+
+    /* rt_size is a size_t; cast it so it matches the %08x conversion */
+    DBG_F(ETNA_DBG_RESOURCE_MSGS, "%p: Allocate surface of %ix%i (padded to %ix%i) of format %s (%i bpe %ix%i), size %08x flags %08x, memtype %i",
+            resource,
+            templat->width0, templat->height0, resource->levels[0].padded_width, resource->levels[0].padded_height, util_format_name(templat->format),
+            element_size, divSizeX, divSizeY, (unsigned)rt_size, templat->bind, memtype);
+
+    struct etna_vidmem *rt = 0;
+    if(unlikely(etna_vidmem_alloc_linear(priv->dev, &rt, rt_size, memtype, VIV_POOL_DEFAULT, true) != ETNA_OK))
+    {
+        printf("Problem allocating video memory for resource\n");
+        FREE(resource); /* the bookkeeping struct is not handed to anyone on failure */
+        return NULL;
+    }
+
+    resource->base = *templat;
+    resource->base.last_level = ix; /* real last mipmap level */
+    resource->base.screen = screen;
+    resource->base.nr_samples = nr_samples;
+    resource->layout = layout;
+    resource->halign = halign;
+    resource->surface = rt;
+    resource->ts = 0; /* TS is only created when first bound to surface */
+    pipe_reference_init(&resource->base.reference, 1);
+
+    /* now that the base address is known, resolve per-level GPU and CPU addresses */
+    for(unsigned level=0; level<=resource->base.last_level; ++level)
+    {
+        struct etna_resource_level *mip = &resource->levels[level];
+        mip->address = resource->surface->address + mip->offset;
+        mip->logical = resource->surface->logical + mip->offset;
+        DBG_F(ETNA_DBG_RESOURCE_MSGS, "  %08x level %i: %ix%i (%i) stride=%i layer_stride=%i",
+                (int)mip->address, level, (int)mip->width, (int)mip->height, (int)mip->size,
+                (int)mip->stride, (int)mip->layer_stride);
+    }
+
+    return &resource->base;
+}
+
+/* Destroy a resource and release its surface and tile status memory.
+ * A NULL resource is ignored. When a context has touched the resource, the
+ * memory is handed to that context's queue through etna_vidmem_queue_free();
+ * otherwise it is released directly with etna_vidmem_free().
+ */
+static void etna_screen_resource_destroy(struct pipe_screen *screen,
+                        struct pipe_resource *resource_)
+{
+    struct etna_screen *priv = etna_screen(screen);
+    struct etna_resource *resource = etna_resource(resource_);
+    if(resource == NULL)
+        return;
+    if(resource->last_ctx != NULL)
+    {
+        /* XXX This could fail when multiple contexts share this resource,
+         * (the last one to bind it will "own" it) or fail miserably if
+         * the context was since destroyed.
+         */
+        struct etna_pipe_context *ectx = resource->last_ctx;
+        DBG_F(ETNA_DBG_RESOURCE_MSGS, "%p: resource queued destroyed (%ix%ix%i)", resource, resource_->width0, resource_->height0, resource_->depth0);
+        etna_vidmem_queue_free(ectx->ctx->queue, resource->surface);
+        etna_vidmem_queue_free(ectx->ctx->queue, resource->ts);
+    } else {
+        DBG_F(ETNA_DBG_RESOURCE_MSGS, "%p: resource destroyed (%ix%ix%i)", resource, resource_->width0, resource_->height0, resource_->depth0);
+        etna_vidmem_free(priv->dev, resource->surface);
+        etna_vidmem_free(priv->dev, resource->ts);
+    }
+    FREE(resource);
+}
+
+/* Install the resource callbacks into the screen. */
+void etna_screen_resource_init(struct pipe_screen *pscreen)
+{
+    pscreen->can_create_resource = etna_screen_can_create_resource;
+    pscreen->resource_create = etna_screen_resource_create;
+    pscreen->resource_from_handle = etna_screen_resource_from_handle;
+    pscreen->resource_get_handle = u_default_resource_get_handle;
+    pscreen->resource_destroy = etna_screen_resource_destroy;
+}
diff --git a/src/gallium/drivers/etna/etna_resource.h b/src/gallium/drivers/etna/etna_resource.h
new file mode 100644
index 0000000000..bbf4720225
--- /dev/null
+++ b/src/gallium/drivers/etna/etna_resource.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2012-2013 Etnaviv Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +/* Resource handling. + */ +#ifndef H_ETNA_RESOURCE +#define H_ETNA_RESOURCE + +#include "pipe/p_state.h" + +struct pipe_screen; +struct etna_resource; + +void etna_resource_touch(struct pipe_context *pipe, struct pipe_resource *resource_); + +/* Allocate Tile Status for an etna resource. + * Tile status is a cache of the clear status per tile. This means a smaller surface + * has to be cleared which is faster. This is also called "fast clear". + */ +bool etna_screen_resource_alloc_ts(struct pipe_screen *screen, struct etna_resource *resource); + +void etna_screen_resource_init(struct pipe_screen *screen); + +#endif diff --git a/src/gallium/drivers/etna/etna_screen.c b/src/gallium/drivers/etna/etna_screen.c new file mode 100644 index 0000000000..1252f58241 --- /dev/null +++ b/src/gallium/drivers/etna/etna_screen.c @@ -0,0 +1,571 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial 
portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include "etna_screen.h"
+#include "etna_pipe.h"
+#include "etna_compiler.h"
+#include "etna_translate.h"
+#include "etna_debug.h"
+#include "etna_fence.h"
+#include "etna_resource.h"
+
+#include <etnaviv/etna_rs.h>
+#include <etnaviv/viv.h>
+#include <etnaviv/etna.h>
+#include <etnaviv/etna_util.h>
+
+#include "util/u_memory.h"
+#include "util/u_format.h"
+#include "util/u_transfer.h"
+#include "util/u_math.h"
+#include "util/u_inlines.h"
+
+#include <stdio.h>
+#include <string.h>
+
+uint32_t etna_mesa_debug = 0;
+
+/* True for characters that can be part of a debug option name. */
+static boolean etna_debug_is_name_char(char c)
+{
+    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
+           (c >= '0' && c <= '9') || c == '_';
+}
+
+/* True when `name` occurs in `str` as a whole option, i.e. not as part of a
+ * longer name. Any character outside [A-Za-z0-9_] acts as a separator.
+ */
+static boolean etna_debug_has_option(const char *str, const char *name)
+{
+    size_t len = strlen(name);
+    const char *pos = str;
+    while((pos = strstr(pos, name)) != NULL)
+    {
+        boolean starts_clean = (pos == str) || !etna_debug_is_name_char(pos[-1]);
+        boolean ends_clean = !etna_debug_is_name_char(pos[len]);
+        if(starts_clean && ends_clean)
+            return TRUE;
+        pos += 1;
+    }
+    return FALSE;
+}
+
+/* Set debug flags from ETNA_DEBUG environment variable
+ *
+ * `str` is the option string (NULL means no options). Each known option
+ * found in it ORs its flag into etna_mesa_debug. Options are matched as whole
+ * names, so "cflush_all" no longer switches on "flush_all" as well.
+ */
+static void etna_set_debug_flags(const char *str)
+{
+    struct option {
+        const char *name;
+        uint32_t flag;
+    };
+    static const struct option opts[] = {
+        { "dbg_msgs", ETNA_DBG_MSGS },
+        { "frame_msgs", ETNA_DBG_FRAME_MSGS },
+        { "resource_msgs", ETNA_DBG_RESOURCE_MSGS },
+        { "compiler_msgs", ETNA_DBG_COMPILER_MSGS },
+        { "linker_msgs", ETNA_DBG_LINKER_MSGS },
+        { "dump_shaders", ETNA_DBG_DUMP_SHADERS },
+        { "no_ts", ETNA_DBG_NO_TS },
+        { "cflush_all", ETNA_DBG_CFLUSH_ALL },
+        { "msaa2x", ETNA_DBG_MSAA_2X },
+        { "msaa4x", ETNA_DBG_MSAA_4X },
+        { "finish_all", ETNA_DBG_FINISH_ALL },
+        { "flush_all", ETNA_DBG_FLUSH_ALL }
+    };
+    unsigned i;
+
+    if (!str)
+        return;
+
+    for (i = 0; i < Elements(opts); i++) {
+        if (etna_debug_has_option(str, opts[i].name))
+            etna_mesa_debug |= opts[i].flag;
+    }
+}
+
+/* Tear down the screen: fences first, then the fence mutex, then the screen itself. */
+static void etna_screen_destroy( struct pipe_screen *screen )
+{
+    struct etna_screen *priv = etna_screen(screen);
+    etna_screen_destroy_fences(screen);
+    pipe_mutex_destroy(priv->fence_mutex);
+    FREE(screen);
+}
+
+/* Return the name stored in the screen. */
+static const char *etna_screen_get_name( struct pipe_screen *screen )
+{
+    struct etna_screen *priv = etna_screen(screen);
+    return priv->name;
+}
+
+/* Return the constant vendor string. */
+static const char *etna_screen_get_vendor( struct pipe_screen *screen )
+{
+    return "etnaviv";
+}
+
+/* Answer an integer/boolean capability query.
+ * Most answers are constants; vertex buffer and sampler counts come from the
+ * screen specs. Unknown capabilities are logged and reported as 0.
+ */
+static int etna_screen_get_param( struct pipe_screen *screen, enum pipe_cap param )
+{
+    struct etna_screen *priv = etna_screen(screen);
+    switch (param) {
+    /* Supported features (boolean caps). */
+    case PIPE_CAP_TWO_SIDED_STENCIL:
+    case PIPE_CAP_ANISOTROPIC_FILTER:
+    case PIPE_CAP_POINT_SPRITE:
+    case PIPE_CAP_TEXTURE_SHADOW_MAP:
+    case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+    case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: /* FS coordinates start in upper left */
+    case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: /* Pixel center on 0.5 */
+    case PIPE_CAP_SM3:
+    case PIPE_CAP_SEAMLESS_CUBE_MAP: /* ??? */
+    case PIPE_CAP_TEXTURE_BARRIER:
+    case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+    case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+    case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+    case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+    case PIPE_CAP_USER_CONSTANT_BUFFERS: /* constant buffers can be user buffers; they end up in command stream anyway */
+    case PIPE_CAP_TGSI_TEXCOORD: /* explicit TEXCOORD and POINTCOORD semantics */
+            return 1;
+
+    /* Memory */
+    case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+            return 256;
+    case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+            return 4; /* XXX could easily be supported */
+    case PIPE_CAP_GLSL_FEATURE_LEVEL:
+            return 120;
+
+    case PIPE_CAP_NPOT_TEXTURES: /* MUST be supported with GLES 2.0: what the capability specifies is filtering support */
+            return true; /* VIV_FEATURE(priv->dev, chipMinorFeatures1, NON_POWER_OF_TWO); */
+
+    case PIPE_CAP_MAX_VERTEX_BUFFERS:
+            return priv->specs.stream_count;
+    case PIPE_CAP_ENDIANNESS:
+            return PIPE_ENDIAN_LITTLE; /* on most Viv hw this is configurable (feature ENDIANNESS_CONFIG) */
+
+    /* Unsupported features. */
+    case PIPE_CAP_TEXTURE_SWIZZLE: /* XXX supported on gc2000 */
+    case PIPE_CAP_COMPUTE: /* XXX supported on gc2000 */
+    case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: /* only one colorbuffer supported, so mixing makes no sense */
+    case PIPE_CAP_PRIMITIVE_RESTART: /* primitive restart index AFAIK not supported */
+    case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: /* no floating point buffer support */
+    case PIPE_CAP_CONDITIONAL_RENDER: /* no occlusion queries */
+    case PIPE_CAP_TGSI_INSTANCEID: /* no idea, really */
+    case PIPE_CAP_START_INSTANCE: /* instancing not supported AFAIK */
+    case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:  /* instancing not supported AFAIK */
+    case PIPE_CAP_SHADER_STENCIL_EXPORT: /* Fragment shader cannot export stencil value */
+    case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: /* no dual-source supported */
+    case PIPE_CAP_TEXTURE_MULTISAMPLE: /* no texture multisample */
+    case PIPE_CAP_TEXTURE_MIRROR_CLAMP: /* only mirrored repeat */
+    case PIPE_CAP_INDEP_BLEND_ENABLE:
+    case PIPE_CAP_INDEP_BLEND_FUNC:
+    case PIPE_CAP_DEPTH_CLIP_DISABLE:
+    case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+    case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+    case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+    case PIPE_CAP_SCALED_RESOLVE: /* Should be possible to support */
+    case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: /* Don't skip strict max uniform limit check */
+    case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+    case PIPE_CAP_VERTEX_COLOR_CLAMPED:
+    case PIPE_CAP_USER_VERTEX_BUFFERS:
+    case PIPE_CAP_USER_INDEX_BUFFERS:
+    case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+            return 0;
+
+    /* Stream output. */
+    case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+    case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+    case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+    case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+            return 0;
+
+    /* Texturing. */
+    case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+    case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+            return 14;
+    case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: /* 3D textures not supported */
+            return 0;
+    case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+            return 0;
+    case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+            return priv->specs.fragment_sampler_count + priv->specs.vertex_sampler_count;
+    case PIPE_CAP_CUBE_MAP_ARRAY:
+            return 0;
+    case PIPE_CAP_MIN_TEXEL_OFFSET:
+            return -8;
+    case PIPE_CAP_MAX_TEXEL_OFFSET:
+            return 7;
+    case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+            return 0;
+    case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+            return 65536;
+
+    /* Render targets. */
+    case PIPE_CAP_MAX_RENDER_TARGETS:
+            return 1;
+
+    /* Viewports and scissors. */
+    case PIPE_CAP_MAX_VIEWPORTS:
+            return 1;
+
+    /* Timer queries. */
+    case PIPE_CAP_QUERY_TIME_ELAPSED:
+    case PIPE_CAP_OCCLUSION_QUERY:
+    case PIPE_CAP_QUERY_TIMESTAMP:
+            return 0;
+    case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+            return 0;
+
+    /* Preferences */
+    case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+            return 0;
+
+    default:
+        DBG("unknown param %d", param);
+        return 0;
+    }
+}
+
+/* Answer a floating point capability query with a constant; unknown
+ * capabilities are logged and reported as 0.
+ */
+static float etna_screen_get_paramf( struct pipe_screen *screen, enum pipe_capf param )
+{
+    switch (param) {
+    case PIPE_CAPF_MAX_LINE_WIDTH:
+    case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+    case PIPE_CAPF_MAX_POINT_WIDTH:
+    case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+            return 8192.0f;
+    case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+            return 16.0f;
+    case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+            return 16.0f;
+    case PIPE_CAPF_GUARD_BAND_LEFT:
+    case PIPE_CAPF_GUARD_BAND_TOP:
+    case PIPE_CAPF_GUARD_BAND_RIGHT:
+    case PIPE_CAPF_GUARD_BAND_BOTTOM:
+            return 0.0f;
+    default:
+        DBG("unknown paramf %d", param);
+        return 0;
+    }
+}
+
+/* Answer a per-shader-stage capability query.
+ * Only fragment and vertex shaders get real answers; compute, geometry and
+ * unknown stages report 0 for everything.
+ */
+static int etna_screen_get_shader_param( struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param )
+{
+    struct etna_screen *priv = etna_screen(screen);
+    switch(shader)
+    {
+    case PIPE_SHADER_FRAGMENT:
+    case PIPE_SHADER_VERTEX:
+        break;
+    case PIPE_SHADER_COMPUTE:
+    case PIPE_SHADER_GEOMETRY:
+        /* maybe we could emulate.. */
+        return 0;
+    default:
+        DBG("unknown shader type %d", shader);
+        return 0;
+    }
+
+    switch (param) {
+    case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+    case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+    case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+    case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+            return ETNA_MAX_TOKENS;
+    case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+            return ETNA_MAX_DEPTH; /* XXX */
+    case PIPE_SHADER_CAP_MAX_INPUTS:
+            return 16; /* XXX this amount is reserved */
+    case PIPE_SHADER_CAP_MAX_TEMPS:
+            return 64; /* Max native temporaries. */
+    case PIPE_SHADER_CAP_MAX_ADDRS:
+            return 1; /* Max native address registers */
+    case PIPE_SHADER_CAP_MAX_CONSTS:
+            /* Absolute maximum on ideal hardware is 256 (as that's how much register space is reserved);
+             * immediates are included in here, so actual space available for constants will always be less.
+             * Also the amount of registers really available depends on the hw.
+             * XXX see also: viv_specs.num_constants, if this is 0 we need to come up with some default value.
+             */
+            return 256;
+    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+            return 1;
+    case PIPE_SHADER_CAP_MAX_PREDS:
+            return 0; /* nothing uses this */
+    case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+            return 1;
+    case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+    case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+    case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+    case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+            return 1;
+    case PIPE_SHADER_CAP_SUBROUTINES:
+            return 0;
+    case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+            return VIV_FEATURE(priv->dev, chipMinorFeatures0, HAS_SQRT_TRIG);
+    case PIPE_SHADER_CAP_TGSI_POW_SUPPORTED:
+            return false;
+    case PIPE_SHADER_CAP_TGSI_LRP_SUPPORTED:
+            return false;
+    case PIPE_SHADER_CAP_INTEGERS: /* XXX supported on gc2000 but not yet implemented */
+            return 0;
+    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+            return shader==PIPE_SHADER_FRAGMENT ? priv->specs.fragment_sampler_count :
+                                                  priv->specs.vertex_sampler_count;
+    case PIPE_SHADER_CAP_PREFERRED_IR:
+            return PIPE_SHADER_IR_TGSI;
+    default:
+        DBG("unknown shader param %d", param);
+        return 0;
+    }
+    return 0;
+}
+
+/* Video capabilities are not implemented; always reports 0. */
+static int etna_screen_get_video_param( struct pipe_screen *screen,
+                     enum pipe_video_profile profile,
+                     enum pipe_video_cap param )
+{
+    DBG("unimplemented etna_screen_get_video_param");
+    return 0;
+}
+
+/* Compute capabilities are not implemented; always reports 0 and leaves `ret` untouched. */
+static int etna_screen_get_compute_param(struct pipe_screen *screen,
+                     enum pipe_compute_cap param,
+                     void *ret)
+{
+    DBG("unimplemented etna_screen_get_compute_param");
+    return 0;
+}
+
+/* Timestamps are not implemented; always reports 0. */
+static uint64_t etna_screen_get_timestamp(struct pipe_screen *screen)
+{
+    DBG("unimplemented etna_screen_get_timestamp");
+    return 0;
+}
+
+/* Create a new pipe context on this screen's device and specs. */
+static struct pipe_context * etna_screen_context_create( struct pipe_screen *screen,
+                                        void *priv )
+{
+    struct etna_screen *es = etna_screen(screen);
+    struct pipe_context *ctx = etna_new_pipe_context(es->dev, &es->specs, screen, priv);
+    return ctx;
+}
+
+/* Report whether `format` can be used for every binding requested in `usage`.
+ *
+ * Each requested binding is checked against the matching translate_* helper
+ * and added to `allowed` when the format is usable for it. The answer is true
+ * only when all requested bits were allowed. Targets outside the known range
+ * are rejected.
+ */
+static boolean etna_screen_is_format_supported( struct pipe_screen *screen,
+                               enum pipe_format format,
+                               enum pipe_texture_target target,
+                               unsigned sample_count,
+                               unsigned usage)
+{
+    struct etna_screen *priv = etna_screen(screen);
+    unsigned allowed = 0;
+    if (target >= PIPE_MAX_TEXTURE_TYPES)
+    {
+        return FALSE;
+    }
+
+    if (usage & PIPE_BIND_RENDER_TARGET)
+    {
+        /* if render target, must be RS-supported format */
+        if(translate_rt_format(format, true) != ETNA_NO_MATCH)
+        {
+            /* Validate MSAA; number of samples must be allowed, and render target must have
+             * MSAA'able format.
+             */
+            if(sample_count > 1)
+            {
+                if(translate_samples_to_xyscale(sample_count, NULL, NULL, NULL) &&
+                   translate_msaa_format(format, true) != ETNA_NO_MATCH)
+                {
+                    allowed |= PIPE_BIND_RENDER_TARGET;
+                }
+            } else {
+                allowed |= PIPE_BIND_RENDER_TARGET;
+            }
+        }
+    }
+    if (usage & PIPE_BIND_DEPTH_STENCIL)
+    {
+        /* must be supported depth format */
+        if(translate_depth_format(format, true) != ETNA_NO_MATCH)
+        {
+            allowed |= PIPE_BIND_DEPTH_STENCIL;
+        }
+    }
+    if (usage & PIPE_BIND_SAMPLER_VIEW)
+    {
+        /* must be supported texture format */
+        if(sample_count < 2 && translate_texture_format(format, true) != ETNA_NO_MATCH)
+        {
+            allowed |= PIPE_BIND_SAMPLER_VIEW;
+        }
+    }
+    if (usage & PIPE_BIND_VERTEX_BUFFER)
+    {
+        /* must be supported vertex format: allow only when the translation
+         * finds a match, like every other binding check in this function */
+        if(translate_vertex_format_type(format, true) != ETNA_NO_MATCH)
+        {
+            allowed |= PIPE_BIND_VERTEX_BUFFER;
+        }
+    }
+    if (usage & PIPE_BIND_INDEX_BUFFER)
+    {
+        /* must be supported index format */
+        if(format == PIPE_FORMAT_I8_UINT ||
+           format == PIPE_FORMAT_I16_UINT ||
+           (format == PIPE_FORMAT_I32_UINT && VIV_FEATURE(priv->dev, chipFeatures, 32_BIT_INDICES)))
+        {
+            allowed |= PIPE_BIND_INDEX_BUFFER;
+        }
+    }
+    /* Always allowed */
+    allowed |= usage & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
+            PIPE_BIND_SHARED | PIPE_BIND_TRANSFER_READ | PIPE_BIND_TRANSFER_WRITE);
+
+    return usage == allowed;
+}
+
+/* Video formats are not implemented; always reports false. */
+static boolean etna_screen_is_video_format_supported( struct pipe_screen *screen,
+                                     enum pipe_format format,
+                                     enum pipe_video_profile profile )
+{
+    DBG("unimplemented etna_screen_is_video_format_supported");
+    return false;
+}
+
+/* XXX this should use a blit or resource copy, when implemented, instead
+ * of programming the RS directly.
+ */ +static void etna_screen_flush_frontbuffer( struct pipe_screen *screen, + struct pipe_resource *resource, + unsigned level, unsigned layer, + void *winsys_drawable_handle ) +{ + struct etna_rs_target *drawable = (struct etna_rs_target *)winsys_drawable_handle; + struct etna_resource *rt_resource = etna_resource(resource); + struct etna_pipe_context *ectx = rt_resource->last_ctx; + struct pipe_fence_handle **fence = 0; + assert(level <= resource->last_level && layer < resource->array_size); + assert(ectx); + struct etna_ctx *ctx = ectx->ctx; + + /* release previous fence, make reference to fence if we need one */ + screen->fence_reference(screen, &drawable->fence, NULL); + if(drawable->want_fence) + fence = &drawable->fence; + + etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR); + etna_stall(ctx, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE); + + /* Set up color TS to source surface before blit, if needed */ + if(rt_resource->levels[level].ts_address != ectx->gpu3d.TS_COLOR_STATUS_BASE) + { + if(rt_resource->levels[level].ts_address) + { + etna_set_state_multi(ctx, VIVS_TS_MEM_CONFIG, 4, (uint32_t[]) { + ectx->gpu3d.TS_MEM_CONFIG = VIVS_TS_MEM_CONFIG_COLOR_FAST_CLEAR, /* XXX |= VIVS_TS_MEM_CONFIG_MSAA | translate_msaa_format(cbuf->format) */ + ectx->gpu3d.TS_COLOR_STATUS_BASE = rt_resource->levels[level].ts_address, + ectx->gpu3d.TS_COLOR_SURFACE_BASE = rt_resource->levels[level].address, + ectx->gpu3d.TS_COLOR_CLEAR_VALUE = rt_resource->levels[level].clear_value + }); + } else { + etna_set_state(ctx, VIVS_TS_MEM_CONFIG, 0x00000000); + ectx->gpu3d.TS_MEM_CONFIG = 0; + } + ectx->dirty_bits |= ETNA_STATE_TS; + } + + int msaa_xscale=1, msaa_yscale=1; + if(!translate_samples_to_xyscale(resource->nr_samples, &msaa_xscale, &msaa_yscale, NULL)) + return; + + /* Kick off RS here */ + struct compiled_rs_state copy_to_screen; + etna_compile_rs_state(©_to_screen, &(struct rs_state){ + .source_format = translate_rt_format(rt_resource->base.format, false), + 
.source_tiling = rt_resource->layout, + .source_addr = rt_resource->levels[level].address, + .source_stride = rt_resource->levels[level].stride, + .dest_format = drawable->rs_format, + .dest_tiling = ETNA_LAYOUT_LINEAR, + .dest_addr = drawable->addr, + .dest_stride = drawable->stride, + .downsample_x = msaa_xscale > 1, + .downsample_y = msaa_yscale > 1, + .swap_rb = drawable->swap_rb, + .dither = {0xffffffff, 0xffffffff}, // XXX dither when going from 24 to 16 bit? + .clear_mode = VIVS_RS_CLEAR_CONTROL_MODE_DISABLED, + .width = drawable->width * msaa_xscale, + .height = drawable->height * msaa_yscale + }); + etna_submit_rs_state(ctx, ©_to_screen); + DBG_F(ETNA_DBG_FRAME_MSGS, + "Queued RS command to flush screen from %08x to %08x stride=%08x width=%i height=%i, ctx %p", + rt_resource->levels[0].address, + drawable->addr, drawable->stride, + drawable->width, drawable->height, ctx); + ectx->base.flush(&ectx->base, fence, 0); +} + +struct pipe_screen * +etna_screen_create(struct viv_conn *dev) +{ + struct etna_screen *screen = CALLOC_STRUCT(etna_screen); + struct pipe_screen *pscreen = &screen->base; + screen->dev = dev; + + etna_set_debug_flags(getenv("ETNA_DEBUG")); + + /* Set up driver identification */ + snprintf(screen->name, ETNA_SCREEN_NAME_LEN, "Vivante GC%x rev %04x, %s", + dev->chip.chip_model, dev->chip.chip_revision, dev->kernel_driver.name); + + /* Determine specs for device */ + screen->specs.can_supertile = VIV_FEATURE(dev, chipMinorFeatures0, SUPER_TILED); + screen->specs.bits_per_tile = VIV_FEATURE(dev, chipMinorFeatures0, 2BITPERTILE)?2:4; + screen->specs.ts_clear_value = VIV_FEATURE(dev, chipMinorFeatures0, 2BITPERTILE)?0x55555555:0x11111111; + screen->specs.vertex_sampler_offset = 8; /* vertex and fragment samplers live in one address space, with vertex shaders at this offset */ + screen->specs.fragment_sampler_count = 8; + screen->specs.vertex_sampler_count = 4; + screen->specs.vs_need_z_div = dev->chip.chip_model < 0x1000 && dev->chip.chip_model 
!= 0x880; + screen->specs.vertex_output_buffer_size = dev->chip.vertex_output_buffer_size; + screen->specs.vertex_cache_size = dev->chip.vertex_cache_size; + screen->specs.shader_core_count = dev->chip.shader_core_count; + screen->specs.stream_count = dev->chip.stream_count; + screen->specs.has_sin_cos_sqrt = VIV_FEATURE(dev, chipMinorFeatures0, HAS_SQRT_TRIG); + screen->specs.has_shader_range_registers = dev->chip.chip_model >= 0x1000 || dev->chip.chip_model == 0x880; + if (dev->chip.instruction_count > 256) /* unified instruction memory? */ + { + screen->specs.vs_offset = 0xC000; + screen->specs.ps_offset = 0xD000; //like vivante driver + screen->specs.max_instructions = 256; + } else { + screen->specs.vs_offset = 0x4000; + screen->specs.ps_offset = 0x6000; + screen->specs.max_instructions = dev->chip.instruction_count/2; + } + screen->specs.max_varyings = dev->chip.varyings_count; + screen->specs.max_registers = dev->chip.register_max; + if (dev->chip.chip_model < chipModel_GC4000) /* from QueryShaderCaps in kernel driver */ + { + screen->specs.max_vs_uniforms = 168; + screen->specs.max_ps_uniforms = 64; + } else { + screen->specs.max_vs_uniforms = 256; + screen->specs.max_ps_uniforms = 256; + } + + screen->specs.max_texture_size = VIV_FEATURE(dev, chipMinorFeatures0, TEXTURE_8K)?8192:4096; + screen->specs.max_rendertarget_size = VIV_FEATURE(dev, chipMinorFeatures0, RENDERTARGET_8K)?8192:4096; + + /* Initialize vtable */ + pscreen->destroy = etna_screen_destroy; + pscreen->get_name = etna_screen_get_name; + pscreen->get_vendor = etna_screen_get_vendor; + pscreen->get_param = etna_screen_get_param; + pscreen->get_paramf = etna_screen_get_paramf; + pscreen->get_shader_param = etna_screen_get_shader_param; + pscreen->get_video_param = etna_screen_get_video_param; + pscreen->get_compute_param = etna_screen_get_compute_param; + pscreen->get_timestamp = etna_screen_get_timestamp; + pscreen->context_create = etna_screen_context_create; + pscreen->is_format_supported = 
etna_screen_is_format_supported; + pscreen->is_video_format_supported = etna_screen_is_video_format_supported; + pscreen->flush_frontbuffer = etna_screen_flush_frontbuffer; + + etna_screen_fence_init(pscreen); + etna_screen_resource_init(pscreen); + + pipe_mutex_init(screen->fence_mutex); + + return pscreen; +} diff --git a/src/gallium/drivers/etna/etna_screen.h b/src/gallium/drivers/etna/etna_screen.h new file mode 100644 index 0000000000..f45d881122 --- /dev/null +++ b/src/gallium/drivers/etna/etna_screen.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef ETNA_SCREEN_H_ +#define ETNA_SCREEN_H_ + +#include "etna_internal.h" + +#include "pipe/p_screen.h" +#include "os/os_thread.h" + +struct viv_conn; + +#define ETNA_SCREEN_NAME_LEN (64) +/* Gallium screen structure for etna driver. 
+ */ +struct etna_screen { + struct pipe_screen base; + char name[ETNA_SCREEN_NAME_LEN]; + struct viv_conn *dev; + struct etna_pipe_specs specs; + + pipe_mutex fence_mutex; + struct etna_fence *fence_freelist; +}; + +/* Resolve target. + * Used by etna_screen_flush_frontbuffer + */ +struct etna_rs_target +{ + unsigned rs_format; + bool swap_rb; + unsigned width, height; + size_t addr; /* GPU address */ + size_t stride; + bool want_fence; /* should flush_frontbuffer return a fence? */ + struct pipe_fence_handle *fence; +}; + +static INLINE struct etna_screen * +etna_screen(struct pipe_screen *pscreen) +{ + return (struct etna_screen *)pscreen; +} + +struct pipe_screen * +etna_screen_create(struct viv_conn *dev); + +#endif diff --git a/src/gallium/drivers/etna/etna_shader.c b/src/gallium/drivers/etna/etna_shader.c new file mode 100644 index 0000000000..f29df24585 --- /dev/null +++ b/src/gallium/drivers/etna/etna_shader.c @@ -0,0 +1,298 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +/* Shader state handling. + */ +#include "etna_shader.h" + +#include "etna_pipe.h" +#include "etna_compiler.h" +#include "etna_debug.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include <etnaviv/state_3d.xml.h> + +/* Fetch uniforms from user buffer, if bound, and mark respective uniform + * bank as dirty. */ +static void etna_fetch_uniforms(struct pipe_context *pipe, uint shader) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + struct pipe_constant_buffer *buf = NULL; + switch(shader) + { + case PIPE_SHADER_VERTEX: + buf = &priv->vs_cbuf_s; + if(buf->user_buffer) + { + memcpy(priv->shader_state.VS_UNIFORMS, buf->user_buffer, MIN2(buf->buffer_size, priv->vs->const_size * 4)); + priv->dirty_bits |= ETNA_STATE_VS_UNIFORMS; + } + break; + case PIPE_SHADER_FRAGMENT: + buf = &priv->fs_cbuf_s; + if(buf->user_buffer) + { + memcpy(priv->shader_state.PS_UNIFORMS, buf->user_buffer, MIN2(buf->buffer_size, priv->fs->const_size * 4)); + priv->dirty_bits |= ETNA_STATE_PS_UNIFORMS; + } + break; + default: printf("Unhandled shader type %i\n", shader); + } +} + + +/* Link vs and fs together: fill in shader_state from vs and fs + * as this function is called every time a new fs or vs is bound, the goal is to do + * little processing as possible here, and to precompute as much as possible in the + * vs/fs shader_object. + * XXX we could cache the link result for a certain set of VS/PS; usually a pair + * of VS and PS will be used together anyway. 
/* Link vs and fs together: fill in the compiled shader state `cs` from the
 * vertex shader object `vs` and the fragment shader object `fs`.
 *
 * As this function is called every time a new fs or vs is bound, the goal is
 * to do as little processing as possible here, and to precompute as much as
 * possible in the vs/fs shader objects.
 *
 * `pipe` is only used at the end, to re-fetch uniforms from any bound user
 * constant buffers via etna_fetch_uniforms().
 *
 * XXX we could cache the link result for a certain set of VS/PS; usually a pair
 * of VS and PS will be used together anyway.
 */
void etna_link_shaders(struct pipe_context *pipe,
                       struct compiled_shader_state *cs,
                       const struct etna_shader_object *vs, const struct etna_shader_object *fs)
{
    assert(vs->processor == TGSI_PROCESSOR_VERTEX);
    assert(fs->processor == TGSI_PROCESSOR_FRAGMENT);
#ifdef DEBUG
    if(DBG_ENABLED(ETNA_DBG_DUMP_SHADERS))
    {
        etna_dump_shader_object(vs);
        etna_dump_shader_object(fs);
    }
#endif
    /* set last_varying_2x flag if the last varying has 1 or 2 components */
    bool last_varying_2x = false;
    if(fs->num_inputs>0 && fs->inputs[fs->num_inputs-1].num_components <= 2)
        last_varying_2x = true;

    cs->RA_CONTROL = VIVS_RA_CONTROL_UNK0 |
                          (last_varying_2x ? VIVS_RA_CONTROL_LAST_VARYING_2X : 0);

    /* one PA attribute entry per fragment shader input */
    cs->PA_ATTRIBUTE_ELEMENT_COUNT = VIVS_PA_ATTRIBUTE_ELEMENT_COUNT_COUNT(fs->num_inputs);
    for(int idx=0; idx<fs->num_inputs; ++idx)
        cs->PA_SHADER_ATTRIBUTES[idx] = fs->inputs[idx].pa_attributes;

    cs->VS_END_PC = vs->code_size / 4;
    cs->VS_OUTPUT_COUNT = fs->num_inputs + 1; /* position + varyings */
    /* Number of vertex elements determines number of VS inputs. Otherwise, the GPU crashes */
    cs->VS_INPUT_COUNT = VIVS_VS_INPUT_COUNT_UNK8(vs->input_count_unk8);
    cs->VS_TEMP_REGISTER_CONTROL =
                              VIVS_VS_TEMP_REGISTER_CONTROL_NUM_TEMPS(vs->num_temps);

    /* link vs outputs to fs inputs */
    struct etna_shader_link_info link = {};
    if(etna_link_shader_objects(&link, vs, fs))
    {
        /* NOTE(review): with assertions compiled out, execution continues
         * with whatever `link` contains after a failed link. */
        assert(0); /* linking failed: some fs inputs do not have corresponding vs outputs */
    }
    DBG_F(ETNA_DBG_LINKER_MSGS, "link result:");
    for(int idx=0; idx<fs->num_inputs; ++idx)
    {
        DBG_F(ETNA_DBG_LINKER_MSGS," %i -> %i", link.varyings_vs_reg[idx], idx+1);
    }

    /* vs outputs (varyings): position first, then one entry per fs input,
     * then point size if present.
     * NOTE(review): vs_output has 16 slots and up to fs->num_inputs + 2 are
     * written; nothing visible here bounds fs->num_inputs -- confirm the
     * compiler limits it to 14 or fewer.
     */
    uint32_t vs_output[16] = {0};
    int varid = 0;
    vs_output[varid++] = vs->vs_pos_out_reg;
    for(int idx=0; idx<fs->num_inputs; ++idx)
        vs_output[varid++] = link.varyings_vs_reg[idx];
    if(vs->vs_pointsize_out_reg >= 0)
        vs_output[varid++] = vs->vs_pointsize_out_reg; /* pointsize is last */

    /* pack four 8-bit register indices into each VS_OUTPUT word */
    for(int idx=0; idx<4; ++idx)
    {
        cs->VS_OUTPUT[idx] =(vs_output[idx*4+0] << 0)  | (vs_output[idx*4+1] << 8) |
                                 (vs_output[idx*4+2] << 16) | (vs_output[idx*4+3] << 24);
    }

    /* NOTE(review): this test uses `!= -1` while the one above uses `>= 0`;
     * the two only agree if vs_pointsize_out_reg is never below -1. */
    if(vs->vs_pointsize_out_reg != -1)
    {
        /* vertex shader outputs point coordinate, provide extra output and make sure PA config is
         * not masked */
        cs->PA_CONFIG = ~0;
        cs->VS_OUTPUT_COUNT_PSIZE = cs->VS_OUTPUT_COUNT + 1;
    } else {
        /* vertex shader does not output point coordinate, make sure that POINT_SIZE_ENABLE is masked
         * and no extra output is given */
        cs->PA_CONFIG = ~VIVS_PA_CONFIG_POINT_SIZE_ENABLE;
        cs->VS_OUTPUT_COUNT_PSIZE = cs->VS_OUTPUT_COUNT;
    }

    /* vs inputs (attributes): four 8-bit register indices per VS_INPUT word.
     * NOTE(review): vs_input holds 4 words, i.e. room for 16 inputs; confirm
     * vs->num_inputs cannot exceed that. */
    uint32_t vs_input[4] = {0};
    for(int idx=0; idx<vs->num_inputs; ++idx)
        vs_input[idx/4] |= vs->inputs[idx].reg << ((idx%4)*8);
    for(int idx=0; idx<4; ++idx)
        cs->VS_INPUT[idx] = vs_input[idx];

    cs->VS_LOAD_BALANCING = vs->vs_load_balancing;
    cs->VS_START_PC = 0;

    cs->PS_END_PC = fs->code_size / 4;
    cs->PS_OUTPUT_REG = fs->ps_color_out_reg;
    cs->PS_INPUT_COUNT = VIVS_PS_INPUT_COUNT_COUNT(fs->num_inputs + 1) | /* Number of inputs plus position */
                              VIVS_PS_INPUT_COUNT_UNK8(fs->input_count_unk8);
    cs->PS_TEMP_REGISTER_CONTROL =
                              VIVS_PS_TEMP_REGISTER_CONTROL_NUM_TEMPS(MAX2(fs->num_temps, fs->num_inputs + 1));
    cs->PS_CONTROL = VIVS_PS_CONTROL_UNK1; /* XXX when can we set BYPASS? */
    cs->PS_START_PC = 0;

    /* Precompute PS_INPUT_COUNT and TEMP_REGISTER_CONTROL in the case of MSAA mode, avoids
     * some fumbling in sync_context.
     */
    cs->PS_INPUT_COUNT_MSAA = VIVS_PS_INPUT_COUNT_COUNT(fs->num_inputs + 2) | /* MSAA adds another input */
                              VIVS_PS_INPUT_COUNT_UNK8(fs->input_count_unk8);
    cs->PS_TEMP_REGISTER_CONTROL_MSAA =
                              VIVS_PS_TEMP_REGISTER_CONTROL_NUM_TEMPS(MAX2(fs->num_temps, fs->num_inputs + 2));

    /* Varying layout: a 4-bit component count per input packed into
     * num_components, and a 2-bit usage code per component packed into
     * component_use. */
    uint32_t total_components = 0;
    uint32_t num_components = 0;
    uint32_t component_use[2] = {0};
    for(int idx=0; idx<fs->num_inputs; ++idx)
    {
        num_components |= fs->inputs[idx].num_components << ((idx%8)*4);
        for(int comp=0; comp<fs->inputs[idx].num_components; ++comp)
        {
            unsigned use = VARYING_COMPONENT_USE_USED;
            /* the first two components of a point-coordinate input get dedicated usage codes */
            if(fs->inputs[idx].semantic.Name == TGSI_SEMANTIC_PCOORD)
            {
                if(comp == 0)
                    use = VARYING_COMPONENT_USE_POINTCOORD_X;
                else if(comp == 1)
                    use = VARYING_COMPONENT_USE_POINTCOORD_Y;
            }
            /* 16 components per uint32 */
            component_use[total_components/16] |= use << ((total_components%16)*2);
            total_components += 1;
        }
    }
    cs->GL_VARYING_TOTAL_COMPONENTS = VIVS_GL_VARYING_TOTAL_COMPONENTS_NUM(align(total_components, 2));
    cs->GL_VARYING_NUM_COMPONENTS = num_components;
    cs->GL_VARYING_COMPONENT_USE[0] = component_use[0];
    cs->GL_VARYING_COMPONENT_USE[1] = component_use[1];

    /* reference instruction memory (pointers into the shader objects, not copies) */
    cs->vs_inst_mem_size = vs->code_size;
    cs->VS_INST_MEM = vs->code;
    cs->ps_inst_mem_size = fs->code_size;
    cs->PS_INST_MEM = fs->code;

    /* uniforms layout -- first constants, then immediates */
    cs->vs_uniforms_size = vs->const_size + vs->imm_size;
    memcpy(&cs->VS_UNIFORMS[vs->imm_base], vs->imm_data, vs->imm_size*4);

    cs->ps_uniforms_size = fs->const_size + fs->imm_size;
    memcpy(&cs->PS_UNIFORMS[fs->imm_base], fs->imm_data, fs->imm_size*4);

    /* fetch any previous uniforms from buffer */
    etna_fetch_uniforms(pipe, PIPE_SHADER_VERTEX);
    etna_fetch_uniforms(pipe, PIPE_SHADER_FRAGMENT);
}
pipe_context *pipe, void *fss_) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + struct etna_shader_object *fss = (struct etna_shader_object*)fss_; + priv->dirty_bits |= ETNA_STATE_SHADER | ETNA_STATE_PS_UNIFORMS; + assert(fss == NULL || fss->processor == TGSI_PROCESSOR_FRAGMENT); + priv->fs = fss; +} + +static void etna_pipe_bind_vs_state(struct pipe_context *pipe, void *vss_) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + struct etna_shader_object *vss = (struct etna_shader_object*)vss_; + priv->dirty_bits |= ETNA_STATE_SHADER | ETNA_STATE_VS_UNIFORMS; + assert(vss == NULL || vss->processor == TGSI_PROCESSOR_VERTEX); + priv->vs = vss; +} + +void etna_pipe_shader_init(struct pipe_context *pc) +{ + pc->create_fs_state = etna_pipe_create_shader_state; + pc->bind_fs_state = etna_pipe_bind_fs_state; + pc->delete_fs_state = etna_pipe_delete_shader_state; + pc->create_vs_state = etna_pipe_create_shader_state; + pc->bind_vs_state = etna_pipe_bind_vs_state; + pc->delete_vs_state = etna_pipe_delete_shader_state; + /* XXX create_gs_state */ + /* XXX bind_gs_state */ + /* XXX delete_gs_state */ + pc->set_constant_buffer = etna_set_constant_buffer; +} diff --git a/src/gallium/drivers/etna/etna_shader.h b/src/gallium/drivers/etna/etna_shader.h new file mode 100644 index 0000000000..32c38fbb19 --- /dev/null +++ b/src/gallium/drivers/etna/etna_shader.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the 
+ * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +/* Shader state handling. + */ +#ifndef H_ETNA_SHADER + +#include "pipe/p_state.h" + +struct etna_shader_object; +struct compiled_shader_state; + +void etna_link_shaders(struct pipe_context *pipe, + struct compiled_shader_state *cs, + const struct etna_shader_object *vs, const struct etna_shader_object *fs); + +void etna_pipe_shader_init(struct pipe_context *pipe); + +#endif diff --git a/src/gallium/drivers/etna/etna_surface.c b/src/gallium/drivers/etna/etna_surface.c new file mode 100644 index 0000000000..5235d4582c --- /dev/null +++ b/src/gallium/drivers/etna/etna_surface.c @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +/* Surface handling */ +#include "etna_surface.h" + +#include "etna_clear_blit.h" +#include "etna_internal.h" +#include "etna_pipe.h" +#include "etna_resource.h" +#include "etna_translate.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" + +#include <etnaviv/common.xml.h> +#include <etnaviv/state.xml.h> +#include <etnaviv/state_3d.xml.h> + +static struct pipe_surface *etna_pipe_create_surface(struct pipe_context *pipe, + struct pipe_resource *resource_, + const struct pipe_surface *templat) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + struct etna_surface *surf = CALLOC_STRUCT(etna_surface); + struct etna_resource *resource = etna_resource(resource_); + assert(templat->u.tex.first_layer == templat->u.tex.last_layer); + unsigned layer = templat->u.tex.first_layer; + unsigned level = templat->u.tex.level; + assert(layer < resource->base.array_size); + + surf->base.context = pipe; + + pipe_reference_init(&surf->base.reference, 1); + pipe_resource_reference(&surf->base.texture, &resource->base); + + /* Allocate a TS for the resource if there isn't one yet, + * and it is allowed by the hw (width is a multiple of 16). + */ + /* XXX for now, don't do TS for render textures as this path + * is not stable. 
+ */ + if(!DBG_ENABLED(ETNA_DBG_NO_TS) && + !resource->ts && + !(resource->base.bind & (PIPE_BIND_SAMPLER_VIEW)) && + (resource->levels[level].padded_width & ETNA_RS_WIDTH_MASK) == 0 && + (resource->levels[level].padded_height & ETNA_RS_HEIGHT_MASK) == 0) + { + etna_screen_resource_alloc_ts(pipe->screen, resource); + } + + surf->base.texture = &resource->base; + surf->base.format = resource->base.format; + surf->base.width = resource->levels[level].width; + surf->base.height = resource->levels[level].height; + surf->base.writable = templat->writable; // what is this for anyway + surf->base.u = templat->u; + + surf->layout = resource->layout; + surf->level = &resource->levels[level]; /* Keep pointer to actual level to set clear color on */ + /* underlying resource instead of surface */ + surf->surf = resource->levels[level]; /* Make copy of level to narrow down address to layer */ + /* XXX we don't really need a copy but it's convenient */ + surf->surf.address += layer * surf->surf.layer_stride; + surf->surf.logical += layer * surf->surf.layer_stride; + + if(surf->surf.ts_address) + { + /* This abuses the RS as a plain buffer memset(). + Currently uses a fixed row size of 64 bytes. Some benchmarking with different sizes may be in order. 
+ */ + etna_compile_rs_state(&surf->clear_command, &(struct rs_state){ + .source_format = RS_FORMAT_A8R8G8B8, + .dest_format = RS_FORMAT_A8R8G8B8, + .dest_addr = surf->surf.ts_address, + .dest_stride = 0x40, + .dest_tiling = ETNA_LAYOUT_TILED, + .dither = {0xffffffff, 0xffffffff}, + .width = 16, + .height = etna_align_up(surf->surf.ts_size/0x40, 4), + .clear_value = {priv->specs.ts_clear_value}, + .clear_mode = VIVS_RS_CLEAR_CONTROL_MODE_ENABLED1, + .clear_bits = 0xffff + }); + } else { + etna_rs_gen_clear_surface(&surf->clear_command, surf, surf->level->clear_value); + } + etna_resource_touch(pipe, surf->base.texture); + return &surf->base; +} + +static void etna_pipe_surface_destroy(struct pipe_context *pipe, struct pipe_surface *surf) +{ + //struct etna_pipe_context *priv = etna_pipe_context(pipe); + pipe_resource_reference(&surf->texture, NULL); + FREE(surf); +} + + +void etna_pipe_surface_init(struct pipe_context *pc) +{ + pc->create_surface = etna_pipe_create_surface; + pc->surface_destroy = etna_pipe_surface_destroy; +} diff --git a/src/gallium/drivers/etna/etna_surface.h b/src/gallium/drivers/etna/etna_surface.h new file mode 100644 index 0000000000..8002018545 --- /dev/null +++ b/src/gallium/drivers/etna/etna_surface.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +/* Surface handling */ +#ifndef H_ETNA_SURFACE +#define H_ETNA_SURFACE + +struct pipe_context; + +void etna_pipe_surface_init(struct pipe_context *pipe); + +#endif diff --git a/src/gallium/drivers/etna/etna_texture.c b/src/gallium/drivers/etna/etna_texture.c new file mode 100644 index 0000000000..6f814909b8 --- /dev/null +++ b/src/gallium/drivers/etna/etna_texture.c @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +/* Texture CSOs */ +#include "etna_texture.h" + +#include "etna_internal.h" +#include "etna_pipe.h" +#include "etna_translate.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" +#include "util/u_memory.h" + +#include <etnaviv/common.xml.h> +#include <etnaviv/state.xml.h> +#include <etnaviv/state_3d.xml.h> + +static void *etna_pipe_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *ss) +{ + //struct etna_pipe_context *priv = etna_pipe_context(pipe); + struct compiled_sampler_state *cs = CALLOC_STRUCT(compiled_sampler_state); + cs->TE_SAMPLER_CONFIG0 = + /* XXX get from sampler view: VIVS_TE_SAMPLER_CONFIG0_TYPE(TEXTURE_TYPE_2D)| */ + VIVS_TE_SAMPLER_CONFIG0_UWRAP(translate_texture_wrapmode(ss->wrap_s))| + VIVS_TE_SAMPLER_CONFIG0_VWRAP(translate_texture_wrapmode(ss->wrap_t))| + VIVS_TE_SAMPLER_CONFIG0_MIN(translate_texture_filter(ss->min_img_filter))| + VIVS_TE_SAMPLER_CONFIG0_MIP(translate_texture_mipfilter(ss->min_mip_filter))| + VIVS_TE_SAMPLER_CONFIG0_MAG(translate_texture_filter(ss->mag_img_filter)); + /* XXX get from sampler view: VIVS_TE_SAMPLER_CONFIG0_FORMAT(tex_format) */ + cs->TE_SAMPLER_CONFIG1 = 0; /* VIVS_TE_SAMPLER_CONFIG1 (swizzle, extended format) fully determined by sampler view */ + cs->TE_SAMPLER_LOD_CONFIG = + (ss->lod_bias != 0.0 ? 
VIVS_TE_SAMPLER_LOD_CONFIG_BIAS_ENABLE : 0) | + VIVS_TE_SAMPLER_LOD_CONFIG_BIAS(etna_float_to_fixp55(ss->lod_bias)); + if(ss->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) + { + cs->min_lod = etna_float_to_fixp55(ss->min_lod); + cs->max_lod = etna_float_to_fixp55(ss->max_lod); + } else { /* when not mipmapping, we need to set max/min lod so that always lowest LOD is selected */ + cs->min_lod = cs->max_lod = etna_float_to_fixp55(ss->min_lod); + } + return cs; +} + +static void etna_pipe_bind_fragment_sampler_states(struct pipe_context *pipe, + unsigned num_samplers, + void **samplers) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + priv->dirty_bits |= ETNA_STATE_SAMPLERS; + priv->num_fragment_samplers = num_samplers; + for(int idx=0; idx<num_samplers; ++idx) + { + priv->sampler_p[idx] = samplers[idx]; + if(samplers[idx]) + priv->sampler[idx] = *(struct compiled_sampler_state*)samplers[idx]; + } +} + +static void etna_pipe_bind_vertex_sampler_states(struct pipe_context *pipe, + unsigned num_samplers, + void **samplers) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + priv->dirty_bits |= ETNA_STATE_SAMPLERS; + priv->num_vertex_samplers = num_samplers; + for(int idx=0; idx<num_samplers; ++idx) + { + priv->sampler_p[priv->specs.vertex_sampler_offset + idx] = samplers[idx]; + if(samplers[idx]) + priv->sampler[priv->specs.vertex_sampler_offset + idx] = *(struct compiled_sampler_state*)samplers[idx]; + } +} + +static void etna_pipe_delete_sampler_state(struct pipe_context *pipe, void *ss) +{ + //struct etna_pipe_context *priv = etna_pipe_context(pipe); + FREE(ss); +} + +static struct pipe_sampler_view *etna_pipe_create_sampler_view(struct pipe_context *pipe, + struct pipe_resource *texture, + const struct pipe_sampler_view *templat) +{ + //struct etna_pipe_context *priv = etna_pipe_context(pipe); + struct etna_sampler_view *sv = CALLOC_STRUCT(etna_sampler_view); + sv->base = *templat; + sv->base.context = pipe; + sv->base.texture = 0; + 
pipe_resource_reference(&sv->base.texture, texture); + sv->base.texture = texture; + assert(sv->base.texture); + + struct compiled_sampler_view *cs = CALLOC_STRUCT(compiled_sampler_view); + struct etna_resource *res = etna_resource(sv->base.texture); + assert(res != NULL); + + cs->TE_SAMPLER_CONFIG0 = + VIVS_TE_SAMPLER_CONFIG0_TYPE(translate_texture_target(res->base.target, false)) | + VIVS_TE_SAMPLER_CONFIG0_FORMAT(translate_texture_format(sv->base.format, false)); + /* merged with sampler state */ + cs->TE_SAMPLER_CONFIG1 = + VIVS_TE_SAMPLER_CONFIG1_SWIZZLE_R(templat->swizzle_r) | + VIVS_TE_SAMPLER_CONFIG1_SWIZZLE_G(templat->swizzle_g) | + VIVS_TE_SAMPLER_CONFIG1_SWIZZLE_B(templat->swizzle_b) | + VIVS_TE_SAMPLER_CONFIG1_SWIZZLE_A(templat->swizzle_a) | + VIVS_TE_SAMPLER_CONFIG1_HALIGN(res->halign); + cs->TE_SAMPLER_SIZE = + VIVS_TE_SAMPLER_SIZE_WIDTH(res->base.width0)| + VIVS_TE_SAMPLER_SIZE_HEIGHT(res->base.height0); + cs->TE_SAMPLER_LOG_SIZE = + VIVS_TE_SAMPLER_LOG_SIZE_WIDTH(etna_log2_fixp55(res->base.width0)) | + VIVS_TE_SAMPLER_LOG_SIZE_HEIGHT(etna_log2_fixp55(res->base.height0)); + /* XXX in principle we only have to define lods sv->first_level .. 
sv->last_level */ + for(int lod=0; lod<=res->base.last_level; ++lod) + { + cs->TE_SAMPLER_LOD_ADDR[lod] = res->levels[lod].address; + } + cs->min_lod = sv->base.u.tex.first_level << 5; + cs->max_lod = MIN2(sv->base.u.tex.last_level, res->base.last_level) << 5; + + sv->internal = cs; + pipe_reference_init(&sv->base.reference, 1); + return &sv->base; +} + +static void etna_pipe_sampler_view_destroy(struct pipe_context *pipe, + struct pipe_sampler_view *view) +{ + //struct etna_pipe_context *priv = etna_pipe_context(pipe); + pipe_resource_reference(&view->texture, NULL); + FREE(etna_sampler_view(view)->internal); + FREE(view); +} + + +static void etna_pipe_set_fragment_sampler_views(struct pipe_context *pipe, + unsigned num_views, + struct pipe_sampler_view **info) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + unsigned idx; + priv->dirty_bits |= ETNA_STATE_SAMPLER_VIEWS; + priv->num_fragment_sampler_views = num_views; + for(idx=0; idx<num_views; ++idx) + { + pipe_sampler_view_reference(&priv->sampler_view_s[idx], info[idx]); + if(info[idx]) + priv->sampler_view[idx] = *etna_sampler_view(info[idx])->internal; + } + for(; idx<priv->specs.fragment_sampler_count; ++idx) + { + pipe_sampler_view_reference(&priv->sampler_view_s[idx], NULL); + } +} + +static void etna_pipe_set_vertex_sampler_views(struct pipe_context *pipe, + unsigned num_views, + struct pipe_sampler_view **info) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + unsigned idx; + unsigned offset = priv->specs.vertex_sampler_offset; + priv->dirty_bits |= ETNA_STATE_SAMPLER_VIEWS; + priv->num_vertex_sampler_views = num_views; + for(idx=0; idx<num_views; ++idx) + { + pipe_sampler_view_reference(&priv->sampler_view_s[offset + idx], info[idx]); + if(info[idx]) + priv->sampler_view[offset + idx] = *etna_sampler_view(info[idx])->internal; + } + for(; idx<priv->specs.vertex_sampler_count; ++idx) + { + pipe_sampler_view_reference(&priv->sampler_view_s[offset + idx], NULL); + } +} + 
+static void etna_pipe_texture_barrier(struct pipe_context *pipe) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + /* clear color and texture cache to make sure that texture unit reads + * what has been written + */ + etna_set_state(priv->ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_TEXTURE); +} + +void etna_pipe_texture_init(struct pipe_context *pc) +{ + pc->create_sampler_state = etna_pipe_create_sampler_state; + pc->bind_fragment_sampler_states = etna_pipe_bind_fragment_sampler_states; + pc->bind_vertex_sampler_states = etna_pipe_bind_vertex_sampler_states; + /* XXX bind_geometry_sampler_states */ + /* XXX bind_compute_sampler_states */ + pc->delete_sampler_state = etna_pipe_delete_sampler_state; + pc->set_fragment_sampler_views = etna_pipe_set_fragment_sampler_views; + pc->set_vertex_sampler_views = etna_pipe_set_vertex_sampler_views; + /* XXX set_geometry_sampler_views */ + /* XXX set_compute_sampler_views */ + /* XXX set_shader_resources */ + pc->create_sampler_view = etna_pipe_create_sampler_view; + pc->sampler_view_destroy = etna_pipe_sampler_view_destroy; + pc->texture_barrier = etna_pipe_texture_barrier; +} diff --git a/src/gallium/drivers/etna/etna_texture.h b/src/gallium/drivers/etna/etna_texture.h new file mode 100644 index 0000000000..b74ecd148b --- /dev/null +++ b/src/gallium/drivers/etna/etna_texture.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next 
paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +/* Texture CSOs */ +#ifndef H_ETNA_TEXTURE +#define H_ETNA_TEXTURE + +struct pipe_context; + +void etna_pipe_texture_init(struct pipe_context *pipe); + +#endif diff --git a/src/gallium/drivers/etna/etna_transfer.c b/src/gallium/drivers/etna/etna_transfer.c new file mode 100644 index 0000000000..382f4bb989 --- /dev/null +++ b/src/gallium/drivers/etna/etna_transfer.c @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +/* Pipe memory transfer + */ +#include "etna_transfer.h" +#include "etna_pipe.h" + +#include "pipe/p_defines.h" +#include "pipe/p_format.h" +#include "pipe/p_state.h" +#include "util/u_format.h" +#include "util/u_memory.h" +#include "util/u_surface.h" +#include "util/u_transfer.h" + +/* Compute offset into a 1D/2D/3D buffer of a certain box. + * This box must be aligned to the block width and height of the underlying format. + */ +static inline size_t etna_compute_offset(enum pipe_format format, const struct pipe_box *box, + size_t stride, size_t layer_stride) +{ + return box->z * layer_stride + + box->y / util_format_get_blockheight(format) * stride + + box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); +} + +static void *etna_pipe_transfer_map(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned level, + unsigned usage, /* a combination of PIPE_TRANSFER_x */ + const struct pipe_box *box, + struct pipe_transfer **out_transfer) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + struct etna_transfer *ptrans = util_slab_alloc(&priv->transfer_pool); + struct etna_resource *resource_priv = etna_resource(resource); + enum pipe_format format = resource->format; + if (!ptrans) + return NULL; + assert(level <= resource->last_level); + + /* PIPE_TRANSFER_READ always requires a sync. */ + if(usage & PIPE_TRANSFER_READ) + { + etna_finish(priv->ctx); + } + /* XXX we don't handle PIPE_TRANSFER_FLUSH_EXPLICIT; this flag can be ignored when mapping in-place, + * but when not in place we need to fire off the copy operation in transfer_flush_region (currently + * a no-op) instead of unmap. 
Need to handle this to support ARB_map_buffer_range extension at least. + */ + /* XXX we don't take care of current operations on the resource; which can be, at some point in the pipeline + which is not yet executed: + + - bound as surface + - bound through vertex buffer + - bound through index buffer + - bound in sampler view + - used in clear_render_target / clear_depth_stencil operation + - used in blit + - used in resource_copy_region + + How do other drivers record this information over course of the rendering pipeline? + Is it necessary at all? Only in case we want to provide a fast path and map the resource directly + (and for PIPE_TRANSFER_MAP_DIRECTLY) and we don't want to force a sync. + We also need to know whether the resource is in use to determine if a sync is needed (or just do it + always, but that comes at the expense of performance). + + A conservative approximation without too much overhead would be to mark all resources that have + been bound at some point as busy. A drawback would be that accessing resources that have + been bound but are no longer in use for a while still carry a performance penalty. On the other hand, + the program could be using PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE or PIPE_TRANSFER_UNSYNCHRONIZED to + avoid this in the first place... + + A) We use an in-pipe copy engine, and queue the copy operation after unmap so that the copy + will be performed when all current commands have been executed. + Using the RS is possible, not sure if always efficient. This can also do any kind of tiling for us. + Only possible when PIPE_TRANSFER_DISCARD_RANGE is set. + B) We discard the entire resource (or at least, the mipmap level) and allocate new memory for it. + Only possible when mapping the entire resource or PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE is set. + */ + + /* No need to allocate a buffer for copying if the resource is not in use, + * and no tiling is needed, can just return a direct pointer. 
+ */ + ptrans->in_place = resource_priv->layout == ETNA_LAYOUT_LINEAR || + (resource_priv->layout == ETNA_LAYOUT_TILED && util_format_is_compressed(resource->format)); + ptrans->base.resource = resource; + ptrans->base.level = level; + ptrans->base.usage = usage; + ptrans->base.box = *box; + + struct etna_resource_level *res_level = &resource_priv->levels[level]; + if(likely(ptrans->in_place)) + { + ptrans->base.stride = res_level->stride; + ptrans->base.layer_stride = res_level->layer_stride; + ptrans->buffer = res_level->logical + etna_compute_offset(resource->format, box, res_level->stride, res_level->layer_stride); + } else { + unsigned divSizeX = util_format_get_blockwidth(format); + unsigned divSizeY = util_format_get_blockheight(format); + if(usage & PIPE_TRANSFER_MAP_DIRECTLY) + { + /* No in-place transfer possible */ + util_slab_free(&priv->transfer_pool, ptrans); + return NULL; + } + + ptrans->base.stride = align(box->width, divSizeX) * util_format_get_blocksize(format); /* row stride in bytes */ + ptrans->base.layer_stride = align(box->height, divSizeY) * ptrans->base.stride; + size_t size = ptrans->base.layer_stride * box->depth; + ptrans->buffer = MALLOC(size); + + if(usage & PIPE_TRANSFER_READ) + { + /* untile or copy resource for reading */ + if(resource_priv->layout == ETNA_LAYOUT_LINEAR || resource_priv->layout == ETNA_LAYOUT_TILED) + { + if(resource_priv->layout == ETNA_LAYOUT_TILED && !util_format_is_compressed(resource_priv->base.format)) + { + etna_texture_untile(ptrans->buffer, res_level->logical, + ptrans->base.box.x, ptrans->base.box.y, res_level->stride, + ptrans->base.box.width, ptrans->base.box.height, ptrans->base.stride, + util_format_get_blocksize(resource_priv->base.format)); + } else { /* non-tiled or compressed format */ + util_copy_box(ptrans->buffer, + resource_priv->base.format, + ptrans->base.stride, ptrans->base.layer_stride, + 0, 0, 0, /* dst x,y,z */ + ptrans->base.box.width, ptrans->base.box.height, ptrans->base.box.depth, + 
res_level->logical, + res_level->stride, res_level->layer_stride, + ptrans->base.box.x, ptrans->base.box.y, ptrans->base.box.z); + } + } else /* TODO supertiling */ + { + printf("etna_pipe_transfer_map: unsupported tiling %i for reading\n", resource_priv->layout); + } + } + } + + *out_transfer = &ptrans->base; + return ptrans->buffer; +} + +static void etna_pipe_transfer_flush_region(struct pipe_context *pipe, + struct pipe_transfer *transfer_, + const struct pipe_box *box) +{ + /* NOOP for now */ +} + +static void etna_pipe_transfer_unmap(struct pipe_context *pipe, + struct pipe_transfer *transfer_) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + struct etna_transfer *ptrans = etna_transfer(transfer_); + + /* XXX + * When writing to a resource that is already in use, replace the resource with a completely new buffer + * and free the old one using a fenced free. + * The most tricky case to implement will be: tiled or supertiled surface, partial write, target not aligned to 4/64 + */ + struct etna_resource *resource = etna_resource(ptrans->base.resource); + assert(ptrans->base.level <= resource->base.last_level); + struct etna_resource_level *level = &resource->levels[ptrans->base.level]; + + if(ptrans->base.usage & PIPE_TRANSFER_WRITE) + { + /* write back */ + if(unlikely(!ptrans->in_place)) + { + if(resource->layout == ETNA_LAYOUT_LINEAR || resource->layout == ETNA_LAYOUT_TILED) + { + if(resource->layout == ETNA_LAYOUT_TILED && !util_format_is_compressed(resource->base.format)) + { + etna_texture_tile(level->logical, ptrans->buffer, + ptrans->base.box.x, ptrans->base.box.y, level->stride, + ptrans->base.box.width, ptrans->base.box.height, ptrans->base.stride, + util_format_get_blocksize(resource->base.format)); + } else { /* non-tiled or compressed format */ + util_copy_box(level->logical, + resource->base.format, + level->stride, level->layer_stride, + ptrans->base.box.x, ptrans->base.box.y, ptrans->base.box.z, + ptrans->base.box.width, 
ptrans->base.box.height, ptrans->base.box.depth, + ptrans->buffer, + ptrans->base.stride, ptrans->base.layer_stride, + 0, 0, 0 /* src x,y,z */); + } + } else + { + printf("etna_pipe_transfer_unmap: unsupported tiling %i\n", resource->layout); + } + FREE(ptrans->buffer); + } + if(resource->base.bind & PIPE_BIND_SAMPLER_VIEW) + { + /* XXX do we need to flush the CPU cache too or start a write barrier + * to make sure the GPU sees it? */ + priv->dirty_bits |= ETNA_STATE_TEXTURE_CACHES; + } + } + + util_slab_free(&priv->transfer_pool, ptrans); +} + +void etna_pipe_transfer_init(struct pipe_context *pc) +{ + struct etna_pipe_context *priv = etna_pipe_context(pc); + + pc->transfer_map = etna_pipe_transfer_map; + pc->transfer_flush_region = etna_pipe_transfer_flush_region; + pc->transfer_unmap = etna_pipe_transfer_unmap; + pc->transfer_inline_write = u_default_transfer_inline_write; + + util_slab_create(&priv->transfer_pool, sizeof(struct etna_transfer), + 16, UTIL_SLAB_SINGLETHREADED); +} + +void etna_pipe_transfer_destroy(struct pipe_context *pc) +{ + struct etna_pipe_context *priv = etna_pipe_context(pc); + util_slab_destroy(&priv->transfer_pool); +} + diff --git a/src/gallium/drivers/etna/etna_transfer.h b/src/gallium/drivers/etna/etna_transfer.h new file mode 100644 index 0000000000..1bd1a5de62 --- /dev/null +++ b/src/gallium/drivers/etna/etna_transfer.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) 
shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +/* Pipe memory transfer + */ +#ifndef H_ETNA_TRANSFER +#define H_ETNA_TRANSFER + +#include "pipe/p_state.h" + +void etna_pipe_transfer_init(struct pipe_context *pipe); + +void etna_pipe_transfer_destroy(struct pipe_context *pc); + +#endif + diff --git a/src/gallium/drivers/etna/etna_translate.h b/src/gallium/drivers/etna/etna_translate.h new file mode 100644 index 0000000000..2ef2f2cc87 --- /dev/null +++ b/src/gallium/drivers/etna/etna_translate.h @@ -0,0 +1,554 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +/* inlined translation functions between gallium and vivante */ +#ifndef H_TRANSLATE +#define H_TRANSLATE + +#include "pipe/p_defines.h" +#include "pipe/p_format.h" +#include "pipe/p_state.h" + +#include <etnaviv/common.xml.h> +#include <etnaviv/state.xml.h> +#include <etnaviv/state_3d.xml.h> +#include <etnaviv/cmdstream.xml.h> +#include <etnaviv/etna_util.h> +#include <etnaviv/etna_tex.h> +#include "etna_internal.h" +#include "etna_debug.h" + +#include "util/u_format.h" + +#include <stdio.h> + +/* Returned when there is no match of pipe value to etna value */ +#define ETNA_NO_MATCH (~0) + +static inline uint32_t translate_cull_face(unsigned cull_face, unsigned front_ccw) +{ + switch(cull_face) + { + case PIPE_FACE_NONE: return VIVS_PA_CONFIG_CULL_FACE_MODE_OFF; + case PIPE_FACE_BACK: return front_ccw ? VIVS_PA_CONFIG_CULL_FACE_MODE_CW : VIVS_PA_CONFIG_CULL_FACE_MODE_CCW; + case PIPE_FACE_FRONT: return front_ccw ? VIVS_PA_CONFIG_CULL_FACE_MODE_CCW : VIVS_PA_CONFIG_CULL_FACE_MODE_CW; + default: DBG("Unhandled cull face mode %i\n", cull_face); return ETNA_NO_MATCH; + } +} + +static inline uint32_t translate_polygon_mode(unsigned polygon_mode) +{ + switch(polygon_mode) + { + case PIPE_POLYGON_MODE_FILL: return VIVS_PA_CONFIG_FILL_MODE_SOLID; + case PIPE_POLYGON_MODE_LINE: return VIVS_PA_CONFIG_FILL_MODE_WIREFRAME; + case PIPE_POLYGON_MODE_POINT: return VIVS_PA_CONFIG_FILL_MODE_POINT; + default: DBG("Unhandled polygon mode %i\n", polygon_mode); return ETNA_NO_MATCH; + } +} + +static inline uint32_t translate_stencil_mode(bool enable_0, bool enable_1) +{ + if(enable_0) + { + return enable_1 ? 
VIVS_PE_STENCIL_CONFIG_MODE_TWO_SIDED : + VIVS_PE_STENCIL_CONFIG_MODE_ONE_SIDED; + } else { + return VIVS_PE_STENCIL_CONFIG_MODE_DISABLED; + } +} + +static inline uint32_t translate_stencil_op(unsigned stencil_op) +{ + switch(stencil_op) + { + case PIPE_STENCIL_OP_KEEP: return STENCIL_OP_KEEP; + case PIPE_STENCIL_OP_ZERO: return STENCIL_OP_ZERO; + case PIPE_STENCIL_OP_REPLACE: return STENCIL_OP_REPLACE; + case PIPE_STENCIL_OP_INCR: return STENCIL_OP_INCR; + case PIPE_STENCIL_OP_DECR: return STENCIL_OP_DECR; + case PIPE_STENCIL_OP_INCR_WRAP: return STENCIL_OP_INCR_WRAP; + case PIPE_STENCIL_OP_DECR_WRAP: return STENCIL_OP_DECR_WRAP; + case PIPE_STENCIL_OP_INVERT: return STENCIL_OP_INVERT; + default: DBG("Unhandled stencil op: %i\n", stencil_op); return ETNA_NO_MATCH; + } +} + +static inline uint32_t translate_blend(unsigned blend) +{ + switch(blend) + { + case PIPE_BLEND_ADD: return BLEND_EQ_ADD; + case PIPE_BLEND_SUBTRACT: return BLEND_EQ_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: return BLEND_EQ_REVERSE_SUBTRACT; + case PIPE_BLEND_MIN: return BLEND_EQ_MIN; + case PIPE_BLEND_MAX: return BLEND_EQ_MAX; + default: DBG("Unhandled blend: %i\n", blend); return ETNA_NO_MATCH; + } +} + +static inline uint32_t translate_blend_factor(unsigned blend_factor) +{ + switch(blend_factor) + { + case PIPE_BLENDFACTOR_ONE: return BLEND_FUNC_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: return BLEND_FUNC_SRC_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA: return BLEND_FUNC_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: return BLEND_FUNC_DST_ALPHA; + case PIPE_BLENDFACTOR_DST_COLOR: return BLEND_FUNC_DST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return BLEND_FUNC_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: return BLEND_FUNC_CONSTANT_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: return BLEND_FUNC_CONSTANT_ALPHA; + case PIPE_BLENDFACTOR_ZERO: return BLEND_FUNC_ZERO; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: return BLEND_FUNC_ONE_MINUS_SRC_COLOR; + case 
PIPE_BLENDFACTOR_INV_SRC_ALPHA: return BLEND_FUNC_ONE_MINUS_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: return BLEND_FUNC_ONE_MINUS_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_COLOR: return BLEND_FUNC_ONE_MINUS_DST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: return BLEND_FUNC_ONE_MINUS_CONSTANT_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return BLEND_FUNC_ONE_MINUS_CONSTANT_ALPHA; + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + default: DBG("Unhandled blend factor: %i\n", blend_factor); return ETNA_NO_MATCH; + } +} + +static inline uint32_t translate_texture_wrapmode(unsigned wrap) +{ + switch(wrap) + { + case PIPE_TEX_WRAP_REPEAT: return TEXTURE_WRAPMODE_REPEAT; + case PIPE_TEX_WRAP_CLAMP: return TEXTURE_WRAPMODE_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return TEXTURE_WRAPMODE_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return TEXTURE_WRAPMODE_CLAMP_TO_EDGE; /* XXX */ + case PIPE_TEX_WRAP_MIRROR_REPEAT: return TEXTURE_WRAPMODE_MIRRORED_REPEAT; + case PIPE_TEX_WRAP_MIRROR_CLAMP: return TEXTURE_WRAPMODE_MIRRORED_REPEAT; /* XXX */ + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: return TEXTURE_WRAPMODE_MIRRORED_REPEAT; /* XXX */ + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: return TEXTURE_WRAPMODE_MIRRORED_REPEAT; /* XXX */ + default: DBG("Unhandled texture wrapmode: %i\n", wrap); return ETNA_NO_MATCH; + } +} + +static inline uint32_t translate_texture_mipfilter(unsigned filter) +{ + switch(filter) + { + case PIPE_TEX_MIPFILTER_NEAREST: return TEXTURE_FILTER_NEAREST; + case PIPE_TEX_MIPFILTER_LINEAR: return TEXTURE_FILTER_LINEAR; + case PIPE_TEX_MIPFILTER_NONE: return TEXTURE_FILTER_NONE; + default: DBG("Unhandled texture mipfilter: %i\n", filter); return ETNA_NO_MATCH; + } +} + +static inline uint32_t translate_texture_filter(unsigned filter) +{ + switch(filter) + { + case PIPE_TEX_FILTER_NEAREST: return TEXTURE_FILTER_NEAREST; + case 
PIPE_TEX_FILTER_LINEAR: return TEXTURE_FILTER_LINEAR; + /* What about anisotropic? */ + default: DBG("Unhandled texture filter: %i\n", filter); return ETNA_NO_MATCH; + } +} + +static inline uint32_t translate_texture_format(enum pipe_format fmt, bool silent) +{ + switch(fmt) /* XXX with TEXTURE_FORMAT_EXT and swizzle on newer chips we can support much more */ + { + /* Note: Pipe format convention is LSB to MSB, VIVS is MSB to LSB */ + case PIPE_FORMAT_A8_UNORM: return TEXTURE_FORMAT_A8; + case PIPE_FORMAT_L8_UNORM: return TEXTURE_FORMAT_L8; + case PIPE_FORMAT_I8_UNORM: return TEXTURE_FORMAT_I8; + case PIPE_FORMAT_L8A8_UNORM: return TEXTURE_FORMAT_A8L8; + case PIPE_FORMAT_B4G4R4A4_UNORM: return TEXTURE_FORMAT_A4R4G4B4; + case PIPE_FORMAT_B4G4R4X4_UNORM: return TEXTURE_FORMAT_X4R4G4B4; + case PIPE_FORMAT_B8G8R8A8_UNORM: return TEXTURE_FORMAT_A8R8G8B8; + case PIPE_FORMAT_B8G8R8X8_UNORM: return TEXTURE_FORMAT_X8R8G8B8; + case PIPE_FORMAT_R8G8B8A8_UNORM: return TEXTURE_FORMAT_A8B8G8R8; + case PIPE_FORMAT_R8G8B8X8_UNORM: return TEXTURE_FORMAT_X8B8G8R8; + case PIPE_FORMAT_B5G6R5_UNORM: return TEXTURE_FORMAT_R5G6B5; + case PIPE_FORMAT_B5G5R5A1_UNORM: return TEXTURE_FORMAT_A1R5G5B5; + case PIPE_FORMAT_B5G5R5X1_UNORM: return TEXTURE_FORMAT_X1R5G5B5; + case PIPE_FORMAT_YUYV: return TEXTURE_FORMAT_YUY2; + case PIPE_FORMAT_UYVY: return TEXTURE_FORMAT_UYVY; + case PIPE_FORMAT_Z16_UNORM: return TEXTURE_FORMAT_D16; + case PIPE_FORMAT_X8Z24_UNORM: return TEXTURE_FORMAT_D24S8; + case PIPE_FORMAT_S8_UINT_Z24_UNORM: return TEXTURE_FORMAT_D24S8; + case PIPE_FORMAT_DXT1_RGB: return TEXTURE_FORMAT_DXT1; + case PIPE_FORMAT_DXT1_RGBA: return TEXTURE_FORMAT_DXT1; + case PIPE_FORMAT_DXT3_RGBA: return TEXTURE_FORMAT_DXT2_DXT3; + case PIPE_FORMAT_DXT5_RGBA: return TEXTURE_FORMAT_DXT4_DXT5; + case PIPE_FORMAT_ETC1_RGB8: return TEXTURE_FORMAT_ETC1; + default: if(!silent) { DBG("Unhandled texture format: %i\n", fmt); } return ETNA_NO_MATCH; + } +} + +/* render target format (non-rb swapped 
RS-supported formats) */ +static inline uint32_t translate_rt_format(enum pipe_format fmt, bool silent) +{ + switch(fmt) + { + /* Note: Pipe format convention is LSB to MSB, VIVS is MSB to LSB */ + case PIPE_FORMAT_B4G4R4X4_UNORM: return RS_FORMAT_X4R4G4B4; + case PIPE_FORMAT_B4G4R4A4_UNORM: return RS_FORMAT_A4R4G4B4; + case PIPE_FORMAT_B5G5R5X1_UNORM: return RS_FORMAT_X1R5G5B5; + case PIPE_FORMAT_B5G5R5A1_UNORM: return RS_FORMAT_A1R5G5B5; + case PIPE_FORMAT_B5G6R5_UNORM: return RS_FORMAT_R5G6B5; + case PIPE_FORMAT_B8G8R8X8_UNORM: return RS_FORMAT_X8R8G8B8; + case PIPE_FORMAT_B8G8R8A8_UNORM: return RS_FORMAT_A8R8G8B8; + case PIPE_FORMAT_YUYV: return RS_FORMAT_YUY2; + default: if(!silent) { DBG("Unhandled rs surface format: %i\n", fmt); } return ETNA_NO_MATCH; + } +} + +static inline uint32_t translate_depth_format(enum pipe_format fmt, bool silent) +{ + switch(fmt) + { + /* Note: Pipe format convention is LSB to MSB, VIVS is MSB to LSB */ + case PIPE_FORMAT_Z16_UNORM: return VIVS_PE_DEPTH_CONFIG_DEPTH_FORMAT_D16; + case PIPE_FORMAT_X8Z24_UNORM: return VIVS_PE_DEPTH_CONFIG_DEPTH_FORMAT_D24S8; + case PIPE_FORMAT_S8_UINT_Z24_UNORM: return VIVS_PE_DEPTH_CONFIG_DEPTH_FORMAT_D24S8; + default: if(!silent) { DBG("Unhandled depth format: %i\n", fmt); } return ETNA_NO_MATCH; + } +} + +/* render target format for MSAA */ +static inline uint32_t translate_msaa_format(enum pipe_format fmt, bool silent) +{ + switch(fmt) + { + /* Note: Pipe format convention is LSB to MSB, VIVS is MSB to LSB */ + case PIPE_FORMAT_B4G4R4X4_UNORM: return VIVS_TS_MEM_CONFIG_MSAA_FORMAT_A4R4G4B4; + case PIPE_FORMAT_B4G4R4A4_UNORM: return VIVS_TS_MEM_CONFIG_MSAA_FORMAT_A4R4G4B4; + case PIPE_FORMAT_B5G5R5X1_UNORM: return VIVS_TS_MEM_CONFIG_MSAA_FORMAT_A1R5G5B5; + case PIPE_FORMAT_B5G5R5A1_UNORM: return VIVS_TS_MEM_CONFIG_MSAA_FORMAT_A1R5G5B5; + case PIPE_FORMAT_B5G6R5_UNORM: return VIVS_TS_MEM_CONFIG_MSAA_FORMAT_R5G6B5; + case PIPE_FORMAT_B8G8R8X8_UNORM: return VIVS_TS_MEM_CONFIG_MSAA_FORMAT_X8R8G8B8; 
+ case PIPE_FORMAT_B8G8R8A8_UNORM: return VIVS_TS_MEM_CONFIG_MSAA_FORMAT_A8R8G8B8; + /* MSAA with YUYV not supported */ + default: if(!silent) { DBG("Unhandled msaa surface format: %i\n", fmt); } return ETNA_NO_MATCH; + } +} + +static inline uint32_t translate_texture_target(enum pipe_texture_target tgt, bool silent) +{ + switch(tgt) + { + case PIPE_TEXTURE_2D: return TEXTURE_TYPE_2D; + case PIPE_TEXTURE_CUBE: return TEXTURE_TYPE_CUBE_MAP; + default: DBG("Unhandled texture target: %i\n", tgt); return ETNA_NO_MATCH; + } +} + +/* Return type flags for vertex element format */ +static inline uint32_t translate_vertex_format_type(enum pipe_format fmt, bool silent) +{ + switch(fmt) + { + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8_USCALED: + case PIPE_FORMAT_R8G8_USCALED: + case PIPE_FORMAT_R8G8B8_USCALED: + case PIPE_FORMAT_R8G8B8A8_USCALED: + case PIPE_FORMAT_R8_UINT: + case PIPE_FORMAT_R8G8_UINT: + case PIPE_FORMAT_R8G8B8_UINT: + case PIPE_FORMAT_R8G8B8A8_UINT: + return VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_UNSIGNED_BYTE; + case PIPE_FORMAT_R8_SNORM: + case PIPE_FORMAT_R8G8_SNORM: + case PIPE_FORMAT_R8G8B8_SNORM: + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8_SSCALED: + case PIPE_FORMAT_R8G8_SSCALED: + case PIPE_FORMAT_R8G8B8_SSCALED: + case PIPE_FORMAT_R8G8B8A8_SSCALED: + case PIPE_FORMAT_R8_SINT: + case PIPE_FORMAT_R8G8_SINT: + case PIPE_FORMAT_R8G8B8_SINT: + case PIPE_FORMAT_R8G8B8A8_SINT: + return VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_BYTE; + case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_R16G16_UNORM: + case PIPE_FORMAT_R16G16B16_UNORM: + case PIPE_FORMAT_R16G16B16A16_UNORM: + case PIPE_FORMAT_R16_USCALED: + case PIPE_FORMAT_R16G16_USCALED: + case PIPE_FORMAT_R16G16B16_USCALED: + case PIPE_FORMAT_R16G16B16A16_USCALED: + case PIPE_FORMAT_R16_UINT: + case PIPE_FORMAT_R16G16_UINT: + case PIPE_FORMAT_R16G16B16_UINT: + case PIPE_FORMAT_R16G16B16A16_UINT: + 
return VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_UNSIGNED_SHORT; + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_R16G16_SNORM: + case PIPE_FORMAT_R16G16B16_SNORM: + case PIPE_FORMAT_R16G16B16A16_SNORM: + case PIPE_FORMAT_R16_SSCALED: + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + case PIPE_FORMAT_R16_SINT: + case PIPE_FORMAT_R16G16_SINT: + case PIPE_FORMAT_R16G16B16_SINT: + case PIPE_FORMAT_R16G16B16A16_SINT: + return VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_SHORT; + case PIPE_FORMAT_R32_UNORM: + case PIPE_FORMAT_R32G32_UNORM: + case PIPE_FORMAT_R32G32B32_UNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + case PIPE_FORMAT_R32_USCALED: + case PIPE_FORMAT_R32G32_USCALED: + case PIPE_FORMAT_R32G32B32_USCALED: + case PIPE_FORMAT_R32G32B32A32_USCALED: + case PIPE_FORMAT_R32_UINT: + case PIPE_FORMAT_R32G32_UINT: + case PIPE_FORMAT_R32G32B32_UINT: + case PIPE_FORMAT_R32G32B32A32_UINT: + return VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_UNSIGNED_INT; + case PIPE_FORMAT_R32_SNORM: + case PIPE_FORMAT_R32G32_SNORM: + case PIPE_FORMAT_R32G32B32_SNORM: + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32_SSCALED: + case PIPE_FORMAT_R32G32_SSCALED: + case PIPE_FORMAT_R32G32B32_SSCALED: + case PIPE_FORMAT_R32G32B32A32_SSCALED: + case PIPE_FORMAT_R32_SINT: + case PIPE_FORMAT_R32G32_SINT: + case PIPE_FORMAT_R32G32B32_SINT: + case PIPE_FORMAT_R32G32B32A32_SINT: + return VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_INT; + case PIPE_FORMAT_R16_FLOAT: + case PIPE_FORMAT_R16G16_FLOAT: + case PIPE_FORMAT_R16G16B16_FLOAT: + case PIPE_FORMAT_R16G16B16A16_FLOAT: + return VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_HALF_FLOAT; + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_FLOAT; + case PIPE_FORMAT_R32_FIXED: + case PIPE_FORMAT_R32G32_FIXED: + case PIPE_FORMAT_R32G32B32_FIXED: + case PIPE_FORMAT_R32G32B32A32_FIXED: + return 
VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_FIXED; + case PIPE_FORMAT_R10G10B10A2_UNORM: + case PIPE_FORMAT_R10G10B10A2_USCALED: + return VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_UNSIGNED_INT_10_10_10_2; + case PIPE_FORMAT_R10G10B10A2_SNORM: + case PIPE_FORMAT_R10G10B10A2_SSCALED: + return VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_INT_10_10_10_2; + default: if(!silent) { DBG("Unhandled vertex format: %i", fmt); } return ETNA_NO_MATCH; + } +} + +/* Return normalization flag for vertex element format */ +static inline uint32_t translate_vertex_format_normalize(enum pipe_format fmt) +{ + const struct util_format_description *desc = util_format_description(fmt); + if(!desc) + return VIVS_FE_VERTEX_ELEMENT_CONFIG_NORMALIZE_OFF; + /* assumes that normalization of channel 0 holds for all channels; + * this holds for all vertex formats that we support */ + return desc->channel[0].normalized ? VIVS_FE_VERTEX_ELEMENT_CONFIG_NORMALIZE_ON : + VIVS_FE_VERTEX_ELEMENT_CONFIG_NORMALIZE_OFF; +} + +static inline uint32_t translate_index_size(unsigned index_size) +{ + switch(index_size) + { + case 1: return VIVS_FE_INDEX_STREAM_CONTROL_TYPE_UNSIGNED_CHAR; + case 2: return VIVS_FE_INDEX_STREAM_CONTROL_TYPE_UNSIGNED_SHORT; + case 4: return VIVS_FE_INDEX_STREAM_CONTROL_TYPE_UNSIGNED_INT; + default: DBG("Unhandled index size %i\n", index_size); return ETNA_NO_MATCH; + } +} + +static inline uint32_t translate_draw_mode(unsigned mode) +{ + switch(mode) + { + case PIPE_PRIM_POINTS: return PRIMITIVE_TYPE_POINTS; + case PIPE_PRIM_LINES: return PRIMITIVE_TYPE_LINES; + case PIPE_PRIM_LINE_LOOP: return PRIMITIVE_TYPE_LINE_LOOP; + case PIPE_PRIM_LINE_STRIP: return PRIMITIVE_TYPE_LINE_STRIP; + case PIPE_PRIM_TRIANGLES: return PRIMITIVE_TYPE_TRIANGLES; + case PIPE_PRIM_TRIANGLE_STRIP: return PRIMITIVE_TYPE_TRIANGLE_STRIP; + case PIPE_PRIM_TRIANGLE_FAN: return PRIMITIVE_TYPE_TRIANGLE_FAN; + case PIPE_PRIM_QUADS: return PRIMITIVE_TYPE_QUADS; + default: DBG("Unhandled draw mode primitive %i\n", mode); return ETNA_NO_MATCH; 
+ } +} + +/* Get size multiple for size of texture/rendertarget with a certain layout + * This is affected by many different parameters: + * - A horizontal multiple of 16 is used when possible as in this case tile status and resolve can be used + * at the cost of only a little bit extra memory usage. + * - If the surface is a texture, and HALIGN cannot be specified on this GPU, set tex_no_halign to 1 + * If set, a horizontal multiple of 4 will be used for tiled and linear surfaces, otherwise one of 16. + * - If the surface is supertiled, horizontal and vertical multiple is always 64 + * - If the surface is multi tiled or supertiled, make sure that the vertical size + * is a multiple of the number of pixel pipes as well. + * */ +static inline void etna_layout_multiple(unsigned layout, unsigned pixel_pipes, + bool tex_no_halign, + unsigned *paddingX, unsigned *paddingY, unsigned *halign) +{ + switch(layout) + { + case ETNA_LAYOUT_LINEAR: + *paddingX = tex_no_halign ? 4 : 16; + *paddingY = 1; + *halign = tex_no_halign ? TEXTURE_HALIGN_FOUR : TEXTURE_HALIGN_SIXTEEN; + break; + case ETNA_LAYOUT_TILED: + *paddingX = tex_no_halign ? 4 : 16; + *paddingY = 4; + *halign = tex_no_halign ? 
TEXTURE_HALIGN_FOUR : TEXTURE_HALIGN_SIXTEEN; + break; + case ETNA_LAYOUT_SUPER_TILED: + *paddingX = 64; + *paddingY = 64; + *halign = TEXTURE_HALIGN_SUPER_TILED; + break; + case ETNA_LAYOUT_MULTI_TILED: + *paddingX = 16; + *paddingY = 4 * pixel_pipes; + *halign = TEXTURE_HALIGN_SPLIT_TILED; + break; + case ETNA_LAYOUT_MULTI_SUPERTILED: + *paddingX = 64; + *paddingY = 64 * pixel_pipes; + *halign = TEXTURE_HALIGN_SPLIT_SUPER_TILED; + break; + default: DBG("Unhandled layout %i\n", layout); + } +} + +/* return 32-bit clear pattern for color */ +static inline uint32_t translate_clear_color(enum pipe_format format, const union pipe_color_union *color) +{ + uint32_t clear_value = 0; + switch(format) // XXX util_pack_color + { + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + clear_value = etna_cfloat_to_uintN(color->f[2], 8) | + (etna_cfloat_to_uintN(color->f[1], 8) << 8) | + (etna_cfloat_to_uintN(color->f[0], 8) << 16) | + (etna_cfloat_to_uintN(color->f[3], 8) << 24); + break; + case PIPE_FORMAT_B4G4R4X4_UNORM: + case PIPE_FORMAT_B4G4R4A4_UNORM: + clear_value = etna_cfloat_to_uintN(color->f[2], 4) | + (etna_cfloat_to_uintN(color->f[1], 4) << 4) | + (etna_cfloat_to_uintN(color->f[0], 4) << 8) | + (etna_cfloat_to_uintN(color->f[3], 4) << 12); + clear_value |= clear_value << 16; + break; + case PIPE_FORMAT_B5G5R5X1_UNORM: + case PIPE_FORMAT_B5G5R5A1_UNORM: + clear_value = etna_cfloat_to_uintN(color->f[2], 5) | + (etna_cfloat_to_uintN(color->f[1], 5) << 5) | + (etna_cfloat_to_uintN(color->f[0], 5) << 10) | + (etna_cfloat_to_uintN(color->f[3], 1) << 15); + clear_value |= clear_value << 16; + break; + case PIPE_FORMAT_B5G6R5_UNORM: + clear_value = etna_cfloat_to_uintN(color->f[2], 5) | + (etna_cfloat_to_uintN(color->f[1], 6) << 5) | + (etna_cfloat_to_uintN(color->f[0], 5) << 11); + clear_value |= clear_value << 16; + break; + default: + DBG("Unhandled pipe format for color clear: %i\n", format); + } + return clear_value; +} + +static inline uint32_t 
translate_clear_depth_stencil(enum pipe_format format, float depth, unsigned stencil) +{ + uint32_t clear_value = 0; + switch(format) // XXX util_pack_color + { + case PIPE_FORMAT_Z16_UNORM: + clear_value = etna_cfloat_to_uintN(depth, 16); + clear_value |= clear_value << 16; + break; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + clear_value = (etna_cfloat_to_uintN(depth, 24) << 8) | (stencil & 0xFF); + break; + default: + DBG("Unhandled pipe format for depth stencil clear: %i\n", format); + } + return clear_value; +} + +/* Convert MSAA number of samples to x and y scaling factor and VIVS_GL_MULTI_SAMPLE_CONFIG value. + * Return true if supported and false otherwise. + */ +static inline bool translate_samples_to_xyscale(int num_samples, int *xscale_out, int *yscale_out, uint32_t *config_out) +{ + int xscale, yscale; + uint32_t config; + switch(num_samples) + { + case 0: + case 1: + xscale = 1; + yscale = 1; + config = VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES_NONE; + break; + case 2: + xscale = 2; + yscale = 1; + config = VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES_2X; + break; + case 4: + xscale = 2; + yscale = 2; + config = VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES_4X; + break; + default: + return false; + } + if(xscale_out) + *xscale_out = xscale; + if(yscale_out) + *yscale_out = yscale; + if(config_out) + *config_out = config; + return true; +} + +#endif + diff --git a/src/gallium/drivers/etna/etna_zsa.c b/src/gallium/drivers/etna/etna_zsa.c new file mode 100644 index 0000000000..17ae0c6310 --- /dev/null +++ b/src/gallium/drivers/etna/etna_zsa.c @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies 
of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +/* Depth stencil alpha CSOs */ +#include "etna_zsa.h" + +#include "etna_internal.h" +#include "etna_pipe.h" +#include "etna_translate.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "util/u_memory.h" + +#include <etnaviv/common.xml.h> +#include <etnaviv/state.xml.h> +#include <etnaviv/state_3d.xml.h> + +static void *etna_pipe_create_depth_stencil_alpha_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *dsa_p) +{ + //struct etna_pipe_context *priv = etna_pipe_context(pipe); + struct compiled_depth_stencil_alpha_state *cs = CALLOC_STRUCT(compiled_depth_stencil_alpha_state); + struct pipe_depth_stencil_alpha_state dsa = *dsa_p; + /* XXX does stencil[0] / stencil[1] order depend on rs->front_ccw? */ + bool early_z = true; + bool disable_zs = !dsa.depth.writemask; + int i; + + /* Set operations to KEEP if write mask is 0. + * When we don't do this, the depth buffer is written for the entire primitive instead of + * just where the stencil condition holds (GC600 rev 0x0019, without feature CORRECT_STENCIL). + * Not sure if this is a hardware bug or just a strange edge case. 
+ */ + for(i=0; i<2; ++i) + { + if(dsa.stencil[i].writemask == 0) + { + dsa.stencil[i].fail_op = dsa.stencil[i].zfail_op = dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP; + } + } + + /* Determine whether to enable early z reject. Don't enable it when any of + * the stencil-modifying functions is used. */ + if(dsa.stencil[0].enabled) + { + if(dsa.stencil[0].fail_op != PIPE_STENCIL_OP_KEEP || + dsa.stencil[0].zfail_op != PIPE_STENCIL_OP_KEEP || + dsa.stencil[0].zpass_op != PIPE_STENCIL_OP_KEEP) + { + disable_zs = early_z = false; + } + else if(dsa.stencil[1].enabled) + { + if(dsa.stencil[1].fail_op != PIPE_STENCIL_OP_KEEP || + dsa.stencil[1].zfail_op != PIPE_STENCIL_OP_KEEP || + dsa.stencil[1].zpass_op != PIPE_STENCIL_OP_KEEP) + { + disable_zs = early_z = false; + } + } + } + /* compare funcs have 1 to 1 mapping */ + cs->PE_DEPTH_CONFIG = + VIVS_PE_DEPTH_CONFIG_DEPTH_FUNC(dsa.depth.enabled ? dsa.depth.func : PIPE_FUNC_ALWAYS) | + (dsa.depth.writemask ? VIVS_PE_DEPTH_CONFIG_WRITE_ENABLE : 0) | + (early_z ? VIVS_PE_DEPTH_CONFIG_EARLY_Z : 0) | + (disable_zs ? VIVS_PE_DEPTH_CONFIG_DISABLE_ZS : 0); + cs->PE_ALPHA_OP = + (dsa.alpha.enabled ? 
VIVS_PE_ALPHA_OP_ALPHA_TEST : 0) | + VIVS_PE_ALPHA_OP_ALPHA_FUNC(dsa.alpha.func) | + VIVS_PE_ALPHA_OP_ALPHA_REF(etna_cfloat_to_uint8(dsa.alpha.ref_value)); + cs->PE_STENCIL_OP = + VIVS_PE_STENCIL_OP_FUNC_FRONT(dsa.stencil[0].func) | + VIVS_PE_STENCIL_OP_FUNC_BACK(dsa.stencil[1].func) | + VIVS_PE_STENCIL_OP_FAIL_FRONT(translate_stencil_op(dsa.stencil[0].fail_op)) | + VIVS_PE_STENCIL_OP_FAIL_BACK(translate_stencil_op(dsa.stencil[1].fail_op)) | + VIVS_PE_STENCIL_OP_DEPTH_FAIL_FRONT(translate_stencil_op(dsa.stencil[0].zfail_op)) | + VIVS_PE_STENCIL_OP_DEPTH_FAIL_BACK(translate_stencil_op(dsa.stencil[1].zfail_op)) | + VIVS_PE_STENCIL_OP_PASS_FRONT(translate_stencil_op(dsa.stencil[0].zpass_op)) | + VIVS_PE_STENCIL_OP_PASS_BACK(translate_stencil_op(dsa.stencil[1].zpass_op)); + cs->PE_STENCIL_CONFIG = + translate_stencil_mode(dsa.stencil[0].enabled, dsa.stencil[1].enabled) | + VIVS_PE_STENCIL_CONFIG_MASK_FRONT(dsa.stencil[0].valuemask) | + VIVS_PE_STENCIL_CONFIG_WRITE_MASK(dsa.stencil[0].writemask); + /* XXX back masks in VIVS_PE_DEPTH_CONFIG_EXT? */ + /* XXX VIVS_PE_STENCIL_CONFIG_REF_FRONT comes from pipe_stencil_ref */ + + /* XXX does alpha/stencil test affect PE_COLOR_FORMAT_OVERWRITE? 
*/ + return cs; +} + +static void etna_pipe_bind_depth_stencil_alpha_state(struct pipe_context *pipe, void *dsa) +{ + struct etna_pipe_context *priv = etna_pipe_context(pipe); + priv->dirty_bits |= ETNA_STATE_DSA; + priv->depth_stencil_alpha_p = dsa; + if(dsa) + priv->depth_stencil_alpha = *(struct compiled_depth_stencil_alpha_state*)dsa; +} + +static void etna_pipe_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *dsa) +{ + //struct etna_pipe_context *priv = etna_pipe_context(pipe); + FREE(dsa); +} + +void etna_pipe_zsa_init(struct pipe_context *pc) +{ + pc->create_depth_stencil_alpha_state = etna_pipe_create_depth_stencil_alpha_state; + pc->bind_depth_stencil_alpha_state = etna_pipe_bind_depth_stencil_alpha_state; + pc->delete_depth_stencil_alpha_state = etna_pipe_delete_depth_stencil_alpha_state; +} diff --git a/src/gallium/drivers/etna/etna_zsa.h b/src/gallium/drivers/etna/etna_zsa.h new file mode 100644 index 0000000000..ed42f8851b --- /dev/null +++ b/src/gallium/drivers/etna/etna_zsa.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2012-2013 Etnaviv Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sub license, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +/* Depth stencil alpha CSOs */ +#ifndef H_ETNA_ZSA +#define H_ETNA_ZSA + +struct pipe_context; + +void etna_pipe_zsa_init(struct pipe_context *pipe); + +#endif diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 9436e7e422..3df8c3c2cc 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -557,6 +557,7 @@ enum pipe_cap { PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET = 95, PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET = 96, PIPE_CAP_SAMPLE_SHADING = 97, + PIPE_CAP_MAX_VERTEX_BUFFERS = 98, }; #define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0) @@ -615,7 +616,9 @@ enum pipe_shader_cap PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS = 18, PIPE_SHADER_CAP_PREFERRED_IR = 19, PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED = 20, - PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS = 21 + PIPE_SHADER_CAP_TGSI_POW_SUPPORTED = 21, + PIPE_SHADER_CAP_TGSI_LRP_SUPPORTED = 22, + PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS = 23 }; /** diff --git a/src/gallium/state_trackers/egl/Makefile.am b/src/gallium/state_trackers/egl/Makefile.am index 26266ed2b6..f3cb028617 100644 --- a/src/gallium/state_trackers/egl/Makefile.am +++ b/src/gallium/state_trackers/egl/Makefile.am @@ -80,6 +80,7 @@ if HAVE_EGL_PLATFORM_FBDEV libegl_la_SOURCES += $(fbdev_FILES) AM_CPPFLAGS += \ -I$(top_srcdir)/src/gallium/winsys/sw \ + -I$(top_srcdir)/src/gallium/winsys/etna/fbdev \ -DHAVE_FBDEV_BACKEND endif diff --git a/src/gallium/state_trackers/egl/fbdev/native_fbdev.c b/src/gallium/state_trackers/egl/fbdev/native_fbdev.c index b5c7b81a69..f02f723086 100644 --- a/src/gallium/state_trackers/egl/fbdev/native_fbdev.c +++ b/src/gallium/state_trackers/egl/fbdev/native_fbdev.c @@ -46,15 +46,27 @@ #include <sys/stat.h> 
#include <fcntl.h> #include <linux/fb.h> +#include <stdio.h> #include "pipe/p_screen.h" #include "util/u_memory.h" #include "util/u_inlines.h" #include "util/u_pointer.h" +#include "os/os_thread.h" #include "common/native.h" #include "common/native_helper.h" -#include "fbdev/fbdev_sw_winsys.h" + +#include "fbdev/native_fbdev.h" + +#define ETNA /* Use ETNA instead of swrast driver */ + +#include "fbdev/native_fbdev_swrast.h" +#ifdef ETNA +#include "etna_fbdev_public.h" +#endif + +#define FBDEV_MAX_BUFFERS (2) /* double buffering */ struct fbdev_display { struct native_display base; @@ -65,6 +77,7 @@ struct fbdev_display { struct fb_fix_screeninfo finfo; struct fb_var_screeninfo config_vinfo; struct native_config config; + struct native_fbdev_screen *fbdev_screen; boolean assume_fixed_vinfo; }; @@ -75,10 +88,22 @@ struct fbdev_surface { struct fbdev_display *fbdpy; struct resource_surface *rsurf; int width, height; + int num_buffers; + int swap_interval; + struct fb_var_screeninfo vinfo; unsigned int sequence_number; - struct fbdev_sw_drawable drawable; + /* For Android-style double/triple buffering */ + volatile bool terminate; /* terminate flag for buffer swap thread */ + int buffer_head; /* next buffer to be shown */ + int buffer_tail; /* next buffer to be posted */ + int posted_buffers; /* number of posted buffers */ + pipe_thread bswap_thread; + pipe_mutex buffer_mutex; + pipe_condvar buffer_available; /* condition if buffer available for use */ + pipe_condvar buffer_posted; /* condition if buffer posted */ + void *drawable[FBDEV_MAX_BUFFERS]; /* drawable for buffers */ }; static INLINE struct fbdev_display * @@ -115,7 +140,7 @@ fbdev_surface_validate(struct native_surface *nsurf, uint attachment_mask, return TRUE; } -static enum pipe_format +enum pipe_format vinfo_to_format(const struct fb_var_screeninfo *vinfo) { enum pipe_format format = PIPE_FORMAT_NONE; @@ -136,6 +161,12 @@ vinfo_to_format(const struct fb_var_screeninfo *vinfo) vinfo->blue.length == 5 && 
vinfo->transp.length == 0) format = PIPE_FORMAT_B5G6R5_UNORM; + if (vinfo->red.length == 5 && + vinfo->green.length == 5 && + vinfo->blue.length == 5) { + format = (vinfo->transp.length == 1) ? + PIPE_FORMAT_B5G5R5A1_UNORM : PIPE_FORMAT_B5G5R5X1_UNORM; + } break; default: break; @@ -144,41 +175,138 @@ vinfo_to_format(const struct fb_var_screeninfo *vinfo) return format; } -static boolean -fbdev_surface_update_drawable(struct native_surface *nsurf, - const struct fb_var_screeninfo *vinfo) +/* Set currently visible buffer id */ +static int fbdev_set_buffer(struct fbdev_surface *fbsurf, int buffer) { - struct fbdev_surface *fbsurf = fbdev_surface(nsurf); - unsigned x, y, width, height; - - x = vinfo->xoffset; - y = vinfo->yoffset; - width = MIN2(vinfo->xres, fbsurf->width); - height = MIN2(vinfo->yres, fbsurf->height); - - /* sanitize the values */ - if (x + width > vinfo->xres_virtual) { - if (x > vinfo->xres_virtual) - width = 0; - else - width = vinfo->xres_virtual - x; - } - if (y + height > vinfo->yres_virtual) { - if (y > vinfo->yres_virtual) - height = 0; - else - height = vinfo->yres_virtual - y; - } + assert(buffer < fbsurf->num_buffers); + /* Is this supposed to wait for vblank or just postpone the operation asynchronously? + * This assumes the former. + */ + fbsurf->vinfo.activate = fbsurf->swap_interval ? FB_ACTIVATE_VBL : FB_ACTIVATE_NOW; + fbsurf->vinfo.yoffset = buffer * fbsurf->height; + /* Pan framebuffer in y direction. + * Android uses FBIOPUT_VSCREENINFO for this; however on some hardware this does a + * reconfiguration of the DC every time it is called which causes flicker and slowness. + * On the other hand, FBIOPAN_DISPLAY causes a smooth scroll on some hardware, + * according to the Android rationale. Choose the least of both evils. 
+ */ + if (ioctl(fbsurf->fbdpy->fd, FBIOPAN_DISPLAY, &fbsurf->vinfo)) + { + printf("Error: failed to run ioctl to pan display: %s\n", strerror(errno)); + return errno; + } + return 0; +} - fbsurf->drawable.format = vinfo_to_format(vinfo); - fbsurf->drawable.x = vinfo->xoffset; - fbsurf->drawable.y = vinfo->yoffset; - fbsurf->drawable.width = vinfo->xres; - fbsurf->drawable.height = vinfo->yres; +static PIPE_THREAD_ROUTINE(fbdev_bswap_thread, param) +{ + struct fbdev_surface *fbsurf = (struct fbdev_surface*)param; + struct pipe_screen *screen = fbsurf->fbdpy->base.screen; + struct native_fbdev_screen *fbdev_screen = fbsurf->fbdpy->fbdev_screen; + while(!fbsurf->terminate) + { + int cur; + struct pipe_fence_handle *fence; + /* unqueue buffer */ + pipe_mutex_lock(fbsurf->buffer_mutex); + while(fbsurf->posted_buffers == 0 && !fbsurf->terminate) + { + pipe_condvar_wait(fbsurf->buffer_posted, fbsurf->buffer_mutex); + } + cur = fbsurf->buffer_head; + pipe_mutex_unlock(fbsurf->buffer_mutex); + + if(fbsurf->terminate) + break; + /* wait for buffer fence */ + fence = fbdev_screen->get_drawable_fence(fbdev_screen, fbsurf->drawable[cur]); + if(fence) + screen->fence_finish(screen, fence, PIPE_TIMEOUT_INFINITE); + /* switch to buffer */ + fbdev_set_buffer(fbsurf, cur); + /* notify that previously visible buffer is available again */ + pipe_mutex_lock(fbsurf->buffer_mutex); + fbsurf->posted_buffers -= 1; + fbsurf->buffer_head = (fbsurf->buffer_head + 1) % fbsurf->num_buffers; + pipe_condvar_signal(fbsurf->buffer_available); + pipe_mutex_unlock(fbsurf->buffer_mutex); + } + return NULL; +} + +static void fbdev_destroy_buffers(struct fbdev_surface *fbsurf) +{ + struct native_fbdev_screen *fbdev_screen = fbsurf->fbdpy->fbdev_screen; + int buf; + /* Terminate buffer swap thread, wait for it to exit */ + pipe_mutex_lock(fbsurf->buffer_mutex); + fbsurf->terminate = 1; + pipe_condvar_signal(fbsurf->buffer_posted); + pipe_mutex_unlock(fbsurf->buffer_mutex); + 
pipe_thread_wait(fbsurf->bswap_thread); + pipe_thread_destroy(fbsurf->bswap_thread); + + /* Clean up synchronization primitives */ + pipe_condvar_destroy(fbsurf->buffer_posted); + pipe_condvar_destroy(fbsurf->buffer_available); + pipe_mutex_destroy(fbsurf->buffer_mutex); + for(buf=0; buf<fbsurf->num_buffers; ++buf) + fbdev_screen->destroy_drawable(fbdev_screen, fbsurf->drawable[buf]); + fbsurf->num_buffers = 0; +} - return (fbsurf->drawable.format != PIPE_FORMAT_NONE && - fbsurf->drawable.width && - fbsurf->drawable.height); +static bool fbdev_create_buffers(struct fbdev_surface *fbsurf, const struct fb_var_screeninfo *vinfo) +{ + int buf; + struct fbdev_display *fbdpy = fbsurf->fbdpy; + struct native_fbdev_screen *fbdev_screen = fbsurf->fbdpy->fbdev_screen; + bool fail = false; + if(fbsurf->num_buffers) /* if buffers already exist, destroy and recreate */ + fbdev_destroy_buffers(fbsurf); + /* By default, use maximum number of buffers possible given screen mode */ + fbsurf->num_buffers = MIN2(vinfo->yres_virtual / vinfo->yres, FBDEV_MAX_BUFFERS); + /* Allow overriding number of buffers to a lower amount from command line */ + if(getenv("EGL_FBDEV_BUFFERS")) + { + int requested_buffers = atoi(getenv("EGL_FBDEV_BUFFERS")); + if(requested_buffers > 0) + fbsurf->num_buffers = MIN2(fbsurf->num_buffers, requested_buffers); + } + printf("native_fbdev: %i buffers of %ix%i\n", + fbsurf->num_buffers, fbsurf->width, fbsurf->height); + if(fbsurf->num_buffers > 1) /* double or more buffered */ + { + for(buf=0; buf<fbsurf->num_buffers; ++buf) + { + fbsurf->drawable[buf] = fbdev_screen->create_drawable(fbdev_screen, + fbdpy->fd, /* want_fence */ true, + 0, vinfo->yres*buf, vinfo->xres, vinfo->yres); + if(fbsurf->drawable[buf] == NULL) + fail = true; + } + } else /* single buffer, at current virtual x/y offset */ + { + fbsurf->drawable[0] = fbdev_screen->create_drawable(fbdev_screen, + fbdpy->fd, /* want_fence */ false, + vinfo->xoffset, vinfo->yoffset, vinfo->xres, vinfo->yres); 
+ if(fbsurf->drawable[0] == NULL) + fail = true; + } + if(fail) + { + for(buf=0; buf<fbsurf->num_buffers; ++buf) + fbdev_screen->destroy_drawable(fbdev_screen, fbsurf->drawable[buf]); + return false; + } + fbsurf->terminate = 0; + fbsurf->buffer_head = 0; + fbsurf->buffer_tail = 0; + fbsurf->posted_buffers = 0; + pipe_mutex_init(fbsurf->buffer_mutex); + pipe_condvar_init(fbsurf->buffer_available); + pipe_condvar_init(fbsurf->buffer_posted); + fbsurf->bswap_thread = pipe_thread_create(fbdev_bswap_thread, fbsurf); + return true; } static boolean @@ -189,8 +317,6 @@ fbdev_surface_present(struct native_surface *nsurf, struct fbdev_display *fbdpy = fbsurf->fbdpy; boolean ret = FALSE; - if (ctrl->swap_interval) - return FALSE; if (ctrl->natt != NATIVE_ATTACHMENT_BACK_LEFT) return FALSE; @@ -201,27 +327,50 @@ fbdev_surface_present(struct native_surface *nsurf, if (ioctl(fbdpy->fd, FBIOGET_VSCREENINFO, &vinfo)) return FALSE; - /* present the surface */ - if (fbdev_surface_update_drawable(&fbsurf->base, &vinfo)) { - ret = resource_surface_present(fbsurf->rsurf, - ctrl->natt, (void *) &fbsurf->drawable); + if (fbsurf->width != vinfo.xres || fbsurf->height != vinfo.yres) + { + fbsurf->width = vinfo.xres; + fbsurf->height = vinfo.yres; + + if(!fbdev_create_buffers(fbsurf, &vinfo)) + return FALSE; + + if (resource_surface_set_size(fbsurf->rsurf, + fbsurf->width, fbsurf->height)) { + /* surface resized */ + fbsurf->sequence_number++; + fbdpy->event_handler->invalid_surface(&fbdpy->base, + &fbsurf->base, fbsurf->sequence_number); + } } + } - fbsurf->width = vinfo.xres; - fbsurf->height = vinfo.yres; - - if (resource_surface_set_size(fbsurf->rsurf, - fbsurf->width, fbsurf->height)) { - /* surface resized */ - fbsurf->sequence_number++; - fbdpy->event_handler->invalid_surface(&fbdpy->base, - &fbsurf->base, fbsurf->sequence_number); + int cur = 0; + fbsurf->swap_interval = ctrl->swap_interval; + if(fbsurf->num_buffers > 1) + { + /* wait for buffer to be available */ + 
pipe_mutex_lock(fbsurf->buffer_mutex); + while(fbsurf->posted_buffers >= fbsurf->num_buffers-1) + { + pipe_condvar_wait(fbsurf->buffer_available, fbsurf->buffer_mutex); } + cur = fbsurf->buffer_tail; + pipe_mutex_unlock(fbsurf->buffer_mutex); } - else { - /* the drawable never changes */ - ret = resource_surface_present(fbsurf->rsurf, - ctrl->natt, (void *) &fbsurf->drawable); + + /* present */ + ret = resource_surface_present(fbsurf->rsurf, + ctrl->natt, fbsurf->drawable[cur]); + + if(fbsurf->num_buffers > 1) + { + /* post the buffer */ + pipe_mutex_lock(fbsurf->buffer_mutex); + fbsurf->posted_buffers += 1; + fbsurf->buffer_tail = (fbsurf->buffer_tail + 1) % fbsurf->num_buffers; + pipe_condvar_signal(fbsurf->buffer_posted); + pipe_mutex_unlock(fbsurf->buffer_mutex); } return ret; @@ -239,6 +388,7 @@ fbdev_surface_destroy(struct native_surface *nsurf) struct fbdev_surface *fbsurf = fbdev_surface(nsurf); resource_surface_destroy(fbsurf->rsurf); + fbdev_destroy_buffers(fbsurf); FREE(fbsurf); } @@ -260,23 +410,26 @@ fbdev_display_create_window_surface(struct native_display *ndpy, return NULL; fbsurf->fbdpy = fbdpy; + fbsurf->swap_interval = 1; /* get current vinfo */ if (fbdpy->assume_fixed_vinfo) { vinfo = fbdpy->config_vinfo; } else { - memset(&vinfo, 0, sizeof(vinfo)); + memset(&fbsurf->vinfo, 0, sizeof(vinfo)); if (ioctl(fbdpy->fd, FBIOGET_VSCREENINFO, &vinfo)) { FREE(fbsurf); return NULL; } } + /* determine number of buffers */ fbsurf->width = vinfo.xres; fbsurf->height = vinfo.yres; - if (!fbdev_surface_update_drawable(&fbsurf->base, &vinfo)) { + if(!fbdev_create_buffers(fbsurf, &vinfo)) + { FREE(fbsurf); return NULL; } @@ -296,6 +449,7 @@ fbdev_display_create_window_surface(struct native_display *ndpy, fbsurf->base.present = fbdev_surface_present; fbsurf->base.validate = fbdev_surface_validate; fbsurf->base.wait = fbdev_surface_wait; + fbsurf->vinfo = vinfo; return &fbsurf->base; } @@ -393,10 +547,10 @@ fbdev_display_get_param(struct native_display *ndpy, 
switch (param) { case NATIVE_PARAM_PRESERVE_BUFFER: + case NATIVE_PARAM_MAX_SWAP_INTERVAL: val = 1; break; case NATIVE_PARAM_USE_NATIVE_BUFFER: - case NATIVE_PARAM_MAX_SWAP_INTERVAL: default: val = 0; break; @@ -411,6 +565,9 @@ fbdev_display_destroy(struct native_display *ndpy) struct fbdev_display *fbdpy = fbdev_display(ndpy); ndpy_uninit(&fbdpy->base); + + fbdpy->fbdev_screen->destroy(fbdpy->fbdev_screen); + close(fbdpy->fd); FREE(fbdpy); } @@ -419,24 +576,33 @@ static boolean fbdev_display_init_screen(struct native_display *ndpy) { struct fbdev_display *fbdpy = fbdev_display(ndpy); - struct sw_winsys *ws; - - ws = fbdev_create_sw_winsys(fbdpy->fd); - if (!ws) + const struct native_fbdev_driver *driver = NULL; + const char *driver_name; + + driver_name = os_get_option("EGL_FBDEV_DRIVER"); + if(!driver_name) + driver_name = "etna"; /* XXX probe drivers */ + + if(!strcmp(driver_name, "etna")) + driver = etna_fbdev_get_driver(); + else if(!strcmp(driver_name, "swrast")) + driver = swrast_fbdev_get_driver(); + if(!driver) return FALSE; - fbdpy->base.screen = fbdpy->event_handler->new_sw_screen(&fbdpy->base, ws); - if (!fbdpy->base.screen) { - if (ws->destroy) - ws->destroy(ws); + fbdpy->fbdev_screen = driver->create_screen(driver, fbdpy->fd, &fbdpy->base, fbdpy->event_handler); + if(!fbdpy->fbdev_screen) return FALSE; - } + fbdpy->base.screen = fbdpy->fbdev_screen->screen; if (!fbdpy->base.screen->is_format_supported(fbdpy->base.screen, fbdpy->config.color_format, PIPE_TEXTURE_2D, 0, PIPE_BIND_RENDER_TARGET)) { + fbdpy->fbdev_screen->destroy(fbdpy->fbdev_screen); fbdpy->base.screen->destroy(fbdpy->base.screen); fbdpy->base.screen = NULL; + printf("native_fbdev: color format for screen (%i) not supported by driver\n", fbdpy->config.color_format); + /* XXX try next driver */ return FALSE; } @@ -460,6 +626,8 @@ fbdev_display_init_config(struct native_display *ndpy) nconf->window_bit = TRUE; + printf("fbdev_display succesful\n"); + return TRUE; } diff --git 
a/src/gallium/state_trackers/egl/fbdev/native_fbdev.h b/src/gallium/state_trackers/egl/fbdev/native_fbdev.h
new file mode 100644
index 0000000000..d02fde1fd7
--- /dev/null
+++ b/src/gallium/state_trackers/egl/fbdev/native_fbdev.h
@@ -0,0 +1,76 @@
+#ifndef __NATIVE_FBDEV_H__
+#define __NATIVE_FBDEV_H__
+
+/* Keep this header self-contained: bool and enum pipe_format are used in
+ * the declarations below. */
+#include <stdbool.h>
+
+#include "pipe/p_format.h"
+
+struct native_display;
+struct native_fbdev_screen;
+struct native_event_handler;
+struct fb_var_screeninfo;
+struct pipe_screen;
+struct pipe_fence_handle;
+
+/* Framebuffer rendering driver. This top-level structure is used
+ * for probing the device and creating a screen.
+ */
+struct native_fbdev_driver
+{
+   const char *driver_name;
+   /* Probe for the device.
+    * @returns true if this device can be used, false otherwise.
+    */
+   bool (*probe)(const struct native_fbdev_driver *driver);
+   /* Create screen, given framebuffer handle.
+    * (XXX passing a framebuffer handle is necessary here because sw_winsys needs
+    * a framebuffer handle on creation, maybe get rid of this)
+    */
+   struct native_fbdev_screen *(*create_screen)(
+         const struct native_fbdev_driver *driver,
+         int fd,
+         struct native_display *display,
+         const struct native_event_handler *event_handler);
+};
+
+/* Structure that represents a screen for a fbdev_driver.
+ */
+struct native_fbdev_screen
+{
+   /* Get pipe screen associated with this fbdev screen. */
+   struct pipe_screen *screen;
+
+   /* Destroy this fbdev_screen.
+    * This does not destroy the pipe_screen, this is the responsibility
+    * of the client code.
+    */
+   void (*destroy)(struct native_fbdev_screen *fbdev_screen);
+
+   /* Create window drawable for subset of (the virtual resolution of) a framebuffer.
+    * This returns a drawable that can be passed to screen->flush_frontbuffer
+    * or resource_surface_present.
+    *
+    * @input fd fb file descriptor. It is possible to provide another
+    *    fd handle than the one this screen was created with, to be able to render to multiple
+    *    framebuffers (or subsets thereof) at once.
+    */
+   void *(*create_drawable)(struct native_fbdev_screen *fbdev_screen,
+         int fd, bool want_fence,
+         unsigned xoffset, unsigned yoffset,
+         unsigned width, unsigned height);
+
+   /* Destroy drawable handle. */
+   void (*destroy_drawable)(struct native_fbdev_screen *fbdev_screen, void *drawable);
+
+   /* Get fence handle to track finished rendering and copying of the last
+    * frame (after calling flush_frontbuffer with the handle of a drawable).
+    */
+   struct pipe_fence_handle *(*get_drawable_fence)(struct native_fbdev_screen *fbdev_screen, void *drawable);
+};
+
+enum pipe_format
+vinfo_to_format(const struct fb_var_screeninfo *vinfo);
+
+#endif
diff --git a/src/gallium/state_trackers/egl/fbdev/native_fbdev_swrast.c b/src/gallium/state_trackers/egl/fbdev/native_fbdev_swrast.c
new file mode 100644
index 0000000000..15f0e3f65d
--- /dev/null
+++ b/src/gallium/state_trackers/egl/fbdev/native_fbdev_swrast.c
@@ -0,0 +1,193 @@
+/*
+ * Mesa 3-D graphics library
+ * Version: 7.9
+ *
+ * Copyright (C) 2013 Mesa developers
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Wladimir van der Laan <laanwj@gmail.com>
+ */
+
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <linux/fb.h>
+#include <stdio.h>
+
+#include "pipe/p_screen.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_pointer.h"
+#include "os/os_thread.h"
+
+#include "common/native.h"
+#include "common/native_helper.h"
+
+#include "fbdev/native_fbdev.h"
+#include "fbdev/native_fbdev_swrast.h"
+#include "fbdev/fbdev_sw_winsys.h"
+
+struct swrast_fbdev_screen
+{
+   struct native_fbdev_screen base;
+   int fd; /* fbdev fd used to create fbdev_sw_winsys */
+};
+
+/* Recover the swrast wrapper from its embedded base struct. */
+static inline struct swrast_fbdev_screen *swrast_fbdev_screen(struct native_fbdev_screen *fbdev_screen)
+{
+   return (struct swrast_fbdev_screen*)fbdev_screen;
+}
+
+/* Free the wrapper only; the pipe_screen is not destroyed here. */
+static void swrast_fbdev_screen_destroy(struct native_fbdev_screen *fbdev_screen)
+{
+   FREE(fbdev_screen);
+}
+
+/* Create a drawable covering a sub-rectangle of the framebuffer behind fd.
+ * The rectangle is clamped to the virtual resolution. Returns NULL when fd
+ * is not the fd this screen was created with, when the fbdev queries fail,
+ * when the clamped rectangle is empty, when vinfo_to_format() yields
+ * PIPE_FORMAT_NONE, or when allocation fails. want_fence is ignored.
+ */
+static void *swrast_fbdev_create_drawable(struct native_fbdev_screen *hfbdev_screen,
+      int fd, bool want_fence,
+      unsigned xoffset, unsigned yoffset,
+      unsigned width, unsigned height)
+{
+   struct swrast_fbdev_screen *fbdev_screen = swrast_fbdev_screen(hfbdev_screen);
+   struct fbdev_sw_drawable *drawable;
+   struct fb_var_screeninfo vinfo;
+   struct fb_fix_screeninfo finfo;
+
+   if (fd != fbdev_screen->fd)
+      return NULL;
+   if (ioctl(fd, FBIOGET_VSCREENINFO, &vinfo))
+      return NULL;
+   if (ioctl(fd, FBIOGET_FSCREENINFO, &finfo))
+      return NULL;
+
+   /* sanitize the values */
+   if (xoffset + width > vinfo.xres_virtual) {
+      if (xoffset > vinfo.xres_virtual)
+         width = 0;
+      else
+         width = vinfo.xres_virtual - xoffset;
+   }
+   if (yoffset + height > vinfo.yres_virtual) {
+      if (yoffset > vinfo.yres_virtual)
+         height = 0;
+      else
+         height = vinfo.yres_virtual - yoffset;
+   }
+
+   /* Allocate only after every early return above, so a failed query
+    * cannot leak the drawable. */
+   drawable = CALLOC_STRUCT(fbdev_sw_drawable);
+   if (!drawable)
+      return NULL;
+
+   drawable->format = vinfo_to_format(&vinfo);
+   drawable->x = xoffset;
+   drawable->y = yoffset;
+   drawable->width = width;
+   drawable->height = height;
+
+   if(drawable->format == PIPE_FORMAT_NONE ||
+      !drawable->width || !drawable->height)
+   {
+      FREE(drawable);
+      return NULL;
+   }
+
+   return drawable;
+}
+
+static void swrast_fbdev_destroy_drawable(struct native_fbdev_screen *fbdev_screen, void *hdrawable)
+{
+   FREE(hdrawable);
+}
+
+/* Always returns NULL: this driver keeps no fence per drawable. */
+static struct pipe_fence_handle *swrast_fbdev_get_drawable_fence(struct native_fbdev_screen *fbdev_screen, void *hdrawable)
+{
+   return NULL;
+}
+
+static bool swrast_fbdev_probe(const struct native_fbdev_driver *driver)
+{
+   return true;
+}
+
+/* Create the software screen on top of an fbdev sw_winsys for fd.
+ * Returns NULL on any failure, releasing whatever was created so far.
+ */
+static struct native_fbdev_screen *swrast_fbdev_create_screen(
+      const struct native_fbdev_driver *driver,
+      int fd,
+      struct native_display *display,
+      const struct native_event_handler *event_handler)
+{
+   struct swrast_fbdev_screen *fscreen;
+   struct sw_winsys *ws;
+
+   ws = fbdev_create_sw_winsys(fd);
+   if (!ws)
+      return NULL;
+
+   fscreen = CALLOC_STRUCT(swrast_fbdev_screen);
+   if (!fscreen) {
+      if (ws->destroy)
+         ws->destroy(ws);
+      return NULL;
+   }
+
+   fscreen->fd = fd;
+   fscreen->base.screen = event_handler->new_sw_screen(display, ws);
+   if(!fscreen->base.screen)
+   {
+      /* new_sw_screen failed; destroy the winsys created above */
+      if (ws->destroy)
+         ws->destroy(ws);
+      FREE(fscreen);
+      return NULL;
+   }
+
+   fscreen->base.destroy = swrast_fbdev_screen_destroy;
+   fscreen->base.create_drawable = swrast_fbdev_create_drawable;
+   fscreen->base.destroy_drawable = swrast_fbdev_destroy_drawable;
+   fscreen->base.get_drawable_fence = swrast_fbdev_get_drawable_fence;
+   return &fscreen->base;
+}
+
+static const struct native_fbdev_driver fbdev_driver = {
+   "swrast", /* name */
+   swrast_fbdev_probe,
+   swrast_fbdev_create_screen
+};
+
+const struct native_fbdev_driver *swrast_fbdev_get_driver(void)
+{
+   return &fbdev_driver;
+}
diff --git a/src/gallium/state_trackers/egl/fbdev/native_fbdev_swrast.h b/src/gallium/state_trackers/egl/fbdev/native_fbdev_swrast.h
new file mode 100644 index 0000000000..57973a45d9 --- /dev/null +++ b/src/gallium/state_trackers/egl/fbdev/native_fbdev_swrast.h @@ -0,0 +1,8 @@ +#ifndef __NATIVE_FBDEV_SWRAST_H__ +#define __NATIVE_FBDEV_SWRAST_H__ + +#include "fbdev/native_fbdev.h" + +const struct native_fbdev_driver *swrast_fbdev_get_driver(void); + +#endif diff --git a/src/gallium/targets/egl-static/Makefile.am b/src/gallium/targets/egl-static/Makefile.am index e64de4df7b..df1eb73c36 100644 --- a/src/gallium/targets/egl-static/Makefile.am +++ b/src/gallium/targets/egl-static/Makefile.am @@ -223,6 +223,17 @@ egl_gallium_la_LIBADD += \ endif +if HAVE_GALLIUM_ETNA +AM_CPPFLAGS += -D_EGL_PIPE_ETNA=1 +egl_gallium_la_LIBADD += \ + $(top_builddir)/src/gallium/winsys/etna/drm/libetnadrm.la \ + $(top_builddir)/src/gallium/drivers/etna/libetna.la \ + $(ETNA_LIBS) +if HAVE_EGL_PLATFORM_FBDEV +egl_gallium_la_LIBADD += $(top_builddir)/src/gallium/winsys/etna/fbdev/libetnafbdev.la +endif +endif + if HAVE_GALLIUM_SOFTPIPE AM_CPPFLAGS += -DGALLIUM_SOFTPIPE -DGALLIUM_RBUG -DGALLIUM_TRACE egl_gallium_la_LIBADD += \ diff --git a/src/gallium/targets/egl-static/egl_pipe.c b/src/gallium/targets/egl-static/egl_pipe.c index 4f960a52ff..7966d65617 100644 --- a/src/gallium/targets/egl-static/egl_pipe.c +++ b/src/gallium/targets/egl-static/egl_pipe.c @@ -70,6 +70,10 @@ #if _EGL_PIPE_FREEDRENO #include "freedreno/drm/freedreno_drm_public.h" #endif +#if _EGL_PIPE_ETNA +/* for etnaviv */ +#include "etna/drm/etna_drm_public.h" +#endif static struct pipe_screen * pipe_i915_create_screen(int fd) @@ -215,6 +219,24 @@ pipe_freedreno_create_screen(int fd) #endif } +static struct pipe_screen * +pipe_etna_create_screen(int fd) +{ +#if _EGL_PIPE_ETNA + struct pipe_screen *screen; + + screen = etna_drm_screen_create(fd); + if (!screen) + return NULL; + + screen = debug_screen_wrap(screen); + + return screen; +#else + return NULL; +#endif +} + struct pipe_screen * egl_pipe_create_drm_screen(const char *name, int fd) { @@ -234,6 +256,8 
@@ egl_pipe_create_drm_screen(const char *name, int fd) return pipe_vmwgfx_create_screen(fd); else if ((strcmp(name, "kgsl") == 0) || (strcmp(name, "msm") == 0)) return pipe_freedreno_create_screen(fd); + else if (strcmp(name, "etna") == 0) + return pipe_etna_create_screen(fd); else return NULL; } diff --git a/src/gallium/winsys/Makefile.am b/src/gallium/winsys/Makefile.am index ab1acc37b4..a7fa258557 100644 --- a/src/gallium/winsys/Makefile.am +++ b/src/gallium/winsys/Makefile.am @@ -59,6 +59,13 @@ if HAVE_GALLIUM_NOUVEAU SUBDIRS += nouveau/drm endif +if HAVE_GALLIUM_ETNA +SUBDIRS += etna/drm +if HAVE_EGL_PLATFORM_FBDEV +SUBDIRS += etna/fbdev +endif +endif + if NEED_RADEON_DRM_WINSYS SUBDIRS += radeon/drm endif diff --git a/src/gallium/winsys/etna/drm/Makefile.am b/src/gallium/winsys/etna/drm/Makefile.am new file mode 100644 index 0000000000..c87adad9b2 --- /dev/null +++ b/src/gallium/winsys/etna/drm/Makefile.am @@ -0,0 +1,32 @@ +# Copyright © 2012 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+include $(top_srcdir)/src/gallium/Automake.inc
+
+AM_CFLAGS = \
+	-I$(top_srcdir)/src/gallium/drivers \
+	$(GALLIUM_CFLAGS) \
+	$(ETNA_CFLAGS)
+
+noinst_LTLIBRARIES = libetnadrm.la
+
+libetnadrm_la_SOURCES = etna_drm_winsys.c
diff --git a/src/gallium/winsys/etna/drm/etna_drm_public.h b/src/gallium/winsys/etna/drm/etna_drm_public.h
new file mode 100644
index 0000000000..28f44aa9b0
--- /dev/null
+++ b/src/gallium/winsys/etna/drm/etna_drm_public.h
@@ -0,0 +1,9 @@
+
+#ifndef __ETNA_DRM_PUBLIC_H__
+#define __ETNA_DRM_PUBLIC_H__
+
+struct pipe_screen;
+
+struct pipe_screen *etna_drm_screen_create(int drmFD);
+
+#endif
diff --git a/src/gallium/winsys/etna/drm/etna_drm_winsys.c b/src/gallium/winsys/etna/drm/etna_drm_winsys.c
new file mode 100644
index 0000000000..d43c726908
--- /dev/null
+++ b/src/gallium/winsys/etna/drm/etna_drm_winsys.c
@@ -0,0 +1,32 @@
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_format.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+
+#include "etna_drm_public.h"
+
+#include "etnaviv/viv.h"
+#include "etna/etna_screen.h"
+
+#include <stdio.h>
+
+/* Create an etna pipe_screen.
+ * The fd argument is not used: the device is opened through viv_open()
+ * instead. Returns NULL when viv_open() fails.
+ */
+struct pipe_screen *
+etna_drm_screen_create(int fd)
+{
+    struct viv_conn *dev = 0;
+    /* XXX this handle will leak */
+    int rv = viv_open(VIV_HW_3D, &dev);
+    if(rv != 0)
+    {
+        fprintf(stderr, "Error opening device\n");
+        return NULL;
+    }
+    fprintf(stderr, "Succesfully opened device\n");
+
+    return etna_screen_create(dev);
+}
diff --git a/src/gallium/winsys/etna/fbdev/Makefile.am b/src/gallium/winsys/etna/fbdev/Makefile.am
new file mode 100644
index 0000000000..1ea996f123
--- /dev/null
+++ b/src/gallium/winsys/etna/fbdev/Makefile.am
@@ -0,0 +1,33 @@
+# Copyright © 2012 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+include $(top_srcdir)/src/gallium/Automake.inc
+
+AM_CFLAGS = \
+	-I$(top_srcdir)/src/gallium/drivers \
+	-I$(top_srcdir)/src/gallium/state_trackers/egl \
+	$(GALLIUM_CFLAGS) \
+	$(ETNA_CFLAGS)
+
+noinst_LTLIBRARIES = libetnafbdev.la
+
+libetnafbdev_la_SOURCES = etna_fbdev_winsys.c
diff --git a/src/gallium/winsys/etna/fbdev/etna_fbdev_public.h b/src/gallium/winsys/etna/fbdev/etna_fbdev_public.h
new file mode 100644
index 0000000000..f8b848d6d0
--- /dev/null
+++ b/src/gallium/winsys/etna/fbdev/etna_fbdev_public.h
@@ -0,0 +1,8 @@
+#ifndef __ETNA_FBDEV_PUBLIC_H__
+#define __ETNA_FBDEV_PUBLIC_H__
+
+#include "fbdev/native_fbdev.h"
+
+const struct native_fbdev_driver *etna_fbdev_get_driver(void);
+
+#endif
diff --git a/src/gallium/winsys/etna/fbdev/etna_fbdev_winsys.c b/src/gallium/winsys/etna/fbdev/etna_fbdev_winsys.c
new file mode 100644
index 0000000000..3a2938e8da
--- /dev/null
+++ b/src/gallium/winsys/etna/fbdev/etna_fbdev_winsys.c
@@ -0,0 +1,151 @@
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_format.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+
+#include "common/native.h"
+
+#include "etna_fbdev_public.h"
+
+#include "etna/etna_screen.h"
+
+#include <etnaviv/viv.h>
+#include <etnaviv/etna_fb.h>
+
+#include <stdio.h>
+#include <linux/fb.h>
+#include <sys/ioctl.h>
+
+struct etna_fbdev_screen
+{
+    struct native_fbdev_screen base;
+    /* anything? */
+};
+
+/* Recover the etna wrapper from its embedded base struct. */
+static inline struct etna_fbdev_screen *etna_fbdev_screen(struct native_fbdev_screen *fbdev_screen)
+{
+    return (struct etna_fbdev_screen*)fbdev_screen;
+}
+
+/* Free the wrapper only; the pipe_screen is not destroyed here. */
+static void etna_fbdev_screen_destroy(struct native_fbdev_screen *fbdev_screen)
+{
+    FREE(fbdev_screen);
+}
+
+/* Describe a sub-rectangle of the framebuffer behind fd as an etna_rs_target.
+ * The rectangle is clamped to the virtual resolution. Returns NULL when the
+ * fbdev queries fail, when the clamped rectangle is empty, when
+ * etna_fb_get_format() rejects the pixel layout, or when allocation fails.
+ */
+static void *etna_fbdev_create_drawable(struct native_fbdev_screen *fbdev_screen,
+        int fd, bool want_fence,
+        unsigned xoffset, unsigned yoffset,
+        unsigned width, unsigned height)
+{
+    struct etna_rs_target *drawable;
+    struct fb_var_screeninfo vinfo;
+    struct fb_fix_screeninfo finfo;
+
+    if (ioctl(fd, FBIOGET_VSCREENINFO, &vinfo))
+        return NULL;
+    if (ioctl(fd, FBIOGET_FSCREENINFO, &finfo))
+        return NULL;
+
+    /* sanitize the values */
+    if (xoffset + width > vinfo.xres_virtual) {
+        if (xoffset > vinfo.xres_virtual)
+            width = 0;
+        else
+            width = vinfo.xres_virtual - xoffset;
+    }
+    if (yoffset + height > vinfo.yres_virtual) {
+        if (yoffset > vinfo.yres_virtual)
+            height = 0;
+        else
+            height = vinfo.yres_virtual - yoffset;
+    }
+
+    /* Allocate only after the early returns above, so a failed ioctl
+     * cannot leak the drawable. */
+    drawable = CALLOC_STRUCT(etna_rs_target);
+    if (!drawable)
+        return NULL;
+
+    drawable->want_fence = want_fence;
+    drawable->width = width;
+    drawable->height = height;
+    drawable->addr = finfo.smem_start +
+        finfo.line_length * yoffset +
+        vinfo.bits_per_pixel / 8 * xoffset;
+    drawable->stride = finfo.line_length;
+
+    if(width == 0 || height == 0 ||
+       !etna_fb_get_format(&vinfo, &drawable->rs_format, &drawable->swap_rb))
+    {
+        FREE(drawable);
+        return NULL;
+    }
+    return drawable;
+}
+
+/* Clear the drawable's fence through the screen's fence_reference hook, then free it. */
+static void etna_fbdev_destroy_drawable(struct native_fbdev_screen *fbdev_screen, void *hdrawable)
+{
+    struct etna_rs_target *drawable = (struct etna_rs_target*)hdrawable;
+    fbdev_screen->screen->fence_reference(fbdev_screen->screen, &drawable->fence, NULL);
+    FREE(drawable);
+}
+
+static struct pipe_fence_handle *etna_fbdev_get_drawable_fence(struct native_fbdev_screen *fbdev_screen, void *hdrawable)
+{
+    struct etna_rs_target *drawable = (struct etna_rs_target*)hdrawable;
+    return (struct pipe_fence_handle*)drawable->fence;
+}
+
+static bool etna_fbdev_probe(const struct native_fbdev_driver *driver)
+{
+    return true;
+}
+
+/* Create the etna screen through the event handler's new_drm_screen hook.
+ * fd is not forwarded; -1 is passed in its place.
+ * Returns NULL when allocation or screen creation fails.
+ */
+static struct native_fbdev_screen *etna_fbdev_create_screen(
+        const struct native_fbdev_driver *driver,
+        int fd,
+        struct native_display *display,
+        const struct native_event_handler *event_handler)
+{
+    struct etna_fbdev_screen *fscreen = CALLOC_STRUCT(etna_fbdev_screen);
+
+    if (!fscreen)
+        return NULL;
+
+    fscreen->base.screen = event_handler->new_drm_screen(display, "etna", -1);
+    if(!fscreen->base.screen)
+    {
+        FREE(fscreen);
+        return NULL;
+    }
+
+    fscreen->base.destroy = etna_fbdev_screen_destroy;
+    fscreen->base.create_drawable = etna_fbdev_create_drawable;
+    fscreen->base.destroy_drawable = etna_fbdev_destroy_drawable;
+    fscreen->base.get_drawable_fence = etna_fbdev_get_drawable_fence;
+    return &fscreen->base;
+}
+
+static const struct native_fbdev_driver fbdev_driver = {
+    "etna", /* name */
+    etna_fbdev_probe,
+    etna_fbdev_create_screen
+};
+
+const struct native_fbdev_driver *etna_fbdev_get_driver(void)
+{
+    return &fbdev_driver;
+}
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 917d071a2c..1672350141 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2848,6 +2848,7 @@ struct gl_shader_compiler_options
    GLboolean EmitNoMainReturn; /**< Emit CONT/RET opcodes? */
    GLboolean EmitNoNoise; /**< Emit NOISE opcodes? */
    GLboolean EmitNoPow; /**< Emit POW opcodes? */
+   GLboolean EmitNoLrp; /**< Emit LRP opcodes? */
    GLboolean LowerClipDistance; /**< Lower gl_ClipDistance from float[8] to vec4[2]?
*/ /** diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 59cf1232ad..4b946ef47c 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -3002,7 +3002,8 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) do_mat_op_to_vec(ir); lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP - | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); + | ((options->EmitNoPow) ? POW_TO_EXP2 : 0) + | ((options->EmitNoLrp) ? LRP_TO_ARITH : 0))); progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 33cd1295bf..c5ce8e328a 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -215,6 +215,8 @@ void st_init_limits(struct st_context *st) pc->MaxEnvParams = MIN2(pc->MaxParameters, MAX_PROGRAM_ENV_PARAMS); options->EmitNoNoise = TRUE; + options->EmitNoPow = !screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_TGSI_POW_SUPPORTED); + options->EmitNoLrp = !screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_TGSI_LRP_SUPPORTED); /* TODO: make these more fine-grained if anyone needs it */ options->MaxIfDepth = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH); @@ -408,6 +410,7 @@ void st_init_extensions(struct st_context *st) { o(EXT_blend_equation_separate), PIPE_CAP_BLEND_EQUATION_SEPARATE }, { o(EXT_draw_buffers2), PIPE_CAP_INDEP_BLEND_ENABLE }, { o(EXT_stencil_two_side), PIPE_CAP_TWO_SIDED_STENCIL }, + { o(EXT_texture3D), PIPE_CAP_MAX_TEXTURE_3D_LEVELS }, { o(EXT_texture_array), PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS }, { o(EXT_texture_filter_anisotropic), PIPE_CAP_ANISOTROPIC_FILTER }, { o(EXT_texture_mirror_clamp), PIPE_CAP_TEXTURE_MIRROR_CLAMP }, diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 162864f6cb..cfd4887cef 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -5410,6 +5410,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) CARRY_TO_ARITH | BORROW_TO_ARITH | (options->EmitNoPow ? POW_TO_EXP2 : 0) | + (options->EmitNoLrp ? LRP_TO_ARITH : 0) | (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0)); lower_ubo_reference(prog->_LinkedShaders[i], ir); |