From 293435cf5955935a6ce43bf59a6d743aad8be6d8 Mon Sep 17 00:00:00 2001 From: Tim Rowley Date: Mon, 19 Oct 2015 13:31:29 -0500 Subject: [PATCH 1/3] Initial public Mesa+SWR --- README.md | 33 + configure.ac | 54 + src/gallium/Makefile.am | 4 + src/gallium/SConscript | 1 + src/gallium/auxiliary/gallivm/lp_bld_flow.h | 7 + src/gallium/auxiliary/gallivm/lp_bld_init.h | 7 + src/gallium/auxiliary/gallivm/lp_bld_sample.h | 6 + src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 8 + .../auxiliary/target-helpers/inline_sw_helper.h | 13 +- .../target-helpers/inline_wrapper_sw_helper.h | 2 +- src/gallium/drivers/swr/.clang-format | 64 + src/gallium/drivers/swr/Automake.inc | 28 + src/gallium/drivers/swr/Makefile.am | 82 ++ src/gallium/drivers/swr/Makefile.sources | 114 ++ src/gallium/drivers/swr/SConscript | 69 + src/gallium/drivers/swr/swr_clear.cpp | 141 ++ src/gallium/drivers/swr/swr_context.cpp | 392 ++++++ src/gallium/drivers/swr/swr_context.h | 172 +++ src/gallium/drivers/swr/swr_context_llvm.h | 124 ++ src/gallium/drivers/swr/swr_draw.cpp | 277 ++++ src/gallium/drivers/swr/swr_fence.cpp | 141 ++ src/gallium/drivers/swr/swr_fence.h | 73 ++ src/gallium/drivers/swr/swr_memory.h | 99 ++ src/gallium/drivers/swr/swr_public.h | 40 + src/gallium/drivers/swr/swr_query.cpp | 334 +++++ src/gallium/drivers/swr/swr_query.h | 48 + src/gallium/drivers/swr/swr_resource.h | 98 ++ src/gallium/drivers/swr/swr_scratch.cpp | 116 ++ src/gallium/drivers/swr/swr_scratch.h | 63 + src/gallium/drivers/swr/swr_screen.cpp | 666 ++++++++++ src/gallium/drivers/swr/swr_screen.h | 52 + src/gallium/drivers/swr/swr_shader.cpp | 608 +++++++++ src/gallium/drivers/swr/swr_shader.h | 61 + src/gallium/drivers/swr/swr_state.cpp | 1344 ++++++++++++++++++++ src/gallium/drivers/swr/swr_state.h | 240 ++++ src/gallium/drivers/swr/swr_tex_sample.cpp | 338 +++++ src/gallium/drivers/swr/swr_tex_sample.h | 47 + src/gallium/targets/libgl-xlib/Makefile.am | 5 + src/gallium/targets/libgl-xlib/SConscript | 4 + 
src/gallium/targets/osmesa/Makefile.am | 6 + 40 files changed, 5979 insertions(+), 2 deletions(-) create mode 100644 README.md create mode 100644 src/gallium/drivers/swr/.clang-format create mode 100644 src/gallium/drivers/swr/Automake.inc create mode 100644 src/gallium/drivers/swr/Makefile.am create mode 100644 src/gallium/drivers/swr/Makefile.sources create mode 100644 src/gallium/drivers/swr/SConscript create mode 100644 src/gallium/drivers/swr/swr_clear.cpp create mode 100644 src/gallium/drivers/swr/swr_context.cpp create mode 100644 src/gallium/drivers/swr/swr_context.h create mode 100644 src/gallium/drivers/swr/swr_context_llvm.h create mode 100644 src/gallium/drivers/swr/swr_draw.cpp create mode 100644 src/gallium/drivers/swr/swr_fence.cpp create mode 100644 src/gallium/drivers/swr/swr_fence.h create mode 100644 src/gallium/drivers/swr/swr_memory.h create mode 100644 src/gallium/drivers/swr/swr_public.h create mode 100644 src/gallium/drivers/swr/swr_query.cpp create mode 100644 src/gallium/drivers/swr/swr_query.h create mode 100644 src/gallium/drivers/swr/swr_resource.h create mode 100644 src/gallium/drivers/swr/swr_scratch.cpp create mode 100644 src/gallium/drivers/swr/swr_scratch.h create mode 100644 src/gallium/drivers/swr/swr_screen.cpp create mode 100644 src/gallium/drivers/swr/swr_screen.h create mode 100644 src/gallium/drivers/swr/swr_shader.cpp create mode 100644 src/gallium/drivers/swr/swr_shader.h create mode 100644 src/gallium/drivers/swr/swr_state.cpp create mode 100644 src/gallium/drivers/swr/swr_state.h create mode 100644 src/gallium/drivers/swr/swr_tex_sample.cpp create mode 100644 src/gallium/drivers/swr/swr_tex_sample.h diff --git a/README.md b/README.md new file mode 100644 index 0000000..3bf3031 --- /dev/null +++ b/README.md @@ -0,0 +1,33 @@ +OpenSWR-Mesa +============ + +Overview +-------- + +This is the repository for the integration work combining the high +performance, highly scalable core SWR rasterizer with Mesa. 
A more +complete introduction and discussion towards upstreaming to the Mesa +project can be found on the mesa-dev mailing list. + +Notes +----- + +* SWR is set as the default software renderer. Use +GALLIUM_DRIVER=llvmpipe to switch to Mesa's standard rasterizer. This +particular change is to make it easier for people evaluating OpenSWR, +and will not be upstreamed. + +* LLVM-3.6 is required. + +* To build SWR with autoconf, include the following in the config +line: "--with-gallium-drivers=swr --enable-swr-native". + +* Build defaults to AVX2; for a version to run on AVX build with + "--with-swr-arch=AVX". + +* To build SWR with SCons, nothing needs to be done - it is built by + default. + +* Code for the driver is in src/gallium/drivers/swr + +* Code for the rasterizer is in src/gallium/drivers/swr/rasterizer diff --git a/configure.ac b/configure.ac index d3df195..f216dc7 100644 --- a/configure.ac +++ b/configure.ac @@ -1753,6 +1753,11 @@ AC_SUBST([LLVM_LIBS]) AC_SUBST([LLVM_LDFLAGS]) AC_SUBST([LLVM_INCLUDEDIR]) AC_SUBST([LLVM_VERSION]) +AC_SUBST([SWR_LIBDIR]) +AC_SUBST([SWR_ARCH]) +AC_SUBST([SWR_ARCH_FLAG]) +AC_SUBST([SWR_NATIVE]) +AC_SUBST([SWR_INCLUDEDIR]) AC_SUBST([CLANG_RESOURCE_DIR]) case "x$enable_opengl$enable_gles1$enable_gles2" in @@ -2177,6 +2182,9 @@ if test -n "$with_gallium_drivers"; then HAVE_GALLIUM_LLVMPIPE=yes fi ;; + xswr) + HAVE_GALLIUM_SWR=yes + ;; xvc4) HAVE_GALLIUM_VC4=yes gallium_require_drm "vc4" @@ -2243,6 +2251,41 @@ if test "x$MESA_LLVM" != x0; then fi fi +dnl SWR include/library + +AC_ARG_WITH([swr-includedir], + [AS_HELP_STRING([--with-swr-includedir], [Path to SWR includes])], + [SWR_INCLUDEDIR="$withval"], + [SWR_INCLUDEDIR='']) + +AC_ARG_WITH([swr-libdir], + [AS_HELP_STRING([--with-swr-libdir], [Path to SWR library])], + [SWR_LIBDIR="$withval"], + [SWR_LIBDIR='']) + +AC_ARG_WITH([swr-arch], + [AS_HELP_STRING([--with-swr-arch], [AVX architecture for swr (AVX | CORE-AVX2) ])], + [SWR_ARCH="$withval"], + [SWR_ARCH="CORE-AVX2"]) + 
+case "$SWR_ARCH" in +"AVX") + SWR_ARCH_FLAG='-march=core-avx-i -DKNOB_ARCH=KNOB_ARCH_AVX ' + ;; +"CORE-AVX2") + SWR_ARCH_FLAG='-march=core-avx2 -DKNOB_ARCH=KNOB_ARCH_AVX2 ' + ;; +**) + SWR_ARCH_FLAG='-march=core-avx2 -DKNOB_ARCH=KNOB_ARCH_AVX2 ' +esac + +AC_ARG_ENABLE([swr-native], + [AS_HELP_STRING([--enable-swr-native], + [use in-tree version of SWR core @<:@default=disabled@:>@])], + [enable_swr_native="$enableval"], + [enable_swr_native=no] +) + AM_CONDITIONAL(HAVE_GALLIUM_SVGA, test "x$HAVE_GALLIUM_SVGA" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_I915, test "x$HAVE_GALLIUM_I915" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_ILO, test "x$HAVE_GALLIUM_ILO" = xyes) @@ -2255,6 +2298,8 @@ AM_CONDITIONAL(HAVE_GALLIUM_NOUVEAU, test "x$HAVE_GALLIUM_NOUVEAU" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_FREEDRENO, test "x$HAVE_GALLIUM_FREEDRENO" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_SOFTPIPE, test "x$HAVE_GALLIUM_SOFTPIPE" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_LLVMPIPE, test "x$HAVE_GALLIUM_LLVMPIPE" = xyes) +AM_CONDITIONAL(HAVE_GALLIUM_SWR, test "x$HAVE_GALLIUM_SWR" = xyes) +AM_CONDITIONAL(SWR_NATIVE, test "x$enable_swr_native" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_VC4, test "x$HAVE_GALLIUM_VC4" = xyes) AM_CONDITIONAL(HAVE_GALLIUM_STATIC_TARGETS, test "x$enable_shared_pipe_drivers" = xno) @@ -2374,6 +2419,7 @@ AC_CONFIG_FILES([Makefile src/gallium/drivers/rbug/Makefile src/gallium/drivers/softpipe/Makefile src/gallium/drivers/svga/Makefile + src/gallium/drivers/swr/Makefile src/gallium/drivers/trace/Makefile src/gallium/drivers/vc4/Makefile src/gallium/state_trackers/clover/Makefile @@ -2562,6 +2608,14 @@ if test "x$MESA_LLVM" = x1; then echo " LLVM_LDFLAGS: $LLVM_LDFLAGS" echo "" fi +if test "x$HAVE_GALLIUM_SWR" = xyes; then + echo " SWR_INCLUDEDIR: $SWR_INCLUDEDIR" + echo " SWR_LIBDIR: $SWR_LIBDIR" + echo " SWR_ARCH: $SWR_ARCH" + echo " SWR_ARCH_FLAG: $SWR_ARCH_FLAG" + echo " SWR_NATIVE: $enable_swr_native" + echo "" +fi echo " PYTHON2: $PYTHON2" echo "" diff --git a/src/gallium/Makefile.am 
b/src/gallium/Makefile.am index a7c3606..dcce6a3 100644 --- a/src/gallium/Makefile.am +++ b/src/gallium/Makefile.am @@ -77,6 +77,10 @@ SUBDIRS += drivers/llvmpipe endif endif +if HAVE_GALLIUM_SWR +SUBDIRS += drivers/swr +endif + ## vc4/rpi if HAVE_GALLIUM_VC4 SUBDIRS += drivers/vc4 winsys/vc4/drm diff --git a/src/gallium/SConscript b/src/gallium/SConscript index fa5fa6e..766c24a 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -17,6 +17,7 @@ SConscript([ 'drivers/softpipe/SConscript', 'drivers/svga/SConscript', 'drivers/trace/SConscript', + 'drivers/swr/SConscript', ]) # diff --git a/src/gallium/auxiliary/gallivm/lp_bld_flow.h b/src/gallium/auxiliary/gallivm/lp_bld_flow.h index 0da849b..083b0ad 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_flow.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.h @@ -37,6 +37,9 @@ #include "gallivm/lp_bld.h" +#ifdef __cplusplus +extern "C" { +#endif struct lp_type; @@ -198,4 +201,8 @@ lp_build_array_alloca(struct gallivm_state *gallivm, LLVMValueRef count, const char *name); +#ifdef __cplusplus +} +#endif + #endif /* !LP_BLD_FLOW_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.h b/src/gallium/auxiliary/gallivm/lp_bld_init.h index 9e50f88..ab44661 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_init.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.h @@ -35,6 +35,9 @@ #include "lp_bld.h" #include +#ifdef __cplusplus +extern "C" { +#endif struct gallivm_state { @@ -82,4 +85,8 @@ void lp_set_store_alignment(LLVMValueRef Inst, unsigned Align); +#ifdef __cplusplus +} +#endif + #endif /* !LP_BLD_INIT_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index eba758d..5f53c47 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -42,6 +42,9 @@ #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_swizzle.h" +#ifdef __cplusplus +extern "C" { +#endif struct pipe_resource; struct 
pipe_sampler_view; @@ -612,5 +615,8 @@ lp_build_minify(struct lp_build_context *bld, LLVMValueRef level, boolean lod_scalar); +#ifdef __cplusplus +} +#endif #endif /* LP_BLD_SAMPLE_H */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 2ca9c61..189d03d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -48,6 +48,10 @@ #include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_info.h" +#ifdef __cplusplus +extern "C" { +#endif + #define LP_CHAN_ALL ~0 #define LP_MAX_INSTRUCTIONS 256 @@ -661,4 +665,8 @@ lp_build_tgsi_llvm( struct lp_build_tgsi_context * bld_base, const struct tgsi_token *tokens); +#ifdef __cplusplus +} +#endif + #endif /* LP_BLD_TGSI_H */ diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h index 5f46552..e67dd17 100644 --- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h +++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h @@ -19,6 +19,10 @@ #include "llvmpipe/lp_public.h" #endif +#ifdef GALLIUM_SWR +#include "swr/swr_public.h" +#endif + static inline struct pipe_screen * sw_screen_create_named(struct sw_winsys *winsys, const char *driver) @@ -30,6 +34,11 @@ sw_screen_create_named(struct sw_winsys *winsys, const char *driver) screen = llvmpipe_create_screen(winsys); #endif +#if defined(GALLIUM_SWR) + if (screen == NULL && strcmp(driver, "swr") == 0) + screen = swr_create_screen(winsys); +#endif + #if defined(GALLIUM_SOFTPIPE) if (screen == NULL) screen = softpipe_create_screen(winsys); @@ -45,7 +54,9 @@ sw_screen_create(struct sw_winsys *winsys) const char *default_driver; const char *driver; -#if defined(GALLIUM_LLVMPIPE) +#if defined(GALLIUM_SWR) + default_driver = "swr"; +#elif defined(GALLIUM_LLVMPIPE) default_driver = "llvmpipe"; #elif defined(GALLIUM_SOFTPIPE) default_driver = "softpipe"; diff --git 
a/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h index 4f38ba9..d707b8b 100644 --- a/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h +++ b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h @@ -12,7 +12,7 @@ static inline struct pipe_screen * sw_screen_wrap(struct pipe_screen *screen) { -#if defined(GALLIUM_SOFTPIPE) || defined(GALLIUM_LLVMPIPE) +#if defined(GALLIUM_SOFTPIPE) || defined(GALLIUM_LLVMPIPE) || defined(GALLIUM_SWR) struct sw_winsys *sws; struct pipe_screen *sw_screen = NULL; const char *driver; diff --git a/src/gallium/drivers/swr/.clang-format b/src/gallium/drivers/swr/.clang-format new file mode 100644 index 0000000..0ec65a5 --- /dev/null +++ b/src/gallium/drivers/swr/.clang-format @@ -0,0 +1,64 @@ +--- +Language: Cpp +AccessModifierOffset: -3 +AlignAfterOpenBracket: true +AlignEscapedNewlinesLeft: false +AlignOperands: false +AlignTrailingComments: false +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AlwaysBreakAfterDefinitionReturnType: true +AlwaysBreakTemplateDeclarations: false +AlwaysBreakBeforeMultilineStrings: false +BreakBeforeBinaryOperators: NonAssignment +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: true +BinPackParameters: false +BinPackArguments: false +ColumnLimit: 78 +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 3 +DerivePointerAlignment: false +ExperimentalAutoDetectBinPacking: false +IndentCaseLabels: false +IndentWrappedFunctionNames: false +IndentFunctionDeclarationAfterType: false +MaxEmptyLinesToKeep: 2 +KeepEmptyLinesAtTheStartOfBlocks: true +NamespaceIndentation: Inner +ObjCBlockIndentWidth: 3 +ObjCSpaceAfterProperty: true 
+ObjCSpaceBeforeProtocolList: true +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakString: 1000 +PenaltyBreakFirstLessLess: 120 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 0 +PointerAlignment: Right +SpacesBeforeTrailingComments: 1 +Cpp11BracedListStyle: true +Standard: Cpp11 +IndentWidth: 3 +TabWidth: 8 +UseTab: Never +BreakBeforeBraces: Linux +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpacesInAngles: false +SpaceInEmptyParentheses: false +SpacesInCStyleCastParentheses: false +SpaceAfterCStyleCast: false +SpacesInContainerLiterals: true +SpaceBeforeAssignmentOperators: true +ContinuationIndentWidth: 3 +CommentPragmas: '^ IWYU pragma:' +ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] +SpaceBeforeParens: ControlStatements +DisableFormat: false +... + diff --git a/src/gallium/drivers/swr/Automake.inc b/src/gallium/drivers/swr/Automake.inc new file mode 100644 index 0000000..8e66744 --- /dev/null +++ b/src/gallium/drivers/swr/Automake.inc @@ -0,0 +1,28 @@ +# Copyright (C) 2015 Intel Corporation. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +if HAVE_GALLIUM_SWR + +TARGET_CPPFLAGS += -DGALLIUM_SWR +TARGET_LIB_DEPS += \ + $(top_builddir)/src/gallium/drivers/swr/libmesaswr.la + +endif diff --git a/src/gallium/drivers/swr/Makefile.am b/src/gallium/drivers/swr/Makefile.am new file mode 100644 index 0000000..5dff02c --- /dev/null +++ b/src/gallium/drivers/swr/Makefile.am @@ -0,0 +1,82 @@ +# Copyright (C) 2015 Intel Corporation. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+ +AUTOMAKE_OPTIONS = subdir-objects + +include Makefile.sources +include $(top_srcdir)/src/gallium/Automake.inc + +AM_CXXFLAGS = \ + $(GALLIUM_DRIVER_CFLAGS) \ + -std=c++11 -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS \ + $(SWR_ARCH_FLAG) \ + $(LLVM_CFLAGS) + +noinst_LTLIBRARIES = libmesaswr.la + +libmesaswr_la_SOURCES = $(CXX_SOURCES) + +libmesaswr_la_LDFLAGS = + +if SWR_NATIVE +BUILT_SOURCES = \ + rasterizer/scripts/gen_knobs.cpp \ + rasterizer/scripts/gen_knobs.h \ + rasterizer/jitter/state_llvm.h + +rasterizer/scripts/gen_knobs.cpp rasterizer/scripts/gen_knobs.h: rasterizer/scripts/gen_knobs.py rasterizer/scripts/knob_defs.py rasterizer/scripts/templates/knobs.template + $(PYTHON2) $(PYTHON_FLAGS) \ + $(srcdir)/rasterizer/scripts/gen_knobs.py \ + rasterizer/scripts + +rasterizer/jitter/state_llvm.h: rasterizer/jitter/scripts/gen_llvm_types.py rasterizer/core/state.h + $(PYTHON2) $(PYTHON_FLAGS) \ + $(srcdir)/rasterizer/jitter/scripts/gen_llvm_types.py \ + --input $(srcdir)/rasterizer/core/state.h \ + --output rasterizer/jitter/state_llvm.h + +libmesaswr_la_SOURCES += \ + $(COMMON_CXX_SOURCES) \ + $(CORE_CXX_SOURCES) \ + $(JITTER_CXX_SOURCES) \ + $(MEMORY_CXX_SOURCES) \ + rasterizer/scripts/gen_knobs.cpp \ + rasterizer/scripts/gen_knobs.h +AM_CXXFLAGS += \ + -I$(srcdir)/rasterizer \ + -I$(srcdir)/rasterizer/core \ + -I$(srcdir)/rasterizer/jitter \ + -I$(builddir)/rasterizer/scripts \ + -I$(builddir)/rasterizer/jitter +else +libmesaswr_la_LDFLAGS += -L$(SWR_LIBDIR) -lSWR +AM_CXXFLAGS += \ + -I$(SWR_INCLUDEDIR) \ + -I$(SWR_INCLUDEDIR)/core \ + -I$(SWR_INCLUDEDIR)/jitter \ + -I$(SWR_INCLUDEDIR)/build/jitter \ + -I$(SWR_INCLUDEDIR)/build/scripts +endif + +libmesaswr_la_LDFLAGS += -lnuma + + +EXTRA_DIST = SConscript diff --git a/src/gallium/drivers/swr/Makefile.sources b/src/gallium/drivers/swr/Makefile.sources new file mode 100644 index 0000000..1c6fe08 --- /dev/null +++ b/src/gallium/drivers/swr/Makefile.sources @@ -0,0 +1,114 @@ +# Copyright (C) 2015 Intel 
Corporation. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+ +CXX_SOURCES := \ + swr_clear.cpp \ + swr_context.cpp \ + swr_context.h \ + swr_context_llvm.h \ + swr_draw.cpp \ + swr_public.h \ + swr_resource.h \ + swr_screen.cpp \ + swr_screen.h \ + swr_state.cpp \ + swr_state.h \ + swr_tex_sample.cpp \ + swr_tex_sample.h \ + swr_scratch.h \ + swr_scratch.cpp \ + swr_shader.cpp \ + swr_memory.h \ + swr_fence.h \ + swr_fence.cpp \ + swr_query.h \ + swr_query.cpp + +COMMON_CXX_SOURCES := \ + rasterizer/common/containers.hpp \ + rasterizer/common/formats.cpp \ + rasterizer/common/formats.h \ + rasterizer/common/isa.hpp \ + rasterizer/common/os.h \ + rasterizer/common/rdtsc_buckets.cpp \ + rasterizer/common/rdtsc_buckets.h \ + rasterizer/common/rdtsc_buckets_shared.h \ + rasterizer/common/rdtsc_buckets_shared.h \ + rasterizer/common/simdintrin.h \ + rasterizer/common/swr_assert.cpp \ + rasterizer/common/swr_assert.h + +CORE_CXX_SOURCES := \ + rasterizer/core/api.cpp \ + rasterizer/core/api.h \ + rasterizer/core/arena.cpp \ + rasterizer/core/arena.h \ + rasterizer/core/backend.cpp \ + rasterizer/core/backend.h \ + rasterizer/core/blend.h \ + rasterizer/core/clip.cpp \ + rasterizer/core/clip.h \ + rasterizer/core/context.h \ + rasterizer/core/depthstencil.h \ + rasterizer/core/fifo.hpp \ + rasterizer/core/format_traits.h \ + rasterizer/core/format_types.h \ + rasterizer/core/frontend.cpp \ + rasterizer/core/frontend.h \ + rasterizer/core/knobs.h \ + rasterizer/core/knobs_init.h \ + rasterizer/core/multisample.h \ + rasterizer/core/pa_avx.cpp \ + rasterizer/core/pa.h \ + rasterizer/core/rasterizer.cpp \ + rasterizer/core/rasterizer.h \ + rasterizer/core/rdtsc_core.cpp \ + rasterizer/core/rdtsc_core.h \ + rasterizer/core/state.h \ + rasterizer/core/threads.cpp \ + rasterizer/core/threads.h \ + rasterizer/core/tilemgr.cpp \ + rasterizer/core/tilemgr.h \ + rasterizer/core/utils.cpp \ + rasterizer/core/utils.h + +JITTER_CXX_SOURCES := \ + rasterizer/jitter/blend_jit.cpp \ + rasterizer/jitter/blend_jit.h \ + 
rasterizer/jitter/builder.cpp \ + rasterizer/jitter/builder_gen.cpp \ + rasterizer/jitter/builder_gen.h \ + rasterizer/jitter/builder.h \ + rasterizer/jitter/builder_misc.cpp \ + rasterizer/jitter/builder_misc.h \ + rasterizer/jitter/builder_x86.cpp \ + rasterizer/jitter/builder_x86.h \ + rasterizer/jitter/fetch_jit.cpp \ + rasterizer/jitter/fetch_jit.h \ + rasterizer/jitter/JitManager.cpp \ + rasterizer/jitter/JitManager.h \ + rasterizer/jitter/streamout_jit.cpp \ + rasterizer/jitter/streamout_jit.h + +MEMORY_CXX_SOURCES := \ + rasterizer/memory/ClearTile.cpp \ + rasterizer/memory/LoadTile.cpp \ + rasterizer/memory/StoreTile.cpp diff --git a/src/gallium/drivers/swr/SConscript b/src/gallium/drivers/swr/SConscript new file mode 100644 index 0000000..4c8c121 --- /dev/null +++ b/src/gallium/drivers/swr/SConscript @@ -0,0 +1,69 @@ +from sys import executable as python_cmd +import distutils.version + +Import('*') + +if not env['llvm']: + print 'warning: LLVM disabled: not building swr' + Return() + +env = env.Clone() + +env.MSVC2008Compat() + +env.Append(CPPDEFINES = [ + '__STDC_CONSTANT_MACROS', + '__STDC_LIMIT_MACROS', + 'KNOB_ARCH=KNOB_ARCH_AVX2', + ]) + +env.Append(CCFLAGS = [ + '-std=c++11', + '-march=core-avx2', + ]) + +env.Prepend(CPPPATH = [ + 'rasterizer', + 'rasterizer/core', + 'rasterizer/jitter', + 'rasterizer/scripts', + ]) + +gen_knobs = env.CodeGenerate( + target = 'rasterizer/scripts/gen_knobs.cpp', + script = 'rasterizer/scripts/gen_knobs.py', + source = [], + command = python_cmd + ' $SCRIPT ' + Dir('rasterizer/scripts').abspath +) + +gen_knobs = env.CodeGenerate( + target = 'rasterizer/scripts/gen_knobs.h', + script = 'rasterizer/scripts/gen_knobs.py', + source = [], + command = python_cmd + ' $SCRIPT ' + Dir('rasterizer/scripts').abspath +) + +state_llvm = env.CodeGenerate( + target = 'rasterizer/jitter/state_llvm.h', + script = 'rasterizer/jitter/scripts/gen_llvm_types.py', + source = 'rasterizer/core/state.h', + command = python_cmd + ' $SCRIPT 
--input $SOURCE --output $TARGET' +) + +source = ['rasterizer/scripts/gen_knobs.cpp', 'rasterizer/scripts/gen_knobs.h'] +source += env.ParseSourceList('Makefile.sources', [ + 'CXX_SOURCES', + 'COMMON_CXX_SOURCES', + 'CORE_CXX_SOURCES', + 'JITTER_CXX_SOURCES', + 'MEMORY_CXX_SOURCES' +]) + +swr = env.ConvenienceLibrary( + target = 'swr', + source = source, + ) + +env.Alias('swr', swr) + +Export('swr') diff --git a/src/gallium/drivers/swr/swr_clear.cpp b/src/gallium/drivers/swr/swr_clear.cpp new file mode 100644 index 0000000..7704359 --- /dev/null +++ b/src/gallium/drivers/swr/swr_clear.cpp @@ -0,0 +1,141 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/ + +#include "swr_context.h" +#include "swr_query.h" + +static void +swr_clear(struct pipe_context *pipe, + unsigned buffers, + const union pipe_color_union *color, + double depth, + unsigned stencil) +{ + struct swr_context *ctx = swr_context(pipe); + struct pipe_framebuffer_state *fb = &ctx->framebuffer; + + UINT clearMask = 0; + + if (!swr_check_render_cond(pipe)) + return; + + if (ctx->dirty) + swr_update_derived(ctx); + +/* Update clearMask/targetMask */ +#if 0 /* XXX SWR currently only clears SWR_ATTACHMENT_COLOR0, don't bother \ + checking others yet. */ + if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) { + UINT i; + for (i = 0; i < fb->nr_cbufs; ++i) + if (fb->cbufs[i]) + clearMask |= (SWR_CLEAR_COLOR0 << i); + } +#else + if (buffers & PIPE_CLEAR_COLOR && fb->cbufs[0]) + clearMask |= SWR_CLEAR_COLOR; +#endif + + if (buffers & PIPE_CLEAR_DEPTH && fb->zsbuf) + clearMask |= SWR_CLEAR_DEPTH; + + if (buffers & PIPE_CLEAR_STENCIL && fb->zsbuf) + clearMask |= SWR_CLEAR_STENCIL; + +#if 0 // XXX HACK, override clear color alpha. On ubuntu, clears are + // transparent. + ((union pipe_color_union *)color)->f[3] = 1.0; /* cast off your const'd-ness */ +#endif + + /* Reset viewport to full framebuffer width/height before clear, then + * restore it */ + /* Scissor affects clear, viewport should not */ + ctx->dirty |= SWR_NEW_VIEWPORT; + SWR_VIEWPORT vp = {0}; + vp.width = ctx->framebuffer.width; + vp.height = ctx->framebuffer.height; + SwrSetViewports(ctx->swrContext, 1, &vp, NULL); + + SwrClearRenderTarget(ctx->swrContext, clearMask, color->f, depth, stencil); +} + + +#if 0 // XXX, these don't get called. how to get these called? Do we need + // them? Docs? 
+static void +swr_clear_render_target(struct pipe_context *pipe, struct pipe_surface *ps, + const union pipe_color_union *color, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + struct swr_context *ctx = swr_context(pipe); + fprintf(stderr, "SWR swr_clear_render_target!\n"); + + ctx->dirty |= SWR_NEW_FRAMEBUFFER | SWR_NEW_SCISSOR; +} + +static void +swr_clear_depth_stencil(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned buffers, double depth, unsigned stencil, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + struct swr_context *ctx = swr_context(pipe); + fprintf(stderr, "SWR swr_clear_depth_stencil!\n"); + + ctx->dirty |= SWR_NEW_FRAMEBUFFER | SWR_NEW_SCISSOR; +} + +static void +swr_clear_buffer(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned offset, unsigned size, + const void *data, int data_size) +{ + fprintf(stderr, "SWR swr_clear_buffer!\n"); + struct swr_context *ctx = swr_context(pipe); + struct swr_resource *buf = swr_resource(res); + union pipe_color_union color; + enum pipe_format dst_fmt; + unsigned width, height, elements; + + assert(res->target == PIPE_BUFFER); + assert(buf); + assert(size % data_size == 0); + + SWR_SURFACE_STATE &swr_buffer = buf->swr; + + ctx->dirty |= SWR_NEW_FRAMEBUFFER | SWR_NEW_SCISSOR; +} +#endif + + +void +swr_clear_init(struct pipe_context *pipe) +{ + pipe->clear = swr_clear; +#if 0 // XXX, these don't get called. how to get these called? Do we need + // them? Docs? + pipe->clear_render_target = swr_clear_render_target; + pipe->clear_depth_stencil = swr_clear_depth_stencil; + pipe->clear_buffer = swr_clear_buffer; +#endif +} diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp new file mode 100644 index 0000000..6269cd0 --- /dev/null +++ b/src/gallium/drivers/swr/swr_context.cpp @@ -0,0 +1,392 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/
+
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+extern "C" {
+#include "util/u_transfer.h"
+#include "util/u_surface.h"
+}
+
+#include "swr_context.h"
+#include "swr_memory.h"
+#include "swr_screen.h"
+#include "swr_resource.h"
+#include "swr_scratch.h"
+#include "swr_query.h"
+
+#include "api.h"
+
+/*
+ * Create a surface describing one mip level of a texture, or an element
+ * range of a buffer.  Takes a reference on the resource.
+ * Returns NULL if the surface cannot be allocated.
+ */
+static struct pipe_surface *
+swr_create_surface(struct pipe_context *pipe,
+                   struct pipe_resource *pt,
+                   const struct pipe_surface *surf_tmpl)
+{
+   struct pipe_surface *ps;
+
+   ps = CALLOC_STRUCT(pipe_surface);
+   if (ps) {
+      pipe_reference_init(&ps->reference, 1);
+      pipe_resource_reference(&ps->texture, pt);
+      ps->context = pipe;
+      ps->format = surf_tmpl->format;
+      if (pt->target != PIPE_BUFFER) {
+         assert(surf_tmpl->u.tex.level <= pt->last_level);
+         ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level);
+         ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level);
+         ps->u.tex.level = surf_tmpl->u.tex.level;
+         ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
+         ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
+         if (ps->u.tex.first_layer != ps->u.tex.last_layer) {
+            debug_printf("creating surface with multiple layers, rendering "
+                         "to first layer only\n");
+         }
+      } else {
+         /* setting width as number of elements should get us correct
+          * renderbuffer width */
+         ps->width = surf_tmpl->u.buf.last_element
+            - surf_tmpl->u.buf.first_element + 1;
+         ps->height = pt->height0;
+         ps->u.buf.first_element = surf_tmpl->u.buf.first_element;
+         ps->u.buf.last_element = surf_tmpl->u.buf.last_element;
+         assert(ps->u.buf.first_element <= ps->u.buf.last_element);
+         assert(ps->u.buf.last_element < ps->width);
+      }
+   }
+   return ps;
+}
+
+/*
+ * Destroy a surface, dropping its reference on the underlying resource.
+ */
+static void
+swr_surface_destroy(struct pipe_context *pipe, struct pipe_surface *surf)
+{
+   assert(surf->texture);
+   struct pipe_resource *resource = surf->texture;
+
+   /* If the surface being destroyed is a current render target,
+    * call StoreTiles to resolve the hotTile state then set attachment
+    * to NULL.
+    */
+   if (resource->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL
+                         | PIPE_BIND_DISPLAY_TARGET)) {
+      struct swr_context *ctx = swr_context(pipe);
+      struct swr_resource *spr = swr_resource(resource);
+      for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++)
+         if (ctx->current.attachment[i] == &spr->swr) {
+            swr_store_render_target(ctx, i, SWR_TILE_RESOLVED);
+            ctx->current.attachment[i] = nullptr;
+            /*
+             * Mesa thinks depth/stencil are fused, so we'll never get an
+             * explicit resource for stencil.  So, if checking depth, then
+             * also check for stencil.
+             */
+            if (spr->has_stencil && (i == SWR_ATTACHMENT_DEPTH)) {
+               swr_store_render_target(
+                  ctx, SWR_ATTACHMENT_STENCIL, SWR_TILE_RESOLVED);
+               ctx->current.attachment[SWR_ATTACHMENT_STENCIL] = nullptr;
+            }
+
+            SwrWaitForIdle(ctx->swrContext);
+            break;
+         }
+   }
+
+   pipe_resource_reference(&surf->texture, NULL);
+   FREE(surf);
+}
+
+
+/*
+ * Map a resource for CPU access.
+ *
+ * On success stores a newly allocated pipe_transfer in *transfer and returns
+ * a pointer to the origin of 'box' inside mip 'level'.  Returns NULL if the
+ * transfer cannot be allocated.
+ */
+static void *
+swr_transfer_map(struct pipe_context *pipe,
+                 struct pipe_resource *resource,
+                 unsigned level,
+                 unsigned usage,
+                 const struct pipe_box *box,
+                 struct pipe_transfer **transfer)
+{
+   /* Validate the arguments before anything dereferences the resource. */
+   assert(resource);
+   assert(level <= resource->last_level);
+
+   struct swr_resource *spr = swr_resource(resource);
+   struct pipe_transfer *pt;
+   enum pipe_format format = resource->format;
+
+   /*
+    * If mapping any attached rendertarget, store tiles and wait for idle
+    * before giving CPU access to the surface.
+    * (set postStoreTileState to SWR_TILE_INVALID so tiles are reloaded)
+    */
+   if (resource->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL
+                         | PIPE_BIND_DISPLAY_TARGET)) {
+      struct swr_context *ctx = swr_context(pipe);
+      for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++)
+         if (ctx->current.attachment[i] == &spr->swr) {
+            swr_store_render_target(ctx, i, SWR_TILE_INVALID);
+            /*
+             * Mesa thinks depth/stencil are fused, so we'll never get an
+             * explicit map for stencil.  So, if mapping depth, then also
+             * store tile for stencil.
+             */
+            if (spr->has_stencil && (i == SWR_ATTACHMENT_DEPTH))
+               swr_store_render_target(
+                  ctx, SWR_ATTACHMENT_STENCIL, SWR_TILE_INVALID);
+            SwrWaitForIdle(ctx->swrContext);
+            break;
+         }
+   }
+
+
+   pt = CALLOC_STRUCT(pipe_transfer);
+   if (!pt)
+      return NULL;
+   pipe_resource_reference(&pt->resource, resource);
+   pt->level = level;
+   pt->box = *box;
+   pt->stride = spr->row_stride[level];
+   pt->layer_stride = spr->img_stride[level];
+
+   /* if we're mapping the depth/stencil, copy in stencil */
+   if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT
+       && spr->has_stencil) {
+      /* Stencil lives in the secondary surface; merge it into byte 3 of
+       * each 4-byte depth/stencil texel. */
+      for (unsigned i = 0; i < spr->alignedWidth * spr->alignedHeight; i++) {
+         spr->swr.pBaseAddress[4 * i + 3] = spr->secondary.pBaseAddress[i];
+      }
+   }
+
+   unsigned offset = box->z * pt->layer_stride + box->y * pt->stride
+      + box->x * util_format_get_blocksize(format);
+
+   *transfer = pt;
+
+   return spr->swr.pBaseAddress + offset + spr->mip_offsets[level];
+}
+
+/*
+ * Unmap a resource: write stencil back to the secondary surface when needed,
+ * then release the transfer and its resource reference.
+ */
+static void
+swr_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer)
+{
+   assert(transfer->resource);
+
+   /*
+    * XXX TODO: use fences and come up with a real resource manager.
+    *
+    * If this resource has been mapped/unmapped, it's probably in use.  Tag it
+    * with this context so we'll know to check dependencies when it's deleted.
+    */
+   struct swr_resource *res = swr_resource(transfer->resource);
+   res->bound_to_context = (void *)pipe;
+
+   /* if we're mapping the depth/stencil, copy out stencil */
+   if (res->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT
+       && res->has_stencil) {
+      for (unsigned i = 0; i < res->alignedWidth * res->alignedHeight; i++) {
+         res->secondary.pBaseAddress[i] = res->swr.pBaseAddress[4 * i + 3];
+      }
+   }
+
+   pipe_resource_reference(&transfer->resource, NULL);
+   FREE(transfer);
+}
+
+
+/*
+ * Copy a region between two resources.  Only buffer<->buffer and
+ * texture<->texture copies are handled; anything else is just logged.
+ */
+static void
+swr_resource_copy(struct pipe_context *pipe,
+                  struct pipe_resource *dst,
+                  unsigned dst_level,
+                  unsigned dstx,
+                  unsigned dsty,
+                  unsigned dstz,
+                  struct pipe_resource *src,
+                  unsigned src_level,
+                  const struct pipe_box *src_box)
+{
+   if ((dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER)
+       || (dst->target != PIPE_BUFFER && src->target != PIPE_BUFFER)) {
+      util_resource_copy_region(
+         pipe, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box);
+      return;
+   }
+
+   debug_printf("unhandled swr_resource_copy\n");
+}
+
+
+/*
+ * Blit between resources.  Tries a plain region copy first and otherwise
+ * falls back to the util blitter after saving the current pipeline state.
+ * Multisample color resolves and stencil blits are not implemented.
+ */
+static void
+swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   struct pipe_blit_info info = *blit_info;
+
+   if (blit_info->render_condition_enable && !swr_check_render_cond(pipe))
+      return;
+
+   if (info.src.resource->nr_samples > 1 && info.dst.resource->nr_samples <= 1
+       && !util_format_is_depth_or_stencil(info.src.resource->format)
+       && !util_format_is_pure_integer(info.src.resource->format)) {
+      debug_printf("swr: color resolve unimplemented\n");
+      return;
+   }
+
+   if (util_try_blit_via_copy_region(pipe, &info)) {
+      return; /* done */
+   }
+
+   if (info.mask & PIPE_MASK_S) {
+      debug_printf("swr: cannot blit stencil, skipping\n");
+      info.mask &= ~PIPE_MASK_S;
+   }
+
+   if (!util_blitter_is_blit_supported(ctx->blitter, &info)) {
+      debug_printf("swr: blit unsupported %s -> %s\n",
+                   util_format_short_name(info.src.resource->format),
+                   util_format_short_name(info.dst.resource->format));
+      return;
+   }
+
+   /* XXX turn off occlusion and streamout queries */
+
+   util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vertex_buffer);
+   util_blitter_save_vertex_elements(ctx->blitter, (void *)ctx->velems);
+   util_blitter_save_vertex_shader(ctx->blitter, (void *)ctx->vs);
+   /*util_blitter_save_geometry_shader(ctx->blitter, (void*)ctx->gs);*/
+   util_blitter_save_so_targets(
+      ctx->blitter,
+      ctx->num_so_targets,
+      (struct pipe_stream_output_target **)ctx->so_targets);
+   util_blitter_save_rasterizer(ctx->blitter, (void *)ctx->rasterizer);
+   util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
+   util_blitter_save_scissor(ctx->blitter, &ctx->scissor);
+   util_blitter_save_fragment_shader(ctx->blitter, ctx->fs);
+   util_blitter_save_blend(ctx->blitter, (void *)ctx->blend);
+   util_blitter_save_depth_stencil_alpha(ctx->blitter,
+                                         (void *)ctx->depth_stencil);
+   util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref);
+   util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask);
+   util_blitter_save_framebuffer(ctx->blitter, &ctx->framebuffer);
+   util_blitter_save_fragment_sampler_states(
+      ctx->blitter,
+      ctx->num_samplers[PIPE_SHADER_FRAGMENT],
+      (void **)ctx->samplers[PIPE_SHADER_FRAGMENT]);
+   util_blitter_save_fragment_sampler_views(
+      ctx->blitter,
+      ctx->num_sampler_views[PIPE_SHADER_FRAGMENT],
+      ctx->sampler_views[PIPE_SHADER_FRAGMENT]);
+   util_blitter_save_render_condition(ctx->blitter,
+                                      ctx->render_cond_query,
+                                      ctx->render_cond_cond,
+                                      ctx->render_cond_mode);
+
+   util_blitter_blit(ctx->blitter, &info);
+}
+
+
+/*
+ * Destroy the context: blitter, SWR core context, blend JIT cache and
+ * scratch buffers, then the context structure itself.
+ */
+static void
+swr_destroy(struct pipe_context *pipe)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   if (ctx->blitter)
+      util_blitter_destroy(ctx->blitter);
+
+   if (ctx->swrContext)
+      SwrDestroyContext(ctx->swrContext);
+
+   delete ctx->blendJIT;
+
+   swr_destroy_scratch_buffers(ctx);
+
+   FREE(ctx);
+}
+
+
+/*
+ * Record the conditional-rendering query, condition and mode on the context.
+ */
+static void
+swr_render_condition(struct pipe_context *pipe,
+                     struct pipe_query *query,
+                     boolean condition,
+                     uint mode)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   ctx->render_cond_query = query;
+   ctx->render_cond_mode = mode;
+   ctx->render_cond_cond = condition;
+}
+
+
+/*
+ * Create a swr pipe_context for 'screen'.
+ * Returns NULL if the context, the SWR core context or the blitter cannot
+ * be created.
+ */
+struct pipe_context *
+swr_create_context(struct pipe_screen *screen, void *priv)
+{
+   struct swr_context *ctx = CALLOC_STRUCT(swr_context);
+   if (!ctx)
+      return NULL;
+
+   ctx->blendJIT =
+      new std::unordered_map<BLEND_COMPILE_STATE, PFN_BLEND_JIT_FUNC>;
+
+   /* Zero the whole struct so fields not set below are well defined. */
+   SWR_CREATECONTEXT_INFO createInfo;
+   memset(&createInfo, 0, sizeof(createInfo));
+   createInfo.driver = GL;
+   createInfo.privateStateSize = sizeof(swr_draw_context);
+   createInfo.pfnLoadTile = swr_LoadHotTile;
+   createInfo.pfnStoreTile = swr_StoreHotTile;
+   createInfo.pfnClearTile = swr_StoreHotTileClear;
+   ctx->swrContext = SwrCreateContext(&createInfo);
+
+   /* Init Load/Store/ClearTiles Tables */
+   swr_InitMemoryModule();
+
+   if (ctx->swrContext == NULL)
+      goto fail;
+
+   ctx->pipe.screen = screen;
+   ctx->pipe.destroy = swr_destroy;
+   ctx->pipe.priv = priv;
+   ctx->pipe.create_surface = swr_create_surface;
+   ctx->pipe.surface_destroy = swr_surface_destroy;
+   ctx->pipe.transfer_map = swr_transfer_map;
+   ctx->pipe.transfer_unmap = swr_transfer_unmap;
+
+   ctx->pipe.transfer_flush_region = u_default_transfer_flush_region;
+   ctx->pipe.transfer_inline_write = u_default_transfer_inline_write;
+
+   ctx->pipe.resource_copy_region = swr_resource_copy;
+   ctx->pipe.render_condition = swr_render_condition;
+
+   swr_state_init(&ctx->pipe);
+   swr_clear_init(&ctx->pipe);
+   swr_draw_init(&ctx->pipe);
+   swr_query_init(&ctx->pipe);
+
+   ctx->pipe.blit = swr_blit;
+   ctx->blitter = util_blitter_create(&ctx->pipe);
+   if (!ctx->blitter) {
+      goto fail;
+   }
+
+   swr_init_scratch_buffers(ctx);
+
+   return &ctx->pipe;
+
+fail:
+   /* Should really validate the init steps and fail gracefully */
+   swr_destroy(&ctx->pipe);
+   return NULL;
+}
diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h
new file mode 100644
index 0000000..9d93a6d
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_context.h
@@ -0,0 +1,172 @@
+/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/
+
+#ifndef SWR_CONTEXT_H
+#define SWR_CONTEXT_H
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_blitter.h"
+#include "jit_api.h"
+#include "swr_state.h"
+#include <unordered_map>
+
+/* Dirty-state bits stored in swr_context::dirty. */
+#define SWR_NEW_BLEND (1 << 0)
+#define SWR_NEW_RASTERIZER (1 << 1)
+#define SWR_NEW_DEPTH_STENCIL_ALPHA (1 << 2)
+#define SWR_NEW_SAMPLER (1 << 3)
+#define SWR_NEW_SAMPLER_VIEW (1 << 4)
+#define SWR_NEW_VS (1 << 5)
+#define SWR_NEW_FS (1 << 6)
+#define SWR_NEW_VSCONSTANTS (1 << 7)
+#define SWR_NEW_FSCONSTANTS (1 << 8)
+#define SWR_NEW_VERTEX (1 << 9)
+#define SWR_NEW_STIPPLE (1 << 10)
+#define SWR_NEW_SCISSOR (1 << 11)
+#define SWR_NEW_VIEWPORT (1 << 12)
+#define SWR_NEW_FRAMEBUFFER (1 << 13)
+#define SWR_NEW_CLIP (1 << 14)
+#define SWR_NEW_SO (1 << 15)
+#define SWR_NEW_ALL 0x0000ffff
+
+/* Hash for BLEND_COMPILE_STATE so it can key the blend JIT cache below.
+ * The hash is a CRC32 over the raw bytes of the state. */
+namespace std
+{
+template <> struct hash<BLEND_COMPILE_STATE> {
+   std::size_t operator()(const BLEND_COMPILE_STATE &k) const
+   {
+      return util_hash_crc32(&k, sizeof(k));
+   }
+};
+};
+
+struct swr_context {
+   struct pipe_context pipe; /**< base class */
+
+   HANDLE swrContext;
+
+   /** Constant state objects */
+   struct swr_blend_state *blend;
+   struct pipe_sampler_state *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+   struct pipe_depth_stencil_alpha_state *depth_stencil;
+   struct pipe_rasterizer_state *rasterizer;
+
+   struct swr_vertex_shader *vs;
+   struct swr_fragment_shader *fs;
+   struct swr_vertex_element_state *velems;
+
+   /** Other rendering state */
+   struct pipe_blend_color blend_color;
+   struct pipe_stencil_ref stencil_ref;
+   struct pipe_clip_state clip;
+   struct pipe_constant_buffer
+      constants[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS];
+   struct pipe_framebuffer_state framebuffer;
+   struct pipe_poly_stipple poly_stipple;
+   struct pipe_scissor_state scissor;
+   struct pipe_sampler_view *
+      sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
+
+   struct pipe_viewport_state viewport;
+   struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+   struct pipe_index_buffer index_buffer;
+
+   struct blitter_context *blitter;
+
+   /** Conditional query object and mode */
+   struct pipe_query *render_cond_query;
+   uint render_cond_mode;
+   boolean render_cond_cond;
+   unsigned active_queries;
+
+   unsigned num_vertex_buffers;
+   unsigned num_samplers[PIPE_SHADER_TYPES];
+   unsigned num_sampler_views[PIPE_SHADER_TYPES];
+
+   unsigned sample_mask;
+
+   // streamout
+   pipe_stream_output_target *so_targets[MAX_SO_STREAMS];
+   uint32_t num_so_targets;
+
+   /* Temp storage for user_buffer constants */
+   struct swr_scratch_buffers *scratch;
+
+   // blend jit functions
+   std::unordered_map<BLEND_COMPILE_STATE, PFN_BLEND_JIT_FUNC> *blendJIT;
+
+   /* Shadows of current SWR API DrawState */
+   struct swr_shadow_state current;
+
+   unsigned dirty; /**< Mask of SWR_NEW_x flags */
+};
+
+/* Texture description; the LLVM mirror of this layout is built by
+ * Gen_swr_jit_texture() in swr_context_llvm.h. */
+struct swr_jit_texture {
+   uint32_t width; // same as number of elements
+   uint32_t height;
+   uint32_t depth; // doubles as array size
+   uint32_t first_level;
+   uint32_t last_level;
+   const void *base_ptr;
+   uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS];
+   uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS];
+   uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS];
+};
+
+/* Sampler description; mirrored by Gen_swr_jit_sampler(). */
+struct swr_jit_sampler {
+   float min_lod;
+   float max_lod;
+   float lod_bias;
+   float border_color[4];
+};
+
+/* Per-draw private state; its size is passed to SwrCreateContext() as
+ * privateStateSize.  Mirrored by Gen_swr_draw_context(). */
+struct swr_draw_context {
+   const float *constantVS[PIPE_MAX_CONSTANT_BUFFERS];
+   unsigned num_constantsVS[PIPE_MAX_CONSTANT_BUFFERS];
+   const float *constantFS[PIPE_MAX_CONSTANT_BUFFERS];
+   unsigned num_constantsFS[PIPE_MAX_CONSTANT_BUFFERS];
+
+   swr_jit_texture texturesVS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   swr_jit_sampler samplersVS[PIPE_MAX_SAMPLERS];
+   swr_jit_texture texturesFS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   swr_jit_sampler samplersFS[PIPE_MAX_SAMPLERS];
+
+   SWR_SURFACE_STATE renderTargets[SWR_NUM_ATTACHMENTS];
+};
+
+
+/* Downcast a pipe_context to the swr_context that embeds it as its first
+ * member. */
+static INLINE struct swr_context *
+swr_context(struct pipe_context *pipe)
+{
+   return (struct swr_context *)pipe;
+}
+
+struct pipe_context *swr_create_context(struct pipe_screen *, void *priv);
+
+void swr_state_init(struct pipe_context *pipe);
+
+void swr_clear_init(struct pipe_context *pipe);
+
+void swr_draw_init(struct pipe_context *pipe);
+
+void swr_finish(struct pipe_context *pipe);
+#endif
diff --git a/src/gallium/drivers/swr/swr_context_llvm.h b/src/gallium/drivers/swr/swr_context_llvm.h
new file mode 100644
index 0000000..58da813
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_context_llvm.h
@@ -0,0 +1,124 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#pragma once
+
+//////////////////////////////////////////////////////////////////////////
+/// Generate LLVM type information for swr_jit_texture
+/// Members are pushed in the same order as the fields of
+/// struct swr_jit_texture; the index constants below name those positions.
+INLINE static StructType *
+Gen_swr_jit_texture(JitManager *pShG)
+{
+   LLVMContext &ctx = pShG->mContext;
+   std::vector<Type *> members;
+
+   members.push_back(Type::getInt32Ty(ctx)); // width
+   members.push_back(Type::getInt32Ty(ctx)); // height
+   members.push_back(Type::getInt32Ty(ctx)); // depth
+   members.push_back(Type::getInt32Ty(ctx)); // first_level
+   members.push_back(Type::getInt32Ty(ctx)); // last_level
+   members.push_back(PointerType::get(Type::getInt8Ty(ctx), 0)); // base_ptr
+   members.push_back(ArrayType::get(Type::getInt32Ty(ctx),
+                                    PIPE_MAX_TEXTURE_LEVELS)); // row_stride
+   members.push_back(ArrayType::get(Type::getInt32Ty(ctx),
+                                    PIPE_MAX_TEXTURE_LEVELS)); // img_stride
+   members.push_back(ArrayType::get(Type::getInt32Ty(ctx),
+                                    PIPE_MAX_TEXTURE_LEVELS)); // mip_offsets
+
+   return StructType::get(ctx, members, false);
+}
+
+static const UINT swr_jit_texture_width = 0;
+static const UINT swr_jit_texture_height = 1;
+static const UINT swr_jit_texture_depth = 2;
+static const UINT swr_jit_texture_first_level = 3;
+static const UINT swr_jit_texture_last_level = 4;
+static const UINT swr_jit_texture_base_ptr = 5;
+static const UINT swr_jit_texture_row_stride = 6;
+static const UINT swr_jit_texture_img_stride = 7;
+static const UINT swr_jit_texture_mip_offsets = 8;
+
+//////////////////////////////////////////////////////////////////////////
+/// Generate LLVM type information for swr_jit_sampler
+INLINE static StructType *
+Gen_swr_jit_sampler(JitManager *pShG)
+{
+   LLVMContext &ctx = pShG->mContext;
+   std::vector<Type *> members;
+
+   members.push_back(Type::getFloatTy(ctx)); // min_lod
+   members.push_back(Type::getFloatTy(ctx)); // max_lod
+   members.push_back(Type::getFloatTy(ctx)); // lod_bias
+   members.push_back(
+      ArrayType::get(Type::getFloatTy(ctx), 4)); // border_color
+
+   return StructType::get(ctx, members, false);
+}
+
+static const UINT swr_jit_sampler_min_lod = 0;
+static const UINT swr_jit_sampler_max_lod = 1;
+static const UINT swr_jit_sampler_lod_bias = 2;
+static const UINT swr_jit_sampler_border_color = 3;
+
+//////////////////////////////////////////////////////////////////////////
+/// Generate LLVM type information for swr_draw_context
+INLINE static StructType *
+Gen_swr_draw_context(JitManager *pShG)
+{
+   LLVMContext &ctx = pShG->mContext;
+   std::vector<Type *> members;
+
+   members.push_back(
+      ArrayType::get(PointerType::get(Type::getFloatTy(ctx), 0),
+                     PIPE_MAX_CONSTANT_BUFFERS)); // constantVS
+   members.push_back(ArrayType::get(
+      Type::getInt32Ty(ctx), PIPE_MAX_CONSTANT_BUFFERS)); // num_constantsVS
+   members.push_back(
+      ArrayType::get(PointerType::get(Type::getFloatTy(ctx), 0),
+                     PIPE_MAX_CONSTANT_BUFFERS)); // constantFS
+   members.push_back(ArrayType::get(
+      Type::getInt32Ty(ctx), PIPE_MAX_CONSTANT_BUFFERS)); // num_constantsFS
+   members.push_back(
+      ArrayType::get(Gen_swr_jit_texture(pShG),
+                     PIPE_MAX_SHADER_SAMPLER_VIEWS)); // texturesVS
+   members.push_back(ArrayType::get(Gen_swr_jit_sampler(pShG),
+                                    PIPE_MAX_SAMPLERS)); // samplersVS
+   members.push_back(
+      ArrayType::get(Gen_swr_jit_texture(pShG),
+                     PIPE_MAX_SHADER_SAMPLER_VIEWS)); // texturesFS
+   members.push_back(ArrayType::get(Gen_swr_jit_sampler(pShG),
+                                    PIPE_MAX_SAMPLERS)); // samplersFS
+   members.push_back(ArrayType::get(Gen_SWR_SURFACE_STATE(pShG),
+                                    SWR_NUM_ATTACHMENTS)); // renderTargets
+
+   return StructType::get(ctx, members, false);
+}
+
+static const UINT swr_draw_context_constantVS = 0;
+static const UINT swr_draw_context_num_constantsVS = 1;
+static const UINT swr_draw_context_constantFS = 2;
+static const UINT swr_draw_context_num_constantsFS = 3;
+static const UINT swr_draw_context_texturesVS = 4;
+static const UINT swr_draw_context_samplersVS = 5;
+static const UINT swr_draw_context_texturesFS = 6;
+static const UINT swr_draw_context_samplersFS = 7;
+static const UINT swr_draw_context_renderTargets = 8;
diff --git a/src/gallium/drivers/swr/swr_draw.cpp b/src/gallium/drivers/swr/swr_draw.cpp
new file mode 100644
index 0000000..797ebdc
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_draw.cpp
@@ -0,0 +1,277 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#include "swr_screen.h"
+#include "swr_context.h"
+#include "swr_resource.h"
+#include "swr_fence.h"
+#include "swr_query.h"
+#include "jit_api.h"
+
+#include "util/u_draw.h"
+#include "util/u_prim.h"
+
+/*
+ * Convert mesa PIPE_PRIM_X to SWR enum PRIMITIVE_TOPOLOGY
+ *
+ * Unrecognised modes assert and yield TOP_UNKNOWN.
+ */
+static INLINE enum PRIMITIVE_TOPOLOGY
+swr_convert_prim_topology(const unsigned mode)
+{
+   switch (mode) {
+   case PIPE_PRIM_POINTS:
+      return TOP_POINT_LIST;
+   case PIPE_PRIM_LINES:
+      return TOP_LINE_LIST;
+   case PIPE_PRIM_LINE_LOOP:
+      return TOP_LINE_LOOP;
+   case PIPE_PRIM_LINE_STRIP:
+      return TOP_LINE_STRIP;
+   case PIPE_PRIM_TRIANGLES:
+      return TOP_TRIANGLE_LIST;
+   case PIPE_PRIM_TRIANGLE_STRIP:
+      return TOP_TRIANGLE_STRIP;
+   case PIPE_PRIM_TRIANGLE_FAN:
+      return TOP_TRIANGLE_FAN;
+   case PIPE_PRIM_QUADS:
+      return TOP_QUAD_LIST;
+   case PIPE_PRIM_QUAD_STRIP:
+      return TOP_QUAD_STRIP;
+   case PIPE_PRIM_POLYGON:
+      /* Polygons are drawn as triangle fans for now. */
+      return TOP_TRIANGLE_FAN; /* XXX TOP_POLYGON; */
+   case PIPE_PRIM_LINES_ADJACENCY:
+      return TOP_LINE_LIST_ADJ;
+   case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+      return TOP_LISTSTRIP_ADJ;
+   case PIPE_PRIM_TRIANGLES_ADJACENCY:
+      return TOP_TRI_LIST_ADJ;
+   case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+      return TOP_TRI_STRIP_ADJ;
+   default:
+      assert(0 && "Unknown topology");
+      return TOP_UNKNOWN;
+   }
+};
+
+
+/*
+ * Draw vertex arrays, with optional indexing, optional instancing.
+ */
+static void
+swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   if (!swr_check_render_cond(pipe))
+      return;
+
+   if (info->indirect) {
+      util_draw_indirect(pipe, info);
+      return;
+   }
+
+   /* Update derived state, pass draw info to update function */
+   if (ctx->dirty)
+      swr_update_derived(ctx, info);
+
+   if (ctx->vs->pipe.stream_output.num_outputs) {
+      /* The stream-out function is compiled on first use and cached in
+       * soFunc[], indexed by primitive mode. */
+      if (!ctx->vs->soFunc[info->mode]) {
+         STREAMOUT_COMPILE_STATE state = {0};
+         struct pipe_stream_output_info *so = &ctx->vs->pipe.stream_output;
+
+         state.numVertsPerPrim = u_vertices_per_prim(info->mode);
+
+         /* Running write offset for each output buffer. */
+         uint32_t offsets[MAX_SO_STREAMS] = {0};
+         uint32_t num = 0;
+
+         for (uint32_t i = 0; i < so->num_outputs; i++) {
+            assert(so->output[i].stream == 0); // @todo
+            uint32_t output_buffer = so->output[i].output_buffer;
+            if (so->output[i].dst_offset != offsets[output_buffer]) {
+               // hole - need to fill
+               state.stream.decl[num].bufferIndex = output_buffer;
+               state.stream.decl[num].hole = true;
+               state.stream.decl[num].componentMask =
+                  (1 << (so->output[i].dst_offset - offsets[output_buffer]))
+                  - 1;
+               num++;
+               offsets[output_buffer] = so->output[i].dst_offset;
+            }
+
+            state.stream.decl[num].bufferIndex = output_buffer;
+            state.stream.decl[num].attribSlot = so->output[i].register_index - 1;
+            state.stream.decl[num].componentMask =
+               ((1 << so->output[i].num_components) - 1)
+               << so->output[i].start_component;
+            state.stream.decl[num].hole = false;
+            num++;
+
+            offsets[output_buffer] += so->output[i].num_components;
+         }
+
+         state.stream.numDecls = num;
+
+         HANDLE hJitMgr = swr_screen(pipe->screen)->hJitMgr;
+         ctx->vs->soFunc[info->mode] = JitCompileStreamout(hJitMgr, state);
+         debug_printf("so shader %p\n", ctx->vs->soFunc[info->mode]);
+         assert(ctx->vs->soFunc[info->mode] && "Error: SoShader = NULL");
+      }
+
+      SwrSetSoFunc(ctx->swrContext, ctx->vs->soFunc[info->mode], 0);
+   }
+
+   /* The fetch shader is (re)compiled when none exists yet or when the
+    * primitive-restart settings differ from the ones it was built with. */
+   struct swr_vertex_element_state *velems = ctx->velems;
+   if (!velems->fsFunc
+       || (velems->fsState.cutIndex != info->restart_index)
+       || (velems->fsState.bEnableCutIndex != info->primitive_restart)) {
+
+      velems->fsState.cutIndex = info->restart_index;
+      velems->fsState.bEnableCutIndex = info->primitive_restart;
+
+      /* Create Fetch Shader */
+      HANDLE hJitMgr = swr_screen(ctx->pipe.screen)->hJitMgr;
+      velems->fsFunc = JitCompileFetch(hJitMgr, velems->fsState);
+
+      debug_printf("fetch shader %p\n", velems->fsFunc);
+      assert(velems->fsFunc && "Error: FetchShader = NULL");
+   }
+
+   SwrSetFetchFunc(ctx->swrContext, velems->fsFunc);
+
+   if (info->indexed)
+      SwrDrawIndexedInstanced(ctx->swrContext,
+                              swr_convert_prim_topology(info->mode),
+                              info->count,
+                              info->instance_count,
+                              info->start,
+                              info->index_bias,
+                              info->start_instance);
+   else
+      SwrDrawInstanced(ctx->swrContext,
+                       swr_convert_prim_topology(info->mode),
+                       info->count,
+                       info->instance_count,
+                       info->start,
+                       info->start_instance);
+}
+
+
+/*
+ * Flush: store the display surface's tiles if it is the current color
+ * buffer, submit the screen's flush fence, and optionally hand back a
+ * reference to that fence through 'fence'.
+ */
+static void
+swr_flush(struct pipe_context *pipe,
+          struct pipe_fence_handle **fence,
+          unsigned flags)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   struct swr_screen *screen = swr_screen(pipe->screen);
+
+   /* If the current renderTarget is the display surface, store tiles back to
+    * the surface, in preparation for present (swr_flush_frontbuffer)
+    */
+   struct pipe_surface *cb = ctx->framebuffer.cbufs[0];
+   if (cb && swr_resource(cb->texture)->display_target)
+      swr_store_render_target(ctx, SWR_ATTACHMENT_COLOR0, SWR_TILE_RESOLVED);
+
+   // SwrStoreTiles is asynchronous, always submit the "flush" fence.
+   // flush_frontbuffer needs it.
+   swr_fence_submit(ctx, screen->flush_fence);
+
+   if (fence)
+      swr_fence_reference(pipe->screen, fence, screen->flush_fence);
+}
+
+/*
+ * Flush and then block until the flush fence has completed.
+ */
+void
+swr_finish(struct pipe_context *pipe)
+{
+   struct swr_screen *screen = swr_screen(pipe->screen);
+   struct pipe_fence_handle *fence = NULL;
+
+   swr_flush(pipe, &fence, 0);
+   swr_fence_finish(&screen->base, fence, 0);
+   swr_fence_reference(&screen->base, &fence, NULL);
+}
+
+
+/*
+ * Store SWR HotTiles back to RenderTarget surface.
+ */
+void
+swr_store_render_target(struct swr_context *ctx,
+                        uint32_t attachment,
+                        enum SWR_TILE_STATE post_tile_state,
+                        struct SWR_SURFACE_STATE *surface)
+{
+   struct swr_draw_context *pDC =
+      (swr_draw_context *)SwrGetPrivateContextState(ctx->swrContext);
+   struct SWR_SURFACE_STATE *renderTarget = &pDC->renderTargets[attachment];
+
+   /* If the passed in surface isn't already attached, it will be attached and
+    * then restored. */
+   if (surface && (surface != ctx->current.attachment[attachment]))
+      *renderTarget = *surface;
+
+   /* Only proceed if there's a valid surface to store to */
+   if (renderTarget->pBaseAddress) {
+      /* Set viewport to full renderTarget width/height and disable scissor
+       * before StoreTiles */
+      boolean change_viewport =
+         (ctx->current.vp.x != 0.0f || ctx->current.vp.y != 0.0f
+          || ctx->current.vp.width != renderTarget->width
+          || ctx->current.vp.height != renderTarget->height);
+      if (change_viewport) {
+         SWR_VIEWPORT vp = {0};
+         vp.width = renderTarget->width;
+         vp.height = renderTarget->height;
+         SwrSetViewports(ctx->swrContext, 1, &vp, NULL);
+      }
+
+      boolean scissor_enable = ctx->current.rastState.scissorEnable;
+      if (scissor_enable) {
+         ctx->current.rastState.scissorEnable = FALSE;
+         SwrSetRastState(ctx->swrContext, &ctx->current.rastState);
+      }
+
+      SwrStoreTiles(ctx->swrContext,
+                    (enum SWR_RENDERTARGET_ATTACHMENT)attachment,
+                    post_tile_state);
+
+      /* Restore viewport and scissor enable */
+      if (change_viewport)
+         SwrSetViewports(ctx->swrContext, 1, &ctx->current.vp, &ctx->current.vpm);
+      if (scissor_enable) {
+         ctx->current.rastState.scissorEnable = scissor_enable;
+         SwrSetRastState(ctx->swrContext, &ctx->current.rastState);
+      }
+
+      /* Restore surface attachment, if changed */
+      /* NOTE(review): this dereferences ctx->current.attachment[attachment];
+       * confirm it cannot be NULL when a different surface was passed in. */
+      if (surface && (surface != ctx->current.attachment[attachment]))
+         *renderTarget = *ctx->current.attachment[attachment];
+   }
+}
+
+
+/*
+ * Install the draw and flush entry points on the context.
+ */
+void
+swr_draw_init(struct pipe_context *pipe)
+{
+   pipe->draw_vbo = swr_draw_vbo;
+   pipe->flush = swr_flush;
+}
diff --git a/src/gallium/drivers/swr/swr_fence.cpp b/src/gallium/drivers/swr/swr_fence.cpp
new file mode 100644
index 0000000..aaf7223
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_fence.cpp
@@ -0,0 +1,141 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ***************************************************************************/
+
+#include "pipe/p_screen.h"
+#include "util/u_memory.h"
+#include "os/os_time.h"
+
+#include "swr_context.h"
+#include "swr_screen.h"
+#include "swr_fence.h"
+
+
+/*
+ * Fence callback, called by back-end thread on completion of all rendering up
+ * to SwrSync call.
+ *
+ * userData is the fence; userData2 is the value fence->write had when this
+ * sync was submitted.
+ */
+static void
+swr_sync_cb(UINT64 userData, UINT64 userData2)
+{
+   struct swr_fence *fence = (struct swr_fence *)userData;
+
+   /* Use the value captured at submit time rather than the live write
+    * counter: if the fence was submitted again in the meantime, copying
+    * fence->write would mark the later, still pending sync as done too.
+    * Never move the counter backwards. */
+   if (fence->read < userData2)
+      fence->read = userData2;
+}
+
+/*
+ * Submit an existing fence.
+ */
+void
+swr_fence_submit(struct swr_context *ctx, struct pipe_fence_handle *fh)
+{
+   struct swr_fence *fence = swr_fence(fh);
+
+   fence->write++;
+   /* Hand the new write value to the callback as userData2. */
+   SwrSync(ctx->swrContext, swr_sync_cb, (UINT64)fence, (UINT64)fence->write);
+}
+
+/*
+ * Create a new fence object.
+ * Returns NULL if the allocation fails.
+ */
+struct pipe_fence_handle *
+swr_fence_create()
+{
+   static int fence_id = 0;
+   /* CALLOC_STRUCT already returns zeroed memory. */
+   struct swr_fence *fence = CALLOC_STRUCT(swr_fence);
+   if (!fence)
+      return NULL;
+
+   pipe_reference_init(&fence->reference, 1);
+   fence->id = fence_id++;
+
+   return (struct pipe_fence_handle *)fence;
+}
+
+/** Destroy a fence.  Called when refcount hits zero. */
+static void
+swr_fence_destroy(struct swr_fence *fence)
+{
+   FREE(fence);
+}
+
+/**
+ * Set ptr = fence, with reference counting
+ */
+void
+swr_fence_reference(struct pipe_screen *screen,
+                    struct pipe_fence_handle **ptr,
+                    struct pipe_fence_handle *f)
+{
+   struct swr_fence *fence = swr_fence(f);
+   struct swr_fence *old;
+
+   if (likely(ptr)) {
+      old = swr_fence(*ptr);
+      *ptr = f;
+   } else {
+      old = NULL;
+   }
+
+   /* Either fence may be NULL; pass NULL on explicitly instead of forming
+    * a member address through a null pointer. */
+   if (pipe_reference(old ? &old->reference : NULL,
+                      fence ? &fence->reference : NULL))
+      swr_fence_destroy(old);
+}
+
+/*
+ * Wait for the fence to finish.
+ */ +boolean +swr_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence_handle, + uint64_t timeout) +{ + struct swr_fence *fence = swr_fence(fence_handle); + + while (!swr_is_fence_done(fence)) + sched_yield(); + + return TRUE; +} + + +uint64_t +swr_get_timestamp(struct pipe_screen *screen) +{ + return os_time_get_nano(); +} + + +void +swr_fence_init(struct pipe_screen *p_screen) +{ + p_screen->fence_reference = swr_fence_reference; + p_screen->fence_finish = swr_fence_finish; + + p_screen->get_timestamp = swr_get_timestamp; + + /* + * Create persistant "flush" fence, submitted when swr_flush is called. + */ + struct swr_screen *screen = swr_screen(p_screen); + screen->flush_fence = swr_fence_create(); +} diff --git a/src/gallium/drivers/swr/swr_fence.h b/src/gallium/drivers/swr/swr_fence.h new file mode 100644 index 0000000..317d74c --- /dev/null +++ b/src/gallium/drivers/swr/swr_fence.h @@ -0,0 +1,73 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + ***************************************************************************/ + +#ifndef SWR_FENCE_H +#define SWR_FENCE_H + + +#include "os/os_thread.h" +#include "pipe/p_state.h" +#include "util/u_inlines.h" + + +struct pipe_screen; + +struct swr_fence { + struct pipe_reference reference; + + uint64_t read; + uint64_t write; + + unsigned id; /* Just for reference */ +}; + + +static inline struct swr_fence * +swr_fence(struct pipe_fence_handle *fence) +{ + return (struct swr_fence *)fence; +} + +static INLINE boolean +swr_is_fence_done(struct swr_fence *fence) +{ + return (fence->read == fence->write); +} + + +void swr_fence_init(struct pipe_screen *screen); + +struct pipe_fence_handle *swr_fence_create(); + +void swr_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *f); + +boolean swr_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence_handle, + uint64_t timeout); + +void +swr_fence_submit(struct swr_context *ctx, struct pipe_fence_handle *fence); + +uint64_t swr_get_timestamp(struct pipe_screen *screen); + +#endif diff --git a/src/gallium/drivers/swr/swr_memory.h b/src/gallium/drivers/swr/swr_memory.h new file mode 100644 index 0000000..d116781 --- /dev/null +++ b/src/gallium/drivers/swr/swr_memory.h @@ -0,0 +1,99 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/ + +#pragma once + +void LoadHotTile( + SWR_SURFACE_STATE *pSrcSurface, + SWR_FORMAT dstFormat, + SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, + UINT x, UINT y, uint32_t renderTargetArrayIndex, + BYTE *pDstHotTile); + +void StoreHotTile( + SWR_SURFACE_STATE *pDstSurface, + SWR_FORMAT srcFormat, + SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, + UINT x, UINT y, uint32_t renderTargetArrayIndex, + BYTE *pSrcHotTile); + +void StoreHotTileClear( + SWR_SURFACE_STATE *pDstSurface, + SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, + UINT x, + UINT y, + const float* pClearColor); + +INLINE void +swr_LoadHotTile(HANDLE hPrivateContext, + SWR_FORMAT dstFormat, + SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, + UINT x, UINT y, + uint32_t renderTargetArrayIndex, BYTE* pDstHotTile) +{ + // Grab source surface state from private context + swr_draw_context *pDC = (swr_draw_context*)hPrivateContext; + SWR_SURFACE_STATE *pSrcSurface = &pDC->renderTargets[renderTargetIndex]; + + LoadHotTile(pSrcSurface, dstFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pDstHotTile); +} + +INLINE void +swr_StoreHotTile(HANDLE hPrivateContext, + SWR_FORMAT srcFormat, + SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, + UINT x, UINT y, + uint32_t renderTargetArrayIndex, BYTE* pSrcHotTile) +{ + // Grab destination surface state from private context + swr_draw_context *pDC = (swr_draw_context*)hPrivateContext; + SWR_SURFACE_STATE *pDstSurface = &pDC->renderTargets[renderTargetIndex]; + + StoreHotTile(pDstSurface, srcFormat, renderTargetIndex, x, y, renderTargetArrayIndex, pSrcHotTile); +} + +INLINE void +swr_StoreHotTileClear(HANDLE hPrivateContext, + SWR_RENDERTARGET_ATTACHMENT renderTargetIndex, + UINT x, + UINT y, + const float* pClearColor) +{ + // Grab destination surface state from private context + swr_draw_context *pDC = (swr_draw_context*)hPrivateContext; + SWR_SURFACE_STATE *pDstSurface = 
&pDC->renderTargets[renderTargetIndex]; + + StoreHotTileClear(pDstSurface, renderTargetIndex, x, y, pClearColor); +} + +void InitSimLoadTilesTable(); +void InitSimStoreTilesTable(); +void InitSimClearTilesTable(); + +/* Init Load/Store/ClearTiles Tables */ +INLINE void swr_InitMemoryModule() +{ + InitSimLoadTilesTable(); + InitSimStoreTilesTable(); + InitSimClearTilesTable(); +} diff --git a/src/gallium/drivers/swr/swr_public.h b/src/gallium/drivers/swr/swr_public.h new file mode 100644 index 0000000..4d56ead --- /dev/null +++ b/src/gallium/drivers/swr/swr_public.h @@ -0,0 +1,40 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/ + +#ifndef SWR_PUBLIC_H +#define SWR_PUBLIC_H + +struct pipe_screen; +struct sw_winsys; + +#ifdef __cplusplus +extern "C" { +#endif + +struct pipe_screen *swr_create_screen(struct sw_winsys *winsys); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/drivers/swr/swr_query.cpp b/src/gallium/drivers/swr/swr_query.cpp new file mode 100644 index 0000000..2510b3a --- /dev/null +++ b/src/gallium/drivers/swr/swr_query.cpp @@ -0,0 +1,334 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/
+
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "os/os_time.h"
+#include "swr_context.h"
+#include "swr_fence.h"
+#include "swr_query.h"
+#include "swr_screen.h"
+#include "swr_state.h"
+
+
+/* Downcast a gallium query handle to the driver's query object. */
+static struct swr_query *
+swr_query(struct pipe_query *p)
+{
+   return (struct swr_query *)p;
+}
+
+/*
+ * Make sure any fence attached to the query has completed, then drop it.
+ *
+ * If the fence is still pending it is submitted; with wait == FALSE the
+ * function then returns FALSE immediately and keeps the fence attached,
+ * otherwise it blocks in swr_fence_finish().  Returns TRUE once the query
+ * has no outstanding fence.
+ */
+static boolean
+swr_query_sync_fence(struct pipe_context *pipe,
+                     struct swr_query *pq,
+                     boolean wait)
+{
+   if (!pq->fence)
+      return TRUE;
+
+   if (!swr_is_fence_done(swr_fence(pq->fence))) {
+      swr_fence_submit(swr_context(pipe), pq->fence);
+      if (!wait)
+         return FALSE;
+      swr_fence_finish(pipe->screen, pq->fence, 0);
+   }
+   swr_fence_reference(pipe->screen, &pq->fence, NULL);
+
+   return TRUE;
+}
+
+/*
+ * Allocate a zero-initialised query of the given PIPE_QUERY_* type.
+ * Returns NULL if the allocation fails.
+ */
+static struct pipe_query *
+swr_create_query(struct pipe_context *pipe, unsigned type, unsigned index)
+{
+   struct swr_query *pq;
+
+   assert(type < PIPE_QUERY_TYPES);
+   assert(index < MAX_SO_STREAMS);
+
+   pq = CALLOC_STRUCT(swr_query);
+
+   if (pq) {
+      pq->type = type;
+      pq->index = index;
+   }
+
+   return (struct pipe_query *)pq;
+}
+
+
+/* Wait for any outstanding fence on the query, then free it. */
+static void
+swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+   struct swr_query *pq = swr_query(q);
+
+   swr_query_sync_fence(pipe, pq, TRUE);
+
+   FREE(pq);
+}
+
+
+/*
+ * Sample the counters for pq->type into *pq->result (which the caller
+ * points at either pq->start or pq->end).
+ */
+// XXX Create a fence callback, rather than stalling SwrWaitForIdle
+static void
+swr_gather_stats(struct pipe_context *pipe, struct swr_query *pq)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   assert(pq->result);
+   union pipe_query_result *result = pq->result;
+   boolean enable_stats = pq->enable_stats;
+   SWR_STATS swr_stats = {0};
+
+   swr_query_sync_fence(pipe, pq, TRUE);
+
+   /*
+    * These queries don't need SWR Stats enabled in the core
+    * Set and return.
+    */
+   switch (pq->type) {
+   case PIPE_QUERY_TIMESTAMP:
+   case PIPE_QUERY_TIME_ELAPSED:
+      result->u64 = swr_get_timestamp(pipe->screen);
+      return;
+   case PIPE_QUERY_TIMESTAMP_DISJOINT:
+      /* nothing to do here */
+      return;
+   case PIPE_QUERY_GPU_FINISHED:
+      result->b = TRUE; /* XXX TODO Add an api func to SWR to compare drawId
+                           vs LastRetiredId? */
+      return;
+   default:
+      /* Any query that needs SwrCore stats */
+      break;
+   }
+
+   /*
+    * All other results are collected from SwrCore counters
+    */
+
+   /* XXX, Should turn this into a fence callback and skip the stall */
+   SwrGetStats(ctx->swrContext, &swr_stats);
+   /* SwrGetStats returns immediately, wait for collection */
+   SwrWaitForIdle(ctx->swrContext);
+
+   switch (pq->type) {
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+      result->u64 = swr_stats.DepthPassCount;
+      break;
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+      result->u64 = swr_stats.IaPrimitives;
+      break;
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+      result->u64 = swr_stats.SoNumPrimsWritten[pq->index];
+      break;
+   case PIPE_QUERY_SO_STATISTICS:
+   case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
+      struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
+      so_stats->num_primitives_written =
+         swr_stats.SoNumPrimsWritten[pq->index];
+      so_stats->primitives_storage_needed =
+         swr_stats.SoPrimStorageNeeded[pq->index];
+   } break;
+   case PIPE_QUERY_PIPELINE_STATISTICS: {
+      struct pipe_query_data_pipeline_statistics *p_stats =
+         &result->pipeline_statistics;
+      p_stats->ia_vertices = swr_stats.IaVertices;
+      p_stats->ia_primitives = swr_stats.IaPrimitives;
+      p_stats->vs_invocations = swr_stats.VsInvocations;
+      p_stats->gs_invocations = swr_stats.GsInvocations;
+      p_stats->gs_primitives = swr_stats.GsPrimitives;
+      /* NOTE(review): c_invocations is fed from CPrimitives, same as
+       * c_primitives below -- confirm whether SWR_STATS offers a separate
+       * clipper-invocation counter that should be used here. */
+      p_stats->c_invocations = swr_stats.CPrimitives;
+      p_stats->c_primitives = swr_stats.CPrimitives;
+      p_stats->ps_invocations = swr_stats.PsInvocations;
+      p_stats->hs_invocations = swr_stats.HsInvocations;
+      p_stats->ds_invocations = swr_stats.DsInvocations;
+      p_stats->cs_invocations = swr_stats.CsInvocations;
+   } break;
+   default:
+      assert(0 && "Unsupported query");
+      break;
+   }
+
+   /* Only change stat collection if there are no active queries */
+   if (ctx->active_queries == 0)
+      SwrEnableStats(ctx->swrContext, enable_stats);
+}
+
+
+/*
+ * Report the query result as the difference between the "end" and "start"
+ * samples.  Returns FALSE only when the query still has a pending fence and
+ * wait is FALSE; *result is left untouched in that case.
+ */
+static boolean
+swr_get_query_result(struct pipe_context *pipe,
+                     struct pipe_query *q,
+                     boolean wait,
+                     union pipe_query_result *result)
+{
+   struct swr_query *pq = swr_query(q);
+
+   if (!swr_query_sync_fence(pipe, pq, wait))
+      return FALSE;
+
+   /* XXX: Need to handle counter rollover */
+
+   switch (pq->type) {
+   /* Booleans */
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      result->b = pq->end.u64 != pq->start.u64 ? TRUE : FALSE;
+      break;
+   case PIPE_QUERY_GPU_FINISHED:
+      result->b = pq->end.b;
+      break;
+   /* Counters */
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+   case PIPE_QUERY_TIMESTAMP:
+   case PIPE_QUERY_TIME_ELAPSED:
+   case PIPE_QUERY_PRIMITIVES_GENERATED:
+   case PIPE_QUERY_PRIMITIVES_EMITTED:
+      result->u64 = pq->end.u64 - pq->start.u64;
+      break;
+   /* Structures */
+   case PIPE_QUERY_SO_STATISTICS: {
+      struct pipe_query_data_so_statistics *so_stats = &result->so_statistics;
+      struct pipe_query_data_so_statistics *start = &pq->start.so_statistics;
+      struct pipe_query_data_so_statistics *end = &pq->end.so_statistics;
+      so_stats->num_primitives_written =
+         end->num_primitives_written - start->num_primitives_written;
+      so_stats->primitives_storage_needed =
+         end->primitives_storage_needed - start->primitives_storage_needed;
+   } break;
+   case PIPE_QUERY_TIMESTAMP_DISJOINT: {
+      /* os_time_get_nano returns nanoseconds */
+      result->timestamp_disjoint.frequency = UINT64_C(1000000000);
+      result->timestamp_disjoint.disjoint = FALSE;
+   } break;
+   case PIPE_QUERY_PIPELINE_STATISTICS: {
+      struct pipe_query_data_pipeline_statistics *p_stats =
+         &result->pipeline_statistics;
+      struct pipe_query_data_pipeline_statistics *start =
+         &pq->start.pipeline_statistics;
+      struct pipe_query_data_pipeline_statistics *end =
+         &pq->end.pipeline_statistics;
+      p_stats->ia_vertices = end->ia_vertices - start->ia_vertices;
+      p_stats->ia_primitives = end->ia_primitives - start->ia_primitives;
+      p_stats->vs_invocations = end->vs_invocations - start->vs_invocations;
+      p_stats->gs_invocations = end->gs_invocations - start->gs_invocations;
+      p_stats->gs_primitives = end->gs_primitives - start->gs_primitives;
+      p_stats->c_invocations = end->c_invocations - start->c_invocations;
+      p_stats->c_primitives = end->c_primitives - start->c_primitives;
+      p_stats->ps_invocations = end->ps_invocations - start->ps_invocations;
+      p_stats->hs_invocations = end->hs_invocations - start->hs_invocations;
+      p_stats->ds_invocations = end->ds_invocations - start->ds_invocations;
+      p_stats->cs_invocations = end->cs_invocations - start->cs_invocations;
+   } break;
+   case PIPE_QUERY_SO_OVERFLOW_PREDICATE: {
+      struct pipe_query_data_so_statistics *start = &pq->start.so_statistics;
+      struct pipe_query_data_so_statistics *end = &pq->end.so_statistics;
+      uint64_t num_primitives_written =
+         end->num_primitives_written - start->num_primitives_written;
+      uint64_t primitives_storage_needed =
+         end->primitives_storage_needed - start->primitives_storage_needed;
+      /* NOTE(review): reports TRUE when written > needed; an overflow would
+       * presumably be needed > written -- confirm the intended direction. */
+      result->b = num_primitives_written > primitives_storage_needed;
+   } break;
+   default:
+      assert(0 && "Unsupported query");
+      break;
+   }
+
+   return TRUE;
+}
+
+/* Reset the query and take the "start" sample; always returns TRUE. */
+static boolean
+swr_begin_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   struct swr_query *pq = swr_query(q);
+
+   /* Initialize Results */
+   memset(&pq->start, 0, sizeof(pq->start));
+   memset(&pq->end, 0, sizeof(pq->end));
+
+   /* Gather start stats and enable SwrCore counters */
+   pq->result = &pq->start;
+   pq->enable_stats = TRUE;
+   swr_gather_stats(pipe, pq);
+   ctx->active_queries++;
+
+   /* override start timestamp to 0 for TIMESTAMP query */
+   if (pq->type == PIPE_QUERY_TIMESTAMP)
+      pq->start.u64 = 0;
+
+   return TRUE;
+}
+
+/* Take the "end" sample for the query. */
+static void
+swr_end_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   struct swr_query *pq = swr_query(q);
+
+   assert(ctx->active_queries
+          && "swr_end_query, there are no active queries!");
+   ctx->active_queries--;
+
+   /* Gather end stats and disable SwrCore counters */
+   pq->result = &pq->end;
+   pq->enable_stats = FALSE;
+   swr_gather_stats(pipe, pq);
+}
+
+
+/*
+ * Evaluate the current render condition.
+ *
+ * Returns TRUE when drawing should proceed: no predicate query is bound,
+ * the result is not available yet, or the result satisfies the condition.
+ */
+boolean
+swr_check_render_cond(struct pipe_context *pipe)
+{
+   struct swr_context *ctx = swr_context(pipe);
+   union pipe_query_result result;
+   boolean b, wait;
+
+   if (!ctx->render_cond_query)
+      return TRUE; /* no query predicate, draw normally */
+
+   wait = (ctx->render_cond_mode == PIPE_RENDER_COND_WAIT
+           || ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT);
+
+   /*
+    * get_query_result() writes through the full union: predicate queries
+    * store only ->b, structure queries store more than eight bytes.  Use a
+    * complete, zeroed union so that u64 is non-zero exactly when any part
+    * of the result is.
+    */
+   memset(&result, 0, sizeof(result));
+   b = pipe->get_query_result(pipe, ctx->render_cond_query, wait, &result);
+   if (b)
+      return ((!result.u64) == ctx->render_cond_cond);
+   else
+      return TRUE;
+}
+
+/* Install the query entry points on the context. */
+void
+swr_query_init(struct pipe_context *pipe)
+{
+   struct swr_context *ctx = swr_context(pipe);
+
+   pipe->create_query = swr_create_query;
+   pipe->destroy_query = swr_destroy_query;
+   pipe->begin_query = swr_begin_query;
+   pipe->end_query = swr_end_query;
+   pipe->get_query_result = swr_get_query_result;
+
+   ctx->active_queries = 0;
+}
diff --git a/src/gallium/drivers/swr/swr_query.h b/src/gallium/drivers/swr/swr_query.h
new file mode 100644
index 0000000..2a2aeee
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_query.h
@@ -0,0 +1,48 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/ + +#ifndef SWR_QUERY_H +#define SWR_QUERY_H + + +#include +#include "os/os_thread.h" + + +struct swr_query { + unsigned type; /* PIPE_QUERY_* */ + unsigned index; + + union pipe_query_result *result; + union pipe_query_result start; + union pipe_query_result end; + + struct pipe_fence_handle *fence; + + boolean enable_stats; +}; + +extern void swr_query_init(struct pipe_context *pipe); + +extern boolean swr_check_render_cond(struct pipe_context *pipe); +#endif diff --git a/src/gallium/drivers/swr/swr_resource.h b/src/gallium/drivers/swr/swr_resource.h new file mode 100644 index 0000000..f7f641e --- /dev/null +++ b/src/gallium/drivers/swr/swr_resource.h @@ -0,0 +1,98 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + ***************************************************************************/ + +#ifndef SWR_RESOURCE_H +#define SWR_RESOURCE_H + +#include "pipe/p_state.h" +#include "api.h" + +struct sw_displaytarget; + +struct swr_resource { + struct pipe_resource base; + + bool has_depth; + bool has_stencil; + + UINT alignedWidth; + UINT alignedHeight; + + SWR_SURFACE_STATE swr; + SWR_SURFACE_STATE secondary; // for faking depth/stencil merged formats + + struct sw_displaytarget *display_target; + + unsigned row_stride[PIPE_MAX_TEXTURE_LEVELS]; + unsigned img_stride[PIPE_MAX_TEXTURE_LEVELS]; + unsigned mip_offsets[PIPE_MAX_TEXTURE_LEVELS]; + + /* Opaque pointer to swr_context to mark resource in use */ + void *bound_to_context; +}; + + +static INLINE struct swr_resource * +swr_resource(struct pipe_resource *resource) +{ + return (struct swr_resource *)resource; +} + +static INLINE boolean +swr_resource_is_texture(const struct pipe_resource *resource) +{ + switch (resource->target) { + case PIPE_BUFFER: + return FALSE; + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_3D: + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + return TRUE; + default: + assert(0); + return FALSE; + } +} + + +static INLINE void * +swr_resource_data(struct pipe_resource *resource) +{ + struct swr_resource *swr_r = swr_resource(resource); + + assert(!swr_resource_is_texture(resource)); + + return swr_r->swr.pBaseAddress; +} + + +void swr_store_render_target(struct swr_context *ctx, + uint32_t attachment, + enum SWR_TILE_STATE post_tile_state, + struct SWR_SURFACE_STATE *surface = nullptr); +#endif diff --git 
a/src/gallium/drivers/swr/swr_scratch.cpp b/src/gallium/drivers/swr/swr_scratch.cpp new file mode 100644 index 0000000..e6c448c --- /dev/null +++ b/src/gallium/drivers/swr/swr_scratch.cpp @@ -0,0 +1,116 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/
+
+#include "util/u_memory.h"
+#include "swr_context.h"
+#include "swr_scratch.h"
+#include "api.h"
+
+
+/*
+ * Copy size bytes of user_buffer into scratch memory and return the copy.
+ *
+ * Copies of 2048 bytes or more come from SwrAllocDrawContextMemory; smaller
+ * ones are carved out of the ring buffer in *space, which is grown (after
+ * idling the pipeline) to hold KNOB_MAX_DRAWS_IN_FLIGHT copies of this size.
+ *
+ * Returns NULL if the backing allocation fails.
+ */
+void *
+swr_copy_to_scratch_space(struct swr_context *ctx,
+                          struct swr_scratch_space *space,
+                          const void *user_buffer,
+                          unsigned int size)
+{
+   void *ptr;
+   assert(space);
+   assert(user_buffer);
+   assert(size);
+
+   if (size >= 2048) { /* XXX TODO create KNOB_ for this */
+      /* Use per draw SwrAllocDrawContextMemory for larger copies */
+      ptr = SwrAllocDrawContextMemory(ctx->swrContext, size, 4);
+   } else {
+      /* Allocate enough so that MAX_DRAWS_IN_FLIGHT sets fit. */
+      unsigned int max_size_in_flight = size * KNOB_MAX_DRAWS_IN_FLIGHT;
+
+      /* Need to grow space */
+      if (max_size_in_flight > space->current_size) {
+         /* Must idle the pipeline, this is infrequent */
+         SwrWaitForIdle(ctx->swrContext);
+
+         if (space->base)
+            align_free(space->base);
+
+         space->base = (BYTE *)align_malloc(max_size_in_flight, 4);
+         space->head = space->base;
+
+         if (!space->base) {
+            /* Record an empty buffer so the next call retries the grow. */
+            space->current_size = 0;
+            return NULL;
+         }
+         space->current_size = max_size_in_flight;
+      }
+
+      /* Wrap */
+      if (((BYTE *)space->head + size)
+          >= ((BYTE *)space->base + space->current_size)) {
+         /*
+          * TODO XXX: Should add a fence on wrap.  Assumption is that
+          * current_size >> size, and there are at least MAX_DRAWS_IN_FLIGHT
+          * draws in scratch.  So fence would always be met on wrap.  A fence
+          * would ensure that first frame in buffer is done before wrapping.
+          * If fence ever needs to be waited on, can increase buffer size.
+          * So far in testing, this hasn't been necessary.
+          */
+         space->head = space->base;
+      }
+
+      ptr = space->head;
+      space->head = (BYTE *)space->head + size;
+   }
+
+   /* The per-draw allocation can fail too; never memcpy into NULL. */
+   if (!ptr)
+      return NULL;
+
+   /* Copy user_buffer to scratch */
+   memcpy(ptr, user_buffer, size);
+
+   return ptr;
+}
+
+
+/*
+ * Allocate the zero-initialised scratch pool descriptor for a context.
+ * ctx->scratch is NULL afterwards if the allocation failed.
+ */
+void
+swr_init_scratch_buffers(struct swr_context *ctx)
+{
+   struct swr_scratch_buffers *scratch;
+
+   scratch = CALLOC_STRUCT(swr_scratch_buffers);
+   ctx->scratch = scratch;
+}
+
+/* Free every scratch ring buffer and the descriptor itself. */
+void
+swr_destroy_scratch_buffers(struct swr_context *ctx)
+{
+   struct swr_scratch_buffers *scratch = ctx->scratch;
+
+   if (scratch) {
+      if (scratch->vs_constants.base)
+         align_free(scratch->vs_constants.base);
+      if (scratch->fs_constants.base)
+         align_free(scratch->fs_constants.base);
+      if (scratch->vertex_buffer.base)
+         align_free(scratch->vertex_buffer.base);
+      if (scratch->index_buffer.base)
+         align_free(scratch->index_buffer.base);
+      FREE(scratch);
+      /* Do not leave a dangling pointer behind on the context. */
+      ctx->scratch = NULL;
+   }
+}
diff --git a/src/gallium/drivers/swr/swr_scratch.h b/src/gallium/drivers/swr/swr_scratch.h
new file mode 100644
index 0000000..74218d6
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_scratch.h
@@ -0,0 +1,63 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + ***************************************************************************/ + +#ifndef SWR_SCRATCH_H +#define SWR_SCRATCH_H + +struct swr_scratch_space { + void *head; + unsigned int current_size; + /* TODO XXX: Add a fence for wrap condition. */ + + void *base; +}; + +struct swr_scratch_buffers { + struct swr_scratch_space vs_constants; + struct swr_scratch_space fs_constants; + struct swr_scratch_space vertex_buffer; + struct swr_scratch_space index_buffer; +}; + + +/* + * swr_copy_to_scratch_space + * Copies size bytes of user_buffer into the scratch ring buffer. + * Used to store temporary data such as client arrays and constants. + * + * Inputs: + * space ptr to scratch pool (vs_constants, fs_constants) + * user_buffer, data to copy into scratch space + * size to be copied + * Returns: + * pointer to data copied to scratch space. + */ +void *swr_copy_to_scratch_space(struct swr_context *ctx, + struct swr_scratch_space *space, + const void *user_buffer, + unsigned int size); + +void swr_init_scratch_buffers(struct swr_context *ctx); +void swr_destroy_scratch_buffers(struct swr_context *ctx); + +#endif diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp new file mode 100644 index 0000000..66eb58b --- /dev/null +++ b/src/gallium/drivers/swr/swr_screen.cpp @@ -0,0 +1,666 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/ + +#include "pipe/p_screen.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "util/u_format.h" +#include "util/u_inlines.h" +#include "util/u_cpu_detect.h" + +#include "state_tracker/sw_winsys.h" + +extern "C" { +#include "gallivm/lp_bld_limits.h" +} + +#include "swr_public.h" +#include "swr_screen.h" +#include "swr_context.h" +#include "swr_resource.h" +#include "swr_fence.h" +#include "gen_knobs.h" + +#include "jit_api.h" + +#include + +static const char * +swr_get_name(struct pipe_screen *screen) +{ + return "SWR"; +} + +static const char * +swr_get_vendor(struct pipe_screen *screen) +{ + return "Intel Corporation"; +} + +static boolean +swr_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned bind) +{ + struct sw_winsys *winsys = swr_screen(screen)->winsys; + const struct util_format_description *format_desc; + + assert(target == PIPE_BUFFER || target == PIPE_TEXTURE_1D + || target == PIPE_TEXTURE_1D_ARRAY + || target == PIPE_TEXTURE_2D + || target == PIPE_TEXTURE_2D_ARRAY + || target == PIPE_TEXTURE_RECT + || target == PIPE_TEXTURE_3D + || target == PIPE_TEXTURE_CUBE + || target == PIPE_TEXTURE_CUBE_ARRAY); + + format_desc = util_format_description(format); + if (!format_desc) + return FALSE; + + if (sample_count > 1) + return FALSE; + + if (bind + & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) { + if (!winsys->is_displaytarget_format_supported(winsys, bind, format)) + return FALSE; + } + + if (bind & PIPE_BIND_RENDER_TARGET) { + if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) + return FALSE; + + if (mesa_to_swr_format(format) == (SWR_FORMAT)-1) + return FALSE; + + /* + * Although possible, it is unnatural to render into compressed or YUV + * surfaces. So disable these here to avoid going into weird paths + * inside the state trackers. 
+ */ + if (format_desc->block.width != 1 || format_desc->block.height != 1) + return FALSE; + } + + /* We're going to lie and say we support all depth/stencil formats. + * SWR actually needs separate bindings, and only does F32 depth. + */ + if (bind & PIPE_BIND_DEPTH_STENCIL) { + if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) + return FALSE; + } + + return TRUE; +} + +static int +swr_get_param(struct pipe_screen *screen, enum pipe_cap param) +{ + switch (param) { + case PIPE_CAP_NPOT_TEXTURES: + case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_SM3: + return 1; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return PIPE_MAX_COLOR_BUFS; + case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + case PIPE_CAP_QUERY_TIME_ELAPSED: + case PIPE_CAP_QUERY_PIPELINE_STATISTICS: + return 1; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_TEXTURE_SWIZZLE: + return 1; + case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: + return 0; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; // xxx This increases rendertarget max size to 4k x 4k. No + // way to separate widht/height. + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 12; // xxx + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 12; // xxx + case PIPE_CAP_BLEND_EQUATION_SEPARATE: + return 1; + case PIPE_CAP_INDEP_BLEND_ENABLE: + return 1; + case PIPE_CAP_INDEP_BLEND_FUNC: + return 1; + case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: + return 0; // Don't support lower left frag coord. 
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: + case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: + return 1; + case PIPE_CAP_DEPTH_CLIP_DISABLE: + return 1; + case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: + return MAX_SO_STREAMS; + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: + case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: + return MAX_ATTRIBUTES; + case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES: + case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS: + return 1024; + case PIPE_CAP_MAX_VERTEX_STREAMS: + return 1; + case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE: + return 2048; + case PIPE_CAP_PRIMITIVE_RESTART: + return 1; + case PIPE_CAP_SHADER_STENCIL_EXPORT: + return 1; + case PIPE_CAP_TGSI_INSTANCEID: + case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR: + case PIPE_CAP_START_INSTANCE: + return 1; + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + return 1; + case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + return 256; /* for GL3 */ + case PIPE_CAP_MIN_TEXEL_OFFSET: + return -8; + case PIPE_CAP_MAX_TEXEL_OFFSET: + return 7; + case PIPE_CAP_CONDITIONAL_RENDER: + return 1; + case PIPE_CAP_TEXTURE_BARRIER: + return 0; + case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: + case PIPE_CAP_VERTEX_COLOR_UNCLAMPED: /* draw module */ + case PIPE_CAP_VERTEX_COLOR_CLAMPED: /* draw module */ + return 1; + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: + return 0; + case PIPE_CAP_GLSL_FEATURE_LEVEL: + return 330; + case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION: + return 0; + case PIPE_CAP_COMPUTE: + return 0; + case PIPE_CAP_USER_VERTEX_BUFFERS: + case PIPE_CAP_USER_INDEX_BUFFERS: + case PIPE_CAP_USER_CONSTANT_BUFFERS: + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: + return 1; + case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: + return 16; + case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: + case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: + case 
PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_TEXTURE_MULTISAMPLE: + return 0; + case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: + return 64; + case PIPE_CAP_QUERY_TIMESTAMP: + return 1; + case PIPE_CAP_CUBE_MAP_ARRAY: + return 0; + case PIPE_CAP_TEXTURE_BUFFER_OBJECTS: + return 1; + case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE: + return 65536; + case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: + return 0; + case PIPE_CAP_TGSI_TEXCOORD: + case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: + return 0; + case PIPE_CAP_MAX_VIEWPORTS: + return 1; + case PIPE_CAP_ENDIANNESS: + return PIPE_ENDIAN_NATIVE; + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: + case PIPE_CAP_TEXTURE_GATHER_SM5: + return 0; + case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: + return 1; + case PIPE_CAP_TEXTURE_QUERY_LOD: + case PIPE_CAP_SAMPLE_SHADING: + case PIPE_CAP_TEXTURE_GATHER_OFFSETS: + case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION: + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE: + case PIPE_CAP_SAMPLER_VIEW_TARGET: + return 0; + case PIPE_CAP_FAKE_SW_MSAA: + return 1; + case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: + case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET: + return 0; + case PIPE_CAP_DRAW_INDIRECT: + return 1; + + case PIPE_CAP_VENDOR_ID: + return 0xFFFFFFFF; + case PIPE_CAP_DEVICE_ID: + return 0xFFFFFFFF; + case PIPE_CAP_ACCELERATED: + return 0; + case PIPE_CAP_VIDEO_MEMORY: { + /* XXX: Do we want to return the full amount of system memory ? 
*/ + uint64_t system_memory; + + if (!os_get_total_physical_memory(&system_memory)) + return 0; + + return (int)(system_memory >> 20); + } + case PIPE_CAP_UMA: + return 1; + case PIPE_CAP_CONDITIONAL_RENDER_INVERTED: + return 1; + case PIPE_CAP_CLIP_HALFZ: + return 1; + case PIPE_CAP_VERTEXID_NOBASE: + return 0; + case PIPE_CAP_POLYGON_OFFSET_CLAMP: + return 1; + case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: + return 0; + case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: + return 0; // xxx + case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: + return 0; + case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: + return 0; + case PIPE_CAP_DEPTH_BOUNDS_TEST: + return 0; // xxx + case PIPE_CAP_TEXTURE_FLOAT_LINEAR: + case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR: + return 1; + } + + /* should only get here on unhandled cases */ + debug_printf("Unexpected PIPE_CAP %d query\n", param); + return 0; +} + +static int +swr_get_shader_param(struct pipe_screen *screen, + unsigned shader, + enum pipe_shader_cap param) +{ + if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_FRAGMENT) + return gallivm_get_shader_param(param); + + // Todo: geometry, tesselation, compute + return 0; +} + + +static float +swr_get_paramf(struct pipe_screen *screen, enum pipe_capf param) +{ + switch (param) { + case PIPE_CAPF_MAX_LINE_WIDTH: + case PIPE_CAPF_MAX_LINE_WIDTH_AA: + case PIPE_CAPF_MAX_POINT_WIDTH: + return 255.0; /* arbitrary */ + case PIPE_CAPF_MAX_POINT_WIDTH_AA: + return 0.0; + case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: + return 0.0; + case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: + return 0.0; + case PIPE_CAPF_GUARD_BAND_LEFT: + case PIPE_CAPF_GUARD_BAND_TOP: + case PIPE_CAPF_GUARD_BAND_RIGHT: + case PIPE_CAPF_GUARD_BAND_BOTTOM: + return 0.0; + } + /* should only get here on unhandled cases */ + debug_printf("Unexpected PIPE_CAPF %d query\n", param); + return 0.0; +} + +SWR_FORMAT +mesa_to_swr_format(enum pipe_format format) +{ + const struct util_format_description *format_desc = + util_format_description(format); + if (!format_desc) + 
return (SWR_FORMAT)-1; + + // more robust check would be comparing all attributes of the formats + // luckily format names are mostly standardized + for (int i = 0; i < NUM_SWR_FORMATS; i++) { + const SWR_FORMAT_INFO &swr_desc = GetFormatInfo((SWR_FORMAT)i); + + if (!strcasecmp(format_desc->short_name, swr_desc.name)) + return (SWR_FORMAT)i; + } + + // ... with some exceptions + switch (format) { + case PIPE_FORMAT_R8G8B8A8_SRGB: + return R8G8B8A8_UNORM_SRGB; + case PIPE_FORMAT_B8G8R8A8_SRGB: + return B8G8R8A8_UNORM_SRGB; + case PIPE_FORMAT_I8_UNORM: + return R8_UNORM; + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + return R24_UNORM_X8_TYPELESS; + case PIPE_FORMAT_L8A8_UNORM: + return R8G8_UNORM; + default: + break; + } + + debug_printf("asked to convert unsupported format %s\n", + format_desc->name); + return (SWR_FORMAT)-1; +} + +static boolean +swr_displaytarget_layout(struct swr_screen *screen, struct swr_resource *res) +{ + struct sw_winsys *winsys = screen->winsys; + + UINT stride; + res->display_target = winsys->displaytarget_create(winsys, + res->base.bind, + res->base.format, + res->alignedWidth, + res->alignedHeight, + 64, + &stride); + + if (res->display_target == NULL) + return FALSE; + + /* Clear the display target surface */ + void *map = winsys->displaytarget_map( + winsys, res->display_target, PIPE_TRANSFER_WRITE); + + if (map) + memset(map, 0, res->alignedHeight * stride); + + winsys->displaytarget_unmap(winsys, res->display_target); + + return TRUE; +} + +static struct pipe_resource * +swr_resource_create(struct pipe_screen *_screen, + const struct pipe_resource *templat) +{ + struct swr_screen *screen = swr_screen(_screen); + struct swr_resource *res = CALLOC_STRUCT(swr_resource); + if (!res) + return NULL; + + res->base = *templat; + pipe_reference_init(&res->base.reference, 1); + res->base.screen = &screen->base; + + const struct util_format_description *desc = + util_format_description(templat->format); + res->has_depth = util_format_has_depth(desc); 
+ res->has_stencil = util_format_has_stencil(desc); + + pipe_format fmt = templat->format; + if (res->has_depth) + fmt = PIPE_FORMAT_Z24_UNORM_S8_UINT; + if (res->has_stencil && !res->has_depth) + fmt = PIPE_FORMAT_R8_UINT; + + res->swr.width = templat->width0; + res->swr.height = templat->height0; + res->swr.depth = templat->depth0; + res->swr.type = SURFACE_2D; + res->swr.tileMode = SWR_TILE_NONE; + res->swr.format = mesa_to_swr_format(fmt); + res->swr.numSamples = (1 << templat->nr_samples); + + SWR_FORMAT_INFO finfo = GetFormatInfo(res->swr.format); + + unsigned total_size = 0; + unsigned width = templat->width0; + unsigned height = templat->height0; + unsigned depth = templat->depth0; + unsigned layers = templat->array_size; + + for (int level = 0; level <= templat->last_level; level++) { + unsigned alignedWidth, alignedHeight; + unsigned num_slices; + + if (templat->bind & (PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_RENDER_TARGET + | PIPE_BIND_DISPLAY_TARGET)) { + alignedWidth = (width + (KNOB_MACROTILE_X_DIM - 1)) + & ~(KNOB_MACROTILE_X_DIM - 1); + alignedHeight = (height + (KNOB_MACROTILE_Y_DIM - 1)) + & ~(KNOB_MACROTILE_Y_DIM - 1); + } else { + alignedWidth = width; + alignedHeight = height; + } + + if (level == 0) { + res->alignedWidth = alignedWidth; + res->alignedHeight = alignedHeight; + } + + res->row_stride[level] = alignedWidth * finfo.Bpp; + res->img_stride[level] = res->row_stride[level] * alignedHeight; + res->mip_offsets[level] = total_size; + + if (templat->target == PIPE_TEXTURE_3D) + num_slices = depth; + else if (templat->target == PIPE_TEXTURE_1D_ARRAY + || templat->target == PIPE_TEXTURE_2D_ARRAY + || templat->target == PIPE_TEXTURE_CUBE + || templat->target == PIPE_TEXTURE_CUBE_ARRAY) + num_slices = layers; + else + num_slices = 1; + + total_size += res->img_stride[level] * num_slices; + + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); + } + + res->swr.halign = res->alignedWidth; + res->swr.valign = 
res->alignedHeight; + res->swr.pitch = res->row_stride[0]; + res->swr.pBaseAddress = (BYTE *)_aligned_malloc(total_size, 64); + + if (res->has_depth && res->has_stencil) { + res->secondary.width = templat->width0; + res->secondary.height = templat->height0; + res->secondary.depth = templat->depth0; + res->secondary.type = SURFACE_2D; + res->secondary.tileMode = SWR_TILE_NONE; + res->secondary.format = R8_UINT; + res->secondary.numSamples = (1 << templat->nr_samples); + + SWR_FORMAT_INFO finfo = GetFormatInfo(res->secondary.format); + res->secondary.pitch = res->alignedWidth * finfo.Bpp; + res->secondary.pBaseAddress = (BYTE *)_aligned_malloc( + res->alignedHeight * res->secondary.pitch, 64); + } + + if (swr_resource_is_texture(&res->base)) { + if (res->base.bind & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT + | PIPE_BIND_SHARED)) { + /* displayable surface */ + if (!swr_displaytarget_layout(screen, res)) + goto fail; + } + } + + return &res->base; + +fail: + FREE(res); + return NULL; +} + +static void +swr_resource_destroy(struct pipe_screen *p_screen, struct pipe_resource *pt) +{ + struct swr_screen *screen = swr_screen(p_screen); + struct swr_resource *res = swr_resource(pt); + + /* + * If this resource is attached to a context it may still be in use, check + * dependencies before freeing + * XXX TODO: don't use SwrWaitForIdle, use fences and come up with a real + * resource manager. + * XXX It's happened that we get a swr_destroy prior to freeing the + * framebuffer resource. Don't wait on it. + */ + if (res->bound_to_context && !res->display_target) { + struct swr_context *ctx = + swr_context((pipe_context *)res->bound_to_context); + SwrWaitForIdle( + ctx->swrContext); // BMCDEBUG, don't SwrWaitForIdle!!! Use a fence. 
+ } + + if (res->display_target) { + /* display target */ + struct sw_winsys *winsys = screen->winsys; + winsys->displaytarget_destroy(winsys, res->display_target); + } + + _aligned_free(res->swr.pBaseAddress); + _aligned_free(res->secondary.pBaseAddress); + + FREE(res); +} + + +static void +swr_flush_frontbuffer(struct pipe_screen *p_screen, + struct pipe_resource *resource, + unsigned level, + unsigned layer, + void *context_private, + struct pipe_box *sub_box) +{ + SWR_SURFACE_STATE &colorBuffer = swr_resource(resource)->swr; + + struct swr_screen *screen = swr_screen(p_screen); + struct sw_winsys *winsys = screen->winsys; + struct swr_resource *res = swr_resource(resource); + + /* Ensure fence set at flush is finished, before reading frame buffer */ + swr_fence_finish(p_screen, screen->flush_fence, 0); + + void *map = winsys->displaytarget_map( + winsys, res->display_target, PIPE_TRANSFER_WRITE); + memcpy( + map, colorBuffer.pBaseAddress, colorBuffer.pitch * colorBuffer.height); + winsys->displaytarget_unmap(winsys, res->display_target); + + assert(res->display_target); + if (res->display_target) + winsys->displaytarget_display( + winsys, res->display_target, context_private, sub_box); +} + + +static void +swr_destroy_screen(struct pipe_screen *p_screen) +{ + struct swr_screen *screen = swr_screen(p_screen); + struct sw_winsys *winsys = screen->winsys; + + fprintf(stderr, "SWR destroy screen!\n"); + + swr_fence_finish(p_screen, screen->flush_fence, 0); + swr_fence_reference(p_screen, &screen->flush_fence, NULL); + + JitDestroyContext(screen->hJitMgr); + + if (winsys->destroy) + winsys->destroy(winsys); + + FREE(screen); +} + + +struct pipe_screen * +swr_create_screen(struct sw_winsys *winsys) +{ + struct swr_screen *screen = CALLOC_STRUCT(swr_screen); + + if (!screen) + return NULL; + + fprintf(stderr, "SWR create screen!\n"); + util_cpu_detect(); + if (util_cpu_caps.has_avx2) + fprintf(stderr, "This processor supports AVX2.\n"); + else if 
(util_cpu_caps.has_avx) + fprintf(stderr, "This processor supports AVX.\n"); + /* Exit gracefully if there is no AVX support */ + else { + fprintf(stderr, " !!! This processor does not support AVX or AVX2. " + "OpenSWR requires AVX.\n"); + exit(-1); + } + + if (!getenv("KNOB_MAX_PRIMS_PER_DRAW")) { + g_GlobalKnobs.MAX_PRIMS_PER_DRAW.Value(49152); + } + + screen->winsys = winsys; + screen->base.get_name = swr_get_name; + screen->base.get_vendor = swr_get_vendor; + screen->base.is_format_supported = swr_is_format_supported; + screen->base.context_create = swr_create_context; + + screen->base.destroy = swr_destroy_screen; + screen->base.get_param = swr_get_param; + screen->base.get_shader_param = swr_get_shader_param; + screen->base.get_paramf = swr_get_paramf; + + screen->base.resource_create = swr_resource_create; + screen->base.resource_destroy = swr_resource_destroy; + + screen->base.flush_frontbuffer = swr_flush_frontbuffer; + + screen->hJitMgr = JitCreateContext(KNOB_SIMD_WIDTH, KNOB_ARCH_STR); + + swr_fence_init(&screen->base); + + return &screen->base; +} diff --git a/src/gallium/drivers/swr/swr_screen.h b/src/gallium/drivers/swr/swr_screen.h new file mode 100644 index 0000000..a96dc44 --- /dev/null +++ b/src/gallium/drivers/swr/swr_screen.h @@ -0,0 +1,52 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/
+
+#ifndef SWR_SCREEN_H
+#define SWR_SCREEN_H
+
+#include "pipe/p_screen.h"
+#include "pipe/p_defines.h"
+#include "api.h"
+
+struct sw_winsys;
+
+struct swr_screen { /* SWR's pipe_screen subclass */
+   struct pipe_screen base; /* must stay the first member: swr_screen() casts pipe_screen* to swr_screen* */
+
+   struct pipe_fence_handle *flush_fence; /* waited on before the front buffer is read and at screen destroy */
+
+   struct sw_winsys *winsys; /* winsys passed to swr_create_screen(); destroyed with the screen */
+
+   HANDLE hJitMgr; /* JIT context from JitCreateContext(); released by JitDestroyContext() */
+};
+
+static INLINE struct swr_screen *
+swr_screen(struct pipe_screen *pipe) /* downcast; valid only for screens made by swr_create_screen() */
+{
+   return (struct swr_screen *)pipe;
+}
+
+SWR_FORMAT
+mesa_to_swr_format(enum pipe_format format); /* returns (SWR_FORMAT)-1 when no SWR equivalent exists */
+
+#endif
diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp
new file mode 100644
index 0000000..edad4c2
--- /dev/null
+++ b/src/gallium/drivers/swr/swr_shader.cpp
@@ -0,0 +1,608 @@
+/****************************************************************************
+ * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + ***************************************************************************/ + +#include "JitManager.h" +#include "state.h" +#include "state_llvm.h" +#include "builder.h" + +#include "llvm-c/Core.h" +#include "llvm/Support/CBindingWrapping.h" + +#include "tgsi/tgsi_strings.h" +#include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_flow.h" +#include "gallivm/lp_bld_struct.h" +#include "gallivm/lp_bld_tgsi.h" + +#include "swr_context.h" +#include "swr_context_llvm.h" +#include "swr_state.h" +#include "swr_screen.h" + +bool operator==(const swr_jit_key &lhs, const swr_jit_key &rhs) +{ + return !memcmp(&lhs, &rhs, sizeof(lhs)); +} + +void +swr_generate_fs_key(struct swr_jit_key &key, + struct swr_context *ctx, + swr_fragment_shader *swr_fs) +{ + key.nr_cbufs = ctx->framebuffer.nr_cbufs; + key.light_twoside = ctx->rasterizer->light_twoside; + memcpy(&key.vs_output_semantic_name, + &ctx->vs->info.base.output_semantic_name, + sizeof(key.vs_output_semantic_name)); + memcpy(&key.vs_output_semantic_idx, + &ctx->vs->info.base.output_semantic_index, + sizeof(key.vs_output_semantic_idx)); + + key.nr_samplers = swr_fs->info.base.file_max[TGSI_FILE_SAMPLER] + 1; + + for (unsigned i = 0; i < key.nr_samplers; i++) { + if (swr_fs->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { + lp_sampler_static_sampler_state( + &key.sampler[i].sampler_state, + ctx->samplers[PIPE_SHADER_FRAGMENT][i]); + } + } + + /* + * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes + * are dx10-style? Can't really have mixed opcodes, at least not + * if we want to skip the holes here (without rescanning tgsi). 
+ */ + if (swr_fs->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) { + key.nr_sampler_views = + swr_fs->info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; + for (unsigned i = 0; i < key.nr_sampler_views; i++) { + if (swr_fs->info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) { + lp_sampler_static_texture_state( + &key.sampler[i].texture_state, + ctx->sampler_views[PIPE_SHADER_FRAGMENT][i]); + } + } + } else { + key.nr_sampler_views = key.nr_samplers; + for (unsigned i = 0; i < key.nr_sampler_views; i++) { + if (swr_fs->info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { + lp_sampler_static_texture_state( + &key.sampler[i].texture_state, + ctx->sampler_views[PIPE_SHADER_FRAGMENT][i]); + } + } + } + + memcpy(&key.alphaTest, + &ctx->depth_stencil->alpha, + sizeof(struct pipe_alpha_state)); +} + +struct BuilderSWR : public Builder { + BuilderSWR(JitManager *pJitMgr) + : Builder(pJitMgr) + { + pJitMgr->SetupNewModule(); + } + + PFN_VERTEX_FUNC + CompileVS(struct pipe_context *ctx, swr_vertex_shader *swr_vs); + PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_key &key); +}; + +PFN_VERTEX_FUNC +BuilderSWR::CompileVS(struct pipe_context *ctx, swr_vertex_shader *swr_vs) +{ + swr_vs->linkageMask = 0; + + for (unsigned i = 0; i < swr_vs->info.base.num_outputs; i++) { + switch (swr_vs->info.base.output_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + break; + case TGSI_SEMANTIC_PSIZE: + swr_vs->pointSizeAttrib = i; + break; + default: + swr_vs->linkageMask |= (1 << i); + break; + } + } + + // tgsi_dump(swr_vs->pipe.tokens, 0); + + struct gallivm_state *gallivm = + gallivm_create("VS", wrap(&JM()->mContext)); + gallivm->module = wrap(JM()->mpCurrentModule); + + LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; + LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; + + memset(outputs, 0, sizeof(outputs)); + + AttrBuilder attrBuilder; + attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); + AttributeSet attrSet = 
AttributeSet::get( + JM()->mContext, AttributeSet::FunctionIndex, attrBuilder); + + std::vector vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0), + PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)}; + FunctionType *vsFuncType = + FunctionType::get(Type::getVoidTy(JM()->mContext), vsArgs, false); + + // create new vertex shader function + auto pFunction = Function::Create(vsFuncType, + GlobalValue::ExternalLinkage, + "VS", + JM()->mpCurrentModule); + pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet); + + BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction); + IRB()->SetInsertPoint(block); + LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); + + auto argitr = pFunction->getArgumentList().begin(); + Value *hPrivateData = argitr++; + hPrivateData->setName("hPrivateData"); + Value *pVsCtx = argitr++; + pVsCtx->setName("vsCtx"); + + Value *consts_ptr = GEP(hPrivateData, {0, swr_draw_context_constantVS}); + consts_ptr->setName("vs_constants"); + Value *const_sizes_ptr = + GEP(hPrivateData, {0, swr_draw_context_num_constantsVS}); + const_sizes_ptr->setName("num_vs_constants"); + + Value *vtxInput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVin}); + + for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) { + const unsigned mask = swr_vs->info.base.input_usage_mask[attrib]; + for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { + if (mask & (1 << channel)) { + inputs[attrib][channel] = + wrap(LOAD(vtxInput, {0, 0, attrib, channel})); + } + } + } + + struct lp_bld_tgsi_system_values system_values; + memset(&system_values, 0, sizeof(system_values)); + system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID})); + system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID})); + + lp_build_tgsi_soa(gallivm, + swr_vs->pipe.tokens, + lp_type_float_vec(32, 32 * 8), + NULL, // mask + wrap(consts_ptr), + wrap(const_sizes_ptr), + &system_values, + inputs, + outputs, + NULL, // 
wrap(hPrivateData), (sampler context) + NULL, // sampler + &swr_vs->info.base, + NULL); // geometry shader face + + IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); + + Value *vtxOutput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVout}); + + for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { + for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) { + if (!outputs[attrib][channel]) + continue; + + Value *val = LOAD(unwrap(outputs[attrib][channel])); + STORE(val, vtxOutput, {0, 0, attrib, channel}); + } + } + + RET_VOID(); + + gallivm_verify_function(gallivm, wrap(pFunction)); + gallivm_compile_module(gallivm); + + // lp_debug_dump_value(func); + + PFN_VERTEX_FUNC pFunc = + (PFN_VERTEX_FUNC)gallivm_jit_function(gallivm, wrap(pFunction)); + + debug_printf("vert shader %p\n", pFunc); + assert(pFunc && "Error: VertShader = NULL"); + +#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR >= 5) + JM()->mIsModuleFinalized = true; +#endif + + return pFunc; +} + +PFN_VERTEX_FUNC +swr_compile_vs(struct pipe_context *ctx, swr_vertex_shader *swr_vs) +{ + BuilderSWR builder( + reinterpret_cast(swr_screen(ctx->screen)->hJitMgr)); + return builder.CompileVS(ctx, swr_vs); +} + +static unsigned +locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info) +{ + for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { + if ((info->output_semantic_name[i] == name) + && (info->output_semantic_index[i] == index)) { + return i - 1; // position is not part of the linkage + } + } + + if (name == TGSI_SEMANTIC_COLOR) { // BCOLOR fallback + for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { + if ((info->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) + && (info->output_semantic_index[i] == index)) { + return i - 1; // position is not part of the linkage + } + } + } + + return 0xFFFFFFFF; +} + +PFN_PIXEL_KERNEL +BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_key &key) +{ + struct swr_fragment_shader *swr_fs = ctx->fs; + + // 
tgsi_dump(swr_fs->pipe.tokens, 0); + + struct gallivm_state *gallivm = + gallivm_create("FS", wrap(&JM()->mContext)); + gallivm->module = wrap(JM()->mpCurrentModule); + + LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; + LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; + + memset(inputs, 0, sizeof(inputs)); + memset(outputs, 0, sizeof(outputs)); + + struct lp_build_sampler_soa *sampler = NULL; + + AttrBuilder attrBuilder; + attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); + AttributeSet attrSet = AttributeSet::get( + JM()->mContext, AttributeSet::FunctionIndex, attrBuilder); + + std::vector fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0), + PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)}; + FunctionType *funcType = + FunctionType::get(Type::getVoidTy(JM()->mContext), fsArgs, false); + + auto pFunction = Function::Create(funcType, + GlobalValue::ExternalLinkage, + "FS", + JM()->mpCurrentModule); + pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet); + + BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction); + IRB()->SetInsertPoint(block); + LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); + + auto &args = pFunction->getArgumentList(); + Value *hPrivateData = args.begin(); + hPrivateData->setName("hPrivateData"); + Value *pPS = ++args.begin(); + pPS->setName("psCtx"); + + Value *consts_ptr = GEP(hPrivateData, {0, swr_draw_context_constantFS}); + consts_ptr->setName("fs_constants"); + Value *const_sizes_ptr = + GEP(hPrivateData, {0, swr_draw_context_num_constantsFS}); + const_sizes_ptr->setName("num_fs_constants"); + + // xxx should check for flat shading versus interpolation + + // load i + Value *vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI}, "i"); + + // load j + Value *vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ}, "j"); + + // load/compute w + Value *vw = FDIV(VIMMED1(1.0f), LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW})); + vw->setName("w"); + + // load *pAttribs, *pPerspAttribs + Value 
*pAttribs = LOAD(pPS, {0, SWR_PS_CONTEXT_pAttribs}, "pAttribs"); + Value *pPerspAttribs = + LOAD(pPS, {0, SWR_PS_CONTEXT_pPerspAttribs}, "pPerspAttribs"); + + swr_fs->constantMask = 0; + + for (int attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) { + const unsigned mask = swr_fs->info.base.input_usage_mask[attrib]; + const unsigned interpMode = swr_fs->info.base.input_interpolate[attrib]; + + if (!mask) + continue; + + ubyte semantic_name = swr_fs->info.base.input_semantic_name[attrib]; + ubyte semantic_idx = swr_fs->info.base.input_semantic_index[attrib]; + + if (semantic_name == TGSI_SEMANTIC_FACE) { + Value *ff = + UI_TO_FP(LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), mFP32Ty); + ff = FSUB(FMUL(ff, C(2.0f)), C(1.0f)); + ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vFrontFace"); + + inputs[attrib][0] = wrap(ff); + inputs[attrib][1] = wrap(VIMMED1(0.0f)); + inputs[attrib][2] = wrap(VIMMED1(0.0f)); + inputs[attrib][3] = wrap(VIMMED1(1.0f)); + continue; + } else if (semantic_name == TGSI_SEMANTIC_POSITION) { // gl_FragCoord + inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX}, "vX")); + inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY}, "vY")); + inputs[attrib][2] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vZ}, "vZ")); + inputs[attrib][3] = + wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW}, "vOneOverW")); + continue; + } else if (semantic_name == TGSI_SEMANTIC_PRIMID) { + Value *primID = LOAD(pPS, {0, SWR_PS_CONTEXT_primID}, "primID"); + inputs[attrib][0] = wrap(VECTOR_SPLAT(JM()->mVWidth, primID)); + inputs[attrib][1] = wrap(VIMMED1(0)); + inputs[attrib][2] = wrap(VIMMED1(0)); + inputs[attrib][3] = wrap(VIMMED1(0)); + continue; + } + + unsigned linkedAttrib = + locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base); + if (linkedAttrib == 0xFFFFFFFF) { + // not found - check for point sprite + if (ctx->rasterizer->sprite_coord_enable) { + linkedAttrib = ctx->vs->info.base.num_outputs - 1; + } else { + fprintf(stderr, + "Missing %s[%d]\n", + 
tgsi_semantic_names[semantic_name], + semantic_idx); + assert(0 && "attribute linkage not found"); + } + } + + if (interpMode == TGSI_INTERPOLATE_CONSTANT) { + swr_fs->constantMask |= 1 << linkedAttrib; + } + + for (int channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { + if (mask & (1 << channel)) { + Value *indexA = C(linkedAttrib * 12 + channel); + Value *indexB = C(linkedAttrib * 12 + channel + 4); + Value *indexC = C(linkedAttrib * 12 + channel + 8); + + if ((semantic_name == TGSI_SEMANTIC_COLOR) + && ctx->rasterizer->light_twoside) { + unsigned bcolorAttrib = locate_linkage( + TGSI_SEMANTIC_BCOLOR, semantic_idx, &ctx->vs->info.base); + + unsigned diff = 12 * (bcolorAttrib - linkedAttrib); + + Value *back = + XOR(C(1), LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), "backFace"); + + Value *offset = MUL(back, C(diff)); + offset->setName("offset"); + + indexA = ADD(indexA, offset); + indexB = ADD(indexB, offset); + indexC = ADD(indexC, offset); + + if (interpMode == TGSI_INTERPOLATE_CONSTANT) { + swr_fs->constantMask |= 1 << bcolorAttrib; + } + } + + Value *pAttribPtr = (interpMode == TGSI_INTERPOLATE_PERSPECTIVE) + ? 
pPerspAttribs + : pAttribs; + + Value *va = + VECTOR_SPLAT(JM()->mVWidth, LOAD(GEP(pAttribPtr, indexA))); + Value *vb = + VECTOR_SPLAT(JM()->mVWidth, LOAD(GEP(pAttribPtr, indexB))); + Value *vc = + VECTOR_SPLAT(JM()->mVWidth, LOAD(GEP(pAttribPtr, indexC))); + + if (interpMode == TGSI_INTERPOLATE_CONSTANT) { + inputs[attrib][channel] = wrap(va); + } else { + Value *vk = FSUB(FSUB(VIMMED1(1.0f), vi), vj); + + vc = FMUL(vk, vc); + + Value *interp = FMUL(va, vi); + Value *interp1 = FMUL(vb, vj); + interp = FADD(interp, interp1); + interp = FADD(interp, vc); + if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE) + interp = FMUL(interp, vw); + inputs[attrib][channel] = wrap(interp); + } + } + } + } + + sampler = swr_sampler_soa_create(key.sampler); + + struct lp_bld_tgsi_system_values system_values; + memset(&system_values, 0, sizeof(system_values)); + + struct lp_build_mask_context mask; + + if (swr_fs->info.base.uses_kill || key.alphaTest.enabled) { + Value *mask_val = LOAD(pPS, {0, SWR_PS_CONTEXT_mask}, "coverage_mask"); + lp_build_mask_begin( + &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(mask_val)); + } + + lp_build_tgsi_soa(gallivm, + swr_fs->pipe.tokens, + lp_type_float_vec(32, 32 * 8), + swr_fs->info.base.uses_kill ? 
&mask : NULL, // mask + wrap(consts_ptr), + wrap(const_sizes_ptr), + &system_values, + inputs, + outputs, + wrap(hPrivateData), + sampler, // sampler + &swr_fs->info.base, + NULL); // geometry shader face + + IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); + + for (uint32_t attrib = 0; attrib < swr_fs->info.base.num_outputs; + attrib++) { + switch (swr_fs->info.base.output_semantic_name[attrib]) { + case TGSI_SEMANTIC_POSITION: { + // write z + LLVMValueRef outZ = + LLVMBuildLoad(gallivm->builder, outputs[attrib][2], ""); + STORE(unwrap(outZ), pPS, {0, SWR_PS_CONTEXT_vZ}); + break; + } + case TGSI_SEMANTIC_COLOR: { + for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { + if (!outputs[attrib][channel]) + continue; + + LLVMValueRef out = + LLVMBuildLoad(gallivm->builder, outputs[attrib][channel], ""); + if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) { + for (uint32_t rt = 0; rt < key.nr_cbufs; rt++) { + STORE(unwrap(out), + pPS, + {0, SWR_PS_CONTEXT_shaded, rt, channel}); + } + } else { + STORE(unwrap(out), + pPS, + {0, + SWR_PS_CONTEXT_shaded, + swr_fs->info.base.output_semantic_index[attrib], + channel}); + } + } + break; + } + default: { + fprintf(stderr, + "unknown output from FS %s[%d]\n", + tgsi_semantic_names[swr_fs->info.base + .output_semantic_name[attrib]], + swr_fs->info.base.output_semantic_index[attrib]); + break; + } + } + } + + LLVMValueRef mask_result = 0; + if (swr_fs->info.base.uses_kill || key.alphaTest.enabled) { + mask_result = lp_build_mask_end(&mask); + } + + IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); + + if (key.alphaTest.enabled) { + unsigned linkage = + locate_linkage(TGSI_SEMANTIC_COLOR, 0, &ctx->fs->info.base) + 1; + + Value *alpha = LOAD( + pPS, {0, SWR_PS_CONTEXT_shaded, linkage, 3 /* alpha */}, "alpha"); + Value *ref = VIMMED1(key.alphaTest.ref_value); + + CmpInst::Predicate cmp = CmpInst::Predicate::FCMP_FALSE; + switch (key.alphaTest.func) { + 
case PIPE_FUNC_NEVER: + cmp = CmpInst::Predicate::FCMP_FALSE; + break; + case PIPE_FUNC_LESS: + cmp = CmpInst::Predicate::FCMP_OLT; + break; + case PIPE_FUNC_EQUAL: + cmp = CmpInst::Predicate::FCMP_OEQ; + break; + case PIPE_FUNC_LEQUAL: + cmp = CmpInst::Predicate::FCMP_OLE; + break; + case PIPE_FUNC_GREATER: + cmp = CmpInst::Predicate::FCMP_OGT; + break; + case PIPE_FUNC_NOTEQUAL: + cmp = CmpInst::Predicate::FCMP_ONE; + break; + case PIPE_FUNC_GEQUAL: + cmp = CmpInst::Predicate::FCMP_OGE; + break; + case PIPE_FUNC_ALWAYS: + cmp = CmpInst::Predicate::FCMP_TRUE; + break; + } + + Value *alpha_result = + IRB()->CreateFCmp(cmp, alpha, ref, "alphaTestFunc"); + + mask_result = + wrap(AND(unwrap(mask_result), S_EXT(alpha_result, mSimdInt32Ty))); + } + + if (swr_fs->info.base.uses_kill || key.alphaTest.enabled) { + STORE(unwrap(mask_result), pPS, {0, SWR_PS_CONTEXT_mask}); + } + + RET_VOID(); + + gallivm_verify_function(gallivm, wrap(pFunction)); + + gallivm_compile_module(gallivm); + + PFN_PIXEL_KERNEL kernel = + (PFN_PIXEL_KERNEL)gallivm_jit_function(gallivm, wrap(pFunction)); + debug_printf("frag shader %p\n", kernel); + assert(kernel && "Error: FragShader = NULL"); + +#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR >= 5) + JM()->mIsModuleFinalized = true; +#endif + + return kernel; +} + +PFN_PIXEL_KERNEL +swr_compile_fs(struct swr_context *ctx, swr_jit_key &key) +{ + BuilderSWR builder( + reinterpret_cast(swr_screen(ctx->pipe.screen)->hJitMgr)); + return builder.CompileFS(ctx, key); +} diff --git a/src/gallium/drivers/swr/swr_shader.h b/src/gallium/drivers/swr/swr_shader.h new file mode 100644 index 0000000..2962646 --- /dev/null +++ b/src/gallium/drivers/swr/swr_shader.h @@ -0,0 +1,61 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/ + +#pragma once + +class swr_vertex_shader; +class swr_fragment_shader; +class swr_jit_key; + +PFN_VERTEX_FUNC +swr_compile_vs(struct pipe_context *ctx, swr_vertex_shader *swr_vs); + +PFN_PIXEL_KERNEL +swr_compile_fs(struct swr_context *ctx, swr_jit_key &key); + +void swr_generate_fs_key(struct swr_jit_key &key, + struct swr_context *ctx, + swr_fragment_shader *swr_fs); + +struct swr_jit_key { + unsigned nr_cbufs; + unsigned light_twoside; + ubyte vs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; + ubyte vs_output_semantic_idx[PIPE_MAX_SHADER_OUTPUTS]; + unsigned nr_samplers; + unsigned nr_sampler_views; + struct swr_sampler_static_state sampler[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + struct pipe_alpha_state alphaTest; +}; + +namespace std +{ +template <> struct hash { + std::size_t operator()(const swr_jit_key &k) const + { + return util_hash_crc32(&k, sizeof(k)); + } +}; +}; + +bool operator==(const swr_jit_key &lhs, const swr_jit_key &rhs); diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp new file mode 100644 index 0000000..fa16844 --- /dev/null +++ b/src/gallium/drivers/swr/swr_state.cpp @@ -0,0 +1,1344 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + ***************************************************************************/ + +#include "common/os.h" +#include "jit_api.h" +#include "JitManager.h" +#include "state_llvm.h" + +#include "gallivm/lp_bld_tgsi.h" +#include "util/u_format.h" + +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_helpers.h" +#include "util/u_framebuffer.h" + +#include "swr_state.h" +#include "swr_context.h" +#include "swr_context_llvm.h" +#include "swr_screen.h" +#include "swr_resource.h" +#include "swr_tex_sample.h" +#include "swr_scratch.h" +#include "swr_shader.h" + +/* These should be pulled out into separate files as necessary + * Just initializing everything here to get going. 
*/ + +static void * +swr_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + struct swr_blend_state *state = CALLOC_STRUCT(swr_blend_state); + + memcpy(&state->pipe, blend, sizeof(*blend)); + + struct pipe_blend_state *pipe_blend = &state->pipe; + + for (int target = 0; + target < std::min(SWR_NUM_RENDERTARGETS, PIPE_MAX_COLOR_BUFS); + target++) { + state->compileState[target].independentAlphaBlendEnable = + pipe_blend->independent_blend_enable; + + struct pipe_rt_blend_state *rt_blend = &pipe_blend->rt[target]; + SWR_RENDER_TARGET_BLEND_STATE &targetState = + state->compileState[target].blendState; + + if (target != 0 && !pipe_blend->independent_blend_enable) { + memcpy(&targetState, &state->compileState[0].blendState, sizeof(SWR_RENDER_TARGET_BLEND_STATE)); + continue; + } + + targetState.colorBlendEnable = rt_blend->blend_enable; + if (targetState.colorBlendEnable) { + targetState.sourceAlphaBlendFactor = + swr_convert_blend_factor(rt_blend->alpha_src_factor); + targetState.destAlphaBlendFactor = + swr_convert_blend_factor(rt_blend->alpha_dst_factor); + targetState.sourceBlendFactor = + swr_convert_blend_factor(rt_blend->rgb_src_factor); + targetState.destBlendFactor = + swr_convert_blend_factor(rt_blend->rgb_dst_factor); + + targetState.colorBlendFunc = + swr_convert_blend_func(rt_blend->rgb_func); + targetState.alphaBlendFunc = + swr_convert_blend_func(rt_blend->alpha_func); + } + + targetState.writeDisableRed = + (rt_blend->colormask & PIPE_MASK_R) ? 0 : 1; + targetState.writeDisableGreen = + (rt_blend->colormask & PIPE_MASK_G) ? 0 : 1; + targetState.writeDisableBlue = + (rt_blend->colormask & PIPE_MASK_B) ? 0 : 1; + targetState.writeDisableAlpha = + (rt_blend->colormask & PIPE_MASK_A) ? 
0 : 1; + } + + return state; +} + +static void +swr_bind_blend_state(struct pipe_context *pipe, void *blend) +{ + struct swr_context *ctx = swr_context(pipe); + + if (ctx->blend == blend) + return; + + ctx->blend = (swr_blend_state *)blend; + + ctx->dirty |= SWR_NEW_BLEND; +} + +static void +swr_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + FREE(blend); +} + +static void +swr_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *color) +{ + struct swr_context *ctx = swr_context(pipe); + + ctx->blend_color = *color; + + ctx->dirty |= SWR_NEW_BLEND; +} + +static void +swr_set_stencil_ref(struct pipe_context *pipe, + const struct pipe_stencil_ref *ref) +{ + struct swr_context *ctx = swr_context(pipe); + + ctx->stencil_ref = *ref; + + ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA; +} + +static void * +swr_create_depth_stencil_state( + struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + struct pipe_depth_stencil_alpha_state *state; + + state = (pipe_depth_stencil_alpha_state *)mem_dup(depth_stencil, + sizeof *depth_stencil); + + return state; +} + +static void +swr_bind_depth_stencil_state(struct pipe_context *pipe, void *depth_stencil) +{ + struct swr_context *ctx = swr_context(pipe); + + if (ctx->depth_stencil == (pipe_depth_stencil_alpha_state *)depth_stencil) + return; + + ctx->depth_stencil = (pipe_depth_stencil_alpha_state *)depth_stencil; + + ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA; +} + +static void +swr_delete_depth_stencil_state(struct pipe_context *pipe, void *depth) +{ + FREE(depth); +} + + +static void * +swr_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *rast) +{ + struct pipe_rasterizer_state *state; + state = (pipe_rasterizer_state *)mem_dup(rast, sizeof *rast); + + return state; +} + +static void +swr_bind_rasterizer_state(struct pipe_context *pipe, void *handle) +{ + struct swr_context *ctx = swr_context(pipe); + const struct 
pipe_rasterizer_state *rasterizer = + (const struct pipe_rasterizer_state *)handle; + + if (ctx->rasterizer == (pipe_rasterizer_state *)rasterizer) + return; + + ctx->rasterizer = (pipe_rasterizer_state *)rasterizer; + + ctx->dirty |= SWR_NEW_RASTERIZER; +} + +static void +swr_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer) +{ + FREE(rasterizer); +} + + +static void * +swr_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + struct pipe_sampler_state *state = + (pipe_sampler_state *)mem_dup(sampler, sizeof *sampler); + + return state; +} + +static void +swr_bind_sampler_states(struct pipe_context *pipe, + unsigned shader, + unsigned start, + unsigned num, + void **samplers) +{ + struct swr_context *ctx = swr_context(pipe); + unsigned i; + + assert(shader < PIPE_SHADER_TYPES); + assert(start + num <= Elements(ctx->samplers[shader])); + + /* set the new samplers */ + ctx->num_samplers[shader] = num; + for (i = 0; i < num; i++) { + ctx->samplers[shader][start + i] = (pipe_sampler_state *)samplers[i]; + } + + ctx->dirty |= SWR_NEW_SAMPLER; +} + +static void +swr_delete_sampler_state(struct pipe_context *pipe, void *sampler) +{ + FREE(sampler); +} + + +static struct pipe_sampler_view * +swr_create_sampler_view(struct pipe_context *pipe, + struct pipe_resource *texture, + const struct pipe_sampler_view *templ) +{ + struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view); + + if (view) { + *view = *templ; + view->reference.count = 1; + view->texture = NULL; + pipe_resource_reference(&view->texture, texture); + view->context = pipe; + } + + return view; +} + +static void +swr_set_sampler_views(struct pipe_context *pipe, + unsigned shader, + unsigned start, + unsigned num, + struct pipe_sampler_view **views) +{ + struct swr_context *ctx = swr_context(pipe); + uint i; + + assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS); + + assert(shader < PIPE_SHADER_TYPES); + assert(start + num <= 
Elements(ctx->sampler_views[shader])); + + /* set the new sampler views */ + ctx->num_sampler_views[shader] = num; + for (i = 0; i < num; i++) { + /* Note: we're using pipe_sampler_view_release() here to work around + * a possible crash when the old view belongs to another context that + * was already destroyed. + */ + pipe_sampler_view_release(pipe, &ctx->sampler_views[shader][start + i]); + pipe_sampler_view_reference(&ctx->sampler_views[shader][start + i], + views[i]); + } + + ctx->dirty |= SWR_NEW_SAMPLER_VIEW; +} + +static void +swr_sampler_view_destroy(struct pipe_context *pipe, + struct pipe_sampler_view *view) +{ + pipe_resource_reference(&view->texture, NULL); + FREE(view); +} + +static void * +swr_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *vs) +{ + struct swr_vertex_shader *swr_vs = + (swr_vertex_shader *)CALLOC_STRUCT(swr_vertex_shader); + if (!swr_vs) + return NULL; + + swr_vs->pipe.tokens = tgsi_dup_tokens(vs->tokens); + swr_vs->pipe.stream_output = vs->stream_output; + + lp_build_tgsi_info(vs->tokens, &swr_vs->info); + + swr_vs->func = swr_compile_vs(pipe, swr_vs); + + swr_vs->soState = {0}; + + if (swr_vs->pipe.stream_output.num_outputs) { + pipe_stream_output_info *stream_output = &swr_vs->pipe.stream_output; + + swr_vs->soState.soEnable = true; + // soState.rasterizerDisable set on state dirty + // soState.streamToRasterizer not used + + for (uint32_t i = 0; i < stream_output->num_outputs; i++) { + swr_vs->soState.streamMasks[stream_output->output[i].stream] |= + 1 << (stream_output->output[i].register_index - 1); + } + for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) { + swr_vs->soState.streamNumEntries[i] = + _mm_popcnt_u32(swr_vs->soState.streamMasks[i]); + } + } + + return swr_vs; +} + +static void +swr_bind_vs_state(struct pipe_context *pipe, void *vs) +{ + struct swr_context *ctx = swr_context(pipe); + + if (ctx->vs == vs) + return; + + ctx->vs = (swr_vertex_shader *)vs; + ctx->dirty |= SWR_NEW_VS; +} + +static 
void +swr_delete_vs_state(struct pipe_context *pipe, void *vs) +{ + struct swr_vertex_shader *swr_vs = (swr_vertex_shader *)vs; + FREE((void *)swr_vs->pipe.tokens); + FREE(vs); +} + +static void * +swr_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *fs) +{ + struct swr_fragment_shader *swr_fs = new swr_fragment_shader; + if (!swr_fs) + return NULL; + + swr_fs->pipe.tokens = tgsi_dup_tokens(fs->tokens); + + lp_build_tgsi_info(fs->tokens, &swr_fs->info); + + return swr_fs; +} + + +static void +swr_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct swr_context *ctx = swr_context(pipe); + + if (ctx->fs == fs) + return; + + ctx->fs = (swr_fragment_shader *)fs; + ctx->dirty |= SWR_NEW_FS; +} + +static void +swr_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct swr_fragment_shader *swr_fs = (swr_fragment_shader *)fs; + FREE((void *)swr_fs->pipe.tokens); + delete swr_fs; +} + + +static void +swr_set_constant_buffer(struct pipe_context *pipe, + uint shader, + uint index, + struct pipe_constant_buffer *cb) +{ + struct swr_context *ctx = swr_context(pipe); + struct pipe_resource *constants = cb ? 
cb->buffer : NULL; + + assert(shader < PIPE_SHADER_TYPES); + assert(index < Elements(ctx->constants[shader])); + + /* note: reference counting */ + util_copy_constant_buffer(&ctx->constants[shader][index], cb); + + if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY) { + ctx->dirty |= SWR_NEW_VSCONSTANTS; + } else if (shader == PIPE_SHADER_FRAGMENT) { + ctx->dirty |= SWR_NEW_FSCONSTANTS; + } + + if (cb && cb->user_buffer) { + pipe_resource_reference(&constants, NULL); + } +} + + +static void * +swr_create_vertex_elements_state(struct pipe_context *pipe, + unsigned num_elements, + const struct pipe_vertex_element *attribs) +{ + struct swr_vertex_element_state *velems; + assert(num_elements <= PIPE_MAX_ATTRIBS); + velems = CALLOC_STRUCT(swr_vertex_element_state); + if (velems) { + velems->fsState.numAttribs = num_elements; + for (unsigned i = 0; i < num_elements; i++) { + // XXX: we should do this keyed on the VS usage info + + const struct util_format_description *desc = + util_format_description(attribs[i].src_format); + + velems->fsState.layout[i].AlignedByteOffset = attribs[i].src_offset; + velems->fsState.layout[i].Format = + mesa_to_swr_format(attribs[i].src_format); + velems->fsState.layout[i].StreamIndex = + attribs[i].vertex_buffer_index; + velems->fsState.layout[i].InstanceEnable = + attribs[i].instance_divisor != 0; + velems->fsState.layout[i].ComponentControl0 = + desc->channel[0].type != UTIL_FORMAT_TYPE_VOID + ? ComponentControl::StoreSrc + : ComponentControl::Store0; + velems->fsState.layout[i].ComponentControl1 = + desc->channel[1].type != UTIL_FORMAT_TYPE_VOID + ? ComponentControl::StoreSrc + : ComponentControl::Store0; + velems->fsState.layout[i].ComponentControl2 = + desc->channel[2].type != UTIL_FORMAT_TYPE_VOID + ? ComponentControl::StoreSrc + : ComponentControl::Store0; + velems->fsState.layout[i].ComponentControl3 = + desc->channel[3].type != UTIL_FORMAT_TYPE_VOID + ? 
ComponentControl::StoreSrc + : ComponentControl::Store1Fp; + velems->fsState.layout[i].ComponentPacking = ComponentEnable::XYZW; + velems->fsState.layout[i].InstanceDataStepRate = + attribs[i].instance_divisor; + + /* Calculate the pitch of each stream */ + const SWR_FORMAT_INFO &swr_desc = GetFormatInfo( + mesa_to_swr_format(attribs[i].src_format)); + velems->stream_pitch[attribs[i].vertex_buffer_index] += swr_desc.Bpp; + } + } + + return velems; +} + +static void +swr_bind_vertex_elements_state(struct pipe_context *pipe, void *velems) +{ + struct swr_context *ctx = swr_context(pipe); + struct swr_vertex_element_state *swr_velems = + (struct swr_vertex_element_state *)velems; + + ctx->velems = swr_velems; + ctx->dirty |= SWR_NEW_VERTEX; +} + +static void +swr_delete_vertex_elements_state(struct pipe_context *pipe, void *velems) +{ + /* XXX Need to destroy fetch shader? */ + FREE(velems); +} + + +static void +swr_set_vertex_buffers(struct pipe_context *pipe, + unsigned start_slot, + unsigned num_elements, + const struct pipe_vertex_buffer *buffers) +{ + struct swr_context *ctx = swr_context(pipe); + + assert(num_elements <= PIPE_MAX_ATTRIBS); + + util_set_vertex_buffers_count(ctx->vertex_buffer, + &ctx->num_vertex_buffers, + buffers, + start_slot, + num_elements); + + ctx->dirty |= SWR_NEW_VERTEX; +} + + +static void +swr_set_index_buffer(struct pipe_context *pipe, + const struct pipe_index_buffer *ib) +{ + struct swr_context *ctx = swr_context(pipe); + + if (ib) + memcpy(&ctx->index_buffer, ib, sizeof(ctx->index_buffer)); + else + memset(&ctx->index_buffer, 0, sizeof(ctx->index_buffer)); + + ctx->dirty |= SWR_NEW_VERTEX; +} + +static void +swr_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct swr_context *ctx = swr_context(pipe); + + ctx->poly_stipple = *stipple; /* struct copy */ + ctx->dirty |= SWR_NEW_STIPPLE; +} + +static void +swr_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state 
*clip) +{ + struct swr_context *ctx = swr_context(pipe); + + ctx->clip = *clip; + /* XXX Unimplemented, but prevents crash */ + + ctx->dirty |= SWR_NEW_CLIP; +} + + +static void +swr_set_scissor_states(struct pipe_context *pipe, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_scissor_state *scissor) +{ + struct swr_context *ctx = swr_context(pipe); + + ctx->scissor = *scissor; + ctx->dirty |= SWR_NEW_SCISSOR; +} + +static void +swr_set_viewport_states(struct pipe_context *pipe, + unsigned start_slot, + unsigned num_viewports, + const struct pipe_viewport_state *vpt) +{ + struct swr_context *ctx = swr_context(pipe); + + ctx->viewport = *vpt; + ctx->dirty |= SWR_NEW_VIEWPORT; +} + + +static void +swr_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct swr_context *ctx = swr_context(pipe); + + boolean changed = !util_framebuffer_state_equal(&ctx->framebuffer, fb); + + assert(fb->width <= KNOB_GUARDBAND_WIDTH); + assert(fb->height <= KNOB_GUARDBAND_HEIGHT); + + if (changed) { + unsigned i; + for (i = 0; i < fb->nr_cbufs; ++i) + pipe_surface_reference(&ctx->framebuffer.cbufs[i], fb->cbufs[i]); + for (; i < ctx->framebuffer.nr_cbufs; ++i) + pipe_surface_reference(&ctx->framebuffer.cbufs[i], NULL); + + ctx->framebuffer.nr_cbufs = fb->nr_cbufs; + + ctx->framebuffer.width = fb->width; + ctx->framebuffer.height = fb->height; + + pipe_surface_reference(&ctx->framebuffer.zsbuf, fb->zsbuf); + + ctx->dirty |= SWR_NEW_FRAMEBUFFER; + } +} + + +static void +swr_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) +{ + struct swr_context *ctx = swr_context(pipe); + + if (sample_mask != ctx->sample_mask) { + ctx->sample_mask = sample_mask; + ctx->dirty |= SWR_NEW_RASTERIZER; + } +} + + +void +swr_update_derived(struct swr_context *ctx, + const struct pipe_draw_info *p_draw_info) +{ + /* Any state that requires dirty flags to be re-triggered sets this mask */ + /* For example, user_buffer vertex and 
index buffers. */ + unsigned post_update_dirty_flags = 0; + + /* Render Targets */ + if (ctx->dirty & SWR_NEW_FRAMEBUFFER) { + struct pipe_framebuffer_state *fb = &ctx->framebuffer; + SWR_SURFACE_STATE *new_attachment[SWR_NUM_ATTACHMENTS] = {0}; + boolean changed, need_idle; + UINT i; + + /* colorbuffer targets */ + if (fb->nr_cbufs) + for (i = 0; i < fb->nr_cbufs; ++i) + if (fb->cbufs[i]) { + struct swr_resource *colorBuffer = + swr_resource(fb->cbufs[i]->texture); + new_attachment[SWR_ATTACHMENT_COLOR0 + i] = &colorBuffer->swr; + } + + /* depth/stencil target */ + if (fb->zsbuf) { + struct swr_resource *depthStencilBuffer = + swr_resource(fb->zsbuf->texture); + if (depthStencilBuffer->has_depth) { + new_attachment[SWR_ATTACHMENT_DEPTH] = &depthStencilBuffer->swr; + + if (depthStencilBuffer->has_stencil) + new_attachment[SWR_ATTACHMENT_STENCIL] = + &depthStencilBuffer->secondary; + + } else if (depthStencilBuffer->has_stencil) + new_attachment[SWR_ATTACHMENT_STENCIL] = &depthStencilBuffer->swr; + } + + /* For each attachment that has changed, store tile contents to render + * target */ + changed = FALSE; + need_idle = FALSE; + for (i = 0; i < SWR_NUM_ATTACHMENTS; i++) { + if ((uintptr_t)ctx->current.attachment[i] + ^ (uintptr_t)new_attachment[i]) { + if (ctx->current.attachment[i]) { + enum SWR_TILE_STATE post_state; + post_state = + (new_attachment[i] ? 
SWR_TILE_INVALID : SWR_TILE_RESOLVED); + swr_store_render_target(ctx, i, post_state); + need_idle |= TRUE; + } + changed |= TRUE; + } + } + + /* + * Attachments are live, don't update any until idle + * (all StoreTiles, called by swr_store_render_targets, finish) + */ + if (need_idle) + SwrWaitForIdle(ctx->swrContext); + + if (changed) { + /* Update actual SWR core attachments, or clear those no longer + * attached */ + swr_draw_context *pDC = + (swr_draw_context *)SwrGetPrivateContextState(ctx->swrContext); + SWR_SURFACE_STATE *renderTargets = pDC->renderTargets; + for (i = 0; i < SWR_NUM_ATTACHMENTS; i++) { + if ((uintptr_t)ctx->current.attachment[i] + ^ (uintptr_t)new_attachment[i]) { + if (new_attachment[i]) { + renderTargets[i] = *new_attachment[i]; + ctx->current.attachment[i] = new_attachment[i]; + } else { + renderTargets[i] = {0}; + ctx->current.attachment[i] = nullptr; + } + } + } + + /* rendertarget changes also necessitate updating other state */ + ctx->dirty |= SWR_NEW_BLEND | SWR_NEW_SAMPLER_VIEW | SWR_NEW_VS + | SWR_NEW_FS | SWR_NEW_RASTERIZER | SWR_NEW_VIEWPORT + | SWR_NEW_DEPTH_STENCIL_ALPHA; + } + } + + /* Raster state */ + if (ctx->dirty & (SWR_NEW_RASTERIZER | SWR_NEW_VS)) { + SWR_RASTSTATE *rastState = &ctx->current.rastState; + rastState->cullMode = swr_convert_cull_mode(ctx->rasterizer->cull_face); + rastState->frontWinding = ctx->rasterizer->front_ccw + ? SWR_FRONTWINDING_CCW + : SWR_FRONTWINDING_CW; + rastState->scissorEnable = ctx->rasterizer->scissor; + rastState->pointSize = ctx->rasterizer->point_size > 0.0f + ? ctx->rasterizer->point_size + : 1.0f; + rastState->lineWidth = ctx->rasterizer->line_width > 0.0f + ? 
ctx->rasterizer->line_width + : 1.0f; + + rastState->pointParam = ctx->rasterizer->point_size_per_vertex; + rastState->pointSizeAttrib = ctx->vs->pointSizeAttrib; + + rastState->pointSpriteEnable = ctx->rasterizer->sprite_coord_enable; + rastState->pointSpriteTopOrigin = + ctx->rasterizer->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT; + rastState->pointSpriteFESlot = ctx->vs->info.base.num_outputs; + + /* XXX TODO: Add multisample */ + rastState->sampleCount = SWR_MULTISAMPLE_1X; + + bool do_offset = false; + switch (ctx->rasterizer->fill_front) { + case PIPE_POLYGON_MODE_FILL: + do_offset = ctx->rasterizer->offset_tri; + break; + case PIPE_POLYGON_MODE_LINE: + do_offset = ctx->rasterizer->offset_line; + break; + case PIPE_POLYGON_MODE_POINT: + do_offset = ctx->rasterizer->offset_point; + break; + } + + if (do_offset) { + rastState->depthBias = ctx->rasterizer->offset_units; + rastState->slopeScaledDepthBias = ctx->rasterizer->offset_scale; + rastState->depthBiasClamp = ctx->rasterizer->offset_clamp; + } else { + rastState->depthBias = 0; + rastState->slopeScaledDepthBias = 0; + rastState->depthBiasClamp = 0; + } + struct pipe_surface *zb = ctx->framebuffer.zsbuf; + if (zb && swr_resource(zb->texture)->has_depth) + rastState->depthFormat = swr_resource(zb->texture)->swr.format; + + rastState->depthClipEnable = ctx->rasterizer->depth_clip; + + SwrSetRastState(ctx->swrContext, rastState); + } + + /* Scissor */ + if (ctx->dirty & SWR_NEW_SCISSOR) { + BBOX bbox(ctx->scissor.miny, ctx->scissor.maxy, + ctx->scissor.minx, ctx->scissor.maxx); + SwrSetScissorRects(ctx->swrContext, 1, &bbox); + } + + /* Viewport */ + if (ctx->dirty & SWR_NEW_VIEWPORT) { + pipe_viewport_state *state = &ctx->viewport; + SWR_VIEWPORT *vp = &ctx->current.vp; + SWR_VIEWPORT_MATRIX *vpm = &ctx->current.vpm; + + const float scale_x = fabs(state->scale[0]); + const float scale_y = fabs(state->scale[1]); + const float scale_z = fabs(state->scale[2]); + + vp->x = state->translate[0] - scale_x; + 
vp->width = state->translate[0] + scale_x; + vp->y = state->translate[1] - scale_y; + vp->height = state->translate[1] + scale_y; + if (ctx->rasterizer->clip_halfz == 0) { + vp->minZ = state->translate[2] - scale_z; + vp->maxZ = state->translate[2] + scale_z; + } else { + vp->minZ = state->translate[2]; + vp->maxZ = state->translate[2] + scale_z; + } + + /* Flip viewport for all targets except samplable textures. */ + /* XXX This may not be sufficient for multiple rendertargets */ + struct pipe_surface *cb = ctx->framebuffer.cbufs[0]; + if (cb && + !(swr_resource(cb->texture)->base.bind & PIPE_BIND_SAMPLER_VIEW)) { + /* Flip y and y-translate in the viewport matrix. */ + vpm->m00 = (vp->width - vp->x) / 2.0f; + vpm->m11 = (vp->y - vp->height) / 2.0f; + vpm->m22 = (vp->maxZ - vp->minZ) / 2.0f; + vpm->m30 = vp->x + vpm->m00; + vpm->m31 = vp->height + vpm->m11; + vpm->m32 = vp->minZ + vpm->m22; + } else { + vpm->m00 = (vp->width - vp->x) / 2.0f; + vpm->m11 = (vp->height - vp->y) / 2.0f; + vpm->m22 = (vp->maxZ - vp->minZ) / 2.0f; + vpm->m30 = vp->x + vpm->m00; + vpm->m31 = vp->y + vpm->m11; + vpm->m32 = vp->minZ + vpm->m22; + } + + /* Now that the matrix is calculated, clip the view coords to screen + * size. OpenGL allows for -ve x,y in the viewport. 
+ */ + vp->x = std::max(vp->x, 0.0f); + vp->y = std::max(vp->y, 0.0f); + vp->width = std::min(vp->width, (float)ctx->framebuffer.width); + vp->height = std::min(vp->height, (float)ctx->framebuffer.height); + + SwrSetViewports(ctx->swrContext, 1, vp, vpm); + } + + /* Set vertex & index buffers */ + /* (using draw info if called by swr_draw_vbo) */ + if (ctx->dirty & SWR_NEW_VERTEX) { + uint32_t size, pitch, max_vertex, partial_inbounds; + const uint8_t *p_data; + + /* If being called by swr_draw_vbo, copy draw details */ + struct pipe_draw_info info = {0}; + if (p_draw_info) + info = *p_draw_info; + + /* vertex buffers */ + SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS]; + for (UINT i = 0; i < ctx->num_vertex_buffers; i++) { + pipe_vertex_buffer *vb = &ctx->vertex_buffer[i]; + + pitch = vb->stride; + if (!vb->user_buffer) { + /* VBO + * size is based on buffer->width0 rather than info.max_index + * to prevent having to validate VBO on each draw */ + size = vb->buffer->width0; + max_vertex = size / pitch; + partial_inbounds = size % pitch; + + p_data = (const uint8_t *)swr_resource_data(vb->buffer) + + vb->buffer_offset; + } else { + /* Client buffer + * client memory is one-time use, re-trigger SWR_NEW_VERTEX to + * revalidate on each draw */ + post_update_dirty_flags |= SWR_NEW_VERTEX; + + if (pitch) { + size = (info.max_index - info.min_index + 1) * pitch; + } else { + /* pitch = 0, means constant value + * set size to 1 vertex */ + size = ctx->velems->stream_pitch[i]; + } + + max_vertex = info.max_index + 1; + partial_inbounds = 0; + + /* Copy only needed vertices to scratch space */ + size = AlignUp(size, 4); + const void *ptr = (const uint8_t *) vb->user_buffer + + info.min_index * pitch; + ptr = swr_copy_to_scratch_space( + ctx, &ctx->scratch->vertex_buffer, ptr, size); + p_data = (const uint8_t *)ptr - info.min_index * pitch; + } + + swrVertexBuffers[i] = {0}; + swrVertexBuffers[i].index = i; + swrVertexBuffers[i].pitch = pitch; + 
swrVertexBuffers[i].pData = p_data; + swrVertexBuffers[i].size = size; + swrVertexBuffers[i].maxVertex = max_vertex; + swrVertexBuffers[i].partialInboundsSize = partial_inbounds; + } + + SwrSetVertexBuffers( + ctx->swrContext, ctx->num_vertex_buffers, swrVertexBuffers); + + /* index buffer, if required (info passed in by swr_draw_vbo) */ + SWR_FORMAT index_type = R32_UINT; /* Default for non-indexed draws */ + if (info.indexed) { + pipe_index_buffer *ib = &ctx->index_buffer; + + pitch = ib->index_size ? ib->index_size : sizeof(uint32_t); + index_type = swr_convert_index_type(pitch); + + if (!ib->user_buffer) { + /* VBO + * size is based on buffer->width0 rather than info.count + * to prevent having to validate VBO on each draw */ + size = ib->buffer->width0; + p_data = + (const uint8_t *)swr_resource_data(ib->buffer) + ib->offset; + } else { + /* Client buffer + * client memory is one-time use, re-trigger SWR_NEW_VERTEX to + * revalidate on each draw */ + post_update_dirty_flags |= SWR_NEW_VERTEX; + + size = info.count * pitch; + size = AlignUp(size, 4); + + /* Copy indices to scratch space */ + const void *ptr = ib->user_buffer; + ptr = swr_copy_to_scratch_space( + ctx, &ctx->scratch->index_buffer, ptr, size); + p_data = (const uint8_t *)ptr; + } + + SWR_INDEX_BUFFER_STATE swrIndexBuffer; + swrIndexBuffer.format = swr_convert_index_type(ib->index_size); + swrIndexBuffer.pIndices = p_data; + swrIndexBuffer.size = size; + + SwrSetIndexBuffer(ctx->swrContext, &swrIndexBuffer); + } + + struct swr_vertex_element_state *velems = ctx->velems; + if (velems && velems->fsState.indexType != index_type) { + velems->fsFunc = NULL; + velems->fsState.indexType = index_type; + } + } + + /* VertexShader */ + if (ctx->dirty & SWR_NEW_VS) { + SwrSetVertexFunc(ctx->swrContext, ctx->vs->func); + } + + swr_jit_key key; + if (ctx->dirty & (SWR_NEW_FS | SWR_NEW_SAMPLER | SWR_NEW_SAMPLER_VIEW + | SWR_NEW_DEPTH_STENCIL_ALPHA | SWR_NEW_RASTERIZER + | SWR_NEW_FRAMEBUFFER)) { + memset(&key, 
0, sizeof(key)); + swr_generate_fs_key(key, ctx, ctx->fs); + auto search = ctx->fs->map.find(key); + PFN_PIXEL_KERNEL func; + if (search != ctx->fs->map.end()) { + func = search->second; + } else { + func = swr_compile_fs(ctx, key); + ctx->fs->map.insert(std::make_pair(key, func)); + } + SWR_PS_STATE psState = {0}; + psState.pfnPixelShader = func; + psState.killsPixel = + ctx->fs->info.base.uses_kill || key.alphaTest.enabled; + psState.writesODepth = ctx->fs->info.base.writes_z; + psState.usesSourceDepth = ctx->fs->info.base.reads_z; + psState.maxRTSlotUsed = + (ctx->framebuffer.nr_cbufs != 0) ? + (ctx->framebuffer.nr_cbufs - 1) : + 0; + SwrSetPixelShaderState(ctx->swrContext, &psState); + } + + /* JIT sampler state */ + if (ctx->dirty & SWR_NEW_SAMPLER) { + swr_draw_context *pDC = + (swr_draw_context *)SwrGetPrivateContextState(ctx->swrContext); + + for (unsigned i = 0; i < key.nr_samplers; i++) { + const struct pipe_sampler_state *sampler = + ctx->samplers[PIPE_SHADER_FRAGMENT][i]; + + if (sampler) { + pDC->samplersFS[i].min_lod = sampler->min_lod; + pDC->samplersFS[i].max_lod = sampler->max_lod; + pDC->samplersFS[i].lod_bias = sampler->lod_bias; + COPY_4V(pDC->samplersFS[i].border_color, sampler->border_color.f); + } + } + } + + /* JIT sampler view state */ + if (ctx->dirty & SWR_NEW_SAMPLER_VIEW) { + swr_draw_context *pDC = + (swr_draw_context *)SwrGetPrivateContextState(ctx->swrContext); + + for (unsigned i = 0; i < key.nr_sampler_views; i++) { + struct pipe_sampler_view *view = + ctx->sampler_views[PIPE_SHADER_FRAGMENT][i]; + + if (view) { + struct pipe_resource *res = view->texture; + struct swr_resource *swr_res = swr_resource(res); + struct swr_jit_texture *jit_tex = &pDC->texturesFS[i]; + memset(jit_tex, 0, sizeof(*jit_tex)); + jit_tex->width = res->width0; + jit_tex->height = res->height0; + jit_tex->depth = res->depth0; + jit_tex->first_level = view->u.tex.first_level; + jit_tex->last_level = view->u.tex.last_level; + jit_tex->base_ptr = 
swr_res->swr.pBaseAddress; + + for (unsigned level = jit_tex->first_level; + level <= jit_tex->last_level; + level++) { + jit_tex->row_stride[level] = swr_res->row_stride[level]; + jit_tex->img_stride[level] = swr_res->img_stride[level]; + jit_tex->mip_offsets[level] = swr_res->mip_offsets[level]; + } + } + } + } + + /* VertexShader Constants */ + if (ctx->dirty & SWR_NEW_VSCONSTANTS) { + swr_draw_context *pDC = + (swr_draw_context *)SwrGetPrivateContextState(ctx->swrContext); + + for (UINT i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + const pipe_constant_buffer *cb = + &ctx->constants[PIPE_SHADER_VERTEX][i]; + pDC->num_constantsVS[i] = cb->buffer_size; + if (cb->buffer) + pDC->constantVS[i] = + (const float *)((const BYTE *)cb->buffer + cb->buffer_offset); + else { + /* Need to copy these constants to scratch space */ + if (cb->user_buffer && cb->buffer_size) { + const void *ptr = + ((const BYTE *)cb->user_buffer + cb->buffer_offset); + uint32_t size = AlignUp(cb->buffer_size, 4); + ptr = swr_copy_to_scratch_space( + ctx, &ctx->scratch->vs_constants, ptr, size); + pDC->constantVS[i] = (const float *)ptr; + } + } + } + } + + /* FragmentShader Constants */ + if (ctx->dirty & SWR_NEW_FSCONSTANTS) { + swr_draw_context *pDC = + (swr_draw_context *)SwrGetPrivateContextState(ctx->swrContext); + + for (UINT i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + const pipe_constant_buffer *cb = + &ctx->constants[PIPE_SHADER_FRAGMENT][i]; + pDC->num_constantsFS[i] = cb->buffer_size; + if (cb->buffer) + pDC->constantFS[i] = + (const float *)((const BYTE *)cb->buffer + cb->buffer_offset); + else { + /* Need to copy these constants to scratch space */ + if (cb->user_buffer && cb->buffer_size) { + const void *ptr = + ((const BYTE *)cb->user_buffer + cb->buffer_offset); + uint32_t size = AlignUp(cb->buffer_size, 4); + ptr = swr_copy_to_scratch_space( + ctx, &ctx->scratch->fs_constants, ptr, size); + pDC->constantFS[i] = (const float *)ptr; + } + } + } + } + + /* Depth/stencil state */ 
+ if (ctx->dirty & SWR_NEW_DEPTH_STENCIL_ALPHA) { + struct pipe_depth_state *depth = &(ctx->depth_stencil->depth); + struct pipe_stencil_state *stencil = ctx->depth_stencil->stencil; + SWR_DEPTH_STENCIL_STATE depthStencilState = {{0}}; + + /* XXX, incomplete. Need to flesh out stencil & alpha test state + struct pipe_stencil_state *front_stencil = + ctx->depth_stencil.stencil[0]; + struct pipe_stencil_state *back_stencil = ctx->depth_stencil.stencil[1]; + struct pipe_alpha_state alpha; + */ + if (stencil[0].enabled) { + depthStencilState.stencilWriteEnable = 1; + depthStencilState.stencilTestEnable = 1; + depthStencilState.stencilTestFunc = + swr_convert_depth_func(stencil[0].func); + + depthStencilState.stencilPassDepthPassOp = + swr_convert_stencil_op(stencil[0].zpass_op); + depthStencilState.stencilPassDepthFailOp = + swr_convert_stencil_op(stencil[0].zfail_op); + depthStencilState.stencilFailOp = + swr_convert_stencil_op(stencil[0].fail_op); + depthStencilState.stencilWriteMask = stencil[0].writemask; + depthStencilState.stencilTestMask = stencil[0].valuemask; + depthStencilState.stencilRefValue = ctx->stencil_ref.ref_value[0]; + } + if (stencil[1].enabled) { + depthStencilState.doubleSidedStencilTestEnable = 1; + + depthStencilState.backfaceStencilTestFunc = + swr_convert_depth_func(stencil[1].func); + + depthStencilState.backfaceStencilPassDepthPassOp = + swr_convert_stencil_op(stencil[1].zpass_op); + depthStencilState.backfaceStencilPassDepthFailOp = + swr_convert_stencil_op(stencil[1].zfail_op); + depthStencilState.backfaceStencilFailOp = + swr_convert_stencil_op(stencil[1].fail_op); + depthStencilState.backfaceStencilWriteMask = stencil[1].writemask; + depthStencilState.backfaceStencilTestMask = stencil[1].valuemask; + + depthStencilState.backfaceStencilRefValue = + ctx->stencil_ref.ref_value[1]; + } + + depthStencilState.depthTestEnable = depth->enabled; + depthStencilState.depthTestFunc = swr_convert_depth_func(depth->func); + 
depthStencilState.depthWriteEnable = depth->writemask; + SwrSetDepthStencilState(ctx->swrContext, &depthStencilState); + } + + /* Blend State */ + if (ctx->dirty & (SWR_NEW_BLEND | SWR_NEW_FRAMEBUFFER)) { + struct pipe_framebuffer_state *fb = &ctx->framebuffer; + + SWR_BLEND_STATE blendState; + memset(&blendState, 0, sizeof(blendState)); + blendState.independentAlphaBlendEnable = + ctx->blend->pipe.independent_blend_enable; + blendState.constantColor[0] = ctx->blend_color.color[0]; + blendState.constantColor[1] = ctx->blend_color.color[1]; + blendState.constantColor[2] = ctx->blend_color.color[2]; + blendState.constantColor[3] = ctx->blend_color.color[3]; + + /* If there are no color buffers bound, disable writes on RT0 + * and skip loop */ + if (fb->nr_cbufs == 0) { + blendState.renderTarget[0].writeDisableRed = 1; + blendState.renderTarget[0].writeDisableGreen = 1; + blendState.renderTarget[0].writeDisableBlue = 1; + blendState.renderTarget[0].writeDisableAlpha = 1; + } + else + for (int target = 0; + target < std::min(SWR_NUM_RENDERTARGETS, + PIPE_MAX_COLOR_BUFS); + target++) { + if (!fb->cbufs[target]) + continue; + + BLEND_COMPILE_STATE *compileState = + &ctx->blend->compileState[target]; + + struct swr_resource *colorBuffer = + swr_resource(fb->cbufs[target]->texture); + compileState->format = colorBuffer->swr.format; + + memcpy(&blendState.renderTarget[target], + &compileState->blendState, + sizeof(compileState->blendState)); + + PFN_BLEND_JIT_FUNC func = NULL; + auto search = ctx->blendJIT->find(*compileState); + if (search != ctx->blendJIT->end()) { + func = search->second; + } else { + HANDLE hJitMgr = swr_screen(ctx->pipe.screen)->hJitMgr; + func = JitCompileBlend(hJitMgr, *compileState); + debug_printf("BLEND shader %p\n", func); + assert(func && "Error: BlendShader = NULL"); + + ctx->blendJIT->insert(std::make_pair(*compileState, func)); + } + SwrSetBlendFunc(ctx->swrContext, target, func); + } + + SwrSetBlendState(ctx->swrContext, &blendState); + } + 
+ if (ctx->dirty & SWR_NEW_STIPPLE) { + /* XXX What to do with this one??? SWR doesn't stipple */ + } + + if (ctx->dirty & (SWR_NEW_VS | SWR_NEW_SO | SWR_NEW_RASTERIZER)) { + ctx->vs->soState.rasterizerDisable = + ctx->rasterizer->rasterizer_discard; + SwrSetSoState(ctx->swrContext, &ctx->vs->soState); + + pipe_stream_output_info *stream_output = &ctx->vs->pipe.stream_output; + + for (uint32_t i = 0; i < ctx->num_so_targets; i++) { + SWR_STREAMOUT_BUFFER buffer = {0}; + if (!ctx->so_targets[i]) + continue; + buffer.enable = true; + buffer.pBuffer = + (uint32_t *)swr_resource_data(ctx->so_targets[i]->buffer); + buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2; + buffer.pitch = stream_output->stride[i]; + buffer.streamOffset = ctx->so_targets[i]->buffer_offset >> 2; + + SwrSetSoBuffers(ctx->swrContext, &buffer, i); + } + } + + uint32_t linkage = ctx->vs->linkageMask; + if (ctx->rasterizer->sprite_coord_enable) + linkage |= (1 << ctx->vs->info.base.num_outputs); + + SwrSetLinkage(ctx->swrContext, linkage, NULL); + + // set up frontend state + SWR_FRONTEND_STATE feState = {0}; + SwrSetFrontendState(ctx->swrContext, &feState); + + // set up backend state + SWR_BACKEND_STATE backendState = {0}; + backendState.numAttributes = 1; + backendState.numComponents[0] = 4; + backendState.constantInterpolationMask = ctx->fs->constantMask; + SwrSetBackendState(ctx->swrContext, &backendState); + + ctx->dirty = post_update_dirty_flags; +} + /* create_stream_output_target hook (installed by swr_state_init): allocates a pipe_stream_output_target with CALLOC_STRUCT, ties it to `pipe`, references `buffer` through pipe_resource_reference() and records buffer_offset / buffer_size unchanged. Returns NULL when the allocation fails. */ +static struct pipe_stream_output_target * +swr_create_so_target(struct pipe_context *pipe, + struct pipe_resource *buffer, + unsigned buffer_offset, + unsigned buffer_size) +{ + struct pipe_stream_output_target *target; + + target = CALLOC_STRUCT(pipe_stream_output_target); + if (!target) + return NULL; + /* The new target starts with a reference count of 1. */ + target->context = pipe; + target->reference.count = 1; + pipe_resource_reference(&target->buffer, buffer); + target->buffer_offset = buffer_offset; + target->buffer_size = buffer_size; + return target; +} + /* stream_output_target_destroy hook: drops the target's buffer reference, then frees the target itself. */ +static void 
+swr_destroy_so_target(struct pipe_context *pipe, + struct pipe_stream_output_target *target) +{ + pipe_resource_reference(&target->buffer, NULL); + FREE(target); +} + /* set_stream_output_targets hook: binds targets[0..num_targets) into swr->so_targets, unbinds any previously bound slots beyond num_targets, updates num_so_targets and flags SWR_NEW_SO so the next swr_update_derived() re-programs the SO buffers. `offsets` is currently ignored. NOTE(review): the assert rejects num_targets == MAX_SO_STREAMS; confirm against the declared size of swr_context::so_targets. */ +static void +swr_set_so_targets(struct pipe_context *pipe, + unsigned num_targets, + struct pipe_stream_output_target **targets, + const unsigned *offsets) +{ + struct swr_context *swr = swr_context(pipe); + uint32_t i; + + assert(num_targets < MAX_SO_STREAMS); + /* Take references on the newly bound targets (releasing whatever each slot held before). */ + for (i = 0; i < num_targets; i++) { + pipe_so_target_reference( + (struct pipe_stream_output_target **)&swr->so_targets[i], + targets[i]); + } + /* Release the remaining slots that were bound by the previous call. */ + for (/* fall-through */; i < swr->num_so_targets; i++) { + pipe_so_target_reference( + (struct pipe_stream_output_target **)&swr->so_targets[i], NULL); + } + + swr->num_so_targets = num_targets; + /* OR the flag in: `dirty` is a bitmask of pending SWR_NEW_* updates, and a plain assignment would discard every other state change queued since the last swr_update_derived(). */ + swr->dirty |= SWR_NEW_SO; +} + /* Install the SWR implementations of the gallium state-object hooks on `pipe`. */ + +void +swr_state_init(struct pipe_context *pipe) +{ + pipe->create_blend_state = swr_create_blend_state; + pipe->bind_blend_state = swr_bind_blend_state; + pipe->delete_blend_state = swr_delete_blend_state; + + pipe->create_depth_stencil_alpha_state = swr_create_depth_stencil_state; + pipe->bind_depth_stencil_alpha_state = swr_bind_depth_stencil_state; + pipe->delete_depth_stencil_alpha_state = swr_delete_depth_stencil_state; + + pipe->create_rasterizer_state = swr_create_rasterizer_state; + pipe->bind_rasterizer_state = swr_bind_rasterizer_state; + pipe->delete_rasterizer_state = swr_delete_rasterizer_state; + + pipe->create_sampler_state = swr_create_sampler_state; + pipe->bind_sampler_states = swr_bind_sampler_states; + pipe->delete_sampler_state = swr_delete_sampler_state; + + pipe->create_sampler_view = swr_create_sampler_view; + pipe->set_sampler_views = swr_set_sampler_views; + pipe->sampler_view_destroy = swr_sampler_view_destroy; + + pipe->create_vs_state = swr_create_vs_state; + pipe->bind_vs_state = swr_bind_vs_state; + pipe->delete_vs_state = swr_delete_vs_state; + + pipe->create_fs_state = swr_create_fs_state; + pipe->bind_fs_state = swr_bind_fs_state; + 
pipe->delete_fs_state = swr_delete_fs_state; + + pipe->set_constant_buffer = swr_set_constant_buffer; + + pipe->create_vertex_elements_state = swr_create_vertex_elements_state; + pipe->bind_vertex_elements_state = swr_bind_vertex_elements_state; + pipe->delete_vertex_elements_state = swr_delete_vertex_elements_state; + + pipe->set_vertex_buffers = swr_set_vertex_buffers; + pipe->set_index_buffer = swr_set_index_buffer; + + pipe->set_polygon_stipple = swr_set_polygon_stipple; + pipe->set_clip_state = swr_set_clip_state; + pipe->set_scissor_states = swr_set_scissor_states; + pipe->set_viewport_states = swr_set_viewport_states; + + pipe->set_framebuffer_state = swr_set_framebuffer_state; + + pipe->set_blend_color = swr_set_blend_color; + pipe->set_stencil_ref = swr_set_stencil_ref; + + pipe->set_sample_mask = swr_set_sample_mask; + + pipe->create_stream_output_target = swr_create_so_target; + pipe->stream_output_target_destroy = swr_destroy_so_target; + pipe->set_stream_output_targets = swr_set_so_targets; +} diff --git a/src/gallium/drivers/swr/swr_state.h b/src/gallium/drivers/swr/swr_state.h new file mode 100644 index 0000000..fdacd42 --- /dev/null +++ b/src/gallium/drivers/swr/swr_state.h @@ -0,0 +1,240 @@ +/**************************************************************************** + * Copyright (C) 2015 Intel Corporation. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ ***************************************************************************/ + +#ifndef SWR_STATE_H +#define SWR_STATE_H + +#include "pipe/p_defines.h" +#include "tgsi/tgsi_scan.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" +#include "gallivm/lp_bld_tgsi.h" +#include "util/u_hash.h" +#include "api.h" +#include "swr_tex_sample.h" +#include "swr_shader.h" +#include <unordered_map> + +/* skeleton */ +struct swr_vertex_shader { + struct pipe_shader_state pipe; + struct lp_tgsi_info info; + unsigned linkageMask; + unsigned pointSizeAttrib; + PFN_VERTEX_FUNC func; + SWR_STREAMOUT_STATE soState; + PFN_SO_FUNC soFunc[PIPE_PRIM_MAX]; +}; + /* Fragment shader state object. `map` caches the JIT-compiled pixel kernels, keyed by the swr_jit_key that swr_update_derived() generates for the current state; it is looked up with find() and filled with insert(std::make_pair(key, func)). Presumably swr_shader.h supplies the hash/equality for swr_jit_key - confirm. */ +struct swr_fragment_shader { + struct pipe_shader_state pipe; + struct lp_tgsi_info info; + unsigned constantMask; + std::unordered_map<swr_jit_key, PFN_PIXEL_KERNEL> map; +}; + +/* Vertex element state */ +struct swr_vertex_element_state { + FETCH_COMPILE_STATE fsState; + PFN_FETCH_FUNC fsFunc; +#if 1 //BMCDEBUG + uint32_t stream_pitch[PIPE_MAX_ATTRIBS]; +#endif +}; + /* Blend state object: the gallium blend state plus one BLEND_COMPILE_STATE per color buffer, which swr_update_derived() uses as the lookup key for JIT-compiled blend functions. */ +struct swr_blend_state { + struct pipe_blend_state pipe; + BLEND_COMPILE_STATE compileState[PIPE_MAX_COLOR_BUFS]; +}; + +/* Shadows of SWR API DrawState */ +struct swr_shadow_state { + SWR_SURFACE_STATE *attachment[SWR_NUM_ATTACHMENTS]; + SWR_RASTSTATE rastState; + SWR_VIEWPORT vp; + SWR_VIEWPORT_MATRIX vpm; +}; + /* Pushes dirty gallium state to the SWR core. The draw info is optional; swr_draw_vbo passes it so vertex/index buffer setup can use the draw's details. */ +void swr_update_derived(struct swr_context *, + const struct pipe_draw_info * = nullptr); + +/* + * Conversion functions: Convert mesa state defines to SWR. 
+ */ + /* Translate a PIPE_STENCIL_OP_* value to SWR_STENCILOP. Note the naming skew: gallium INCR/DECR map to STENCILOP_INCRSAT/DECRSAT, while INCR_WRAP/DECR_WRAP map to STENCILOP_INCR/DECR. Unknown values assert and fall back to STENCILOP_KEEP. */ +static INLINE SWR_STENCILOP +swr_convert_stencil_op(const UINT op) +{ + switch (op) { + case PIPE_STENCIL_OP_KEEP: + return STENCILOP_KEEP; + case PIPE_STENCIL_OP_ZERO: + return STENCILOP_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return STENCILOP_REPLACE; + case PIPE_STENCIL_OP_INCR: + return STENCILOP_INCRSAT; + case PIPE_STENCIL_OP_DECR: + return STENCILOP_DECRSAT; + case PIPE_STENCIL_OP_INCR_WRAP: + return STENCILOP_INCR; + case PIPE_STENCIL_OP_DECR_WRAP: + return STENCILOP_DECR; + case PIPE_STENCIL_OP_INVERT: + return STENCILOP_INVERT; + default: + assert(0 && "Unsupported stencil op"); + return STENCILOP_KEEP; + } +} + /* Map an index size in bytes (sizeof unsigned char / short / int) to the matching SWR index format. Any other size, including 0, asserts and falls back to R32_UINT. */ +static INLINE SWR_FORMAT +swr_convert_index_type(const UINT index_size) +{ + switch (index_size) { + case sizeof(unsigned char): + return R8_UINT; + case sizeof(unsigned short): + return R16_UINT; + case sizeof(unsigned int): + return R32_UINT; + default: + assert(0 && "Unsupported index type"); + return R32_UINT; + } +} + /* Translate a PIPE_FUNC_* comparison to the SWR ZFUNC_* value; swr_update_derived() uses it for both the depth test and the stencil test functions. Unknown values assert and fall back to ZFUNC_ALWAYS. */ + +static INLINE UINT +swr_convert_depth_func(const UINT pipe_func) +{ + switch (pipe_func) { + case PIPE_FUNC_NEVER: + return ZFUNC_NEVER; + case PIPE_FUNC_LESS: + return ZFUNC_LT; + case PIPE_FUNC_EQUAL: + return ZFUNC_EQ; + case PIPE_FUNC_LEQUAL: + return ZFUNC_LE; + case PIPE_FUNC_GREATER: + return ZFUNC_GT; + case PIPE_FUNC_NOTEQUAL: + return ZFUNC_NE; + case PIPE_FUNC_GEQUAL: + return ZFUNC_GE; + case PIPE_FUNC_ALWAYS: + return ZFUNC_ALWAYS; + default: + assert(0 && "Unsupported depth func"); + return ZFUNC_ALWAYS; + } +} + /* Translate a PIPE_FACE_* cull selection to SWR_CULLMODE. Unknown values assert and fall back to SWR_CULLMODE_NONE. */ + +static INLINE SWR_CULLMODE +swr_convert_cull_mode(const UINT cull_face) +{ + switch (cull_face) { + case PIPE_FACE_NONE: + return SWR_CULLMODE_NONE; + case PIPE_FACE_FRONT: + return SWR_CULLMODE_FRONT; + case PIPE_FACE_BACK: + return SWR_CULLMODE_BACK; + case PIPE_FACE_FRONT_AND_BACK: + return SWR_CULLMODE_BOTH; + default: + assert(0 && "Invalid cull mode"); + return SWR_CULLMODE_NONE; + } +} + /* Translate a PIPE_BLEND_* equation to SWR_BLEND_OP. Unknown values assert and fall back to BLENDOP_ADD. */ +static INLINE SWR_BLEND_OP +swr_convert_blend_func(const UINT blend_func) +{ + switch 
(blend_func) { + case PIPE_BLEND_ADD: + return BLENDOP_ADD; + case PIPE_BLEND_SUBTRACT: + return BLENDOP_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: + return BLENDOP_REVSUBTRACT; + case PIPE_BLEND_MIN: + return BLENDOP_MIN; + case PIPE_BLEND_MAX: + return BLENDOP_MAX; + default: + assert(0 && "Invalid blend func"); + return BLENDOP_ADD; + } +} + /* Translate a PIPE_BLENDFACTOR_* value to the same-named SWR BLENDFACTOR_* value (one-to-one mapping). Unknown values assert and fall back to BLENDFACTOR_ONE. */ +static INLINE SWR_BLEND_FACTOR +swr_convert_blend_factor(const UINT blend_factor) +{ + switch (blend_factor) { + case PIPE_BLENDFACTOR_ONE: + return BLENDFACTOR_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return BLENDFACTOR_SRC_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return BLENDFACTOR_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return BLENDFACTOR_DST_ALPHA; + case PIPE_BLENDFACTOR_DST_COLOR: + return BLENDFACTOR_DST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return BLENDFACTOR_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return BLENDFACTOR_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return BLENDFACTOR_CONST_ALPHA; + case PIPE_BLENDFACTOR_SRC1_COLOR: + return BLENDFACTOR_SRC1_COLOR; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + return BLENDFACTOR_SRC1_ALPHA; + case PIPE_BLENDFACTOR_ZERO: + return BLENDFACTOR_ZERO; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return BLENDFACTOR_INV_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return BLENDFACTOR_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return BLENDFACTOR_INV_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return BLENDFACTOR_INV_DST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return BLENDFACTOR_INV_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return BLENDFACTOR_INV_CONST_ALPHA; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + return BLENDFACTOR_INV_SRC1_COLOR; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + return BLENDFACTOR_INV_SRC1_ALPHA; + default: + assert(0 && "Invalid blend factor"); + return BLENDFACTOR_ONE; + } +} +#endif /* SWR_STATE_H */ diff --git a/src/gallium/drivers/swr/swr_tex_sample.cpp 
b/src/gallium/drivers/swr/swr_tex_sample.cpp new file mode 100644 index 0000000..8e01e32 --- /dev/null +++ b/src/gallium/drivers/swr/swr_tex_sample.cpp @@ -0,0 +1,338 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Largely a copy of llvmpipe's lp_tex_sample.c + */ + +/** + * Texture sampling code generation + * + * This file is nothing more than ugly glue between three largely independent + * entities: + * - TGSI -> LLVM translation (i.e., lp_build_tgsi_soa) + * - texture sampling code generation (i.e., lp_build_sample_soa) + * - SWR driver + * + * All interesting code is in the functions mentioned above. There is really + * nothing to see here. 
+ * + * @author Jose Fonseca + */ + +#include "state.h" +#include "JitManager.h" +#include "state_llvm.h" + +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_type.h" +#include "gallivm/lp_bld_sample.h" +#include "gallivm/lp_bld_tgsi.h" +#include "util/u_memory.h" + +#include "swr_tex_sample.h" +#include "swr_context_llvm.h" + + +/** + * This provides the bridge between the sampler state store in + * lp_jit_context and lp_jit_texture and the sampler code + * generator. It provides the texture layout information required by + * the texture sampler code generator in terms of the state stored in + * lp_jit_context and lp_jit_texture in runtime. + */ +struct swr_sampler_dynamic_state { + struct lp_sampler_dynamic_state base; + + const struct swr_sampler_static_state *static_state; +}; + + +/** + * This is the bridge between our sampler and the TGSI translator. + */ +struct swr_sampler_soa { + struct lp_build_sampler_soa base; + + struct swr_sampler_dynamic_state dynamic_state; +}; + + +/** + * Fetch the specified member of the lp_jit_texture structure. + * \param emit_load if TRUE, emit the LLVM load instruction to actually + * fetch the field's value. Otherwise, just emit the + * GEP code to address the field. 
+ * + * @sa http://llvm.org/docs/GetElementPtr.html + */ +static LLVMValueRef +swr_texture_member(const struct lp_sampler_dynamic_state *base, + struct gallivm_state *gallivm, + LLVMValueRef context_ptr, + unsigned texture_unit, + unsigned member_index, + const char *member_name, + boolean emit_load) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef indices[4]; + LLVMValueRef ptr; + LLVMValueRef res; + /* `base` is not read in this function; only gallivm, context_ptr and the unit/member indices are used. */ + assert(texture_unit < PIPE_MAX_SHADER_SAMPLER_VIEWS); + /* GEP path: context_ptr -> texturesFS[texture_unit] -> member_index. */ + /* context[0] */ + indices[0] = lp_build_const_int32(gallivm, 0); + /* context[0].texturesFS */ + indices[1] = lp_build_const_int32(gallivm, swr_draw_context_texturesFS); + /* context[0].texturesFS[unit] */ + indices[2] = lp_build_const_int32(gallivm, texture_unit); + /* context[0].texturesFS[unit].member */ + indices[3] = lp_build_const_int32(gallivm, member_index); + + ptr = LLVMBuildGEP(builder, context_ptr, indices, Elements(indices), ""); + /* Return either the loaded field value or the field's address, as requested by emit_load. */ + if (emit_load) + res = LLVMBuildLoad(builder, ptr, ""); + else + res = ptr; + /* Name the resulting LLVM value after the texture unit and member. */ + lp_build_name(res, "context.texture%u.%s", texture_unit, member_name); + + return res; +} + + +/** + * Helper macro to instantiate the functions that generate the code to + * fetch the members of swr_jit_texture (the texturesFS entries of + * swr_draw_context) to fulfill the sampler code generator requests. + * + * This complexity is the price we have to pay to keep the texture + * sampler code generator a reusable module without dependencies to + * swr internals. 
+ */ +#define SWR_TEXTURE_MEMBER(_name, _emit_load) \ + static LLVMValueRef swr_texture_##_name( \ + const struct lp_sampler_dynamic_state *base, \ + struct gallivm_state *gallivm, \ + LLVMValueRef context_ptr, \ + unsigned texture_unit) \ + { \ + return swr_texture_member(base, \ + gallivm, \ + context_ptr, \ + texture_unit, \ + swr_jit_texture_##_name, \ + #_name, \ + _emit_load); \ + } + /* One accessor per texture field. TRUE accessors return the loaded value; the FALSE ones (row_stride, img_stride, mip_offsets - the fields swr_update_derived fills per mip level) return the field's address instead. */ + +SWR_TEXTURE_MEMBER(width, TRUE) +SWR_TEXTURE_MEMBER(height, TRUE) +SWR_TEXTURE_MEMBER(depth, TRUE) +SWR_TEXTURE_MEMBER(first_level, TRUE) +SWR_TEXTURE_MEMBER(last_level, TRUE) +SWR_TEXTURE_MEMBER(base_ptr, TRUE) +SWR_TEXTURE_MEMBER(row_stride, FALSE) +SWR_TEXTURE_MEMBER(img_stride, FALSE) +SWR_TEXTURE_MEMBER(mip_offsets, FALSE) + + +/** + * Fetch the specified member of a fragment sampler entry + * (swr_draw_context::samplersFS[sampler_unit]). + * \param emit_load if TRUE, emit the LLVM load instruction to actually + * fetch the field's value. Otherwise, just emit the + * GEP code to address the field. + * + * @sa http://llvm.org/docs/GetElementPtr.html + */ +static LLVMValueRef +swr_sampler_member(const struct lp_sampler_dynamic_state *base, + struct gallivm_state *gallivm, + LLVMValueRef context_ptr, + unsigned sampler_unit, + unsigned member_index, + const char *member_name, + boolean emit_load) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef indices[4]; + LLVMValueRef ptr; + LLVMValueRef res; + /* `base` is not read in this function. */ + assert(sampler_unit < PIPE_MAX_SAMPLERS); + /* GEP path: context_ptr -> samplersFS[sampler_unit] -> member_index. */ + /* context[0] */ + indices[0] = lp_build_const_int32(gallivm, 0); + /* context[0].samplersFS */ + indices[1] = lp_build_const_int32(gallivm, swr_draw_context_samplersFS); + /* context[0].samplersFS[unit] */ + indices[2] = lp_build_const_int32(gallivm, sampler_unit); + /* context[0].samplersFS[unit].member */ + indices[3] = lp_build_const_int32(gallivm, member_index); + + ptr = LLVMBuildGEP(builder, context_ptr, indices, Elements(indices), ""); + /* Return either the loaded field value or the field's address, as requested by emit_load. */ + if (emit_load) + res = LLVMBuildLoad(builder, ptr, ""); + else + res = ptr; + + lp_build_name(res, "context.sampler%u.%s", 
sampler_unit, member_name); + + return res; +} + /* Instantiates swr_sampler_<name>(), a thin wrapper that forwards to swr_sampler_member() with the matching swr_jit_sampler_<name> member index and the member's name as a string. */ + +#define SWR_SAMPLER_MEMBER(_name, _emit_load) \ + static LLVMValueRef swr_sampler_##_name( \ + const struct lp_sampler_dynamic_state *base, \ + struct gallivm_state *gallivm, \ + LLVMValueRef context_ptr, \ + unsigned sampler_unit) \ + { \ + return swr_sampler_member(base, \ + gallivm, \ + context_ptr, \ + sampler_unit, \ + swr_jit_sampler_##_name, \ + #_name, \ + _emit_load); \ + } + /* The LOD fields are returned as loaded values; border_color is returned by address (emit_load FALSE). */ + +SWR_SAMPLER_MEMBER(min_lod, TRUE) +SWR_SAMPLER_MEMBER(max_lod, TRUE) +SWR_SAMPLER_MEMBER(lod_bias, TRUE) +SWR_SAMPLER_MEMBER(border_color, FALSE) + /* lp_build_sampler_soa::destroy hook: frees the swr_sampler_soa that swr_sampler_soa_create() allocated. */ + +static void +swr_sampler_soa_destroy(struct lp_build_sampler_soa *sampler) +{ + FREE(sampler); +} + + +/** + * Fetch filtered values from texture. + * The 'texel' member of params returns four vectors corresponding to + * R, G, B, A. The texture and sampler units are taken from + * params->texture_index and params->sampler_index. + */ +static void +swr_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base, + struct gallivm_state *gallivm, + const struct lp_sampler_params *params) +{ + struct swr_sampler_soa *sampler = (struct swr_sampler_soa *)base; + unsigned texture_index = params->texture_index; + unsigned sampler_index = params->sampler_index; + + assert(sampler_index < PIPE_MAX_SAMPLERS); + assert(texture_index < PIPE_MAX_SHADER_SAMPLER_VIEWS); + /* static_state is one array holding both kinds of state: texture_state is read at texture_index, sampler_state at sampler_index. The #if 0 branch is a disabled no-op sampling path. */ +#if 0 + lp_build_sample_nop(gallivm, params->type, params->coords, params->texel); +#else + lp_build_sample_soa( + &sampler->dynamic_state.static_state[texture_index].texture_state, + &sampler->dynamic_state.static_state[sampler_index].sampler_state, + &sampler->dynamic_state.base, + gallivm, + params); +#endif +} + +/** + * Fetch the texture size. 
+ */ +static void +swr_sampler_soa_emit_size_query(const struct lp_build_sampler_soa *base, + struct gallivm_state *gallivm, + struct lp_type type, + unsigned texture_unit, + unsigned target, + LLVMValueRef context_ptr, + boolean is_sviewinfo, + enum lp_sampler_lod_property lod_property, + LLVMValueRef explicit_lod, /* optional */ + LLVMValueRef *sizes_out) +{ + struct swr_sampler_soa *sampler = (struct swr_sampler_soa *)base; + + assert(texture_unit < PIPE_MAX_SHADER_SAMPLER_VIEWS); + /* Forward every argument unchanged to gallivm's generic size query, adding this unit's static texture state and our dynamic-state accessors. */ + lp_build_size_query_soa( + gallivm, + &sampler->dynamic_state.static_state[texture_unit].texture_state, + &sampler->dynamic_state.base, + type, + texture_unit, + target, + context_ptr, + is_sviewinfo, + lod_property, + explicit_lod, + sizes_out); +} + /* Allocate the sampler bridge: wires the destroy / emit_tex_sample / emit_size_query hooks and the per-field accessors into the lp_build_sampler_soa interface, and stores the caller's static_state pointer without copying it. Returns NULL when the allocation fails. */ + +struct lp_build_sampler_soa * +swr_sampler_soa_create(const struct swr_sampler_static_state *static_state) +{ + struct swr_sampler_soa *sampler; + + sampler = CALLOC_STRUCT(swr_sampler_soa); + if (!sampler) + return NULL; + /* Code-generation entry points, then the texture and sampler field accessors. */ + sampler->base.destroy = swr_sampler_soa_destroy; + sampler->base.emit_tex_sample = swr_sampler_soa_emit_fetch_texel; + sampler->base.emit_size_query = swr_sampler_soa_emit_size_query; + sampler->dynamic_state.base.width = swr_texture_width; + sampler->dynamic_state.base.height = swr_texture_height; + sampler->dynamic_state.base.depth = swr_texture_depth; + sampler->dynamic_state.base.first_level = swr_texture_first_level; + sampler->dynamic_state.base.last_level = swr_texture_last_level; + sampler->dynamic_state.base.base_ptr = swr_texture_base_ptr; + sampler->dynamic_state.base.row_stride = swr_texture_row_stride; + sampler->dynamic_state.base.img_stride = swr_texture_img_stride; + sampler->dynamic_state.base.mip_offsets = swr_texture_mip_offsets; + sampler->dynamic_state.base.min_lod = swr_sampler_min_lod; + sampler->dynamic_state.base.max_lod = swr_sampler_max_lod; + sampler->dynamic_state.base.lod_bias = swr_sampler_lod_bias; + sampler->dynamic_state.base.border_color = swr_sampler_border_color; + + 
sampler->dynamic_state.static_state = static_state; + + return &sampler->base; +} diff --git a/src/gallium/drivers/swr/swr_tex_sample.h b/src/gallium/drivers/swr/swr_tex_sample.h new file mode 100644 index 0000000..f5c368c --- /dev/null +++ b/src/gallium/drivers/swr/swr_tex_sample.h @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2007 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#pragma once + +#include "gallivm/lp_bld.h" + +struct swr_sampler_static_state { + /* + * These attributes are effectively interleaved for more sane key handling. + * However, there might be lots of null space if the amount of samplers and + * textures isn't the same. 
+ */ + struct lp_static_sampler_state sampler_state; + struct lp_static_texture_state texture_state; +}; + +/** + * Pure-LLVM texture sampling code generator. + * + */ +struct lp_build_sampler_soa * +swr_sampler_soa_create(const struct swr_sampler_static_state *key); diff --git a/src/gallium/targets/libgl-xlib/Makefile.am b/src/gallium/targets/libgl-xlib/Makefile.am index d99caae..527d01b 100644 --- a/src/gallium/targets/libgl-xlib/Makefile.am +++ b/src/gallium/targets/libgl-xlib/Makefile.am @@ -84,4 +84,9 @@ endif EXTRA_lib@GL_LIB@_la_DEPENDENCIES = libgl-xlib.sym EXTRA_DIST = SConscript libgl-xlib.sym +if HAVE_GALLIUM_SWR +lib@GL_LIB@_la_LIBADD += $(top_builddir)/src/gallium/drivers/swr/libmesaswr.la $(LLVM_LIBS) +AM_CPPFLAGS += -DGALLIUM_SWR +endif + include $(top_srcdir)/install-gallium-links.mk diff --git a/src/gallium/targets/libgl-xlib/SConscript b/src/gallium/targets/libgl-xlib/SConscript index df5a220..da77ad5 100644 --- a/src/gallium/targets/libgl-xlib/SConscript +++ b/src/gallium/targets/libgl-xlib/SConscript @@ -46,6 +46,10 @@ if env['llvm']: env.Append(CPPDEFINES = ['GALLIUM_LLVMPIPE']) env.Prepend(LIBS = [llvmpipe]) +if env['llvm']: + env.Append(CPPDEFINES = ['GALLIUM_SWR']) + env.Prepend(LIBS = [swr]) + # Disallow undefined symbols if env['platform'] != 'darwin': env.Append(SHLINKFLAGS = ['-Wl,-z,defs']) diff --git a/src/gallium/targets/osmesa/Makefile.am b/src/gallium/targets/osmesa/Makefile.am index 38e515f..5d39486 100644 --- a/src/gallium/targets/osmesa/Makefile.am +++ b/src/gallium/targets/osmesa/Makefile.am @@ -74,6 +74,12 @@ lib@OSMESA_LIB@_la_LDFLAGS += $(LLVM_LDFLAGS) lib@OSMESA_LIB@_la_LIBADD += $(top_builddir)/src/gallium/drivers/llvmpipe/libllvmpipe.la $(LLVM_LIBS) endif +if HAVE_GALLIUM_SWR +AM_CPPFLAGS += -DGALLIUM_SWR +lib@OSMESA_LIB@_la_LDFLAGS += $(LLVM_LDFLAGS) +lib@OSMESA_LIB@_la_LIBADD += $(top_builddir)/src/gallium/drivers/swr/libmesaswr.la $(LLVM_LIBS) +endif + EXTRA_lib@OSMESA_LIB@_la_DEPENDENCIES = osmesa.sym EXTRA_DIST = \ 
osmesa.sym \ -- 2.6.2