Skip to content

Commit

Permalink
Emit errors when invalid capabilities are used (shader-slang#4510)
Browse files Browse the repository at this point in the history
* Fix invalid capabilities being allowed

fixes: shader-slang#4506
fixes: shader-slang#4508

1. As per shader-slang#4506, no longer allow invalid `[require(...)]`
2. As per shader-slang#4508, no longer allow mismatch between `case` and `require` of a calling function
3. Fixes incorrect hlsl.meta capabilities
4. Added a reference to the parent function/decl when reporting capability errors, to help debug meta.slang files when unexpected source locations are generated.

* rename vars and copy lambdas by value

* fix some more capabilities

* incorrect capabilities on a texture test

* push capabilities fix

note: separated capabilities for glsl, spirv, cuda, and hlsl since not all functions support all targets (source of capability error)

* fix cmd line arg by using `xslang` to pass it through to slangc

* let auto-infer run for certain capabilities to reduce simple mistakes

---------

Co-authored-by: Jay Kwak <[email protected]>
  • Loading branch information
ArielG-NV and jkwak-work authored Jul 2, 2024
1 parent bd01bd3 commit d5d03d1
Show file tree
Hide file tree
Showing 9 changed files with 145 additions and 85 deletions.
30 changes: 15 additions & 15 deletions source/slang/glsl.meta.slang
Original file line number Diff line number Diff line change
Expand Up @@ -1870,7 +1870,7 @@ public ivec3 textureSize(Sampler2DMSArray<vector<T,N>,sampleCount> sampler)

__generic<T, let isArray:int, let sampleCount:int, let isShadow:int, let format:int>
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, texture_querylod)]
[require(glsl_hlsl_metal_spirv, texture_querylod)]
public vec2 textureQueryLod(__TextureImpl<
T,
__Shape1D,
Expand Down Expand Up @@ -1903,7 +1903,7 @@ public vec2 textureQueryLod(__TextureImpl<

__generic<T, Shape: __ITextureShape, let isArray:int, let sampleCount:int, let isShadow:int, let format:int>
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, texture_querylod)]
[require(glsl_hlsl_metal_spirv, texture_querylod)]
public vec2 textureQueryLod(__TextureImpl<
T,
Shape,
Expand Down Expand Up @@ -1940,7 +1940,7 @@ public vec2 textureQueryLod(__TextureImpl<

__generic<T:__BuiltinArithmeticType, let N:int>
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)]
[require(cpp_glsl_hlsl_metal_spirv, texture_querylevels)]
public int textureQueryLevels(Sampler1D<vector<T,N>> sampler)
{
int width;
Expand All @@ -1951,7 +1951,7 @@ public int textureQueryLevels(Sampler1D<vector<T,N>> sampler)

__generic<T:__BuiltinArithmeticType, let N:int>
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)]
[require(cpp_glsl_hlsl_metal_spirv, texture_querylevels)]
public int textureQueryLevels(Sampler2D<vector<T,N>> sampler)
{
vector<int,2> dim;
Expand All @@ -1962,7 +1962,7 @@ public int textureQueryLevels(Sampler2D<vector<T,N>> sampler)

__generic<T:__BuiltinArithmeticType, let N:int>
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)]
[require(cpp_glsl_hlsl_metal_spirv, texture_querylevels)]
public int textureQueryLevels(Sampler3D<vector<T,N>> sampler)
{
vector<int,3> dim;
Expand All @@ -1973,7 +1973,7 @@ public int textureQueryLevels(Sampler3D<vector<T,N>> sampler)

__generic<T:__BuiltinArithmeticType, let N:int>
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)]
[require(cpp_glsl_hlsl_metal_spirv, texture_querylevels)]
public int textureQueryLevels(SamplerCube<vector<T,N>> sampler)
{
vector<int,2> dim;
Expand All @@ -1984,7 +1984,7 @@ public int textureQueryLevels(SamplerCube<vector<T,N>> sampler)

__generic<T:__BuiltinArithmeticType, let N:int>
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)]
[require(cpp_glsl_hlsl_metal_spirv, texture_querylevels)]
public int textureQueryLevels(Sampler1DArray<vector<T,N>> sampler)
{
vector<int,2> dim;
Expand All @@ -1995,7 +1995,7 @@ public int textureQueryLevels(Sampler1DArray<vector<T,N>> sampler)

__generic<T:__BuiltinArithmeticType, let N:int>
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)]
[require(cpp_glsl_hlsl_metal_spirv, texture_querylevels)]
public int textureQueryLevels(Sampler2DArray<vector<T,N>> sampler)
{
vector<int,3> dim;
Expand All @@ -2006,7 +2006,7 @@ public int textureQueryLevels(Sampler2DArray<vector<T,N>> sampler)

__generic<T:__BuiltinArithmeticType, let N:int>
[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)]
[require(cpp_glsl_hlsl_metal_spirv, texture_querylevels)]
public int textureQueryLevels(SamplerCubeArray<vector<T,N>> sampler)
{
vector<int,3> dim;
Expand All @@ -2016,7 +2016,7 @@ public int textureQueryLevels(SamplerCubeArray<vector<T,N>> sampler)
}

[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)]
[require(cpp_glsl_hlsl_metal_spirv, texture_querylevels)]
public int textureQueryLevels(sampler1DShadow sampler)
{
int dim;
Expand All @@ -2026,7 +2026,7 @@ public int textureQueryLevels(sampler1DShadow sampler)
}

[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)]
[require(cpp_glsl_hlsl_metal_spirv, texture_querylevels)]
public int textureQueryLevels(sampler2DShadow sampler)
{
vector<int,2> dim;
Expand All @@ -2036,7 +2036,7 @@ public int textureQueryLevels(sampler2DShadow sampler)
}

[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)]
[require(cpp_glsl_hlsl_metal_spirv, texture_querylevels)]
public int textureQueryLevels(samplerCubeShadow sampler)
{
vector<int,2> dim;
Expand All @@ -2046,7 +2046,7 @@ public int textureQueryLevels(samplerCubeShadow sampler)
}

[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)]
[require(cpp_glsl_hlsl_metal_spirv, texture_querylevels)]
public int textureQueryLevels(sampler1DArrayShadow sampler)
{
vector<int,2> dim;
Expand All @@ -2056,7 +2056,7 @@ public int textureQueryLevels(sampler1DArrayShadow sampler)
}

[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)]
[require(cpp_glsl_hlsl_metal_spirv, texture_querylevels)]
public int textureQueryLevels(sampler2DArrayShadow sampler)
{
vector<int,3> dim;
Expand All @@ -2066,7 +2066,7 @@ public int textureQueryLevels(sampler2DArrayShadow sampler)
}

[ForceInline]
[require(cpp_cuda_glsl_hlsl_spirv, texture_querylevels)]
[require(cpp_glsl_hlsl_metal_spirv, texture_querylevels)]
public int textureQueryLevels(samplerCubeArrayShadow sampler)
{
vector<int,3> dim;
Expand Down
28 changes: 11 additions & 17 deletions source/slang/hlsl.meta.slang
Original file line number Diff line number Diff line change
Expand Up @@ -2901,7 +2901,6 @@ extension __TextureImpl<T,Shape,isArray,0,sampleCount,$(access),isShadow, 0,form

[__readNone]
[ForceInline]
[require(hlsl, texture_sm_4_1)]
T Load(vector<int, Shape.dimensions+isArray> location, vector<int, Shape.dimensions+isArray> offset, out uint status)
{
__target_switch
Expand Down Expand Up @@ -3124,7 +3123,6 @@ extension __TextureImpl<T,Shape,isArray,1,sampleCount,$(access),isShadow, 0,form

[__readNone]
[ForceInline]
[require(hlsl, texture_sm_4_1_compute_fragment)]
T Load(vector<int, Shape.dimensions+isArray> location, int sampleIndex, vector<int, Shape.dimensions+isArray> offset, out uint status)
{
__target_switch
Expand Down Expand Up @@ -3292,11 +3290,7 @@ ${{{{
if (shape == kStdlibShapeIndex3D && isArray == 1)
continue;
const char* textureTypeName = isCombined ? "Sampler" : "Texture";

StringBuilder requireStringBuilder;
auto requireString = requireStringBuilder.toString();
}}}}
$(requireString)
typealias $(accessPrefix[access])$(textureTypeName)$(shapeTypeNames[shape])$(msPostFix[isMS])$(arrayPostFix[isArray])<T=float4, let sampleCount:int=0, let format:int=0> = __TextureImpl<T, __Shape$(shapeTypeNames[shape]), $(isArray), $(isMS), sampleCount, $(access), 0, $(isCombined), format>;
${{{{
}
Expand Down Expand Up @@ -14117,7 +14111,7 @@ vector<T, N> WaveActive$(opName.hlslName)(vector<T, N> expr)
}

__generic<T : __BuiltinIntegerType, let N : int, let M : int>
[require(cuda_hlsl, subgroup_arithmetic)]
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
matrix<T, N, M> WaveActive$(opName.hlslName)(matrix<T, N, M> expr)
{
__target_switch
Expand Down Expand Up @@ -14189,7 +14183,7 @@ vector<T, N> WaveActive$(opName)(vector<T, N> expr)
}

__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
[require(cuda_hlsl, subgroup_arithmetic)]
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
matrix<T, N, M> WaveActive$(opName)(matrix<T, N, M> expr)
{
__target_switch
Expand Down Expand Up @@ -14283,7 +14277,7 @@ vector<T,N> WaveActive$(opName.hlslName)(vector<T,N> expr)
}

__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
[require(cuda_hlsl, subgroup_arithmetic)]
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
matrix<T, N, M> WaveActive$(opName.hlslName)(matrix<T, N, M> expr)
{
__target_switch
Expand Down Expand Up @@ -14600,7 +14594,7 @@ vector<T,N> WavePrefixProduct(vector<T,N> expr)
}

__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
[require(cuda_hlsl, subgroup_arithmetic)]
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
matrix<T, N, M> WavePrefixProduct(matrix<T, N, M> expr)
{
__target_switch
Expand Down Expand Up @@ -14676,7 +14670,7 @@ vector<T,N> WavePrefixSum(vector<T,N> expr)
}

__generic<T : __BuiltinArithmeticType, let N : int, let M : int>
[require(cuda_hlsl, subgroup_arithmetic)]
[require(cuda_glsl_hlsl_spirv, subgroup_arithmetic)]
matrix<T,N,M> WavePrefixSum(matrix<T,N,M> expr)
{
__target_switch
Expand Down Expand Up @@ -14732,7 +14726,7 @@ vector<T,N> WaveReadLaneFirst(vector<T,N> expr)
}

__generic<T : __BuiltinType, let N : int, let M : int>
[require(cuda_hlsl, subgroup_ballot)]
[require(cuda_glsl_hlsl_spirv, subgroup_ballot)]
matrix<T,N,M> WaveReadLaneFirst(matrix<T,N,M> expr)
{
__target_switch
Expand Down Expand Up @@ -14796,7 +14790,7 @@ vector<T,N> WaveBroadcastLaneAt(vector<T,N> value, constexpr int lane)
}

__generic<T : __BuiltinType, let N : int, let M : int>
[require(cuda_hlsl, subgroup_ballot)]
[require(cuda_glsl_hlsl_spirv, subgroup_ballot)]
matrix<T, N, M> WaveBroadcastLaneAt(matrix<T, N, M> value, constexpr int lane)
{
__target_switch
Expand Down Expand Up @@ -14857,7 +14851,7 @@ vector<T,N> WaveReadLaneAt(vector<T,N> value, int lane)
}

__generic<T : __BuiltinType, let N : int, let M : int>
[require(cuda_hlsl, subgroup_shuffle)]
[require(cuda_glsl_hlsl_spirv, subgroup_shuffle)]
matrix<T, N, M> WaveReadLaneAt(matrix<T, N, M> value, int lane)
{
__target_switch
Expand Down Expand Up @@ -15023,7 +15017,7 @@ uint4 WaveMatch(vector<T,N> value)
}

__generic<T : __BuiltinType, let N : int, let M : int>
[require(cuda_glsl_hlsl, subgroup_partitioned)]
[require(cuda_glsl_hlsl_spirv, subgroup_partitioned)]
uint4 WaveMatch(matrix<T,N,M> value)
{
__target_switch
Expand Down Expand Up @@ -16243,7 +16237,7 @@ void SetMeshOutputCounts(uint vertexCount, uint primitiveCount)
// This function cannot be inlined due to a legalization pass happening mid-way through processing
// and later more processing happening to the function which requires eventual inlining.
[KnownBuiltin("DispatchMesh")]
[require(glsl_hlsl_spirv, meshshading)]
[require(glsl_hlsl_metal_spirv, meshshading)]
[noRefInline]
void DispatchMesh<P>(uint threadGroupCountX, uint threadGroupCountY, uint threadGroupCountZ, __ref P meshPayload)
{
Expand Down Expand Up @@ -18533,7 +18527,7 @@ struct HitObject

/// Returns the attributes of a hit. Valid if the hit object represents a hit or a miss.
[ForceInline]
[require(glsl_spirv, ser_raygen_closesthit_miss)]
[require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)]
attr_t GetAttributes<attr_t>()
{
__target_switch
Expand Down
32 changes: 18 additions & 14 deletions source/slang/slang-capabilities.capdef
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ alias rayquery = GL_EXT_ray_query | _sm_6_3;
alias raytracing_motionblur = raytracing + motionblur | cuda;
alias ser_motion = ser + motionblur;
alias shaderclock = GL_EXT_shader_realtime_clock | hlsl_nvapi | cpp | cuda;
alias meshshading_internal = GL_EXT_mesh_shader | _sm_6_5;
alias meshshading_internal = GL_EXT_mesh_shader | _sm_6_5 | metal;
alias meshshading = amplification_mesh + meshshading_internal;
alias fragmentshaderinterlock = _GL_ARB_fragment_shader_interlock | hlsl_nvapi | spvFragmentShaderPixelInterlockEXT;
alias atomic64 = GL_EXT_shader_atomic_int64 | _sm_6_6 | cpp | cuda;
Expand Down Expand Up @@ -447,7 +447,7 @@ alias sm_4_0_version = _sm_4_0
;
alias sm_4_0 = sm_4_0_version
| SPV_GOOGLE_user_type + spvMinLod
| GL_ARB_sparse_texture_clamp + GL_EXT_samplerless_texture_functions + GL_EXT_texture_query_lod + GL_EXT_texture_shadow_lod + GL_EXT_debug_printf
| GL_ARB_sparse_texture_clamp + GL_EXT_samplerless_texture_functions + GL_EXT_texture_query_lod + GL_EXT_texture_shadow_lod + GL_EXT_debug_printf + GL_ARB_shader_image_size
;

alias sm_4_1_version = _sm_4_1
Expand All @@ -458,6 +458,7 @@ alias sm_4_1_version = _sm_4_1
| cpp
;
alias sm_4_1 = sm_4_1_version
| GL_ARB_texture_gather + GL_ARB_texture_query_levels + GL_ARB_shader_texture_image_samples
// previous
| sm_4_0
;
Expand Down Expand Up @@ -737,6 +738,9 @@ alias consumestructuredbuffer = sm_5_0_version;
alias fragmentprocessing = fragment + _sm_5_0
| fragment + glsl_spirv
| raytracingstages_compute_amplification_mesh + GL_NV_compute_shader_derivatives
| fragment + metal
| fragment + cpp
| fragment + cuda
;
alias fragmentprocessing_derivativecontrol = fragment + _sm_5_0
| fragment + GL_ARB_derivative_control
Expand All @@ -758,15 +762,15 @@ alias texture_sm_4_1_samplerless = texture_sm_4_1
// supposedly works on only limited stages, support all stages for now
alias texture_sm_4_1_compute_fragment = texture_sm_4_1;
alias texture_sm_4_0_fragment = texture_sm_4_0;
alias texture_sm_4_1_clamp_fragment = texture_sm_4_0_fragment + GL_ARB_sparse_texture_clamp;
alias texture_sm_4_1_clamp_fragment = texture_sm_4_0_fragment | GL_ARB_sparse_texture_clamp;
alias texture_sm_4_1_vertex_fragment_geometry = texture_sm_4_1;
alias texture_gather = texture_sm_4_1_vertex_fragment_geometry + GL_ARB_texture_gather;
alias image_samples = texture_sm_4_1_compute_fragment + GL_ARB_shader_texture_image_samples;
alias image_size = texture_sm_4_1_compute_fragment + GL_ARB_shader_image_size;
alias texture_size = texture_sm_4_1 + GL_ARB_shader_image_size;
alias texture_querylod = texture_sm_4_1 + GL_EXT_texture_query_lod;
alias texture_querylevels = texture_sm_4_1 + GL_ARB_texture_query_levels;
alias texture_shadowlod = texture_sm_4_1 + GL_EXT_texture_shadow_lod
alias texture_gather = texture_sm_4_1_vertex_fragment_geometry | GL_ARB_texture_gather;
alias image_samples = texture_sm_4_1_compute_fragment | GL_ARB_shader_texture_image_samples;
alias image_size = texture_sm_4_1_compute_fragment | GL_ARB_shader_image_size;
alias texture_size = texture_sm_4_1 | GL_ARB_shader_image_size;
alias texture_querylod = texture_sm_4_1 | GL_EXT_texture_query_lod;
alias texture_querylevels = texture_sm_4_1 | GL_ARB_texture_query_levels;
alias texture_shadowlod = texture_sm_4_1 | GL_EXT_texture_shadow_lod
| texture_sm_4_1;

alias atomic_glsl_float1 = GL_EXT_shader_atomic_float;
Expand Down Expand Up @@ -803,19 +807,19 @@ alias subgroup_basic_ballot = glsl + GL_KHR_shader_subgroup_basic + subgroup_bal
alias subgroup_vote = GL_KHR_shader_subgroup_vote | _sm_6_0 | _cuda_sm_7_0;
alias shaderinvocationgroup = subgroup_vote;
alias subgroup_arithmetic = GL_KHR_shader_subgroup_arithmetic | _sm_6_0 | _cuda_sm_7_0;
alias subgroup_shuffle = glsl + GL_KHR_shader_subgroup_shuffle | _sm_6_0 | _cuda_sm_7_0;
alias subgroup_shuffle = GL_KHR_shader_subgroup_shuffle | _sm_6_0 | _cuda_sm_7_0;
alias subgroup_shufflerelative = GL_KHR_shader_subgroup_shuffle_relative | _sm_6_0 | _cuda_sm_7_0;
alias subgroup_clustered = GL_KHR_shader_subgroup_clustered | _sm_6_0 | _cuda_sm_7_0;
alias subgroup_quad = GL_KHR_shader_subgroup_quad | _sm_6_0 | _cuda_sm_7_0;
alias subgroup_partitioned = GL_NV_shader_subgroup_partitioned + subgroup_ballot_activemask | _sm_6_5;
alias subgroup_partitioned = GL_NV_shader_subgroup_partitioned + subgroup_ballot_activemask | _sm_6_5 | _cuda_sm_7_0;

alias atomic_glsl_hlsl_nvapi_cuda_float1 = atomic_glsl_float1 | hlsl_nvapi + _sm_4_0 | _cuda_sm_2_0;
alias atomic_glsl_hlsl_nvapi_cuda5_int64 = atomic_glsl_int64 | hlsl_nvapi + _sm_4_0 | _cuda_sm_6_0;
alias atomic_glsl_hlsl_nvapi_cuda6_int64 = atomic_glsl_int64 | hlsl_nvapi + _sm_4_0 | _cuda_sm_6_0;
alias atomic_glsl_hlsl_nvapi_cuda9_int64 = atomic_glsl_int64 | hlsl_nvapi + _sm_4_0 | _cuda_sm_9_0;

alias atomic_glsl_hlsl_cuda = atomic_glsl | _sm_5_0 | _cuda_sm_2_0;
alias atomic_glsl_hlsl_cuda9_int64 = atomic_glsl_int64 | _sm_6_6 | _cuda_sm_9_0;
alias atomic_glsl_hlsl_cuda = atomic_glsl | _sm_5_0 | _cuda_sm_2_0 | metal;
alias atomic_glsl_hlsl_cuda9_int64 = atomic_glsl_int64 | _sm_6_6 | _cuda_sm_9_0 | metal;

alias helper_lane = _sm_6_0 + fragment
| GL_EXT_demote_to_helper_invocation + fragment
Expand Down
Loading

0 comments on commit d5d03d1

Please sign in to comment.