From cea738da8fd25e2936dd557126589d8f19ac9176 Mon Sep 17 00:00:00 2001 From: Rex Xu Date: Wed, 6 Dec 2023 16:21:07 +0800 Subject: [PATCH] Rewrite some handling codes of unpacked output locations We have the logic to keep all locations in some conditions to initialize more location map entries than actually needed. The original logic is somewhat vague when TCS dynamic indexing is involved (the same to mesh shader outputs). For example: layout(location = 10) out float data[5] ... data[i] = XXX Location 0-9 should be initialized to 'unmapped' status and we only map the actual location range, which is location 10-14. The original code mapped all locations 0-14. This is incorrect if we have two arrays: layout(location = 5) out float data1[5] layout(location = 10) out float data2[5] ... data1[i] = XXX data2[j] = YYY In resource collecting pass, also re-write some handling codes of unpacked output locations. This is to address such case: layout(location = 0) out float data1[5] layout(location = 5) out float data2 layout(location = 6) out float data3[5] ... data1[i] = XXX data2 = 0.5 data3[j] = YYY When handing dynamic indexing, data1 is assigned to location 0-5 and data3 is assigned to location 6-10 without location mapping needed in resource collecting pass. data2 could take location 5 but our current logic assigns it to 0 without checking already-occupied locations. Such error is found when compiling a separate TCS (not full pipeline). So we must first colllect occupied locations and do location mapping after that. --- lgc/builder/InOutBuilder.cpp | 94 ++++++++++++++++++------- lgc/patch/PatchResourceCollect.cpp | 106 ++++++++++++++++++++++------- 2 files changed, 152 insertions(+), 48 deletions(-) diff --git a/lgc/builder/InOutBuilder.cpp b/lgc/builder/InOutBuilder.cpp index b84c2597e3..3c20bbe2f4 100644 --- a/lgc/builder/InOutBuilder.cpp +++ b/lgc/builder/InOutBuilder.cpp @@ -181,7 +181,7 @@ Value *BuilderImpl::readGenericInputOutput(bool isOutput, Type *resultTy, unsign assert(isOutput == false || m_shaderStage == ShaderStageTessControl); // Fold constant locationOffset into location. (Currently a variable locationOffset is only supported in - // TCS, TES, and FS custom interpolation.) + // TCS, TES, mesh shader, and FS custom interpolation.) bool isDynLocOffset = true; if (auto constLocOffset = dyn_cast(locationOffset)) { location += constLocOffset->getZExtValue(); @@ -267,8 +267,8 @@ Instruction *BuilderImpl::CreateWriteGenericOutput(Value *valueToWrite, unsigned Value *vertexOrPrimitiveIndex) { assert(valueToWrite->getType()->isAggregateType() == false); - // Fold constant locationOffset into location. (Currently a variable locationOffset is only supported in - // TCS.) + // Fold constant locationOffset into location (Currently a variable locationOffset is only supported in + // TCS or mesh shader). bool isDynLocOffset = true; if (auto constLocOffset = dyn_cast(locationOffset)) { location += constLocOffset->getZExtValue(); @@ -346,7 +346,7 @@ Instruction *BuilderImpl::CreateWriteGenericOutput(Value *valueToWrite, unsigned // // @param isOutput : False for input, true for output // @param location : Input/output base location -// @param locationCount : Count of locations taken by the input +// @param locationCount : Count of locations taken by the input/output // @param inOutInfo : Extra input/output information // @param vertexOrPrimIndex : For TCS/TES/GS/mesh shader per-vertex input/output: vertex index; // for mesh shader per-primitive output: primitive index; @@ -389,44 +389,92 @@ void BuilderImpl::markGenericInputOutputUsage(bool isOutput, unsigned location, } } - if (!isOutput || m_shaderStage != ShaderStageGeometry) { + if (!(m_shaderStage == ShaderStageGeometry && isOutput)) { + // Not GS output bool keepAllLocations = false; if (getPipelineState()->isUnlinked()) { - if (isOutput && m_shaderStage != ShaderStageFragment) { - ShaderStage nextStage = m_pipelineState->getNextShaderStage(m_shaderStage); - keepAllLocations = nextStage == ShaderStageFragment || nextStage == ShaderStageInvalid; + if (isOutput) { + // Keep all locations if the next stage of the output is fragment shader or is unspecified + if (m_shaderStage != ShaderStageFragment) { + ShaderStage nextStage = m_pipelineState->getNextShaderStage(m_shaderStage); + keepAllLocations = nextStage == ShaderStageFragment || nextStage == ShaderStageInvalid; + } + } else { + // Keep all locations if it is the input of fragment shader + keepAllLocations = m_shaderStage == ShaderStageFragment; } - if (m_shaderStage == ShaderStageFragment && !isOutput) - keepAllLocations = true; } - unsigned startLocation = (keepAllLocations ? 0 : location); - // NOTE: The non-invalid value as initial new Location info or new location is used to identify the dynamic indexing - // location. - // Non-GS-output case. + if (inOutLocInfoMap) { - for (unsigned i = startLocation; i < location + locationCount; ++i) { + // Handle per-vertex input/output + if (keepAllLocations) { + // If keeping all locations, add location map entries whose locations are before this input/output + for (unsigned i = 0; i < location; ++i) { + InOutLocationInfo origLocationInfo; + origLocationInfo.setLocation(i); + if (inOutLocInfoMap->count(origLocationInfo) == 0) { + // Add this location map entry only if it doesn't exist + auto &newLocationInfo = (*inOutLocInfoMap)[origLocationInfo]; + newLocationInfo.setData(InvalidValue); + } + } + } + + // Add location map entries for this input/output + for (unsigned i = 0; i < locationCount; ++i) { InOutLocationInfo origLocationInfo; - origLocationInfo.setLocation(i); + origLocationInfo.setLocation(location + i); origLocationInfo.setComponent(inOutInfo.getComponent()); auto &newLocationInfo = (*inOutLocInfoMap)[origLocationInfo]; - newLocationInfo.setData(isDynLocOffset ? i : InvalidValue); + if (isDynLocOffset) { + // When dynamic indexing, map the location directly + newLocationInfo.setLocation(location + i); + newLocationInfo.setComponent(inOutInfo.getComponent()); + } else + newLocationInfo.setData(InvalidValue); } } + if (perPatchInOutLocMap) { - for (unsigned i = startLocation; i < location + locationCount; ++i) - (*perPatchInOutLocMap)[i] = isDynLocOffset ? i : InvalidValue; + // Handle per-patch input/output + if (keepAllLocations) { + // If keeping all locations, add location map entries whose locations are before this input/output + for (unsigned i = 0; i < location; ++i) { + // Add this location map entry only if it doesn't exist + if (perPatchInOutLocMap->count(i) == 0) + (*perPatchInOutLocMap)[i] = InvalidValue; + } + } + + // Add location map entries for this input/output + for (unsigned i = 0; i < locationCount; ++i) + (*perPatchInOutLocMap)[location + i] = + isDynLocOffset ? location + i : InvalidValue; // When dynamic indexing, map the location } + if (perPrimitiveInOutLocMap) { - for (unsigned i = startLocation; i < location + locationCount; ++i) - (*perPrimitiveInOutLocMap)[i] = isDynLocOffset ? i : InvalidValue; + // Handle per-primitive input/output + if (keepAllLocations) { + // If keeping all locations, add location map entries whose locations are before this input/output + for (unsigned i = 0; i < location; ++i) { + // Add this location map entry only if it doesn't exist + if (perPrimitiveInOutLocMap->count(i) == 0) + (*perPrimitiveInOutLocMap)[i] = InvalidValue; + } + } + + // Add location map entries for this input/output + for (unsigned i = 0; i < locationCount; ++i) + (*perPrimitiveInOutLocMap)[location + i] = + isDynLocOffset ? location + i : InvalidValue; // When dynamic indexing, map the location } } else { - // GS output. We include the stream ID with the location in the map key. + // GS output for (unsigned i = 0; i < locationCount; ++i) { InOutLocationInfo outLocationInfo; outLocationInfo.setLocation(location + i); outLocationInfo.setComponent(inOutInfo.getComponent()); - outLocationInfo.setStreamId(inOutInfo.getStreamId()); + outLocationInfo.setStreamId(inOutInfo.getStreamId()); // Include the stream ID in the map key. auto &newLocationInfo = (*inOutLocInfoMap)[outLocationInfo]; newLocationInfo.setData(InvalidValue); } diff --git a/lgc/patch/PatchResourceCollect.cpp b/lgc/patch/PatchResourceCollect.cpp index d49c2ed82d..3b6c678661 100644 --- a/lgc/patch/PatchResourceCollect.cpp +++ b/lgc/patch/PatchResourceCollect.cpp @@ -2907,68 +2907,124 @@ void PatchResourceCollect::updateOutputLocInfoMapWithUnpack() { // Update the value of outputLocInfoMap if (!outputLocInfoMap.empty()) { - unsigned nextMapLoc[MaxGsStreams] = {}; - DenseMap alreadyMappedLocs[MaxGsStreams]; // Map from original location to new location + unsigned nextAvailableLoc[MaxGsStreams] = {}; + DenseMap locMap[MaxGsStreams]; // Map from original location to new location + DenseSet occupiedLocs[MaxGsStreams]; // Collection of already-occupied locations in location mapping + // Collect all mapped locations before we do location mapping for those unmapped + for (auto &locInfoPair : outputLocInfoMap) { + const auto &locationInfo = locInfoPair.first; + auto &newLocationInfo = locInfoPair.second; + + const unsigned streamId = m_shaderStage == ShaderStageGeometry ? locationInfo.getStreamId() : 0; + + if (!newLocationInfo.isInvalid()) { + // Record mapped locations + const unsigned locAlreadyMapped = locationInfo.getLocation(); + const unsigned newLocMappedTo = newLocationInfo.getLocation(); + assert(newLocMappedTo != InvalidValue); + + locMap[streamId][locAlreadyMapped] = newLocMappedTo; + occupiedLocs[streamId].insert(newLocMappedTo); + } + } + + // Do location mapping for (auto &locInfoPair : outputLocInfoMap) { const auto &locationInfo = locInfoPair.first; auto &newLocationInfo = locInfoPair.second; if (!newLocationInfo.isInvalid()) - continue; // Skip any location that is mapped + continue; // Skip mapped locations + + const unsigned streamId = m_shaderStage == ShaderStageGeometry ? locationInfo.getStreamId() : 0; newLocationInfo.setData(0); newLocationInfo.setComponent(locationInfo.getComponent()); - const unsigned streamId = m_shaderStage == ShaderStageGeometry ? locationInfo.getStreamId() : 0; newLocationInfo.setStreamId(streamId); const unsigned locToBeMapped = locationInfo.getLocation(); - unsigned mappedLoc = InvalidValue; + unsigned newLocMappedTo = InvalidValue; const bool keepLocation = m_shaderStage == ShaderStageGeometry && !canChangeOutputLocationsForGs(); if (keepLocation) { // Keep location unchanged - mappedLoc = locToBeMapped; + newLocMappedTo = locToBeMapped; } else { // Map to new location - if (alreadyMappedLocs[streamId].count(locToBeMapped) > 0) { - mappedLoc = alreadyMappedLocs[streamId][locToBeMapped]; + if (locMap[streamId].count(locToBeMapped) > 0) { + newLocMappedTo = locMap[streamId][locToBeMapped]; } else { - mappedLoc = nextMapLoc[streamId]++; + do { + // Try to find a new location that has not been occupied + newLocMappedTo = nextAvailableLoc[streamId]++; + } while (occupiedLocs[streamId].count(newLocMappedTo) > 0); + // NOTE: Record the map because we are handling multiple pairs of . Some pairs have the // same location while the components are different. - alreadyMappedLocs[streamId][locToBeMapped] = mappedLoc; + locMap[streamId][locToBeMapped] = newLocMappedTo; + occupiedLocs[streamId].insert(newLocMappedTo); } } - assert(mappedLoc != InvalidValue); - newLocationInfo.setLocation(mappedLoc); + assert(newLocMappedTo != InvalidValue); + newLocationInfo.setLocation(newLocMappedTo); if (m_shaderStage == ShaderStageGeometry) - inOutUsage.gs.outLocCount[streamId] = std::max(inOutUsage.gs.outLocCount[streamId], mappedLoc + 1); + inOutUsage.gs.outLocCount[streamId] = std::max(inOutUsage.gs.outLocCount[streamId], newLocMappedTo + 1); } } // Update the value of perPatchOutputLocMap if (!perPatchOutputLocMap.empty()) { - unsigned nextMapLoc = 0; + assert(m_shaderStage == ShaderStageTessControl); + + unsigned nextAvailableLoc = 0; + DenseSet occupiedLocs; // Collection of already-occupied locations in location mapping + + // Collect all mapped locations before we do location mapping for those unmapped + for (auto &locPair : perPatchOutputLocMap) { + if (locPair.second != InvalidValue) + occupiedLocs.insert(locPair.second); // Record mapped locations + } + + // Do location mapping for (auto &locPair : perPatchOutputLocMap) { - if (locPair.second == InvalidValue) { - // Only do location mapping if the per-patch output has not been mapped - locPair.second = nextMapLoc++; - } else - assert(m_shaderStage == ShaderStageTessControl); + if (locPair.second != InvalidValue) + continue; // Skip mapped locations + + unsigned newLocMappedTo = InvalidValue; + do { + // Try to find a new location that has not been occupied + newLocMappedTo = nextAvailableLoc++; + } while (occupiedLocs.count(newLocMappedTo) > 0); + locPair.second = newLocMappedTo; } } // Update the value of perPrimitiveOutputLocMap if (!perPrimitiveOutputLocMap.empty()) { - unsigned nextMapLoc = 0; + assert(m_shaderStage == ShaderStageMesh); + + unsigned nextAvailableLoc = 0; + DenseSet occupiedLocs; // Collection of already-occupied locations in location mapping + + // Collect all mapped locations before we do location mapping for those unmapped + for (auto &locPair : perPrimitiveOutputLocMap) { + if (locPair.second != InvalidValue) + occupiedLocs.insert(locPair.second); // Record mapped locations + } + + // Do location mapping for (auto &locPair : perPrimitiveOutputLocMap) { - if (locPair.second == InvalidValue) { - // Only do location mapping if the per-primitive output has not been mapped - locPair.second = nextMapLoc++; - } else - assert(m_shaderStage == ShaderStageMesh); + if (locPair.second != InvalidValue) + continue; // Skip mapped locations + + unsigned newLocMappedTo = InvalidValue; + do { + // Try to find a new location that has not been occupied + newLocMappedTo = nextAvailableLoc++; + } while (occupiedLocs.count(newLocMappedTo) > 0); + locPair.second = newLocMappedTo; } }