diff --git a/CppCheckSuppressions.txt b/CppCheckSuppressions.txt new file mode 100644 index 000000000..c81515ca8 --- /dev/null +++ b/CppCheckSuppressions.txt @@ -0,0 +1,84 @@ +arrayIndexThenCheck:src/bootstrap.cc:304 +arrayIndexThenCheck:src/debug.cc:88 +arrayIndexThenCheck:src/graph/search.cc:844 +arrayIndexThenCheck:src/graph/search.cc:916 +arrayIndexThenCheck:src/graph/search.cc:927 +clarifyCalculation:src/graph/topo.cc:702 +clarifyCalculation:src/graph/topo.cc:720 +clarifyCondition:src/enqueue.cc:416 +funcArgNamesDifferent:src/graph/topo.cc:135 +funcArgNamesDifferent:src/graph/topo.h:144 +nullPointerRedundantCheck:src/misc/utils.cc:102 +nullPointerRedundantCheck:src/misc/utils.cc:109 +nullPointerRedundantCheck:src/proxy.cc:143 +nullPointerRedundantCheck:src/proxy.cc:144 +nullPointerRedundantCheck:src/proxy.cc:147 +nullPointerRedundantCheck:src/proxy.cc:148 +nullPointerRedundantCheck:src/proxy.cc:149 +nullPointerRedundantCheck:src/proxy.cc:150 +nullPointerRedundantCheck:src/proxy.cc:151 +nullPointerRedundantCheck:src/proxy.cc:155 +nullPointerRedundantCheck:src/proxy.cc:159 +nullPointerRedundantCheck:src/proxy.cc:160 +nullPointerRedundantCheck:src/proxy.cc:161 +nullPointerRedundantCheck:src/proxy.cc:163 +nullPointerRedundantCheck:src/proxy.cc:165 +nullPointerRedundantCheck:src/proxy.cc:167 +nullPointerRedundantCheck:src/proxy.cc:168 +nullPointerRedundantCheck:src/proxy.cc:340 +nullPointerRedundantCheck:src/proxy.cc:342 +nullPointerRedundantCheck:src/proxy.cc:93 +nullPointerRedundantCheck:src/proxy.cc:94 +redundantAssignment:src/proxy.cc:161 +redundantAssignment:src/proxy.cc:163 +redundantCopy:src/graph/rings.cc:16 +redundantCopy:src/graph/rings.cc:17 +terminateStrncpy:src/misc/utils.cc:99 +terminateStrncpy:src/transport/net_socket.cc:245 +unreachableCode:src/transport/net.cc:555 +unreadVariable:src/graph/tuning.cc:109 +unreadVariable:src/graph/tuning.cc:110 +unreadVariable:src/graph/tuning.cc:113 +unusedFunction:src/graph/topo.cc:37 +unusedFunction:src/graph/topo.cc:836 +unusedFunction:src/misc/gdrwrap.cc:109 +unusedFunction:src/misc/gdrwrap.cc:117 +unusedFunction:src/misc/gdrwrap.cc:130 +unusedFunction:src/misc/gdrwrap.cc:144 +unusedFunction:src/misc/gdrwrap.cc:158 +unusedFunction:src/misc/gdrwrap.cc:172 +unusedFunction:src/misc/gdrwrap.cc:186 +unusedFunction:src/misc/gdrwrap.cc:200 +unusedFunction:src/misc/gdrwrap.cc:209 +unusedFunction:src/misc/gdrwrap.cc:218 +unusedFunction:src/misc/gdrwrap.cc:232 +unusedFunction:src/misc/gdrwrap.cc:52 +unusedFunction:src/misc/ibvwrap.cc:203 +unusedFunction:src/misc/ibvwrap.cc:239 +unusedFunction:src/misc/ibvwrap.cc:255 +unusedFunction:src/misc/nvmlwrap.cc:112 +unusedFunction:src/misc/nvmlwrap_stub.cc:31 +unusedFunction:src/misc/nvmlwrap_stub.cc:35 +unusedFunction:src/transport.cc:71 +unusedLabel:src/bootstrap.cc:349 +unusedLabel:src/clique/ShmObject.h:112 +unusedLabel:src/clique/ShmObject.h:204 +unusedLabel:src/enqueue.cc:108 +unusedLabel:src/enqueue.cc:1093 +unusedLabel:src/enqueue.cc:989 +unusedLabel:src/init.cc:1189 +unusedLabel:src/init.cc:1240 +unusedLabel:src/init.cc:1267 +unusedLabel:src/transport.cc:238 +unusedStructMember:src/graph/xml.cc:410 +unusedStructMember:src/graph/xml.cc:411 +unusedStructMember:src/graph/xml.cc:412 +unusedStructMember:src/graph/xml.cc:428 +unusedStructMember:src/graph/xml.cc:431 +unusedStructMember:src/graph/xml.cc:432 +unusedStructMember:src/graph/xml.cc:435 +unusedStructMember:src/graph/xml.cc:437 +variableScope:src/graph/search.cc:494 +variableScope:src/init.cc:240 +variableScope:src/transport/net_ib.cc:117 +variableScope:src/transport/net_socket.cc:431 diff --git a/src/clique/CliqueManager.cc b/src/clique/CliqueManager.cc index 7870ff9be..78c00974a 100644 --- a/src/clique/CliqueManager.cc +++ b/src/clique/CliqueManager.cc @@ -56,14 +56,26 @@ CliqueManager::CliqueManager(int const rank, cliqueMode_t const cliqueMode) : m_rank(rank), m_numRanks(numRanks), + m_hash(0), m_cliqueMode(cliqueMode), m_opIndexHead(0), m_opIndexTail(0), m_init(false), + m_gcnArch(0), + m_allReduceByteLimit(0), m_pinnedCliquePtrs(NULL), - m_fineGrainBarrierMem(NULL) -{ -} + m_gpuBarrierGlobalCount(NULL), + m_gpuBarrierGlobalSense(NULL), + m_gpuBarrierLocalSense(NULL), + m_cpuBarrierCount(NULL), + m_shmHandles(), + m_ipcHandleSendCache(), + m_ipcHandleRecvCache(), + m_sharedCpuMemory(), + m_sharedIpcHandle(), + m_fineGrainBarrierMem(NULL), + m_sharedBarrierCount(NULL) +{} CliqueManager::~CliqueManager() { @@ -128,11 +140,6 @@ ncclResult_t CliqueManager::Init(ncclUniqueId const* commId, int suffix) WARN("Invalid rank specified. Expected 0 <= %d < %d for CliqueManager", m_rank, m_numRanks); return ncclInvalidUsage; } - if (commId == NULL) - { - WARN("CommId should not be empty"); - return ncclInvalidUsage; - } // For now, opt-into clique based kernels via RCCL_ENABLE_CLIQUE env var if (!rcclParamEnableClique()) @@ -350,7 +357,7 @@ ncclResult_t CliqueManager::GetNumChannelsToUse(ncclFunc_t const coll, ncclDataType_t const datatype, ncclRedOp_t const op, int const totalNumChannels, - uint8_t* numChannelstoUse) + uint8_t* numChannelstoUse) const { size_t const totalBytes = count * ncclTypeSize(datatype); *numChannelstoUse = 1; @@ -467,20 +474,6 @@ ncclResult_t CliqueManager::WaitForPointers(ncclWorkElem* args) return ncclSuccess; } -std::string HandleToString(hipIpcMemHandle_t handle) -{ - char mapping[17] = "0123456789ABCDEF"; - std::string result; - for (int i = 0; i < 4; i++) - { - unsigned char val = (unsigned char)handle.reserved[i]; - result += mapping[val / 16]; - result += mapping[val % 16]; - } - return result; -} - - ncclResult_t CliqueManager::CheckCacheForPtr(void* devPtr, NcclIpcHandleSendCache* cache, int rank, diff --git a/src/clique/CliqueManager.h b/src/clique/CliqueManager.h index 45f566c32..2fbef0631 100644 --- a/src/clique/CliqueManager.h +++ b/src/clique/CliqueManager.h @@ -69,7 +69,7 @@ class CliqueManager ncclDataType_t const datatype, ncclRedOp_t const op, int const totalNumChannels, - uint8_t* numChannelstoUse); + uint8_t* numChannelstoUse) const; // Blocking call that only returns the in-progress clique pointers are ready // This needs to be called in same order as DeclarePointers @@ -79,14 +79,14 @@ class CliqueManager static ncclResult_t BootstrapRootInit(int pid, unsigned long hash); protected: - ncclResult_t CheckCacheForPtr(void* devPtr, - NcclIpcHandleSendCache* cache, - int rank, - std::pair* handlePair); - - ncclResult_t CheckCacheForHandle(std::pair const& handlePair, - NcclIpcHandleRecvCache* cache, - void** ptr); + static ncclResult_t CheckCacheForPtr(void* devPtr, + NcclIpcHandleSendCache* cache, + int rank, + std::pair* handlePair); + + static ncclResult_t CheckCacheForHandle(std::pair const& handlePair, + NcclIpcHandleRecvCache* cache, + void** ptr); int m_rank; // Associated rank int m_numRanks; // Total number of ranks diff --git a/src/clique/HandleCache.h b/src/clique/HandleCache.h index dc479e00e..978e19d41 100644 --- a/src/clique/HandleCache.h +++ b/src/clique/HandleCache.h @@ -114,7 +114,7 @@ class NcclIpcHandleCache void updateHistory(const iterator& it) { - if (m_lruHistory.size() > 0) + if (!m_lruHistory.empty()) { m_lruHistory.splice(m_lruHistory.end(), m_lruHistory, (it->second).second); } diff --git a/src/clique/HandleShm.cc b/src/clique/HandleShm.cc index 937390cf2..4e73fb4e3 100644 --- a/src/clique/HandleShm.cc +++ b/src/clique/HandleShm.cc @@ -28,7 +28,7 @@ THE SOFTWARE. #include "Hash.h" #include "shm.h" -NcclIpcHandleShm::NcclIpcHandleShm(int rank, int numRanks, int projid, int numHandlesPerRank, int capacity, std::string suffix) : +NcclIpcHandleShm::NcclIpcHandleShm(int rank, int numRanks, int projid, int numHandlesPerRank, int capacity, std::string const& suffix) : ShmObject>(numRanks * numHandlesPerRank * capacity * sizeof(std::pair), CliqueShmNames["IpcHandles"] + suffix, rank, @@ -39,7 +39,9 @@ NcclIpcHandleShm::NcclIpcHandleShm(int rank, int numRanks, int projid, int numHa { } -NcclIpcHandleShm::NcclIpcHandleShm() +NcclIpcHandleShm::NcclIpcHandleShm() : + m_numHandlesPerRank(0), + m_numHandlesPerOpCount(0) { } diff --git a/src/clique/HandleShm.h b/src/clique/HandleShm.h index c681de0eb..c3f47cb45 100644 --- a/src/clique/HandleShm.h +++ b/src/clique/HandleShm.h @@ -33,7 +33,7 @@ THE SOFTWARE. class NcclIpcHandleShm : public ShmObject> { public: - NcclIpcHandleShm(int rank, int numRanks, int projid, int numHandlesPerRank, int capacity, std::string suffix); + NcclIpcHandleShm(int rank, int numRanks, int projid, int numHandlesPerRank, int capacity, std::string const& suffix); NcclIpcHandleShm(); diff --git a/src/clique/MsgQueue.cc b/src/clique/MsgQueue.cc index ba1da846b..22cfbbfb2 100644 --- a/src/clique/MsgQueue.cc +++ b/src/clique/MsgQueue.cc @@ -28,7 +28,7 @@ THE SOFTWARE. #define MSG_SIZE 1 #define MSG_QUEUE_TIMEOUT 60 -ncclResult_t MsgQueueGetId(std::string name, bool exclusive, mqd_t& mq_desc) +ncclResult_t MsgQueueGetId(std::string const& name, bool exclusive, mqd_t& mq_desc) { int flag = (exclusive == true ? O_CREAT | O_EXCL : O_CREAT); struct mq_attr attr; @@ -83,7 +83,7 @@ ncclResult_t MsgQueueWaitUntilEmpty(mqd_t const& mq_desc) return ncclSuccess; } -ncclResult_t MsgQueueClose(std::string name, mqd_t& mq_desc, bool unlink) +ncclResult_t MsgQueueClose(std::string const& name, mqd_t& mq_desc, bool unlink) { if (unlink) { @@ -93,9 +93,9 @@ ncclResult_t MsgQueueClose(std::string name, mqd_t& mq_desc, bool unlink) return ncclSuccess; } -ncclResult_t MsgQueueUnlink(std::string name) +ncclResult_t MsgQueueUnlink(std::string const& name) { std::string mq_name = "/" + name; SYSCHECK(mq_unlink(mq_name.c_str()), "mq_unlink"); return ncclSuccess; -} \ No newline at end of file +} diff --git a/src/clique/MsgQueue.h b/src/clique/MsgQueue.h index af91add38..74513d912 100644 --- a/src/clique/MsgQueue.h +++ b/src/clique/MsgQueue.h @@ -29,11 +29,11 @@ THE SOFTWARE. #include "nccl.h" #include "core.h" -ncclResult_t MsgQueueGetId(std::string name, bool exclusive, mqd_t& mq_desc); +ncclResult_t MsgQueueGetId(std::string const& name, bool exclusive, mqd_t& mq_desc); ncclResult_t MsgQueueSend(mqd_t const& mq_desc, const char* msgp, size_t msgsz); ncclResult_t MsgQueueRecv(mqd_t const& mq_desc, char* msgp, size_t msgsz); ncclResult_t MsgQueueWaitUntilEmpty(mqd_t const& mq_desc); -ncclResult_t MsgQueueClose(std::string name, mqd_t& mq_desc, bool unlink); -ncclResult_t MsgQueueUnlink(std::string name); +ncclResult_t MsgQueueClose(std::string const& name, mqd_t& mq_desc, bool unlink); +ncclResult_t MsgQueueUnlink(std::string const& name); #endif diff --git a/src/clique/ShmObject.h b/src/clique/ShmObject.h index e62adec65..4663084f9 100644 --- a/src/clique/ShmObject.h +++ b/src/clique/ShmObject.h @@ -53,8 +53,8 @@ template class ShmObject { public: -ShmObject(size_t size, std::string fileName, int rank, int numRanks, int projid) : - m_shmSize(size), + ShmObject(size_t size, std::string const& fileName, int rank, int numRanks, int projid) : + m_shmSize(size), m_shmName(fileName), m_rank(rank), m_numRanks(numRanks), @@ -62,7 +62,14 @@ ShmObject(size_t size, std::string fileName, int rank, int numRanks, int projid) m_alloc(false), m_shmPtr(nullptr) {} - ShmObject() {} + ShmObject() : + m_shmSize(0), + m_shmName(""), + m_rank(0), + m_numRanks(0), + m_projid(0), + m_alloc(false), + m_shmPtr(nullptr) {} ~ShmObject() {} @@ -82,7 +89,7 @@ ShmObject(size_t size, std::string fileName, int rank, int numRanks, int projid) return m_shmPtr; } protected: - ncclResult_t BroadcastMessage(mqd_t& mq_desc, bool pass) + ncclResult_t BroadcastMessage(mqd_t& mq_desc, bool pass) const { char msg_text[1]; msg_text[0] = (pass == 0 ? 'F': 'P'); @@ -112,11 +119,11 @@ ShmObject(size_t size, std::string fileName, int rank, int numRanks, int projid) template struct OpenTag{}; - ncclResult_t InitIfSemaphore(OpenTag tag); + static ncclResult_t InitIfSemaphore(OpenTag tag); ncclResult_t InitIfSemaphore(OpenTag tag); - ncclResult_t InitIfSemaphore(OpenTag tag); + static ncclResult_t InitIfSemaphore(OpenTag tag); ncclResult_t InitIfSemaphore(OpenTag tag); - ncclResult_t InitIfSemaphore(OpenTag> tag); + static ncclResult_t InitIfSemaphore(OpenTag> tag); size_t m_shmSize; std::string m_shmName; @@ -134,9 +141,6 @@ ncclResult_t ShmObject::Open() if (m_alloc == false) { int shmFd; - int protection = PROT_READ | PROT_WRITE; - int visibility = MAP_SHARED; - INFO(NCCL_INIT, "Rank %d Initializing message queue for %s\n", m_rank, m_shmName.c_str()); NCCLCHECK(MsgQueueGetId(m_shmName, false, mq_desc));