Skip to content

Commit

Permalink
Merge pull request #116 from opcm/push_201902
Browse files Browse the repository at this point in the history
Push 201902
  • Loading branch information
opcm authored Feb 7, 2019
2 parents bf28a6a + 58de288 commit 0e9461a
Show file tree
Hide file tree
Showing 9 changed files with 180 additions and 145 deletions.
154 changes: 72 additions & 82 deletions cpucounters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -794,27 +794,56 @@ bool PCM::discoverSystemTopology()
socketIdMap_type socketIdMap;

PCM_CPUID_INFO cpuid_args;
pcm_cpuid(1, cpuid_args);

int apic_ids_per_package = extract_bits_ui(cpuid_args.array[1], 16, 23);
int apic_ids_per_core;
// init constants for CPU topology leaf 0xB
// adapted from Topology Enumeration Reference code for Intel 64 Architecture
// https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration
int wasCoreReported = 0, wasThreadReported = 0;
int subleaf = 0, levelType, levelShift;
//uint32 coreSelectMask = 0, smtSelectMask = 0;
uint32 smtMaskWidth = 0;
//uint32 pkgSelectMask = (-1), pkgSelectMaskShift = 0;
uint32 corePlusSMTMaskWidth = 0;
uint32 coreMaskWidth = 0;

if (apic_ids_per_package == 0)
{
std::cout << "apic_ids_per_package == 0" << std::endl;
return false;
TemporalThreadAffinity aff0(0);
do
{
pcm_cpuid(0xb, subleaf, cpuid_args);
if (cpuid_args.array[1] == 0)
{ // if EBX ==0 then this subleaf is not valid, we can exit the loop
break;
}
levelType = extract_bits_ui(cpuid_args.array[2], 8, 15);
levelShift = extract_bits_ui(cpuid_args.array[0], 0, 4);
switch (levelType)
{
case 1: //level type is SMT, so levelShift is the SMT_Mask_Width
smtMaskWidth = levelShift;
wasThreadReported = 1;
break;
case 2: //level type is Core, so levelShift is the CorePlusSMT_Mask_Width
corePlusSMTMaskWidth = levelShift;
wasCoreReported = 1;
break;
default:
break;
}
subleaf++;
} while (1);
}

pcm_cpuid(0xb, 0x0, cpuid_args);

if (extract_bits_ui(cpuid_args.array[2], 8, 15) == 0x1)
apic_ids_per_core = extract_bits_ui(cpuid_args.array[1], 0, 15);
if (wasThreadReported && wasCoreReported)
{
coreMaskWidth = corePlusSMTMaskWidth - smtMaskWidth;
}
else if (!wasCoreReported && wasThreadReported)
{
coreMaskWidth = smtMaskWidth;
}
else
apic_ids_per_core = 1;

if (apic_ids_per_core == 0)
{
std::cout << "apic_ids_per_core == 0" << std::endl;
std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11." << std::endl;
return false;
}

Expand All @@ -838,6 +867,14 @@ bool PCM::discoverSystemTopology()
<< " [the most significant bit = " << l2CacheMaskShift << "]" << std::endl;
#endif

// Fills the thread, core, socket and tile fields of `entry` from `apic_id`.
// The bit positions come from the captured smtMaskWidth / coreMaskWidth
// (derived from CPUID leaf 0xB earlier in this function) and from
// l2CacheMaskShift. extract_bits_ui is defined elsewhere; presumably it
// returns the inclusive bit range [begin, end] of its first argument.
// NOTE(review): smtMaskWidth and coreMaskWidth are unsigned, so the "- 1"
// below wraps around when a width is 0 -- confirm extract_bits_ui copes.
auto populateEntry = [&smtMaskWidth, &coreMaskWidth, &l2CacheMaskShift](TopologyEntry & entry, const int apic_id)
{
    // Lowest APIC-ID bit of the core field, and of the socket (package) field.
    const auto coreFirstBit = smtMaskWidth;
    const auto socketFirstBit = smtMaskWidth + coreMaskWidth;

    entry.thread_id = extract_bits_ui(apic_id, 0, coreFirstBit - 1);
    entry.core_id = extract_bits_ui(apic_id, coreFirstBit, socketFirstBit - 1);
    entry.socket = extract_bits_ui(apic_id, socketFirstBit, 31);
    // The tile is everything from the L2-cache sharing shift upwards.
    entry.tile_id = extract_bits_ui(apic_id, l2CacheMaskShift, 31);
};

#ifdef _MSC_VER
// version for Windows 7 and later version

Expand Down Expand Up @@ -897,8 +934,8 @@ bool PCM::discoverSystemTopology()

TopologyEntry entry;
entry.os_id = i;
entry.socket = apic_id / apic_ids_per_package;
entry.core_id = (apic_id % apic_ids_per_package) / apic_ids_per_core;

populateEntry(entry, apic_id);

topology.push_back(entry);
socketIdMap[entry.socket] = 0;
Expand All @@ -912,58 +949,6 @@ bool PCM::discoverSystemTopology()
TopologyEntry entry;

#ifdef __linux__
// init constants for CPU topology leaf 0xB
// adapted from Topology Enumeration Reference code for Intel 64 Architecture
// https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration
int wasCoreReported = 0, wasThreadReported = 0;
int subleaf = 0, levelType, levelShift;
//uint32 coreSelectMask = 0, smtSelectMask = 0;
uint32 smtMaskWidth = 0;
//uint32 pkgSelectMask = (-1), pkgSelectMaskShift = 0;
uint32 corePlusSMTMaskWidth = 0;
uint32 coreMaskWidth = 0;

// This code needs to run affinitized to a single core, how do we make sure of that?
do
{
pcm_cpuid(0xb, subleaf, cpuid_args);
if (cpuid_args.array[1] == 0)
{ // if EBX ==0 then this subleaf is not valid, we can exit the loop
break;
}
levelType = extract_bits_ui(cpuid_args.array[2], 8, 15);
levelShift = extract_bits_ui(cpuid_args.array[0], 0, 4);
switch (levelType)
{
case 1: //level type is SMT, so levelShift is the SMT_Mask_Width
smtMaskWidth = levelShift;
wasThreadReported = 1;
break;
case 2: //level type is Core, so levelShift is the CorePlusSMT_Mask_Width
corePlusSMTMaskWidth = levelShift;
wasCoreReported = 1;
break;
default:
break;
}
subleaf++;
} while (1);

if(wasThreadReported && wasCoreReported)
{
coreMaskWidth = corePlusSMTMaskWidth - smtMaskWidth;
}
else if (!wasCoreReported && wasThreadReported)
{
coreMaskWidth = smtMaskWidth;
}
else
{
std::cerr << "ERROR: Major problem? No leaf 0 under cpuid function 11." << std::endl;
return false;
}


num_cores = readMaxFromSysFS("/sys/devices/system/cpu/present");
if(num_cores == -1)
{
Expand Down Expand Up @@ -995,10 +980,7 @@ bool PCM::discoverSystemTopology()
pcm_cpuid(0xb, 0x0, cpuid_args);
int apic_id = cpuid_args.array[3];

entry.thread_id = extract_bits_ui(apic_id, 0, smtMaskWidth-1);
entry.core_id = extract_bits_ui(apic_id, smtMaskWidth, smtMaskWidth+coreMaskWidth-1);
entry.socket = extract_bits_ui(apic_id, smtMaskWidth+coreMaskWidth, 31);
entry.tile_id = extract_bits_ui(apic_id, l2CacheMaskShift, 31);
populateEntry(entry, apic_id);

topology[entry.os_id] = entry;
socketIdMap[entry.socket] = 0;
Expand Down Expand Up @@ -1096,8 +1078,8 @@ bool PCM::discoverSystemTopology()
apic_id = cpuid_args_freebsd.data[3];

entry.os_id = i;
entry.socket = apic_id / apic_ids_per_package;
entry.core_id = (apic_id % apic_ids_per_package) / apic_ids_per_core;

populateEntry(entry, apic_id);

if (entry.socket == 0 && entry.core_id == 0) ++threads_per_core;

Expand Down Expand Up @@ -4187,34 +4169,42 @@ ServerUncorePowerState PCM::getServerUncorePowerState(uint32 socket)
server_pcicfg_uncore[socket]->freezeCounters();
for(uint32 port=0;port < (uint32)server_pcicfg_uncore[socket]->getNumQPIPorts();++port)
{
assert(port < result.QPIClocks.size());
result.QPIClocks[port] = server_pcicfg_uncore[socket]->getQPIClocks(port);
assert(port < result.QPIL0pTxCycles.size());
result.QPIL0pTxCycles[port] = server_pcicfg_uncore[socket]->getQPIL0pTxCycles(port);
assert(port < result.QPIL1Cycles.size());
result.QPIL1Cycles[port] = server_pcicfg_uncore[socket]->getQPIL1Cycles(port);
}
for (uint32 channel = 0; channel < (uint32)server_pcicfg_uncore[socket]->getNumMCChannels(); ++channel)
{
assert(channel < result.DRAMClocks.size());
result.DRAMClocks[channel] = server_pcicfg_uncore[socket]->getDRAMClocks(channel);
for(uint32 cnt=0;cnt<4;++cnt)
result.MCCounter[channel][cnt] = server_pcicfg_uncore[socket]->getMCCounter(channel,cnt);
assert(channel < result.MCCounter.size());
for (uint32 cnt = 0; cnt < ServerUncorePowerState::maxCounters; ++cnt)
result.MCCounter[channel][cnt] = server_pcicfg_uncore[socket]->getMCCounter(channel, cnt);
}
for (uint32 channel = 0; channel < (uint32)server_pcicfg_uncore[socket]->getNumEDCChannels(); ++channel)
{
assert(channel < result.MCDRAMClocks.size());
result.MCDRAMClocks[channel] = server_pcicfg_uncore[socket]->getMCDRAMClocks(channel);
for(uint32 cnt=0;cnt<4;++cnt)
result.EDCCounter[channel][cnt] = server_pcicfg_uncore[socket]->getEDCCounter(channel,cnt);
assert(channel < result.EDCCounter.size());
for (uint32 cnt = 0; cnt < ServerUncorePowerState::maxCounters; ++cnt)
result.EDCCounter[channel][cnt] = server_pcicfg_uncore[socket]->getEDCCounter(channel, cnt);
}
for (uint32 controller = 0; controller < (uint32)server_pcicfg_uncore[socket]->getNumMC(); ++controller)
{
for(uint32 cnt=0;cnt<4;++cnt)
result.M2MCounter[controller][cnt] = server_pcicfg_uncore[socket]->getM2MCounter(controller,cnt);
assert(controller < result.M2MCounter.size());
for (uint32 cnt = 0; cnt < ServerUncorePowerState::maxCounters; ++cnt)
result.M2MCounter[controller][cnt] = server_pcicfg_uncore[socket]->getM2MCounter(controller, cnt);
}
server_pcicfg_uncore[socket]->unfreezeCounters();
}
if(MSR.size())
{
uint32 refCore = socketRefCore[socket];
TemporalThreadAffinity tempThreadAffinity(refCore);
for (int i = 0; i < 4 && socket < pcuPMUs.size(); ++i)
for (int i = 0; i < ServerUncorePowerState::maxCounters && socket < pcuPMUs.size(); ++i)
result.PCUCounter[i] = *pcuPMUs[socket].counterValue[i];
// std::cout<< "values read: " << result.PCUCounter[0]<<" "<<result.PCUCounter[1] << " " << result.PCUCounter[2] << " " << result.PCUCounter[3] << std::endl;
uint64 val=0;
Expand Down
48 changes: 26 additions & 22 deletions cpucounters.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
#include "exceptions/unsupported_processor_exception.hpp"

#include <vector>
#include <array>
#include <limits>
#include <string>
#include <memory>
Expand Down Expand Up @@ -1573,8 +1574,9 @@ class PCM_API PCM
return (
cpu_model == PCM::HASWELLX
|| cpu_model == PCM::BDX
|| cpu_model == PCM::SKX
);
|| cpu_model == PCM::SKX
|| cpu_model == PCM::SKL
);
}

bool PMMTrafficMetricsAvailable() const
Expand Down Expand Up @@ -2215,13 +2217,21 @@ class UncoreCounterState
//!
class ServerUncorePowerState : public UncoreCounterState
{
uint64 QPIClocks[3], QPIL0pTxCycles[3], QPIL1Cycles[3];
uint64 DRAMClocks[8];
uint64 MCDRAMClocks[16];
uint64 MCCounter[8][4]; // channel X counter
uint64 M2MCounter[2][4]; // M2M/iMC boxes x counter
uint64 EDCCounter[8][4]; // EDC controller X counter
uint64 PCUCounter[4];
public:
enum {
maxControllers = 2,
maxChannels = 8,
maxXPILinks = 3,
maxCounters = 4
};
private:
std::array<uint64, maxXPILinks> QPIClocks, QPIL0pTxCycles, QPIL1Cycles;
std::array<uint64, maxChannels> DRAMClocks;
std::array<uint64, maxChannels> MCDRAMClocks;
std::array<std::array<uint64, maxCounters>, maxChannels> MCCounter; // channel X counter
std::array<std::array<uint64, maxCounters>, maxControllers> M2MCounter; // M2M/iMC boxes x counter
std::array<std::array<uint64, maxCounters>, maxChannels> EDCCounter; // EDC controller X counter
std::array<uint64, maxCounters> PCUCounter;
int32 PackageThermalHeadroom;
uint64 InvariantTSC; // invariant time stamp counter
friend class PCM;
Expand Down Expand Up @@ -2254,22 +2264,16 @@ class ServerUncorePowerState : public UncoreCounterState
//! Returns current thermal headroom below TjMax
int32 getPackageThermalHeadroom() const { return PackageThermalHeadroom; }
ServerUncorePowerState() :
QPIClocks{}, QPIL0pTxCycles{}, QPIL1Cycles{},
DRAMClocks{},
MCDRAMClocks{},
MCCounter{},
M2MCounter{},
EDCCounter{},
PCUCounter{},
PackageThermalHeadroom(0),
InvariantTSC(0)
{
memset(&(QPIClocks[0]), 0, 3 * sizeof(uint64));
memset(&(QPIL0pTxCycles[0]), 0, 3 * sizeof(uint64));
memset(&(QPIL1Cycles[0]), 0, 3 * sizeof(uint64));
memset(&(DRAMClocks[0]), 0, 8 * sizeof(uint64));
memset(&(MCDRAMClocks[0]), 0, 16 * sizeof(uint64));
memset(&(PCUCounter[0]), 0, 4 * sizeof(uint64));
for (int i = 0; i < 8; ++i) {
memset(&(MCCounter[i][0]), 0, 4 * sizeof(uint64));
memset(&(EDCCounter[i][0]), 0, 4 * sizeof(uint64));
}
for (int i = 0; i < 2; ++i) {
memset(&(M2MCounter[i][0]), 0, 4 * sizeof(uint64));
}
}
};

Expand Down
8 changes: 4 additions & 4 deletions daemon/client/client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ namespace PCMDaemon {

//Set last updated timestamp to avoid a detected change
//when the client starts
lastUpdatedClientTsc_ = sharedPCMState_->lastUpdateTsc;
lastUpdatedClientTsc_ = sharedPCMState_->lastUpdateTscEnd;
}

PCMDaemon::SharedPCMState& Client::read()
Expand All @@ -75,7 +75,7 @@ namespace PCMDaemon {
// Check client version matches daemon version
if(strlen(sharedPCMState_->version) > 0 && strcmp(sharedPCMState_->version, VERSION) != 0)
{
std::cout << sharedPCMState_->lastUpdateTsc << " " << lastUpdatedClientTsc_ << std::endl;
std::cout << sharedPCMState_->lastUpdateTscEnd << " " << lastUpdatedClientTsc_ << std::endl;
std::stringstream ss;
ss << "Out of date PCM daemon client. Client version: " << VERSION << " Daemon version: " << sharedPCMState_->version;

Expand All @@ -85,7 +85,7 @@ namespace PCMDaemon {
if(countersHaveUpdated())
{
//There is new data
lastUpdatedClientTsc_ = sharedPCMState_->lastUpdateTsc;
lastUpdatedClientTsc_ = sharedPCMState_->lastUpdateTscEnd;

return *sharedPCMState_;
}
Expand All @@ -99,7 +99,7 @@ namespace PCMDaemon {

bool Client::countersHaveUpdated()
{
return lastUpdatedClientTsc_ != sharedPCMState_->lastUpdateTsc;
return lastUpdatedClientTsc_ != sharedPCMState_->lastUpdateTscEnd;
}

void Client::setupSharedMemory()
Expand Down
2 changes: 1 addition & 1 deletion daemon/client/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ int main(int argc, char *argv[])

// Display internal metrics
printTitle("Last updated TSC");
std::cout << state.lastUpdateTsc << std::endl;
std::cout << state.lastUpdateTscEnd << std::endl;

printTitle("Timestamp");
std::cout << state.timestamp << std::endl;
Expand Down
13 changes: 9 additions & 4 deletions daemon/daemon/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#include <stdint.h>

static const char DEFAULT_SHM_ID_LOCATION[] = "/tmp/opcm-daemon-shm-id";
static const char VERSION[] = "1.0.4";
static const char VERSION[] = "1.0.5";

#define MAX_CPU_CORES 4096
#define MAX_SOCKETS 256
Expand Down Expand Up @@ -231,16 +231,21 @@ namespace PCMDaemon {

struct SharedPCMState {
char version[VERSION_SIZE];
uint64 lastUpdateTsc;
uint64 lastUpdateTscBegin;
uint64 timestamp;
uint64 cyclesToGetPCMState;
uint32 pollMs;
SharedPCMCounters pcm;
uint64 lastUpdateTscEnd;

public:
SharedPCMState() :
lastUpdateTsc(0),
pollMs(-1) {
lastUpdateTscBegin(0),
timestamp(0),
cyclesToGetPCMState(0),
pollMs(-1),
lastUpdateTscEnd(0)
{
memset(this->version, '\0', sizeof(char)*VERSION_SIZE);
}
} ALIGN(ALIGNMENT);
Expand Down
Loading

0 comments on commit 0e9461a

Please sign in to comment.