Skip to content

Commit

Permalink
Merge pull request #234 from opcm/opcm-push-29-09-2020
Browse files Browse the repository at this point in the history
Opcm push 29 09 2020
  • Loading branch information
opcm authored Oct 1, 2020
2 parents e712007 + edd35b9 commit f510546
Show file tree
Hide file tree
Showing 10 changed files with 361 additions and 116 deletions.
1 change: 0 additions & 1 deletion PCM_RAW_README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ pcm-raw allows to collect arbitrary core and uncore PMU events by providing raw
Currently supported PMUs: core, m3upi, upi(ll)/qpi(ll), imc, m2m, pcu, cha/cbo, iio, ubox

Current limitations:
- programming fixed PMU counters is not supported yet
- event multiplexing not supported

Recommended usage (as privileged/root user):
Expand Down
137 changes: 91 additions & 46 deletions cpucounters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1566,22 +1566,46 @@ void PCM::initUncorePMUsDirect()
case BDX:
handle->write(MSR_UNCORE_PMON_GLOBAL_CTL, 1ULL << 29ULL);
break;
case IVYTOWN:
handle->write(IVT_MSR_UNCORE_PMON_GLOBAL_CTL, 1ULL << 29ULL);
break;
}
if (IVYTOWN == cpu_model || JAKETOWN == cpu_model)
{
uboxPMUs.push_back(
UncorePMU(
std::shared_ptr<MSRRegister>(),
std::make_shared<MSRRegister>(handle, JKTIVT_UBOX_MSR_PMON_CTL0_ADDR),
std::make_shared<MSRRegister>(handle, JKTIVT_UBOX_MSR_PMON_CTL1_ADDR),
std::shared_ptr<MSRRegister>(),
std::shared_ptr<MSRRegister>(),
std::make_shared<MSRRegister>(handle, JKTIVT_UBOX_MSR_PMON_CTR0_ADDR),
std::make_shared<MSRRegister>(handle, JKTIVT_UBOX_MSR_PMON_CTR1_ADDR),
std::shared_ptr<MSRRegister>(),
std::shared_ptr<MSRRegister>(),
std::make_shared<MSRRegister>(handle, JKTIVT_UCLK_FIXED_CTL_ADDR),
std::make_shared<MSRRegister>(handle, JKTIVT_UCLK_FIXED_CTR_ADDR)
)
);
}
else
{
uboxPMUs.push_back(
UncorePMU(
std::shared_ptr<MSRRegister>(),
std::make_shared<MSRRegister>(handle, UBOX_MSR_PMON_CTL0_ADDR),
std::make_shared<MSRRegister>(handle, UBOX_MSR_PMON_CTL1_ADDR),
std::shared_ptr<MSRRegister>(),
std::shared_ptr<MSRRegister>(),
std::make_shared<MSRRegister>(handle, UBOX_MSR_PMON_CTR0_ADDR),
std::make_shared<MSRRegister>(handle, UBOX_MSR_PMON_CTR1_ADDR),
std::shared_ptr<MSRRegister>(),
std::shared_ptr<MSRRegister>(),
std::make_shared<MSRRegister>(handle, UCLK_FIXED_CTL_ADDR),
std::make_shared<MSRRegister>(handle, UCLK_FIXED_CTR_ADDR)
)
);
}
uboxPMUs.push_back(
UncorePMU(
std::shared_ptr<MSRRegister>(),
std::make_shared<MSRRegister>(handle, UBOX_MSR_PMON_CTL0_ADDR),
std::make_shared<MSRRegister>(handle, UBOX_MSR_PMON_CTL1_ADDR),
std::shared_ptr<MSRRegister>(),
std::shared_ptr<MSRRegister>(),
std::make_shared<MSRRegister>(handle, UBOX_MSR_PMON_CTR0_ADDR),
std::make_shared<MSRRegister>(handle, UBOX_MSR_PMON_CTR1_ADDR),
std::shared_ptr<MSRRegister>(),
std::shared_ptr<MSRRegister>(),
std::make_shared<MSRRegister>(handle, UCLK_FIXED_CTL_ADDR),
std::make_shared<MSRRegister>(handle, UCLK_FIXED_CTR_ADDR)
)
);
switch (cpu_model)
{
case IVYTOWN:
Expand Down Expand Up @@ -3819,47 +3843,56 @@ void PCM::programPCU(uint32* PCUCntConf, const uint64 filter)

pcuPMUs[i].initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN);

*pcuPMUs[i].filter[0] = filter;
if (pcuPMUs[i].filter[0].get())
{
*pcuPMUs[i].filter[0] = filter;
}

program(pcuPMUs[i], &PCUCntConf[0], &PCUCntConf[4], UNC_PMON_UNIT_CTL_FRZ_EN);
}
}

PCM::ErrorCode PCM::program(const RawPMUConfigs& allPMUConfigs_)
{
if (MSR.empty() || server_pcicfg_uncore.empty()) return PCM::MSRAccessDenied;
if (MSR.empty()) return PCM::MSRAccessDenied;
RawPMUConfigs allPMUConfigs = allPMUConfigs_;
constexpr auto globalRegPos = 0;
if (allPMUConfigs.count("core"))
{
// need to program core PMU first
EventSelectRegister regs[PERF_MAX_COUNTERS];
EventSelectRegister regs[PERF_MAX_CUSTOM_COUNTERS];
PCM::ExtendedCustomCoreEventDescription conf;
conf.OffcoreResponseMsrValue[0] = 0;
conf.OffcoreResponseMsrValue[1] = 0;
FixedEventControlRegister fixedReg;

auto corePMUConfig = allPMUConfigs["core"];
if (corePMUConfig.size() > (size_t)getMaxCustomCoreEvents())
if (corePMUConfig.programmable.size() > (size_t)getMaxCustomCoreEvents())
{
std::cerr << "ERROR: trying to program " << corePMUConfig.size() << " core PMU counters, which exceeds the max num possible ("<< getMaxCustomCoreEvents() << ").";
std::cerr << "ERROR: trying to program " << corePMUConfig.programmable.size() << " core PMU counters, which exceeds the max num possible ("<< getMaxCustomCoreEvents() << ").";
return PCM::UnknownError;
}
size_t c = 0;
for (; c < corePMUConfig.size() && c < (size_t)getMaxCustomCoreEvents() && c < PERF_MAX_COUNTERS; ++c)
{
regs[c].value = corePMUConfig[c].first[0];
}
if (corePMUConfig.size() > 0 && corePMUConfig[globalRegPos].first[1] != 0)
for (; c < corePMUConfig.programmable.size() && c < (size_t)getMaxCustomCoreEvents() && c < PERF_MAX_COUNTERS; ++c)
{
conf.OffcoreResponseMsrValue[0] = corePMUConfig[globalRegPos].first[1];
regs[c].value = corePMUConfig.programmable[c].first[0];
}
if (corePMUConfig.size() > 0 && corePMUConfig[globalRegPos].first[2] != 0)
if (globalRegPos < corePMUConfig.programmable.size())
{
conf.OffcoreResponseMsrValue[1] = corePMUConfig[globalRegPos].first[2];
conf.OffcoreResponseMsrValue[0] = corePMUConfig.programmable[globalRegPos].first[1];
conf.OffcoreResponseMsrValue[1] = corePMUConfig.programmable[globalRegPos].first[2];
}
conf.fixedCfg = NULL; // default
conf.nGPCounters = (uint32)c;
conf.gpCounterCfg = regs;
if (corePMUConfig.fixed.empty())
{
conf.fixedCfg = NULL; // default
}
else
{
fixedReg.value = corePMUConfig.fixed[0].first[0];
conf.fixedCfg = &fixedReg;
}

const auto status = program(PCM::EXT_CUSTOM_CORE_EVENTS, &conf);
if (status != PCM::Success)
Expand All @@ -3872,21 +3905,21 @@ PCM::ErrorCode PCM::program(const RawPMUConfigs& allPMUConfigs_)
{
const auto & type = pmuConfig.first;
const auto & events = pmuConfig.second;
if (events.empty())
if (events.programmable.empty() && events.fixed.empty())
{
continue;
}
if (events.size() > ServerUncoreCounterState::maxCounters)
if (events.programmable.size() > ServerUncoreCounterState::maxCounters)
{
std::cerr << "ERROR: trying to program " << events.size() << " core PMU counters, which exceeds the max num possible (" << ServerUncoreCounterState::maxCounters << ").";
std::cerr << "ERROR: trying to program " << events.programmable.size() << " core PMU counters, which exceeds the max num possible (" << ServerUncoreCounterState::maxCounters << ").";
return PCM::UnknownError;
}
uint32 events32[ServerUncoreCounterState::maxCounters] = { 0,0,0,0 };
uint64 events64[ServerUncoreCounterState::maxCounters] = { 0,0,0,0 };
for (size_t c = 0; c < events.size() && c < ServerUncoreCounterState::maxCounters; ++c)
for (size_t c = 0; c < events.programmable.size() && c < ServerUncoreCounterState::maxCounters; ++c)
{
events32[c] = (uint32)events[c].first[0];
events64[c] = events[c].first[0];
events32[c] = (uint32)events.programmable[c].first[0];
events64[c] = events.programmable[c].first[0];
}
if (type == "m3upi")
{
Expand All @@ -3913,20 +3946,31 @@ PCM::ErrorCode PCM::program(const RawPMUConfigs& allPMUConfigs_)
{
for (auto uncore : server_pcicfg_uncore)
{
uncore->programM2M(events32);
uncore->programM2M(events64);
}
}
else if (type == "pcu")
{
programPCU(events32, events[globalRegPos].first[1]);
uint64 filter = 0;
if (globalRegPos < events.programmable.size())
{
filter = events.programmable[globalRegPos].first[1];
}
programPCU(events32, filter);
}
else if (type == "ubox")
{
programUBOX(events64);
}
else if (type == "cbo" || type == "cha")
{
programCboRaw(events64, events[globalRegPos].first[1], events[globalRegPos].first[2]);
uint64 filter0 = 0, filter1 = 0;
if (globalRegPos < events.programmable.size())
{
filter0 = events.programmable[globalRegPos].first[1];
filter1 = events.programmable[globalRegPos].first[2];
}
programCboRaw(events64, filter0, filter1);
}
else if (type == "iio")
{
Expand Down Expand Up @@ -4541,6 +4585,7 @@ ServerUncoreCounterState PCM::getServerUncoreCounterState(uint32 socket)
for (int i = 0; i < 2 && socket < uboxPMUs.size(); ++i)
{
result.UBOXCounter[i] = *(uboxPMUs[socket].counterValue[i]);
result.UncClocks = getUncoreClocks(socket);
}
for (int i = 0; i < ServerUncoreCounterState::maxCounters && socket < pcuPMUs.size(); ++i)
result.PCUCounter[i] = *pcuPMUs[socket].counterValue[i];
Expand Down Expand Up @@ -5194,10 +5239,10 @@ void ServerPCICFGUncore::initDirect(uint32 socket_, const PCM * pcm)
m2mPMUs.push_back(
UncorePMU(
std::make_shared<PCICFGRegister32>(handle, M2M_PCI_PMON_BOX_CTL_ADDR),
std::make_shared<PCICFGRegister32>(handle, M2M_PCI_PMON_CTL0_ADDR),
std::make_shared<PCICFGRegister32>(handle, M2M_PCI_PMON_CTL1_ADDR),
std::make_shared<PCICFGRegister32>(handle, M2M_PCI_PMON_CTL2_ADDR),
std::make_shared<PCICFGRegister32>(handle, M2M_PCI_PMON_CTL3_ADDR),
std::make_shared<PCICFGRegister64>(handle, M2M_PCI_PMON_CTL0_ADDR),
std::make_shared<PCICFGRegister64>(handle, M2M_PCI_PMON_CTL1_ADDR),
std::make_shared<PCICFGRegister64>(handle, M2M_PCI_PMON_CTL2_ADDR),
std::make_shared<PCICFGRegister64>(handle, M2M_PCI_PMON_CTL3_ADDR),
std::make_shared<PCICFGRegister64>(handle, M2M_PCI_PMON_CTR0_ADDR),
std::make_shared<PCICFGRegister64>(handle, M2M_PCI_PMON_CTR1_ADDR),
std::make_shared<PCICFGRegister64>(handle, M2M_PCI_PMON_CTR2_ADDR),
Expand Down Expand Up @@ -5941,15 +5986,15 @@ void ServerPCICFGUncore::programEDC(const uint32 * EDCCntConfig)

void ServerPCICFGUncore::programM2M()
{
uint32 cfg[4] = {0, 0, 0, 0};
uint64 cfg[4] = {0, 0, 0, 0};
cfg[EventPosition::NM_HIT] = M2M_PCI_PMON_CTL_EVENT(0x2c) + M2M_PCI_PMON_CTL_UMASK(3); // UNC_M2M_TAG_HIT.NM_DRD_HIT_* events (CLEAN | DIRTY)
cfg[EventPosition::M2M_CLOCKTICKS] = 0; // CLOCKTICKS
cfg[EventPosition::PMM_READ] = M2M_PCI_PMON_CTL_EVENT(0x37) + M2M_PCI_PMON_CTL_UMASK(0x8); // UNC_M2M_IMC_READS.TO_PMM
cfg[EventPosition::PMM_WRITE] = M2M_PCI_PMON_CTL_EVENT(0x38) + M2M_PCI_PMON_CTL_UMASK(0x20); // UNC_M2M_IMC_WRITES.TO_PMM
programM2M(cfg);
}

void ServerPCICFGUncore::programM2M(const uint32* M2MCntConfig)
void ServerPCICFGUncore::programM2M(const uint64* M2MCntConfig)
{
{
for (auto & pmu : m2mPMUs)
Expand Down Expand Up @@ -6586,12 +6631,12 @@ void PCM::programCboRaw(const uint64* events, const uint64 filter0, const uint64
{
cboPMUs[i][cbo].initFreeze(UNC_PMON_UNIT_CTL_FRZ_EN);

if (filter0)
if (cboPMUs[i][cbo].filter[0].get())
{
*cboPMUs[i][cbo].filter[0] = filter0;
}

if (filter1)
if (cboPMUs[i][cbo].filter[1].get())
{
*cboPMUs[i][cbo].filter[1] = filter1;
}
Expand Down
28 changes: 23 additions & 5 deletions cpucounters.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,12 @@ class PCICFGRegister64 : public HWRegister
offset(offset_)
{
}
void operator = (uint64 /*val*/) override
void operator = (uint64 val) override
{
std::cerr << "PCICFGRegister64 write operation is not supported\n";
throw std::exception();
cvt_ds cvt;
cvt.ui64 = val;
handle->write32(offset, cvt.ui32.low);
handle->write32(offset + sizeof(uint32), cvt.ui32.high);
}
operator uint64 () override
{
Expand Down Expand Up @@ -328,7 +330,7 @@ class ServerPCICFGUncore
PciHandleType * createIntelPerfMonDevice(uint32 groupnr, int32 bus, uint32 dev, uint32 func, bool checkVendor = false);
void programIMC(const uint32 * MCCntConfig);
void programEDC(const uint32 * EDCCntConfig);
void programM2M(const uint32 * M2MCntConfig);
void programM2M(const uint64 * M2MCntConfig);
void programM2M();
void programHA(const uint32 * config);
void programHA();
Expand Down Expand Up @@ -1066,7 +1068,11 @@ class PCM_API PCM
// or for cha/cbo {raw event, filter value}, etc
// + user-supplied name
typedef std::pair<std::array<uint64, 3>, std::string> RawEventConfig;
typedef std::vector<RawEventConfig> RawPMUConfig;
// Raw event configurations for a single PMU, separated by the kind of counter they target.
struct RawPMUConfig
{
// Events for the programmable counters: PCM::program() takes first[0] of entry c as the
// control value of counter c, and first[1]/first[2] of entry 0 as the PMU-wide extra values
// (offcore response MSR values for "core", filter values for "pcu" and "cbo"/"cha").
std::vector<RawEventConfig> programmable;
// Configuration for the fixed counters: for the "core" PMU, PCM::program() uses
// fixed[0].first[0] as the fixed counter control register value; when this vector is empty
// the default fixed counter configuration is kept.
std::vector<RawEventConfig> fixed;
};
typedef std::map<std::string, RawPMUConfig> RawPMUConfigs;
ErrorCode program(const RawPMUConfigs& allPMUConfigs);

Expand Down Expand Up @@ -2339,6 +2345,16 @@ uint64 getDRAMConsumedEnergy(const CounterStateType & before, const CounterState
return after.DRAMEnergyStatus - before.DRAMEnergyStatus;
}

/*! \brief Computes the number of uncore clock ticks elapsed between two counter states
\param before CPU counter state captured before the experiment
\param after CPU counter state captured after the experiment
\return difference between the UncClocks values of the after and before states
*/
template <class CounterStateType>
uint64 getUncoreClocks(const CounterStateType& before, const CounterStateType& after)
{
    const auto elapsedTicks = after.UncClocks - before.UncClocks;
    return elapsedTicks;
}

/*! \brief Returns Joules consumed by processor (excluding DRAM)
\param before CPU counter state before the experiment
\param after CPU counter state after the experiment
Expand Down Expand Up @@ -2410,6 +2426,8 @@ class UncoreCounterState
template <class CounterStateType>
friend uint64 getDRAMConsumedEnergy(const CounterStateType & before, const CounterStateType & after);
template <class CounterStateType>
friend uint64 getUncoreClocks(const CounterStateType& before, const CounterStateType& after);
template <class CounterStateType>
friend double getPackageCStateResidency(int state, const CounterStateType & before, const CounterStateType & after);
template <class CounterStateType>
friend uint64 getPackageCStateResidency(int state, const CounterStateType& now);
Expand Down
20 changes: 13 additions & 7 deletions pcm-core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,25 +147,29 @@ void print_usage(const string progname)
template <class StateType>
void print_custom_stats(const StateType & BeforeState, const StateType & AfterState ,bool csv, uint64 txn_rate)
{
uint64 cycles = getCycles(BeforeState, AfterState);
uint64 instr = getInstructionsRetired(BeforeState, AfterState);
const uint64 cycles = getCycles(BeforeState, AfterState);
const uint64 refCycles = getRefCycles(BeforeState, AfterState);
const uint64 instr = getInstructionsRetired(BeforeState, AfterState);
if(!csv)
{
cout << double(instr)/double(cycles);
if(txn_rate == 1)
{
cout << setw(14) << unit_format(instr);
cout << setw(11) << unit_format(cycles);
cout << setw(12) << unit_format(refCycles);
} else {
cout << setw(14) << double(instr)/double(txn_rate);
cout << setw(11) << double(cycles)/double(txn_rate);
cout << setw(12) << double(refCycles) / double(txn_rate);
}
}
else
{
cout << double(instr)/double(cycles) << ",";
cout << double(instr)/double(txn_rate) << ",";
cout << double(cycles)/double(txn_rate) << ",";
cout << double(refCycles) / double(txn_rate) << ",";
}
const auto max_ctr = PCM::getInstance()->getMaxCustomCoreEvents();
for (int i = 0; i < max_ctr; ++i)
Expand Down Expand Up @@ -532,8 +536,10 @@ int main(int argc, char * argv[])
calibrated_delay_ms = delay_ms - diff_usec/1000.0;
}
#endif

MySleepMs(calibrated_delay_ms);
if (sysCmd == NULL || numberOfIterations != 0 || m->isBlocked() == false)
{
MySleepMs(calibrated_delay_ms);
}

#ifndef _MSC_VER
calibrated = (calibrated + 1) % PCM_CALIBRATION_INTERVAL;
Expand Down Expand Up @@ -561,7 +567,7 @@ int main(int argc, char * argv[])
cout << "\n";
if (csv)
{
cout << "Core,IPC,Instructions,Cycles";
cout << "Core,IPC,Instructions,Cycles,RefCycles";
for (unsigned i = 0; i < conf.nGPCounters; ++i)
{
cout << ",Event" << i;
Expand All @@ -570,7 +576,7 @@ int main(int argc, char * argv[])
}
else
{
cout << "Core | IPC | Instructions | Cycles ";
cout << "Core | IPC | Instructions | Cycles | RefCycles ";
for (unsigned i = 0; i < conf.nGPCounters; ++i)
{
cout << "| Event" << i << " ";
Expand All @@ -592,7 +598,7 @@ int main(int argc, char * argv[])
cout << "*,";
else
{
cout << "-------------------------------------------------------------------------------------------------------------------\n";
cout << "---------------------------------------------------------------------------------------------------------------------------------\n";
cout << " * ";
}
print_custom_stats(SysBeforeState, SysAfterState, csv, txn_rate);
Expand Down
Loading

0 comments on commit f510546

Please sign in to comment.