feat add profiling
This commit is contained in:
171
3party/cppuprofile/lib/monitors/nvidiamonitor.cpp
Normal file
171
3party/cppuprofile/lib/monitors/nvidiamonitor.cpp
Normal file
@@ -0,0 +1,171 @@
|
||||
// Software Name : cppuprofile
|
||||
// SPDX-FileCopyrightText: Copyright (c) 2023 Orange
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
//
|
||||
// This software is distributed under the BSD License;
|
||||
// see the LICENSE file for more details.
|
||||
//
|
||||
// Author: Cédric CHEDALEUX <cedric.chedaleux@orange.com> et al.
|
||||
|
||||
#include "nvidiamonitor.h"
|
||||
|
||||
#include <algorithm>
#include <errno.h>
#include <exception>
#include <fcntl.h>
#include <iostream>
#include <memory>
#include <sstream>
#include <string.h>
#include <string>
#include <thread>

#if defined(__linux__)
#include <poll.h>
#include <signal.h>
#include <sys/wait.h>
#include <unistd.h>
#endif
|
||||
|
||||
// Diagnostic text written to stderr by this file: used as the prefix of every
// error message on Linux, and as the whole message on other platforms.
const string errorMsg = "Failed to monitor nvidia-smi process";
|
||||
|
||||
#if defined(__linux__)
|
||||
/**
 * Reads one CSV sample ("<gpu usage>, <used memory>, <total memory>\n") from
 * the given file descriptor and splits it into its three fields.
 *
 * The call blocks until at least one complete line is available.
 *
 * @param fd        descriptor to read from (the read end of the nvidia-smi pipe)
 * @param gpuUsage  receives the first field of the line
 * @param usedMem   receives the second field of the line
 * @param totalMem  receives the third field of the line
 * @return 0 on success, the errno value of a failed read(), or ENODATA when
 *         the stream ended before a full line was received or when the line
 *         does not contain any comma (unexpected command output).
 */
int read_nvidia_smi_stdout(int fd, std::string& gpuUsage, std::string& usedMem, std::string& totalMem)
{
    std::string line;
    while (line.find('\n') == std::string::npos) { // keep reading until a full line is buffered
        char buffer[4096];
        const ssize_t count = read(fd, buffer, sizeof(buffer));
        if (count == -1) {
            if (errno == EINTR) {
                continue; // interrupted by a signal before any data was transferred: retry
            }
            return errno;
        }
        if (count == 0) {
            // End of file: every write end of the pipe has been closed (the
            // producer exited). Without this check the loop would spin forever.
            return ENODATA;
        }
        line.append(buffer, static_cast<std::string::size_type>(count));
    }

    // Remove the commas so that the fields are only separated by whitespace
    // and can be extracted with an istringstream
    const auto newEnd = std::remove(line.begin(), line.end(), ',');
    if (newEnd == line.end()) { // no comma at all: something went wrong with the command
        return ENODATA;
    }
    line.erase(newEnd, line.end());

    std::istringstream fields(line);
    fields >> gpuUsage >> usedMem >> totalMem;

    return 0;
}
|
||||
#endif
|
||||
|
||||
// Nothing to set up at construction time: every member relies on its in-class
// default initializer and monitoring only begins when start() is called.
uprofile::NvidiaMonitor::NvidiaMonitor() = default;
|
||||
|
||||
// Makes sure the monitoring is stopped before the object goes away.
uprofile::NvidiaMonitor::~NvidiaMonitor()
{
    // Inside a destructor the call always resolves to this class' stop();
    // the qualification only makes that explicit.
    NvidiaMonitor::stop();
}
|
||||
|
||||
void uprofile::NvidiaMonitor::start(int period)
|
||||
{
|
||||
watchGPU(period);
|
||||
}
|
||||
|
||||
void uprofile::NvidiaMonitor::stop()
|
||||
{
|
||||
abortWatchGPU();
|
||||
}
|
||||
|
||||
float uprofile::NvidiaMonitor::getUsage() const
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_mutex);
|
||||
return m_gpuUsage;
|
||||
}
|
||||
|
||||
void uprofile::NvidiaMonitor::getMemory(int& usedMem, int& totalMem) const
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_mutex);
|
||||
usedMem = m_usedMem;
|
||||
totalMem = m_totalMem;
|
||||
}
|
||||
|
||||
void uprofile::NvidiaMonitor::watchGPU(int period)
|
||||
{
|
||||
if (m_watching) {
|
||||
return;
|
||||
}
|
||||
|
||||
#if defined(__linux__)
|
||||
char* args[5];
|
||||
args[0] = (char*)"/usr/bin/nvidia-smi";
|
||||
string period_arg = "-lms=" + to_string(period); // lms stands for continuous watching
|
||||
args[1] = (char*)period_arg.c_str();
|
||||
args[2] = (char*)"--query-gpu=utilization.gpu,memory.used,memory.total";
|
||||
args[3] = (char*)"--format=csv,noheader,nounits";
|
||||
args[4] = NULL;
|
||||
string output;
|
||||
int pipes[2];
|
||||
|
||||
// Create the pipe
|
||||
if (pipe(pipes) == -1) {
|
||||
cerr << errorMsg << ": pipe creation failed" << endl;
|
||||
return;
|
||||
}
|
||||
|
||||
// Create a child process for calling nvidia-smi
|
||||
pid_t pid = fork();
|
||||
|
||||
switch (pid) {
|
||||
case -1: /* Error */
|
||||
cerr << errorMsg << ": process fork failed" << endl;
|
||||
return;
|
||||
case 0: /* We are in the child process */
|
||||
while ((dup2(pipes[1], STDOUT_FILENO) == -1) && (errno == EINTR)) {
|
||||
}
|
||||
close(pipes[1]);
|
||||
close(pipes[0]);
|
||||
execv(args[0], args);
|
||||
cerr << "Failed to execute '" << args[0] << "': " << strerror(errno) << endl; /* execl doesn't return unless there's an error */
|
||||
exit(1);
|
||||
default: /* We are in the parent process */
|
||||
int stdout_fd = pipes[0];
|
||||
|
||||
// Start a thread to retrieve the child process stdout
|
||||
m_watching = true;
|
||||
m_watcherThread = unique_ptr<std::thread>(new thread([stdout_fd, pid, this]() {
|
||||
while (watching()) {
|
||||
string gpuUsage, usedMem, totalMem;
|
||||
// if the child process crashes, an error is raised here and threads ends up
|
||||
int err = read_nvidia_smi_stdout(stdout_fd, gpuUsage, usedMem, totalMem);
|
||||
if (err != 0) {
|
||||
cerr << errorMsg << ": read_error = " << strerror(err) << endl;
|
||||
m_mutex.lock();
|
||||
m_watching = false;
|
||||
m_mutex.unlock();
|
||||
break;
|
||||
}
|
||||
m_mutex.lock();
|
||||
m_gpuUsage = !gpuUsage.empty() ? stof(gpuUsage) : 0.f;
|
||||
m_usedMem = !usedMem.empty() ? stoi(usedMem) * 1024 : 0; // MiB to KiB
|
||||
m_totalMem = !totalMem.empty() ? stoi(totalMem) * 1024 : 0; // MiB to KiB
|
||||
m_mutex.unlock();
|
||||
}
|
||||
}));
|
||||
}
|
||||
#else
|
||||
cerr << errorMsg << endl;
|
||||
#endif
|
||||
}
|
||||
|
||||
void uprofile::NvidiaMonitor::abortWatchGPU()
|
||||
{
|
||||
#if defined(__linux__)
|
||||
if (m_watcherThread) {
|
||||
m_mutex.lock();
|
||||
m_watching = false;
|
||||
m_mutex.unlock();
|
||||
m_watcherThread->join();
|
||||
m_watcherThread.reset();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
bool uprofile::NvidiaMonitor::watching() const
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(m_mutex);
|
||||
return m_watching;
|
||||
}
|
49
3party/cppuprofile/lib/monitors/nvidiamonitor.h
Normal file
49
3party/cppuprofile/lib/monitors/nvidiamonitor.h
Normal file
@@ -0,0 +1,49 @@
|
||||
// Software Name : cppuprofile
|
||||
// SPDX-FileCopyrightText: Copyright (c) 2023 Orange
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
//
|
||||
// This software is distributed under the BSD License;
|
||||
// see the LICENSE file for more details.
|
||||
//
|
||||
// Author: Cédric CHEDALEUX <cedric.chedaleux@orange.com> et al.
|
||||
|
||||
#ifndef NVIDIAMONITOR_H_
|
||||
#define NVIDIAMONITOR_H_
|
||||
|
||||
#include "api.h"
|
||||
#include "igpumonitor.h"
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace uprofile
|
||||
{
|
||||
class NvidiaMonitor : public IGPUMonitor
|
||||
{
|
||||
public:
|
||||
UPROFAPI explicit NvidiaMonitor();
|
||||
UPROFAPI virtual ~NvidiaMonitor();
|
||||
|
||||
UPROFAPI void start(int period) override;
|
||||
UPROFAPI void stop() override;
|
||||
UPROFAPI bool watching() const override;
|
||||
UPROFAPI float getUsage() const override;
|
||||
UPROFAPI void getMemory(int& usedMem, int& totalMem) const override;
|
||||
|
||||
private:
|
||||
void watchGPU(int period);
|
||||
void abortWatchGPU();
|
||||
|
||||
mutable std::mutex m_mutex;
|
||||
std::unique_ptr<std::thread> m_watcherThread;
|
||||
bool m_watching = false;
|
||||
int m_totalMem = 0;
|
||||
int m_usedMem = 0;
|
||||
float m_gpuUsage = 0.f;
|
||||
};
|
||||
|
||||
}
|
||||
#endif /* NVIDIAMONITOR_H_ */
|
Reference in New Issue
Block a user