feat add profiling

2024-03-16 22:56:10 +08:00
parent e05c1f4894
commit 719fecd4bc
30 changed files with 4673 additions and 1 deletions
--- a/3party/cppuprofile/lib/monitors/nvidiamonitor.cpp
+++ b/3party/cppuprofile/lib/monitors/nvidiamonitor.cpp
@@ -0,0 +1,171 @@
+// Software Name : cppuprofile
+// SPDX-FileCopyrightText: Copyright (c) 2023 Orange
+// SPDX-License-Identifier: BSD-3-Clause
+//
+// This software is distributed under the BSD License;
+// see the LICENSE file for more details.
+//
+// Author: Cédric CHEDALEUX <cedric.chedaleux@orange.com> et al.
+
+#include "nvidiamonitor.h"
+
+#include <algorithm>
+#include <errno.h>
+#include <fcntl.h>
+#include <iostream>
+#include <sstream>
+#include <string.h>
+
+#if defined(__linux__)
+#include <sys/wait.h>
+#include <unistd.h>
+#endif
+
+const string errorMsg = "Failed to monitor nvidia-smi process";
+
+#if defined(__linux__)
+int read_nvidia_smi_stdout(int fd, string& gpuUsage, string& usedMem, string& totalMem)
+{
+    string line;
+    while (line.find('\n') == string::npos) { // full line read
+        char buffer[4096];
+        ssize_t count = read(fd, buffer, sizeof(buffer)); // if child process crashes, we gonna be blocked here forever
+        if (count == -1) {
+            return errno;
+        } else if (count > 0) { // there is something to read
+            line += string(buffer, count);
+        }
+    }
+
+    // Remove colon to have only spaces and use istringstream
+    auto noSpaceEnd = remove(line.begin(), line.end(), ',');
+    if (noSpaceEnd == line.end()) { // output trace does not have comma so something went wrong with the command
+        return ENODATA;
+    }
+
+    line.erase(noSpaceEnd, line.end());
+    std::istringstream ss(line);
+    ss >> gpuUsage >> usedMem >> totalMem;
+
+    return 0;
+}
+#endif
+
+uprofile::NvidiaMonitor::NvidiaMonitor()
+{
+}
+
+uprofile::NvidiaMonitor::~NvidiaMonitor()
+{
+    stop();
+}
+
+void uprofile::NvidiaMonitor::start(int period)
+{
+    watchGPU(period);
+}
+
+void uprofile::NvidiaMonitor::stop()
+{
+    abortWatchGPU();
+}
+
+float uprofile::NvidiaMonitor::getUsage() const
+{
+    std::lock_guard<std::mutex> lock(m_mutex);
+    return m_gpuUsage;
+}
+
+void uprofile::NvidiaMonitor::getMemory(int& usedMem, int& totalMem) const
+{
+    std::lock_guard<std::mutex> lock(m_mutex);
+    usedMem = m_usedMem;
+    totalMem = m_totalMem;
+}
+
+void uprofile::NvidiaMonitor::watchGPU(int period)
+{
+    if (m_watching) {
+        return;
+    }
+
+#if defined(__linux__)
+    char* args[5];
+    args[0] = (char*)"/usr/bin/nvidia-smi";
+    string period_arg = "-lms=" + to_string(period); // lms stands for continuous watching
+    args[1] = (char*)period_arg.c_str();
+    args[2] = (char*)"--query-gpu=utilization.gpu,memory.used,memory.total";
+    args[3] = (char*)"--format=csv,noheader,nounits";
+    args[4] = NULL;
+    string output;
+    int pipes[2];
+
+    // Create the pipe
+    if (pipe(pipes) == -1) {
+        cerr << errorMsg << ": pipe creation failed" << endl;
+        return;
+    }
+
+    // Create a child process for calling nvidia-smi
+    pid_t pid = fork();
+
+    switch (pid) {
+    case -1: /* Error */
+        cerr << errorMsg << ": process fork failed" << endl;
+        return;
+    case 0: /* We are in the child process */
+        while ((dup2(pipes[1], STDOUT_FILENO) == -1) && (errno == EINTR)) {
+        }
+        close(pipes[1]);
+        close(pipes[0]);
+        execv(args[0], args);
+        cerr << "Failed to execute '" << args[0] << "': " << strerror(errno) << endl; /* execl doesn't return unless there's an error */
+        exit(1);
+    default: /* We are in the parent process */
+        int stdout_fd = pipes[0];
+
+        // Start a thread to retrieve the child process stdout
+        m_watching = true;
+        m_watcherThread = unique_ptr<std::thread>(new thread([stdout_fd, pid, this]() {
+            while (watching()) {
+                string gpuUsage, usedMem, totalMem;
+                // if the child process crashes, an error is raised here and threads ends up
+                int err = read_nvidia_smi_stdout(stdout_fd, gpuUsage, usedMem, totalMem);
+                if (err != 0) {
+                    cerr << errorMsg << ": read_error = " << strerror(err) << endl;
+                    m_mutex.lock();
+                    m_watching = false;
+                    m_mutex.unlock();
+                    break;
+                }
+                m_mutex.lock();
+                m_gpuUsage = !gpuUsage.empty() ? stof(gpuUsage) : 0.f;
+                m_usedMem = !usedMem.empty() ? stoi(usedMem) * 1024 : 0;    // MiB to KiB
+                m_totalMem = !totalMem.empty() ? stoi(totalMem) * 1024 : 0; // MiB to KiB
+                m_mutex.unlock();
+            }
+        }));
+    }
+#else
+    cerr << errorMsg << endl;
+#endif
+}
+
+void uprofile::NvidiaMonitor::abortWatchGPU()
+{
+#if defined(__linux__)
+    if (m_watcherThread) {
+        m_mutex.lock();
+        m_watching = false;
+        m_mutex.unlock();
+        m_watcherThread->join();
+        m_watcherThread.reset();
+    }
+#endif
+}
+
+bool uprofile::NvidiaMonitor::watching() const
+{
+    std::lock_guard<std::mutex> lock(m_mutex);
+    return m_watching;
+}
--- a/3party/cppuprofile/lib/monitors/nvidiamonitor.h
+++ b/3party/cppuprofile/lib/monitors/nvidiamonitor.h
@@ -0,0 +1,49 @@
+// Software Name : cppuprofile
+// SPDX-FileCopyrightText: Copyright (c) 2023 Orange
+// SPDX-License-Identifier: BSD-3-Clause
+//
+// This software is distributed under the BSD License;
+// see the LICENSE file for more details.
+//
+// Author: Cédric CHEDALEUX <cedric.chedaleux@orange.com> et al.
+
+#ifndef NVIDIAMONITOR_H_
+#define NVIDIAMONITOR_H_
+
+#include "api.h"
+#include "igpumonitor.h"
+#include <mutex>
+#include <string>
+#include <thread>
+#include <vector>
+
+using namespace std;
+
+namespace uprofile
+{
+class NvidiaMonitor : public IGPUMonitor
+{
+public:
+    UPROFAPI explicit NvidiaMonitor();
+    UPROFAPI virtual ~NvidiaMonitor();
+
+    UPROFAPI void start(int period) override;
+    UPROFAPI void stop() override;
+    UPROFAPI bool watching() const override;
+    UPROFAPI float getUsage() const override;
+    UPROFAPI void getMemory(int& usedMem, int& totalMem) const override;
+
+private:
+    void watchGPU(int period);
+    void abortWatchGPU();
+
+    mutable std::mutex m_mutex;
+    std::unique_ptr<std::thread> m_watcherThread;
+    bool m_watching = false;
+    int m_totalMem = 0;
+    int m_usedMem = 0;
+    float m_gpuUsage = 0.f;
+};
+
+}
+#endif /* NVIDIAMONITOR_H_ */