#!/usr/bin/env python3
# Software Name : uprofile
# SPDX-FileCopyrightText: Copyright (c) 2022 Orange
# SPDX-License-Identifier: BSD-3-Clause
#
# This software is distributed under the BSD License;
# see the LICENSE file for more details.
#
# Author: Cédric CHEDALEUX <cedric.chedaleux@orange.com> et al.
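#
# Usage example (the script and file names below are illustrative; the options come from the argparse definition in main()):
#   python3 show_profiling_graph.py profiling.csv --metric time_exec --metric cpu --output graphs.html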
import argparse
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
from plotly.subplots import make_subplots
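
# Maps the metric identifiers found in the first CSV column to the titles of the
# corresponding subplots; these keys are also the accepted values for the '--metric' option.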
METRICS = {
    'time_exec': 'Execution task',
    'cpu': 'CPU load',
    'sys_mem': 'System memory (in MB)',
    'proc_mem': 'Process Memory (in MB)',
    'gpu': 'GPU load',
    'gpu_mem': 'GPU memory (in MB)'
}


def read(csv_file):
    """
    Generate a DataFrame from the CSV file
    A metric event can have up to 3 extra parameters in addition to its type and its timestamp
    :param csv_file:
    :return: DataFrame
    """
    return pd.read_csv(csv_file, sep=';', names=['metric', 'timestamp', 'extra_1', 'extra_2', 'extra_3'])
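
# For reference, hypothetical input records in the layout this script expects
# (field values are made up; the real file is produced by the uprofile library,
# one ';'-separated event per line):
#   cpu;1000;0;42.5
#   sys_mem;1000;8388608;4194304;2097152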


def filter_dataframe(df, metric):
    """
    Filter the dataframe by metric type
    :param df:
    :param metric:
    :return: DataFrame
    """
    return df[df['metric'] == metric]


def gen_time_exec_df(df):
    """
    Format the dataframe to represent time exec data as Gantt tasks
    :param df:
    :return: dataframe with Gantt tasks
    """
    if df.empty:
        return None
    # 'time_exec' format is 'time_exec:<end_timestamp>:<start_timestamp>:<task_name>'
    time_exec_df = df[['extra_2', 'extra_1', 'timestamp']].copy()
    # ff.create_gantt() expects 'Task', 'Start' and 'Finish' columns
    time_exec_df.rename(columns={"extra_2": "Task", "extra_1": "Start", "timestamp": "Finish"}, inplace=True)
    time_exec_df['Description'] = time_exec_df.apply(lambda row: "Task: {} (duration = {} ms)"
                                                     .format(row['Task'], int(row['Finish']) - int(row['Start'])),
                                                     axis=1)
    time_exec_df['Start'] = pd.to_datetime(time_exec_df['Start'], unit='ms')
    time_exec_df['Finish'] = pd.to_datetime(time_exec_df['Finish'], unit='ms')
    return time_exec_df


def create_gantt_graph(df):
    """
    Create a Gantt chart to represent the tasks
    (see https://chart-studio.plotly.com/~empet/15242/gantt-chart-in-a-subplot-httpscommun/ and https://plotly.com/python/gantt/)
    :param df:
    :return: graph
    """
    # Sample one color per task from the 'turbo' colorscale; avoid a division by zero when there is a single task
    size = len(df['Start'])
    colors = px.colors.sample_colorscale("turbo", [n / max(size - 1, 1) for n in range(size)])
    return ff.create_gantt(df, colors=colors, show_colorbar=True, showgrid_x=True, showgrid_y=True,
                           show_hover_fill=True)


def create_cpu_graphs(df):
    if df.empty:
        return None
    # 'cpu' metrics (format is 'cpu:<timestamp>:<cpu_number>:<percentage_usage>')
    cpus = pd.unique(df['extra_1'])
    for cpu in cpus:
        cpu_df = df[df['extra_1'] == cpu]
        yield go.Scatter(x=pd.to_datetime(cpu_df['timestamp'], unit='ms'),
                         y=pd.to_numeric(cpu_df['extra_2']),
                         name="CPU {}".format(cpu),
                         showlegend=True)


def create_sys_mem_graphs(df):
    # 'sys_mem' metrics (format is 'mem:<timestamp>:<total>:<available>:<free>')
    if df.empty:
        return None
    names = ["Total", "Available", "Free"]
    for index in range(3):
        yield go.Scatter(x=pd.to_datetime(df['timestamp'], unit='ms'),
                         y=(pd.to_numeric(df["extra_{}".format(index + 1)], downcast="integer") / 1024),
                         name=names[index],
                         showlegend=True)


def create_proc_mem_graphs(df):
    # 'proc_mem' metrics (format is 'mem:<timestamp>:<rss>:<shared>')
    if df.empty:
        return None
    names = ["RSS", "Shared"]
    for index in range(2):
        yield go.Scatter(x=pd.to_datetime(df['timestamp'], unit='ms'),
                         y=(pd.to_numeric(df["extra_{}".format(index + 1)], downcast="integer") / 1024),
                         name=names[index],
                         showlegend=True)


def create_gpu_mem_graphs(df):
    # 'gpu_mem' metrics (format is 'gpu_mem:<timestamp>:<total>:<used>')
    if df.empty:
        return None
    names = ["Used", "Total"]
    for index in range(2):
        yield go.Scatter(x=pd.to_datetime(df['timestamp'], unit='ms'),
                         y=(pd.to_numeric(df["extra_{}".format(index + 1)], downcast="integer") / 1024),
                         name=names[index],
                         showlegend=True)


def create_gpu_usage_graphs(df):
    # 'gpu' metrics (format is 'gpu:<timestamp>:<percentage_usage>')
    if df.empty:
        return None
    yield go.Scatter(x=pd.to_datetime(df['timestamp'], unit='ms'),
                     y=pd.to_numeric(df['extra_1']),
                     name="GPU usage",
                     showlegend=True)


def build_graphs(input_file, metrics):
    # Use multiple subplots (https://plotly.com/python/subplots/) to display:
    # - the execution task graph
    # - the CPU usage
    # - the memory usage (system and process)
    # - the GPU usage and memory
    graph_titles = []
    for metric in metrics:
        graph_titles.append(METRICS[metric])
    figs = make_subplots(rows=len(metrics),
                         cols=1,
                         shared_xaxes=True,
                         vertical_spacing=0.025,
                         subplot_titles=graph_titles
                         )
    with open(input_file) as file:
        global_df = read(file)
    row_index = 1
    for metric in metrics:
        if metric == 'time_exec':
            # Display a Gantt chart representing task execution durations
            time_exec_df = gen_time_exec_df(filter_dataframe(global_df, metric))
            if time_exec_df is not None:
                for trace in create_gantt_graph(time_exec_df).data:
                    figs.add_trace(trace, row=row_index, col=1)
        elif metric == 'cpu':
            # Display all CPU usages in the same graph
            for trace in create_cpu_graphs(filter_dataframe(global_df, metric)):
                figs.add_trace(trace, row=row_index, col=1)
        elif metric == 'sys_mem':
            # Display system memory usage
            for trace in create_sys_mem_graphs(filter_dataframe(global_df, metric)):
                figs.add_trace(trace, row=row_index, col=1)
            figs.update_yaxes(row=row_index, col=1, rangemode="tozero")
        elif metric == 'proc_mem':
            # Display process memory usage
            for trace in create_proc_mem_graphs(filter_dataframe(global_df, metric)):
                figs.add_trace(trace, row=row_index, col=1)
            figs.update_yaxes(row=row_index, col=1, rangemode="tozero")
        elif metric == 'gpu':
            # Display GPU usage
            for trace in create_gpu_usage_graphs(filter_dataframe(global_df, metric)):
                figs.add_trace(trace, row=row_index, col=1)
            figs.update_yaxes(row=row_index, col=1, rangemode="tozero")
        elif metric == 'gpu_mem':
            # Display GPU memory
            for trace in create_gpu_mem_graphs(filter_dataframe(global_df, 'gpu_mem')):
                figs.add_trace(trace, row=row_index, col=1)
            figs.update_yaxes(row=row_index, col=1, rangemode="tozero")
        row_index += 1
    figs.update_layout(
        height=1200,
        xaxis_tickformat='%M:%S',
        showlegend=False
    )
    return figs


def main():
    """
    Show the task duration graph and the CPU usage graph on a single view
    The tool reads the metrics file generated by the uprofile library
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('INPUT_FILE', type=str, help='Input file that contains profiling data')
    parser.add_argument('--output', '-o', type=str,
                        help='Save the graph to the given HTML file')
    parser.add_argument('--metric', type=str, dest='metrics', choices=METRICS.keys(), action='append', default=[],
                        help='Select the metric to display')
    args = parser.parse_args()
    if args.INPUT_FILE is None:
        parser.error('no INPUT_FILE given')
    # Display every metric when none is explicitly selected
    if not args.metrics:
        args.metrics = METRICS.keys()
    graphs = build_graphs(args.INPUT_FILE, args.metrics)
    graphs.show()
    if args.output is not None:
        print("Saving graph to '{}'".format(args.output))
        graphs.write_html(args.output)


if __name__ == '__main__':
    main()