Performance Monitoring for PyTorch Model Deployment
When deploying PyTorch models in production, performance monitoring is essential to keeping the system running reliably. This post shares a reproducible monitoring setup covering the core metrics: inference latency, memory usage, and GPU utilization.
Core Monitoring Components
import time
from collections import defaultdict

import torch
import GPUtil
import psutil

class ModelProfiler:
    def __init__(self):
        self.metrics = defaultdict(list)

    def measure_inference(self, model, input_tensor, iterations=100):
        # Warm-up runs so one-time initialization costs do not skew the timings
        with torch.no_grad():
            for _ in range(10):
                model(input_tensor)
        # Timed runs; CUDA kernels launch asynchronously, so synchronize
        # around each call to capture the true end-to-end latency
        times = []
        for _ in range(iterations):
            if input_tensor.is_cuda:
                torch.cuda.synchronize()
            start_time = time.perf_counter()
            with torch.no_grad():
                model(input_tensor)
            if input_tensor.is_cuda:
                torch.cuda.synchronize()
            times.append(time.perf_counter() - start_time)
        avg_time = sum(times) / len(times)
        self.metrics['latency'].append(avg_time)
        return avg_time

    def get_gpu_stats(self):
        gpus = GPUtil.getGPUs()
        if not gpus:
            return None  # no NVIDIA GPU visible to the driver
        stats = {
            'memory_used': gpus[0].memoryUsed,    # MB
            'memory_total': gpus[0].memoryTotal,  # MB
            'utilization': gpus[0].load           # fraction in [0, 1]
        }
        self.metrics['gpu_memory'].append(stats['memory_used'])
        return stats
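The intro lists memory usage among the core metrics, yet psutil is imported above and never used. A minimal sketch of host-side monitoring that fills the gap (the get_host_stats function name is illustrative, not part of ModelProfiler):

import psutil

def get_host_stats():
    # Resident memory of the current process plus system-wide CPU utilization
    process = psutil.Process()
    return {
        'rss_mb': process.memory_info().rss / (1024 ** 2),  # resident set size, MB
        'cpu_percent': psutil.cpu_percent(interval=0.1),    # system-wide CPU usage, %
    }

Sampling these alongside each latency measurement makes it easy to correlate host memory growth with load.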
# Usage example
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = torch.load('model.pth', map_location=device)  # expects a fully serialized nn.Module
model.eval()

profiler = ModelProfiler()
input_tensor = torch.randn(1, 3, 224, 224, device=device)

latency = profiler.measure_inference(model, input_tensor)
gpu_stats = profiler.get_gpu_stats()

print(f'Average latency: {latency:.4f}s')
if gpu_stats is not None:
    print(f'GPU memory used: {gpu_stats["memory_used"]}MB')
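Hand-rolled timing loops like measure_inference are easy to get subtly wrong; PyTorch also ships torch.utils.benchmark, which handles warm-up and CUDA synchronization on its own. A minimal sketch, reusing the model and input_tensor from the example above:

import torch
import torch.utils.benchmark as benchmark

# Timer synchronizes CUDA devices itself, so no manual torch.cuda.synchronize() is needed
timer = benchmark.Timer(
    stmt='with torch.no_grad(): model(x)',
    globals={'torch': torch, 'model': model, 'x': input_tensor},
)
measurement = timer.timeit(100)  # returns a Measurement with summary statistics
print(f'Mean latency: {measurement.mean:.4f}s')

Its output is a useful cross-check against the manual loop whenever the two disagree.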
Discussion