Speed benchmarks

Speed benchmarks

import torch
from medusa.benchmark import FancyTimer
from medusa.detect import SCRFDetector, YunetDetector

# Disable autograd globally for the rest of the session.
torch.set_grad_enabled(False)

# Parameter grid iterated by ``timer_.iter``: detector class, device and
# batch size.
params = dict(
    model_cls=[SCRFDetector, YunetDetector],
    device=["cuda", "cpu"],
    batch_size=[2 ** exponent for exponent in range(10)],  # 1, 2, 4, ..., 512
)
timer_ = FancyTimer()

from medusa.data import get_example_image

# Probe CUDA once up front: on a machine without an NVIDIA driver, moving the
# example image to "cuda" raises
# "RuntimeError: Found no NVIDIA driver on your system", which would abort the
# whole benchmark instead of just skipping the GPU configurations.
has_cuda = torch.cuda.is_available()

for p in timer_.iter(params):

    # Skip GPU configurations when no usable CUDA device is present.
    if p["device"] == "cuda" and not has_cuda:
        continue

    # The YuNet detector is never benchmarked on CUDA
    # (presumably unsupported there -- TODO confirm).
    if p["model_cls"] == YunetDetector and p["device"] == "cuda":
        continue

    model = p["model_cls"](device=p["device"])
    img = get_example_image(device=p["device"])
    # Repeat the example image ``batch_size`` times along the leading
    # dimension to build the input batch.
    img = img.repeat(p["batch_size"], 1, 1, 1)

    with torch.inference_mode():
        timer_.time(model, [img], n_warmup=3, repeats=20, params=p)

    # Release unoccupied cached GPU memory before the next configuration.
    torch.cuda.empty_cache()

# Collect all recorded timings into a single DataFrame.
df_detect = timer_.to_df()

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[2], line 9
      6     continue
      8 model = p["model_cls"](device=p["device"])
----> 9 img = get_example_image(device=p["device"])
     10 img = img.repeat(p["batch_size"], 1, 1, 1)
     12 with torch.inference_mode():

File ~/work/medusa/medusa/medusa/data/example_data.py:79, in get_example_image(n_faces, load, device, channels_last, dtype)
     77     imgs.append(f)
     78 else:
---> 79     img = read_image(str(f)).to(device)
     80     if channels_last:
     81         img = img.permute(1, 2, 0)

File ~/.cache/pypoetry/virtualenvs/medusa-2VgPi4zZ-py3.10/lib/python3.10/site-packages/torch/cuda/__init__.py:247, in _lazy_init()
    245 if 'CUDA_MODULE_LOADING' not in os.environ:
    246     os.environ['CUDA_MODULE_LOADING'] = 'LAZY'
--> 247 torch._C._cuda_init()
    248 # Some of the queued calls may reentrantly call _lazy_init();
    249 # we need to just return without initializing in that case.
    250 # However, we must not let any *other* threads in!
    251 _tls.is_initializing = True

RuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx

import seaborn.objects as so

# Derive throughput columns from the recorded durations:
# BPS = 1 / duration, FPS = BPS * batch_size.
# NOTE(review): assumes 'duration' is the time for one batch, in seconds -- confirm.
batches_per_second = 1 / df_detect['duration']
df_detect['BPS'] = batches_per_second
df_detect['FPS'] = batches_per_second * df_detect['batch_size']

# Log2 x-axis with one tick per benchmarked batch size.
batch_sizes = params['batch_size']
x_scale = so.Continuous(trans="log2").tick(
    count=len(batch_sizes),
    between=(1, max(batch_sizes)),
)

# One facet per device, one colour per detector class; individual timings are
# drawn as jittered dots with a median line on top. The y-axis is left linear.
(
    so.Plot(df_detect, x='batch_size', y='FPS', color='model_cls')
    .facet(col="device")
    .share(y=True, x=True)
    .add(so.Dot(), so.Jitter(.3))
    .add(so.Line(), so.Est('median', errorbar='sd'), so.Jitter(.3))
    .scale(x=x_scale)
    .label(x="Batch size", y="Frames per second")
)

../_images/5fa2082741343129380268cc0cefe9a57872519165a532910e95f64b9c7e4271.png

import torch
from medusa.recon import DecaReconModel

# Fresh timer and parameter grid (device x batch size) for the
# reconstruction model benchmark.
timer_ = FancyTimer()
params = {
    "device": ["cuda", "cpu"],
    "batch_size": [1, 2, 4, 8, 16, 32, 64, 128, 256, 512],
}

# Probe CUDA once: without an NVIDIA driver, any attempt to place data on
# "cuda" raises a RuntimeError, so GPU configurations are skipped instead.
has_cuda = torch.cuda.is_available()

for p in timer_.iter(params):

    # Skip GPU configurations when no usable CUDA device is present.
    if p["device"] == "cuda" and not has_cuda:
        continue

    model = DecaReconModel(name='emoca-coarse', device=p["device"])
    img = get_example_image(device=p["device"])
    # Repeat the example image ``batch_size`` times along the leading
    # dimension to build the input batch.
    img = img.repeat(p["batch_size"], 1, 1, 1)
    with torch.inference_mode():
        timer_.time(model, [img], n_warmup=3, repeats=5, params=p)

    # Release unoccupied cached GPU memory before the next configuration.
    torch.cuda.empty_cache()

# Collect all recorded timings into a single DataFrame.
df_recon = timer_.to_df()

100%|██████████| 20/20 [04:54<00:00, 14.74s/it]

# Derive throughput columns from the recorded durations:
# BPS = 1 / duration, FPS = BPS * batch_size.
# NOTE(review): assumes 'duration' is the time for one batch, in seconds -- confirm.
batches_per_second = 1 / df_recon['duration']
df_recon['BPS'] = batches_per_second
df_recon['FPS'] = batches_per_second * df_recon['batch_size']

# Log2 x-axis with one tick per benchmarked batch size.
batch_sizes = params['batch_size']
x_scale = so.Continuous(trans="log2").tick(
    count=len(batch_sizes),
    between=(1, max(batch_sizes)),
)

# One colour per device; individual timings are drawn as jittered dots with a
# median line (99.99% confidence interval requested for the estimate) on top.
(
    so.Plot(df_recon, x='batch_size', y='FPS', color='device')
    .add(so.Dot(), so.Jitter(.3))
    .add(so.Line(), so.Est('median', errorbar=('ci', 99.99)), so.Jitter(.3))
    .scale(x=x_scale)
    .label(x="Batch size", y="Frames per second")
)

../_images/966ff1fae9e52a4df9c63fe3b6c48b8e7fb390ef43c6871d029495c21fc5a9b4.png