293 lines
12 KiB
Python
293 lines
12 KiB
Python
import contextlib
|
|
import os
|
|
import sys
|
|
import tempfile
|
|
|
|
import pytest
|
|
import torch
|
|
import torchvision.io as io
|
|
from common_utils import assert_equal, cpu_and_cuda
|
|
from torchvision import get_video_backend
|
|
|
|
|
|
# PyAV is an optional dependency: ``av`` is set to None when it cannot be
# imported or when torchvision's own availability check rejects it, so the
# tests below can be skipped instead of failing at import time.
try:
    import av
except ImportError:
    av = None
else:
    try:
        # Do a version test too
        io.video._check_av_available()
    except ImportError:
        av = None
|
|
|
|
|
|
# Directory of this test module, resolved to an absolute path so the fixture
# lookup below does not depend on the current working directory.
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
# Location of the video assets read by the tests: <this dir>/assets/videos
VIDEO_DIR = os.path.join(_THIS_DIR, "assets", "videos")
|
|
|
|
|
|
def _create_video_frames(num_frames, height, width):
|
|
y, x = torch.meshgrid(torch.linspace(-2, 2, height), torch.linspace(-2, 2, width), indexing="ij")
|
|
data = []
|
|
for i in range(num_frames):
|
|
xc = float(i) / num_frames
|
|
yc = 1 - float(i) / (2 * num_frames)
|
|
d = torch.exp(-((x - xc) ** 2 + (y - yc) ** 2) / 2) * 255
|
|
data.append(d.unsqueeze(2).repeat(1, 1, 3).byte())
|
|
|
|
return torch.stack(data, 0)
|
|
|
|
|
|
@contextlib.contextmanager
def temp_video(num_frames, height, width, fps, lossless=False, video_codec=None, options=None):
    """Write a synthetic video to a temporary ``.mp4`` file and yield it.

    Args:
        num_frames: number of frames, forwarded to ``_create_video_frames``.
        height: frame height, forwarded to ``_create_video_frames``.
        width: frame width, forwarded to ``_create_video_frames``.
        fps: frame rate passed to ``io.write_video``.
        lossless: when True, encode with ``libx264rgb`` and ``crf=0``. Cannot
            be combined with ``video_codec`` or ``options``.
        video_codec: codec name passed to ``io.write_video``. When None it
            defaults to ``"libx264"`` for the "pyav" backend and to
            ``"libx264rgb"`` otherwise.
        options: dict passed to ``io.write_video``; defaults to ``{}``.

    Yields:
        A ``(file_name, frames)`` tuple: the path of the written video and the
        tensor of frames that was encoded into it.

    Raises:
        ValueError: if ``lossless`` is set together with ``video_codec`` or
            ``options``.
    """
    if lossless:
        if video_codec is not None:
            raise ValueError("video_codec can't be specified together with lossless")
        if options is not None:
            raise ValueError("options can't be specified together with lossless")
        video_codec = "libx264rgb"
        options = {"crf": "0"}

    if video_codec is None:
        if get_video_backend() == "pyav":
            video_codec = "libx264"
        else:
            # when video_codec is not set, we assume it is libx264rgb which accepts
            # RGB pixel formats as input instead of YUV
            video_codec = "libx264rgb"
    if options is None:
        options = {}

    data = _create_video_frames(num_frames, height, width)
    with tempfile.NamedTemporaryFile(suffix=".mp4") as f:
        # Only the unique name is needed: close the handle right away so that
        # write_video can create the file at that path itself.
        f.close()
        try:
            io.write_video(f.name, data, fps=fps, video_codec=video_codec, options=options)
            yield f.name, data
        finally:
            # Always remove the written file, even when the body of the caller's
            # ``with`` block raises (e.g. a failing assertion); otherwise every
            # failing test would leak a video file in the temp directory.
            with contextlib.suppress(FileNotFoundError):
                os.unlink(f.name)
|
|
|
|
|
|
@pytest.mark.skipif(
    get_video_backend() != "pyav" and not io._HAS_CPU_VIDEO_DECODER, reason="video_reader backend not available"
)
@pytest.mark.skipif(av is None, reason="PyAV unavailable")
class TestVideo:
    """Round-trip tests for ``torchvision.io`` video reading and writing.

    Most tests encode a synthetic clip through ``temp_video`` and check that
    reading it back yields the expected frames, timestamps or metadata.
    """

    # compression adds artifacts, thus we add a tolerance of
    # 6 in 0-255 range
    TOLERANCE = 6

    @staticmethod
    def _expected_pts(stream, in_seconds=False):
        """Compute the baseline presentation timestamps for a PyAV video stream.

        The timestamps are derived from the stream's ``average_rate``,
        ``time_base`` and ``duration``. Must be called while the stream's
        container is still open.

        Args:
            stream: a video stream taken from an open ``av`` container.
            in_seconds: when True, every timestamp is multiplied by
                ``stream.time_base``; otherwise it is left in time-base units.

        Returns:
            A list with one timestamp per expected frame.
        """
        pts_step = int(round(float(1 / (stream.average_rate * stream.time_base))))
        num_frames = int(round(float(stream.average_rate * stream.time_base * stream.duration)))
        if in_seconds:
            return [i * pts_step * stream.time_base for i in range(num_frames)]
        return [i * pts_step for i in range(num_frames)]

    def test_write_read_video(self):
        """A losslessly written video reads back with identical frames and fps."""
        with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
            lv, _, info = io.read_video(f_name)
            assert_equal(data, lv)
            assert info["video_fps"] == 5

    @pytest.mark.skipif(not io._HAS_CPU_VIDEO_DECODER, reason="video_reader backend is not chosen")
    def test_probe_video_from_file(self):
        """Probing a file reports the duration and fps of the written video."""
        with temp_video(10, 300, 300, 5) as (f_name, data):
            video_info = io._probe_video_from_file(f_name)
            assert pytest.approx(2, rel=0.0, abs=0.1) == video_info.video_duration
            assert pytest.approx(5, rel=0.0, abs=0.1) == video_info.video_fps

    @pytest.mark.skipif(not io._HAS_CPU_VIDEO_DECODER, reason="video_reader backend is not chosen")
    def test_probe_video_from_memory(self):
        """Probing an in-memory copy of the file reports the same duration and fps."""
        with temp_video(10, 300, 300, 5) as (f_name, data):
            with open(f_name, "rb") as fp:
                filebuffer = fp.read()
            video_info = io._probe_video_from_memory(filebuffer)
            assert pytest.approx(2, rel=0.0, abs=0.1) == video_info.video_duration
            assert pytest.approx(5, rel=0.0, abs=0.1) == video_info.video_fps

    def test_read_timestamps(self):
        """Timestamps read by torchvision match those derived from stream metadata."""
        with temp_video(10, 300, 300, 5) as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name)
            # note: not all formats/codecs provide accurate information for computing the
            # timestamps. For the format that we use here, this information is available,
            # so we use it as a baseline
            with av.open(f_name) as container:
                expected_pts = self._expected_pts(container.streams[0])

            assert pts == expected_pts

    @pytest.mark.parametrize("start", range(5))
    @pytest.mark.parametrize("offset", range(1, 4))
    def test_read_partial_video(self, start, offset):
        """Reading a pts range returns exactly the frames inside that range."""
        with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name)

            lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1])
            s_data = data[start : (start + offset)]
            assert len(lv) == offset
            assert_equal(s_data, lv)

            if get_video_backend() == "pyav":
                # for "video_reader" backend, we don't decode the closest early frame
                # when the given start pts is not matching any frame pts
                lv, _, _ = io.read_video(f_name, pts[4] + 1, pts[7])
                assert len(lv) == 4
                assert_equal(data[4:8], lv)

    @pytest.mark.parametrize("start", range(0, 80, 20))
    @pytest.mark.parametrize("offset", range(1, 4))
    def test_read_partial_video_bframes(self, start, offset):
        """Partial reads stay correct (within TOLERANCE) when the encoder emits B-frames."""
        # do not use lossless encoding, to test the presence of B-frames
        options = {"bframes": "16", "keyint": "10", "min-keyint": "4"}
        with temp_video(100, 300, 300, 5, options=options) as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name)

            lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1])
            s_data = data[start : (start + offset)]
            assert len(lv) == offset
            assert_equal(s_data, lv, rtol=0.0, atol=self.TOLERANCE)

            lv, _, _ = io.read_video(f_name, pts[4] + 1, pts[7])
            # TODO fix this
            if get_video_backend() == "pyav":
                assert len(lv) == 4
                assert_equal(data[4:8], lv, rtol=0.0, atol=self.TOLERANCE)
            else:
                assert len(lv) == 3
                assert_equal(data[5:8], lv, rtol=0.0, atol=self.TOLERANCE)

    def test_read_packed_b_frames_divx_file(self):
        """Timestamps of the bundled DivX asset come back sorted, at 30 fps."""
        name = "hmdb51_Turnk_r_Pippi_Michel_cartwheel_f_cm_np2_le_med_6.avi"
        f_name = os.path.join(VIDEO_DIR, name)
        pts, fps = io.read_video_timestamps(f_name)

        assert pts == sorted(pts)
        assert fps == 30

    def test_read_timestamps_from_packet(self):
        """Timestamps of an mpeg4-encoded video match the stream-metadata baseline."""
        with temp_video(10, 300, 300, 5, video_codec="mpeg4") as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name)
            # note: not all formats/codecs provide accurate information for computing the
            # timestamps. For the format that we use here, this information is available,
            # so we use it as a baseline
            with av.open(f_name) as container:
                stream = container.streams[0]
                # make sure we went through the optimized codepath
                assert b"Lavc" in stream.codec_context.extradata
                expected_pts = self._expected_pts(stream)

            assert pts == expected_pts

    def test_read_video_pts_unit_sec(self):
        """``pts_unit="sec"`` reads the full lossless video and reports only ``video_fps``."""
        with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
            lv, _, info = io.read_video(f_name, pts_unit="sec")

            assert_equal(data, lv)
            assert info["video_fps"] == 5
            assert info == {"video_fps": 5}

    def test_read_timestamps_pts_unit_sec(self):
        """Timestamps requested in seconds match the baseline scaled by the time base."""
        with temp_video(10, 300, 300, 5) as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name, pts_unit="sec")

            with av.open(f_name) as container:
                expected_pts = self._expected_pts(container.streams[0], in_seconds=True)

            assert pts == expected_pts

    @pytest.mark.parametrize("start", range(5))
    @pytest.mark.parametrize("offset", range(1, 4))
    def test_read_partial_video_pts_unit_sec(self, start, offset):
        """Partial reads addressed in seconds return exactly the requested frames."""
        with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
            pts, _ = io.read_video_timestamps(f_name, pts_unit="sec")

            lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1], pts_unit="sec")
            s_data = data[start : (start + offset)]
            assert len(lv) == offset
            assert_equal(s_data, lv)

            with av.open(f_name) as container:
                stream = container.streams[0]
                # start one time-base tick after frame 4, expressed in seconds
                lv, _, _ = io.read_video(
                    f_name, int(pts[4] * (1.0 / stream.time_base) + 1) * stream.time_base, pts[7], pts_unit="sec"
                )
            if get_video_backend() == "pyav":
                # for "video_reader" backend, we don't decode the closest early frame
                # when the given start pts is not matching any frame pts
                assert len(lv) == 4
                assert_equal(data[4:8], lv)

    def test_read_video_corrupted_file(self):
        """Reading a file that is not a video yields empty tensors and empty info."""
        with tempfile.NamedTemporaryFile(suffix=".mp4") as f:
            f.write(b"This is not an mpg4 file")
            # The file is re-opened by name below; flush so the garbage bytes
            # are actually on disk instead of sitting in the write buffer.
            f.flush()
            video, audio, info = io.read_video(f.name)
            assert isinstance(video, torch.Tensor)
            assert isinstance(audio, torch.Tensor)
            assert video.numel() == 0
            assert audio.numel() == 0
            assert info == {}

    def test_read_video_timestamps_corrupted_file(self):
        """Reading timestamps of a non-video file yields no pts and no fps."""
        with tempfile.NamedTemporaryFile(suffix=".mp4") as f:
            f.write(b"This is not an mpg4 file")
            # Same as above: make sure the content reaches the file before it
            # is read back through its name.
            f.flush()
            video_pts, video_fps = io.read_video_timestamps(f.name)
            assert video_pts == []
            assert video_fps is None

    @pytest.mark.skip(reason="Temporarily disabled due to new pyav")
    def test_read_video_partially_corrupted_file(self):
        """Frames before a corrupted region still decode correctly."""
        with temp_video(5, 4, 4, 5, lossless=True) as (f_name, data):
            with open(f_name, "r+b") as f:
                size = os.path.getsize(f_name)
                bytes_to_overwrite = size // 10
                # seek to the middle of the file
                f.seek(5 * bytes_to_overwrite)
                # corrupt 10% of the file from the middle
                f.write(b"\xff" * bytes_to_overwrite)
            # this exercises the container.decode assertion check
            video, audio, info = io.read_video(f_name, pts_unit="sec")
            # check that size is not equal to 5, but 3
            # TODO fix this
            if get_video_backend() == "pyav":
                assert len(video) == 3
            else:
                assert len(video) == 4
            # but the valid decoded content is still correct
            assert_equal(video[:3], data[:3])
            # and the last few frames are wrong
            with pytest.raises(AssertionError):
                assert_equal(video, data)

    @pytest.mark.skipif(sys.platform == "win32", reason="temporarily disabled on Windows")
    @pytest.mark.parametrize("device", cpu_and_cuda())
    def test_write_video_with_audio(self, device, tmpdir):
        """Re-encoding a video with its audio track preserves video and audio properties."""
        f_name = os.path.join(VIDEO_DIR, "R6llTwEh07w.mp4")
        video_tensor, audio_tensor, info = io.read_video(f_name, pts_unit="sec")

        out_f_name = os.path.join(tmpdir, "testing.mp4")
        io.video.write_video(
            out_f_name,
            video_tensor.to(device),
            round(info["video_fps"]),
            video_codec="libx264rgb",
            options={"crf": "0"},
            audio_array=audio_tensor.to(device),
            audio_fps=info["audio_fps"],
            audio_codec="aac",
        )

        out_video_tensor, out_audio_tensor, out_info = io.read_video(out_f_name, pts_unit="sec")

        assert info["video_fps"] == out_info["video_fps"]
        assert_equal(video_tensor, out_video_tensor)

        assert info["audio_fps"] == out_info["audio_fps"]

        # Open both containers as context managers so they are closed once the
        # stream properties have been compared.
        with av.open(f_name) as in_container, av.open(out_f_name) as out_container:
            audio_stream = in_container.streams.audio[0]
            out_audio_stream = out_container.streams.audio[0]

            assert audio_stream.rate == out_audio_stream.rate
            assert pytest.approx(out_audio_stream.frames, rel=0.0, abs=1) == audio_stream.frames
            assert audio_stream.frame_size == out_audio_stream.frame_size

    # TODO add tests for audio
|
|
|
|
|
|
# Allow running this test module directly as a script.
if __name__ == "__main__":
    # pytest.main takes a list of command-line arguments; a bare string is
    # rejected with a TypeError, so wrap the module path in a list.
    pytest.main([__file__])
|