16 lines
440 B
Python
16 lines
440 B
Python
from typing import Tuple
|
|
|
|
import torchvision
|
|
from torch import Tensor
|
|
|
|
|
|
class KineticsWithVideoId(torchvision.datasets.Kinetics):
|
|
def __getitem__(self, idx: int) -> Tuple[Tensor, Tensor, int]:
|
|
video, audio, info, video_idx = self.video_clips.get_clip(idx)
|
|
label = self.samples[video_idx][1]
|
|
|
|
if self.transform is not None:
|
|
video = self.transform(video)
|
|
|
|
return video, audio, label, video_idx
|