import numpy as np


class Utterance:
    """A single utterance, referenced by the paths of its stored data.

    The frames are not kept in memory: they are loaded from ``frames_fpath``
    with ``np.load`` every time they are requested.
    """

    def __init__(self, frames_fpath, wave_fpath):
        """
        :param frames_fpath: path to the file holding the frames of the
        utterance, in a format readable by ``np.load``
        :param wave_fpath: path to the waveform file of the utterance (only
        stored by this class, never read)
        """
        self.frames_fpath = frames_fpath
        self.wave_fpath = wave_fpath

    def get_frames(self) -> np.ndarray:
        """
        Loads the frames of the utterance from disk.

        :return: the array stored at ``frames_fpath``
        """
        return np.load(self.frames_fpath)

    def random_partial(self, n_frames: int):
        """
        Crops the frames into a partial utterance of n_frames

        :param n_frames: The number of frames of the partial utterance
        :return: the partial utterance frames and a tuple indicating the start
        and end of the partial utterance in the complete utterance.
        :raises ValueError: if the utterance has fewer than n_frames frames
        """
        frames = self.get_frames()
        total_frames = frames.shape[0]

        # Largest offset at which a window of n_frames still fits entirely.
        max_start = total_frames - n_frames
        if max_start < 0:
            raise ValueError(
                "Cannot crop a partial utterance of %d frames from an "
                "utterance of only %d frames" % (n_frames, total_frames)
            )

        if max_start == 0:
            # Only one possible crop; skip the RNG call so the global random
            # state is left untouched in this case.
            start = 0
        else:
            # The upper bound of np.random.randint is exclusive, hence the +1:
            # without it the last valid offset could never be drawn and the
            # final frame of the utterance would never be part of a partial.
            start = np.random.randint(0, max_start + 1)
        end = start + n_frames

        return frames[start:end], (start, end)