# PyTorch: convert a pydub.AudioSegment into a torch.Tensor
import numpy as np
import torch
import torchaudio
import torchaudio.transforms as T
'''
in: pydub.AudioSegment
out: torch.Tensor (float32)
'''
def to_tensor(audio) -> torch.Tensor:
    """Convert a pydub ``AudioSegment`` into a float32 ``torch.Tensor``.

    The interleaved integer samples are de-interleaved using the segment's
    own channel count and scaled by ``2 ** (8 * sample_width)``.

    Args:
        audio: Object exposing ``sample_width`` (bytes per sample),
            ``channels`` (number of channels) and
            ``get_array_of_samples()`` (flat, channel-interleaved samples),
            e.g. a ``pydub.AudioSegment``.

    Returns:
        A float32 tensor of shape ``(channels, frames)``.

    Raises:
        ValueError: If the number of samples is not a multiple of
            ``audio.channels`` (raised by the NumPy reshape).
    """
    bits_per_sample = 8 * audio.sample_width
    # NOTE(review): the divisor is the full 2 ** bits range. If the samples
    # are signed (as pydub's array types appear to be), the output spans
    # roughly [-0.5, 0.5); dividing by 2 ** (bits - 1) would give the
    # conventional [-1, 1). Kept as-is because callers may rely on the
    # current scale -- confirm before changing.
    scale = float(1 << bits_per_sample)

    interleaved = np.asarray(audio.get_array_of_samples())
    # Use the real channel count rather than assuming stereo, so mono and
    # multi-channel segments are split correctly: (frames, channels) ->
    # (channels, frames).
    planar = interleaved.reshape((-1, audio.channels)).T

    # float32 array divided by a Python float stays float32, so the tensor
    # needs no further cast.
    return torch.from_numpy(planar.astype(np.float32) / scale)