diff options
author | Jean-Marc Valin <jmvalin@amazon.com> | 2023-10-07 11:30:54 +0300 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@amazon.com> | 2023-10-07 11:30:54 +0300 |
commit | 69c9b34f86af7b96049ec3fe809c6629c902459d (patch) | |
tree | ff9315b49921178ada324b3f20bd9cb2a32f2a73 | |
parent | 95c1416d459585bc99cdaacf87cd3cf1d13b247e (diff) |
Signal domain, with pitch gating exp_fargan43Sa
-rw-r--r-- | dnn/torch/fargan/adv_train_fargan.py | 23 | ||||
-rw-r--r-- | dnn/torch/fargan/test_fargan.py | 4 | ||||
-rw-r--r-- | dnn/torch/fargan/train_fargan.py | 12 |
3 files changed, 24 insertions, 15 deletions
diff --git a/dnn/torch/fargan/adv_train_fargan.py b/dnn/torch/fargan/adv_train_fargan.py index 5c5e974d..e548c653 100644 --- a/dnn/torch/fargan/adv_train_fargan.py +++ b/dnn/torch/fargan/adv_train_fargan.py @@ -132,6 +132,9 @@ states = None spect_loss = MultiResolutionSTFTLoss(device).to(device) +for param in model.parameters(): + param.requires_grad = False + if __name__ == '__main__': model.to(device) disc.to(device) @@ -153,24 +156,28 @@ if __name__ == '__main__': print(f"training epoch {epoch}...") with tqdm.tqdm(dataloader, unit='batch') as tepoch: for i, (features, periods, target, lpc) in enumerate(tepoch): + if epoch == 1 and i == 100: + for param in model.parameters(): + param.requires_grad = True + optimizer.zero_grad() features = features.to(device) - lpc = lpc.to(device) - lpc = lpc*(args.gamma**torch.arange(1,17, device=device)) - lpc = fargan.interp_lpc(lpc, 4) + #lpc = lpc.to(device) + #lpc = lpc*(args.gamma**torch.arange(1,17, device=device)) + #lpc = fargan.interp_lpc(lpc, 4) periods = periods.to(device) if True: target = target[:, :sequence_length*160] - lpc = lpc[:,:sequence_length*4,:] + #lpc = lpc[:,:sequence_length*4,:] features = features[:,:sequence_length+4,:] periods = periods[:,:sequence_length+4] else: target=target[::2, :] - lpc=lpc[::2,:] + #lpc=lpc[::2,:] features=features[::2,:] periods=periods[::2,:] target = target.to(device) - target = fargan.analysis_filter(target, lpc[:,:,:], nb_subframes=1, gamma=args.gamma) + #target = fargan.analysis_filter(target, lpc[:,:,:], nb_subframes=1, gamma=args.gamma) #nb_pre = random.randrange(1, 6) nb_pre = 2 @@ -210,7 +217,7 @@ if __name__ == '__main__': cont_loss = fargan.sig_loss(target[:, nb_pre*160:nb_pre*160+80], output[:, nb_pre*160:nb_pre*160+80]) specc_loss = spect_loss(output, target.detach()) - reg_loss = args.reg_weight * (.00*cont_loss + specc_loss) + reg_loss = (.00*cont_loss + specc_loss) loss_gen = 0 for scale in scores_gen: @@ -218,7 +225,7 @@ if __name__ == '__main__': feat_loss = 
args.fmap_weight * fmap_loss(scores_real, scores_gen) - gen_loss = reg_loss + feat_loss + loss_gen + gen_loss = args.reg_weight * reg_loss + feat_loss + loss_gen model.zero_grad() diff --git a/dnn/torch/fargan/test_fargan.py b/dnn/torch/fargan/test_fargan.py index d47400f8..925f7f01 100644 --- a/dnn/torch/fargan/test_fargan.py +++ b/dnn/torch/fargan/test_fargan.py @@ -108,6 +108,7 @@ def inverse_perceptual_weighting40 (pw_signal, filters): buffer[:] = out_sig_frame[-16:] return signal +from scipy.signal import lfilter if __name__ == '__main__': model.to(device) @@ -121,7 +122,8 @@ if __name__ == '__main__': sig, _ = model(features, periods, nb_frames - 4) #weighting_vector = np.array([gamma**i for i in range(16,0,-1)]) sig = sig.detach().numpy().flatten() - sig = inverse_perceptual_weighting40(sig, lpc[0,:,:]) + sig = lfilter(np.array([1.]), np.array([1., -.85]), sig) + #sig = inverse_perceptual_weighting40(sig, lpc[0,:,:]) pcm = np.round(32768*np.clip(sig, a_max=.99, a_min=-.99)).astype('int16') pcm.tofile(signal_file) diff --git a/dnn/torch/fargan/train_fargan.py b/dnn/torch/fargan/train_fargan.py index b18dbc52..dc6feb2d 100644 --- a/dnn/torch/fargan/train_fargan.py +++ b/dnn/torch/fargan/train_fargan.py @@ -116,23 +116,23 @@ if __name__ == '__main__': features = features.to(device) #lpc = torch.tensor(fargan.interp_lpc(lpc.numpy(), 4)) #print("interp size", lpc.shape) - lpc = lpc.to(device) - lpc = lpc*(args.gamma**torch.arange(1,17, device=device)) - lpc = fargan.interp_lpc(lpc, 4) + #lpc = lpc.to(device) + #lpc = lpc*(args.gamma**torch.arange(1,17, device=device)) + #lpc = fargan.interp_lpc(lpc, 4) periods = periods.to(device) if (np.random.rand() > 0.1): target = target[:, :sequence_length*160] - lpc = lpc[:,:sequence_length*4,:] + #lpc = lpc[:,:sequence_length*4,:] features = features[:,:sequence_length+4,:] periods = periods[:,:sequence_length+4] else: target=target[::2, :] - lpc=lpc[::2,:] + #lpc=lpc[::2,:] features=features[::2,:] periods=periods[::2,:] 
target = target.to(device) #print(target.shape, lpc.shape) - target = fargan.analysis_filter(target, lpc[:,:,:], nb_subframes=1, gamma=args.gamma) + #target = fargan.analysis_filter(target, lpc[:,:,:], nb_subframes=1, gamma=args.gamma) #nb_pre = random.randrange(1, 6) nb_pre = 2 |