Update examples_tts.ipynb

28abab45 · Niels Becker · a633eae8 · 28abab45
Commit 28abab45 authored 3 years ago by Niels Becker
--- a/examples_tts.ipynb
+++ b/examples_tts.ipynb
@@ -44,7 +44,7 @@
        "id": "ckqpD3IEdXh4"
      },
      "source": [
-        "# Coding Davinchi 2021 \n",
+        "# Coding da Vinci 2021 \n",
        "This Code is based on the Silero Model.\n",
        "https://github.com/snakers4/silero-models\n",
        "On Colab.\n",
@@ -212,4 +212,4 @@
      "outputs": []
    }
  ]
-}
\ No newline at end of file
+}
 %% Cell type:markdown id:ckqpD3IEdXh4 tags:

-# Coding Davinchi 2021
+# Coding da Vinci 2021
 This Code is based on the Silero Model.
 https://github.com/snakers4/silero-models
 On Colab.
 https://colab.research.google.com/github/snakers4/silero-models/blob/master/examples_tts.ipynb#scrollTo=ckqpD3IEdXh4

 %% Cell type:markdown id:previous-bacon tags:

 # Dependencies and Imports

 %% Cell type:code id:complicated-receiver tags:

 ``` python
 #@title Install dependencies

 !pip install -q torchaudio omegaconf

 import torch
 from pprint import pprint
 from omegaconf import OmegaConf
 from IPython.display import Audio, display

 # Download the models.yml index from the silero-models repository into a local
 # file (progress output disabled), then parse that file with OmegaConf so the
 # following cells can query it through `models`.
 torch.hub.download_url_to_file('https://raw.githubusercontent.com/snakers4/silero-models/master/models.yml',
                               'latest_silero_models.yml',
                               progress=False)
 models = OmegaConf.load('latest_silero_models.yml')
 ```

 %% Cell type:markdown id:nasty-intention tags:

 # Colab Demo

 %% Cell type:code id:pacific-injury tags:

 ``` python
 # See the latest available models: print the language keys found under
 # `models.tts_models`, then the speaker keys listed for each language.
 available_languages = list(models.tts_models.keys())
 print(f'Available languages {available_languages}')

 for lang in available_languages:
    # Speakers are the keys of the per-language entry.
    speakers = list(models.tts_models.get(lang).keys())
    print(f'Available speakers for {lang}: {speakers}')
 ```

 %% Cell type:code id:stupid-naples tags:

 ``` python
 import torch

 # Choose the language and speaker to load, and the device to run on.
 # NOTE(review): these presumably must match keys printed by the listing cell
 # above -- confirm against models.yml.
 language = 'ru'
 speaker = 'kseniya_16khz'
 device = torch.device('cpu')
 # Load the `silero_tts` entry point from the snakers4/silero-models hub repo.
 # The call returns five values, unpacked here and reused by the synthesis cell.
 model, symbols, sample_rate, example_text, apply_tts = torch.hub.load(repo_or_dir='snakers4/silero-models',
                                                                      model='silero_tts',
                                                                      language=language,
                                                                      speaker=speaker)
 model = model.to(device)  # gpu or cpu
 ```

 %% Cell type:code id:indirect-berry tags:

 ``` python
 # Synthesize the example text returned by the hub loader. `texts` is given a
 # one-element list; presumably `audio` holds one result per input text (confirm).
 audio = apply_tts(texts=[example_text],
                  model=model,
                  sample_rate=sample_rate,
                  symbols=symbols,
                  device=device)

 # Show the text that was synthesized and an inline player for the first result.
 print(example_text)
 display(Audio(audio[0], rate=sample_rate))
 ```

 %% Cell type:markdown id:n-IHQN_5KA_A tags:

 ## Enhance synthesis with logmmse

 %% Cell type:code id:ivNvVXhLKbmA tags:

 ``` python
 !pip install -q logmmse
 ```

 %% Cell type:markdown id:pLMPBH_CMAzh tags:

 You can try to enhance the synthesized audio with the logmmse algorithm, though it may require parameter tuning for the particular speaker.

 %% Cell type:code id:6b048VLuzgDF tags:

 ``` python
 import numpy as np
 from logmmse import logmmse

 # Run logmmse on the first synthesized clip (converted to a NumPy array) and
 # play the result. The keyword values below are the tuning knobs mentioned in
 # the note above this cell.
 # NOTE(review): output_file=None presumably makes logmmse return the processed
 # samples instead of writing a file -- confirm against the logmmse docs.
 enhanced = logmmse(np.array(audio[0]), sample_rate, output_file=None, initial_noise=1, window_size=160, noise_threshold=0.15)
 display(Audio(enhanced, rate=sample_rate))
 ```