
    ~VjiC                         d dl Z d dlmZ d dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ dZddiZeeeeeef         Z G d	 d
e          ZdS )    N)Tuple)Tensor)Dataset)download_url_to_file)_extract_zipzNhttps://datashare.is.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip@f96258be9fdc2cbff6559541aae7ea4f59df3fcaf5cf963aae5ca647357e359cc            	           e Zd ZdZddedfdedededefd	Zd
efdZd
e	e
ef         fdZdededed
efdZded
efdZd
efdZdS )VCTK_092a:  *VCTK 0.92* :cite:`yamagishi2019vctk` dataset

    Args:
        root (str): Root directory where the dataset's top level directory is found.
        mic_id (str, optional): Microphone ID. Either ``"mic1"`` or ``"mic2"``. (default: ``"mic2"``)
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (default: ``False``).
        url (str, optional): The URL to download the dataset from.
            (default: ``"https://datashare.is.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip"``)
        audio_ext (str, optional): Custom audio extension if dataset is converted to non-default audio format.

    Note:
        * All the speeches from speaker ``p315`` will be skipped due to the lack of the corresponding text files.
        * All the speeches from ``p280`` will be skipped for ``mic_id="mic2"`` due to the lack of the audio files.
        * Some of the speeches from speaker ``p362`` will be skipped due to the lack of  the audio files.
        * See Also: https://datashare.is.ed.ac.uk/handle/10283/3443
    mic2Fz.flacrootmic_iddownloadurlc           
         |dvrt          d|           t          j                            |d          }t          j                            |d          | _        t          j                            | j        d          | _        t          j                            | j        d          | _        || _        || _        |rt          j        	                    | j                  sat          j        
                    |          s-t                              |d           }t          |||           t          || j                   t          j        	                    | j                  st          d          t          t          j        | j                            | _        g | _        	 | j        D ]}|d	k    r|d
k    rt          j                            | j        |          }	t          d t          j        |	          D                       D ]}
t          j                            |
          d         }t          j                            | j        || d| | j                   }|dk    r t          j        
                    |          s| j                            |                    d                     d S )N)mic1r   z3`mic_id` has to be either "mic1" or "mic2". Found: zVCTK-Corpus-0.92.zipzVCTK-Corpus-0.92txtwav48_silence_trimmed)hash_prefixz=Dataset not found. Please use `download=True` to download it.p280r   c              3   D   K   | ]}|                     d           |V  dS ).txtN)endswith).0fs     R/root/voice-cloning/.venv/lib/python3.11/site-packages/torchaudio/datasets/vctk.py	<genexpr>z$VCTK_092.__init__.<locals>.<genexpr>U   s7      (d(dqQRQ[Q[\bQcQc(d(d(d(d(d(d(d    r   _p362)RuntimeErrorospathjoin_path_txt_dir
_audio_dir_mic_id
_audio_extisdirisfile
_CHECKSUMSgetr   r   sortedlistdir_speaker_ids_sample_idssplitextappendsplit)selfr   r   r   r   	audio_extarchivechecksum
speaker_idutterance_dirutterance_fileutterance_idaudio_path_mics                r   __init__zVCTK_092.__init__&   s~    )))]U[]]^^^',,t%;<<W\\$(:;;
TZ77',,tz3JKK# 	27==,, 2w~~g.. M)~~c488H(g8LLLLWdj111w}}TZ(( 	`^___ #2:dm#<#<==		 + 	A 	AJV##&(8(8GLL
CCM"((d(dBJ}4M4M(d(d(d"d"d 	A 	A!w//??B!#O#??f?do??" "
 ''~0N0N' ''(:(:3(?(?@@@@	A		A 	Ar   returnc                     t          |          5 }|                                d         cd d d            S # 1 swxY w Y   d S )Nr   )open	readlinesr4   	file_paths     r   
_load_textzVCTK_092._load_text`   s    )__ 	,	&&((+	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	,s   7;;c                 *    t          j        |          S N)
torchaudioloadrB   s     r   _load_audiozVCTK_092._load_audiod   s    y)))r   r8   r;   c           
      ,   t           j                            | j        || d| d          }t           j                            | j        || d| d| | j                   }|                     |          }|                     |          \  }}|||||fS )Nr   r   )r!   r"   r#   r%   r&   r(   rD   rI   )	r4   r8   r;   r   transcript_path
audio_path
transcriptwaveformsample_rates	            r   _load_samplezVCTK_092._load_sampleg   s    ',,t}jZBdBdR^BdBdBdeeW\\ODDLDD6D4?DD
 

 ___55
 !% 0 0 < <++z:|LLr   nc                 Z    | j         |         \  }}|                     ||| j                  S )a  Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            Tensor:
                Waveform
            int:
                Sample rate
            str:
                Transcript
            str:
                Speaker ID
            std:
                Utterance ID
        )r0   rP   r'   )r4   rQ   r8   r;   s       r   __getitem__zVCTK_092.__getitem__w   s0    ( $(#3A#6 
L  \4<HHHr   c                 *    t          | j                  S rF   )lenr0   )r4   s    r   __len__zVCTK_092.__len__   s    4#$$$r   N)__name__
__module____qualname____doc__URLstrboolr=   rD   r   r   intrI   
SampleTyperP   rS   rV    r   r   r
   r
      s/        * 8A 8A8A 8A 	8A
 8A 8A 8A 8At,s , , , ,*fck(: * * * *Ms M# Ms Mz M M M M IS IZ I I I I.% % % % % % %r   r
   )r!   typingr   rG   torchr   torch.utils.datar   torchaudio._internalr   torchaudio.datasets.utilsr   r[   r+   r^   r\   r_   r
   r`   r   r   <module>rf      s    				                 $ $ $ $ $ $ 5 5 5 5 5 5 2 2 2 2 2 2VT  WY

 63S#-.
|% |% |% |% |%w |% |% |% |% |%r   