
    ~VjiS                         d dl Z d dlmZ d dlmZmZmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d	Zd
ZdZdgdgddgdZdedee         dedeeeef                  fdZ G d de          ZdS )    N)Path)ListTupleUnion)Tensor)Dataset)download_url_to_file)_get_librispeech_metadata)_extract_tarlibrispeech_finetuningzIhttps://dl.fbaipublicfiles.com/librilight/data/librispeech_finetuning.tgz@5d1efdc777b548194d7e09ba89126e2188026df9fd57aa57eb14408d2b2342afz1h/0z1h/*9h)10min1h10hpathfolders
_ext_audioreturnc                      t                      g }|D ]7} fd                     | d|           D             }|d |D             z  }8|                    d            |S )a  Get the file names and the corresponding file paths without `speaker_id`
    and `chapter_id` directories.
    The format of path is like:
        {root}/{_ARCHIVE_NAME}/1h/[0-5]/[clean, other] or
        {root}/{_ARCHIVE_NAME}/9h/[clean, other]

    Args:
        path (Path): Root path to the dataset.
        folders (List[str]): Folders that contain the desired audio files.
        _ext_audio (str): Extension of audio files.

    Returns:
        List[Tuple[str, str]]:
            List of tuples where the first element is the relative path to the audio file.
            The format of relative path is like:
            1h/[0-5]/[clean, other] or 9h/[clean, other]
            The second element is the file name without audio extension.
    c                 :    g | ]}|                               S  )relative_to).0pr   s     `/root/voice-cloning/.venv/lib/python3.11/site-packages/torchaudio/datasets/librilight_limited.py
<listcomp>z&_get_fileids_paths.<locals>.<listcomp>*   s%    YYYt$$YYY    z/*/*/*/*c                 r    g | ]4}t          |j        j        j                  t          |j                  f5S r   )strparentstem)r   r   s     r   r   z&_get_fileids_paths.<locals>.<listcomp>+   s4    RRRqQX_344c!&kkBRRRr   c                 $    | d         | d         z   S )Nr      r   )xs    r   <lambda>z$_get_fileids_paths.<locals>.<lambda>,   s    1Q4!A$; r   )key)r   globsort)r   r   r   files_pathsfolderpathss   `     r   _get_fileids_pathsr-      s    ( ::DK S SYYYYdii68W8W:8W8W.X.XYYYRRERRRR..///r   c            
       |    e Zd ZdZdZdZ	 	 ddeeef         dede	d	d
fdZ
ded	eeeeeeef         fdZd	efdZd
S )LibriLightLimiteda  Subset of Libri-light :cite:`librilight` dataset,
    which was used in HuBERT :cite:`hsu2021hubert` for supervised fine-tuning.

    Args:
        root (str or Path): Path to the directory where the dataset is found or downloaded.
        subset (str, optional): The subset to use. Options: [``"10min"``, ``"1h"``, ``"10h"``]
            (Default: ``"10min"``).
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (default: ``False``).
    z
.trans.txtz.flacr   Frootsubsetdownloadr   Nc                    |t           vr,t          dt                                            d|           t           |         }t          j        |          }t          j                            |t                    | _        t          j                            |t           d          }t          j        	                    | j                  s[|st          d          t          j                            |          st          t          |t                     t          |           t!          | j        || j                  | _        d S )Nz`subset` must be one of z	. Found: z.tgzz9Dataset not found. Please use `download=True` to download)hash_prefix)_SUBSET_MAP
ValueErrorkeysosfspathr   join_ARCHIVE_NAME_pathisdirRuntimeErrorisfiler	   _URL	_CHECKSUMr   r-   r   _fileids_paths)selfr0   r1   r2   r   archives         r   __init__zLibriLightLimited.__init__?   s    $$]8H8H8J8J]]U[]]^^^f%yW\\$66
',,t%;%;%;<<w}}TZ(( 	" `"#^___7>>'** K$T7	JJJJ!!!0WdoVVr   nc                    | j         |         \  }}t          || j        || j        | j                  }t          j        t          j        	                    | j        |d                             \  }}|f|dd         z   S )a  Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded
        Returns:
            Tuple of the following items;

            Tensor:
                Waveform
            int:
                Sample rate
            str:
                Transcript
            int:
                Speaker ID
            int:
                Chapter ID
            int:
                Utterance ID
        r   r$   N)
rB   r
   r<   r   _ext_txt
torchaudioloadr8   r   r:   )rC   rF   	file_pathfileidmetadatawaveform_s          r   __getitem__zLibriLightLimited.__getitem__T   sq    * !/2	6,VTZDO]a]jkk obgll4:x{&K&KLL!{Xabb\))r   c                 *    t          | j                  S )N)lenrB   )rC   s    r   __len__zLibriLightLimited.__len__n   s    4&'''r   )r   F)__name__
__module____qualname____doc__rH   r   r   r    r   boolrE   intr   r   rP   rS   r   r   r   r/   r/   0   s        	 	 HJ
 	W WCIW W 	W
 
W W W W**S *U63S#s+J%K * * * *4( ( ( ( ( ( (r   r/   )r8   pathlibr   typingr   r   r   rI   torchr   torch.utils.datar   torchaudio._internalr	   torchaudio.datasets.librispeechr
   torchaudio.datasets.utilsr   r;   r@   rA   r5   r    r-   r/   r   r   r   <module>ra      sF   				       % % % % % % % % % %           $ $ $ $ $ $ 5 5 5 5 5 5 E E E E E E 2 2 2 2 2 2 )RN	&$HHT DI 3 4PUVY[^V^P_K`    :?( ?( ?( ?( ?( ?( ?( ?( ?( ?(r   