
    ~Vjig                         d dl Z d dlZd dlmZ d dlmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZmZ dZdZd	Zg d
Z G d de
          ZdededefdZdS )    N)Path)OptionalTupleUnion)Dataset)download_url_to_file)_extract_tar_load_waveformz6https://speech.fit.vutbr.cz/files/quesst14Database.tgzi@  @4f869e06bc066bbe9c5dde31dbd3909a0870d70291110ebbb38878dcbc2fc5e4)albanianbasqueczech	nnenglishromanianslovakc                       e Zd ZdZ	 	 ddeeef         dedee         dedd	f
d
Z	de
deee
ef         fdZde
deej        e
ef         fdZde
fdZd	S )QUESST14a  *QUESST14* :cite:`Mir2015QUESST2014EQ` dataset.

    Args:
        root (str or Path): Root directory where the dataset's top level directory is found
        subset (str): Subset of the dataset to use. Options: [``"docs"``, ``"dev"``, ``"eval"``].
        language (str or None, optional): Language to get dataset for.
            Options: [``None``, ``albanian``, ``basque``, ``czech``, ``nnenglish``, ``romanian``, ``slovak``].
            If ``None``, dataset consists of all languages. (default: ``"nnenglish"``)
        download (bool, optional): Whether to download the dataset if it is not found at root path.
            (default: ``False``)
    r   FrootsubsetlanguagedownloadreturnNc                    |dvrt          d          |-|t          vr$t          dt          t                               t          j        |          }t          j                            t                    }t          j                            ||          }|	                    dd          d         }t          j                            ||          | _
        t          j                            | j
                  s\t          j                            |          s-|st          d          t          t          |t                     t!          ||           |d	k    rt#          | j
        |d
          | _        d S |dk    rt#          | j
        |d          | _        d S |dk    rt#          | j
        |d          | _        d S d S )N)docsdevevalz/`subset` must be one of ['docs', 'dev', 'eval']z"`language` must be None or one of .   r   z9Dataset not found. Please use `download=True` to download)hash_prefixr   zlanguage_key_utterances.lstr   zlanguage_key_dev.lstr   zlanguage_key_eval.lst)
ValueError
_LANGUAGESstrosfspathpathbasenameURLjoinrsplit_pathisdirisfileRuntimeErrorr   	_CHECKSUMr	   filter_audio_pathsdata)selfr   r   r   r   r&   archives          V/root/voice-cloning/.venv/lib/python3.11/site-packages/torchaudio/datasets/quesst14.py__init__zQUESST14.__init__&   s    000NOOOHJ$>$>S#j//SSTTT y7##C((',,tX..??3**1-W\\$11
w}}TZ(( 	(7>>'** J d&'bccc$S'yIIII$'''V*4:xA^__DIIIu__*4:xAWXXDIIIv*4:xAXYYDIII     nc                     | j         |         }t          j                            || j                  }|t
          |                    d          j        fS )a  Get metadata for the n-th sample from the dataset. Returns filepath instead of waveform,
        but otherwise returns the same fields as :py:func:`__getitem__`.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            str:
                Path to audio
            int:
                Sample rate
            str:
                File name
         )r0   r#   r%   relpathr*   SAMPLE_RATEwith_suffixname)r1   r6   
audio_pathr9   s       r3   get_metadatazQUESST14.get_metadataJ   sD    " Yq\
'//*dj99Z%;%;B%?%?%DDDr5   c                     |                      |          }t          | j        |d         |d                   }|f|dd         z   S )a:  Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            Tensor:
                Waveform
            int:
                Sample rate
            str:
                File name
        r      N)r>   r
   r*   )r1   r6   metadatawaveforms       r3   __getitem__zQUESST14.__getitem___   sG      $$Q''!$*hqk8A;GG{Xabb\))r5   c                 *    t          | j                  S )N)lenr0   )r1   s    r3   __len__zQUESST14.__len__s   s    49~~r5   )r   F)__name__
__module____qualname____doc__r   r"   r   r   boolr4   intr   r>   torchTensorrC   rF    r5   r3   r   r      s       
 
  #."Z "ZCI"Z "Z 3-	"Z
 "Z 
"Z "Z "Z "ZHEc EeCcM&: E E E E**S *U5<c+A%B * * * *(      r5   r   r%   r   lst_namec                 N   g }t          |           } t          | dz  |z            5 }|D ]b}|                                                                \  }}|||k    r4t	          j        dd|          }|                    | |z             c	 ddd           n# 1 swxY w Y   |S )z+Extract audio paths for the given language.scoringNz^.*?\/r8   )r   openstripsplitresubappend)r%   r   rP   audio_pathsfliner=   langs           r3   r/   r/   w   s     K::D	dY)	*	* 2a 	2 	2D#zz||1133J#(8(8	2z::Jtj01111	22 2 2 2 2 2 2 2 2 2 2 2 2 2 2 s   A&BB!B)r#   rV   pathlibr   typingr   r   r   rM   torch.utils.datar   torchaudio._internalr   torchaudio.datasets.utilsr	   r
   r'   r:   r.   r!   r   r"   r/   rO   r5   r3   <module>rb      s   				 				       ) ) ) ) ) ) ) ) ) )  $ $ $ $ $ $ 5 5 5 5 5 5 B B B B B B B B ?N	  
[ [ [ [ [w [ [ [|
      r5   