
    ~Vjif                         d dl Z d dlZd dlmZ d dlmZmZmZmZ d dl	m
Z
 d dlmZ dddZh d	Zd
ee         dedee         fdZ G d de
          ZdS )    N)Path)IterableListTupleUnion)Dataset)download_url_to_file@209a8b4cd265013e96f4658632a9878103b0c5abf62b50d4ef3ae1be226b29e4@408ccaae803641c6d7b626b6299949320c2dbca96b2220fd3fb17887b023b027)Bhttp://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7bJhttp://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.symbols>8   .DOT+PLUS-DASH"QUOTE'QUOTE(PAREN)PAREN,COMMA--DASH.POINT/SLASH:COLON{BRACE(PARENS)PARENS-HYPHEN.PERIOD"UNQUOTE%PERCENT.DECIMAL
"END-QUOTE
"IN-QUOTES
#HASH-MARK
&AMPERSAND
'END-QUOTE
)END-PAREN
.FULL-STOP#POUND-SIGN#SHARP-SIGN(LEFT-PAREN)END-PARENS...ELLIPSIS;SEMI-COLON{LEFT-BRACE{OPEN-BRACE"CLOSE-QUOTE'INNER-QUOTE(PARENTHESES)CLOSE-PAREN)RIGHT-PAREN}CLOSE-BRACE}RIGHT-BRACE"DOUBLE-QUOTE"END-OF-QUOTE'SINGLE-QUOTE(BEGIN-PARENS)END-THE-PAREN;SEMI-COLON(1)?QUESTION-MARK(IN-PARENTHESES)UN-PARENTHESES'END-INNER-QUOTE)END-PARENTHESES(OPEN-PARENTHESES!EXCLAMATION-POINT)CLOSE-PARENTHESESlinesexclude_punctuationsreturnc                    t          j        d          }g }| D ]}|r|                    d          r|                                                    d          \  }}|t
          v r;|rP|                    d          rd}n |                    d          rd}n|d         }t          j        |d|          }|                    d          }|                    ||f           |S )	Nz
\([0-9]+\)z;;;z  z...z--r     )recompile
startswithstripsplit_PUNCTUATIONSsubappend)rF   rG   _alt_recmudictlinewordphoness          U/root/voice-cloning/.venv/lib/python3.11/site-packages/torchaudio/datasets/cmudict.py_parse_dictionaryrZ   J   s    j''G+-G ' ' 	tu-- 	zz||))$//f=  #  u%% && Aw
 vgr4((c""f~&&&&N    c                       e Zd ZdZ	 ddddddeeef         ded	ed
ededdfdZde	de
eee         f         fdZde	fdZedee         fd            ZdS )CMUDictaZ  *CMU Pronouncing Dictionary* :cite:`cmudict` (CMUDict) dataset.

    Args:
        root (str or Path): Path to the directory where the dataset is found or downloaded.
        exclude_punctuations (bool, optional):
            When enabled, exclude the pronounciation of punctuations, such as
            `!EXCLAMATION-POINT` and `#HASH-MARK`.
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (default: ``False``).
        url (str, optional):
            The URL to download the dictionary from.
            (default: ``"http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b"``)
        url_symbols (str, optional):
            The URL to download the list of symbols from.
            (default: ``"http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.symbols"``)
    TFr   r   )downloadurlurl_symbolsrootrG   r^   r_   r`   rH   Nc                   || _         t          |          | _        t          j                            | j                  st          d|           | j        t          j                            |          z  }| j        t          j                            |          z  }t          j                            |          s@|st          d|           t          
                    |d           }t          |||           t          j                            |          s@|st          d|           t          
                    |d           }t          |||           t          |d          5 }	d |	                                D             | _        d d d            n# 1 swxY w Y   t          |dd          5 }	t          |	                                | j                   | _        d d d            d S # 1 swxY w Y   d S )	Nz#The root directory does not exist; z`The dictionary file is not found in the following location. Set `download=True` to download it. z\The symbol file is not found in the following location. Set `download=True` to download it. rc                 6    g | ]}|                                 S  )rO   ).0rV   s     rY   
<listcomp>z$CMUDict.__init__.<locals>.<listcomp>   s     GGGdTZZ\\GGGr[   zlatin-1)encoding)rG   )rG   r   
_root_pathospathisdirRuntimeErrorbasenameexists
_CHECKSUMSgetr	   open	readlines_symbolsrZ   _dictionary)
selfra   rG   r^   r_   r`   	dict_filesymbol_filechecksumtexts
             rY   __init__zCMUDict.__init__{   s    %9!t**w}}T_-- 	MKTKKLLLObg&6&6s&;&;;	o(8(8(E(EEw~~i(( 	; "G;DG G   "~~c400H i:::w~~k** 	E "I;FI I   "~~k488H k8DDD+s## 	HtGGdnn6F6FGGGDM	H 	H 	H 	H 	H 	H 	H 	H 	H 	H 	H 	H 	H 	H 	H )S9555 	s01A1AX\XqrrrD	s 	s 	s 	s 	s 	s 	s 	s 	s 	s 	s 	s 	s 	s 	s 	s 	s 	ss$   .$FF"%F";.G66G:=G:nc                     | j         |         S )a  Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded.

        Returns:
            Tuple of a word and its phonemes

            str:
                Word
            List[str]:
                Phonemes
        )ru   )rv   r|   s     rY   __getitem__zCMUDict.__getitem__   s     ""r[   c                 *    t          | j                  S )N)lenru   rv   s    rY   __len__zCMUDict.__len__   s    4#$$$r[   c                 4    | j                                         S )zLlist[str]: A list of phonemes symbols, such as ``"AA"``, ``"AE"``, ``"AH"``.)rt   copyr   s    rY   symbolszCMUDict.symbols   s     }!!###r[   )T)__name__
__module____qualname____doc__r   strr   boolr{   intr   r   r~   r   propertyr   re   r[   rY   r]   r]   i   s        ( &*'s
 Wg's 's 'sCI's #'s
 's 's 's 
's 's 's 'sR#S #U3S	>%: # # # # % % % % % $c $ $ $ X$ $ $r[   r]   )rj   rL   pathlibr   typingr   r   r   r   torch.utils.datar   torchaudio._internalr	   rp   rQ   r   r   rZ   r]   re   r[   rY   <module>r      s   				 				       / / / / / / / / / / / / $ $ $ $ $ $ 5 5 5 5 5 5 KM SU 
9 9 9xXc] $ 4PS9    >Q$ Q$ Q$ Q$ Q$g Q$ Q$ Q$ Q$ Q$r[   