
    1;ji57                         d dl Z d dlZd dlmZmZ ddlmZ ddlmZmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddZ G d d          ZdS )    N)ProcessQueue   )res_summarize)Configconfig)DataSet)FeatureExtractor)Model)	inference)ADF)	getFscorec                 6   | t                      } | j        t                      }nt          j        | j                  }|                    | j                   |                                 |                    | j        | j        | j	                   |                    | j
        | j        | j                   |                    | j	        | j        | j                   |                    | j        | j        | j                    | j                     t'          t(          j                            | j        | j                  d          | _        t'          t(          j                            | j        | j                  d          | _        t'          t(          j                            | j        | j                  d          | _        t=          d           | j                            d           t=          d           | j                            d           tA          j        | j        | j                  }tA          j        | j        | j                  }|!                    | j"                  }t=          d#                    tI          |          tI          |                               | j                            d#                    tI          |          tI          |                               | j                            d#                    | j%                             t=          d	#                    | j%                             | j&        r2| j                            d
#                    | j%                             tO          | ||          }g }g }g }g }tQ          | j)                  D ]3}	tU          j*                    }
|+                                \  }}}tU          j*                    |
z
  }|,                    |           |,                    |           |,                    |           |-                    ||	          }|,                    |           |d         }d#                    |	||| j.        |          }| j                            |dz              | j                            d           | j        /                                 t=          |           5ta          j        | ||||           | j        dk    r|j1                                         | j        2                                 | j        2                                 | j        2                                 ta          j3        |            t=          d           d S )Nwz
start training...z
start training...
z 
reading training & test data...z!
reading training & test data...
z"done! train/test data sizes: {}/{}z#done! train/test data sizes: {}/{}
z
r: {}
z
r: {}z
%r: {}
r   z7iter{}  diff={:.2e}  train-time(sec)={:.2f}  {}={:.2f}%
z1------------------------------------------------
r   z	finished.)4r   
init_modelr
   loadbuild	trainFilesave!convert_text_file_to_feature_filec_trainf_traintestFilec_testf_test convert_feature_file_to_idx_filefFeatureTrain
fGoldTrainfFeatureTest	fGoldTestglobalCheckopenospathjoinoutDirfLogswLogfResRawswResRawfTuneswTuneprintwriter	   resizetrainSizeScaleformatlenregrawResWriteTrainerrangettlItertimetrain_epochappendtestmetricflushr   modelclose	summarize)r   feature_extractortrainsettestsettrainer	time_listerr_list	diff_listscore_list_listitime_serrsample_sizedifftime_t
score_listscorelogstrs                     N/root/voice-cloning/.venv/lib/python3.11/site-packages/spacy_pkuseg/trainer.pytrainrT   "   s   ~ ,..,1&2CDDF,---77&.&.   77   66,f.?   66v*F,<   FV]FK@@#FFFL27<<v~FFLLFOfmV\BBCHHFM	
   
L.///	
-...
L<===|F0&2CDDHl6.0@AAGv455H	,33CMM3w<<PP   L.55MM3w<<	
 	
   L{))&*55666	)

6:
&
&''' ?l11&*==>>>fh(9::GIHIO6>""  !(!4!4!6!6[$v%   \\'1--
z***1JQQtVV]E
 
 	6D=)))OPPPf	8YPPP{a
L
O
MF###	+    c                   ~    e Zd Zd Zd Zd Zd ZdedefdZ	dedefdZ
ed	             Zdedefd
Zd Zd Zd ZdS )r6   c                 0   || _         || _        |j        | _        |j        | _        |j         t          | j        | j                  | _        nCt          j        |j                  | _        | j                            | j        | j                   | 	                    || j                  | _
        || _        i | _        |j                                        D ]=\  }}|                    d          rd}|                    d          rd}|| j        |<   >d S )NIO)r   X	n_featuren_tagr   r   r?   r   expand_get_optimizeroptimrB   idx_to_chunk_tag
tag_to_idxitems
startswith)selfr   datasetrB   tagidxs         rS   __init__zTrainer.__init__   s
    *]
$t~tz::DJJF$566DJJdndj999(($*==
!2 ")4::<< 	- 	-HC~~c"" ~~c"" ),D!#&&	- 	-rU   c                 b    | j         }d|j        v rt          |||          S t          d          )NadfzInvalid Optimizer)r   modelOptimizerr   
ValueError)rd   re   r?   r   s       rS   r^   zTrainer._get_optimizer   s8    F)))vw...,---rU   c                 4    | j                                         S N)r_   optimize)rd   s    rS   r:   zTrainer.train_epoch   s    z""$$$rU   c                 n   t           j                            t          j        t          j                            |                    }| j        | j        | j	        d}t          |dd          5 } |t          j                 || j        |          }d d d            n# 1 swxY w Y   |D ]	}d |_        
|S )N)ztok.acczstr.accf1r   utf8)encoding)r$   r%   r&   r   r'   fOutputr2   _decode_tokAcc_decode_strAcc_decode_fscorer#   
evalMetricr?   predicted_tags)rd   rD   	iterationoutfilefunc_mappingwriterrP   examples           rS   r<   zTrainer.test   s    ',,v}fn.C.CI.N.NOO **%
 
 '3000 	F8f&78V J	 	 	 	 	 	 	 	 	 	 	 	 	 	 	
  	* 	*G%)G""s   /#BB"%B"rD   r?   c                     t           j        dk    r|                     ||           d S |                     ||           d S )Nr   )r   nThread_decode_single_decode_multi_proc)rd   rD   r?   s      rS   _decodezTrainer._decode   sG    >Q/////##GU33333rU   c                 X    |D ]&}t          j        |j        |          \  }}||_        'd S rn   )_infdecodeViterbi_fastfeaturesry   )rd   rD   r?   r~   _tagss         rS   r   zTrainer._decode_single   s?     	* 	*G-g.>FFGAt%)G""	* 	*rU   c                     	 |                                 }|d S |\  }}t          j        ||           \  }}|                    ||f           Mrn   )getr   r   put)r?   in_queue	out_queueitemrg   r   r   r   s           rS   _decode_proczTrainer._decode_proc   sX    	'<<>>D| MC-h>>GAtMM3+&&&	'rU   c                 P   t                      }t                      }g }| j        j        }t          |          D ]0}t	          | j        |||f          }|                    |           1t          |          D ]!\  }	}
|                    |	|
j	        f           "|D ]+}|                    d            |
                                 ,t          t          |                    D ]&}|                                \  }	}|||	         _        '|D ]}|                                 d S )N)targetargs)r   r   r   r7   r   r   r;   	enumerater   r   startr3   r   ry   r&   )rd   rD   r?   r   r   procsnthreadrJ   prg   r~   procr   r   s                 rS   r   zTrainer._decode_multi_proc   s;   77GG	+%w 	 	A(x/K  A LLOOOO%g.. 	2 	2LCLL#w/01111 	 	DLLJJLLLLs7||$$ 	/ 	/A!IC*.GCL'' 	 	AFFHHHH	 	rU   c                 Z   | j         }|                     ||           |j        }dg|z  }dg|z  }dg|z  }|D ]}	|	j        }
|	j        }|P|                    d                    t          t          |
                               |                    d           t          |
|          D ];\  }}||xx         dz  cc<   ||xx         dz  cc<   ||k    r||xx         dz  cc<   <|j
                            d           d}d}d}t          t          |||                    D ]y\  }\  }}}
||z  }||
z  }||z  }|dk    rd}n|dz  |z  }|
dk    rd}n|dz  |
z  }|j
                            d                    |||
|||d|z  |z  ||z   z                       z|dk    rd}n|dz  |z  }|dk    rd}n|dz  |z  }|dk    r	|dk    rd}nd|z  |z  ||z   z  }|j
                            d	                    ||||||                     |j
                                         |gS )
Nr   ,r   r   zZ% tag-type  #gold  #output  #correct-output  token-precision  token-recall  token-f-score
      Y@z*% {}:  {}  {}  {}  {:.2f}  {:.2f}  {:.2f}
   z4% overall-tags:  {}  {}  {}  {:.2f}  {:.2f}  {:.2f}
)r   r   r\   ry   r   r/   r&   mapstrzipr)   r   r2   r>   )rd   re   r?   r}   r   r\   all_correctall_predall_goldr~   predgoldpred_taggold_tagsumGold	sumOutputsumCorrOutputrJ   correctrecprecfscores                         rS   ru   zTrainer._decode_tokAcc   s   We$$$cEk3;3; 	/ 	/G)D<D!SXXc#tnn55666T"""&)$oo / /"("""a'""""""a'"""x'')))Q.)))	/ 	i	
 	
 	
 	(1Xx00)
 )
 	 	$A$t tOGIW$Mqyyo,qyy-L=DDX^tcz2 
 
 
 
 a<<CC%''1C>>DD 5(94D199FFX^tcz2FCJJM4f 	
 	
 	

 	xrU   c           	         | j         }|                     ||           d}t          |          }|D ]}|j        }|j        }	|P|                    d                    t          t          |                               |                    d           t          ||	          D ]\  }
}|
|k    r n|dz  }||z  dz  }|j
                            d                    |||                     |gS )Nr   r   r   r   r   zAtotal-tag-strings={}  correct-tag-strings={}  string-accuracy={}%)r   r   r3   ry   r   r/   r&   r   r   r   r)   r2   )rd   re   r?   r}   r   r   totalr~   r   r   r   r   accs                rS   rv   zTrainer._decode_strAcc:  s   We$$$G 	 	G)D<D!SXXc#tnn55666T"""&)$oo  "(x''E ( 1o%OVVw 	
 	
 	

 urU   c                    | j         }|                     ||           g }g }|D ]}|j        }|j        }	d                    t          t          |                    }
|                    |
           |*|                    |
           |                    d           |                    d                    t          t          |	                               t          ||| j
                  \  }}|j                            d                    |d         |d         |d         |d         |d         |d                              |S )Nr   r   zl#gold-chunk={}  #output-chunk={}  #correct-output-chunk={}  precision={:.2f}  recall={:.2f}  f-score={:.2f}
r   r   r   )r   r   ry   r   r&   r   r   r;   r/   r   r`   r)   r2   )rd   re   r?   r}   r   	gold_tags	pred_tagsr~   r   r   pred_str	scoreListinfoLists                rS   rw   zTrainer._decode_fscoreY  sL   We$$$		 		7 		7G)D<DxxC//HX&&&!X&&&T"""SXXc#tnn556666'y$"7
 
	8 	{  C  C!!! 		
 		
 		
 rU   N)__name__
__module____qualname__rh   r^   r:   r<   r	   r   r   r   staticmethodr   r   ru   rv   rw    rU   rS   r6   r6      s       - - -.. . .% % %  (4w 4u 4 4 4 4*g *e * * * * ' ' \'' %    4O O Ob  >         rU   r6   rn   )r$   r9   multiprocessingr   r    r   r   r   datar	   rB   r
   r?   r   r   r   	optimizerr   scorerr   rT   r6   r   rU   rS   <module>r      s"  
 
			  * * * * * * * *       # " " " " " " "       / / / / / /                        ^ ^ ^ ^Bv v v v v v v v v vrU   