% Conference paper by Wu and Tokuda in the ICASSP 2009 proceedings.
% The proceedings title lives in booktitle, which inproceedings entries require.
% The abstract is kept in its own abstract field (not printed by the standard
% styles) so that note carries only the conference date and place.
% NOTE(review): the day range 9--24 is reproduced as found in the exported
% record; it may be a truncated 19--24 -- confirm against the proceedings.
@inproceedings{oai:nitech.repo.nii.ac.jp:00003405,
  author    = {Wu, Yi-Jian and Tokuda, Keiichi},
  title     = {Minimum generation error training by using original spectrum as reference for log spectral distortion measure},
  booktitle = {{ICASSP} 2009. {IEEE} International Conference on Acoustics, Speech and Signal Processing, 2009},
  pages     = {4013--4016},
  publisher = {Institute of Electrical and Electronics Engineers},
  year      = {2009},
  month     = apr,
  note      = {9--24 April 2009. Location: Taipei, Taiwan},
  abstract  = {This paper improves a minimum generation error (MGE) based HMM training technique for HMM-based speech synthesis by directly using the original spectrum instead of line spectral pairs (LSPs) as reference spectrum for log spectral distortion (LSD) measure. Two types of original reference spectra for LSD calculation are investigated, including the spectrum extracted from speech waveform by STRAIGHT, and the short-time FFT spectrum calculated from speech waveforms. Since only the harmonics of the FFT spectrum are coincident with the underlying spectral envelope, the LSD between generated LSPs and original FFT spectrum is calculated by sampling at the harmonic frequencies, and a weighting function is designed to simulate the sampling strategy on LSPs. From the experimental results, the MGE-LSD training using the FFT spectrum as reference spectrum achieved the best performance.},
}