% Journal article on text-independent speaker identification with MSD-GMM.
% The citation key is the repository OAI identifier and is kept as-is.
% Non-standard fields (nativename, yomi, mimetype) are ignored by standard
% styles; they retain repository metadata that must not be printed.
% NOTE(review): nativename/yomi look like the Japanese-script form and katakana
% reading of author Keiichi Tokuda, not an additional author -- confirm.
@article{oai:nitech.repo.nii.ac.jp:00004967,
  author     = {Miyajima, Chiyomi and Hattori, Yosuke and Tokuda, Keiichi and Masuko, Takashi and Kobayashi, Takao and Kitamura, Tadashi},
  title      = {Text-Independent Speaker Identification Using {Gaussian} Mixture Models Based on Multi-Space Probability Distribution},
  journal    = {{IEICE} Transactions on Information and Systems},
  volume     = {E84-D},
  number     = {7},
  pages      = {847--855},
  month      = jul,
  year       = {2001},
  abstract   = {This paper presents a new approach to modeling speech spectra and pitch for text-independent speaker identification using Gaussian mixture models based on multi-space probability distribution (MSD-GMM). MSD-GMM allows us to model continuous pitch values of voiced frames and discrete symbols for unvoiced frames in a unified framework. Spectral and pitch features are jointly modeled by a two-stream MSD-GMM. We derive maximum likelihood (ML) estimation formulae and minimum classification error (MCE) training procedure for MSD-GMM parameters. The MSD-GMM speaker models are evaluated for text-independent speaker identification tasks. The experimental results show that the MSD-GMM can efficiently model spectral and pitch features of each speaker and outperforms conventional speaker models. The results also demonstrate the utility of the MCE training of the MSD-GMM parameters and the robustness for the inter-session variability.},
  nativename = {徳田, 恵一},
  yomi       = {トクダ, ケイイチ},
  mimetype   = {application/pdf},
}