% Conference paper record exported from the NITech institutional repository.
% Cleanup notes:
%   - the venue is stored in `booktitle` (the export used the unknown field `book`);
%   - the abstract lives in `abstract`, with the word spacing lost in the export restored;
%   - the kanji spelling of Yoshihiko Nankaku is kept in the ignored field `author_ja`
%     (with its reading in `yomi`) instead of being listed as an extra author;
%   - the repository's MIME type is kept in the ignored field `format`.
% NOTE(review): the conference date/place in `note` is copied from the repository
% record as-is; confirm it against the printed proceedings before relying on it.
@inproceedings{oai:nitech.repo.nii.ac.jp:00003397,
  author    = {Yu, Zhi-Peng and Wu, Yi-Jian and Tokuda, Keiichi and Zen, Heiga and Nankaku, Yoshihiko},
  title     = {Analysis of Stream-Dependent Tying Structure for {HMM}-based Speech Synthesis},
  booktitle = {{ICSP} 2008. 9th International Conference on Signal Processing, 2008},
  pages     = {655--658},
  publisher = {Institute of Electrical and Electronics Engineers},
  year      = {2008},
  note      = {September 15--18, 2008, Tokyo, Japan},
  abstract  = {In conventional HMM-based speech synthesis framework, spectral features are modeled in one stream, and stream-dependent tree-based clustering was then applied for tying the model parameters. In this paper, we investigate several different stream-dependent tying structures for spectral features by splitting the feature vector into several streams. One splitting approach is to split each feature dimension into each stream. Another one is to split the static and dynamic features into different streams. Although splitting spectral features into different streams would ignore the correlation of context dependency between them, the number of model parameters can be optimized for each stream after stream-dependent clustering. From the experimental results, both splitting approaches can improve the quality of synthesized speech. However, the quality of synthesized speech became worse when we combined these two splitting approaches.},
  author_ja = {南角, 吉彦},
  yomi      = {ナンカク, ヨシヒコ},
  format    = {application/pdf},
}