% Conference paper record exported from an institutional repository (OAI key kept unchanged).
% author_ja, yomi and format are non-standard fields that standard styles ignore; they preserve repository metadata.
@inproceedings{oai:nitech.repo.nii.ac.jp:00003411,
  author    = {Peng, Xianglin and Oura, Keiichiro and Nankaku, Yoshihiko and Tokuda, Keiichi},
  author_ja = {大浦, 圭一郎 and 南角, 吉彦},
  yomi      = {オオウラ, ケイイチロウ and ナンカク, ヨシヒコ},
  title     = {Cross-lingual speaker adaptation for {HMM}-based speech synthesis considering differences between language-dependent average voices},
  booktitle = {ICSP2010 Proceedings},
  publisher = {Institute of Electrical and Electronics Engineers},
  year      = {2010},
  month     = oct,
  note      = {Oct. 24--28, 2010, Beijing, China},
  format    = {application/pdf},
  abstract  = {This paper proposes an improved cross-lingual speaker adaptation technique with considering the differences between language-dependent average voices in a Speech-to-Speech Translation system. A state mapping based method had been introduced for cross-lingual speaker adaptation in HMM-based speech synthesis. In this method, the transforms estimated from the input language are applied to average voice models of the output language according to the state mapping information. However, the differences between average voices in the input and output language may degrade the adaptation performance. To reduce the differences, we apply a global linear transform to output average voice models, which minimizes the symmetric Kullback-Leibler divergence between two average voice models. From the experimental results, our approach could not obtain a better result than the original state mapping based method. This is because the global transform affects not only speaker characteristics but also language identity in acoustic features, and this degrades the synthetic speech quality. Therefore, it becomes clear that a technique which separate speaker and language identities is required.},
}