{"created":"2023-05-15T12:36:08.295078+00:00","id":5340,"links":{},"metadata":{"_buckets":{"deposit":"aaf07784-53c0-45c6-83ac-921cd8747881"},"_deposit":{"created_by":3,"id":"5340","owners":[3],"pid":{"revision_id":0,"type":"depid","value":"5340"},"status":"published"},"_oai":{"id":"oai:nitech.repo.nii.ac.jp:00005340","sets":["31"]},"author_link":["10746","18461","3028","464","8864"],"item_10001_biblio_info_28":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDate":"2007-05-01","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"5","bibliographicPageEnd":"834","bibliographicPageStart":"825","bibliographicVolumeNumber":"E90-D","bibliographic_titles":[{"bibliographic_title":"IEICE transactions on information and systems"}]}]},"item_10001_description_36":{"attribute_name":"内容記述","attribute_value_mlt":[{"subitem_description":"A statistical speech synthesis system based on the hidden Markov model (HMM) was recently proposed. In this system, spectrum, excitation, and duration of speech are modeled simultaneously by context-dependent HMMs, and speech parameter vector sequences are generated from the HMMs themselves. This system defines a speech synthesis problem in a generative model framework and solves it based on the maximum likelihood (ML) criterion. However, there is an inconsistency: although state duration probability density functions (PDFs) are explicitly used in the synthesis part of the system, they have not been incorporated into its training part. This inconsistency can make the synthesized speech sound less natural. In this paper, we propose a statistical speech synthesis system based on a hidden semi-Markov model (HSMM), which can be viewed as an HMM with explicit state duration PDFs. The use of HSMMs can solve the above inconsistency because we can incorporate the state duration PDFs explicitly into both the synthesis and the training parts of the system. Subjective listening test results show that use of HSMMs improves the reported naturalness of synthesized speech.","subitem_description_type":"Other"}]},"item_10001_description_38":{"attribute_name":"フォーマット","attribute_value_mlt":[{"subitem_description":"application/pdf","subitem_description_type":"Other"}]},"item_10001_full_name_27":{"attribute_name":"著者別名","attribute_value_mlt":[{"affiliations":[{"affiliationNames":[{"affiliationName":"","lang":"ja"}],"nameIdentifiers":[{"nameIdentifier":"","nameIdentifierScheme":"ISNI","nameIdentifierURI":"http://www.isni.org/isni/"}]}],"familyNames":[{},{},{}],"givenNames":[{},{},{}],"nameIdentifiers":[{},{}],"names":[{"name":"Tokuda, Keiichi","nameLang":"en"},{"name":"徳田, 恵一","nameLang":"ja"},{"name":"トクダ, ケイイチ","nameLang":"ja-Kana"}]},{"nameIdentifiers":[{}],"names":[{"name":"北村, 正"}]}]},"item_10001_publisher_29":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"Institute of Electronics, Information and Communication Engineers"}]},"item_10001_source_id_30":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"09168532","subitem_source_identifier_type":"ISSN"}]},"item_10001_source_id_32":{"attribute_name":"書誌レコードID(NCID)","attribute_value_mlt":[{"subitem_source_identifier":"AA10826272","subitem_source_identifier_type":"NCID"}]},"item_10001_version_type_33":{"attribute_name":"著者版フラグ","attribute_value_mlt":[{"subitem_version_resource":"http://purl.org/coar/version/c_970fb48d4fbd8a85","subitem_version_type":"VoR"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Zen, Heiga","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorAffiliations":[{"affiliationNameIdentifiers":[{"affiliationNameIdentifier":"","affiliationNameIdentifierScheme":"ISNI","affiliationNameIdentifierURI":"http://www.isni.org/isni/"}],"affiliationNames":[{"affiliationName":"","affiliationNameLang":"ja"}]}],"creatorNames":[{"creatorName":"Tokuda, Keiichi","creatorNameLang":"en"},{"creatorName":"徳田, 恵一","creatorNameLang":"ja"},{"creatorName":"トクダ, ケイイチ","creatorNameLang":"ja-Kana"}],"familyNames":[{},{},{}],"givenNames":[{},{},{}],"nameIdentifiers":[{},{}]},{"creatorNames":[{"creatorName":"Masuko, Takashi","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Kobayashi, Takao","creatorNameLang":"en"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Kitamura, Tadashi","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2017-01-25"}],"displaytype":"detail","filename":"E90-D_825.pdf","filesize":[{"value":"465.2 kB"}],"format":"application/pdf","license_note":"Copyright(c)2007 IEICE http://search.ieice.org/index.html","licensetype":"license_note","mimetype":"application/pdf","url":{"label":"本文_fulltext","url":"https://nitech.repo.nii.ac.jp/record/5340/files/E90-D_825.pdf"},"version_id":"9076523a-9098-45f0-a415-5845355861b4"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"journal article","resourceuri":"http://purl.org/coar/resource_type/c_6501"}]},"item_title":"A Hidden Semi-Markov Model-Based Speech Synthesis System","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"A Hidden Semi-Markov Model-Based Speech Synthesis System","subitem_title_language":"en"}]},"item_type_id":"10001","owner":"3","path":["31"],"pubdate":{"attribute_name":"公開日","attribute_value":"2012-11-07"},"publish_date":"2012-11-07","publish_status":"0","recid":"5340","relation_version_is_last":true,"title":["A Hidden Semi-Markov Model-Based Speech Synthesis System"],"weko_creator_id":"3","weko_shared_id":3},"updated":"2023-07-10T03:01:05.045030+00:00"}