diff --git a/esphome/components/voice_assistant/__init__.py b/esphome/components/voice_assistant/__init__.py index 5715604605..d05f39072c 100644 --- a/esphome/components/voice_assistant/__init__.py +++ b/esphome/components/voice_assistant/__init__.py @@ -29,6 +29,8 @@ CONF_ON_STT_VAD_END = "on_stt_vad_end" CONF_ON_STT_VAD_START = "on_stt_vad_start" CONF_ON_TTS_END = "on_tts_end" CONF_ON_TTS_START = "on_tts_start" +CONF_ON_TTS_STREAM_START = "on_tts_stream_start" +CONF_ON_TTS_STREAM_END = "on_tts_stream_end" CONF_ON_WAKE_WORD_DETECTED = "on_wake_word_detected" CONF_SILENCE_DETECTION = "silence_detection" @@ -56,6 +58,17 @@ IsRunningCondition = voice_assistant_ns.class_( "IsRunningCondition", automation.Condition, cg.Parented.template(VoiceAssistant) ) + +def tts_stream_validate(config): + if CONF_SPEAKER not in config and ( + CONF_ON_TTS_STREAM_START in config or CONF_ON_TTS_STREAM_END in config + ): + raise cv.Invalid( + f"{CONF_SPEAKER} is required when using {CONF_ON_TTS_STREAM_START} and/or {CONF_ON_TTS_STREAM_END}" + ) + return config + + CONFIG_SCHEMA = cv.All( cv.Schema( { @@ -105,8 +118,15 @@ CONFIG_SCHEMA = cv.All( cv.Optional(CONF_ON_STT_VAD_END): automation.validate_automation( single=True ), + cv.Optional(CONF_ON_TTS_STREAM_START): automation.validate_automation( + single=True + ), + cv.Optional(CONF_ON_TTS_STREAM_END): automation.validate_automation( + single=True + ), } ).extend(cv.COMPONENT_SCHEMA), + tts_stream_validate, ) @@ -222,6 +242,20 @@ async def to_code(config): config[CONF_ON_STT_VAD_END], ) + if CONF_ON_TTS_STREAM_START in config: + await automation.build_automation( + var.get_tts_stream_start_trigger(), + [], + config[CONF_ON_TTS_STREAM_START], + ) + + if CONF_ON_TTS_STREAM_END in config: + await automation.build_automation( + var.get_tts_stream_end_trigger(), + [], + config[CONF_ON_TTS_STREAM_END], + ) + cg.add_define("USE_VOICE_ASSISTANT") diff --git a/esphome/components/voice_assistant/voice_assistant.cpp b/esphome/components/voice_assistant/voice_assistant.cpp index 7ebbe762b3..9b13a71039 100644 --- a/esphome/components/voice_assistant/voice_assistant.cpp +++ b/esphome/components/voice_assistant/voice_assistant.cpp @@ -632,11 +632,17 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) { case api::enums::VOICE_ASSISTANT_TTS_STREAM_START: { #ifdef USE_SPEAKER this->wait_for_stream_end_ = true; + ESP_LOGD(TAG, "TTS stream start"); + this->tts_stream_start_trigger_->trigger(); #endif break; } case api::enums::VOICE_ASSISTANT_TTS_STREAM_END: { this->set_state_(State::RESPONSE_FINISHED, State::IDLE); +#ifdef USE_SPEAKER + ESP_LOGD(TAG, "TTS stream end"); + this->tts_stream_end_trigger_->trigger(); +#endif break; } case api::enums::VOICE_ASSISTANT_STT_VAD_START: diff --git a/esphome/components/voice_assistant/voice_assistant.h b/esphome/components/voice_assistant/voice_assistant.h index a985bc4678..f6dcd1c563 100644 --- a/esphome/components/voice_assistant/voice_assistant.h +++ b/esphome/components/voice_assistant/voice_assistant.h @@ -107,6 +107,10 @@ class VoiceAssistant : public Component { Trigger<> *get_start_trigger() const { return this->start_trigger_; } Trigger<> *get_stt_vad_end_trigger() const { return this->stt_vad_end_trigger_; } Trigger<> *get_stt_vad_start_trigger() const { return this->stt_vad_start_trigger_; } +#ifdef USE_SPEAKER + Trigger<> *get_tts_stream_start_trigger() const { return this->tts_stream_start_trigger_; } + Trigger<> *get_tts_stream_end_trigger() const { return this->tts_stream_end_trigger_; } +#endif Trigger<> *get_wake_word_detected_trigger() const { return this->wake_word_detected_trigger_; } Trigger *get_stt_end_trigger() const { return this->stt_end_trigger_; } Trigger *get_tts_end_trigger() const { return this->tts_end_trigger_; } @@ -135,6 +139,10 @@ class VoiceAssistant : public Component { Trigger<> *start_trigger_ = new Trigger<>(); Trigger<> *stt_vad_start_trigger_ = new Trigger<>(); Trigger<> *stt_vad_end_trigger_ = new Trigger<>(); +#ifdef USE_SPEAKER + Trigger<> *tts_stream_start_trigger_ = new Trigger<>(); + Trigger<> *tts_stream_end_trigger_ = new Trigger<>(); +#endif Trigger<> *wake_word_detected_trigger_ = new Trigger<>(); Trigger *stt_end_trigger_ = new Trigger(); Trigger *tts_end_trigger_ = new Trigger();