diff --git a/esphome/components/voice_assistant/__init__.py b/esphome/components/voice_assistant/__init__.py
index 5715604605..d05f39072c 100644
--- a/esphome/components/voice_assistant/__init__.py
+++ b/esphome/components/voice_assistant/__init__.py
@@ -29,6 +29,8 @@ CONF_ON_STT_VAD_END = "on_stt_vad_end"
 CONF_ON_STT_VAD_START = "on_stt_vad_start"
 CONF_ON_TTS_END = "on_tts_end"
 CONF_ON_TTS_START = "on_tts_start"
+CONF_ON_TTS_STREAM_START = "on_tts_stream_start"
+CONF_ON_TTS_STREAM_END = "on_tts_stream_end"
 CONF_ON_WAKE_WORD_DETECTED = "on_wake_word_detected"
 
 CONF_SILENCE_DETECTION = "silence_detection"
@@ -56,6 +58,17 @@ IsRunningCondition = voice_assistant_ns.class_(
     "IsRunningCondition", automation.Condition, cg.Parented.template(VoiceAssistant)
 )
 
+
+def tts_stream_validate(config):
+    """Raise cv.Invalid if a TTS stream trigger is set without a speaker."""
+    stream_keys = (CONF_ON_TTS_STREAM_START, CONF_ON_TTS_STREAM_END)
+    if CONF_SPEAKER in config or not any(key in config for key in stream_keys):
+        return config
+    raise cv.Invalid(
+        f"{CONF_SPEAKER} is required when using {CONF_ON_TTS_STREAM_START} and/or {CONF_ON_TTS_STREAM_END}"
+    )
+
+
 CONFIG_SCHEMA = cv.All(
     cv.Schema(
         {
@@ -105,8 +118,15 @@ CONFIG_SCHEMA = cv.All(
             cv.Optional(CONF_ON_STT_VAD_END): automation.validate_automation(
                 single=True
             ),
+            cv.Optional(CONF_ON_TTS_STREAM_START): automation.validate_automation(
+                single=True
+            ),
+            cv.Optional(CONF_ON_TTS_STREAM_END): automation.validate_automation(
+                single=True
+            ),
         }
     ).extend(cv.COMPONENT_SCHEMA),
+    tts_stream_validate,
 )
 
 
@@ -222,6 +242,20 @@ async def to_code(config):
             config[CONF_ON_STT_VAD_END],
         )
 
+    if CONF_ON_TTS_STREAM_START in config:
+        await automation.build_automation(
+            var.get_tts_stream_start_trigger(),
+            [],
+            config[CONF_ON_TTS_STREAM_START],
+        )
+
+    if CONF_ON_TTS_STREAM_END in config:
+        await automation.build_automation(
+            var.get_tts_stream_end_trigger(),
+            [],
+            config[CONF_ON_TTS_STREAM_END],
+        )
+
     cg.add_define("USE_VOICE_ASSISTANT")
 
 
diff --git a/esphome/components/voice_assistant/voice_assistant.cpp b/esphome/components/voice_assistant/voice_assistant.cpp
index 7ebbe762b3..9b13a71039 100644
--- a/esphome/components/voice_assistant/voice_assistant.cpp
+++ b/esphome/components/voice_assistant/voice_assistant.cpp
@@ -632,11 +632,17 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
     case api::enums::VOICE_ASSISTANT_TTS_STREAM_START: {
 #ifdef USE_SPEAKER
       this->wait_for_stream_end_ = true;
+      ESP_LOGD(TAG, "TTS stream start");
+      this->tts_stream_start_trigger_->trigger();
 #endif
       break;
     }
     case api::enums::VOICE_ASSISTANT_TTS_STREAM_END: {
       this->set_state_(State::RESPONSE_FINISHED, State::IDLE);
+#ifdef USE_SPEAKER
+      ESP_LOGD(TAG, "TTS stream end");
+      this->tts_stream_end_trigger_->trigger();
+#endif
       break;
     }
     case api::enums::VOICE_ASSISTANT_STT_VAD_START:
diff --git a/esphome/components/voice_assistant/voice_assistant.h b/esphome/components/voice_assistant/voice_assistant.h
index a985bc4678..f6dcd1c563 100644
--- a/esphome/components/voice_assistant/voice_assistant.h
+++ b/esphome/components/voice_assistant/voice_assistant.h
@@ -107,6 +107,10 @@ class VoiceAssistant : public Component {
   Trigger<> *get_start_trigger() const { return this->start_trigger_; }
   Trigger<> *get_stt_vad_end_trigger() const { return this->stt_vad_end_trigger_; }
   Trigger<> *get_stt_vad_start_trigger() const { return this->stt_vad_start_trigger_; }
+#ifdef USE_SPEAKER
+  Trigger<> *get_tts_stream_start_trigger() const { return this->tts_stream_start_trigger_; }
+  Trigger<> *get_tts_stream_end_trigger() const { return this->tts_stream_end_trigger_; }
+#endif
   Trigger<> *get_wake_word_detected_trigger() const { return this->wake_word_detected_trigger_; }
   Trigger<std::string> *get_stt_end_trigger() const { return this->stt_end_trigger_; }
   Trigger<std::string> *get_tts_end_trigger() const { return this->tts_end_trigger_; }
@@ -135,6 +139,10 @@ class VoiceAssistant : public Component {
   Trigger<> *start_trigger_ = new Trigger<>();
   Trigger<> *stt_vad_start_trigger_ = new Trigger<>();
   Trigger<> *stt_vad_end_trigger_ = new Trigger<>();
+#ifdef USE_SPEAKER
+  Trigger<> *tts_stream_start_trigger_ = new Trigger<>();
+  Trigger<> *tts_stream_end_trigger_ = new Trigger<>();
+#endif
   Trigger<> *wake_word_detected_trigger_ = new Trigger<>();
   Trigger<std::string> *stt_end_trigger_ = new Trigger<std::string>();
   Trigger<std::string> *tts_end_trigger_ = new Trigger<std::string>();