Add stream start and end events (#5545)

This commit is contained in:
Jesse Hills 2023-10-17 17:18:05 +13:00 committed by GitHub
parent e42c51a222
commit 4913b3cc35
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 81 additions and 5 deletions

View file

@ -1459,6 +1459,8 @@ enum VoiceAssistantEvent {
VOICE_ASSISTANT_WAKE_WORD_END = 10; VOICE_ASSISTANT_WAKE_WORD_END = 10;
VOICE_ASSISTANT_STT_VAD_START = 11; VOICE_ASSISTANT_STT_VAD_START = 11;
VOICE_ASSISTANT_STT_VAD_END = 12; VOICE_ASSISTANT_STT_VAD_END = 12;
VOICE_ASSISTANT_TTS_STREAM_START = 98;
VOICE_ASSISTANT_TTS_STREAM_END = 99;
} }
message VoiceAssistantEventData { message VoiceAssistantEventData {

View file

@ -452,6 +452,10 @@ template<> const char *proto_enum_to_string<enums::VoiceAssistantEvent>(enums::V
return "VOICE_ASSISTANT_STT_VAD_START"; return "VOICE_ASSISTANT_STT_VAD_START";
case enums::VOICE_ASSISTANT_STT_VAD_END: case enums::VOICE_ASSISTANT_STT_VAD_END:
return "VOICE_ASSISTANT_STT_VAD_END"; return "VOICE_ASSISTANT_STT_VAD_END";
case enums::VOICE_ASSISTANT_TTS_STREAM_START:
return "VOICE_ASSISTANT_TTS_STREAM_START";
case enums::VOICE_ASSISTANT_TTS_STREAM_END:
return "VOICE_ASSISTANT_TTS_STREAM_END";
default: default:
return "UNKNOWN"; return "UNKNOWN";
} }

View file

@ -184,6 +184,8 @@ enum VoiceAssistantEvent : uint32_t {
VOICE_ASSISTANT_WAKE_WORD_END = 10, VOICE_ASSISTANT_WAKE_WORD_END = 10,
VOICE_ASSISTANT_STT_VAD_START = 11, VOICE_ASSISTANT_STT_VAD_START = 11,
VOICE_ASSISTANT_STT_VAD_END = 12, VOICE_ASSISTANT_STT_VAD_END = 12,
VOICE_ASSISTANT_TTS_STREAM_START = 98,
VOICE_ASSISTANT_TTS_STREAM_END = 99,
}; };
enum AlarmControlPanelState : uint32_t { enum AlarmControlPanelState : uint32_t {
ALARM_STATE_DISARMED = 0, ALARM_STATE_DISARMED = 0,

View file

@ -158,8 +158,13 @@ void I2SAudioSpeaker::watch_() {
if (xQueueReceive(this->event_queue_, &event, 0) == pdTRUE) { if (xQueueReceive(this->event_queue_, &event, 0) == pdTRUE) {
switch (event.type) { switch (event.type) {
case TaskEventType::STARTING: case TaskEventType::STARTING:
ESP_LOGD(TAG, "Starting I2S Audio Speaker");
break;
case TaskEventType::STARTED: case TaskEventType::STARTED:
ESP_LOGD(TAG, "Started I2S Audio Speaker");
break;
case TaskEventType::STOPPING: case TaskEventType::STOPPING:
ESP_LOGD(TAG, "Stopping I2S Audio Speaker");
break; break;
case TaskEventType::PLAYING: case TaskEventType::PLAYING:
this->status_clear_warning(); this->status_clear_warning();
@ -170,6 +175,7 @@ void I2SAudioSpeaker::watch_() {
this->player_task_handle_ = nullptr; this->player_task_handle_ = nullptr;
this->parent_->unlock(); this->parent_->unlock();
xQueueReset(this->buffer_queue_); xQueueReset(this->buffer_queue_);
ESP_LOGD(TAG, "Stopped I2S Audio Speaker");
break; break;
case TaskEventType::WARNING: case TaskEventType::WARNING:
ESP_LOGW(TAG, "Error writing to I2S: %s", esp_err_to_name(event.err)); ESP_LOGW(TAG, "Error writing to I2S: %s", esp_err_to_name(event.err));

View file

@ -281,11 +281,14 @@ void VoiceAssistant::loop() {
memmove(this->speaker_buffer_, this->speaker_buffer_ + written, this->speaker_buffer_size_ - written); memmove(this->speaker_buffer_, this->speaker_buffer_ + written, this->speaker_buffer_size_ - written);
this->speaker_buffer_size_ -= written; this->speaker_buffer_size_ -= written;
this->speaker_buffer_index_ -= written; this->speaker_buffer_index_ -= written;
this->set_timeout("speaker-timeout", 1000, [this]() { this->speaker_->stop(); }); this->set_timeout("speaker-timeout", 2000, [this]() { this->speaker_->stop(); });
} else { } else {
ESP_LOGW(TAG, "Speaker buffer full."); ESP_LOGW(TAG, "Speaker buffer full.");
} }
} }
if (this->wait_for_stream_end_) {
break; // We dont want to timeout here as the STREAM_END event will take care of that.
}
playing = this->speaker_->is_running(); playing = this->speaker_->is_running();
} }
#endif #endif
@ -295,28 +298,77 @@ void VoiceAssistant::loop() {
} }
#endif #endif
if (playing) { if (playing) {
this->set_timeout("playing", 100, [this]() { this->set_timeout("playing", 2000, [this]() {
this->cancel_timeout("speaker-timeout"); this->cancel_timeout("speaker-timeout");
this->set_state_(State::IDLE, State::IDLE); this->set_state_(State::IDLE, State::IDLE);
}); });
} }
break; break;
} }
case State::RESPONSE_FINISHED: {
#ifdef USE_SPEAKER
if (this->speaker_ != nullptr) {
this->speaker_->stop();
this->cancel_timeout("speaker-timeout");
this->cancel_timeout("playing");
this->speaker_buffer_size_ = 0;
this->speaker_buffer_index_ = 0;
memset(this->speaker_buffer_, 0, SPEAKER_BUFFER_SIZE);
}
#endif
this->wait_for_stream_end_ = false;
this->set_state_(State::IDLE, State::IDLE);
break;
}
default: default:
break; break;
} }
} }
/// Map a voice assistant State to its enumerator name for log output.
///
/// @param state State value to describe.
/// @return The enumerator's name wrapped in LOG_STR, or "UNKNOWN" for any
///         value that has no case below. Pass the result through LOG_STR_ARG
///         when handing it to a printf-style log macro.
static const LogString *voice_assistant_state_to_string(State state) {
  switch (state) {
    case State::IDLE:
      return LOG_STR("IDLE");
    case State::START_MICROPHONE:
      return LOG_STR("START_MICROPHONE");
    case State::STARTING_MICROPHONE:
      return LOG_STR("STARTING_MICROPHONE");
    case State::WAIT_FOR_VAD:
      return LOG_STR("WAIT_FOR_VAD");
    case State::WAITING_FOR_VAD:
      return LOG_STR("WAITING_FOR_VAD");
    case State::START_PIPELINE:
      return LOG_STR("START_PIPELINE");
    case State::STARTING_PIPELINE:
      return LOG_STR("STARTING_PIPELINE");
    case State::STREAMING_MICROPHONE:
      return LOG_STR("STREAMING_MICROPHONE");
    case State::STOP_MICROPHONE:
      return LOG_STR("STOP_MICROPHONE");
    case State::STOPPING_MICROPHONE:
      return LOG_STR("STOPPING_MICROPHONE");
    case State::AWAITING_RESPONSE:
      return LOG_STR("AWAITING_RESPONSE");
    case State::STREAMING_RESPONSE:
      return LOG_STR("STREAMING_RESPONSE");
    case State::RESPONSE_FINISHED:
      return LOG_STR("RESPONSE_FINISHED");
    default:
      // Fallback so a State without a case above still logs something readable.
      return LOG_STR("UNKNOWN");
  }
}
void VoiceAssistant::set_state_(State state) { void VoiceAssistant::set_state_(State state) {
State old_state = this->state_; State old_state = this->state_;
this->state_ = state; this->state_ = state;
ESP_LOGD(TAG, "State changed from %d to %d", static_cast<uint8_t>(old_state), static_cast<uint8_t>(state)); ESP_LOGD(TAG, "State changed from %s to %s", LOG_STR_ARG(voice_assistant_state_to_string(old_state)),
LOG_STR_ARG(voice_assistant_state_to_string(state)));
} }
void VoiceAssistant::set_state_(State state, State desired_state) { void VoiceAssistant::set_state_(State state, State desired_state) {
this->set_state_(state); this->set_state_(state);
this->desired_state_ = desired_state; this->desired_state_ = desired_state;
ESP_LOGD(TAG, "Desired state set to %d", static_cast<uint8_t>(desired_state)); ESP_LOGD(TAG, "Desired state set to %s", LOG_STR_ARG(voice_assistant_state_to_string(desired_state)));
} }
void VoiceAssistant::failed_to_start() { void VoiceAssistant::failed_to_start() {
@ -400,6 +452,7 @@ void VoiceAssistant::request_stop() {
break; break;
case State::AWAITING_RESPONSE: case State::AWAITING_RESPONSE:
case State::STREAMING_RESPONSE: case State::STREAMING_RESPONSE:
case State::RESPONSE_FINISHED:
break; // Let the incoming audio stream finish then it will go to idle. break; // Let the incoming audio stream finish then it will go to idle.
} }
} }
@ -531,6 +584,14 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
this->error_trigger_->trigger(code, message); this->error_trigger_->trigger(code, message);
break; break;
} }
case api::enums::VOICE_ASSISTANT_TTS_STREAM_START: {
this->wait_for_stream_end_ = true;
break;
}
case api::enums::VOICE_ASSISTANT_TTS_STREAM_END: {
this->set_state_(State::RESPONSE_FINISHED, State::IDLE);
break;
}
default: default:
ESP_LOGD(TAG, "Unhandled event type: %d", msg.event_type); ESP_LOGD(TAG, "Unhandled event type: %d", msg.event_type);
break; break;

View file

@ -46,6 +46,7 @@ enum class State {
STOPPING_MICROPHONE, STOPPING_MICROPHONE,
AWAITING_RESPONSE, AWAITING_RESPONSE,
STREAMING_RESPONSE, STREAMING_RESPONSE,
RESPONSE_FINISHED,
}; };
class VoiceAssistant : public Component { class VoiceAssistant : public Component {
@ -132,10 +133,10 @@ class VoiceAssistant : public Component {
uint8_t *speaker_buffer_; uint8_t *speaker_buffer_;
size_t speaker_buffer_index_{0}; size_t speaker_buffer_index_{0};
size_t speaker_buffer_size_{0}; size_t speaker_buffer_size_{0};
bool wait_for_stream_end_{false};
#endif #endif
#ifdef USE_MEDIA_PLAYER #ifdef USE_MEDIA_PLAYER
media_player::MediaPlayer *media_player_{nullptr}; media_player::MediaPlayer *media_player_{nullptr};
bool playing_tts_{false};
#endif #endif
bool local_output_{false}; bool local_output_{false};