From 4ac49907ca339345f934467ff73f56654b7afc49 Mon Sep 17 00:00:00 2001 From: Keith Burzinski Date: Wed, 15 Nov 2023 21:29:50 -0600 Subject: [PATCH 1/3] Add more VA triggers (#5762) --- .../components/voice_assistant/__init__.py | 61 ++++++++++++++++--- .../voice_assistant/voice_assistant.cpp | 39 ++++++++---- .../voice_assistant/voice_assistant.h | 16 +++-- 3 files changed, 91 insertions(+), 25 deletions(-) diff --git a/esphome/components/voice_assistant/__init__.py b/esphome/components/voice_assistant/__init__.py index 3270b9f370..5715604605 100644 --- a/esphome/components/voice_assistant/__init__.py +++ b/esphome/components/voice_assistant/__init__.py @@ -18,20 +18,25 @@ DEPENDENCIES = ["api", "microphone"] CODEOWNERS = ["@jesserockz"] -CONF_SILENCE_DETECTION = "silence_detection" -CONF_ON_LISTENING = "on_listening" -CONF_ON_START = "on_start" -CONF_ON_WAKE_WORD_DETECTED = "on_wake_word_detected" -CONF_ON_STT_END = "on_stt_end" -CONF_ON_TTS_START = "on_tts_start" -CONF_ON_TTS_END = "on_tts_end" CONF_ON_END = "on_end" CONF_ON_ERROR = "on_error" +CONF_ON_INTENT_END = "on_intent_end" +CONF_ON_INTENT_START = "on_intent_start" +CONF_ON_LISTENING = "on_listening" +CONF_ON_START = "on_start" +CONF_ON_STT_END = "on_stt_end" +CONF_ON_STT_VAD_END = "on_stt_vad_end" +CONF_ON_STT_VAD_START = "on_stt_vad_start" +CONF_ON_TTS_END = "on_tts_end" +CONF_ON_TTS_START = "on_tts_start" +CONF_ON_WAKE_WORD_DETECTED = "on_wake_word_detected" + +CONF_SILENCE_DETECTION = "silence_detection" CONF_USE_WAKE_WORD = "use_wake_word" CONF_VAD_THRESHOLD = "vad_threshold" -CONF_NOISE_SUPPRESSION_LEVEL = "noise_suppression_level" CONF_AUTO_GAIN = "auto_gain" +CONF_NOISE_SUPPRESSION_LEVEL = "noise_suppression_level" CONF_VOLUME_MULTIPLIER = "volume_multiplier" @@ -88,6 +93,18 @@ CONFIG_SCHEMA = cv.All( cv.Optional(CONF_ON_CLIENT_DISCONNECTED): automation.validate_automation( single=True ), + cv.Optional(CONF_ON_INTENT_START): automation.validate_automation( + single=True + ), + 
cv.Optional(CONF_ON_INTENT_END): automation.validate_automation( + single=True + ), + cv.Optional(CONF_ON_STT_VAD_START): automation.validate_automation( + single=True + ), + cv.Optional(CONF_ON_STT_VAD_END): automation.validate_automation( + single=True + ), } ).extend(cv.COMPONENT_SCHEMA), ) @@ -177,6 +194,34 @@ async def to_code(config): config[CONF_ON_CLIENT_DISCONNECTED], ) + if CONF_ON_INTENT_START in config: + await automation.build_automation( + var.get_intent_start_trigger(), + [], + config[CONF_ON_INTENT_START], + ) + + if CONF_ON_INTENT_END in config: + await automation.build_automation( + var.get_intent_end_trigger(), + [], + config[CONF_ON_INTENT_END], + ) + + if CONF_ON_STT_VAD_START in config: + await automation.build_automation( + var.get_stt_vad_start_trigger(), + [], + config[CONF_ON_STT_VAD_START], + ) + + if CONF_ON_STT_VAD_END in config: + await automation.build_automation( + var.get_stt_vad_end_trigger(), + [], + config[CONF_ON_STT_VAD_END], + ) + cg.add_define("USE_VOICE_ASSISTANT") diff --git a/esphome/components/voice_assistant/voice_assistant.cpp b/esphome/components/voice_assistant/voice_assistant.cpp index fc5dd6e4e4..7ebbe762b3 100644 --- a/esphome/components/voice_assistant/voice_assistant.cpp +++ b/esphome/components/voice_assistant/voice_assistant.cpp @@ -31,7 +31,7 @@ void VoiceAssistant::setup() { this->socket_ = socket::socket(AF_INET, SOCK_DGRAM, IPPROTO_IP); if (socket_ == nullptr) { - ESP_LOGW(TAG, "Could not create socket."); + ESP_LOGW(TAG, "Could not create socket"); this->mark_failed(); return; } @@ -69,7 +69,7 @@ void VoiceAssistant::setup() { ExternalRAMAllocator speaker_allocator(ExternalRAMAllocator::ALLOW_FAILURE); this->speaker_buffer_ = speaker_allocator.allocate(SPEAKER_BUFFER_SIZE); if (this->speaker_buffer_ == nullptr) { - ESP_LOGW(TAG, "Could not allocate speaker buffer."); + ESP_LOGW(TAG, "Could not allocate speaker buffer"); this->mark_failed(); return; } @@ -79,7 +79,7 @@ void VoiceAssistant::setup() { 
ExternalRAMAllocator allocator(ExternalRAMAllocator::ALLOW_FAILURE); this->input_buffer_ = allocator.allocate(INPUT_BUFFER_SIZE); if (this->input_buffer_ == nullptr) { - ESP_LOGW(TAG, "Could not allocate input buffer."); + ESP_LOGW(TAG, "Could not allocate input buffer"); this->mark_failed(); return; } @@ -89,7 +89,7 @@ void VoiceAssistant::setup() { this->ring_buffer_ = rb_create(BUFFER_SIZE, sizeof(int16_t)); if (this->ring_buffer_ == nullptr) { - ESP_LOGW(TAG, "Could not allocate ring buffer."); + ESP_LOGW(TAG, "Could not allocate ring buffer"); this->mark_failed(); return; } @@ -98,7 +98,7 @@ void VoiceAssistant::setup() { ExternalRAMAllocator send_allocator(ExternalRAMAllocator::ALLOW_FAILURE); this->send_buffer_ = send_allocator.allocate(SEND_BUFFER_SIZE); if (send_buffer_ == nullptr) { - ESP_LOGW(TAG, "Could not allocate send buffer."); + ESP_LOGW(TAG, "Could not allocate send buffer"); this->mark_failed(); return; } @@ -221,8 +221,8 @@ void VoiceAssistant::loop() { msg.audio_settings = audio_settings; if (this->api_client_ == nullptr || !this->api_client_->send_voice_assistant_request(msg)) { - ESP_LOGW(TAG, "Could not request start."); - this->error_trigger_->trigger("not-connected", "Could not request start."); + ESP_LOGW(TAG, "Could not request start"); + this->error_trigger_->trigger("not-connected", "Could not request start"); this->continuous_ = false; this->set_state_(State::IDLE, State::IDLE); break; @@ -280,7 +280,7 @@ void VoiceAssistant::loop() { this->speaker_buffer_size_ += len; } } else { - ESP_LOGW(TAG, "Receive buffer full."); + ESP_LOGW(TAG, "Receive buffer full"); } if (this->speaker_buffer_size_ > 0) { size_t written = this->speaker_->play(this->speaker_buffer_, this->speaker_buffer_size_); @@ -290,7 +290,7 @@ void VoiceAssistant::loop() { this->speaker_buffer_index_ -= written; this->set_timeout("speaker-timeout", 2000, [this]() { this->speaker_->stop(); }); } else { - ESP_LOGW(TAG, "Speaker buffer full."); + ESP_LOGW(TAG, "Speaker 
buffer full"); } } if (this->wait_for_stream_end_) { @@ -513,7 +513,7 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) { break; } case api::enums::VOICE_ASSISTANT_STT_START: - ESP_LOGD(TAG, "STT Started"); + ESP_LOGD(TAG, "STT started"); this->listening_trigger_->trigger(); break; case api::enums::VOICE_ASSISTANT_STT_END: { @@ -525,19 +525,24 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) { } } if (text.empty()) { - ESP_LOGW(TAG, "No text in STT_END event."); + ESP_LOGW(TAG, "No text in STT_END event"); return; } ESP_LOGD(TAG, "Speech recognised as: \"%s\"", text.c_str()); this->stt_end_trigger_->trigger(text); break; } + case api::enums::VOICE_ASSISTANT_INTENT_START: + ESP_LOGD(TAG, "Intent started"); + this->intent_start_trigger_->trigger(); + break; case api::enums::VOICE_ASSISTANT_INTENT_END: { for (auto arg : msg.data) { if (arg.name == "conversation_id") { this->conversation_id_ = std::move(arg.value); } } + this->intent_end_trigger_->trigger(); break; } case api::enums::VOICE_ASSISTANT_TTS_START: { @@ -548,7 +553,7 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) { } } if (text.empty()) { - ESP_LOGW(TAG, "No text in TTS_START event."); + ESP_LOGW(TAG, "No text in TTS_START event"); return; } ESP_LOGD(TAG, "Response: \"%s\"", text.c_str()); @@ -566,7 +571,7 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) { } } if (url.empty()) { - ESP_LOGW(TAG, "No url in TTS_END event."); + ESP_LOGW(TAG, "No url in TTS_END event"); return; } ESP_LOGD(TAG, "Response URL: \"%s\"", url.c_str()); @@ -634,6 +639,14 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) { this->set_state_(State::RESPONSE_FINISHED, State::IDLE); break; } + case api::enums::VOICE_ASSISTANT_STT_VAD_START: + ESP_LOGD(TAG, "Starting STT by VAD"); + this->stt_vad_start_trigger_->trigger(); + break; + case api::enums::VOICE_ASSISTANT_STT_VAD_END: + ESP_LOGD(TAG, 
"STT by VAD end"); + this->stt_vad_end_trigger_->trigger(); + break; default: ESP_LOGD(TAG, "Unhandled event type: %d", msg.event_type); break; diff --git a/esphome/components/voice_assistant/voice_assistant.h b/esphome/components/voice_assistant/voice_assistant.h index a265522bca..a985bc4678 100644 --- a/esphome/components/voice_assistant/voice_assistant.h +++ b/esphome/components/voice_assistant/voice_assistant.h @@ -100,13 +100,17 @@ class VoiceAssistant : public Component { void set_auto_gain(uint8_t auto_gain) { this->auto_gain_ = auto_gain; } void set_volume_multiplier(float volume_multiplier) { this->volume_multiplier_ = volume_multiplier; } + Trigger<> *get_intent_end_trigger() const { return this->intent_end_trigger_; } + Trigger<> *get_intent_start_trigger() const { return this->intent_start_trigger_; } Trigger<> *get_listening_trigger() const { return this->listening_trigger_; } + Trigger<> *get_end_trigger() const { return this->end_trigger_; } Trigger<> *get_start_trigger() const { return this->start_trigger_; } + Trigger<> *get_stt_vad_end_trigger() const { return this->stt_vad_end_trigger_; } + Trigger<> *get_stt_vad_start_trigger() const { return this->stt_vad_start_trigger_; } Trigger<> *get_wake_word_detected_trigger() const { return this->wake_word_detected_trigger_; } Trigger *get_stt_end_trigger() const { return this->stt_end_trigger_; } - Trigger *get_tts_start_trigger() const { return this->tts_start_trigger_; } Trigger *get_tts_end_trigger() const { return this->tts_end_trigger_; } - Trigger<> *get_end_trigger() const { return this->end_trigger_; } + Trigger *get_tts_start_trigger() const { return this->tts_start_trigger_; } Trigger *get_error_trigger() const { return this->error_trigger_; } Trigger<> *get_client_connected_trigger() const { return this->client_connected_trigger_; } @@ -124,13 +128,17 @@ class VoiceAssistant : public Component { std::unique_ptr socket_ = nullptr; struct sockaddr_storage dest_addr_; + Trigger<> 
*intent_end_trigger_ = new Trigger<>(); + Trigger<> *intent_start_trigger_ = new Trigger<>(); Trigger<> *listening_trigger_ = new Trigger<>(); + Trigger<> *end_trigger_ = new Trigger<>(); Trigger<> *start_trigger_ = new Trigger<>(); + Trigger<> *stt_vad_start_trigger_ = new Trigger<>(); + Trigger<> *stt_vad_end_trigger_ = new Trigger<>(); Trigger<> *wake_word_detected_trigger_ = new Trigger<>(); Trigger *stt_end_trigger_ = new Trigger(); - Trigger *tts_start_trigger_ = new Trigger(); Trigger *tts_end_trigger_ = new Trigger(); - Trigger<> *end_trigger_ = new Trigger<>(); + Trigger *tts_start_trigger_ = new Trigger(); Trigger *error_trigger_ = new Trigger(); Trigger<> *client_connected_trigger_ = new Trigger<>(); From 255483de63f9c1fd846cbbdaae98be96df41d6cb Mon Sep 17 00:00:00 2001 From: Mat931 <49403702+Mat931@users.noreply.github.com> Date: Thu, 16 Nov 2023 07:45:08 +0000 Subject: [PATCH 2/3] Fix MY9231 flicker (#5765) --- esphome/components/my9231/my9231.cpp | 26 +++++++++++++++++++++----- esphome/components/my9231/my9231.h | 1 + 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/esphome/components/my9231/my9231.cpp b/esphome/components/my9231/my9231.cpp index a97587b7be..c511591856 100644 --- a/esphome/components/my9231/my9231.cpp +++ b/esphome/components/my9231/my9231.cpp @@ -1,5 +1,6 @@ #include "my9231.h" #include "esphome/core/log.h" +#include "esphome/core/helpers.h" namespace esphome { namespace my9231 { @@ -51,7 +52,11 @@ void MY9231OutputComponent::setup() { MY9231_CMD_SCATTER_APDM | MY9231_CMD_FREQUENCY_DIVIDE_1 | MY9231_CMD_REACTION_FAST | MY9231_CMD_ONE_SHOT_DISABLE; ESP_LOGV(TAG, " Command: 0x%02X", command); - this->init_chips_(command); + { + InterruptLock lock; + this->send_dcki_pulses_(32 * this->num_chips_); + this->init_chips_(command); + } ESP_LOGV(TAG, " Chips initialized."); } void MY9231OutputComponent::dump_config() { @@ -66,11 +71,14 @@ void MY9231OutputComponent::loop() { if (!this->update_) return; - for (auto pwm_amount : 
this->pwm_amounts_) { - this->write_word_(pwm_amount, this->bit_depth_); + { + InterruptLock lock; + for (auto pwm_amount : this->pwm_amounts_) { + this->write_word_(pwm_amount, this->bit_depth_); + } + // Send 8 DI pulses. After 8 falling edges, the duty data are stored. + this->send_di_pulses_(8); } - // Send 8 DI pulses. After 8 falling edges, the duty data are store. - this->send_di_pulses_(8); this->update_ = false; } void MY9231OutputComponent::set_channel_value_(uint8_t channel, uint16_t value) { @@ -92,6 +100,7 @@ void MY9231OutputComponent::init_chips_(uint8_t command) { // Send 16 DI pulse. After 14 falling edges, the command data are // stored and after 16 falling edges the duty mode is activated. this->send_di_pulses_(16); + delayMicroseconds(12); } void MY9231OutputComponent::write_word_(uint16_t value, uint8_t bits) { for (uint8_t i = bits; i > 0; i--) { @@ -106,6 +115,13 @@ void MY9231OutputComponent::send_di_pulses_(uint8_t count) { this->pin_di_->digital_write(false); } } +void MY9231OutputComponent::send_dcki_pulses_(uint8_t count) { + delayMicroseconds(12); + for (uint8_t i = 0; i < count; i++) { + this->pin_dcki_->digital_write(true); + this->pin_dcki_->digital_write(false); + } +} } // namespace my9231 } // namespace esphome diff --git a/esphome/components/my9231/my9231.h b/esphome/components/my9231/my9231.h index a777dcc960..77c1259853 100644 --- a/esphome/components/my9231/my9231.h +++ b/esphome/components/my9231/my9231.h @@ -49,6 +49,7 @@ class MY9231OutputComponent : public Component { void init_chips_(uint8_t command); void write_word_(uint16_t value, uint8_t bits); void send_di_pulses_(uint8_t count); + void send_dcki_pulses_(uint8_t count); GPIOPin *pin_di_; GPIOPin *pin_dcki_; From 445b13dbc6330df2361e06fb9499ae31c6a1153b Mon Sep 17 00:00:00 2001 From: Jesse Hills <3060199+jesserockz@users.noreply.github.com> Date: Thu, 16 Nov 2023 20:55:28 +1300 Subject: [PATCH 3/3] Bump version to 2023.11.1 --- esphome/const.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/esphome/const.py b/esphome/const.py index f937ecf068..7ceaffbe57 100644 --- a/esphome/const.py +++ b/esphome/const.py @@ -1,6 +1,6 @@ """Constants used by esphome.""" -__version__ = "2023.11.0" +__version__ = "2023.11.1" ALLOWED_NAME_CHARS = "abcdefghijklmnopqrstuvwxyz0123456789-_" VALID_SUBSTITUTIONS_CHARACTERS = (