From 01fc0578bdfb37d1dd582e538fff421fb848bea1 Mon Sep 17 00:00:00 2001 From: Jesse Hills <3060199+jesserockz@users.noreply.github.com> Date: Wed, 6 Mar 2024 07:41:18 +1300 Subject: [PATCH] Add wake word phrase to voice assistant start command (#6290) --- esphome/components/api/api.proto | 1 + esphome/components/api/api_pb2.cpp | 9 +++++++++ esphome/components/api/api_pb2.h | 1 + .../components/micro_wake_word/micro_wake_word.cpp | 2 +- esphome/components/voice_assistant/__init__.py | 6 ++++++ .../components/voice_assistant/voice_assistant.cpp | 2 ++ esphome/components/voice_assistant/voice_assistant.h | 11 ++++++++++- 7 files changed, 30 insertions(+), 2 deletions(-) diff --git a/esphome/components/api/api.proto b/esphome/components/api/api.proto index 8d79163590..6237ee4a52 100644 --- a/esphome/components/api/api.proto +++ b/esphome/components/api/api.proto @@ -1450,6 +1450,7 @@ message VoiceAssistantRequest { string conversation_id = 2; uint32 flags = 3; VoiceAssistantAudioSettings audio_settings = 4; + string wake_word_phrase = 5; } message VoiceAssistantResponse { diff --git a/esphome/components/api/api_pb2.cpp b/esphome/components/api/api_pb2.cpp index d3aa1fa2bf..2c5e283e3e 100644 --- a/esphome/components/api/api_pb2.cpp +++ b/esphome/components/api/api_pb2.cpp @@ -6603,6 +6603,10 @@ bool VoiceAssistantRequest::decode_length(uint32_t field_id, ProtoLengthDelimite this->audio_settings = value.as_message<VoiceAssistantAudioSettings>(); return true; } + case 5: { + this->wake_word_phrase = value.as_string(); + return true; + } default: return false; } @@ -6612,6 +6616,7 @@ void VoiceAssistantRequest::encode(ProtoWriteBuffer buffer) const { buffer.encode_string(2, this->conversation_id); buffer.encode_uint32(3, this->flags); buffer.encode_message(4, this->audio_settings); + buffer.encode_string(5, this->wake_word_phrase); } #ifdef HAS_PROTO_MESSAGE_DUMP void VoiceAssistantRequest::dump_to(std::string &out) 
const { out.append(" audio_settings: "); this->audio_settings.dump_to(out); out.append("\n"); + + out.append(" wake_word_phrase: "); + out.append("'").append(this->wake_word_phrase).append("'"); + out.append("\n"); out.append("}"); } #endif diff --git a/esphome/components/api/api_pb2.h b/esphome/components/api/api_pb2.h index ee975c1726..161443e86d 100644 --- a/esphome/components/api/api_pb2.h +++ b/esphome/components/api/api_pb2.h @@ -1702,6 +1702,7 @@ class VoiceAssistantRequest : public ProtoMessage { std::string conversation_id{}; uint32_t flags{0}; VoiceAssistantAudioSettings audio_settings{}; + std::string wake_word_phrase{}; void encode(ProtoWriteBuffer buffer) const override; #ifdef HAS_PROTO_MESSAGE_DUMP void dump_to(std::string &out) const override; diff --git a/esphome/components/micro_wake_word/micro_wake_word.cpp b/esphome/components/micro_wake_word/micro_wake_word.cpp index f0b3d55a9d..7321e5b05b 100644 --- a/esphome/components/micro_wake_word/micro_wake_word.cpp +++ b/esphome/components/micro_wake_word/micro_wake_word.cpp @@ -134,7 +134,7 @@ void MicroWakeWord::loop() { this->set_state_(State::IDLE); if (this->detected_) { this->detected_ = false; - this->wake_word_detected_trigger_->trigger(""); + this->wake_word_detected_trigger_->trigger(this->wake_word_); } } break; diff --git a/esphome/components/voice_assistant/__init__.py b/esphome/components/voice_assistant/__init__.py index b21a5b27da..17bdffd9da 100644 --- a/esphome/components/voice_assistant/__init__.py +++ b/esphome/components/voice_assistant/__init__.py @@ -42,6 +42,8 @@ CONF_AUTO_GAIN = "auto_gain" CONF_NOISE_SUPPRESSION_LEVEL = "noise_suppression_level" CONF_VOLUME_MULTIPLIER = "volume_multiplier" +CONF_WAKE_WORD = "wake_word" + voice_assistant_ns = cg.esphome_ns.namespace("voice_assistant") VoiceAssistant = voice_assistant_ns.class_("VoiceAssistant", cg.Component) @@ -285,6 +287,7 @@ VOICE_ASSISTANT_ACTION_SCHEMA = cv.Schema({cv.GenerateID(): cv.use_id(VoiceAssis 
VOICE_ASSISTANT_ACTION_SCHEMA.extend( { cv.Optional(CONF_SILENCE_DETECTION, default=True): cv.boolean, + cv.Optional(CONF_WAKE_WORD): cv.templatable(cv.string), } ), ) @@ -293,6 +296,9 @@ async def voice_assistant_listen_to_code(config, action_id, template_arg, args): await cg.register_parented(var, config[CONF_ID]) if CONF_SILENCE_DETECTION in config: cg.add(var.set_silence_detection(config[CONF_SILENCE_DETECTION])) + if wake_word := config.get(CONF_WAKE_WORD): + templ = await cg.templatable(wake_word, args, cg.std_string) + cg.add(var.set_wake_word(templ)) return var diff --git a/esphome/components/voice_assistant/voice_assistant.cpp b/esphome/components/voice_assistant/voice_assistant.cpp index 260605c0b4..49b8fdc959 100644 --- a/esphome/components/voice_assistant/voice_assistant.cpp +++ b/esphome/components/voice_assistant/voice_assistant.cpp @@ -215,6 +215,8 @@ void VoiceAssistant::loop() { msg.conversation_id = this->conversation_id_; msg.flags = flags; msg.audio_settings = audio_settings; + msg.wake_word_phrase = this->wake_word_; + this->wake_word_ = ""; if (this->api_client_ == nullptr || !this->api_client_->send_voice_assistant_request(msg)) { ESP_LOGW(TAG, "Could not request start"); diff --git a/esphome/components/voice_assistant/voice_assistant.h b/esphome/components/voice_assistant/voice_assistant.h index f0ee793f53..14352bf3ae 100644 --- a/esphome/components/voice_assistant/voice_assistant.h +++ b/esphome/components/voice_assistant/voice_assistant.h @@ -124,6 +124,8 @@ class VoiceAssistant : public Component { void client_subscription(api::APIConnection *client, bool subscribe); api::APIConnection *get_api_connection() const { return this->api_client_; } + void set_wake_word(const std::string &wake_word) { this->wake_word_ = wake_word; } + protected: int read_microphone_(); void set_state_(State state); @@ -175,6 +177,8 @@ class VoiceAssistant : public Component { std::string conversation_id_{""}; + std::string wake_word_{""}; + 
HighFrequencyLoopRequester high_freq_; #ifdef USE_ESP_ADF @@ -200,8 +204,13 @@ class VoiceAssistant : public Component { }; template<typename... Ts> class StartAction : public Action<Ts...>, public Parented<VoiceAssistant> { + TEMPLATABLE_VALUE(std::string, wake_word); + public: - void play(Ts... x) override { this->parent_->request_start(false, this->silence_detection_); } + void play(Ts... x) override { + this->parent_->set_wake_word(this->wake_word_.value(x...)); + this->parent_->request_start(false, this->silence_detection_); + } void set_silence_detection(bool silence_detection) { this->silence_detection_ = silence_detection; }