Add wake word phrase to voice assistant start command (#6290)

This commit is contained in:
Jesse Hills 2024-03-06 07:41:18 +13:00
parent b0a25401f7
commit f39dc49f49
No known key found for this signature in database
GPG key ID: BEAAE804EFD8E83A
7 changed files with 30 additions and 2 deletions

View file

@ -1449,6 +1449,7 @@ message VoiceAssistantRequest {
string conversation_id = 2; string conversation_id = 2;
uint32 flags = 3; uint32 flags = 3;
VoiceAssistantAudioSettings audio_settings = 4; VoiceAssistantAudioSettings audio_settings = 4;
string wake_word_phrase = 5;
} }
message VoiceAssistantResponse { message VoiceAssistantResponse {

View file

@ -6594,6 +6594,10 @@ bool VoiceAssistantRequest::decode_length(uint32_t field_id, ProtoLengthDelimite
this->audio_settings = value.as_message<VoiceAssistantAudioSettings>(); this->audio_settings = value.as_message<VoiceAssistantAudioSettings>();
return true; return true;
} }
case 5: {
this->wake_word_phrase = value.as_string();
return true;
}
default: default:
return false; return false;
} }
@ -6603,6 +6607,7 @@ void VoiceAssistantRequest::encode(ProtoWriteBuffer buffer) const {
buffer.encode_string(2, this->conversation_id); buffer.encode_string(2, this->conversation_id);
buffer.encode_uint32(3, this->flags); buffer.encode_uint32(3, this->flags);
buffer.encode_message<VoiceAssistantAudioSettings>(4, this->audio_settings); buffer.encode_message<VoiceAssistantAudioSettings>(4, this->audio_settings);
buffer.encode_string(5, this->wake_word_phrase);
} }
#ifdef HAS_PROTO_MESSAGE_DUMP #ifdef HAS_PROTO_MESSAGE_DUMP
void VoiceAssistantRequest::dump_to(std::string &out) const { void VoiceAssistantRequest::dump_to(std::string &out) const {
@ -6624,6 +6629,10 @@ void VoiceAssistantRequest::dump_to(std::string &out) const {
out.append(" audio_settings: "); out.append(" audio_settings: ");
this->audio_settings.dump_to(out); this->audio_settings.dump_to(out);
out.append("\n"); out.append("\n");
out.append(" wake_word_phrase: ");
out.append("'").append(this->wake_word_phrase).append("'");
out.append("\n");
out.append("}"); out.append("}");
} }
#endif #endif

View file

@ -1701,6 +1701,7 @@ class VoiceAssistantRequest : public ProtoMessage {
std::string conversation_id{}; std::string conversation_id{};
uint32_t flags{0}; uint32_t flags{0};
VoiceAssistantAudioSettings audio_settings{}; VoiceAssistantAudioSettings audio_settings{};
std::string wake_word_phrase{};
void encode(ProtoWriteBuffer buffer) const override; void encode(ProtoWriteBuffer buffer) const override;
#ifdef HAS_PROTO_MESSAGE_DUMP #ifdef HAS_PROTO_MESSAGE_DUMP
void dump_to(std::string &out) const override; void dump_to(std::string &out) const override;

View file

@ -134,7 +134,7 @@ void MicroWakeWord::loop() {
this->set_state_(State::IDLE); this->set_state_(State::IDLE);
if (this->detected_) { if (this->detected_) {
this->detected_ = false; this->detected_ = false;
this->wake_word_detected_trigger_->trigger(""); this->wake_word_detected_trigger_->trigger(this->wake_word_);
} }
} }
break; break;

View file

@ -42,6 +42,8 @@ CONF_AUTO_GAIN = "auto_gain"
CONF_NOISE_SUPPRESSION_LEVEL = "noise_suppression_level" CONF_NOISE_SUPPRESSION_LEVEL = "noise_suppression_level"
CONF_VOLUME_MULTIPLIER = "volume_multiplier" CONF_VOLUME_MULTIPLIER = "volume_multiplier"
CONF_WAKE_WORD = "wake_word"
voice_assistant_ns = cg.esphome_ns.namespace("voice_assistant") voice_assistant_ns = cg.esphome_ns.namespace("voice_assistant")
VoiceAssistant = voice_assistant_ns.class_("VoiceAssistant", cg.Component) VoiceAssistant = voice_assistant_ns.class_("VoiceAssistant", cg.Component)
@ -285,6 +287,7 @@ VOICE_ASSISTANT_ACTION_SCHEMA = cv.Schema({cv.GenerateID(): cv.use_id(VoiceAssis
VOICE_ASSISTANT_ACTION_SCHEMA.extend( VOICE_ASSISTANT_ACTION_SCHEMA.extend(
{ {
cv.Optional(CONF_SILENCE_DETECTION, default=True): cv.boolean, cv.Optional(CONF_SILENCE_DETECTION, default=True): cv.boolean,
cv.Optional(CONF_WAKE_WORD): cv.templatable(cv.string),
} }
), ),
) )
@ -293,6 +296,9 @@ async def voice_assistant_listen_to_code(config, action_id, template_arg, args):
await cg.register_parented(var, config[CONF_ID]) await cg.register_parented(var, config[CONF_ID])
if CONF_SILENCE_DETECTION in config: if CONF_SILENCE_DETECTION in config:
cg.add(var.set_silence_detection(config[CONF_SILENCE_DETECTION])) cg.add(var.set_silence_detection(config[CONF_SILENCE_DETECTION]))
if wake_word := config.get(CONF_WAKE_WORD):
templ = await cg.templatable(wake_word, args, cg.std_string)
cg.add(var.set_wake_word(templ))
return var return var

View file

@ -215,6 +215,8 @@ void VoiceAssistant::loop() {
msg.conversation_id = this->conversation_id_; msg.conversation_id = this->conversation_id_;
msg.flags = flags; msg.flags = flags;
msg.audio_settings = audio_settings; msg.audio_settings = audio_settings;
msg.wake_word_phrase = this->wake_word_;
this->wake_word_ = "";
if (this->api_client_ == nullptr || !this->api_client_->send_voice_assistant_request(msg)) { if (this->api_client_ == nullptr || !this->api_client_->send_voice_assistant_request(msg)) {
ESP_LOGW(TAG, "Could not request start"); ESP_LOGW(TAG, "Could not request start");

View file

@ -124,6 +124,8 @@ class VoiceAssistant : public Component {
void client_subscription(api::APIConnection *client, bool subscribe); void client_subscription(api::APIConnection *client, bool subscribe);
api::APIConnection *get_api_connection() const { return this->api_client_; } api::APIConnection *get_api_connection() const { return this->api_client_; }
void set_wake_word(const std::string &wake_word) { this->wake_word_ = wake_word; }
protected: protected:
int read_microphone_(); int read_microphone_();
void set_state_(State state); void set_state_(State state);
@ -175,6 +177,8 @@ class VoiceAssistant : public Component {
std::string conversation_id_{""}; std::string conversation_id_{""};
std::string wake_word_{""};
HighFrequencyLoopRequester high_freq_; HighFrequencyLoopRequester high_freq_;
#ifdef USE_ESP_ADF #ifdef USE_ESP_ADF
@ -200,8 +204,13 @@ class VoiceAssistant : public Component {
}; };
template<typename... Ts> class StartAction : public Action<Ts...>, public Parented<VoiceAssistant> { template<typename... Ts> class StartAction : public Action<Ts...>, public Parented<VoiceAssistant> {
TEMPLATABLE_VALUE(std::string, wake_word);
public: public:
void play(Ts... x) override { this->parent_->request_start(false, this->silence_detection_); } void play(Ts... x) override {
this->parent_->set_wake_word(this->wake_word_.value(x...));
this->parent_->request_start(false, this->silence_detection_);
}
void set_silence_detection(bool silence_detection) { this->silence_detection_ = silence_detection; } void set_silence_detection(bool silence_detection) { this->silence_detection_ = silence_detection; }