From 746488cabf2d4ac8db4b0d7883dc72dd8c5aa73c Mon Sep 17 00:00:00 2001 From: Jesse Hills <3060199+jesserockz@users.noreply.github.com> Date: Wed, 19 Jul 2023 11:38:47 +1200 Subject: [PATCH] Fix silence detection flag on voice assistant (#5120) --- esphome/components/api/api.proto | 1 + esphome/components/api/api_connection.cpp | 3 ++- esphome/components/api/api_connection.h | 2 +- esphome/components/api/api_pb2.cpp | 9 +++++++++ esphome/components/api/api_pb2.h | 1 + esphome/components/api/api_server.cpp | 6 +++--- esphome/components/api/api_server.h | 2 +- esphome/components/voice_assistant/voice_assistant.cpp | 2 +- esphome/components/voice_assistant/voice_assistant.h | 6 +----- 9 files changed, 20 insertions(+), 12 deletions(-) diff --git a/esphome/components/api/api.proto b/esphome/components/api/api.proto index 0d68d9fe55..86685aa5e6 100644 --- a/esphome/components/api/api.proto +++ b/esphome/components/api/api.proto @@ -1420,6 +1420,7 @@ message VoiceAssistantRequest { bool start = 1; string conversation_id = 2; + bool use_vad = 3; } message VoiceAssistantResponse { diff --git a/esphome/components/api/api_connection.cpp b/esphome/components/api/api_connection.cpp index 858ff0e525..a46efd80e5 100644 --- a/esphome/components/api/api_connection.cpp +++ b/esphome/components/api/api_connection.cpp @@ -907,12 +907,13 @@ BluetoothConnectionsFreeResponse APIConnection::subscribe_bluetooth_connections_ #endif #ifdef USE_VOICE_ASSISTANT -bool APIConnection::request_voice_assistant(bool start, const std::string &conversation_id) { +bool APIConnection::request_voice_assistant(bool start, const std::string &conversation_id, bool use_vad) { if (!this->voice_assistant_subscription_) return false; VoiceAssistantRequest msg; msg.start = start; msg.conversation_id = conversation_id; + msg.use_vad = use_vad; return this->send_voice_assistant_request(msg); } void APIConnection::on_voice_assistant_response(const VoiceAssistantResponse &msg) { diff --git a/esphome/components/api/api_connection.h b/esphome/components/api/api_connection.h index c146adff02..acc4578661 100644 --- a/esphome/components/api/api_connection.h +++ b/esphome/components/api/api_connection.h @@ -124,7 +124,7 @@ class APIConnection : public APIServerConnection { void subscribe_voice_assistant(const SubscribeVoiceAssistantRequest &msg) override { this->voice_assistant_subscription_ = msg.subscribe; } - bool request_voice_assistant(bool start, const std::string &conversation_id); + bool request_voice_assistant(bool start, const std::string &conversation_id, bool use_vad); void on_voice_assistant_response(const VoiceAssistantResponse &msg) override; void on_voice_assistant_event_response(const VoiceAssistantEventResponse &msg) override; #endif diff --git a/esphome/components/api/api_pb2.cpp b/esphome/components/api/api_pb2.cpp index 8c7f6d0c4a..3a2d980e57 100644 --- a/esphome/components/api/api_pb2.cpp +++ b/esphome/components/api/api_pb2.cpp @@ -6348,6 +6348,10 @@ bool VoiceAssistantRequest::decode_varint(uint32_t field_id, ProtoVarInt value) this->start = value.as_bool(); return true; } + case 3: { + this->use_vad = value.as_bool(); + return true; + } default: return false; } @@ -6365,6 +6369,7 @@ bool VoiceAssistantRequest::decode_length(uint32_t field_id, ProtoLengthDelimite void VoiceAssistantRequest::encode(ProtoWriteBuffer buffer) const { buffer.encode_bool(1, this->start); buffer.encode_string(2, this->conversation_id); + buffer.encode_bool(3, this->use_vad); } #ifdef HAS_PROTO_MESSAGE_DUMP void VoiceAssistantRequest::dump_to(std::string &out) const { @@ -6377,6 +6382,10 @@ void VoiceAssistantRequest::dump_to(std::string &out) const { out.append(" conversation_id: "); out.append("'").append(this->conversation_id).append("'"); out.append("\n"); + + out.append(" use_vad: "); + out.append(YESNO(this->use_vad)); + out.append("\n"); out.append("}"); } #endif diff --git a/esphome/components/api/api_pb2.h b/esphome/components/api/api_pb2.h index 769f7aaff5..627165953d 100644 --- a/esphome/components/api/api_pb2.h +++ b/esphome/components/api/api_pb2.h @@ -1655,6 +1655,7 @@ class VoiceAssistantRequest : public ProtoMessage { public: bool start{false}; std::string conversation_id{}; + bool use_vad{false}; void encode(ProtoWriteBuffer buffer) const override; #ifdef HAS_PROTO_MESSAGE_DUMP void dump_to(std::string &out) const override; diff --git a/esphome/components/api/api_server.cpp b/esphome/components/api/api_server.cpp index 87b5f9e63f..f70d45ecd0 100644 --- a/esphome/components/api/api_server.cpp +++ b/esphome/components/api/api_server.cpp @@ -323,16 +323,16 @@ void APIServer::on_shutdown() { } #ifdef USE_VOICE_ASSISTANT -bool APIServer::start_voice_assistant(const std::string &conversation_id) { +bool APIServer::start_voice_assistant(const std::string &conversation_id, bool use_vad) { for (auto &c : this->clients_) { - if (c->request_voice_assistant(true, conversation_id)) + if (c->request_voice_assistant(true, conversation_id, use_vad)) return true; } return false; } void APIServer::stop_voice_assistant() { for (auto &c : this->clients_) { - if (c->request_voice_assistant(false, "")) + if (c->request_voice_assistant(false, "", false)) return; } } diff --git a/esphome/components/api/api_server.h b/esphome/components/api/api_server.h index be124f42ff..9b40a5ef02 100644 --- a/esphome/components/api/api_server.h +++ b/esphome/components/api/api_server.h @@ -81,7 +81,7 @@ class APIServer : public Component, public Controller { #endif #ifdef USE_VOICE_ASSISTANT - bool start_voice_assistant(const std::string &conversation_id); + bool start_voice_assistant(const std::string &conversation_id, bool use_vad); void stop_voice_assistant(); #endif diff --git a/esphome/components/voice_assistant/voice_assistant.cpp b/esphome/components/voice_assistant/voice_assistant.cpp index 44d640ff39..217ddb6354 100644 --- a/esphome/components/voice_assistant/voice_assistant.cpp +++ b/esphome/components/voice_assistant/voice_assistant.cpp @@ -130,7 +130,7 @@ void VoiceAssistant::start(struct sockaddr_storage *addr, uint16_t port) { void VoiceAssistant::request_start(bool continuous) { ESP_LOGD(TAG, "Requesting start..."); - if (!api::global_api_server->start_voice_assistant(this->conversation_id_)) { + if (!api::global_api_server->start_voice_assistant(this->conversation_id_, this->silence_detection_)) { ESP_LOGW(TAG, "Could not request start."); this->error_trigger_->trigger("not-connected", "Could not request start."); this->continuous_ = false; diff --git a/esphome/components/voice_assistant/voice_assistant.h b/esphome/components/voice_assistant/voice_assistant.h index b103584509..e67baaee65 100644 --- a/esphome/components/voice_assistant/voice_assistant.h +++ b/esphome/components/voice_assistant/voice_assistant.h @@ -25,10 +25,9 @@ namespace voice_assistant { // Version 1: Initial version // Version 2: Adds raw speaker support -// Version 3: Adds continuous support +// Version 3: Unused/skip static const uint32_t INITIAL_VERSION = 1; static const uint32_t SPEAKER_SUPPORT = 2; -static const uint32_t SILENCE_DETECTION_SUPPORT = 3; class VoiceAssistant : public Component { public: @@ -48,9 +47,6 @@ class VoiceAssistant : public Component { uint32_t get_version() const { #ifdef USE_SPEAKER if (this->speaker_ != nullptr) { - if (this->silence_detection_) { - return SILENCE_DETECTION_SUPPORT; - } return SPEAKER_SUPPORT; } #endif