Add push to talk voice assistant (#4648)

* Add push to talk voice assistant

* Refactor most code into voice_assistant

* Make voice_assistant the component and remove push_to_talk (can be done in yaml)

* Fix component setup

* Always AF_INET to match serverside

* Fix microphone and media player co-existence

* Format

* Update codeowners

* Update test file

* Fix endifs

* nullptr not NULL

* clang-tidy

* Format

* fixup: Add VA event data

* Generate proto

* Parse and log events

* Add default to switch

* Fix

* Add mic/va to test5
This commit is contained in:
Jesse Hills 2023-04-12 11:45:10 +12:00 committed by GitHub
parent 80bc567c31
commit b60c08dd28
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
35 changed files with 1384 additions and 75 deletions

View file

@ -111,6 +111,8 @@ esphome/components/hte501/* @Stock-M
esphome/components/hydreon_rgxx/* @functionpointer
esphome/components/i2c/* @esphome/core
esphome/components/i2s_audio/* @jesserockz
esphome/components/i2s_audio/media_player/* @jesserockz
esphome/components/i2s_audio/microphone/* @jesserockz
esphome/components/ili9xxx/* @nielsnl68
esphome/components/improv_base/* @esphome/core
esphome/components/improv_serial/* @esphome/core
@ -154,6 +156,7 @@ esphome/components/mcp9808/* @k7hpn
esphome/components/md5/* @esphome/core
esphome/components/mdns/* @esphome/core
esphome/components/media_player/* @jesserockz
esphome/components/microphone/* @jesserockz
esphome/components/mics_4514/* @jesserockz
esphome/components/midea/* @dudanov
esphome/components/midea_ir/* @dudanov
@ -287,6 +290,7 @@ esphome/components/ufire_ise/* @pvizeli
esphome/components/ultrasonic/* @OttoWinter
esphome/components/vbus/* @ssieb
esphome/components/version/* @esphome/core
esphome/components/voice_assistant/* @jesserockz
esphome/components/wake_on_lan/* @willwill2will54
esphome/components/web_server_base/* @OttoWinter
esphome/components/whirlpool/* @glmnet

View file

@ -55,6 +55,7 @@ service APIConnection {
rpc subscribe_bluetooth_connections_free(SubscribeBluetoothConnectionsFreeRequest) returns (BluetoothConnectionsFreeResponse) {}
rpc unsubscribe_bluetooth_le_advertisements(UnsubscribeBluetoothLEAdvertisementsRequest) returns (void) {}
rpc subscribe_voice_assistant(SubscribeVoiceAssistantRequest) returns (void) {}
}
@ -210,6 +211,8 @@ message DeviceInfoResponse {
string manufacturer = 12;
string friendly_name = 13;
uint32 voice_assistant_version = 14;
}
message ListEntitiesRequest {
@ -1379,3 +1382,55 @@ message BluetoothDeviceClearCacheResponse {
bool success = 2;
int32 error = 3;
}
// ==================== VOICE ASSISTANT ====================
// Sent by the client to subscribe this connection to (or unsubscribe it from)
// voice assistant requests coming from the device.
message SubscribeVoiceAssistantRequest {
option (id) = 89;
option (source) = SOURCE_CLIENT;
option (ifdef) = "USE_VOICE_ASSISTANT";
// Stored per connection; the device only sends VoiceAssistantRequest to
// connections where this was last set to true.
bool subscribe = 1;
}
// Sent by the device to subscribed clients to start or stop a voice assistant run.
message VoiceAssistantRequest {
option (id) = 90;
option (source) = SOURCE_SERVER;
option (ifdef) = "USE_VOICE_ASSISTANT";
// true requests a start, false requests a stop.
bool start = 1;
}
// Sent by the client in reply to a VoiceAssistantRequest.
message VoiceAssistantResponse {
option (id) = 91;
option (source) = SOURCE_CLIENT;
option (ifdef) = "USE_VOICE_ASSISTANT";
// Port on the client; the device passes it, together with the connection's
// peer address, to the voice assistant component's start().
uint32 port = 1;
// Error flag reported by the client.
bool error = 2;
}
// Event types carried in VoiceAssistantEventResponse.event_type.
// NOTE(review): the STT / INTENT / TTS names presumably map to speech-to-text,
// intent handling and text-to-speech pipeline stages - confirm against the client.
enum VoiceAssistantEvent {
VOICE_ASSISTANT_ERROR = 0;
VOICE_ASSISTANT_RUN_START = 1;
VOICE_ASSISTANT_RUN_END = 2;
VOICE_ASSISTANT_STT_START = 3;
VOICE_ASSISTANT_STT_END = 4;
VOICE_ASSISTANT_INTENT_START = 5;
VOICE_ASSISTANT_INTENT_END = 6;
VOICE_ASSISTANT_TTS_START = 7;
VOICE_ASSISTANT_TTS_END = 8;
}
// One name/value string pair attached to a voice assistant event.
// It has no message id of its own; it is only used as a repeated field of
// VoiceAssistantEventResponse.
message VoiceAssistantEventData {
string name = 1;
string value = 2;
}
// Sent by the client to report a voice assistant event to the device.
message VoiceAssistantEventResponse {
option (id) = 92;
option (source) = SOURCE_CLIENT;
option (ifdef) = "USE_VOICE_ASSISTANT";
// Which event occurred.
VoiceAssistantEvent event_type = 1;
// Zero or more name/value pairs accompanying the event.
repeated VoiceAssistantEventData data = 2;
}

View file

@ -16,6 +16,9 @@
#ifdef USE_BLUETOOTH_PROXY
#include "esphome/components/bluetooth_proxy/bluetooth_proxy.h"
#endif
#ifdef USE_VOICE_ASSISTANT
#include "esphome/components/voice_assistant/voice_assistant.h"
#endif
namespace esphome {
namespace api {
@ -893,6 +896,30 @@ BluetoothConnectionsFreeResponse APIConnection::subscribe_bluetooth_connections_
}
#endif
#ifdef USE_VOICE_ASSISTANT
bool APIConnection::request_voice_assistant(bool start) {
if (!this->voice_assistant_subscription_)
return false;
VoiceAssistantRequest msg;
msg.start = start;
return this->send_voice_assistant_request(msg);
}
void APIConnection::on_voice_assistant_response(const VoiceAssistantResponse &msg) {
if (voice_assistant::global_voice_assistant != nullptr) {
struct sockaddr_storage storage;
socklen_t len = sizeof(storage);
this->helper_->getpeername((struct sockaddr *) &storage, &len);
voice_assistant::global_voice_assistant->start(&storage, msg.port);
}
};
// Forwards a voice assistant event from the client to the global voice
// assistant component; the event is dropped when no component is registered.
void APIConnection::on_voice_assistant_event_response(const VoiceAssistantEventResponse &msg) {
  auto *assistant = voice_assistant::global_voice_assistant;
  if (assistant == nullptr)
    return;
  assistant->on_event(msg);
}
#endif
bool APIConnection::send_log_message(int level, const char *tag, const char *line) {
if (this->log_subscription_ < level)
return false;
@ -970,6 +997,9 @@ DeviceInfoResponse APIConnection::device_info(const DeviceInfoRequest &msg) {
resp.bluetooth_proxy_version = bluetooth_proxy::global_bluetooth_proxy->has_active()
? bluetooth_proxy::ACTIVE_CONNECTIONS_VERSION
: bluetooth_proxy::PASSIVE_ONLY_VERSION;
#endif
#ifdef USE_VOICE_ASSISTANT
resp.voice_assistant_version = 1;
#endif
return resp;
}

View file

@ -6,6 +6,7 @@
#include "api_server.h"
#include "esphome/core/application.h"
#include "esphome/core/component.h"
#include "esphome/core/defines.h"
#include <vector>
@ -123,6 +124,15 @@ class APIConnection : public APIServerConnection {
}
#endif
#ifdef USE_VOICE_ASSISTANT
// Records whether this client wants to receive voice assistant requests.
void subscribe_voice_assistant(const SubscribeVoiceAssistantRequest &msg) override {
this->voice_assistant_subscription_ = msg.subscribe;
}
// Sends a start/stop request to this client; returns false if it is not subscribed.
bool request_voice_assistant(bool start);
// Called when the client answers a voice assistant request.
void on_voice_assistant_response(const VoiceAssistantResponse &msg) override;
// Called when the client reports a voice assistant event.
void on_voice_assistant_event_response(const VoiceAssistantEventResponse &msg) override;
#endif
void on_disconnect_response(const DisconnectResponse &value) override;
void on_ping_response(const PingResponse &value) override {
// we initiated ping
@ -203,6 +213,9 @@ class APIConnection : public APIServerConnection {
bool service_call_subscription_{false};
#ifdef USE_BLUETOOTH_PROXY
bool bluetooth_le_advertisement_subscription_{false};
#endif
#ifdef USE_VOICE_ASSISTANT
bool voice_assistant_subscription_{false};
#endif
bool next_close_ = false;
APIServer *parent_;

View file

@ -10,8 +10,8 @@
#include "noise/protocol.h"
#endif
#include "esphome/components/socket/socket.h"
#include "api_noise_context.h"
#include "esphome/components/socket/socket.h"
namespace esphome {
namespace api {
@ -67,6 +67,7 @@ class APIFrameHelper {
virtual bool can_write_without_blocking() = 0;
virtual APIError write_packet(uint16_t type, const uint8_t *data, size_t len) = 0;
virtual std::string getpeername() = 0;
virtual int getpeername(struct sockaddr *addr, socklen_t *addrlen) = 0;
virtual APIError close() = 0;
virtual APIError shutdown(int how) = 0;
// Give this helper a name for logging
@ -84,7 +85,10 @@ class APINoiseFrameHelper : public APIFrameHelper {
APIError read_packet(ReadPacketBuffer *buffer) override;
bool can_write_without_blocking() override;
APIError write_packet(uint16_t type, const uint8_t *payload, size_t len) override;
std::string getpeername() override { return socket_->getpeername(); }
std::string getpeername() override { return this->socket_->getpeername(); }
int getpeername(struct sockaddr *addr, socklen_t *addrlen) override {
return this->socket_->getpeername(addr, addrlen);
}
APIError close() override;
APIError shutdown(int how) override;
// Give this helper a name for logging
@ -144,7 +148,10 @@ class APIPlaintextFrameHelper : public APIFrameHelper {
APIError read_packet(ReadPacketBuffer *buffer) override;
bool can_write_without_blocking() override;
APIError write_packet(uint16_t type, const uint8_t *payload, size_t len) override;
std::string getpeername() override { return socket_->getpeername(); }
std::string getpeername() override { return this->socket_->getpeername(); }
int getpeername(struct sockaddr *addr, socklen_t *addrlen) override {
return this->socket_->getpeername(addr, addrlen);
}
APIError close() override;
APIError shutdown(int how) override;
// Give this helper a name for logging

View file

@ -407,6 +407,32 @@ const char *proto_enum_to_string<enums::BluetoothDeviceRequestType>(enums::Bluet
}
}
#endif
#ifdef HAS_PROTO_MESSAGE_DUMP
// Maps a VoiceAssistantEvent value to its enumerator name for message dumps.
// Any value without a matching enumerator yields "UNKNOWN".
template<> const char *proto_enum_to_string<enums::VoiceAssistantEvent>(enums::VoiceAssistantEvent value) {
  if (value == enums::VOICE_ASSISTANT_ERROR)
    return "VOICE_ASSISTANT_ERROR";
  if (value == enums::VOICE_ASSISTANT_RUN_START)
    return "VOICE_ASSISTANT_RUN_START";
  if (value == enums::VOICE_ASSISTANT_RUN_END)
    return "VOICE_ASSISTANT_RUN_END";
  if (value == enums::VOICE_ASSISTANT_STT_START)
    return "VOICE_ASSISTANT_STT_START";
  if (value == enums::VOICE_ASSISTANT_STT_END)
    return "VOICE_ASSISTANT_STT_END";
  if (value == enums::VOICE_ASSISTANT_INTENT_START)
    return "VOICE_ASSISTANT_INTENT_START";
  if (value == enums::VOICE_ASSISTANT_INTENT_END)
    return "VOICE_ASSISTANT_INTENT_END";
  if (value == enums::VOICE_ASSISTANT_TTS_START)
    return "VOICE_ASSISTANT_TTS_START";
  if (value == enums::VOICE_ASSISTANT_TTS_END)
    return "VOICE_ASSISTANT_TTS_END";
  return "UNKNOWN";
}
#endif
bool HelloRequest::decode_varint(uint32_t field_id, ProtoVarInt value) {
switch (field_id) {
case 2: {
@ -594,6 +620,10 @@ bool DeviceInfoResponse::decode_varint(uint32_t field_id, ProtoVarInt value) {
this->bluetooth_proxy_version = value.as_uint32();
return true;
}
case 14: {
this->voice_assistant_version = value.as_uint32();
return true;
}
default:
return false;
}
@ -654,6 +684,7 @@ void DeviceInfoResponse::encode(ProtoWriteBuffer buffer) const {
buffer.encode_uint32(11, this->bluetooth_proxy_version);
buffer.encode_string(12, this->manufacturer);
buffer.encode_string(13, this->friendly_name);
buffer.encode_uint32(14, this->voice_assistant_version);
}
#ifdef HAS_PROTO_MESSAGE_DUMP
void DeviceInfoResponse::dump_to(std::string &out) const {
@ -712,6 +743,11 @@ void DeviceInfoResponse::dump_to(std::string &out) const {
out.append(" friendly_name: ");
out.append("'").append(this->friendly_name).append("'");
out.append("\n");
out.append(" voice_assistant_version: ");
sprintf(buffer, "%u", this->voice_assistant_version);
out.append(buffer);
out.append("\n");
out.append("}");
}
#endif
@ -6111,6 +6147,155 @@ void BluetoothDeviceClearCacheResponse::dump_to(std::string &out) const {
out.append("}");
}
#endif
// Decodes a varint field. Field 1 is `subscribe`; any other field id is
// reported as unhandled by returning false.
bool SubscribeVoiceAssistantRequest::decode_varint(uint32_t field_id, ProtoVarInt value) {
  if (field_id != 1)
    return false;
  this->subscribe = value.as_bool();
  return true;
}
// Serializes the message: `subscribe` is written as bool field 1.
void SubscribeVoiceAssistantRequest::encode(ProtoWriteBuffer buffer) const {
  const bool subscribe_flag = this->subscribe;
  buffer.encode_bool(1, subscribe_flag);
}
#ifdef HAS_PROTO_MESSAGE_DUMP
// Appends a human-readable dump of this message to `out`.
void SubscribeVoiceAssistantRequest::dump_to(std::string &out) const {
  out += "SubscribeVoiceAssistantRequest {\n";
  out += " subscribe: ";
  out += YESNO(this->subscribe);
  out += "\n";
  out += "}";
}
#endif
// Decodes a varint field. Field 1 is `start`; any other field id is reported
// as unhandled by returning false.
bool VoiceAssistantRequest::decode_varint(uint32_t field_id, ProtoVarInt value) {
  if (field_id != 1)
    return false;
  this->start = value.as_bool();
  return true;
}
// Serializes the message: `start` is written as bool field 1.
void VoiceAssistantRequest::encode(ProtoWriteBuffer buffer) const {
  const bool start_flag = this->start;
  buffer.encode_bool(1, start_flag);
}
#ifdef HAS_PROTO_MESSAGE_DUMP
// Appends a human-readable dump of this message to `out`.
void VoiceAssistantRequest::dump_to(std::string &out) const {
  out += "VoiceAssistantRequest {\n";
  out += " start: ";
  out += YESNO(this->start);
  out += "\n";
  out += "}";
}
#endif
// Decodes a varint field. Field 1 is `port`, field 2 is `error`; any other
// field id is reported as unhandled by returning false.
bool VoiceAssistantResponse::decode_varint(uint32_t field_id, ProtoVarInt value) {
  if (field_id == 1) {
    this->port = value.as_uint32();
    return true;
  }
  if (field_id == 2) {
    this->error = value.as_bool();
    return true;
  }
  return false;
}
// Serializes the message: `port` as uint32 field 1, then `error` as bool field 2.
void VoiceAssistantResponse::encode(ProtoWriteBuffer buffer) const {
  const uint32_t port_value = this->port;
  const bool error_flag = this->error;
  buffer.encode_uint32(1, port_value);
  buffer.encode_bool(2, error_flag);
}
#ifdef HAS_PROTO_MESSAGE_DUMP
void VoiceAssistantResponse::dump_to(std::string &out) const {
__attribute__((unused)) char buffer[64];
out.append("VoiceAssistantResponse {\n");
out.append(" port: ");
sprintf(buffer, "%u", this->port);
out.append(buffer);
out.append("\n");
out.append(" error: ");
out.append(YESNO(this->error));
out.append("\n");
out.append("}");
}
#endif
// Decodes a length-delimited field. Field 1 is `name`, field 2 is `value`;
// any other field id is reported as unhandled by returning false.
bool VoiceAssistantEventData::decode_length(uint32_t field_id, ProtoLengthDelimited value) {
  if (field_id == 1) {
    this->name = value.as_string();
    return true;
  }
  if (field_id == 2) {
    this->value = value.as_string();
    return true;
  }
  return false;
}
// Serializes the pair: `name` as string field 1, then `value` as string field 2.
void VoiceAssistantEventData::encode(ProtoWriteBuffer buffer) const {
  const std::string &pair_name = this->name;
  const std::string &pair_value = this->value;
  buffer.encode_string(1, pair_name);
  buffer.encode_string(2, pair_value);
}
#ifdef HAS_PROTO_MESSAGE_DUMP
// Appends a human-readable dump of this name/value pair to `out`.
void VoiceAssistantEventData::dump_to(std::string &out) const {
  out += "VoiceAssistantEventData {\n";

  out += " name: ";
  out += "'";
  out += this->name;
  out += "'";
  out += "\n";

  out += " value: ";
  out += "'";
  out += this->value;
  out += "'";
  out += "\n";

  out += "}";
}
#endif
// Decodes a varint field. Field 1 is `event_type`; any other field id is
// reported as unhandled by returning false.
bool VoiceAssistantEventResponse::decode_varint(uint32_t field_id, ProtoVarInt value) {
  if (field_id != 1)
    return false;
  this->event_type = value.as_enum<enums::VoiceAssistantEvent>();
  return true;
}
// Decodes a length-delimited field. Each occurrence of field 2 is parsed as a
// VoiceAssistantEventData and appended to `data`; any other field id is
// reported as unhandled by returning false.
bool VoiceAssistantEventResponse::decode_length(uint32_t field_id, ProtoLengthDelimited value) {
  if (field_id != 2)
    return false;
  this->data.push_back(value.as_message<VoiceAssistantEventData>());
  return true;
}
// Serializes the message: `event_type` as enum field 1, followed by one
// field-2 sub-message per entry in `data`, in order.
void VoiceAssistantEventResponse::encode(ProtoWriteBuffer buffer) const {
  buffer.encode_enum<enums::VoiceAssistantEvent>(1, this->event_type);
  for (auto &entry : this->data)
    buffer.encode_message<VoiceAssistantEventData>(2, entry, true);
}
#ifdef HAS_PROTO_MESSAGE_DUMP
// Appends a human-readable dump of this message, including every attached
// data entry, to `out`.
void VoiceAssistantEventResponse::dump_to(std::string &out) const {
  out += "VoiceAssistantEventResponse {\n";

  out += " event_type: ";
  out += proto_enum_to_string<enums::VoiceAssistantEvent>(this->event_type);
  out += "\n";

  for (const auto &entry : this->data) {
    out += " data: ";
    entry.dump_to(out);
    out += "\n";
  }

  out += "}";
}
#endif
} // namespace api
} // namespace esphome

View file

@ -165,6 +165,17 @@ enum BluetoothDeviceRequestType : uint32_t {
BLUETOOTH_DEVICE_REQUEST_TYPE_CONNECT_V3_WITHOUT_CACHE = 5,
BLUETOOTH_DEVICE_REQUEST_TYPE_CLEAR_CACHE = 6,
};
// C++ counterpart of the proto enum VoiceAssistantEvent; the numeric values
// match the wire values declared in the .proto definition.
enum VoiceAssistantEvent : uint32_t {
VOICE_ASSISTANT_ERROR = 0,
VOICE_ASSISTANT_RUN_START = 1,
VOICE_ASSISTANT_RUN_END = 2,
VOICE_ASSISTANT_STT_START = 3,
VOICE_ASSISTANT_STT_END = 4,
VOICE_ASSISTANT_INTENT_START = 5,
VOICE_ASSISTANT_INTENT_END = 6,
VOICE_ASSISTANT_TTS_START = 7,
VOICE_ASSISTANT_TTS_END = 8,
};
} // namespace enums
@ -279,6 +290,7 @@ class DeviceInfoResponse : public ProtoMessage {
uint32_t bluetooth_proxy_version{0};
std::string manufacturer{};
std::string friendly_name{};
uint32_t voice_assistant_version{0};
void encode(ProtoWriteBuffer buffer) const override;
#ifdef HAS_PROTO_MESSAGE_DUMP
void dump_to(std::string &out) const override;
@ -1577,6 +1589,65 @@ class BluetoothDeviceClearCacheResponse : public ProtoMessage {
protected:
bool decode_varint(uint32_t field_id, ProtoVarInt value) override;
};
// C++ counterpart of the proto message SubscribeVoiceAssistantRequest.
class SubscribeVoiceAssistantRequest : public ProtoMessage {
public:
// Proto field 1 (varint/bool); defaults to false.
bool subscribe{false};
void encode(ProtoWriteBuffer buffer) const override;
#ifdef HAS_PROTO_MESSAGE_DUMP
// Only available when message dumping is compiled in.
void dump_to(std::string &out) const override;
#endif
protected:
bool decode_varint(uint32_t field_id, ProtoVarInt value) override;
};
// C++ counterpart of the proto message VoiceAssistantRequest.
class VoiceAssistantRequest : public ProtoMessage {
public:
// Proto field 1 (varint/bool); defaults to false.
bool start{false};
void encode(ProtoWriteBuffer buffer) const override;
#ifdef HAS_PROTO_MESSAGE_DUMP
// Only available when message dumping is compiled in.
void dump_to(std::string &out) const override;
#endif
protected:
bool decode_varint(uint32_t field_id, ProtoVarInt value) override;
};
// C++ counterpart of the proto message VoiceAssistantResponse.
class VoiceAssistantResponse : public ProtoMessage {
public:
// Proto field 1 (varint/uint32); defaults to 0.
uint32_t port{0};
// Proto field 2 (varint/bool); defaults to false.
bool error{false};
void encode(ProtoWriteBuffer buffer) const override;
#ifdef HAS_PROTO_MESSAGE_DUMP
// Only available when message dumping is compiled in.
void dump_to(std::string &out) const override;
#endif
protected:
bool decode_varint(uint32_t field_id, ProtoVarInt value) override;
};
// C++ counterpart of the proto message VoiceAssistantEventData: a single
// name/value string pair.
class VoiceAssistantEventData : public ProtoMessage {
public:
// Proto field 1 (length-delimited string).
std::string name{};
// Proto field 2 (length-delimited string).
std::string value{};
void encode(ProtoWriteBuffer buffer) const override;
#ifdef HAS_PROTO_MESSAGE_DUMP
// Only available when message dumping is compiled in.
void dump_to(std::string &out) const override;
#endif
protected:
bool decode_length(uint32_t field_id, ProtoLengthDelimited value) override;
};
// C++ counterpart of the proto message VoiceAssistantEventResponse.
class VoiceAssistantEventResponse : public ProtoMessage {
public:
// Proto field 1 (varint/enum); value-initialized by default.
enums::VoiceAssistantEvent event_type{};
// Proto field 2 (repeated sub-message); one element per decoded occurrence.
std::vector<VoiceAssistantEventData> data{};
void encode(ProtoWriteBuffer buffer) const override;
#ifdef HAS_PROTO_MESSAGE_DUMP
// Only available when message dumping is compiled in.
void dump_to(std::string &out) const override;
#endif
protected:
bool decode_length(uint32_t field_id, ProtoLengthDelimited value) override;
bool decode_varint(uint32_t field_id, ProtoVarInt value) override;
};
} // namespace api
} // namespace esphome

View file

@ -453,6 +453,20 @@ bool APIServerConnectionBase::send_bluetooth_device_clear_cache_response(const B
return this->send_message_<BluetoothDeviceClearCacheResponse>(msg, 88);
}
#endif
#ifdef USE_VOICE_ASSISTANT
#endif
#ifdef USE_VOICE_ASSISTANT
// Sends a VoiceAssistantRequest to the client as message type 90 and returns
// the send result. The message is logged first when dumping is compiled in.
bool APIServerConnectionBase::send_voice_assistant_request(const VoiceAssistantRequest &msg) {
#ifdef HAS_PROTO_MESSAGE_DUMP
  ESP_LOGVV(TAG, "send_voice_assistant_request: %s", msg.dump().c_str());
#endif
  const bool sent = this->send_message_<VoiceAssistantRequest>(msg, 90);
  return sent;
}
#endif
#ifdef USE_VOICE_ASSISTANT
#endif
#ifdef USE_VOICE_ASSISTANT
#endif
bool APIServerConnectionBase::read_message(uint32_t msg_size, uint32_t msg_type, uint8_t *msg_data) {
switch (msg_type) {
case 1: {
@ -827,6 +841,39 @@ bool APIServerConnectionBase::read_message(uint32_t msg_size, uint32_t msg_type,
ESP_LOGVV(TAG, "on_unsubscribe_bluetooth_le_advertisements_request: %s", msg.dump().c_str());
#endif
this->on_unsubscribe_bluetooth_le_advertisements_request(msg);
#endif
break;
}
case 89: {
#ifdef USE_VOICE_ASSISTANT
SubscribeVoiceAssistantRequest msg;
msg.decode(msg_data, msg_size);
#ifdef HAS_PROTO_MESSAGE_DUMP
ESP_LOGVV(TAG, "on_subscribe_voice_assistant_request: %s", msg.dump().c_str());
#endif
this->on_subscribe_voice_assistant_request(msg);
#endif
break;
}
case 91: {
#ifdef USE_VOICE_ASSISTANT
VoiceAssistantResponse msg;
msg.decode(msg_data, msg_size);
#ifdef HAS_PROTO_MESSAGE_DUMP
ESP_LOGVV(TAG, "on_voice_assistant_response: %s", msg.dump().c_str());
#endif
this->on_voice_assistant_response(msg);
#endif
break;
}
case 92: {
#ifdef USE_VOICE_ASSISTANT
VoiceAssistantEventResponse msg;
msg.decode(msg_data, msg_size);
#ifdef HAS_PROTO_MESSAGE_DUMP
ESP_LOGVV(TAG, "on_voice_assistant_event_response: %s", msg.dump().c_str());
#endif
this->on_voice_assistant_event_response(msg);
#endif
break;
}
@ -1226,6 +1273,19 @@ void APIServerConnection::on_unsubscribe_bluetooth_le_advertisements_request(
this->unsubscribe_bluetooth_le_advertisements(msg);
}
#endif
#ifdef USE_VOICE_ASSISTANT
// Gatekeeper for SubscribeVoiceAssistantRequest: the request is only passed to
// subscribe_voice_assistant() once the connection is set up and authenticated.
// The setup check comes first; each failure triggers its own handler.
void APIServerConnection::on_subscribe_voice_assistant_request(const SubscribeVoiceAssistantRequest &msg) {
  if (!this->is_connection_setup()) {
    this->on_no_setup_connection();
  } else if (!this->is_authenticated()) {
    this->on_unauthenticated_access();
  } else {
    this->subscribe_voice_assistant(msg);
  }
}
#endif
} // namespace api
} // namespace esphome

View file

@ -224,6 +224,18 @@ class APIServerConnectionBase : public ProtoService {
#endif
#ifdef USE_BLUETOOTH_PROXY
bool send_bluetooth_device_clear_cache_response(const BluetoothDeviceClearCacheResponse &msg);
#endif
#ifdef USE_VOICE_ASSISTANT
// Hook invoked for incoming message type 89; the default does nothing.
virtual void on_subscribe_voice_assistant_request(const SubscribeVoiceAssistantRequest &value){};
#endif
#ifdef USE_VOICE_ASSISTANT
// Sends a VoiceAssistantRequest (message type 90) to the client.
bool send_voice_assistant_request(const VoiceAssistantRequest &msg);
#endif
#ifdef USE_VOICE_ASSISTANT
// Hook invoked for incoming message type 91; the default does nothing.
virtual void on_voice_assistant_response(const VoiceAssistantResponse &value){};
#endif
#ifdef USE_VOICE_ASSISTANT
// Hook invoked for incoming message type 92; the default does nothing.
virtual void on_voice_assistant_event_response(const VoiceAssistantEventResponse &value){};
#endif
protected:
bool read_message(uint32_t msg_size, uint32_t msg_type, uint8_t *msg_data) override;
@ -306,6 +318,9 @@ class APIServerConnection : public APIServerConnectionBase {
#endif
#ifdef USE_BLUETOOTH_PROXY
virtual void unsubscribe_bluetooth_le_advertisements(const UnsubscribeBluetoothLEAdvertisementsRequest &msg) = 0;
#endif
#ifdef USE_VOICE_ASSISTANT
virtual void subscribe_voice_assistant(const SubscribeVoiceAssistantRequest &msg) = 0;
#endif
protected:
void on_hello_request(const HelloRequest &msg) override;
@ -384,6 +399,9 @@ class APIServerConnection : public APIServerConnectionBase {
void on_unsubscribe_bluetooth_le_advertisements_request(
const UnsubscribeBluetoothLEAdvertisementsRequest &msg) override;
#endif
#ifdef USE_VOICE_ASSISTANT
void on_subscribe_voice_assistant_request(const SubscribeVoiceAssistantRequest &msg) override;
#endif
};
} // namespace api

View file

@ -427,5 +427,18 @@ void APIServer::on_shutdown() {
delay(10);
}
#ifdef USE_VOICE_ASSISTANT
// Asks every connected client to start a voice assistant run. Clients that
// have not subscribed ignore the request inside request_voice_assistant().
void APIServer::start_voice_assistant() {
  const bool start = true;
  for (auto &client : this->clients_)
    client->request_voice_assistant(start);
}
// Asks every connected client to stop the voice assistant run. Clients that
// have not subscribed ignore the request inside request_voice_assistant().
void APIServer::stop_voice_assistant() {
  const bool start = false;
  for (auto &client : this->clients_)
    client->request_voice_assistant(start);
}
#endif
} // namespace api
} // namespace esphome

View file

@ -95,6 +95,11 @@ class APIServer : public Component, public Controller {
void request_time();
#endif
#ifdef USE_VOICE_ASSISTANT
// Broadcast a start request to all connected clients.
void start_voice_assistant();
// Broadcast a stop request to all connected clients.
void stop_voice_assistant();
#endif
bool is_connected() const;
struct HomeAssistantStateSubscription {

View file

@ -0,0 +1,70 @@
import esphome.config_validation as cv
import esphome.final_validate as fv
import esphome.codegen as cg
from esphome import pins
from esphome.const import CONF_ID
from esphome.components.esp32 import get_esp32_variant
from esphome.components.esp32.const import (
VARIANT_ESP32,
VARIANT_ESP32S2,
VARIANT_ESP32S3,
VARIANT_ESP32C3,
)
CODEOWNERS = ["@jesserockz"]
DEPENDENCIES = ["esp32"]
# More than one i2s_audio entry may be configured; the count is checked per
# variant in the final validation step.
MULTI_CONF = True
# Configuration keys shared with the platforms that build on this component.
CONF_I2S_DOUT_PIN = "i2s_dout_pin"
CONF_I2S_DIN_PIN = "i2s_din_pin"
CONF_I2S_BCLK_PIN = "i2s_bclk_pin"
CONF_I2S_LRCLK_PIN = "i2s_lrclk_pin"
CONF_I2S_AUDIO = "i2s_audio"
CONF_I2S_AUDIO_ID = "i2s_audio_id"
# Code generation handles for the C++ classes in namespace esphome::i2s_audio.
i2s_audio_ns = cg.esphome_ns.namespace("i2s_audio")
I2SAudioComponent = i2s_audio_ns.class_("I2SAudioComponent", cg.Component)
I2SAudioIn = i2s_audio_ns.class_("I2SAudioIn", cg.Parented.template(I2SAudioComponent))
I2SAudioOut = i2s_audio_ns.class_(
"I2SAudioOut", cg.Parented.template(I2SAudioComponent)
)
# Number of I2S ports available on each supported ESP32 variant.
# https://github.com/espressif/esp-idf/blob/master/components/soc/{variant}/include/soc/soc_caps.h
I2S_PORTS = {
VARIANT_ESP32: 2,
VARIANT_ESP32S2: 1,
VARIANT_ESP32S3: 2,
VARIANT_ESP32C3: 1,
}
# The shared component only owns the two clock pins.
CONFIG_SCHEMA = cv.Schema(
{
cv.GenerateID(): cv.declare_id(I2SAudioComponent),
cv.Required(CONF_I2S_BCLK_PIN): pins.internal_gpio_output_pin_number,
cv.Required(CONF_I2S_LRCLK_PIN): pins.internal_gpio_output_pin_number,
}
)
def _final_validate(_):
    """Reject configurations that declare more I2S buses than the chip has.

    Reads every ``i2s_audio`` entry from the full configuration and compares
    the count with the number of ports listed in ``I2S_PORTS`` for the active
    ESP32 variant.

    Raises:
        cv.Invalid: if the variant is not listed in ``I2S_PORTS`` or too many
            ``i2s_audio`` entries are configured.
    """
    configured_buses = fv.full_config.get()[CONF_I2S_AUDIO]
    variant = get_esp32_variant()

    available_ports = I2S_PORTS.get(variant)
    if available_ports is None:
        raise cv.Invalid(f"Unsupported variant {variant}")

    if len(configured_buses) > available_ports:
        raise cv.Invalid(
            f"Only {I2S_PORTS[variant]} I2S audio ports are supported on {variant}"
        )
FINAL_VALIDATE_SCHEMA = _final_validate
async def to_code(config):
    """Generate the C++ setup code for one ``i2s_audio`` entry.

    Creates the component variable, registers it as a component and emits the
    setter calls for the bit-clock and left/right-clock pins.
    """
    component = cg.new_Pvariable(config[CONF_ID])
    await cg.register_component(component, config)

    bclk_pin = config[CONF_I2S_BCLK_PIN]
    cg.add(component.set_bclk_pin(bclk_pin))

    lrclk_pin = config[CONF_I2S_LRCLK_PIN]
    cg.add(component.set_lrclk_pin(lrclk_pin))

View file

@ -0,0 +1,30 @@
#include "i2s_audio.h"
#ifdef USE_ESP32
#include "esphome/core/log.h"
namespace esphome {
namespace i2s_audio {
static const char *const TAG = "i2s_audio";
void I2SAudioComponent::setup() {
static i2s_port_t next_port_num = I2S_NUM_0;
if (next_port_num >= I2S_NUM_MAX) {
ESP_LOGE(TAG, "Too many I2S Audio components!");
this->mark_failed();
return;
}
this->port_ = next_port_num;
next_port_num = (i2s_port_t) (next_port_num + 1);
ESP_LOGCONFIG(TAG, "Setting up I2S Audio...");
}
} // namespace i2s_audio
} // namespace esphome
#endif // USE_ESP32

View file

@ -0,0 +1,64 @@
#pragma once
#ifdef USE_ESP32
#include <driver/i2s.h>
#include "esphome/core/component.h"
#include "esphome/core/helpers.h"
namespace esphome {
namespace i2s_audio {
// Forward declaration so the Parented<> helpers below can name their parent type.
class I2SAudioComponent;
// Base for input-side users of an I2S bus; carries the parent component pointer.
class I2SAudioIn : public Parented<I2SAudioComponent> {};
// Base for output-side users of an I2S bus; carries the parent component pointer.
class I2SAudioOut : public Parented<I2SAudioComponent> {};
// Represents one I2S bus. It owns the bit-clock and left/right-clock pin
// numbers, the port number assigned in setup(), and a lock that the registered
// input and output users take before driving the bus.
class I2SAudioComponent : public Component {
 public:
  void setup() override;

  // Port number assigned to this component by setup().
  i2s_port_t get_port() const { return this->port_; }

  void set_bclk_pin(uint8_t pin) { this->bclk_pin_ = pin; }
  void set_lrclk_pin(uint8_t pin) { this->lrclk_pin_ = pin; }

  // Pin configuration with only the two clock pins filled in; the master-clock
  // and both data pins are left as "no change" for the caller to override.
  i2s_pin_config_t get_pin_config() const {
    i2s_pin_config_t pins{};
    pins.mck_io_num = I2S_PIN_NO_CHANGE;
    pins.bck_io_num = this->bclk_pin_;
    pins.ws_io_num = this->lrclk_pin_;
    pins.data_out_num = I2S_PIN_NO_CHANGE;
    pins.data_in_num = I2S_PIN_NO_CHANGE;
    return pins;
  }

  // Remember the input user and point it back at this component.
  void register_audio_in(I2SAudioIn *in) {
    in->set_parent(this);
    this->audio_in_ = in;
  }
  // Remember the output user and point it back at this component.
  void register_audio_out(I2SAudioOut *out) {
    out->set_parent(this);
    this->audio_out_ = out;
  }

  // Thin wrappers around the bus lock.
  void lock() { this->lock_.lock(); }
  bool try_lock() { return this->lock_.try_lock(); }
  void unlock() { this->lock_.unlock(); }

 protected:
  Mutex lock_;
  I2SAudioIn *audio_in_{nullptr};
  I2SAudioOut *audio_out_{nullptr};
  uint8_t bclk_pin_;
  uint8_t lrclk_pin_;
  i2s_port_t port_{};
};
} // namespace i2s_audio
} // namespace esphome
#endif // USE_ESP32

View file

@ -5,22 +5,25 @@ import esphome.config_validation as cv
from esphome import pins
from esphome.const import CONF_ID, CONF_MODE
from esphome.core import CORE
from .. import (
i2s_audio_ns,
I2SAudioComponent,
I2SAudioOut,
CONF_I2S_AUDIO_ID,
CONF_I2S_DOUT_PIN,
)
CODEOWNERS = ["@jesserockz"]
DEPENDENCIES = ["esp32"]
i2s_audio_ns = cg.esphome_ns.namespace("i2s_audio")
DEPENDENCIES = ["i2s_audio"]
I2SAudioMediaPlayer = i2s_audio_ns.class_(
"I2SAudioMediaPlayer", cg.Component, media_player.MediaPlayer
"I2SAudioMediaPlayer", cg.Component, media_player.MediaPlayer, I2SAudioOut
)
i2s_dac_mode_t = cg.global_ns.enum("i2s_dac_mode_t")
CONF_I2S_DOUT_PIN = "i2s_dout_pin"
CONF_I2S_BCLK_PIN = "i2s_bclk_pin"
CONF_I2S_LRCLK_PIN = "i2s_lrclk_pin"
CONF_MUTE_PIN = "mute_pin"
CONF_AUDIO_ID = "audio_id"
CONF_DAC_TYPE = "dac_type"
@ -48,34 +51,26 @@ def validate_esp32_variant(config):
CONFIG_SCHEMA = cv.All(
cv.typed_schema(
{
"internal": cv.Schema(
"internal": media_player.MEDIA_PLAYER_SCHEMA.extend(
{
cv.GenerateID(): cv.declare_id(I2SAudioMediaPlayer),
cv.GenerateID(CONF_I2S_AUDIO_ID): cv.use_id(I2SAudioComponent),
cv.Required(CONF_MODE): cv.enum(INTERNAL_DAC_OPTIONS, lower=True),
}
)
.extend(media_player.MEDIA_PLAYER_SCHEMA)
.extend(cv.COMPONENT_SCHEMA),
"external": cv.Schema(
).extend(cv.COMPONENT_SCHEMA),
"external": media_player.MEDIA_PLAYER_SCHEMA.extend(
{
cv.GenerateID(): cv.declare_id(I2SAudioMediaPlayer),
cv.GenerateID(CONF_I2S_AUDIO_ID): cv.use_id(I2SAudioComponent),
cv.Required(
CONF_I2S_DOUT_PIN
): pins.internal_gpio_output_pin_number,
cv.Required(
CONF_I2S_BCLK_PIN
): pins.internal_gpio_output_pin_number,
cv.Required(
CONF_I2S_LRCLK_PIN
): pins.internal_gpio_output_pin_number,
cv.Optional(CONF_MUTE_PIN): pins.gpio_output_pin_schema,
cv.Optional(CONF_MODE, default="mono"): cv.one_of(
*EXTERNAL_DAC_OPTIONS, lower=True
),
}
)
.extend(media_player.MEDIA_PLAYER_SCHEMA)
.extend(cv.COMPONENT_SCHEMA),
).extend(cv.COMPONENT_SCHEMA),
},
key=CONF_DAC_TYPE,
),
@ -89,18 +84,18 @@ async def to_code(config):
await cg.register_component(var, config)
await media_player.register_media_player(var, config)
parent = await cg.get_variable(config[CONF_I2S_AUDIO_ID])
cg.add(parent.register_audio_out(var))
if config[CONF_DAC_TYPE] == "internal":
cg.add(var.set_internal_dac_mode(config[CONF_MODE]))
else:
cg.add(var.set_dout_pin(config[CONF_I2S_DOUT_PIN]))
cg.add(var.set_bclk_pin(config[CONF_I2S_BCLK_PIN]))
cg.add(var.set_lrclk_pin(config[CONF_I2S_LRCLK_PIN]))
if CONF_MUTE_PIN in config:
pin = await cg.gpio_pin_expression(config[CONF_MUTE_PIN])
cg.add(var.set_mute_pin(pin))
cg.add(var.set_external_dac_channels(2 if config[CONF_MODE] == "stereo" else 1))
if CORE.is_esp32:
cg.add_library("WiFiClientSecure", None)
cg.add_library("HTTPClient", None)
cg.add_library("esphome/ESP32-audioI2S", "2.0.6")

View file

@ -11,11 +11,19 @@ static const char *const TAG = "audio";
void I2SAudioMediaPlayer::control(const media_player::MediaPlayerCall &call) {
if (call.get_media_url().has_value()) {
if (this->audio_->isRunning())
this->current_url_ = call.get_media_url();
if (this->state == media_player::MEDIA_PLAYER_STATE_PLAYING && this->audio_ != nullptr) {
if (this->audio_->isRunning()) {
this->audio_->stopSong();
this->high_freq_.start();
this->audio_->connecttohost(call.get_media_url().value().c_str());
this->state = media_player::MEDIA_PLAYER_STATE_PLAYING;
}
this->audio_->connecttohost(this->current_url_.value().c_str());
} else {
this->start();
}
}
if (this->i2s_state_ != I2S_STATE_RUNNING) {
return;
}
if (call.get_volume().has_value()) {
this->volume = call.get_volume().value();
@ -35,7 +43,7 @@ void I2SAudioMediaPlayer::control(const media_player::MediaPlayerCall &call) {
this->state = media_player::MEDIA_PLAYER_STATE_PAUSED;
break;
case media_player::MEDIA_PLAYER_COMMAND_STOP:
this->stop_();
this->stop();
break;
case media_player::MEDIA_PLAYER_COMMAND_MUTE:
this->mute_();
@ -94,22 +102,51 @@ void I2SAudioMediaPlayer::set_volume_(float volume, bool publish) {
this->volume = volume;
}
void I2SAudioMediaPlayer::stop_() {
if (this->audio_->isRunning())
this->audio_->stopSong();
this->high_freq_.stop();
void I2SAudioMediaPlayer::setup() {
ESP_LOGCONFIG(TAG, "Setting up Audio...");
this->state = media_player::MEDIA_PLAYER_STATE_IDLE;
}
void I2SAudioMediaPlayer::setup() {
ESP_LOGCONFIG(TAG, "Setting up Audio...");
// Advances the player's I2S state machine by one step per call: STARTING runs
// start_(), RUNNING runs play_(), STOPPING runs stop_(), and STOPPED does nothing.
void I2SAudioMediaPlayer::loop() {
  const auto current_state = this->i2s_state_;
  if (current_state == I2S_STATE_STARTING) {
    this->start_();
  } else if (current_state == I2S_STATE_RUNNING) {
    this->play_();
  } else if (current_state == I2S_STATE_STOPPING) {
    this->stop_();
  }
  // I2S_STATE_STOPPED: nothing to do.
}
// Services the audio driver once and requests a stop when the player is still
// marked as playing but the driver reports it is no longer running.
void I2SAudioMediaPlayer::play_() {
  this->audio_->loop();

  const bool marked_playing = this->state == media_player::MEDIA_PLAYER_STATE_PLAYING;
  if (marked_playing && !this->audio_->isRunning())
    this->stop();
}
// Requests a start: only flags the state machine, the actual work happens in
// loop() via start_().
void I2SAudioMediaPlayer::start() {
  this->i2s_state_ = I2S_STATE_STARTING;
}
// Brings up the audio driver; called from loop() while the state is
// I2S_STATE_STARTING, so returning early simply retries on the next loop().
void I2SAudioMediaPlayer::start_() {
  // Proceed only once the parent's bus lock has been acquired. The previous
  // check was inverted: it returned when the lock WAS taken (leaving it held
  // with no driver started) and continued when it was not.
  if (!this->parent_->try_lock()) {
    return;  // Waiting for another i2s to return lock
  }
#if SOC_I2S_SUPPORTS_DAC
if (this->internal_dac_mode_ != I2S_DAC_CHANNEL_DISABLE) {
this->audio_ = make_unique<Audio>(true, this->internal_dac_mode_);
this->audio_ = make_unique<Audio>(true, this->internal_dac_mode_, this->parent_->get_port());
} else {
#endif
this->audio_ = make_unique<Audio>(false);
this->audio_->setPinout(this->bclk_pin_, this->lrclk_pin_, this->dout_pin_);
this->audio_ = make_unique<Audio>(false, I2S_DAC_CHANNEL_BOTH_EN, this->parent_->get_port());
i2s_pin_config_t pin_config = this->parent_->get_pin_config();
pin_config.data_out_num = this->dout_pin_;
i2s_set_pin(this->parent_->get_port(), &pin_config);
this->audio_->forceMono(this->external_dac_channels_ == 1);
if (this->mute_pin_ != nullptr) {
this->mute_pin_->setup();
@ -118,16 +155,30 @@ void I2SAudioMediaPlayer::setup() {
#if SOC_I2S_SUPPORTS_DAC
}
#endif
this->state = media_player::MEDIA_PLAYER_STATE_IDLE;
}
void I2SAudioMediaPlayer::loop() {
this->audio_->loop();
if (this->state == media_player::MEDIA_PLAYER_STATE_PLAYING && !this->audio_->isRunning()) {
this->stop_();
this->i2s_state_ = I2S_STATE_RUNNING;
this->high_freq_.start();
if (this->current_url_.has_value()) {
this->audio_->connecttohost(this->current_url_.value().c_str());
this->state = media_player::MEDIA_PLAYER_STATE_PLAYING;
this->publish_state();
}
}
// Requests a stop: only flags the state machine, the actual teardown happens
// in loop() via stop_().
void I2SAudioMediaPlayer::stop() {
  this->i2s_state_ = I2S_STATE_STOPPING;
}
// Carries out a pending stop request. While the audio object still reports running it is only
// told to stop the song and the function returns (state stays STOPPING, so loop() calls this
// again). Once it is no longer running, the audio object and URL are dropped, the parent lock
// is released and the idle state is published.
void I2SAudioMediaPlayer::stop_() {
  const bool still_running = this->audio_->isRunning();
  if (still_running) {
    this->audio_->stopSong();
    return;
  }

  this->audio_.reset();
  this->current_url_ = {};
  this->parent_->unlock();
  this->i2s_state_ = I2S_STATE_STOPPED;

  this->high_freq_.stop();
  this->state = media_player::MEDIA_PLAYER_STATE_IDLE;
  this->publish_state();
}
media_player::MediaPlayerTraits I2SAudioMediaPlayer::get_traits() {
auto traits = media_player::MediaPlayerTraits();

View file

@ -2,6 +2,10 @@
#ifdef USE_ESP32_FRAMEWORK_ARDUINO
#include "../i2s_audio.h"
#include <driver/i2s.h>
#include "esphome/components/media_player/media_player.h"
#include "esphome/core/component.h"
#include "esphome/core/gpio.h"
@ -12,7 +16,14 @@
namespace esphome {
namespace i2s_audio {
class I2SAudioMediaPlayer : public Component, public media_player::MediaPlayer {
// States of the media player's I2S lifecycle. I2SAudioMediaPlayer::loop() dispatches on the
// current value: STARTING runs start_(), RUNNING runs play_(), STOPPING runs stop_(), and
// STOPPED does nothing.
enum I2SState : uint8_t {
I2S_STATE_STOPPED = 0,
I2S_STATE_STARTING,
I2S_STATE_RUNNING,
I2S_STATE_STOPPING,
};
class I2SAudioMediaPlayer : public Component, public media_player::MediaPlayer, public I2SAudioOut {
public:
void setup() override;
float get_setup_priority() const override { return esphome::setup_priority::LATE; }
@ -22,8 +33,6 @@ class I2SAudioMediaPlayer : public Component, public media_player::MediaPlayer {
void dump_config() override;
void set_dout_pin(uint8_t pin) { this->dout_pin_ = pin; }
void set_bclk_pin(uint8_t pin) { this->bclk_pin_ = pin; }
void set_lrclk_pin(uint8_t pin) { this->lrclk_pin_ = pin; }
void set_mute_pin(GPIOPin *mute_pin) { this->mute_pin_ = mute_pin; }
#if SOC_I2S_SUPPORTS_DAC
void set_internal_dac_mode(i2s_dac_mode_t mode) { this->internal_dac_mode_ = mode; }
@ -34,20 +43,24 @@ class I2SAudioMediaPlayer : public Component, public media_player::MediaPlayer {
bool is_muted() const override { return this->muted_; }
void start();
void stop();
protected:
void control(const media_player::MediaPlayerCall &call) override;
void mute_();
void unmute_();
void set_volume_(float volume, bool publish = true);
void stop_();
void start_();
void stop_();
void play_();
I2SState i2s_state_{I2S_STATE_STOPPED};
std::unique_ptr<Audio> audio_;
uint8_t dout_pin_{0};
uint8_t din_pin_{0};
uint8_t bclk_pin_;
uint8_t lrclk_pin_;
GPIOPin *mute_pin_{nullptr};
bool muted_{false};
@ -59,6 +72,8 @@ class I2SAudioMediaPlayer : public Component, public media_player::MediaPlayer {
uint8_t external_dac_channels_;
HighFrequencyLoopRequester high_freq_;
optional<std::string> current_url_{};
};
} // namespace i2s_audio

View file

@ -0,0 +1,41 @@
import esphome.config_validation as cv
import esphome.codegen as cg
from esphome import pins
from esphome.const import CONF_ID
from esphome.components import microphone
from .. import (
i2s_audio_ns,
I2SAudioComponent,
I2SAudioIn,
CONF_I2S_AUDIO_ID,
CONF_I2S_DIN_PIN,
)
CODEOWNERS = ["@jesserockz"]
DEPENDENCIES = ["i2s_audio"]

# Handle for the C++ class implementing this platform; the bases mirror the C++ declaration.
I2SAudioMicrophone = i2s_audio_ns.class_(
    "I2SAudioMicrophone", I2SAudioIn, microphone.Microphone, cg.Component
)

CONFIG_SCHEMA = microphone.MICROPHONE_SCHEMA.extend(
    {
        cv.GenerateID(): cv.declare_id(I2SAudioMicrophone),
        cv.GenerateID(CONF_I2S_AUDIO_ID): cv.use_id(I2SAudioComponent),
        # The DIN pin is assigned to the driver's data-in line, so it must be validated as an
        # input pin; the output validator would reject pins that can only be used as inputs.
        cv.Required(CONF_I2S_DIN_PIN): pins.internal_gpio_input_pin_number,
    }
).extend(cv.COMPONENT_SCHEMA)
async def to_code(config):
    """Emit the setup code for one I2S microphone entry.

    Creates and registers the component, registers it as an audio input on the
    referenced I2S parent, sets its DIN pin and registers it as a microphone.
    """
    mic = cg.new_Pvariable(config[CONF_ID])
    await cg.register_component(mic, config)

    i2s_parent = await cg.get_variable(config[CONF_I2S_AUDIO_ID])
    cg.add(i2s_parent.register_audio_in(mic))

    din_pin = config[CONF_I2S_DIN_PIN]
    cg.add(mic.set_din_pin(din_pin))

    await microphone.register_microphone(mic, config)

View file

@ -0,0 +1,101 @@
#include "i2s_audio_microphone.h"
#ifdef USE_ESP32
#include <driver/i2s.h>
#include "esphome/core/hal.h"
#include "esphome/core/log.h"
namespace esphome {
namespace i2s_audio {
// Size in bytes of the capture buffer: setup() resizes buffer_ to this value and read_() asks
// i2s_read() for this many bytes per call.
static const size_t BUFFER_SIZE = 512;
// Log tag used by every log statement in this file.
static const char *const TAG = "i2s_audio.microphone";
// Logs the setup banner and sizes the capture buffer that read_() fills on each pass.
void I2SAudioMicrophone::setup() {
  ESP_LOGCONFIG(TAG, "Setting up I2S Audio Microphone...");

  this->buffer_.resize(BUFFER_SIZE);
}
// Requests capture to start; loop() performs the actual start through start_().
void I2SAudioMicrophone::start() {
  // Already capturing: moving back to STARTING would make start_() try to take the I2S lock
  // this microphone is already holding, which can never succeed, so the state machine would
  // stay in STARTING forever.
  if (this->state_ == microphone::STATE_RUNNING)
    return;
  this->state_ = microphone::STATE_STARTING;
}
void I2SAudioMicrophone::start_() {
if (!this->parent_->try_lock()) {
return; // Waiting for another i2s to return lock
}
i2s_driver_config_t config = {
.mode = (i2s_mode_t) (I2S_MODE_MASTER | I2S_MODE_RX | I2S_MODE_PDM),
.sample_rate = 16000,
.bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT,
.channel_format = I2S_CHANNEL_FMT_ONLY_RIGHT,
.communication_format = I2S_COMM_FORMAT_STAND_I2S,
.intr_alloc_flags = ESP_INTR_FLAG_LEVEL1,
.dma_buf_count = 4,
.dma_buf_len = 256,
.use_apll = false,
.tx_desc_auto_clear = false,
.fixed_mclk = 0,
.mclk_multiple = I2S_MCLK_MULTIPLE_DEFAULT,
.bits_per_chan = I2S_BITS_PER_CHAN_DEFAULT,
};
i2s_driver_install(this->parent_->get_port(), &config, 0, nullptr);
i2s_pin_config_t pin_config = this->parent_->get_pin_config();
pin_config.data_in_num = this->din_pin_;
i2s_set_pin(this->parent_->get_port(), &pin_config);
this->state_ = microphone::STATE_RUNNING;
this->high_freq_.start();
}
// Requests capture to stop; loop() performs the teardown through stop_().
void I2SAudioMicrophone::stop() {
  if (this->state_ == microphone::STATE_STOPPED)
    return;
  if (this->state_ == microphone::STATE_STARTING) {
    // start_() leaves STARTING only after it has taken the lock and installed the driver, so
    // in this state there is nothing to tear down. Going through stop_() would stop and
    // uninstall a driver that was never installed and release a lock that is not held.
    this->state_ = microphone::STATE_STOPPED;
    return;
  }
  this->state_ = microphone::STATE_STOPPING;
}
// Carries out a pending stop request: stops and uninstalls the I2S driver on the parent's
// port, releases the parent lock and returns to STOPPED.
void I2SAudioMicrophone::stop_() {
  auto *bus = this->parent_;

  i2s_stop(bus->get_port());
  i2s_driver_uninstall(bus->get_port());
  bus->unlock();

  this->state_ = microphone::STATE_STOPPED;
  this->high_freq_.stop();
}
void I2SAudioMicrophone::read_() {
size_t bytes_read = 0;
esp_err_t err =
i2s_read(this->parent_->get_port(), this->buffer_.data(), BUFFER_SIZE, &bytes_read, (100 / portTICK_PERIOD_MS));
if (err != ESP_OK) {
ESP_LOGW(TAG, "Error reading from I2S microphone: %s", esp_err_to_name(err));
this->status_set_warning();
return;
}
this->status_clear_warning();
this->data_callbacks_.call(this->buffer_);
}
void I2SAudioMicrophone::loop() {
switch (this->state_) {
case microphone::STATE_STOPPED:
break;
case microphone::STATE_STARTING:
this->start_();
break;
case microphone::STATE_RUNNING:
this->read_();
break;
case microphone::STATE_STOPPING:
this->stop_();
break;
}
}
} // namespace i2s_audio
} // namespace esphome
#endif // USE_ESP32

View file

@ -0,0 +1,37 @@
#pragma once
#ifdef USE_ESP32
#include "../i2s_audio.h"
#include "esphome/components/microphone/microphone.h"
#include "esphome/core/component.h"
namespace esphome {
namespace i2s_audio {
/// Microphone implementation that captures audio through the I2S driver of its parent I2S
/// component. start()/stop() only request a state change; the work is done from loop().
class I2SAudioMicrophone : public I2SAudioIn, public microphone::Microphone, public Component {
public:
void setup() override;
void start() override;
void stop() override;
void loop() override;
/// Sets the pin number that start_() assigns to the driver's data-in line.
void set_din_pin(uint8_t pin) { this->din_pin_ = pin; }
protected:
// Handlers invoked by loop() for the STARTING, STOPPING and RUNNING states respectively.
void start_();
void stop_();
void read_();
uint8_t din_pin_{0};
// Capture buffer; resized in setup() and passed to the data callbacks by read_().
std::vector<uint8_t> buffer_;
// Started in start_() and stopped in stop_().
HighFrequencyLoopRequester high_freq_;
};
} // namespace i2s_audio
} // namespace esphome
#endif // USE_ESP32

View file

@ -21,7 +21,7 @@ std::string ImprovBase::get_formatted_next_url_() {
// Ip address
pos = this->next_url_.find("{{ip_address}}");
if (pos != std::string::npos) {
std::string ip = network::IPAddress(network::get_ip_address()).str();
std::string ip = network::get_ip_address().str();
copy.replace(pos, 14, ip);
}

View file

@ -0,0 +1,91 @@
from esphome import automation
import esphome.config_validation as cv
import esphome.codegen as cg
from esphome.automation import maybe_simple_id
from esphome.const import CONF_ID, CONF_TRIGGER_ID
from esphome.core import CORE
from esphome.coroutine import coroutine_with_priority
CODEOWNERS = ["@jesserockz"]

IS_PLATFORM_COMPONENT = True

CONF_ON_DATA = "on_data"

microphone_ns = cg.esphome_ns.namespace("microphone")

Microphone = microphone_ns.class_("Microphone")

# Actions that start / stop capturing on the microphone they are parented to.
CaptureAction = microphone_ns.class_(
    "CaptureAction", automation.Action, cg.Parented.template(Microphone)
)
StopCaptureAction = microphone_ns.class_(
    "StopCaptureAction", automation.Action, cg.Parented.template(Microphone)
)

# The trigger's argument type must match both the C++ declaration
# (Trigger<const std::vector<uint8_t> &>) and the argument type passed to
# build_automation() in setup_microphone_core_(); it was previously declared with int16.
DataTrigger = microphone_ns.class_(
    "DataTrigger",
    automation.Trigger.template(
        cg.std_vector.template(cg.uint8).operator("ref").operator("const")
    ),
)

# Parented like the two actions: it is registered through the same helper, which calls
# cg.register_parented() on it.
IsCapturingCondition = microphone_ns.class_(
    "IsCapturingCondition", automation.Condition, cg.Parented.template(Microphone)
)
async def setup_microphone_core_(var, config):
    """Create a trigger and build its automation for every ``on_data`` entry in ``config``."""
    on_data_confs = config.get(CONF_ON_DATA, [])
    for trigger_conf in on_data_confs:
        trigger = cg.new_Pvariable(trigger_conf[CONF_TRIGGER_ID], var)
        data_arg = (
            cg.std_vector.template(cg.uint8).operator("ref").operator("const"),
            "x",
        )
        await automation.build_automation(trigger, [data_arg], trigger_conf)
async def register_microphone(var, config):
    """Register ``var`` as a microphone and set up its automations.

    When the ID from ``config`` is not known to CORE yet, ``var`` is first wrapped
    in a ``cg.Pvariable`` for that ID.
    """
    mic_id = config[CONF_ID]
    already_known = CORE.has_id(mic_id)
    if not already_known:
        var = cg.Pvariable(mic_id, var)
    await setup_microphone_core_(var, config)
# Base schema for microphone platforms: an optional ``on_data`` automation whose trigger
# is a DataTrigger.
MICROPHONE_SCHEMA = cv.Schema(
{
cv.Optional(CONF_ON_DATA): automation.validate_automation(
{
cv.GenerateID(CONF_TRIGGER_ID): cv.declare_id(DataTrigger),
}
),
}
)
# Schema shared by the microphone actions/condition: just the ID of the target microphone.
MICROPHONE_ACTION_SCHEMA = maybe_simple_id({cv.GenerateID(): cv.use_id(Microphone)})
async def media_player_action(config, action_id, template_arg, args):
    """Create the action/condition object and parent it to the microphone in ``config[CONF_ID]``."""
    obj = cg.new_Pvariable(action_id, template_arg)
    target_id = config[CONF_ID]
    await cg.register_parented(obj, target_id)
    return obj
# Register the two microphone actions and the capture condition. All three share
# MICROPHONE_ACTION_SCHEMA and the same code generator, media_player_action.
automation.register_action(
"microphone.capture", CaptureAction, MICROPHONE_ACTION_SCHEMA
)(media_player_action)
automation.register_action(
"microphone.stop_capture", StopCaptureAction, MICROPHONE_ACTION_SCHEMA
)(media_player_action)
automation.register_condition(
"microphone.is_capturing", IsCapturingCondition, MICROPHONE_ACTION_SCHEMA
)(media_player_action)
@coroutine_with_priority(100.0)
async def to_code(config):
    """Add the ``using`` statement for the microphone namespace and define USE_MICROPHONE."""
    using_statement = microphone_ns.using
    cg.add_global(using_statement)
    cg.add_define("USE_MICROPHONE")

View file

@ -0,0 +1,32 @@
#pragma once
#include "esphome/core/automation.h"
#include "microphone.h"
#include <vector>
namespace esphome {
namespace microphone {
/// Action that calls start() on the parent microphone.
template<typename... Ts>
class CaptureAction : public Action<Ts...>, public Parented<Microphone> {
  void play(Ts... x) override {
    this->parent_->start();
  }
};
/// Action that calls stop() on the parent microphone.
template<typename... Ts>
class StopCaptureAction : public Action<Ts...>, public Parented<Microphone> {
  void play(Ts... x) override {
    this->parent_->stop();
  }
};
/// Trigger that fires with the data passed to the microphone's data callbacks.
class DataTrigger : public Trigger<const std::vector<uint8_t> &> {
 public:
  explicit DataTrigger(Microphone *mic) {
    // Forward every data callback invocation to this trigger.
    auto forward_data = [this](const std::vector<uint8_t> &data) { this->trigger(data); };
    mic->add_data_callback(std::move(forward_data));
  }
};
/// Condition that is true while the parent microphone reports that it is running.
///
/// The name matches what the code generator registers for `microphone.is_capturing`
/// ("IsCapturingCondition"); under the previous name the generated code referred to a class
/// that did not exist.
template<typename... Ts> class IsCapturingCondition : public Condition<Ts...>, public Parented<Microphone> {
 public:
  bool check(Ts... x) override { return this->parent_->is_running(); }
};

/// Backward-compatible alias for the original, misspelled class name.
template<typename... Ts> using IsCapturingActon = IsCapturingCondition<Ts...>;
} // namespace microphone
} // namespace esphome

View file

@ -0,0 +1,33 @@
#pragma once
#include "esphome/core/entity_base.h"
#include "esphome/core/helpers.h"
namespace esphome {
namespace microphone {
// Capture state of a microphone. Microphone::is_running() is true only for STATE_RUNNING.
enum State : uint8_t {
STATE_STOPPED = 0,
STATE_STARTING,
STATE_RUNNING,
STATE_STOPPING,
};
/// Abstract microphone interface: implementations provide start() and stop(), and deliver
/// captured data to the callbacks registered through add_data_callback().
class Microphone {
public:
virtual void start() = 0;
virtual void stop() = 0;
/// Registers a callback that receives captured data as a byte vector.
void add_data_callback(std::function<void(const std::vector<uint8_t> &)> &&data_callback) {
this->data_callbacks_.add(std::move(data_callback));
}
/// True only while the state is STATE_RUNNING (not while starting or stopping).
bool is_running() const { return this->state_ == STATE_RUNNING; }
protected:
State state_{STATE_STOPPED};
CallbackManager<void(const std::vector<uint8_t> &)> data_callbacks_{};
};
} // namespace microphone
} // namespace esphome

View file

@ -139,6 +139,11 @@ class BSDSocketImpl : public Socket {
return ::writev(fd_, iov, iovcnt);
#endif
}
// Passes the arguments straight to the system ::sendto() on this socket's descriptor and
// returns its result unchanged.
ssize_t sendto(const void *buf, size_t len, int flags, const struct sockaddr *to, socklen_t tolen) override {
  const ssize_t result = ::sendto(fd_, buf, len, flags, to, tolen);
  return result;
}
int setblocking(bool blocking) override {
int fl = ::fcntl(fd_, F_GETFL, 0);
if (blocking) {

View file

@ -467,6 +467,10 @@ class LWIPRawImpl : public Socket {
}
return written;
}
// sendto() is not implemented for this socket backend. Report that as a failure (-1 with
// errno set) instead of returning 0, which a caller would read as a successful call that
// happened to send zero bytes.
ssize_t sendto(const void *buf, size_t len, int flags, const struct sockaddr *to, socklen_t tolen) override {
  errno = ENOSYS;
  return -1;
}
int setblocking(bool blocking) override {
if (pcb_ == nullptr) {
errno = ECONNRESET;

View file

@ -1,7 +1,8 @@
#include "socket.h"
#include "esphome/core/log.h"
#include <cstring>
#include <cerrno>
#include <cstring>
#include <string>
#include "esphome/core/log.h"
namespace esphome {
namespace socket {
@ -14,7 +15,7 @@ std::unique_ptr<Socket> socket_ip(int type, int protocol) {
#endif
}
socklen_t set_sockaddr(struct sockaddr *addr, socklen_t addrlen, const char *ip_address, uint16_t port) {
socklen_t set_sockaddr(struct sockaddr *addr, socklen_t addrlen, const std::string &ip_address, uint16_t port) {
#if LWIP_IPV6
if (addrlen < sizeof(sockaddr_in6)) {
errno = EINVAL;
@ -24,9 +25,14 @@ socklen_t set_sockaddr(struct sockaddr *addr, socklen_t addrlen, const char *ip_
memset(server, 0, sizeof(sockaddr_in6));
server->sin6_family = AF_INET6;
server->sin6_port = htons(port);
if (ip_address.find('.') != std::string::npos) {
server->sin6_addr.un.u32_addr[3] = inet_addr(ip_address.c_str());
} else {
ip6_addr_t ip6;
inet6_aton(ip_address, &ip6);
inet6_aton(ip_address.c_str(), &ip6);
memcpy(server->sin6_addr.un.u32_addr, ip6.addr, sizeof(ip6.addr));
}
return sizeof(sockaddr_in6);
#else
if (addrlen < sizeof(sockaddr_in)) {
@ -36,7 +42,7 @@ socklen_t set_sockaddr(struct sockaddr *addr, socklen_t addrlen, const char *ip_
auto *server = reinterpret_cast<sockaddr_in *>(addr);
memset(server, 0, sizeof(sockaddr_in));
server->sin_family = AF_INET;
server->sin_addr.s_addr = inet_addr(ip_address);
server->sin_addr.s_addr = inet_addr(ip_address.c_str());
server->sin_port = htons(port);
return sizeof(sockaddr_in);
#endif

View file

@ -1,9 +1,9 @@
#pragma once
#include <string>
#include <memory>
#include <string>
#include "headers.h"
#include "esphome/core/optional.h"
#include "headers.h"
namespace esphome {
namespace socket {
@ -34,6 +34,8 @@ class Socket {
virtual ssize_t readv(const struct iovec *iov, int iovcnt) = 0;
virtual ssize_t write(const void *buf, size_t len) = 0;
virtual ssize_t writev(const struct iovec *iov, int iovcnt) = 0;
// Pure virtual like the other I/O methods of this interface. Without `= 0` this is a declared
// but undefined virtual function, which leaves an unresolved reference in the class's vtable.
virtual ssize_t sendto(const void *buf, size_t len, int flags, const struct sockaddr *to, socklen_t tolen) = 0;
virtual int setblocking(bool blocking) = 0;
virtual int loop() { return 0; };
};
@ -45,7 +47,7 @@ std::unique_ptr<Socket> socket(int domain, int type, int protocol);
std::unique_ptr<Socket> socket_ip(int type, int protocol);
/// Set a sockaddr to the specified address and port for the IP version used by socket_ip().
socklen_t set_sockaddr(struct sockaddr *addr, socklen_t addrlen, const char *ip_address, uint16_t port);
socklen_t set_sockaddr(struct sockaddr *addr, socklen_t addrlen, const std::string &ip_address, uint16_t port);
/// Set a sockaddr to the any address and specified port for the IP version used by socket_ip().
socklen_t set_sockaddr_any(struct sockaddr *addr, socklen_t addrlen, uint16_t port);

View file

@ -0,0 +1,57 @@
import esphome.config_validation as cv
import esphome.codegen as cg
from esphome.const import CONF_ID, CONF_MICROPHONE
from esphome import automation
from esphome.automation import register_action
from esphome.components import microphone
AUTO_LOAD = ["socket"]
DEPENDENCIES = ["api", "microphone"]
CODEOWNERS = ["@jesserockz"]
voice_assistant_ns = cg.esphome_ns.namespace("voice_assistant")
# Handles for the C++ component and its two actions; both actions are parented to the component.
VoiceAssistant = voice_assistant_ns.class_("VoiceAssistant", cg.Component)
StartAction = voice_assistant_ns.class_(
"StartAction", automation.Action, cg.Parented.template(VoiceAssistant)
)
StopAction = voice_assistant_ns.class_(
"StopAction", automation.Action, cg.Parented.template(VoiceAssistant)
)
# Component schema: its own ID plus the ID of the microphone it uses.
CONFIG_SCHEMA = cv.Schema(
{
cv.GenerateID(): cv.declare_id(VoiceAssistant),
cv.GenerateID(CONF_MICROPHONE): cv.use_id(microphone.Microphone),
}
).extend(cv.COMPONENT_SCHEMA)
async def to_code(config):
    """Emit the setup code for the voice assistant component.

    Creates and registers the component, passes it the configured microphone and
    defines USE_VOICE_ASSISTANT.
    """
    assistant = cg.new_Pvariable(config[CONF_ID])
    await cg.register_component(assistant, config)

    microphone_var = await cg.get_variable(config[CONF_MICROPHONE])
    cg.add(assistant.set_microphone(microphone_var))

    cg.add_define("USE_VOICE_ASSISTANT")
# Schema shared by the start and stop actions: only the ID of the target voice assistant.
VOICE_ASSISTANT_ACTION_SCHEMA = cv.Schema({cv.GenerateID(): cv.use_id(VoiceAssistant)})
@register_action("voice_assistant.start", StartAction, VOICE_ASSISTANT_ACTION_SCHEMA)
async def voice_assistant_listen_to_code(config, action_id, template_arg, args):
    """Create a StartAction and parent it to the voice assistant in ``config[CONF_ID]``."""
    action = cg.new_Pvariable(action_id, template_arg)
    parent_id = config[CONF_ID]
    await cg.register_parented(action, parent_id)
    return action
@register_action("voice_assistant.stop", StopAction, VOICE_ASSISTANT_ACTION_SCHEMA)
async def voice_assistant_stop_to_code(config, action_id, template_arg, args):
    """Create a StopAction and parent it to the voice assistant in ``config[CONF_ID]``."""
    action = cg.new_Pvariable(action_id, template_arg)
    parent_id = config[CONF_ID]
    await cg.register_parented(action, parent_id)
    return action

View file

@ -0,0 +1,148 @@
#include "voice_assistant.h"

#include <cerrno>
#include <cstring>

#include "esphome/core/log.h"
namespace esphome {
namespace voice_assistant {
static const char *const TAG = "voice_assistant";
// Setup priority of this component: setup_priority::AFTER_CONNECTION.
float VoiceAssistant::get_setup_priority() const {
  return setup_priority::AFTER_CONNECTION;
}
// Creates the non-blocking UDP socket used for sending audio and registers the microphone
// data callback that sends each chunk to dest_addr_ while the assistant is running.
//
// The component is marked failed when the socket cannot be created or cannot be switched to
// non-blocking mode; failing to set SO_REUSEADDR is only logged.
void VoiceAssistant::setup() {
  ESP_LOGCONFIG(TAG, "Setting up Voice Assistant...");

  global_voice_assistant = this;

  this->socket_ = socket::socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);
  if (this->socket_ == nullptr) {
    ESP_LOGW(TAG, "Could not create socket.");
    this->mark_failed();
    return;
  }

  int enable = 1;
  int err = this->socket_->setsockopt(SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int));
  if (err != 0) {
    // The message reports an errno, so log errno itself rather than the call's return value.
    ESP_LOGW(TAG, "Socket unable to set reuseaddr: errno %d", errno);
    // we can still continue
  }

  err = this->socket_->setblocking(false);
  if (err != 0) {
    ESP_LOGW(TAG, "Socket unable to set nonblocking mode: errno %d", errno);
    this->mark_failed();
    return;
  }

  this->mic_->add_data_callback([this](const std::vector<uint8_t> &data) {
    if (!this->running_) {
      return;
    }
    this->socket_->sendto(data.data(), data.size(), 0, (struct sockaddr *) &this->dest_addr_, sizeof(this->dest_addr_));
  });
}
// Stores the destination address with the given port filled in, then marks the assistant as
// running and starts the microphone. For an address family other than AF_INET (or AF_INET6
// when LWIP_IPV6 is enabled) a warning is logged and nothing is started.
void VoiceAssistant::start(struct sockaddr_storage *addr, uint16_t port) {
  ESP_LOGD(TAG, "Starting...");

  memcpy(&this->dest_addr_, addr, sizeof(this->dest_addr_));

  switch (this->dest_addr_.ss_family) {
    case AF_INET:
      ((struct sockaddr_in *) &this->dest_addr_)->sin_port = htons(port);
      break;
#if LWIP_IPV6
    case AF_INET6:
      ((struct sockaddr_in6 *) &this->dest_addr_)->sin6_port = htons(port);
      break;
#endif
    default:
      ESP_LOGW(TAG, "Unknown address family: %d", this->dest_addr_.ss_family);
      return;
  }

  this->running_ = true;
  this->mic_->start();
}
// Asks the API server to start a voice assistant session.
void VoiceAssistant::request_start() {
  ESP_LOGD(TAG, "Requesting start...");

  auto *server = api::global_api_server;
  server->start_voice_assistant();
}
// Stops the microphone, clears the running flag, tells the API server to stop the voice
// assistant and zeroes the stored destination address.
void VoiceAssistant::signal_stop() {
  ESP_LOGD(TAG, "Signaling stop...");

  this->mic_->stop();
  this->running_ = false;

  auto *server = api::global_api_server;
  server->stop_voice_assistant();

  memset(&this->dest_addr_, 0, sizeof(this->dest_addr_));
}
// Returns the value of the last entry in msg.data whose name equals `name`, or an empty
// string when no entry matches.
static std::string get_event_arg(const api::VoiceAssistantEventResponse &msg, const char *name) {
  std::string value;
  for (const auto &arg : msg.data) {  // by reference: avoids copying every argument
    if (arg.name == name) {
      value = arg.value;
    }
  }
  return value;
}

// Handles an event reported for the current voice assistant run. For now every event is only
// logged; the TODO markers show where triggers are still to be added.
//
// STT_END, TTS_START and TTS_END expect a "text" / "text" / "url" argument respectively and
// log a warning when it is missing or empty. ERROR logs the "code" and "message" arguments.
// Unhandled event types are ignored.
void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
  switch (msg.event_type) {
    case api::enums::VOICE_ASSISTANT_RUN_END:
      ESP_LOGD(TAG, "Voice Assistant ended.");
      break;
    case api::enums::VOICE_ASSISTANT_STT_END: {
      const std::string text = get_event_arg(msg, "text");
      if (text.empty()) {
        ESP_LOGW(TAG, "No text in STT_END event.");
        return;
      }
      ESP_LOGD(TAG, "Speech recognised as: \"%s\"", text.c_str());
      // TODO `on_stt_end` trigger
      break;
    }
    case api::enums::VOICE_ASSISTANT_TTS_START: {
      const std::string text = get_event_arg(msg, "text");
      if (text.empty()) {
        ESP_LOGW(TAG, "No text in TTS_START event.");
        return;
      }
      ESP_LOGD(TAG, "Response: \"%s\"", text.c_str());
      // TODO `on_tts_start` trigger
      break;
    }
    case api::enums::VOICE_ASSISTANT_TTS_END: {
      const std::string url = get_event_arg(msg, "url");
      if (url.empty()) {
        ESP_LOGW(TAG, "No url in TTS_END event.");
        return;
      }
      ESP_LOGD(TAG, "Response URL: \"%s\"", url.c_str());
      // TODO `on_tts_end` trigger
      break;
    }
    case api::enums::VOICE_ASSISTANT_ERROR: {
      const std::string code = get_event_arg(msg, "code");
      const std::string message = get_event_arg(msg, "message");
      ESP_LOGE(TAG, "Error: %s - %s", code.c_str(), message.c_str());
      // TODO `on_error` trigger
      break;  // previously fell through into `default`
    }
    default:
      break;
  }
}
// Pointer to the voice assistant instance; null until VoiceAssistant::setup() assigns `this`.
VoiceAssistant *global_voice_assistant = nullptr; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
} // namespace voice_assistant
} // namespace esphome

View file

@ -0,0 +1,50 @@
#pragma once
#include "esphome/core/automation.h"
#include "esphome/core/component.h"
#include "esphome/core/helpers.h"
#include "esphome/components/api/api_pb2.h"
#include "esphome/components/api/api_server.h"
#include "esphome/components/microphone/microphone.h"
#include "esphome/components/socket/socket.h"
namespace esphome {
namespace voice_assistant {
/// Component that sends microphone audio over a UDP socket to a destination address while a
/// voice assistant session is running, and logs the events reported for that session.
class VoiceAssistant : public Component {
 public:
  void setup() override;
  float get_setup_priority() const override;

  /// Stores the destination (address plus port) and starts the microphone.
  void start(struct sockaddr_storage *addr, uint16_t port);

  void set_microphone(microphone::Microphone *mic) { this->mic_ = mic; }

  /// Asks the API server to start a voice assistant session.
  void request_start();
  /// Stops the microphone and tells the API server to stop the session.
  void signal_stop();

  /// Handles an event reported for the current session.
  void on_event(const api::VoiceAssistantEventResponse &msg);

 protected:
  std::unique_ptr<socket::Socket> socket_ = nullptr;
  // Zero-initialized so the member never holds indeterminate bytes before start() fills it.
  struct sockaddr_storage dest_addr_ {};

  microphone::Microphone *mic_{nullptr};

  // True between start() and signal_stop(); the data callback only sends while this is set.
  bool running_{false};
};
/// Action that calls request_start() on the parent voice assistant.
template<typename... Ts>
class StartAction : public Action<Ts...>, public Parented<VoiceAssistant> {
 public:
  void play(Ts... x) override {
    this->parent_->request_start();
  }
};
/// Action that calls signal_stop() on the parent voice assistant.
template<typename... Ts>
class StopAction : public Action<Ts...>, public Parented<VoiceAssistant> {
 public:
  void play(Ts... x) override {
    this->parent_->signal_stop();
  }
};
// Declaration of the global pointer to the voice assistant instance.
extern VoiceAssistant *global_voice_assistant; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables)
} // namespace voice_assistant
} // namespace esphome

View file

@ -394,6 +394,7 @@ CONF_MEASUREMENT_SEQUENCE_NUMBER = "measurement_sequence_number"
CONF_MEDIUM = "medium"
CONF_MEMORY_BLOCKS = "memory_blocks"
CONF_METHOD = "method"
CONF_MICROPHONE = "microphone"
CONF_MIN_COOLING_OFF_TIME = "min_cooling_off_time"
CONF_MIN_COOLING_RUN_TIME = "min_cooling_run_time"
CONF_MIN_FAN_MODE_SWITCHING_TIME = "min_fan_mode_switching_time"

View file

@ -72,6 +72,7 @@
#define USE_SOCKET_IMPL_BSD_SOCKETS
#define USE_WIFI_11KV_SUPPORT
#define USE_BLUETOOTH_PROXY
#define USE_VOICE_ASSISTANT
#ifdef USE_ARDUINO
#define USE_ARDUINO_VERSION_CODE VERSION_CODE(2, 0, 5)

View file

@ -395,6 +395,9 @@ template<typename T, enable_if_t<std::is_unsigned<T>::value, int> = 0> std::stri
val = convert_big_endian(val);
return format_hex(reinterpret_cast<uint8_t *>(&val), sizeof(T));
}
template<std::size_t N> std::string format_hex(const std::array<uint8_t, N> &data) {
return format_hex(data.data(), data.size());
}
/// Format the byte array \p data of length \p len in pretty-printed, human-readable hex.
std::string format_hex_pretty(const uint8_t *data, size_t length);

View file

@ -653,13 +653,15 @@ touchscreen:
format: Touch at (%d, %d)
args: [touch.x, touch.y]
i2s_audio:
i2s_lrclk_pin: GPIO26
i2s_bclk_pin: GPIO27
media_player:
- platform: i2s_audio
name: ${friendly_name}
name: None
dac_type: external
i2s_lrclk_pin: GPIO26
i2s_dout_pin: GPIO25
i2s_bclk_pin: GPIO27
mute_pin: GPIO14
on_state:
- media_player.play:
@ -685,3 +687,12 @@ prometheus:
ha_hello_world:
id: hellow_world
name: Hello World
microphone:
- platform: i2s_audio
id: mic_id
i2s_din_pin: GPIO23
voice_assistant:
microphone: mic_id