mirror of
https://github.com/esphome/esphome.git
synced 2024-12-22 13:34:54 +01:00
Add more debugging logs to microWakeWord (#6238)
This commit is contained in:
parent
27a3a081c3
commit
db9d837d29
3 changed files with 35 additions and 14 deletions
|
@ -261,7 +261,7 @@ CONFIG_SCHEMA = cv.All(
|
||||||
{
|
{
|
||||||
cv.GenerateID(): cv.declare_id(MicroWakeWord),
|
cv.GenerateID(): cv.declare_id(MicroWakeWord),
|
||||||
cv.GenerateID(CONF_MICROPHONE): cv.use_id(microphone.Microphone),
|
cv.GenerateID(CONF_MICROPHONE): cv.use_id(microphone.Microphone),
|
||||||
cv.Optional(CONF_PROBABILITY_CUTOFF): cv.float_,
|
cv.Optional(CONF_PROBABILITY_CUTOFF): cv.percentage,
|
||||||
cv.Optional(CONF_SLIDING_WINDOW_AVERAGE_SIZE): cv.positive_int,
|
cv.Optional(CONF_SLIDING_WINDOW_AVERAGE_SIZE): cv.positive_int,
|
||||||
cv.Optional(CONF_ON_WAKE_WORD_DETECTED): automation.validate_automation(
|
cv.Optional(CONF_ON_WAKE_WORD_DETECTED): automation.validate_automation(
|
||||||
single=True
|
single=True
|
||||||
|
|
|
@ -53,8 +53,15 @@ static const LogString *micro_wake_word_state_to_string(State state) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void MicroWakeWord::dump_config() {
|
||||||
|
ESP_LOGCONFIG(TAG, "microWakeWord:");
|
||||||
|
ESP_LOGCONFIG(TAG, " Wake Word: %s", this->get_wake_word().c_str());
|
||||||
|
ESP_LOGCONFIG(TAG, " Probability cutoff: %.3f", this->probability_cutoff_);
|
||||||
|
ESP_LOGCONFIG(TAG, " Sliding window size: %d", this->sliding_window_average_size_);
|
||||||
|
}
|
||||||
|
|
||||||
void MicroWakeWord::setup() {
|
void MicroWakeWord::setup() {
|
||||||
ESP_LOGCONFIG(TAG, "Setting up Micro Wake Word...");
|
ESP_LOGCONFIG(TAG, "Setting up microWakeWord...");
|
||||||
|
|
||||||
if (!this->initialize_models()) {
|
if (!this->initialize_models()) {
|
||||||
ESP_LOGE(TAG, "Failed to initialize models");
|
ESP_LOGE(TAG, "Failed to initialize models");
|
||||||
|
@ -63,7 +70,7 @@ void MicroWakeWord::setup() {
|
||||||
}
|
}
|
||||||
|
|
||||||
ExternalRAMAllocator<int16_t> allocator(ExternalRAMAllocator<int16_t>::ALLOW_FAILURE);
|
ExternalRAMAllocator<int16_t> allocator(ExternalRAMAllocator<int16_t>::ALLOW_FAILURE);
|
||||||
this->input_buffer_ = allocator.allocate(NEW_SAMPLES_TO_GET);
|
this->input_buffer_ = allocator.allocate(INPUT_BUFFER_SIZE * sizeof(int16_t));
|
||||||
if (this->input_buffer_ == nullptr) {
|
if (this->input_buffer_ == nullptr) {
|
||||||
ESP_LOGW(TAG, "Could not allocate input buffer");
|
ESP_LOGW(TAG, "Could not allocate input buffer");
|
||||||
this->mark_failed();
|
this->mark_failed();
|
||||||
|
@ -81,7 +88,7 @@ void MicroWakeWord::setup() {
|
||||||
}
|
}
|
||||||
|
|
||||||
int MicroWakeWord::read_microphone_() {
|
int MicroWakeWord::read_microphone_() {
|
||||||
size_t bytes_read = this->microphone_->read(this->input_buffer_, NEW_SAMPLES_TO_GET * sizeof(int16_t));
|
size_t bytes_read = this->microphone_->read(this->input_buffer_, INPUT_BUFFER_SIZE * sizeof(int16_t));
|
||||||
if (bytes_read == 0) {
|
if (bytes_read == 0) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -279,11 +286,6 @@ bool MicroWakeWord::initialize_models() {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MicroWakeWord::update_features_() {
|
bool MicroWakeWord::update_features_() {
|
||||||
// Verify we have enough samples for a feature slice
|
|
||||||
if (!this->slice_available_()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Retrieve strided audio samples
|
// Retrieve strided audio samples
|
||||||
int16_t *audio_samples = nullptr;
|
int16_t *audio_samples = nullptr;
|
||||||
if (!this->stride_audio_samples_(&audio_samples)) {
|
if (!this->stride_audio_samples_(&audio_samples)) {
|
||||||
|
@ -369,20 +371,36 @@ void MicroWakeWord::set_sliding_window_average_size(size_t size) {
|
||||||
bool MicroWakeWord::slice_available_() {
|
bool MicroWakeWord::slice_available_() {
|
||||||
size_t available = this->ring_buffer_->available();
|
size_t available = this->ring_buffer_->available();
|
||||||
|
|
||||||
|
size_t free = this->ring_buffer_->free();
|
||||||
|
|
||||||
|
if (free < NEW_SAMPLES_TO_GET * sizeof(int16_t)) {
|
||||||
|
// If the ring buffer is within one audio slice of being full, then wake word detection will have issues.
|
||||||
|
// If this is constantly occurring, then some possibilities why are
|
||||||
|
// 1) there are too many other slow components configured
|
||||||
|
// 2) the ESP32 isn't fast enough; e.g., an ESP32 is much slower than an ESP32-S3 at inferences.
|
||||||
|
// 3) the model is too large
|
||||||
|
// 4) the model uses operations that are not optimized
|
||||||
|
ESP_LOGW(TAG,
|
||||||
|
"Audio buffer is nearly full. Wake word detection may be less accurate and have slower reponse times. "
|
||||||
|
#if !defined(USE_ESP32_VARIANT_ESP32S3)
|
||||||
|
"microWakeWord is designed for the ESP32-S3. The current platform is too slow for this model."
|
||||||
|
#endif
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
return available > (NEW_SAMPLES_TO_GET * sizeof(int16_t));
|
return available > (NEW_SAMPLES_TO_GET * sizeof(int16_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MicroWakeWord::stride_audio_samples_(int16_t **audio_samples) {
|
bool MicroWakeWord::stride_audio_samples_(int16_t **audio_samples) {
|
||||||
|
if (!this->slice_available_()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// Copy 320 bytes (160 samples over 10 ms) into preprocessor_audio_buffer_ from history in
|
// Copy 320 bytes (160 samples over 10 ms) into preprocessor_audio_buffer_ from history in
|
||||||
// preprocessor_stride_buffer_
|
// preprocessor_stride_buffer_
|
||||||
memcpy((void *) (this->preprocessor_audio_buffer_), (void *) (this->preprocessor_stride_buffer_),
|
memcpy((void *) (this->preprocessor_audio_buffer_), (void *) (this->preprocessor_stride_buffer_),
|
||||||
HISTORY_SAMPLES_TO_KEEP * sizeof(int16_t));
|
HISTORY_SAMPLES_TO_KEEP * sizeof(int16_t));
|
||||||
|
|
||||||
if (this->ring_buffer_->available() < NEW_SAMPLES_TO_GET * sizeof(int16_t)) {
|
|
||||||
ESP_LOGD(TAG, "Audio Buffer not full enough");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy 640 bytes (320 samples over 20 ms) from the ring buffer
|
// Copy 640 bytes (320 samples over 20 ms) from the ring buffer
|
||||||
// The first 320 bytes (160 samples over 10 ms) will be from history
|
// The first 320 bytes (160 samples over 10 ms) will be from history
|
||||||
size_t bytes_read = this->ring_buffer_->read((void *) (this->preprocessor_audio_buffer_ + HISTORY_SAMPLES_TO_KEEP),
|
size_t bytes_read = this->ring_buffer_->read((void *) (this->preprocessor_audio_buffer_ + HISTORY_SAMPLES_TO_KEEP),
|
||||||
|
|
|
@ -66,6 +66,7 @@ class MicroWakeWord : public Component {
|
||||||
void setup() override;
|
void setup() override;
|
||||||
void loop() override;
|
void loop() override;
|
||||||
float get_setup_priority() const override;
|
float get_setup_priority() const override;
|
||||||
|
void dump_config() override;
|
||||||
|
|
||||||
void start();
|
void start();
|
||||||
void stop();
|
void stop();
|
||||||
|
@ -74,6 +75,8 @@ class MicroWakeWord : public Component {
|
||||||
|
|
||||||
bool initialize_models();
|
bool initialize_models();
|
||||||
|
|
||||||
|
std::string get_wake_word() { return this->wake_word_; }
|
||||||
|
|
||||||
// Increasing either of these will reduce the rate of false acceptances while increasing the false rejection rate
|
// Increasing either of these will reduce the rate of false acceptances while increasing the false rejection rate
|
||||||
void set_probability_cutoff(float probability_cutoff) { this->probability_cutoff_ = probability_cutoff; }
|
void set_probability_cutoff(float probability_cutoff) { this->probability_cutoff_ = probability_cutoff; }
|
||||||
void set_sliding_window_average_size(size_t size);
|
void set_sliding_window_average_size(size_t size);
|
||||||
|
|
Loading…
Reference in a new issue