Mirror of https://github.com/esphome/esphome.git (synced 2024-12-24 14:34:54 +01:00)
add nabu media player component

commit 5d41593db2 (parent 4fa3c6915c)
16 changed files with 3424 additions and 0 deletions
esphome/components/nabu/__init__.py (new file, 0 additions)

esphome/components/nabu/audio_decoder.cpp (new file, 384 additions)
@@ -0,0 +1,384 @@
#ifdef USE_ESP_IDF

#include "audio_decoder.h"

#include "mp3_decoder.h"

#include "esphome/core/ring_buffer.h"

namespace esphome {
namespace nabu {

static const size_t READ_WRITE_TIMEOUT_MS = 20;

AudioDecoder::AudioDecoder(RingBuffer *input_ring_buffer, RingBuffer *output_ring_buffer, size_t internal_buffer_size) {
  this->input_ring_buffer_ = input_ring_buffer;
  this->output_ring_buffer_ = output_ring_buffer;
  this->internal_buffer_size_ = internal_buffer_size;
}

AudioDecoder::~AudioDecoder() {
  ExternalRAMAllocator<uint8_t> allocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE);
  if (this->input_buffer_ != nullptr) {
    allocator.deallocate(this->input_buffer_, this->internal_buffer_size_);
  }
  if (this->output_buffer_ != nullptr) {
    allocator.deallocate(this->output_buffer_, this->internal_buffer_size_);
  }

  if (this->flac_decoder_ != nullptr) {
    this->flac_decoder_->free_buffers();
    this->flac_decoder_.reset();  // Free the unique_ptr
    this->flac_decoder_ = nullptr;
  }

  if (this->media_file_type_ == MediaFileType::MP3) {
    MP3FreeDecoder(this->mp3_decoder_);
  }

  if (this->wav_decoder_ != nullptr) {
    this->wav_decoder_.reset();  // Free the unique_ptr
    this->wav_decoder_ = nullptr;
  }
}

esp_err_t AudioDecoder::start(MediaFileType media_file_type) {
  esp_err_t err = this->allocate_buffers_();

  if (err != ESP_OK) {
    return err;
  }

  this->media_file_type_ = media_file_type;

  this->input_buffer_current_ = this->input_buffer_;
  this->input_buffer_length_ = 0;
  this->output_buffer_current_ = this->output_buffer_;
  this->output_buffer_length_ = 0;

  this->potentially_failed_count_ = 0;
  this->end_of_file_ = false;

  switch (this->media_file_type_) {
    case MediaFileType::FLAC:
      this->flac_decoder_ = make_unique<flac::FLACDecoder>(this->input_buffer_);
      break;
    case MediaFileType::MP3:
      this->mp3_decoder_ = MP3InitDecoder();
      break;
    case MediaFileType::WAV:
      this->wav_decoder_ = make_unique<wav_decoder::WAVDecoder>(&this->input_buffer_current_);
      this->wav_decoder_->reset();
      break;
    case MediaFileType::NONE:
      return ESP_ERR_NOT_SUPPORTED;
      break;
  }

  return ESP_OK;
}

AudioDecoderState AudioDecoder::decode(bool stop_gracefully) {
  if (stop_gracefully) {
    if (this->output_buffer_length_ == 0) {
      // If the file decoder believes it is at the end of the file
      if (this->end_of_file_) {
        return AudioDecoderState::FINISHED;
      }
      // If all the internal buffers are empty, the decoding is done
      if ((this->input_ring_buffer_->available() == 0) && (this->input_buffer_length_ == 0)) {
        return AudioDecoderState::FINISHED;
      }

      // If the ring buffer has no new data and the decoding failed last time, mark done
      if ((this->input_ring_buffer_->available() == 0) && (this->potentially_failed_count_ > 0)) {
        return AudioDecoderState::FINISHED;
      }
    }
  }

  if (this->potentially_failed_count_ > 10) {
    return AudioDecoderState::FAILED;
  }

  FileDecoderState state = FileDecoderState::MORE_TO_PROCESS;

  while (state == FileDecoderState::MORE_TO_PROCESS) {
    if (this->output_buffer_length_ > 0) {
      // Have decoded data, write it to the output ring buffer

      size_t bytes_to_write = this->output_buffer_length_;

      if (bytes_to_write > 0) {
        size_t bytes_written = this->output_ring_buffer_->write_without_replacement(
            (void *) this->output_buffer_current_, bytes_to_write, pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));

        this->output_buffer_length_ -= bytes_written;
        this->output_buffer_current_ += bytes_written;
      }

      if (this->output_buffer_length_ > 0) {
        // Output buffer still has decoded audio to write
        return AudioDecoderState::DECODING;
      }
    } else {
      // Decode more data

      // Shift unread data in input buffer to start
      if (this->input_buffer_length_ > 0) {
        memmove(this->input_buffer_, this->input_buffer_current_, this->input_buffer_length_);
      }
      this->input_buffer_current_ = this->input_buffer_;

      // Read in new ring buffer data to fill the remaining input buffer
      size_t bytes_read = 0;

      size_t bytes_to_read = this->internal_buffer_size_ - this->input_buffer_length_;

      if (bytes_to_read > 0) {
        uint8_t *new_audio_data = this->input_buffer_ + this->input_buffer_length_;
        bytes_read = this->input_ring_buffer_->read((void *) new_audio_data, bytes_to_read,
                                                    pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));

        this->input_buffer_length_ += bytes_read;
      }

      if ((this->potentially_failed_count_ > 0) && (bytes_read == 0)) {
        // Failed to decode in last attempt and there is no new data

        if (bytes_to_read == 0) {
          // The input buffer is full. Since it previously failed on the exact same data, we can never recover
          state = FileDecoderState::FAILED;
        } else {
          // Attempt to get more data next time
          state = FileDecoderState::IDLE;
        }
      } else if (this->input_buffer_length_ == 0) {
        // No data to decode, attempt to get more data next time
        state = FileDecoderState::IDLE;
      } else {
        switch (this->media_file_type_) {
          case MediaFileType::FLAC:
            state = this->decode_flac_();
            break;
          case MediaFileType::MP3:
            state = this->decode_mp3_();
            break;
          case MediaFileType::WAV:
            state = this->decode_wav_();
            break;
          case MediaFileType::NONE:
            state = FileDecoderState::IDLE;
            break;
        }
      }
    }
    if (state == FileDecoderState::POTENTIALLY_FAILED) {
      ++this->potentially_failed_count_;
    } else if (state == FileDecoderState::END_OF_FILE) {
      this->end_of_file_ = true;
    } else if (state == FileDecoderState::FAILED) {
      return AudioDecoderState::FAILED;
    } else if (state == FileDecoderState::MORE_TO_PROCESS) {
      this->potentially_failed_count_ = 0;
    }
  }
  return AudioDecoderState::DECODING;
}

esp_err_t AudioDecoder::allocate_buffers_() {
  ExternalRAMAllocator<uint8_t> allocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE);

  if (this->input_buffer_ == nullptr)
    this->input_buffer_ = allocator.allocate(this->internal_buffer_size_);

  if (this->output_buffer_ == nullptr)
    this->output_buffer_ = allocator.allocate(this->internal_buffer_size_);

  if ((this->input_buffer_ == nullptr) || (this->output_buffer_ == nullptr)) {
    return ESP_ERR_NO_MEM;
  }

  return ESP_OK;
}

FileDecoderState AudioDecoder::decode_flac_() {
  if (!this->audio_stream_info_.has_value()) {
    // Header hasn't been read
    auto result = this->flac_decoder_->read_header(this->input_buffer_length_);

    if (result == flac::FLAC_DECODER_HEADER_OUT_OF_DATA) {
      return FileDecoderState::POTENTIALLY_FAILED;
    }

    if (result != flac::FLAC_DECODER_SUCCESS) {
      // Couldn't read FLAC header
      return FileDecoderState::FAILED;
    }

    size_t bytes_consumed = this->flac_decoder_->get_bytes_index();
    this->input_buffer_current_ += bytes_consumed;
    this->input_buffer_length_ = this->flac_decoder_->get_bytes_left();

    size_t flac_decoder_output_buffer_min_size = flac_decoder_->get_output_buffer_size();
    if (this->internal_buffer_size_ < flac_decoder_output_buffer_min_size * sizeof(int16_t)) {
      // Output buffer is not big enough
      return FileDecoderState::FAILED;
    }

    audio::AudioStreamInfo audio_stream_info;
    audio_stream_info.channels = this->flac_decoder_->get_num_channels();
    audio_stream_info.sample_rate = this->flac_decoder_->get_sample_rate();
    audio_stream_info.bits_per_sample = this->flac_decoder_->get_sample_depth();

    this->audio_stream_info_ = audio_stream_info;

    return FileDecoderState::MORE_TO_PROCESS;
  }

  uint32_t output_samples = 0;
  auto result =
      this->flac_decoder_->decode_frame(this->input_buffer_length_, (int16_t *) this->output_buffer_, &output_samples);

  if (result == flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
    // Not an issue, just needs more data that we'll get next time.
    return FileDecoderState::POTENTIALLY_FAILED;
  } else if (result > flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
    // Corrupted frame, don't retry with current buffer content, wait for new sync
    size_t bytes_consumed = this->flac_decoder_->get_bytes_index();
    this->input_buffer_current_ += bytes_consumed;
    this->input_buffer_length_ = this->flac_decoder_->get_bytes_left();

    return FileDecoderState::POTENTIALLY_FAILED;
  }

  // We have successfully decoded some input data and have new output data
  size_t bytes_consumed = this->flac_decoder_->get_bytes_index();
  this->input_buffer_current_ += bytes_consumed;
  this->input_buffer_length_ = this->flac_decoder_->get_bytes_left();

  this->output_buffer_current_ = this->output_buffer_;
  this->output_buffer_length_ = output_samples * sizeof(int16_t);

  if (result == flac::FLAC_DECODER_NO_MORE_FRAMES) {
    return FileDecoderState::END_OF_FILE;
  }

  return FileDecoderState::IDLE;
}

FileDecoderState AudioDecoder::decode_mp3_() {
  // Look for the next sync word
  int32_t offset = MP3FindSyncWord(this->input_buffer_current_, this->input_buffer_length_);
  if (offset < 0) {
    // We may recover if we have more data
    return FileDecoderState::POTENTIALLY_FAILED;
  }

  // Advance read pointer
  this->input_buffer_current_ += offset;
  this->input_buffer_length_ -= offset;

  int err = MP3Decode(this->mp3_decoder_, &this->input_buffer_current_, (int *) &this->input_buffer_length_,
                      (int16_t *) this->output_buffer_, 0);
  if (err) {
    switch (err) {
      case ERR_MP3_MAINDATA_UNDERFLOW:
        // Not a problem. Next call to decode will provide more data.
        return FileDecoderState::POTENTIALLY_FAILED;
        break;
      default:
        return FileDecoderState::FAILED;
        break;
    }
  } else {
    MP3FrameInfo mp3_frame_info;
    MP3GetLastFrameInfo(this->mp3_decoder_, &mp3_frame_info);
    if (mp3_frame_info.outputSamps > 0) {
      int bytes_per_sample = (mp3_frame_info.bitsPerSample / 8);
      this->output_buffer_length_ = mp3_frame_info.outputSamps * bytes_per_sample;
      this->output_buffer_current_ = this->output_buffer_;

      audio::AudioStreamInfo stream_info;
      stream_info.channels = mp3_frame_info.nChans;
      stream_info.sample_rate = mp3_frame_info.samprate;
      stream_info.bits_per_sample = mp3_frame_info.bitsPerSample;
      this->audio_stream_info_ = stream_info;
    }
  }

  return FileDecoderState::MORE_TO_PROCESS;
}

FileDecoderState AudioDecoder::decode_wav_() {
  if (!this->audio_stream_info_.has_value() && (this->input_buffer_length_ > 44)) {
    // Header hasn't been processed

    size_t original_buffer_length = this->input_buffer_length_;

    size_t wav_bytes_to_skip = this->wav_decoder_->bytes_to_skip();
    size_t wav_bytes_to_read = this->wav_decoder_->bytes_needed();

    bool header_finished = false;
    while (!header_finished) {
      if (wav_bytes_to_skip > 0) {
        // Adjust pointer to skip the appropriate bytes
        this->input_buffer_current_ += wav_bytes_to_skip;
        this->input_buffer_length_ -= wav_bytes_to_skip;
        wav_bytes_to_skip = 0;
      } else if (wav_bytes_to_read > 0) {
        wav_decoder::WAVDecoderResult result = this->wav_decoder_->next();
        this->input_buffer_current_ += wav_bytes_to_read;
        this->input_buffer_length_ -= wav_bytes_to_read;

        if (result == wav_decoder::WAV_DECODER_SUCCESS_IN_DATA) {
          // Header parsing is complete

          // Assume PCM
          audio::AudioStreamInfo audio_stream_info;
          audio_stream_info.channels = this->wav_decoder_->num_channels();
          audio_stream_info.sample_rate = this->wav_decoder_->sample_rate();
          audio_stream_info.bits_per_sample = this->wav_decoder_->bits_per_sample();
          this->audio_stream_info_ = audio_stream_info;
          this->wav_bytes_left_ = this->wav_decoder_->chunk_bytes_left();
          header_finished = true;
        } else if (result == wav_decoder::WAV_DECODER_SUCCESS_NEXT) {
          // Continue parsing header
          wav_bytes_to_skip = this->wav_decoder_->bytes_to_skip();
          wav_bytes_to_read = this->wav_decoder_->bytes_needed();
        } else {
          // Unexpected error parsing the wav header
          return FileDecoderState::FAILED;
        }
      } else {
        // Something unexpected has happened
        // Reset state and hope we have enough info next time
        this->input_buffer_length_ = original_buffer_length;
        this->input_buffer_current_ = this->input_buffer_;
        return FileDecoderState::POTENTIALLY_FAILED;
      }
    }
  }

  if (this->wav_bytes_left_ > 0) {
    size_t bytes_to_write = std::min(this->wav_bytes_left_, this->input_buffer_length_);
    bytes_to_write = std::min(bytes_to_write, this->internal_buffer_size_);
    if (bytes_to_write > 0) {
      std::memcpy(this->output_buffer_, this->input_buffer_current_, bytes_to_write);
      this->input_buffer_current_ += bytes_to_write;
      this->input_buffer_length_ -= bytes_to_write;
      this->output_buffer_current_ = this->output_buffer_;
      this->output_buffer_length_ = bytes_to_write;
      this->wav_bytes_left_ -= bytes_to_write;
    }

    return FileDecoderState::IDLE;
  }

  return FileDecoderState::END_OF_FILE;
}

}  // namespace nabu
}  // namespace esphome

#endif
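The decoder above never pulls from a network or file source itself; it only moves bytes between two ring buffers. As a rough illustration of the calling pattern (this sketch is not part of the commit; the buffer sizes and the decode_one_file name are invented, and the encoded bytes are assumed to be written into input_rb by some other task):

void decode_one_file() {
  // Ring buffers carry encoded bytes in and 16-bit PCM out; sizes here are arbitrary for the example.
  std::unique_ptr<RingBuffer> input_rb = RingBuffer::create(64 * 1024);
  std::unique_ptr<RingBuffer> output_rb = RingBuffer::create(64 * 1024);

  AudioDecoder decoder(input_rb.get(), output_rb.get(), 32 * 1024);
  if (decoder.start(MediaFileType::FLAC) != ESP_OK) {
    return;  // unsupported file type or allocation failure
  }

  bool source_finished = false;  // becomes true once the producer has written the whole file
  while (true) {
    AudioDecoderState state = decoder.decode(source_finished);
    if (state == AudioDecoderState::FINISHED || state == AudioDecoderState::FAILED) {
      break;
    }
    // Once a header has been parsed, the stream format is available for configuring the consumer.
    if (decoder.get_audio_stream_info().has_value()) {
      // audio::AudioStreamInfo info = decoder.get_audio_stream_info().value();
    }
  }
}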
esphome/components/nabu/audio_decoder.h (new file, 81 additions)

@@ -0,0 +1,81 @@
#pragma once

#ifdef USE_ESP_IDF

#include <flac_decoder.h>
#include <wav_decoder.h>
#include <mp3_decoder.h>

#include "nabu_media_helpers.h"
#include "esphome/components/audio/audio.h"

#include "esphome/core/helpers.h"
#include "esphome/core/ring_buffer.h"

namespace esphome {
namespace nabu {

enum class AudioDecoderState : uint8_t {
  INITIALIZED = 0,
  DECODING,
  FINISHED,
  FAILED,
};

// Only used within the AudioDecoder class; conveys the state of the particular file type decoder
enum class FileDecoderState : uint8_t {
  MORE_TO_PROCESS,
  IDLE,
  POTENTIALLY_FAILED,
  FAILED,
  END_OF_FILE,
};
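// How the per-file-type states above feed the public AudioDecoderState (summary of decode(), added for clarity):
//  - MORE_TO_PROCESS resets the potentially-failed counter and keeps decoding within the same call
//  - POTENTIALLY_FAILED increments the counter; more than 10 consecutive occurrences become AudioDecoderState::FAILED
//  - FAILED maps directly to AudioDecoderState::FAILED
//  - END_OF_FILE marks the stream done, which decode(stop_gracefully=true) reports as FINISHED once the internal
//    buffers have drained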

class AudioDecoder {
 public:
  AudioDecoder(esphome::RingBuffer *input_ring_buffer, esphome::RingBuffer *output_ring_buffer,
               size_t internal_buffer_size);
  ~AudioDecoder();

  esp_err_t start(MediaFileType media_file_type);

  AudioDecoderState decode(bool stop_gracefully);

  const optional<audio::AudioStreamInfo> &get_audio_stream_info() const { return this->audio_stream_info_; }

 protected:
  esp_err_t allocate_buffers_();

  FileDecoderState decode_flac_();
  FileDecoderState decode_mp3_();
  FileDecoderState decode_wav_();

  esphome::RingBuffer *input_ring_buffer_;
  esphome::RingBuffer *output_ring_buffer_;
  size_t internal_buffer_size_;

  uint8_t *input_buffer_{nullptr};
  uint8_t *input_buffer_current_{nullptr};
  size_t input_buffer_length_;

  uint8_t *output_buffer_{nullptr};
  uint8_t *output_buffer_current_{nullptr};
  size_t output_buffer_length_;

  std::unique_ptr<flac::FLACDecoder> flac_decoder_;

  HMP3Decoder mp3_decoder_;

  std::unique_ptr<wav_decoder::WAVDecoder> wav_decoder_;
  size_t wav_bytes_left_;

  MediaFileType media_file_type_{MediaFileType::NONE};
  optional<audio::AudioStreamInfo> audio_stream_info_{};

  size_t potentially_failed_count_{0};
  bool end_of_file_{false};
};
}  // namespace nabu
}  // namespace esphome

#endif
esphome/components/nabu/audio_mixer.cpp (new file, 382 additions)

@@ -0,0 +1,382 @@
#ifdef USE_ESP_IDF

#include "audio_mixer.h"

#include <dsp.h>

#include "esphome/core/hal.h"
#include "esphome/core/helpers.h"

namespace esphome {
namespace nabu {

static const size_t INPUT_RING_BUFFER_SAMPLES = 24000;
static const size_t OUTPUT_BUFFER_SAMPLES = 8192;
static const size_t QUEUE_COUNT = 20;

static const uint32_t TASK_STACK_SIZE = 3072;
static const size_t TASK_DELAY_MS = 25;

static const int16_t MAX_AUDIO_SAMPLE_VALUE = INT16_MAX;
static const int16_t MIN_AUDIO_SAMPLE_VALUE = INT16_MIN;

esp_err_t AudioMixer::start(speaker::Speaker *speaker, const std::string &task_name, UBaseType_t priority) {
  esp_err_t err = this->allocate_buffers_();

  if (err != ESP_OK) {
    return err;
  }

  if (this->task_handle_ == nullptr) {
    this->task_handle_ = xTaskCreateStatic(AudioMixer::audio_mixer_task_, task_name.c_str(), TASK_STACK_SIZE,
                                           (void *) this, priority, this->stack_buffer_, &this->task_stack_);
  }

  if (this->task_handle_ == nullptr) {
    return ESP_FAIL;
  }

  this->speaker_ = speaker;

  return ESP_OK;
}

void AudioMixer::stop() {
  vTaskDelete(this->task_handle_);
  this->task_handle_ = nullptr;

  xQueueReset(this->event_queue_);
  xQueueReset(this->command_queue_);
}

void AudioMixer::suspend_task() {
  if (this->task_handle_ != nullptr) {
    vTaskSuspend(this->task_handle_);
  }
}

void AudioMixer::resume_task() {
  if (this->task_handle_ != nullptr) {
    vTaskResume(task_handle_);
  }
}

void AudioMixer::audio_mixer_task_(void *params) {
  AudioMixer *this_mixer = (AudioMixer *) params;

  TaskEvent event;
  CommandEvent command_event;

  ExternalRAMAllocator<int16_t> allocator(ExternalRAMAllocator<int16_t>::ALLOW_FAILURE);
  int16_t *media_buffer = allocator.allocate(OUTPUT_BUFFER_SAMPLES);
  int16_t *announcement_buffer = allocator.allocate(OUTPUT_BUFFER_SAMPLES);
  int16_t *combination_buffer = allocator.allocate(OUTPUT_BUFFER_SAMPLES);

  int16_t *combination_buffer_current = combination_buffer;
  size_t combination_buffer_length = 0;

  if ((media_buffer == nullptr) || (announcement_buffer == nullptr)) {
    event.type = EventType::WARNING;
    event.err = ESP_ERR_NO_MEM;
    xQueueSend(this_mixer->event_queue_, &event, portMAX_DELAY);

    event.type = EventType::STOPPED;
    event.err = ESP_OK;
    xQueueSend(this_mixer->event_queue_, &event, portMAX_DELAY);

    while (true) {
      delay(TASK_DELAY_MS);
    }

    return;
  }

  // Handles media stream pausing
  bool transfer_media = true;

  // Parameters to control the ducking dB reduction and its transitions
  // There is a built-in negative sign; e.g., reducing by 5 dB is changing the gain by -5 dB
  int8_t target_ducking_db_reduction = 0;
  int8_t current_ducking_db_reduction = 0;

  // Each step represents a change in 1 dB. Positive 1 means the dB reduction is increasing. Negative 1 means the dB
  // reduction is decreasing.
  int8_t db_change_per_ducking_step = 1;

  size_t ducking_transition_samples_remaining = 0;
  size_t samples_per_ducking_step = 0;

  event.type = EventType::STARTED;
  xQueueSend(this_mixer->event_queue_, &event, portMAX_DELAY);

  while (true) {
    if (xQueueReceive(this_mixer->command_queue_, &command_event, 0) == pdTRUE) {
      if (command_event.command == CommandEventType::STOP) {
        break;
      } else if (command_event.command == CommandEventType::DUCK) {
        if (target_ducking_db_reduction != command_event.decibel_reduction) {
          current_ducking_db_reduction = target_ducking_db_reduction;

          target_ducking_db_reduction = command_event.decibel_reduction;

          uint8_t total_ducking_steps = 0;
          if (target_ducking_db_reduction > current_ducking_db_reduction) {
            // The dB reduction level is increasing (which results in quieter audio)
            total_ducking_steps = target_ducking_db_reduction - current_ducking_db_reduction - 1;
            db_change_per_ducking_step = 1;
          } else {
            // The dB reduction level is decreasing (which results in louder audio)
            total_ducking_steps = current_ducking_db_reduction - target_ducking_db_reduction - 1;
            db_change_per_ducking_step = -1;
          }
          if (total_ducking_steps > 0) {
            ducking_transition_samples_remaining = command_event.transition_samples;

            samples_per_ducking_step = ducking_transition_samples_remaining / total_ducking_steps;
          } else {
            ducking_transition_samples_remaining = 0;
          }
        }
      } else if (command_event.command == CommandEventType::PAUSE_MEDIA) {
        transfer_media = false;
      } else if (command_event.command == CommandEventType::RESUME_MEDIA) {
        transfer_media = true;
      } else if (command_event.command == CommandEventType::CLEAR_MEDIA) {
        this_mixer->media_ring_buffer_->reset();
      } else if (command_event.command == CommandEventType::CLEAR_ANNOUNCEMENT) {
        this_mixer->announcement_ring_buffer_->reset();
      }
    }

    if (combination_buffer_length > 0) {
      size_t output_bytes_written = this_mixer->speaker_->play((uint8_t *) combination_buffer,
                                                               combination_buffer_length, pdMS_TO_TICKS(TASK_DELAY_MS));
      combination_buffer_length -= output_bytes_written;
      if ((combination_buffer_length > 0) && (output_bytes_written > 0)) {
        memmove(combination_buffer, combination_buffer + output_bytes_written / sizeof(int16_t),
                combination_buffer_length);
      }
    } else {
      size_t media_available = this_mixer->media_ring_buffer_->available();
      size_t announcement_available = this_mixer->announcement_ring_buffer_->available();

      if (media_available * transfer_media + announcement_available > 0) {
        size_t bytes_to_read = OUTPUT_BUFFER_SAMPLES * sizeof(int16_t);

        if (media_available * transfer_media > 0) {
          bytes_to_read = std::min(bytes_to_read, media_available);
        }

        if (announcement_available > 0) {
          bytes_to_read = std::min(bytes_to_read, announcement_available);
        }

        if (bytes_to_read > 0) {
          size_t media_bytes_read = 0;
          if (media_available * transfer_media > 0) {
            media_bytes_read = this_mixer->media_ring_buffer_->read((void *) media_buffer, bytes_to_read, 0);
            if (media_bytes_read > 0) {
              size_t samples_read = media_bytes_read / sizeof(int16_t);
              if (ducking_transition_samples_remaining > 0) {
                // Ducking level is still transitioning

                size_t samples_left = ducking_transition_samples_remaining;

                // There may be more than one step worth of samples to duck in the buffers, so manage positions
                int16_t *current_media_buffer = media_buffer;

                size_t samples_left_in_step = samples_left % samples_per_ducking_step;
                if (samples_left_in_step == 0) {
                  // Start of a new ducking step

                  current_ducking_db_reduction += db_change_per_ducking_step;
                  samples_left_in_step = samples_per_ducking_step;
                }
                size_t samples_left_to_duck = std::min(samples_left_in_step, samples_read);

                size_t total_samples_ducked = 0;

                while (samples_left_to_duck > 0) {
                  // Ensure we only point to valid index in the Q15 scaling factor table
                  uint8_t safe_db_reduction_index =
                      clamp<uint8_t>(current_ducking_db_reduction, 0, decibel_reduction_table.size() - 1);

                  int16_t q15_scale_factor = decibel_reduction_table[safe_db_reduction_index];
                  this_mixer->scale_audio_samples_(current_media_buffer, current_media_buffer, q15_scale_factor,
                                                   samples_left_to_duck);

                  current_media_buffer += samples_left_to_duck;

                  samples_read -= samples_left_to_duck;
                  samples_left -= samples_left_to_duck;

                  total_samples_ducked += samples_left_to_duck;

                  samples_left_in_step = samples_left % samples_per_ducking_step;
                  if (samples_left_in_step == 0) {
                    // Start of a new step

                    current_ducking_db_reduction += db_change_per_ducking_step;
                    samples_left_in_step = samples_per_ducking_step;
                  }
                  samples_left_to_duck = std::min(samples_left_in_step, samples_read);
                }
              } else if (target_ducking_db_reduction > 0) {
                // We still need to apply a ducking scaling, but we are done transitioning

                uint8_t safe_db_reduction_index =
                    clamp<uint8_t>(target_ducking_db_reduction, 0, decibel_reduction_table.size() - 1);

                int16_t q15_scale_factor = decibel_reduction_table[safe_db_reduction_index];
                this_mixer->scale_audio_samples_(media_buffer, media_buffer, q15_scale_factor, samples_read);
              }
            }
          }

          size_t announcement_bytes_read = 0;
          if (announcement_available > 0) {
            announcement_bytes_read =
                this_mixer->announcement_ring_buffer_->read((void *) announcement_buffer, bytes_to_read, 0);
          }

          if ((media_bytes_read > 0) && (announcement_bytes_read > 0)) {
            // We have both a media and an announcement stream, so mix them together

            size_t samples_read = bytes_to_read / sizeof(int16_t);

            this_mixer->mix_audio_samples_without_clipping_(media_buffer, announcement_buffer, combination_buffer,
                                                            samples_read);

            combination_buffer_length = samples_read * sizeof(int16_t);
          } else if (media_bytes_read > 0) {
            memcpy(combination_buffer, media_buffer, media_bytes_read);
            combination_buffer_length = media_bytes_read;
          } else if (announcement_bytes_read > 0) {
            memcpy(combination_buffer, announcement_buffer, announcement_bytes_read);
            combination_buffer_length = announcement_bytes_read;
          }

          size_t samples_written = combination_buffer_length / sizeof(int16_t);
          if (ducking_transition_samples_remaining > 0) {
            ducking_transition_samples_remaining -= std::min(samples_written, ducking_transition_samples_remaining);
          }
        }
      } else {
        // No audio data available in either buffer

        delay(TASK_DELAY_MS);
      }
    }
  }

  event.type = EventType::STOPPING;
  xQueueSend(this_mixer->event_queue_, &event, portMAX_DELAY);

  this_mixer->reset_ring_buffers_();
  allocator.deallocate(media_buffer, OUTPUT_BUFFER_SAMPLES);
  allocator.deallocate(announcement_buffer, OUTPUT_BUFFER_SAMPLES);
  allocator.deallocate(combination_buffer, OUTPUT_BUFFER_SAMPLES);

  event.type = EventType::STOPPED;
  xQueueSend(this_mixer->event_queue_, &event, portMAX_DELAY);

  while (true) {
    delay(TASK_DELAY_MS);
  }
}

esp_err_t AudioMixer::allocate_buffers_() {
  if (this->media_ring_buffer_ == nullptr)
    this->media_ring_buffer_ = RingBuffer::create(INPUT_RING_BUFFER_SAMPLES * sizeof(int16_t));

  if (this->announcement_ring_buffer_ == nullptr)
    this->announcement_ring_buffer_ = RingBuffer::create(INPUT_RING_BUFFER_SAMPLES * sizeof(int16_t));

  if ((this->announcement_ring_buffer_ == nullptr) || (this->media_ring_buffer_ == nullptr)) {
    return ESP_ERR_NO_MEM;
  }

  if (this->stack_buffer_ == nullptr)
    this->stack_buffer_ = (StackType_t *) malloc(TASK_STACK_SIZE);

  if (this->stack_buffer_ == nullptr) {
    return ESP_ERR_NO_MEM;
  }

  if (this->event_queue_ == nullptr)
    this->event_queue_ = xQueueCreate(QUEUE_COUNT, sizeof(TaskEvent));

  if (this->command_queue_ == nullptr)
    this->command_queue_ = xQueueCreate(QUEUE_COUNT, sizeof(CommandEvent));

  if ((this->event_queue_ == nullptr) || (this->command_queue_ == nullptr)) {
    return ESP_ERR_NO_MEM;
  }

  return ESP_OK;
}

void AudioMixer::reset_ring_buffers_() {
  this->media_ring_buffer_->reset();
  this->announcement_ring_buffer_->reset();
}

void AudioMixer::mix_audio_samples_without_clipping_(int16_t *media_buffer, int16_t *announcement_buffer,
                                                     int16_t *combination_buffer, size_t samples_to_mix) {
  // We first test adding the samples from the two streams together and check for any clipping
  // We want the announcement volume to be consistent, regardless of whether media is playing or not
  // If there is clipping, we determine what factor we need to multiply that media sample by to avoid it
  // We take the smallest factor necessary for all the samples so the media volume is consistent on this batch
  // of samples
  // Note: This may not be the best approach. Adding 2 audio samples together makes both sound louder, even if
  // we are not clipping. As a result, the mixed announcement will sound louder (by around 3dB if the audio
  // streams are independent?) than if it were by itself.

  int16_t q15_scaling_factor = MAX_AUDIO_SAMPLE_VALUE;

  for (int i = 0; i < samples_to_mix; ++i) {
    int32_t added_sample = static_cast<int32_t>(media_buffer[i]) + static_cast<int32_t>(announcement_buffer[i]);

    if ((added_sample > MAX_AUDIO_SAMPLE_VALUE) || (added_sample < MIN_AUDIO_SAMPLE_VALUE)) {
      // The largest magnitude the media sample can be to avoid clipping (converted to Q30 fixed point)
      int32_t q30_media_sample_safe_max =
          static_cast<int32_t>(std::abs(MIN_AUDIO_SAMPLE_VALUE) - std::abs(announcement_buffer[i])) << 15;

      // Actual media sample value (Q15 number stored in an int32 for future division)
      int32_t media_sample_value = abs(media_buffer[i]);

      // Calculation to perform the Q15 division for media_sample_safe_max/media_sample_value
      // Reference: https://sestevenson.wordpress.com/2010/09/20/fixed-point-division-2/ (accessed August 15,
      // 2024)
      int16_t necessary_q15_factor = static_cast<int16_t>(q30_media_sample_safe_max / media_sample_value);
      // Take the minimum scaling factor (the smaller the factor, the more it needs to be scaled down)
      q15_scaling_factor = std::min(necessary_q15_factor, q15_scaling_factor);
    } else {
      // Store the combined samples in the combination buffer. If we do not need to scale, then the samples are already
      // mixed.
      combination_buffer[i] = added_sample;
    }
  }

  if (q15_scaling_factor < MAX_AUDIO_SAMPLE_VALUE) {
    // Need to scale to avoid clipping

    this->scale_audio_samples_(media_buffer, media_buffer, q15_scaling_factor, samples_to_mix);

    // Mix both streams by adding them together with no bitshift
    // The dsps_add functions have the following inputs:
    // (buffer 1, buffer 2, output buffer, number of samples, buffer 1 step, buffer 2 step, output buffer step,
    // bitshift)
    dsps_add_s16(media_buffer, announcement_buffer, combination_buffer, samples_to_mix, 1, 1, 1, 0);
  }
}

void AudioMixer::scale_audio_samples_(int16_t *audio_samples, int16_t *output_buffer, int16_t scale_factor,
                                      size_t samples_to_scale) {
  // Scale the audio samples and store them in the output buffer
  dsps_mulc_s16(audio_samples, output_buffer, samples_to_scale, scale_factor, 1, 1);
}

}  // namespace nabu
}  // namespace esphome
#endif
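To make the clip-avoidance scaling in mix_audio_samples_without_clipping_ concrete, here is a small worked example using the same Q30/Q15 arithmetic; the sample values are invented for illustration and this fragment is not part of the commit.

// Suppose one mixed sample would clip (values chosen only for the example):
int16_t media_sample = 20000, announcement_sample = 20000;
int32_t added_sample = media_sample + announcement_sample;  // 40000 > 32767, so it clips

// Largest media magnitude that still fits next to the announcement, promoted to Q30:
int32_t q30_media_sample_safe_max = (32768 - 20000) << 15;  // 12768 << 15 = 418,381,824

// Q15 division gives the scale factor: 418,381,824 / 20000 ≈ 20919 (≈ 0.638 in Q15)
int16_t necessary_q15_factor = static_cast<int16_t>(q30_media_sample_safe_max / media_sample);

// Applying it: (20000 * 20919) >> 15 ≈ 12767, and 12767 + 20000 = 32767, just inside the int16 range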
esphome/components/nabu/audio_mixer.h (new file, 159 additions)

@@ -0,0 +1,159 @@
#pragma once

#ifdef USE_ESP_IDF

#include "esphome/components/media_player/media_player.h"
#include "esphome/components/speaker/speaker.h"

#include "esphome/core/hal.h"
#include "esphome/core/helpers.h"
#include "esphome/core/ring_buffer.h"

#include <freertos/FreeRTOS.h>
#include <freertos/queue.h>

namespace esphome {
namespace nabu {

// Mixes two incoming audio streams together
//  - The media stream intended for music playback
//    - Able to duck (made quieter)
//    - Able to pause
//  - The announcement stream is intended for TTS responses or various beeps/sound effects
//    - Unable to duck
//    - Unable to pause
//  - Each stream has a corresponding input ring buffer. Retrieved via the `get_media_ring_buffer` and
//    `get_announcement_ring_buffer` functions
//  - The mixed audio is sent to the configured speaker component.
//  - The mixer runs as a FreeRTOS task
//    - The task reports its state using the TaskEvent queue. Regularly call the `read_event` function to obtain the
//      current state
//    - Commands are sent to the task using the CommandEvent queue. Use the `send_command` function to do so.
//    - Use the `start` function to initiate. The `stop` function deletes the task, but be sure to send a STOP command
//      first to avoid memory leaks.

enum class EventType : uint8_t {
  STARTING = 0,
  STARTED,
  RUNNING,
  IDLE,
  STOPPING,
  STOPPED,
  WARNING = 255,
};

// Used for reporting the state of the mixer task
struct TaskEvent {
  EventType type;
  esp_err_t err;
};

enum class CommandEventType : uint8_t {
  STOP,                // Stop mixing to prepare for stopping the mixing task
  DUCK,                // Duck the media audio
  PAUSE_MEDIA,         // Pauses the media stream
  RESUME_MEDIA,        // Resumes the media stream
  CLEAR_MEDIA,         // Resets the media ring buffer
  CLEAR_ANNOUNCEMENT,  // Resets the announcement ring buffer
};

// Used to send commands to the mixer task
struct CommandEvent {
  CommandEventType command;
  uint8_t decibel_reduction;
  size_t transition_samples = 0;
};

// Gives the Q15 fixed point scaling factor to reduce by 0 dB, 1 dB, ..., 50 dB
// dB to PCM scaling factor formula: floating_point_scale_factor = 2^(-db/6.014)
// float to Q15 fixed point formula: q15_scale_factor = floating_point_scale_factor * 2^(15)
static const std::vector<int16_t> decibel_reduction_table = {
    32767, 29201, 26022, 23189, 20665, 18415, 16410, 14624, 13032, 11613, 10349, 9222, 8218, 7324, 6527, 5816, 5183,
    4619,  4116,  3668,  3269,  2913,  2596,  2313,  2061,  1837,  1637,  1459,  1300, 1158, 1032, 920,  820,  731,
    651,   580,   517,   461,   411,   366,   326,   291,   259,   231,   206,   183,  163,  146,  130,  116,  103};
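// Worked check of the formulas above (derived values, not part of the original table comment):
//   6 dB: 2^(-6/6.014) ≈ 0.5008, and 0.5008 * 2^15 ≈ 16410, which is the entry at index 6
//   0 dB: 2^0 = 1.0, and 1.0 * 2^15 = 32768, saturated to 32767 (INT16_MAX) at index 0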

class AudioMixer {
 public:
  /// @brief Sends a CommandEvent to the command queue
  /// @param command Pointer to CommandEvent object to be sent
  /// @param ticks_to_wait The number of FreeRTOS ticks to wait for space on the queue. Defaults to portMAX_DELAY.
  /// @return pdTRUE if successful, pdFALSE otherwise
  BaseType_t send_command(CommandEvent *command, TickType_t ticks_to_wait = portMAX_DELAY) {
    return xQueueSend(this->command_queue_, command, ticks_to_wait);
  }

  /// @brief Reads a TaskEvent from the event queue indicating its current status
  /// @param event Pointer to TaskEvent object to store the event in
  /// @param ticks_to_wait The number of FreeRTOS ticks to wait for an event to appear on the queue. Defaults to 0.
  /// @return pdTRUE if successful, pdFALSE otherwise
  BaseType_t read_event(TaskEvent *event, TickType_t ticks_to_wait = 0) {
    return xQueueReceive(this->event_queue_, event, ticks_to_wait);
  }

  /// @brief Starts the mixer task
  /// @param speaker Pointer to Speaker component
  /// @param task_name FreeRTOS task name
  /// @param priority FreeRTOS task priority. Defaults to 1
  /// @return ESP_OK if successful, an error otherwise
  esp_err_t start(speaker::Speaker *speaker, const std::string &task_name, UBaseType_t priority = 1);

  /// @brief Stops the mixer task and clears the queues
  void stop();

  /// @brief Retrieves the media stream's ring buffer pointer
  /// @return pointer to media ring buffer
  RingBuffer *get_media_ring_buffer() { return this->media_ring_buffer_.get(); }

  /// @brief Retrieves the announcement stream's ring buffer pointer
  /// @return pointer to announcement ring buffer
  RingBuffer *get_announcement_ring_buffer() { return this->announcement_ring_buffer_.get(); }

  /// @brief Suspends the mixer task
  void suspend_task();
  /// @brief Resumes the mixer task
  void resume_task();

 protected:
  /// @brief Allocates the ring buffers, task stack, and queues
  /// @return ESP_OK if successful or an error otherwise
  esp_err_t allocate_buffers_();

  /// @brief Resets the media and announcement ring buffers
  void reset_ring_buffers_();

  /// @brief Mixes the media and announcement samples. If the resulting audio clips, the media samples are first scaled.
  /// @param media_buffer buffer for media samples
  /// @param announcement_buffer buffer for announcement samples
  /// @param combination_buffer buffer for the mixed samples
  /// @param samples_to_mix number of samples in the media and announcement buffers to mix together
  void mix_audio_samples_without_clipping_(int16_t *media_buffer, int16_t *announcement_buffer,
                                           int16_t *combination_buffer, size_t samples_to_mix);

  /// @brief Scales audio samples. Scales in place when audio_samples == output_buffer.
  /// @param audio_samples PCM int16 audio samples
  /// @param output_buffer Buffer to store the scaled samples
  /// @param scale_factor Q15 fixed point scaling factor
  /// @param samples_to_scale Number of samples to scale
  void scale_audio_samples_(int16_t *audio_samples, int16_t *output_buffer, int16_t scale_factor,
                            size_t samples_to_scale);

  static void audio_mixer_task_(void *params);
  TaskHandle_t task_handle_{nullptr};
  StaticTask_t task_stack_;
  StackType_t *stack_buffer_{nullptr};

  // Reports events from the mixer task
  QueueHandle_t event_queue_;

  // Stores commands to send the mixer task
  QueueHandle_t command_queue_;

  speaker::Speaker *speaker_{nullptr};

  std::unique_ptr<RingBuffer> media_ring_buffer_;
  std::unique_ptr<RingBuffer> announcement_ring_buffer_;
};
}  // namespace nabu
}  // namespace esphome

#endif
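As a rough sketch of how a caller might drive this class (not code from the commit; the speaker pointer, the pcm_samples buffer, and the sample counts are assumptions for illustration), the mixer is started once, PCM is pushed into one of its ring buffers, and ducking is requested through the command queue:

AudioMixer mixer;
if (mixer.start(speaker, "mixer_task", /*priority=*/1) == ESP_OK) {
  // A decoder/pipeline task would normally write decoded 16-bit PCM here.
  RingBuffer *media_rb = mixer.get_media_ring_buffer();
  media_rb->write_without_replacement((void *) pcm_samples, pcm_length_bytes, pdMS_TO_TICKS(20));

  // Duck the media stream by 10 dB, spreading the transition over 4410 samples (roughly 0.1 s at 44.1 kHz).
  CommandEvent duck_command;
  duck_command.command = CommandEventType::DUCK;
  duck_command.decibel_reduction = 10;
  duck_command.transition_samples = 4410;
  mixer.send_command(&duck_command);

  // Poll the mixer's status without blocking.
  TaskEvent event;
  while (mixer.read_event(&event) == pdTRUE) {
    // react to STARTED / WARNING / STOPPED ...
  }
}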
esphome/components/nabu/audio_pipeline.cpp (new file, 540 additions)

@@ -0,0 +1,540 @@
#ifdef USE_ESP_IDF

#include "audio_pipeline.h"

#include "esphome/core/helpers.h"
#include "esphome/core/log.h"

namespace esphome {
namespace nabu {

static const size_t FILE_BUFFER_SIZE = 32 * 1024;
static const size_t FILE_RING_BUFFER_SIZE = 64 * 1024;
static const size_t BUFFER_SIZE_SAMPLES = 32768;
static const size_t BUFFER_SIZE_BYTES = BUFFER_SIZE_SAMPLES * sizeof(int16_t);

static const uint32_t READER_TASK_STACK_SIZE = 5 * 1024;
static const uint32_t DECODER_TASK_STACK_SIZE = 3 * 1024;
static const uint32_t RESAMPLER_TASK_STACK_SIZE = 3 * 1024;

static const size_t INFO_ERROR_QUEUE_COUNT = 5;

static const char *const TAG = "nabu_media_player.pipeline";

enum EventGroupBits : uint32_t {
  // The stop() function clears all unfinished bits
  // MESSAGE_* bits are only set by their respective tasks

  // Stops all activity in the pipeline elements; set by stop() or by each task
  PIPELINE_COMMAND_STOP = (1 << 0),

  // Read audio from an HTTP source; cleared by reader task and set by start(uri,...)
  READER_COMMAND_INIT_HTTP = (1 << 4),
  // Read audio from an audio file from the flash; cleared by reader task and set by start(media_file,...)
  READER_COMMAND_INIT_FILE = (1 << 5),

  // Audio file type is read after checking it is supported; cleared by decoder task
  READER_MESSAGE_LOADED_MEDIA_TYPE = (1 << 6),
  // Reader is done (either through a failure or just end of the stream); cleared by reader task
  READER_MESSAGE_FINISHED = (1 << 7),
  // Error reading the file; cleared by get_state()
  READER_MESSAGE_ERROR = (1 << 8),

  // Decoder has determined the stream information; cleared by resampler
  DECODER_MESSAGE_LOADED_STREAM_INFO = (1 << 11),
  // Decoder is done (either through a failure or the end of the stream); cleared by decoder task
  DECODER_MESSAGE_FINISHED = (1 << 12),
  // Error decoding the file; cleared by get_state()
  DECODER_MESSAGE_ERROR = (1 << 13),

  // Resampler is done (either through a failure or the end of the stream); cleared by resampler task
  RESAMPLER_MESSAGE_FINISHED = (1 << 17),
  // Error resampling the file; cleared by get_state()
  RESAMPLER_MESSAGE_ERROR = (1 << 18),

  // Cleared by respective tasks
  FINISHED_BITS = READER_MESSAGE_FINISHED | DECODER_MESSAGE_FINISHED | RESAMPLER_MESSAGE_FINISHED,
  UNFINISHED_BITS = ~(FINISHED_BITS | 0xff000000),  // Only 24 bits are valid for the event group, so make sure first 8
                                                    // bits of uint32 are not set; cleared by stop()
};
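// Worked values for the composite masks above (derived from the bit positions; added for clarity):
//   FINISHED_BITS   == (1 << 7) | (1 << 12) | (1 << 17) == 0x00021080
//   UNFINISHED_BITS == ~(0x00021080 | 0xff000000)       == 0x00fdef7f
// i.e. every bit available in the 24-bit event group except the three *_MESSAGE_FINISHED flags,
// which is what stop() clears after all three tasks report that they have finished.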

AudioPipeline::AudioPipeline(AudioMixer *mixer, AudioPipelineType pipeline_type) {
  this->mixer_ = mixer;
  this->pipeline_type_ = pipeline_type;
}

esp_err_t AudioPipeline::start(const std::string &uri, uint32_t target_sample_rate, const std::string &task_name,
                               UBaseType_t priority) {
  esp_err_t err = this->common_start_(target_sample_rate, task_name, priority);

  if (err == ESP_OK) {
    this->current_uri_ = uri;
    xEventGroupSetBits(this->event_group_, READER_COMMAND_INIT_HTTP);
  }

  return err;
}

esp_err_t AudioPipeline::start(MediaFile *media_file, uint32_t target_sample_rate, const std::string &task_name,
                               UBaseType_t priority) {
  esp_err_t err = this->common_start_(target_sample_rate, task_name, priority);

  if (err == ESP_OK) {
    this->current_media_file_ = media_file;
    xEventGroupSetBits(this->event_group_, READER_COMMAND_INIT_FILE);
  }

  return err;
}

esp_err_t AudioPipeline::allocate_buffers_() {
  if (this->raw_file_ring_buffer_ == nullptr)
    this->raw_file_ring_buffer_ = RingBuffer::create(FILE_RING_BUFFER_SIZE);

  if (this->decoded_ring_buffer_ == nullptr)
    this->decoded_ring_buffer_ = RingBuffer::create(BUFFER_SIZE_BYTES);

  if ((this->raw_file_ring_buffer_ == nullptr) || (this->decoded_ring_buffer_ == nullptr)) {
    return ESP_ERR_NO_MEM;
  }

  if (this->read_task_stack_buffer_ == nullptr)
    this->read_task_stack_buffer_ = (StackType_t *) malloc(READER_TASK_STACK_SIZE);

  if (this->decode_task_stack_buffer_ == nullptr)
    this->decode_task_stack_buffer_ = (StackType_t *) malloc(DECODER_TASK_STACK_SIZE);

  if (this->resample_task_stack_buffer_ == nullptr)
    this->resample_task_stack_buffer_ = (StackType_t *) malloc(RESAMPLER_TASK_STACK_SIZE);

  if ((this->read_task_stack_buffer_ == nullptr) || (this->decode_task_stack_buffer_ == nullptr) ||
      (this->resample_task_stack_buffer_ == nullptr)) {
    return ESP_ERR_NO_MEM;
  }

  if (this->event_group_ == nullptr)
    this->event_group_ = xEventGroupCreate();

  if (this->event_group_ == nullptr) {
    return ESP_ERR_NO_MEM;
  }

  if (this->info_error_queue_ == nullptr)
    this->info_error_queue_ = xQueueCreate(INFO_ERROR_QUEUE_COUNT, sizeof(InfoErrorEvent));

  if (this->info_error_queue_ == nullptr)
    return ESP_ERR_NO_MEM;

  return ESP_OK;
}

esp_err_t AudioPipeline::common_start_(uint32_t target_sample_rate, const std::string &task_name,
                                       UBaseType_t priority) {
  esp_err_t err = this->allocate_buffers_();
  if (err != ESP_OK) {
    return err;
  }

  if (this->read_task_handle_ == nullptr) {
    this->read_task_handle_ =
        xTaskCreateStatic(AudioPipeline::read_task_, (task_name + "_read").c_str(), READER_TASK_STACK_SIZE,
                          (void *) this, priority, this->read_task_stack_buffer_, &this->read_task_stack_);
  }
  if (this->decode_task_handle_ == nullptr) {
    this->decode_task_handle_ =
        xTaskCreateStatic(AudioPipeline::decode_task_, (task_name + "_decode").c_str(), DECODER_TASK_STACK_SIZE,
                          (void *) this, priority, this->decode_task_stack_buffer_, &this->decode_task_stack_);
  }
  if (this->resample_task_handle_ == nullptr) {
    this->resample_task_handle_ =
        xTaskCreateStatic(AudioPipeline::resample_task_, (task_name + "_resample").c_str(), RESAMPLER_TASK_STACK_SIZE,
                          (void *) this, priority, this->resample_task_stack_buffer_, &this->resample_task_stack_);
  }

  if ((this->read_task_handle_ == nullptr) || (this->decode_task_handle_ == nullptr) ||
      (this->resample_task_handle_ == nullptr)) {
    return ESP_FAIL;
  }

  this->target_sample_rate_ = target_sample_rate;

  return this->stop();
}

AudioPipelineState AudioPipeline::get_state() {
  InfoErrorEvent event;
  if (this->info_error_queue_ != nullptr) {
    while (xQueueReceive(this->info_error_queue_, &event, 0)) {
      switch (event.source) {
        case InfoErrorSource::READER:
          if (event.err.has_value()) {
            ESP_LOGE(TAG, "Media reader encountered an error: %s", esp_err_to_name(event.err.value()));
          } else if (event.file_type.has_value()) {
            ESP_LOGD(TAG, "Reading %s file type", media_player_file_type_to_string(event.file_type.value()));
          }

          break;
        case InfoErrorSource::DECODER:
          if (event.err.has_value()) {
            ESP_LOGE(TAG, "Decoder encountered an error: %s", esp_err_to_name(event.err.value()));
          }

          if (event.audio_stream_info.has_value()) {
            ESP_LOGD(TAG, "Decoded audio has %d channels, %" PRId32 " Hz sample rate, and %d bits per sample",
                     event.audio_stream_info.value().channels, event.audio_stream_info.value().sample_rate,
                     event.audio_stream_info.value().bits_per_sample);
          }

          if (event.decoding_err.has_value()) {
            switch (event.decoding_err.value()) {
              case DecodingError::FAILED_HEADER:
                ESP_LOGE(TAG, "Failed to parse the file's header.");
                break;
              case DecodingError::INCOMPATIBLE_BITS_PER_SAMPLE:
                ESP_LOGE(TAG, "Incompatible bits per sample. Only 16 bits per sample is supported");
                break;
              case DecodingError::INCOMPATIBLE_CHANNELS:
                ESP_LOGE(TAG, "Incompatible number of channels. Only 1 or 2 channel audio is supported.");
                break;
            }
          }
          break;
        case InfoErrorSource::RESAMPLER:
          if (event.err.has_value()) {
            ESP_LOGE(TAG, "Resampler encountered an error: %s", esp_err_to_name(event.err.value()));
          } else if (event.resample_info.has_value()) {
            if (event.resample_info.value().resample) {
              ESP_LOGD(TAG, "Converting the audio sample rate");
            }
            if (event.resample_info.value().mono_to_stereo) {
              ESP_LOGD(TAG, "Converting mono channel audio to stereo channel audio");
            }
          }
          break;
      }
    }
  }

  EventBits_t event_bits = xEventGroupGetBits(this->event_group_);
  if (!this->read_task_handle_ && !this->decode_task_handle_ && !this->resample_task_handle_) {
    return AudioPipelineState::STOPPED;
  }

  if ((event_bits & READER_MESSAGE_ERROR)) {
    xEventGroupClearBits(this->event_group_, READER_MESSAGE_ERROR);
    return AudioPipelineState::ERROR_READING;
  }

  if ((event_bits & DECODER_MESSAGE_ERROR)) {
    xEventGroupClearBits(this->event_group_, DECODER_MESSAGE_ERROR);
    return AudioPipelineState::ERROR_DECODING;
  }

  if ((event_bits & RESAMPLER_MESSAGE_ERROR)) {
    xEventGroupClearBits(this->event_group_, RESAMPLER_MESSAGE_ERROR);
    return AudioPipelineState::ERROR_RESAMPLING;
  }

  if ((event_bits & READER_MESSAGE_FINISHED) && (event_bits & DECODER_MESSAGE_FINISHED) &&
      (event_bits & RESAMPLER_MESSAGE_FINISHED)) {
    return AudioPipelineState::STOPPED;
  }

  return AudioPipelineState::PLAYING;
}

esp_err_t AudioPipeline::stop() {
  xEventGroupSetBits(this->event_group_, PIPELINE_COMMAND_STOP);

  uint32_t event_group_bits = xEventGroupWaitBits(this->event_group_,
                                                  FINISHED_BITS,        // Bit message to read
                                                  pdFALSE,              // Clear the bits on exit
                                                  pdTRUE,               // Wait for all the bits,
                                                  pdMS_TO_TICKS(300));  // Duration to block/wait

  if (!(event_group_bits & READER_MESSAGE_FINISHED)) {
    // Reader failed to stop
    xEventGroupSetBits(this->event_group_, EventGroupBits::READER_MESSAGE_ERROR);
  }
  if (!(event_group_bits & DECODER_MESSAGE_FINISHED)) {
    // Decoder failed to stop
    xEventGroupSetBits(this->event_group_, EventGroupBits::DECODER_MESSAGE_ERROR);
  }
  if (!(event_group_bits & RESAMPLER_MESSAGE_FINISHED)) {
    // Resampler failed to stop
    xEventGroupSetBits(this->event_group_, EventGroupBits::RESAMPLER_MESSAGE_ERROR);
  }

  if ((event_group_bits & FINISHED_BITS) != FINISHED_BITS) {
    // Not all bits were set, so it timed out
    return ESP_ERR_TIMEOUT;
  }

  // Clear the ring buffer in the mixer; avoids playing incorrect audio when starting a new file while paused
  CommandEvent command_event;
  if (this->pipeline_type_ == AudioPipelineType::MEDIA) {
    command_event.command = CommandEventType::CLEAR_MEDIA;
  } else {
    command_event.command = CommandEventType::CLEAR_ANNOUNCEMENT;
  }
  this->mixer_->send_command(&command_event);

  xEventGroupClearBits(this->event_group_, UNFINISHED_BITS);
  this->reset_ring_buffers();

  return ESP_OK;
}

void AudioPipeline::reset_ring_buffers() {
  this->raw_file_ring_buffer_->reset();
  this->decoded_ring_buffer_->reset();
}

void AudioPipeline::suspend_tasks() {
  if (this->read_task_handle_ != nullptr) {
    vTaskSuspend(this->read_task_handle_);
  }
  if (this->decode_task_handle_ != nullptr) {
    vTaskSuspend(this->decode_task_handle_);
  }
  if (this->resample_task_handle_ != nullptr) {
    vTaskSuspend(this->resample_task_handle_);
  }
}

void AudioPipeline::resume_tasks() {
  if (this->read_task_handle_ != nullptr) {
    vTaskResume(this->read_task_handle_);
  }
  if (this->decode_task_handle_ != nullptr) {
    vTaskResume(this->decode_task_handle_);
  }
  if (this->resample_task_handle_ != nullptr) {
    vTaskResume(this->resample_task_handle_);
  }
}

void AudioPipeline::read_task_(void *params) {
  AudioPipeline *this_pipeline = (AudioPipeline *) params;

  while (true) {
    xEventGroupSetBits(this_pipeline->event_group_, EventGroupBits::READER_MESSAGE_FINISHED);

    // Wait until the pipeline notifies us the source of the media file
    EventBits_t event_bits =
        xEventGroupWaitBits(this_pipeline->event_group_,
                            READER_COMMAND_INIT_FILE | READER_COMMAND_INIT_HTTP,  // Bit message to read
                            pdTRUE,                                               // Clear the bit on exit
                            pdFALSE,                                              // Wait for all the bits,
                            portMAX_DELAY);                                       // Block indefinitely until bit is set

    xEventGroupClearBits(this_pipeline->event_group_, EventGroupBits::READER_MESSAGE_FINISHED);

    {
      InfoErrorEvent event;
      event.source = InfoErrorSource::READER;
      esp_err_t err = ESP_OK;

      AudioReader reader = AudioReader(this_pipeline->raw_file_ring_buffer_.get(), FILE_BUFFER_SIZE);

      if (event_bits & READER_COMMAND_INIT_FILE) {
        err = reader.start(this_pipeline->current_media_file_, this_pipeline->current_media_file_type_);
      } else {
        err = reader.start(this_pipeline->current_uri_, this_pipeline->current_media_file_type_);
      }
      if (err != ESP_OK) {
        // Send specific error message
        event.err = err;
        xQueueSend(this_pipeline->info_error_queue_, &event, portMAX_DELAY);

        // Setting up the reader failed, stop the pipeline
        xEventGroupSetBits(this_pipeline->event_group_,
                           EventGroupBits::READER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP);
      } else {
        // Send the file type to the pipeline
        event.file_type = this_pipeline->current_media_file_type_;
        xQueueSend(this_pipeline->info_error_queue_, &event, portMAX_DELAY);

        // Inform the decoder that the media type is available
        xEventGroupSetBits(this_pipeline->event_group_, EventGroupBits::READER_MESSAGE_LOADED_MEDIA_TYPE);
      }

      while (true) {
        event_bits = xEventGroupGetBits(this_pipeline->event_group_);

        if (event_bits & PIPELINE_COMMAND_STOP) {
          break;
        }

        AudioReaderState reader_state = reader.read();

        if (reader_state == AudioReaderState::FINISHED) {
          break;
        } else if (reader_state == AudioReaderState::FAILED) {
          xEventGroupSetBits(this_pipeline->event_group_,
                             EventGroupBits::READER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP);
          break;
        }
      }
    }
  }
}

void AudioPipeline::decode_task_(void *params) {
  AudioPipeline *this_pipeline = (AudioPipeline *) params;

  while (true) {
    xEventGroupSetBits(this_pipeline->event_group_, EventGroupBits::DECODER_MESSAGE_FINISHED);

    // Wait until the reader notifies us that the media type is available
    EventBits_t event_bits = xEventGroupWaitBits(this_pipeline->event_group_,
                                                 READER_MESSAGE_LOADED_MEDIA_TYPE,  // Bit message to read
                                                 pdTRUE,                            // Clear the bit on exit
                                                 pdFALSE,                           // Wait for all the bits,
                                                 portMAX_DELAY);                    // Block indefinitely until bit is set

    xEventGroupClearBits(this_pipeline->event_group_, EventGroupBits::DECODER_MESSAGE_FINISHED);

    {
      InfoErrorEvent event;
      event.source = InfoErrorSource::DECODER;

      std::unique_ptr<AudioDecoder> decoder = make_unique<AudioDecoder>(
          this_pipeline->raw_file_ring_buffer_.get(), this_pipeline->decoded_ring_buffer_.get(), FILE_BUFFER_SIZE);
      esp_err_t err = decoder->start(this_pipeline->current_media_file_type_);

      if (err != ESP_OK) {
        // Send specific error message
        event.err = err;
        xQueueSend(this_pipeline->info_error_queue_, &event, portMAX_DELAY);

        // Setting up the decoder failed, stop the pipeline
        xEventGroupSetBits(this_pipeline->event_group_,
                           EventGroupBits::DECODER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP);
      }

      bool has_stream_info = false;

      while (true) {
        event_bits = xEventGroupGetBits(this_pipeline->event_group_);

        if (event_bits & PIPELINE_COMMAND_STOP) {
          break;
        }

        // Stop gracefully if the reader has finished
        AudioDecoderState decoder_state = decoder->decode(event_bits & READER_MESSAGE_FINISHED);

        if (decoder_state == AudioDecoderState::FINISHED) {
          break;
        } else if (decoder_state == AudioDecoderState::FAILED) {
          if (!has_stream_info) {
            event.decoding_err = DecodingError::FAILED_HEADER;
            xQueueSend(this_pipeline->info_error_queue_, &event, portMAX_DELAY);
          }
          xEventGroupSetBits(this_pipeline->event_group_,
                             EventGroupBits::DECODER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP);
          break;
        }

        if (!has_stream_info && decoder->get_audio_stream_info().has_value()) {
          has_stream_info = true;

          this_pipeline->current_audio_stream_info_ = decoder->get_audio_stream_info().value();

          // Send the stream information to the pipeline
          event.audio_stream_info = this_pipeline->current_audio_stream_info_;

          if (this_pipeline->current_audio_stream_info_.bits_per_sample != 16) {
            // Error state, incompatible bits per sample
            event.decoding_err = DecodingError::INCOMPATIBLE_BITS_PER_SAMPLE;
            xEventGroupSetBits(this_pipeline->event_group_,
                               EventGroupBits::DECODER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP);
          } else if ((this_pipeline->current_audio_stream_info_.channels > 2)) {
            // Error state, incompatible number of channels
            event.decoding_err = DecodingError::INCOMPATIBLE_CHANNELS;
            xEventGroupSetBits(this_pipeline->event_group_,
                               EventGroupBits::DECODER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP);
          } else {
            // Inform the resampler that the stream information is available
            xEventGroupSetBits(this_pipeline->event_group_, EventGroupBits::DECODER_MESSAGE_LOADED_STREAM_INFO);
          }

          xQueueSend(this_pipeline->info_error_queue_, &event, portMAX_DELAY);
        }
      }
    }
  }
}

void AudioPipeline::resample_task_(void *params) {
  AudioPipeline *this_pipeline = (AudioPipeline *) params;
|
||||
|
||||
while (true) {
|
||||
xEventGroupSetBits(this_pipeline->event_group_, EventGroupBits::RESAMPLER_MESSAGE_FINISHED);
|
||||
|
||||
// Wait until the decoder notifies us that the stream information is available
|
||||
EventBits_t event_bits = xEventGroupWaitBits(this_pipeline->event_group_,
|
||||
DECODER_MESSAGE_LOADED_STREAM_INFO, // Bit message to read
|
||||
pdTRUE, // Clear the bit on exit
|
||||
pdFALSE, // Wait for all the bits,
|
||||
portMAX_DELAY); // Block indefinitely until bit is set
|
||||
|
||||
xEventGroupClearBits(this_pipeline->event_group_, EventGroupBits::RESAMPLER_MESSAGE_FINISHED);
|
||||
|
||||
{
|
||||
InfoErrorEvent event;
|
||||
event.source = InfoErrorSource::RESAMPLER;
|
||||
|
||||
RingBuffer *output_ring_buffer = nullptr;
|
||||
|
||||
if (this_pipeline->pipeline_type_ == AudioPipelineType::MEDIA) {
|
||||
output_ring_buffer = this_pipeline->mixer_->get_media_ring_buffer();
|
||||
} else {
|
||||
output_ring_buffer = this_pipeline->mixer_->get_announcement_ring_buffer();
|
||||
}
|
||||
|
||||
AudioResampler resampler =
|
||||
AudioResampler(this_pipeline->decoded_ring_buffer_.get(), output_ring_buffer, BUFFER_SIZE_SAMPLES);
|
||||
|
||||
esp_err_t err = resampler.start(this_pipeline->current_audio_stream_info_, this_pipeline->target_sample_rate_,
|
||||
this_pipeline->current_resample_info_);
|
||||
|
||||
if (err != ESP_OK) {
|
||||
// Send specific error message
|
||||
event.err = err;
|
||||
xQueueSend(this_pipeline->info_error_queue_, &event, portMAX_DELAY);
|
||||
|
||||
// Setting up the resampler failed, stop the pipeline
|
||||
xEventGroupSetBits(this_pipeline->event_group_,
|
||||
EventGroupBits::RESAMPLER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP);
|
||||
} else {
|
||||
event.resample_info = this_pipeline->current_resample_info_;
|
||||
xQueueSend(this_pipeline->info_error_queue_, &event, portMAX_DELAY);
|
||||
}
|
||||
|
||||
while (true) {
|
||||
event_bits = xEventGroupGetBits(this_pipeline->event_group_);
|
||||
|
||||
if (event_bits & PIPELINE_COMMAND_STOP) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Stop gracefully if the decoder is done
|
||||
AudioResamplerState resampler_state = resampler.resample(event_bits & DECODER_MESSAGE_FINISHED);
|
||||
|
||||
if (resampler_state == AudioResamplerState::FINISHED) {
|
||||
break;
|
||||
} else if (resampler_state == AudioResamplerState::FAILED) {
|
||||
xEventGroupSetBits(this_pipeline->event_group_,
|
||||
EventGroupBits::RESAMPLER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace nabu
|
||||
} // namespace esphome
|
||||
#endif
|
153
esphome/components/nabu/audio_pipeline.h
Normal file
153
esphome/components/nabu/audio_pipeline.h
Normal file
|
@ -0,0 +1,153 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef USE_ESP_IDF
|
||||
|
||||
#include "audio_reader.h"
|
||||
#include "audio_decoder.h"
|
||||
#include "audio_resampler.h"
|
||||
#include "audio_mixer.h"
|
||||
#include "nabu_media_helpers.h"
|
||||
|
||||
#include "esphome/components/audio/audio.h"
|
||||
|
||||
#include "esphome/core/hal.h"
|
||||
#include "esphome/core/helpers.h"
|
||||
#include "esphome/core/ring_buffer.h"
|
||||
|
||||
#include <freertos/FreeRTOS.h>
|
||||
#include <freertos/event_groups.h>
|
||||
#include <freertos/queue.h>
|
||||
|
||||
namespace esphome {
|
||||
namespace nabu {
|
||||
|
||||
enum class AudioPipelineType : uint8_t {
|
||||
MEDIA,
|
||||
ANNOUNCEMENT,
|
||||
};
|
||||
|
||||
enum class AudioPipelineState : uint8_t {
|
||||
PLAYING,
|
||||
STOPPED,
|
||||
ERROR_READING,
|
||||
ERROR_DECODING,
|
||||
ERROR_RESAMPLING,
|
||||
};
|
||||
|
||||
enum class InfoErrorSource : uint8_t {
|
||||
READER = 0,
|
||||
DECODER,
|
||||
RESAMPLER,
|
||||
};
|
||||
|
||||
enum class DecodingError : uint8_t {
|
||||
FAILED_HEADER = 0,
|
||||
INCOMPATIBLE_BITS_PER_SAMPLE,
|
||||
INCOMPATIBLE_CHANNELS,
|
||||
};
|
||||
|
||||
// Used to pass information from each task.
|
||||
struct InfoErrorEvent {
|
||||
InfoErrorSource source;
|
||||
optional<esp_err_t> err;
|
||||
optional<MediaFileType> file_type;
|
||||
optional<audio::AudioStreamInfo> audio_stream_info;
|
||||
optional<ResampleInfo> resample_info;
|
||||
optional<DecodingError> decoding_err;
|
||||
};
|
||||
|
||||
class AudioPipeline {
|
||||
public:
|
||||
AudioPipeline(AudioMixer *mixer, AudioPipelineType pipeline_type);
|
||||
|
||||
/// @brief Starts an audio pipeline given a media url
|
||||
/// @param uri media file url
|
||||
/// @param target_sample_rate the desired sample rate of the audio stream
|
||||
/// @param task_name FreeRTOS task name
|
||||
/// @param priority FreeRTOS task priority
|
||||
/// @return ESP_OK if successful or an appropriate error if not
|
||||
esp_err_t start(const std::string &uri, uint32_t target_sample_rate, const std::string &task_name,
|
||||
UBaseType_t priority = 1);
|
||||
|
||||
/// @brief Starts an audio pipeline given a MediaFile pointer
|
||||
/// @param media_file pointer to a MediaFile object
|
||||
/// @param target_sample_rate the desired sample rate of the audio stream
|
||||
/// @param task_name FreeRTOS task name
|
||||
/// @param priority FreeRTOS task priority
|
||||
/// @return ESP_OK if successful or an appropriate error if not
|
||||
esp_err_t start(MediaFile *media_file, uint32_t target_sample_rate, const std::string &task_name,
|
||||
UBaseType_t priority = 1);
|
||||
|
||||
/// @brief Stops the pipeline. Sends a stop signal to each task (if running) and clears the ring buffers.
|
||||
/// @return ESP_OK if successful or ESP_ERR_TIMEOUT if the tasks did not indicate they stopped
|
||||
esp_err_t stop();
|
||||
|
||||
/// @brief Gets the state of the audio pipeline based on the info_error_queue_ and event_group_
|
||||
/// @return AudioPipelineState
|
||||
AudioPipelineState get_state();
|
||||
|
||||
/// @brief Resets the ring buffers, discarding any existing data
|
||||
void reset_ring_buffers();
|
||||
|
||||
/// @brief Suspends any running tasks
|
||||
void suspend_tasks();
|
||||
/// @brief Resumes any running tasks
|
||||
void resume_tasks();
|
||||
|
||||
protected:
|
||||
/// @brief Allocates the ring buffers, event group, and info error queue.
|
||||
/// @return ESP_OK if successful or ESP_ERR_NO_MEM if it is unable to allocate all parts
|
||||
esp_err_t allocate_buffers_();
|
||||
|
||||
/// @brief Common start code for the pipeline, regardless if the source is a file or url.
|
||||
/// @param target_sample_rate the desired sample rate of the audio stream
|
||||
/// @param task_name FreeRTOS task name
|
||||
/// @param priority FreeRTOS task priority
|
||||
/// @return ESP_OK if successful or an appropriate error if not
|
||||
esp_err_t common_start_(uint32_t target_sample_rate, const std::string &task_name, UBaseType_t priority);
|
||||
|
||||
// Pointer to the media player's mixer object. The resample task feeds the appropriate ring buffer directly
|
||||
AudioMixer *mixer_;
|
||||
|
||||
std::string current_uri_{};
|
||||
MediaFile *current_media_file_{nullptr};
|
||||
|
||||
MediaFileType current_media_file_type_;
|
||||
audio::AudioStreamInfo current_audio_stream_info_;
|
||||
ResampleInfo current_resample_info_;
|
||||
uint32_t target_sample_rate_;
|
||||
|
||||
AudioPipelineType pipeline_type_;
|
||||
|
||||
std::unique_ptr<RingBuffer> raw_file_ring_buffer_;
|
||||
std::unique_ptr<RingBuffer> decoded_ring_buffer_;
|
||||
|
||||
// Handles basic control/state of the three tasks
|
||||
EventGroupHandle_t event_group_{nullptr};
|
||||
|
||||
// Receives detailed info (file type, stream info, resampling info) or specific errors from the three tasks
|
||||
QueueHandle_t info_error_queue_{nullptr};
|
||||
|
||||
// Handles reading the media file from flash or a url
|
||||
static void read_task_(void *params);
|
||||
TaskHandle_t read_task_handle_{nullptr};
|
||||
StaticTask_t read_task_stack_;
|
||||
StackType_t *read_task_stack_buffer_{nullptr};
|
||||
|
||||
// Decodes the media file into PCM audio
|
||||
static void decode_task_(void *params);
|
||||
TaskHandle_t decode_task_handle_{nullptr};
|
||||
StaticTask_t decode_task_stack_;
|
||||
StackType_t *decode_task_stack_buffer_{nullptr};
|
||||
|
||||
// Resamples the audio to match the specified target sample rate. Converts mono audio to stereo audio if necessary.
|
||||
static void resample_task_(void *params);
|
||||
TaskHandle_t resample_task_handle_{nullptr};
|
||||
StaticTask_t resample_task_stack_;
|
||||
StackType_t *resample_task_stack_buffer_{nullptr};
|
||||
};
|
||||
|
||||
} // namespace nabu
|
||||
} // namespace esphome
|
||||
|
||||
#endif
|
210
esphome/components/nabu/audio_reader.cpp
Normal file
210
esphome/components/nabu/audio_reader.cpp
Normal file
|
@ -0,0 +1,210 @@
|
|||
#ifdef USE_ESP_IDF
|
||||
|
||||
#include "audio_reader.h"
|
||||
|
||||
#include "esphome/core/helpers.h"
|
||||
#include "esphome/core/ring_buffer.h"
|
||||
|
||||
#if CONFIG_MBEDTLS_CERTIFICATE_BUNDLE
|
||||
#include "esp_crt_bundle.h"
|
||||
#endif
|
||||
|
||||
namespace esphome {
|
||||
namespace nabu {
|
||||
|
||||
static const size_t READ_WRITE_TIMEOUT_MS = 20;
|
||||
|
||||
// The number of times the http read times out with no data before throwing an error
|
||||
static const size_t ERROR_COUNT_NO_DATA_READ_TIMEOUT = 10;
|
||||
|
||||
AudioReader::AudioReader(esphome::RingBuffer *output_ring_buffer, size_t transfer_buffer_size) {
|
||||
this->output_ring_buffer_ = output_ring_buffer;
|
||||
this->transfer_buffer_size_ = transfer_buffer_size;
|
||||
}
|
||||
|
||||
AudioReader::~AudioReader() {
|
||||
if (this->transfer_buffer_ != nullptr) {
|
||||
ExternalRAMAllocator<uint8_t> allocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE);
|
||||
allocator.deallocate(this->transfer_buffer_, this->transfer_buffer_size_);
|
||||
}
|
||||
|
||||
this->cleanup_connection_();
|
||||
}
|
||||
|
||||
esp_err_t AudioReader::allocate_buffers_() {
|
||||
ExternalRAMAllocator<uint8_t> allocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE);
|
||||
if (this->transfer_buffer_ == nullptr)
|
||||
this->transfer_buffer_ = allocator.allocate(this->transfer_buffer_size_);
|
||||
|
||||
if (this->transfer_buffer_ == nullptr)
|
||||
return ESP_ERR_NO_MEM;
|
||||
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
esp_err_t AudioReader::start(MediaFile *media_file, MediaFileType &file_type) {
|
||||
file_type = MediaFileType::NONE;
|
||||
|
||||
esp_err_t err = this->allocate_buffers_();
|
||||
if (err != ESP_OK) {
|
||||
return err;
|
||||
}
|
||||
|
||||
this->current_media_file_ = media_file;
|
||||
|
||||
this->transfer_buffer_current_ = media_file->data;
|
||||
this->transfer_buffer_length_ = media_file->length;
|
||||
file_type = media_file->file_type;
|
||||
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
esp_err_t AudioReader::start(const std::string &uri, MediaFileType &file_type) {
|
||||
file_type = MediaFileType::NONE;
|
||||
|
||||
esp_err_t err = this->allocate_buffers_();
|
||||
if (err != ESP_OK) {
|
||||
return err;
|
||||
}
|
||||
|
||||
this->cleanup_connection_();
|
||||
|
||||
if (uri.empty()) {
|
||||
return ESP_ERR_INVALID_ARG;
|
||||
}
|
||||
|
||||
esp_http_client_config_t client_config = {};
|
||||
|
||||
client_config.url = uri.c_str();
|
||||
client_config.cert_pem = nullptr;
|
||||
client_config.disable_auto_redirect = false;
|
||||
client_config.max_redirection_count = 10;
|
||||
client_config.buffer_size = 512;
|
||||
client_config.keep_alive_enable = true;
|
||||
client_config.timeout_ms = 5000; // Doesn't raise an error if exceeded in esp-idf v4.4, it just prevents the
|
||||
// http_client_read command from blocking for too long
|
||||
|
||||
#if CONFIG_MBEDTLS_CERTIFICATE_BUNDLE
|
||||
if (uri.find("https:") != std::string::npos) {
|
||||
client_config.crt_bundle_attach = esp_crt_bundle_attach;
|
||||
}
|
||||
#endif
|
||||
|
||||
this->client_ = esp_http_client_init(&client_config);
|
||||
|
||||
if (this->client_ == nullptr) {
|
||||
return ESP_FAIL;
|
||||
}
|
||||
|
||||
if ((err = esp_http_client_open(this->client_, 0)) != ESP_OK) {
|
||||
this->cleanup_connection_();
|
||||
return err;
|
||||
}
|
||||
|
||||
int content_length = esp_http_client_fetch_headers(this->client_);
|
||||
|
||||
char url[500];
|
||||
err = esp_http_client_get_url(this->client_, url, 500);
|
||||
if (err != ESP_OK) {
|
||||
this->cleanup_connection_();
|
||||
return err;
|
||||
}
|
||||
|
||||
std::string url_string = url;
|
||||
|
||||
if (str_endswith(url_string, ".wav")) {
|
||||
file_type = MediaFileType::WAV;
|
||||
} else if (str_endswith(url_string, ".mp3")) {
|
||||
file_type = MediaFileType::MP3;
|
||||
} else if (str_endswith(url_string, ".flac")) {
|
||||
file_type = MediaFileType::FLAC;
|
||||
} else {
|
||||
file_type = MediaFileType::NONE;
|
||||
this->cleanup_connection_();
|
||||
return ESP_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
this->transfer_buffer_current_ = this->transfer_buffer_;
|
||||
this->transfer_buffer_length_ = 0;
|
||||
this->no_data_read_count_ = 0;
|
||||
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
AudioReaderState AudioReader::read() {
|
||||
if (this->client_ != nullptr) {
|
||||
return this->http_read_();
|
||||
} else if (this->current_media_file_ != nullptr) {
|
||||
return this->file_read_();
|
||||
}
|
||||
|
||||
return AudioReaderState::FAILED;
|
||||
}
|
||||
|
||||
AudioReaderState AudioReader::file_read_() {
|
||||
if (this->transfer_buffer_length_ > 0) {
|
||||
size_t bytes_written = this->output_ring_buffer_->write_without_replacement(
|
||||
(void *) this->transfer_buffer_current_, this->transfer_buffer_length_, pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
|
||||
this->transfer_buffer_length_ -= bytes_written;
|
||||
this->transfer_buffer_current_ += bytes_written;
|
||||
|
||||
return AudioReaderState::READING;
|
||||
}
|
||||
return AudioReaderState::FINISHED;
|
||||
}
|
||||
|
||||
AudioReaderState AudioReader::http_read_() {
|
||||
if (this->transfer_buffer_length_ > 0) {
|
||||
size_t bytes_written = this->output_ring_buffer_->write_without_replacement(
|
||||
(void *) this->transfer_buffer_, this->transfer_buffer_length_, pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
|
||||
this->transfer_buffer_length_ -= bytes_written;
|
||||
|
||||
// Shift remaining data to the start of the transfer buffer
|
||||
memmove(this->transfer_buffer_, this->transfer_buffer_ + bytes_written, this->transfer_buffer_length_);
|
||||
}
|
||||
|
||||
if (esp_http_client_is_complete_data_received(this->client_)) {
|
||||
if (this->transfer_buffer_length_ == 0) {
|
||||
this->cleanup_connection_();
|
||||
return AudioReaderState::FINISHED;
|
||||
}
|
||||
} else {
|
||||
size_t bytes_to_read = this->transfer_buffer_size_ - this->transfer_buffer_length_;
|
||||
int received_len = esp_http_client_read(
|
||||
this->client_, (char *) this->transfer_buffer_ + this->transfer_buffer_length_, bytes_to_read);
|
||||
|
||||
if (received_len > 0) {
|
||||
this->transfer_buffer_length_ += received_len;
|
||||
this->no_data_read_count_ = 0;
|
||||
} else if (received_len < 0) {
|
||||
// HTTP read error
|
||||
this->cleanup_connection_();
|
||||
return AudioReaderState::FAILED;
|
||||
} else {
|
||||
if (bytes_to_read > 0) {
|
||||
// Read timed out
|
||||
++this->no_data_read_count_;
|
||||
if (this->no_data_read_count_ >= ERROR_COUNT_NO_DATA_READ_TIMEOUT) {
|
||||
// Timed out with no data read too many times, so the http read has failed
|
||||
this->cleanup_connection_();
|
||||
return AudioReaderState::FAILED;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return AudioReaderState::READING;
|
||||
}
|
||||
|
||||
void AudioReader::cleanup_connection_() {
|
||||
if (this->client_ != nullptr) {
|
||||
esp_http_client_close(this->client_);
|
||||
esp_http_client_cleanup(this->client_);
|
||||
this->client_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace nabu
|
||||
} // namespace esphome
|
||||
|
||||
#endif
|
54
esphome/components/nabu/audio_reader.h
Normal file
54
esphome/components/nabu/audio_reader.h
Normal file
|
@ -0,0 +1,54 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef USE_ESP_IDF
|
||||
|
||||
#include "nabu_media_helpers.h"
|
||||
#include "esphome/core/ring_buffer.h"
|
||||
|
||||
#include <esp_http_client.h>
|
||||
|
||||
namespace esphome {
|
||||
namespace nabu {
|
||||
|
||||
enum class AudioReaderState : uint8_t {
|
||||
READING = 0,
|
||||
FINISHED,
|
||||
FAILED,
|
||||
};
|
||||
|
||||
class AudioReader {
|
||||
public:
|
||||
AudioReader(esphome::RingBuffer *output_ring_buffer, size_t transfer_buffer_size);
|
||||
~AudioReader();
|
||||
|
||||
esp_err_t start(const std::string &uri, MediaFileType &file_type);
|
||||
esp_err_t start(MediaFile *media_file, MediaFileType &file_type);
|
||||
|
||||
AudioReaderState read();
|
||||
|
||||
protected:
|
||||
esp_err_t allocate_buffers_();
|
||||
|
||||
AudioReaderState file_read_();
|
||||
AudioReaderState http_read_();
|
||||
|
||||
void cleanup_connection_();
|
||||
|
||||
esphome::RingBuffer *output_ring_buffer_;
|
||||
|
||||
size_t transfer_buffer_length_; // Amount of data currently stored in transfer buffer (in bytes)
|
||||
size_t transfer_buffer_size_; // Capacity of transfer buffer (in bytes)
|
||||
|
||||
ssize_t no_data_read_count_;
|
||||
|
||||
uint8_t *transfer_buffer_{nullptr};
|
||||
const uint8_t *transfer_buffer_current_{nullptr};
|
||||
|
||||
esp_http_client_handle_t client_{nullptr};
|
||||
|
||||
MediaFile *current_media_file_{nullptr};
|
||||
};
|
||||
} // namespace nabu
|
||||
} // namespace esphome
|
||||
|
||||
#endif
|
317
esphome/components/nabu/audio_resampler.cpp
Normal file
317
esphome/components/nabu/audio_resampler.cpp
Normal file
|
@ -0,0 +1,317 @@
|
|||
#ifdef USE_ESP_IDF
|
||||
|
||||
#include "audio_resampler.h"
|
||||
|
||||
#include "esphome/core/ring_buffer.h"
|
||||
#include "esphome/core/helpers.h"
|
||||
|
||||
namespace esphome {
|
||||
namespace nabu {
|
||||
|
||||
static const size_t NUM_TAPS = 32;
|
||||
static const size_t NUM_FILTERS = 32;
|
||||
static const bool USE_PRE_POST_FILTER = true;
|
||||
|
||||
// These output parameters are currently hardcoded in the elements further down the pipeline (mixer and speaker)
|
||||
static const uint8_t OUTPUT_CHANNELS = 2;
|
||||
static const uint8_t OUTPUT_BITS_PER_SAMPLE = 16;
|
||||
|
||||
static const size_t READ_WRITE_TIMEOUT_MS = 20;
|
||||
|
||||
AudioResampler::AudioResampler(RingBuffer *input_ring_buffer, RingBuffer *output_ring_buffer,
|
||||
size_t internal_buffer_samples) {
|
||||
this->input_ring_buffer_ = input_ring_buffer;
|
||||
this->output_ring_buffer_ = output_ring_buffer;
|
||||
this->internal_buffer_samples_ = internal_buffer_samples;
|
||||
}
|
||||
|
||||
AudioResampler::~AudioResampler() {
|
||||
ExternalRAMAllocator<int16_t> int16_allocator(ExternalRAMAllocator<int16_t>::ALLOW_FAILURE);
|
||||
ExternalRAMAllocator<float> float_allocator(ExternalRAMAllocator<float>::ALLOW_FAILURE);
|
||||
|
||||
if (this->input_buffer_ != nullptr) {
|
||||
int16_allocator.deallocate(this->input_buffer_, this->internal_buffer_samples_);
|
||||
}
|
||||
if (this->output_buffer_ != nullptr) {
|
||||
int16_allocator.deallocate(this->output_buffer_, this->internal_buffer_samples_);
|
||||
}
|
||||
if (this->float_input_buffer_ != nullptr) {
|
||||
float_allocator.deallocate(this->float_input_buffer_, this->internal_buffer_samples_);
|
||||
}
|
||||
if (this->float_output_buffer_ != nullptr) {
|
||||
float_allocator.deallocate(this->float_output_buffer_, this->internal_buffer_samples_);
|
||||
}
|
||||
if (this->resampler_ != nullptr) {
|
||||
resampleFree(this->resampler_);
|
||||
this->resampler_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
esp_err_t AudioResampler::allocate_buffers_() {
|
||||
ExternalRAMAllocator<int16_t> int16_allocator(ExternalRAMAllocator<int16_t>::ALLOW_FAILURE);
|
||||
ExternalRAMAllocator<float> float_allocator(ExternalRAMAllocator<float>::ALLOW_FAILURE);
|
||||
|
||||
if (this->input_buffer_ == nullptr)
|
||||
this->input_buffer_ = int16_allocator.allocate(this->internal_buffer_samples_);
|
||||
if (this->output_buffer_ == nullptr)
|
||||
this->output_buffer_ = int16_allocator.allocate(this->internal_buffer_samples_);
|
||||
|
||||
if (this->float_input_buffer_ == nullptr)
|
||||
this->float_input_buffer_ = float_allocator.allocate(this->internal_buffer_samples_);
|
||||
|
||||
if (this->float_output_buffer_ == nullptr)
|
||||
this->float_output_buffer_ = float_allocator.allocate(this->internal_buffer_samples_);
|
||||
|
||||
if ((this->input_buffer_ == nullptr) || (this->output_buffer_ == nullptr) || (this->float_input_buffer_ == nullptr) ||
|
||||
(this->float_output_buffer_ == nullptr)) {
|
||||
return ESP_ERR_NO_MEM;
|
||||
}
|
||||
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
esp_err_t AudioResampler::start(audio::AudioStreamInfo &stream_info, uint32_t target_sample_rate,
|
||||
ResampleInfo &resample_info) {
|
||||
esp_err_t err = this->allocate_buffers_();
|
||||
if (err != ESP_OK) {
|
||||
return err;
|
||||
}
|
||||
|
||||
this->stream_info_ = stream_info;
|
||||
|
||||
this->input_buffer_current_ = this->input_buffer_;
|
||||
this->input_buffer_length_ = 0;
|
||||
this->float_input_buffer_current_ = this->float_input_buffer_;
|
||||
this->float_input_buffer_length_ = 0;
|
||||
|
||||
this->output_buffer_current_ = this->output_buffer_;
|
||||
this->output_buffer_length_ = 0;
|
||||
this->float_output_buffer_current_ = this->float_output_buffer_;
|
||||
this->float_output_buffer_length_ = 0;
|
||||
|
||||
resample_info.mono_to_stereo = (stream_info.channels != 2);
|
||||
|
||||
if ((stream_info.channels > OUTPUT_CHANNELS) || (stream_info_.bits_per_sample != OUTPUT_BITS_PER_SAMPLE)) {
|
||||
return ESP_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
if (stream_info.channels > 0) {
|
||||
this->channel_factor_ = 2 / stream_info.channels;
|
||||
}
|
||||
|
||||
if (stream_info.sample_rate != target_sample_rate) {
|
||||
int flags = 0;
|
||||
|
||||
resample_info.resample = true;
|
||||
|
||||
this->sample_ratio_ = static_cast<float>(target_sample_rate) / static_cast<float>(stream_info.sample_rate);
|
||||
|
||||
if (this->sample_ratio_ < 1.0) {
|
||||
this->lowpass_ratio_ -= (10.24 / 16);
|
||||
|
||||
if (this->lowpass_ratio_ < 0.84) {
|
||||
this->lowpass_ratio_ = 0.84;
|
||||
}
|
||||
|
||||
if (this->lowpass_ratio_ < this->sample_ratio_) {
|
||||
// avoid discontinuities near unity sample ratios
|
||||
this->lowpass_ratio_ = this->sample_ratio_;
|
||||
}
|
||||
}
|
||||
if (this->lowpass_ratio_ * this->sample_ratio_ < 0.98 && USE_PRE_POST_FILTER) {
|
||||
float cutoff = this->lowpass_ratio_ * this->sample_ratio_ / 2.0;
|
||||
biquad_lowpass(&this->lowpass_coeff_, cutoff);
|
||||
this->pre_filter_ = true;
|
||||
}
|
||||
|
||||
if (this->lowpass_ratio_ / this->sample_ratio_ < 0.98 && USE_PRE_POST_FILTER && !this->pre_filter_) {
|
||||
float cutoff = this->lowpass_ratio_ / this->sample_ratio_ / 2.0;
|
||||
biquad_lowpass(&this->lowpass_coeff_, cutoff);
|
||||
this->post_filter_ = true;
|
||||
}
|
||||
|
||||
if (this->pre_filter_ || this->post_filter_) {
|
||||
for (int i = 0; i < stream_info.channels; ++i) {
|
||||
biquad_init(&this->lowpass_[i][0], &this->lowpass_coeff_, 1.0);
|
||||
biquad_init(&this->lowpass_[i][1], &this->lowpass_coeff_, 1.0);
|
||||
}
|
||||
}
|
||||
|
||||
if (this->sample_ratio_ < 1.0) {
|
||||
this->resampler_ = resampleInit(stream_info.channels, NUM_TAPS, NUM_FILTERS,
|
||||
this->sample_ratio_ * this->lowpass_ratio_, flags | INCLUDE_LOWPASS);
|
||||
} else if (this->lowpass_ratio_ < 1.0) {
|
||||
this->resampler_ =
|
||||
resampleInit(stream_info.channels, NUM_TAPS, NUM_FILTERS, this->lowpass_ratio_, flags | INCLUDE_LOWPASS);
|
||||
} else {
|
||||
this->resampler_ = resampleInit(stream_info.channels, NUM_TAPS, NUM_FILTERS, 1.0, flags);
|
||||
}
|
||||
|
||||
resampleAdvancePosition(this->resampler_, NUM_TAPS / 2.0);
|
||||
|
||||
} else {
|
||||
resample_info.resample = false;
|
||||
}
|
||||
|
||||
this->resample_info_ = resample_info;
|
||||
return ESP_OK;
|
||||
}
|
||||
|
||||
AudioResamplerState AudioResampler::resample(bool stop_gracefully) {
|
||||
if (stop_gracefully) {
|
||||
if ((this->input_ring_buffer_->available() == 0) && (this->output_ring_buffer_->available() == 0) &&
|
||||
(this->input_buffer_length_ == 0) && (this->output_buffer_length_ == 0)) {
|
||||
return AudioResamplerState::FINISHED;
|
||||
}
|
||||
}
|
||||
|
||||
if (this->output_buffer_length_ > 0) {
|
||||
size_t bytes_to_write = this->output_buffer_length_;
|
||||
|
||||
if (bytes_to_write > 0) {
|
||||
size_t bytes_written = this->output_ring_buffer_->write_without_replacement(
|
||||
(void *) this->output_buffer_current_, bytes_to_write, pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
|
||||
|
||||
this->output_buffer_current_ += bytes_written / sizeof(int16_t);
|
||||
this->output_buffer_length_ -= bytes_written;
|
||||
}
|
||||
|
||||
return AudioResamplerState::RESAMPLING;
|
||||
}
|
||||
|
||||
// Copy audio data directly to output_buffer if resampling isn't required
|
||||
if (!this->resample_info_.resample && !this->resample_info_.mono_to_stereo) {
|
||||
size_t bytes_read =
|
||||
this->input_ring_buffer_->read((void *) this->output_buffer_, this->internal_buffer_samples_ * sizeof(int16_t),
|
||||
pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
|
||||
|
||||
this->output_buffer_current_ = this->output_buffer_;
|
||||
this->output_buffer_length_ += bytes_read;
|
||||
|
||||
return AudioResamplerState::RESAMPLING;
|
||||
}
|
||||
|
||||
//////
|
||||
// Refill input buffer
|
||||
//////
|
||||
|
||||
// Depending on if we are converting mono to stereo or if we are upsampling, we may need to restrict how many input
|
||||
// samples we transfer
|
||||
size_t max_input_samples = this->internal_buffer_samples_;
|
||||
|
||||
// Mono to stereo -> cut in half
|
||||
max_input_samples /= (2 / this->stream_info_.channels);
|
||||
|
||||
if (this->sample_ratio_ > 1.0) {
|
||||
// Upsampling -> reduce by a factor of the ceiling of sample_ratio_
|
||||
uint32_t upsampling_factor = std::ceil(this->sample_ratio_);
|
||||
max_input_samples /= upsampling_factor;
|
||||
}
|
||||
|
||||
// Move old data to the start of the buffer
|
||||
if (this->input_buffer_length_ > 0) {
|
||||
memmove((void *) this->input_buffer_, (void *) this->input_buffer_current_, this->input_buffer_length_);
|
||||
}
|
||||
this->input_buffer_current_ = this->input_buffer_;
|
||||
|
||||
// Copy new data to the end of the of the buffer
|
||||
size_t bytes_to_read = max_input_samples * sizeof(int16_t) - this->input_buffer_length_;
|
||||
|
||||
if (bytes_to_read > 0) {
|
||||
int16_t *new_input_buffer_data = this->input_buffer_ + this->input_buffer_length_ / sizeof(int16_t);
|
||||
size_t bytes_read = this->input_ring_buffer_->read((void *) new_input_buffer_data, bytes_to_read,
|
||||
pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
|
||||
|
||||
this->input_buffer_length_ += bytes_read;
|
||||
}
|
||||
|
||||
if (this->input_buffer_length_ == 0) {
|
||||
return AudioResamplerState::RESAMPLING;
|
||||
}
|
||||
|
||||
if (this->resample_info_.resample) {
|
||||
if (this->input_buffer_length_ > 0) {
|
||||
// Samples are indiviudal int16 values. Frames include 1 sample for mono and 2 samples for stereo
|
||||
// Be careful converting between bytes, samples, and frames!
|
||||
// 1 sample = 2 bytes = sizeof(int16_t)
|
||||
// if mono:
|
||||
// 1 frame = 1 sample
|
||||
// if stereo:
|
||||
// 1 frame = 2 samples (left and right)
|
||||
|
||||
size_t samples_read = this->input_buffer_length_ / sizeof(int16_t);
|
||||
|
||||
for (int i = 0; i < samples_read; ++i) {
|
||||
this->float_input_buffer_[i] = static_cast<float>(this->input_buffer_[i]) / 32768.0f;
|
||||
}
|
||||
|
||||
size_t frames_read = samples_read / this->stream_info_.channels;
|
||||
|
||||
if (this->pre_filter_) {
|
||||
for (int i = 0; i < this->stream_info_.channels; ++i) {
|
||||
biquad_apply_buffer(&this->lowpass_[i][0], this->float_input_buffer_ + i, frames_read,
|
||||
this->stream_info_.channels);
|
||||
biquad_apply_buffer(&this->lowpass_[i][1], this->float_input_buffer_ + i, frames_read,
|
||||
this->stream_info_.channels);
|
||||
}
|
||||
}
|
||||
|
||||
ResampleResult res;
|
||||
|
||||
res = resampleProcessInterleaved(this->resampler_, this->float_input_buffer_, frames_read,
|
||||
this->float_output_buffer_,
|
||||
this->internal_buffer_samples_ / this->channel_factor_, this->sample_ratio_);
|
||||
|
||||
size_t frames_used = res.input_used;
|
||||
size_t samples_used = frames_used * this->stream_info_.channels;
|
||||
|
||||
size_t frames_generated = res.output_generated;
|
||||
if (this->post_filter_) {
|
||||
for (int i = 0; i < this->stream_info_.channels; ++i) {
|
||||
biquad_apply_buffer(&this->lowpass_[i][0], this->float_output_buffer_ + i, frames_generated,
|
||||
this->stream_info_.channels);
|
||||
biquad_apply_buffer(&this->lowpass_[i][1], this->float_output_buffer_ + i, frames_generated,
|
||||
this->stream_info_.channels);
|
||||
}
|
||||
}
|
||||
|
||||
size_t samples_generated = frames_generated * this->stream_info_.channels;
|
||||
|
||||
for (int i = 0; i < samples_generated; ++i) {
|
||||
this->output_buffer_[i] = static_cast<int16_t>(this->float_output_buffer_[i] * 32767);
|
||||
}
|
||||
|
||||
this->input_buffer_current_ += samples_used;
|
||||
this->input_buffer_length_ -= samples_used * sizeof(int16_t);
|
||||
|
||||
this->output_buffer_current_ = this->output_buffer_;
|
||||
this->output_buffer_length_ += samples_generated * sizeof(int16_t);
|
||||
}
|
||||
} else {
|
||||
size_t bytes_to_transfer =
|
||||
std::min(this->internal_buffer_samples_ / this->channel_factor_ * sizeof(int16_t), this->input_buffer_length_);
|
||||
std::memcpy((void *) this->output_buffer_, (void *) this->input_buffer_current_, bytes_to_transfer);
|
||||
|
||||
this->input_buffer_current_ += bytes_to_transfer / sizeof(int16_t);
|
||||
this->input_buffer_length_ -= bytes_to_transfer;
|
||||
|
||||
this->output_buffer_current_ = this->output_buffer_;
|
||||
this->output_buffer_length_ += bytes_to_transfer;
|
||||
}
|
||||
|
||||
if (this->resample_info_.mono_to_stereo) {
|
||||
// Convert mono to stereo
|
||||
for (int i = this->output_buffer_length_ / (sizeof(int16_t)) - 1; i >= 0; --i) {
|
||||
this->output_buffer_[2 * i] = this->output_buffer_[i];
|
||||
this->output_buffer_[2 * i + 1] = this->output_buffer_[i];
|
||||
}
|
||||
|
||||
this->output_buffer_length_ *= 2; // double the bytes for stereo samples
|
||||
}
|
||||
return AudioResamplerState::RESAMPLING;
|
||||
}
|
||||
|
||||
} // namespace nabu
|
||||
} // namespace esphome
|
||||
|
||||
#endif
|
82
esphome/components/nabu/audio_resampler.h
Normal file
82
esphome/components/nabu/audio_resampler.h
Normal file
|
@ -0,0 +1,82 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef USE_ESP_IDF
|
||||
|
||||
#include "biquad.h"
|
||||
#include "resampler.h"
|
||||
|
||||
#include "esphome/components/audio/audio.h"
|
||||
#include "esphome/core/ring_buffer.h"
|
||||
|
||||
namespace esphome {
|
||||
namespace nabu {
|
||||
|
||||
enum class AudioResamplerState : uint8_t {
|
||||
INITIALIZED = 0,
|
||||
RESAMPLING,
|
||||
FINISHED,
|
||||
FAILED,
|
||||
};
|
||||
|
||||
struct ResampleInfo {
|
||||
bool resample;
|
||||
bool mono_to_stereo;
|
||||
};
|
||||
|
||||
class AudioResampler {
|
||||
public:
|
||||
AudioResampler(esphome::RingBuffer *input_ring_buffer, esphome::RingBuffer *output_ring_buffer,
|
||||
size_t internal_buffer_samples);
|
||||
~AudioResampler();
|
||||
|
||||
/// @brief Sets up the various bits necessary to resample
|
||||
/// @param stream_info the incoming sample rate, bits per sample, and number of channels
|
||||
/// @param target_sample_rate the necessary sample rate to convert to
|
||||
/// @return ESP_OK if it is able to convert the incoming stream or an error otherwise
|
||||
esp_err_t start(audio::AudioStreamInfo &stream_info, uint32_t target_sample_rate, ResampleInfo &resample_info);
|
||||
|
||||
AudioResamplerState resample(bool stop_gracefully);
|
||||
|
||||
protected:
|
||||
esp_err_t allocate_buffers_();
|
||||
|
||||
esphome::RingBuffer *input_ring_buffer_;
|
||||
esphome::RingBuffer *output_ring_buffer_;
|
||||
size_t internal_buffer_samples_;
|
||||
|
||||
int16_t *input_buffer_{nullptr};
|
||||
int16_t *input_buffer_current_{nullptr};
|
||||
size_t input_buffer_length_;
|
||||
|
||||
int16_t *output_buffer_{nullptr};
|
||||
int16_t *output_buffer_current_{nullptr};
|
||||
size_t output_buffer_length_;
|
||||
|
||||
float *float_input_buffer_{nullptr};
|
||||
float *float_input_buffer_current_{nullptr};
|
||||
size_t float_input_buffer_length_;
|
||||
|
||||
float *float_output_buffer_{nullptr};
|
||||
float *float_output_buffer_current_{nullptr};
|
||||
size_t float_output_buffer_length_;
|
||||
|
||||
audio::AudioStreamInfo stream_info_;
|
||||
ResampleInfo resample_info_;
|
||||
|
||||
Resample *resampler_{nullptr};
|
||||
|
||||
Biquad lowpass_[2][2];
|
||||
BiquadCoefficients lowpass_coeff_;
|
||||
|
||||
float sample_ratio_{1.0};
|
||||
float lowpass_ratio_{1.0};
|
||||
uint8_t channel_factor_{1};
|
||||
|
||||
bool pre_filter_{false};
|
||||
bool post_filter_{false};
|
||||
};
|
||||
|
||||
} // namespace nabu
|
||||
} // namespace esphome
|
||||
|
||||
#endif
|
44
esphome/components/nabu/automation.h
Normal file
44
esphome/components/nabu/automation.h
Normal file
|
@ -0,0 +1,44 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef USE_ESP_IDF
|
||||
|
||||
#include "esphome/core/automation.h"
|
||||
#include "nabu_media_player.h"
|
||||
|
||||
namespace esphome {
|
||||
namespace nabu {
|
||||
|
||||
template<typename... Ts> class DuckingSetAction : public Action<Ts...>, public Parented<NabuMediaPlayer> {
|
||||
TEMPLATABLE_VALUE(uint8_t, decibel_reduction)
|
||||
TEMPLATABLE_VALUE(float, duration)
|
||||
void play(Ts... x) override {
|
||||
this->parent_->set_ducking_reduction(this->decibel_reduction_.value(x...), this->duration_.value(x...));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename... Ts> class PlayLocalMediaAction : public Action<Ts...>, public Parented<NabuMediaPlayer> {
|
||||
TEMPLATABLE_VALUE(MediaFile *, media_file)
|
||||
TEMPLATABLE_VALUE(bool, announcement)
|
||||
void play(Ts... x) override {
|
||||
this->parent_->play_file(this->media_file_.value(x...), this->announcement_.value(x...));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename... Ts> class StopPipelineAction : public Action<Ts...>, public Parented<NabuMediaPlayer> {
|
||||
TEMPLATABLE_VALUE(AudioPipelineType, pipeline_type)
|
||||
void play(Ts... x) override {
|
||||
bool announcement = false;
|
||||
if (this->pipeline_type_.value(x...) == AudioPipelineType::ANNOUNCEMENT) {
|
||||
announcement = true;
|
||||
}
|
||||
this->parent_->make_call()
|
||||
.set_command(media_player::MediaPlayerCommand::MEDIA_PLAYER_COMMAND_STOP)
|
||||
.set_announcement(announcement)
|
||||
.perform();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace nabu
|
||||
} // namespace esphome
|
||||
|
||||
#endif
|
382
esphome/components/nabu/media_player.py
Normal file
382
esphome/components/nabu/media_player.py
Normal file
|
@ -0,0 +1,382 @@
|
|||
"""Nabu Media Player Setup."""
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from esphome import automation, external_files
|
||||
import esphome.codegen as cg
|
||||
from esphome.components import esp32, media_player, speaker
|
||||
import esphome.config_validation as cv
|
||||
from esphome.const import (
|
||||
CONF_DURATION,
|
||||
CONF_FILE,
|
||||
CONF_FILES,
|
||||
CONF_ID,
|
||||
CONF_PATH,
|
||||
CONF_RAW_DATA_ID,
|
||||
CONF_SAMPLE_RATE,
|
||||
CONF_SPEAKER,
|
||||
CONF_TYPE,
|
||||
CONF_URL,
|
||||
)
|
||||
from esphome.core import CORE, HexInt
|
||||
from esphome.external_files import download_content
|
||||
|
||||
_LOGGER = logging.getLogger(__name__)
|
||||
|
||||
AUTO_LOAD = ["audio", "psram"]
|
||||
|
||||
CODEOWNERS = ["@synesthesiam", "@kahrendt"]
|
||||
DEPENDENCIES = ["media_player"]
|
||||
DOMAIN = "file"
|
||||
|
||||
TYPE_LOCAL = "local"
|
||||
TYPE_WEB = "web"
|
||||
|
||||
CONF_DECIBEL_REDUCTION = "decibel_reduction"
|
||||
|
||||
CONF_ANNOUNCEMENT = "announcement"
|
||||
CONF_MEDIA_FILE = "media_file"
|
||||
CONF_PIPELINE = "pipeline"
|
||||
CONF_VOLUME_INCREMENT = "volume_increment"
|
||||
CONF_VOLUME_MIN = "volume_min"
|
||||
CONF_VOLUME_MAX = "volume_max"
|
||||
|
||||
CONF_ON_MUTE = "on_mute"
|
||||
CONF_ON_UNMUTE = "on_unmute"
|
||||
CONF_ON_VOLUME = "on_volume"
|
||||
|
||||
nabu_ns = cg.esphome_ns.namespace("nabu")
|
||||
NabuMediaPlayer = nabu_ns.class_("NabuMediaPlayer")
|
||||
NabuMediaPlayer = nabu_ns.class_(
|
||||
"NabuMediaPlayer",
|
||||
NabuMediaPlayer,
|
||||
media_player.MediaPlayer,
|
||||
cg.Component,
|
||||
)
|
||||
|
||||
MediaFile = nabu_ns.struct("MediaFile")
|
||||
MediaFileType = nabu_ns.enum("MediaFileType", is_class=True)
|
||||
MEDIA_FILE_TYPE_ENUM = {
|
||||
"NONE": MediaFileType.NONE,
|
||||
"WAV": MediaFileType.WAV,
|
||||
"MP3": MediaFileType.MP3,
|
||||
"FLAC": MediaFileType.FLAC,
|
||||
}
|
||||
|
||||
PipelineType = nabu_ns.enum("AudioPipelineType", is_class=True)
|
||||
PIPELINE_TYPE_ENUM = {
|
||||
"MEDIA": PipelineType.MEDIA,
|
||||
"ANNOUNCEMENT": PipelineType.ANNOUNCEMENT,
|
||||
}
|
||||
|
||||
PlayLocalMediaAction = nabu_ns.class_(
|
||||
"PlayLocalMediaAction", automation.Action, cg.Parented.template(NabuMediaPlayer)
|
||||
)
|
||||
StopPipelineAction = nabu_ns.class_(
|
||||
"StopPipelineAction", automation.Action, cg.Parented.template(NabuMediaPlayer)
|
||||
)
|
||||
DuckingSetAction = nabu_ns.class_(
|
||||
"DuckingSetAction", automation.Action, cg.Parented.template(NabuMediaPlayer)
|
||||
)
|
||||
|
||||
|
||||
def _compute_local_file_path(value: dict) -> Path:
|
||||
url = value[CONF_URL]
|
||||
h = hashlib.new("sha256")
|
||||
h.update(url.encode())
|
||||
key = h.hexdigest()[:8]
|
||||
base_dir = external_files.compute_local_file_dir(DOMAIN)
|
||||
_LOGGER.debug("_compute_local_file_path: base_dir=%s", base_dir / key)
|
||||
return base_dir / key
|
||||
|
||||
|
||||
def _download_web_file(value):
|
||||
url = value[CONF_URL]
|
||||
path = _compute_local_file_path(value)
|
||||
|
||||
download_content(url, path)
|
||||
_LOGGER.debug("download_web_file: path=%s", path)
|
||||
return value
|
||||
|
||||
|
||||
def _validate_file_shorthand(value):
|
||||
value = cv.string_strict(value)
|
||||
if value.startswith("http://") or value.startswith("https://"):
|
||||
return _file_schema(
|
||||
{
|
||||
CONF_TYPE: TYPE_WEB,
|
||||
CONF_URL: value,
|
||||
}
|
||||
)
|
||||
return _file_schema(
|
||||
{
|
||||
CONF_TYPE: TYPE_LOCAL,
|
||||
CONF_PATH: value,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def _file_schema(value):
|
||||
if isinstance(value, str):
|
||||
return _validate_file_shorthand(value)
|
||||
return TYPED_FILE_SCHEMA(value)
|
||||
|
||||
|
||||
def _read_audio_file_and_type(file_config):
|
||||
conf_file = file_config[CONF_FILE]
|
||||
file_source = conf_file[CONF_TYPE]
|
||||
if file_source == TYPE_LOCAL:
|
||||
path = CORE.relative_config_path(conf_file[CONF_PATH])
|
||||
elif file_source == TYPE_WEB:
|
||||
path = _compute_local_file_path(conf_file)
|
||||
else:
|
||||
raise cv.Invalid("Unsupported file source.")
|
||||
|
||||
with open(path, "rb") as f:
|
||||
data = f.read()
|
||||
|
||||
try:
|
||||
import puremagic
|
||||
|
||||
file_type: str = puremagic.from_string(data)
|
||||
except ImportError:
|
||||
try:
|
||||
from magic import Magic
|
||||
|
||||
magic = Magic(mime=True)
|
||||
file_type: str = magic.from_buffer(data)
|
||||
except ImportError as exc:
|
||||
raise cv.Invalid("Please install puremagic") from exc
|
||||
if file_type.startswith("."):
|
||||
file_type = file_type[1:]
|
||||
|
||||
media_file_type = MEDIA_FILE_TYPE_ENUM["NONE"]
|
||||
if file_type in ("wav"):
|
||||
media_file_type = MEDIA_FILE_TYPE_ENUM["WAV"]
|
||||
elif file_type in ("mp3", "mpeg", "mpga"):
|
||||
media_file_type = MEDIA_FILE_TYPE_ENUM["MP3"]
|
||||
elif file_type in ("flac"):
|
||||
media_file_type = MEDIA_FILE_TYPE_ENUM["FLAC"]
|
||||
|
||||
return data, media_file_type
|
||||
|
||||
|
||||
def _supported_local_file_validate(config):
|
||||
if files_list := config.get(CONF_FILES):
|
||||
for file_config in files_list:
|
||||
_, media_file_type = _read_audio_file_and_type(file_config)
|
||||
if str(media_file_type) == str(MEDIA_FILE_TYPE_ENUM["NONE"]):
|
||||
raise cv.Invalid("Unsupported local media file.")
|
||||
|
||||
|
||||
LOCAL_SCHEMA = cv.Schema(
|
||||
{
|
||||
cv.Required(CONF_PATH): cv.file_,
|
||||
}
|
||||
)
|
||||
|
||||
WEB_SCHEMA = cv.All(
|
||||
{
|
||||
cv.Required(CONF_URL): cv.url,
|
||||
},
|
||||
_download_web_file,
|
||||
)
|
||||
|
||||
|
||||
TYPED_FILE_SCHEMA = cv.typed_schema(
|
||||
{
|
||||
TYPE_LOCAL: LOCAL_SCHEMA,
|
||||
TYPE_WEB: WEB_SCHEMA,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
MEDIA_FILE_TYPE_SCHEMA = cv.Schema(
|
||||
{
|
||||
cv.Required(CONF_ID): cv.declare_id(MediaFile),
|
||||
cv.Required(CONF_FILE): _file_schema,
|
||||
cv.GenerateID(CONF_RAW_DATA_ID): cv.declare_id(cg.uint8),
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
CONFIG_SCHEMA = cv.All(
|
||||
media_player.MEDIA_PLAYER_SCHEMA.extend(
|
||||
{
|
||||
cv.GenerateID(): cv.declare_id(NabuMediaPlayer),
|
||||
cv.Required(CONF_SPEAKER): cv.use_id(speaker.Speaker),
|
||||
cv.Optional(CONF_SAMPLE_RATE, default=16000): cv.int_range(min=1),
|
||||
cv.Optional(CONF_VOLUME_INCREMENT, default=0.05): cv.percentage,
|
||||
cv.Optional(CONF_VOLUME_MAX, default=1.0): cv.percentage,
|
||||
cv.Optional(CONF_VOLUME_MIN, default=0.0): cv.percentage,
|
||||
cv.Optional(CONF_FILES): cv.ensure_list(MEDIA_FILE_TYPE_SCHEMA),
|
||||
cv.Optional(CONF_ON_MUTE): automation.validate_automation(single=True),
|
||||
cv.Optional(CONF_ON_UNMUTE): automation.validate_automation(single=True),
|
||||
cv.Optional(CONF_ON_VOLUME): automation.validate_automation(single=True),
|
||||
}
|
||||
),
|
||||
cv.only_with_esp_idf,
|
||||
)
|
||||
FINAL_VALIDATE_SCHEMA = _supported_local_file_validate
|
||||
|
||||
|
||||
async def to_code(config):
|
||||
cg.add_library("https://github.com/esphome/esp-audio-libs", "1.0.0")
|
||||
|
||||
# Wifi settings based on https://github.com/espressif/esp-adf/issues/297#issuecomment-783811702
|
||||
esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_STATIC_RX_BUFFER_NUM", 16)
|
||||
esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_DYNAMIC_RX_BUFFER_NUM", 512)
|
||||
esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_STATIC_TX_BUFFER", True)
|
||||
esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_TX_BUFFER_TYPE", 0)
|
||||
esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_STATIC_TX_BUFFER_NUM", 8)
|
||||
esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_CACHE_TX_BUFFER_NUM", 32)
|
||||
esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_AMPDU_TX_ENABLED", True)
|
||||
esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_TX_BA_WIN", 16)
|
||||
esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_AMPDU_RX_ENABLED", True)
|
||||
esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_RX_BA_WIN", 32)
|
||||
esp32.add_idf_sdkconfig_option("CONFIG_LWIP_MAX_ACTIVE_TCP", 16)
|
||||
esp32.add_idf_sdkconfig_option("CONFIG_LWIP_MAX_LISTENING_TCP", 16)
|
||||
esp32.add_idf_sdkconfig_option("CONFIG_TCP_MAXRTX", 12)
|
||||
esp32.add_idf_sdkconfig_option("CONFIG_TCP_SYNMAXRTX", 6)
|
||||
esp32.add_idf_sdkconfig_option("CONFIG_TCP_MSS", 1436)
|
||||
esp32.add_idf_sdkconfig_option("CONFIG_TCP_MSL", 60000)
|
||||
esp32.add_idf_sdkconfig_option("CONFIG_TCP_SND_BUF_DEFAULT", 5840)
|
||||
esp32.add_idf_sdkconfig_option(
|
||||
"CONFIG_TCP_WND_DEFAULT", 65535
|
||||
) # Adjusted from referenced settings to avoid compilation error
|
||||
esp32.add_idf_sdkconfig_option("CONFIG_TCP_RECVMBOX_SIZE", 512)
|
||||
esp32.add_idf_sdkconfig_option("CONFIG_TCP_QUEUE_OOSEQ", True)
|
||||
esp32.add_idf_sdkconfig_option("CONFIG_TCP_OVERSIZE_MSS", True)
|
||||
esp32.add_idf_sdkconfig_option("CONFIG_LWIP_WND_SCALE", True)
|
||||
esp32.add_idf_sdkconfig_option("CONFIG_TCP_RCV_SCALE", 3)
|
||||
esp32.add_idf_sdkconfig_option("CONFIG_LWIP_TCPIP_RECVMBOX_SIZE", 512)
|
||||
|
||||
var = cg.new_Pvariable(config[CONF_ID])
|
||||
await cg.register_component(var, config)
|
||||
await media_player.register_media_player(var, config)
|
||||
|
||||
cg.add_define("USE_OTA_STATE_CALLBACK")
|
||||
|
||||
cg.add(var.set_sample_rate(config[CONF_SAMPLE_RATE]))
|
||||
|
||||
cg.add(var.set_volume_increment(config[CONF_VOLUME_INCREMENT]))
|
||||
cg.add(var.set_volume_max(config[CONF_VOLUME_MAX]))
|
||||
cg.add(var.set_volume_min(config[CONF_VOLUME_MIN]))
|
||||
|
||||
spkr = await cg.get_variable(config[CONF_SPEAKER])
|
||||
cg.add(var.set_speaker(spkr))
|
||||
|
||||
if on_mute := config.get(CONF_ON_MUTE):
|
||||
await automation.build_automation(
|
||||
var.get_mute_trigger(),
|
||||
[],
|
||||
on_mute,
|
||||
)
|
||||
if on_unmute := config.get(CONF_ON_UNMUTE):
|
||||
await automation.build_automation(
|
||||
var.get_unmute_trigger(),
|
||||
[],
|
||||
on_unmute,
|
||||
)
|
||||
if on_volume := config.get(CONF_ON_VOLUME):
|
||||
await automation.build_automation(
|
||||
var.get_volume_trigger(),
|
||||
[(cg.float_, "x")],
|
||||
on_volume,
|
||||
)
|
||||
|
||||
if files_list := config.get(CONF_FILES):
|
||||
for file_config in files_list:
|
||||
data, media_file_type = _read_audio_file_and_type(file_config)
|
||||
|
||||
rhs = [HexInt(x) for x in data]
|
||||
prog_arr = cg.progmem_array(file_config[CONF_RAW_DATA_ID], rhs)
|
||||
|
||||
media_files_struct = cg.StructInitializer(
|
||||
MediaFile,
|
||||
(
|
||||
"data",
|
||||
prog_arr,
|
||||
),
|
||||
(
|
||||
"length",
|
||||
len(rhs),
|
||||
),
|
||||
(
|
||||
"file_type",
|
||||
media_file_type,
|
||||
),
|
||||
)
|
||||
|
||||
cg.new_Pvariable(
|
||||
file_config[CONF_ID],
|
||||
media_files_struct,
|
||||
)
|
||||
|
||||
|
||||
@automation.register_action(
|
||||
"nabu.play_local_media_file",
|
||||
PlayLocalMediaAction,
|
||||
cv.maybe_simple_value(
|
||||
{
|
||||
cv.GenerateID(): cv.use_id(NabuMediaPlayer),
|
||||
cv.Required(CONF_MEDIA_FILE): cv.use_id(MediaFile),
|
||||
cv.Optional(CONF_ANNOUNCEMENT, default=False): cv.boolean,
|
||||
},
|
||||
key=CONF_MEDIA_FILE,
|
||||
),
|
||||
)
|
||||
async def nabu_play_local_media_media_action(config, action_id, template_arg, args):
|
||||
var = cg.new_Pvariable(action_id, template_arg)
|
||||
await cg.register_parented(var, config[CONF_ID])
|
||||
media_file = await cg.get_variable(config[CONF_MEDIA_FILE])
|
||||
cg.add(var.set_media_file(media_file))
|
||||
cg.add(var.set_announcement(config[CONF_ANNOUNCEMENT]))
|
||||
return var
|
||||
|
||||
|
||||
@automation.register_action(
|
||||
"nabu.stop_pipeline",
|
||||
StopPipelineAction,
|
||||
cv.maybe_simple_value(
|
||||
{
|
||||
cv.GenerateID(): cv.use_id(NabuMediaPlayer),
|
||||
cv.Required(CONF_PIPELINE): cv.enum(PIPELINE_TYPE_ENUM, upper=True),
|
||||
},
|
||||
key=CONF_PIPELINE,
|
||||
),
|
||||
)
|
||||
async def nabu_stop_pipeline_action(config, action_id, template_arg, args):
|
||||
var = cg.new_Pvariable(action_id, template_arg)
|
||||
await cg.register_parented(var, config[CONF_ID])
|
||||
cg.add(var.set_pipeline_type(config[CONF_PIPELINE]))
|
||||
return var
|
||||
|
||||
|
||||
@automation.register_action(
|
||||
"nabu.set_ducking",
|
||||
DuckingSetAction,
|
||||
cv.Schema(
|
||||
{
|
||||
cv.GenerateID(): cv.use_id(NabuMediaPlayer),
|
||||
cv.Required(CONF_DECIBEL_REDUCTION): cv.templatable(
|
||||
cv.int_range(min=0, max=51)
|
||||
),
|
||||
cv.Optional(CONF_DURATION, default="0.0s"): cv.templatable(
|
||||
cv.positive_time_period_seconds
|
||||
),
|
||||
}
|
||||
),
|
||||
)
|
||||
async def ducking_set_to_code(config, action_id, template_arg, args):
|
||||
var = cg.new_Pvariable(action_id, template_arg)
|
||||
await cg.register_parented(var, config[CONF_ID])
|
||||
decibel_reduction = await cg.templatable(
|
||||
config[CONF_DECIBEL_REDUCTION], args, cg.uint8
|
||||
)
|
||||
cg.add(var.set_decibel_reduction(decibel_reduction))
|
||||
duration = await cg.templatable(config[CONF_DURATION], args, cg.float_)
|
||||
cg.add(var.set_duration(duration))
|
||||
return var
|
28
esphome/components/nabu/nabu_media_helpers.h
Normal file
28
esphome/components/nabu/nabu_media_helpers.h
Normal file
|
@ -0,0 +1,28 @@
|
|||
#pragma once
|
||||
|
||||
#ifdef USE_ESP_IDF
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstddef>
|
||||
|
||||
namespace esphome {
|
||||
namespace nabu {
|
||||
|
||||
enum class MediaFileType : uint8_t {
|
||||
NONE = 0,
|
||||
WAV,
|
||||
MP3,
|
||||
FLAC,
|
||||
};
|
||||
const char *media_player_file_type_to_string(MediaFileType file_type);
|
||||
|
||||
struct MediaFile {
|
||||
const uint8_t *data;
|
||||
size_t length;
|
||||
MediaFileType file_type;
|
||||
};
|
||||
|
||||
} // namespace nabu
|
||||
} // namespace esphome
|
||||
|
||||
#endif
|
475
esphome/components/nabu/nabu_media_player.cpp
Normal file
475
esphome/components/nabu/nabu_media_player.cpp
Normal file
|
@ -0,0 +1,475 @@
|
|||
#ifdef USE_ESP_IDF
|
||||
|
||||
#include "nabu_media_player.h"
|
||||
|
||||
#include "esphome/components/audio/audio.h"
|
||||
|
||||
#include "esphome/core/hal.h"
|
||||
#include "esphome/core/log.h"
|
||||
|
||||
#ifdef USE_OTA
|
||||
#include "esphome/components/ota/ota_backend.h"
|
||||
#endif
|
||||
|
||||
namespace esphome {
|
||||
namespace nabu {
|
||||
|
||||
// Framework:
|
||||
// - Media player that can handle two streams; one for media and one for announcements
|
||||
// - If played together, they are mixed with the announcement stream staying at full volume
|
||||
// - The media audio is scaled, if necessary, to avoid clipping when mixing an announcement stream
|
||||
// - The media audio can be further ducked via the ``set_ducking_reduction`` function
|
||||
// - Each stream is handled by an ``AudioPipeline`` object with three parts/tasks
|
||||
// - ``AudioReader`` handles reading from an HTTP source or from a PROGMEM flash set at compile time
|
||||
// - ``AudioDecoder`` handles decoding the audio file. All formats are limited to two channels and 16 bits per sample
|
||||
// - FLAC
|
||||
// - WAV
|
||||
// - MP3 (based on the libhelix decoder - a random mp3 file may be incompatible)
|
||||
// - ``AudioResampler`` handles converting the sample rate to the configured output sample rate and converting mono
|
||||
// to stereo
|
||||
// - The quality is not good, and it is slow! Please use audio at the configured sample rate to avoid these issues
|
||||
// - Each task will always run once started, but they will not doing anything until they are needed
|
||||
// - FreeRTOS Event Groups make up the inter-task communication
|
||||
// - The ``AudioPipeline`` sets up an output ring buffer for the Reader and Decoder parts. The next part/task
|
||||
// automatically pulls from the previous ring buffer
|
||||
// - The streams are mixed together in the ``AudioMixer`` task
|
||||
// - Each stream has a corresponding input buffer that the ``AudioResampler`` feeds directly
|
||||
// - Pausing the media stream is done here
|
||||
// - Media stream ducking is done here
|
||||
// - The output ring buffer feeds the configured speaker the audio directly
|
||||
// - Generic media player commands are received by the ``control`` function. The commands are added to the
|
||||
// ``media_control_command_queue_`` to be processed in the component's loop
|
||||
// - Local file play back is initiatied with ``play_file`` and adds it to the ``media_control_command_queue_``
|
||||
// - Starting a stream intializes the appropriate pipeline or stops it if it is already running
|
||||
// - Volume and mute commands are achieved by the ``mute``, ``unmute``, ``set_volume`` functions. The speaker
|
||||
// component handles the implementation details.
|
||||
// - Volume commands are ignored if the media control queue is full to avoid crashing when the track wheel is spun
|
||||
// fast
|
||||
// - Pausing is sent to the ``AudioMixer`` task. It only effects the media stream.
|
||||
// - The components main loop performs housekeeping:
|
||||
// - It reads the media control queue and processes it directly
|
||||
// - It watches the state of speaker and mixer tasks
|
||||
// - It determines the overall state of the media player by considering the state of each pipeline
|
||||
// - announcement playback takes highest priority
|
||||
// - All logging happens in the main loop task to reduce task stack memory usage.
|
||||
|
||||
static const size_t QUEUE_LENGTH = 20;
|
||||
|
||||
static const uint8_t NUMBER_OF_CHANNELS = 2; // Hard-coded expectation of stereo (2 channel) audio
|
||||
|
||||
static const UBaseType_t MEDIA_PIPELINE_TASK_PRIORITY = 1;
|
||||
static const UBaseType_t ANNOUNCEMENT_PIPELINE_TASK_PRIORITY = 1;
|
||||
static const UBaseType_t MIXER_TASK_PRIORITY = 10;
|
||||
|
||||
static const size_t TASK_DELAY_MS = 10;
|
||||
|
||||
static const float FIRST_BOOT_DEFAULT_VOLUME = 0.5f;
|
||||
|
||||
static const char *const TAG = "nabu_media_player";
|
||||
|
||||
const char *media_player_file_type_to_string(MediaFileType file_type) {
  switch (file_type) {
    case MediaFileType::FLAC:
      return "FLAC";
    case MediaFileType::MP3:
      return "MP3";
    case MediaFileType::WAV:
      return "WAV";
    default:
      return "unknown";
  }
}

void NabuMediaPlayer::setup() {
  state = media_player::MEDIA_PLAYER_STATE_IDLE;

  this->media_control_command_queue_ = xQueueCreate(QUEUE_LENGTH, sizeof(MediaCallCommand));

  this->pref_ = global_preferences->make_preference<VolumeRestoreState>(this->get_object_id_hash());

  VolumeRestoreState volume_restore_state;
  if (this->pref_.load(&volume_restore_state)) {
    this->set_volume_(volume_restore_state.volume);
    this->set_mute_state_(volume_restore_state.is_muted);
  } else {
    this->set_volume_(FIRST_BOOT_DEFAULT_VOLUME);
    this->set_mute_state_(false);
  }

#ifdef USE_OTA
  ota::get_global_ota_callback()->add_on_state_callback(
      [this](ota::OTAState state, float progress, uint8_t error, ota::OTAComponent *comp) {
        if (state == ota::OTA_STARTED) {
          if (this->audio_mixer_ != nullptr) {
            this->audio_mixer_->suspend_task();
          }
          if (this->media_pipeline_ != nullptr) {
            this->media_pipeline_->suspend_tasks();
          }
          if (this->announcement_pipeline_ != nullptr) {
            this->announcement_pipeline_->suspend_tasks();
          }
        } else if (state == ota::OTA_ERROR) {
          if (this->audio_mixer_ != nullptr) {
            this->audio_mixer_->resume_task();
          }
          if (this->media_pipeline_ != nullptr) {
            this->media_pipeline_->resume_tasks();
          }
          if (this->announcement_pipeline_ != nullptr) {
            this->announcement_pipeline_->resume_tasks();
          }
        }
      });
#endif

  ESP_LOGI(TAG, "Set up nabu media player");
}

esp_err_t NabuMediaPlayer::start_pipeline_(AudioPipelineType type, bool url) {
  esp_err_t err = ESP_OK;

  if (this->speaker_ != nullptr) {
    audio::AudioStreamInfo audio_stream_info;
    audio_stream_info.channels = 2;
    audio_stream_info.bits_per_sample = 16;
    audio_stream_info.sample_rate = this->sample_rate_;

    this->speaker_->set_audio_stream_info(audio_stream_info);
  }

  if (this->audio_mixer_ == nullptr) {
    this->audio_mixer_ = make_unique<AudioMixer>();
    err = this->audio_mixer_->start(this->speaker_, "mixer", MIXER_TASK_PRIORITY);
    if (err != ESP_OK) {
      return err;
    }
  }

  if (type == AudioPipelineType::MEDIA) {
    if (this->media_pipeline_ == nullptr) {
      this->media_pipeline_ = make_unique<AudioPipeline>(this->audio_mixer_.get(), type);
    }

    if (url) {
      err = this->media_pipeline_->start(this->media_url_.value(), this->sample_rate_, "media",
                                         MEDIA_PIPELINE_TASK_PRIORITY);
    } else {
      err = this->media_pipeline_->start(this->media_file_.value(), this->sample_rate_, "media",
                                         MEDIA_PIPELINE_TASK_PRIORITY);
    }

    if (this->is_paused_) {
      CommandEvent command_event;
      command_event.command = CommandEventType::RESUME_MEDIA;
      this->audio_mixer_->send_command(&command_event);
    }
    this->is_paused_ = false;
  } else if (type == AudioPipelineType::ANNOUNCEMENT) {
    if (this->announcement_pipeline_ == nullptr) {
      this->announcement_pipeline_ = make_unique<AudioPipeline>(this->audio_mixer_.get(), type);
    }

    if (url) {
      err = this->announcement_pipeline_->start(this->announcement_url_.value(), this->sample_rate_, "ann",
                                                ANNOUNCEMENT_PIPELINE_TASK_PRIORITY);
    } else {
      err = this->announcement_pipeline_->start(this->announcement_file_.value(), this->sample_rate_, "ann",
                                                ANNOUNCEMENT_PIPELINE_TASK_PRIORITY);
    }
  }

  return err;
}

void NabuMediaPlayer::watch_media_commands_() {
  MediaCallCommand media_command;
  CommandEvent command_event;
  esp_err_t err = ESP_OK;

  if (xQueueReceive(this->media_control_command_queue_, &media_command, 0) == pdTRUE) {
    if (media_command.new_url.has_value() && media_command.new_url.value()) {
      if (media_command.announce.has_value() && media_command.announce.value()) {
        err = this->start_pipeline_(AudioPipelineType::ANNOUNCEMENT, true);
      } else {
        err = this->start_pipeline_(AudioPipelineType::MEDIA, true);
      }
    }

    if (media_command.new_file.has_value() && media_command.new_file.value()) {
      if (media_command.announce.has_value() && media_command.announce.value()) {
        err = this->start_pipeline_(AudioPipelineType::ANNOUNCEMENT, false);
      } else {
        err = this->start_pipeline_(AudioPipelineType::MEDIA, false);
      }
    }

    if (err != ESP_OK) {
      ESP_LOGE(TAG, "Error starting the audio pipeline: %s", esp_err_to_name(err));
      this->status_set_error();
    } else {
      this->status_clear_error();
    }

    if (media_command.volume.has_value()) {
      this->set_volume_(media_command.volume.value());
      this->publish_state();
    }

    if (media_command.command.has_value()) {
      switch (media_command.command.value()) {
        case media_player::MEDIA_PLAYER_COMMAND_PLAY:
          if ((this->audio_mixer_ != nullptr) && this->is_paused_) {
            command_event.command = CommandEventType::RESUME_MEDIA;
            this->audio_mixer_->send_command(&command_event);
          }
          this->is_paused_ = false;
          break;
        case media_player::MEDIA_PLAYER_COMMAND_PAUSE:
          if ((this->audio_mixer_ != nullptr) && !this->is_paused_) {
            command_event.command = CommandEventType::PAUSE_MEDIA;
            this->audio_mixer_->send_command(&command_event);
          }
          this->is_paused_ = true;
          break;
        case media_player::MEDIA_PLAYER_COMMAND_STOP:
          command_event.command = CommandEventType::STOP;
          if (media_command.announce.has_value() && media_command.announce.value()) {
            if (this->announcement_pipeline_ != nullptr) {
              this->announcement_pipeline_->stop();
            }
          } else {
            if (this->media_pipeline_ != nullptr) {
              this->media_pipeline_->stop();
            }
          }
          break;
        case media_player::MEDIA_PLAYER_COMMAND_TOGGLE:
          if ((this->audio_mixer_ != nullptr) && this->is_paused_) {
            command_event.command = CommandEventType::RESUME_MEDIA;
            this->audio_mixer_->send_command(&command_event);
            this->is_paused_ = false;
          } else if (this->audio_mixer_ != nullptr) {
            command_event.command = CommandEventType::PAUSE_MEDIA;
            this->audio_mixer_->send_command(&command_event);
            this->is_paused_ = true;
          }
          break;
        case media_player::MEDIA_PLAYER_COMMAND_MUTE: {
          this->set_mute_state_(true);

          this->publish_state();
          break;
        }
        case media_player::MEDIA_PLAYER_COMMAND_UNMUTE:
          this->set_mute_state_(false);
          this->publish_state();
          break;
        case media_player::MEDIA_PLAYER_COMMAND_VOLUME_UP:
          this->set_volume_(std::min(1.0f, this->volume + this->volume_increment_));
          this->publish_state();
          break;
        case media_player::MEDIA_PLAYER_COMMAND_VOLUME_DOWN:
          this->set_volume_(std::max(0.0f, this->volume - this->volume_increment_));
          this->publish_state();
          break;
        default:
          break;
      }
    }
  }
}

void NabuMediaPlayer::watch_mixer_() {
  TaskEvent event;
  if (this->audio_mixer_ != nullptr) {
    while (this->audio_mixer_->read_event(&event)) {
      if (event.type == EventType::WARNING) {
        ESP_LOGD(TAG, "Mixer encountered an error: %s", esp_err_to_name(event.err));
        this->status_set_error();
      }
    }
  }
}

void NabuMediaPlayer::loop() {
  this->watch_media_commands_();
  this->watch_mixer_();

  // Determine state of the media player
  media_player::MediaPlayerState old_state = this->state;

  if (this->announcement_pipeline_ != nullptr)
    this->announcement_pipeline_state_ = this->announcement_pipeline_->get_state();

  if (this->media_pipeline_ != nullptr)
    this->media_pipeline_state_ = this->media_pipeline_->get_state();

  if (this->media_pipeline_state_ == AudioPipelineState::ERROR_READING) {
    ESP_LOGE(TAG, "The media pipeline's file reader encountered an error.");
  } else if (this->media_pipeline_state_ == AudioPipelineState::ERROR_DECODING) {
    ESP_LOGE(TAG, "The media pipeline's audio decoder encountered an error.");
  } else if (this->media_pipeline_state_ == AudioPipelineState::ERROR_RESAMPLING) {
    ESP_LOGE(TAG, "The media pipeline's audio resampler encountered an error.");
  }

  if (this->announcement_pipeline_state_ == AudioPipelineState::ERROR_READING) {
    ESP_LOGE(TAG, "The announcement pipeline's file reader encountered an error.");
  } else if (this->announcement_pipeline_state_ == AudioPipelineState::ERROR_DECODING) {
    ESP_LOGE(TAG, "The announcement pipeline's audio decoder encountered an error.");
  } else if (this->announcement_pipeline_state_ == AudioPipelineState::ERROR_RESAMPLING) {
    ESP_LOGE(TAG, "The announcement pipeline's audio resampler encountered an error.");
  }

  if (this->announcement_pipeline_state_ != AudioPipelineState::STOPPED) {
    this->state = media_player::MEDIA_PLAYER_STATE_ANNOUNCING;
  } else {
    if (this->media_pipeline_state_ == AudioPipelineState::STOPPED) {
      this->state = media_player::MEDIA_PLAYER_STATE_IDLE;
    } else if (this->is_paused_) {
      this->state = media_player::MEDIA_PLAYER_STATE_PAUSED;
    } else {
      this->state = media_player::MEDIA_PLAYER_STATE_PLAYING;
    }
  }

  if (this->state != old_state) {
    this->publish_state();
  }
}

void NabuMediaPlayer::set_ducking_reduction(uint8_t decibel_reduction, float duration) {
  if (this->audio_mixer_ != nullptr) {
    CommandEvent command_event;
    command_event.command = CommandEventType::DUCK;
    command_event.decibel_reduction = decibel_reduction;

    // Convert the duration in seconds to number of samples, accounting for the sample rate and number of channels
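    // (Illustrative arithmetic, assuming a 48 kHz stereo output: a 0.5 s transition corresponds to
    //  0.5 * 48000 * 2 = 48000 samples.)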
    command_event.transition_samples = static_cast<size_t>(duration * this->sample_rate_ * NUMBER_OF_CHANNELS);
    this->audio_mixer_->send_command(&command_event);
  }
}

void NabuMediaPlayer::play_file(MediaFile *media_file, bool announcement) {
  if (!this->is_ready()) {
    // Ignore any commands sent before the media player is set up
    return;
  }

  MediaCallCommand media_command;

  media_command.new_file = true;
  if (announcement) {
    this->announcement_file_ = media_file;
    media_command.announce = true;
  } else {
    this->media_file_ = media_file;
    media_command.announce = false;
  }
  xQueueSend(this->media_control_command_queue_, &media_command, portMAX_DELAY);
}

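// Usage sketch (illustrative only, not part of this commit): another component holding pointers to the player
// and to a prepared ``MediaFile`` could queue local playback like this. ``player`` and ``wake_sound`` are
// hypothetical names used purely for illustration.
//
//   // Play the embedded file as an announcement so it is mixed over (and ducks) the media stream
//   player->play_file(wake_sound, /*announcement=*/true);
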
void NabuMediaPlayer::control(const media_player::MediaPlayerCall &call) {
  if (!this->is_ready()) {
    // Ignore any commands sent before the media player is set up
    return;
  }

  MediaCallCommand media_command;

  if (call.get_announcement().has_value() && call.get_announcement().value()) {
    media_command.announce = true;
  } else {
    media_command.announce = false;
  }

  if (call.get_media_url().has_value()) {
    std::string new_uri = call.get_media_url().value();

    media_command.new_url = true;
    if (call.get_announcement().has_value() && call.get_announcement().value()) {
      this->announcement_url_ = new_uri;
    } else {
      this->media_url_ = new_uri;
    }
    xQueueSend(this->media_control_command_queue_, &media_command, portMAX_DELAY);
    return;
  }

  if (call.get_volume().has_value()) {
    media_command.volume = call.get_volume().value();
    // Wait 0 ticks for the queue to be free; volume sets aren't that important!
    xQueueSend(this->media_control_command_queue_, &media_command, 0);
    return;
  }

  if (call.get_command().has_value()) {
    media_command.command = call.get_command().value();
    TickType_t ticks_to_wait = portMAX_DELAY;
    if ((call.get_command().value() == media_player::MEDIA_PLAYER_COMMAND_VOLUME_UP) ||
        (call.get_command().value() == media_player::MEDIA_PLAYER_COMMAND_VOLUME_DOWN)) {
      ticks_to_wait = 0;  // Wait 0 ticks for the queue to be free; volume sets aren't that important!
    }
    xQueueSend(this->media_control_command_queue_, &media_command, ticks_to_wait);
    return;
  }
}

media_player::MediaPlayerTraits NabuMediaPlayer::get_traits() {
  auto traits = media_player::MediaPlayerTraits();
  traits.set_supports_pause(true);
  traits.get_supported_formats().push_back(
      media_player::MediaPlayerSupportedFormat{.format = "flac",
                                               .sample_rate = this->sample_rate_,
                                               .num_channels = 2,
                                               .purpose = media_player::MediaPlayerFormatPurpose::PURPOSE_DEFAULT,
                                               .sample_bytes = 2});
  traits.get_supported_formats().push_back(
      media_player::MediaPlayerSupportedFormat{.format = "flac",
                                               .sample_rate = this->sample_rate_,
                                               .num_channels = 1,
                                               .purpose = media_player::MediaPlayerFormatPurpose::PURPOSE_ANNOUNCEMENT,
                                               .sample_bytes = 2});
  return traits;
}

void NabuMediaPlayer::save_volume_restore_state_() {
  VolumeRestoreState volume_restore_state;
  volume_restore_state.volume = this->volume;
  volume_restore_state.is_muted = this->is_muted_;
  this->pref_.save(&volume_restore_state);
}

void NabuMediaPlayer::set_mute_state_(bool mute_state) {
  this->speaker_->set_mute_state(mute_state);

  bool old_mute_state = this->is_muted_;
  this->is_muted_ = mute_state;

  this->save_volume_restore_state_();

  if (old_mute_state != mute_state) {
    if (mute_state) {
      this->defer([this]() { this->mute_trigger_->trigger(); });
    } else {
      this->defer([this]() { this->unmute_trigger_->trigger(); });
    }
  }
}

void NabuMediaPlayer::set_volume_(float volume, bool publish) {
  // Remap the volume to fit within the configured limits
  float bounded_volume = remap<float, float>(volume, 0.0f, 1.0f, this->volume_min_, this->volume_max_);
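  // (Illustrative: with hypothetical limits volume_min_ = 0.0 and volume_max_ = 0.8, a requested volume of
  //  0.5 is remapped to 0.4 before being passed to the speaker.)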

  this->speaker_->set_volume(bounded_volume);

  if (publish) {
    this->volume = volume;
    this->save_volume_restore_state_();
  }

  this->defer([this, volume]() { this->volume_trigger_->trigger(volume); });
}

}  // namespace nabu
}  // namespace esphome
#endif
133
esphome/components/nabu/nabu_media_player.h
Normal file

@ -0,0 +1,133 @@
#pragma once

#ifdef USE_ESP_IDF

#include "audio_mixer.h"
#include "audio_pipeline.h"

#include "nabu_media_helpers.h"

#include "esphome/components/media_player/media_player.h"
#include "esphome/components/speaker/speaker.h"

#include "esphome/core/automation.h"
#include "esphome/core/component.h"
#include "esphome/core/preferences.h"

#include <freertos/FreeRTOS.h>
#include <freertos/queue.h>

#include <esp_http_client.h>

namespace esphome {
namespace nabu {

struct MediaCallCommand {
  optional<media_player::MediaPlayerCommand> command;
  optional<float> volume;
  optional<bool> announce;
  optional<bool> new_url;
  optional<bool> new_file;
};

struct VolumeRestoreState {
  float volume;
  bool is_muted;
};

class NabuMediaPlayer : public Component, public media_player::MediaPlayer {
 public:
  float get_setup_priority() const override { return esphome::setup_priority::LATE; }
  void setup() override;
  void loop() override;

  // MediaPlayer implementations
  media_player::MediaPlayerTraits get_traits() override;
  bool is_muted() const override { return this->is_muted_; }

  /// @brief Sets the ducking level for the media stream in the mixer
  /// @param decibel_reduction (uint8_t) The dB reduction level. For example, 0 is no change, 10 is a reduction by 10 dB
  /// @param duration (float) The duration (in seconds) for transitioning to the new ducking level
  void set_ducking_reduction(uint8_t decibel_reduction, float duration);

  void set_sample_rate(uint32_t sample_rate) { this->sample_rate_ = sample_rate; }

  // Percentage to increase or decrease the volume for volume up or volume down commands
  void set_volume_increment(float volume_increment) { this->volume_increment_ = volume_increment; }

  void set_volume_max(float volume_max) { this->volume_max_ = volume_max; }
  void set_volume_min(float volume_min) { this->volume_min_ = volume_min; }

  void set_speaker(speaker::Speaker *speaker) { this->speaker_ = speaker; }

  Trigger<> *get_mute_trigger() const { return this->mute_trigger_; }
  Trigger<> *get_unmute_trigger() const { return this->unmute_trigger_; }
  Trigger<float> *get_volume_trigger() const { return this->volume_trigger_; }

  void play_file(MediaFile *media_file, bool announcement);

 protected:
  // Receives commands from HA or from the voice assistant component
  // Sends commands to the media_control_command_queue_
  void control(const media_player::MediaPlayerCall &call) override;

  /// @brief Updates this->volume and saves the volume/mute state to flash for restoration if publish is true.
  void set_volume_(float volume, bool publish = true);

  /// @brief Sets the mute state. Restores previous volume if unmuting. Always saves volume/mute state to flash for
  /// restoration.
  /// @param mute_state If true, audio will be muted. If false, audio will be unmuted
  void set_mute_state_(bool mute_state);

  /// @brief Saves the current volume and mute state to the flash for restoration.
  void save_volume_restore_state_();

  // Reads commands from media_control_command_queue_. Starts pipelines and mixer if necessary.
  void watch_media_commands_();

  std::unique_ptr<AudioPipeline> media_pipeline_;
  std::unique_ptr<AudioPipeline> announcement_pipeline_;
  std::unique_ptr<AudioMixer> audio_mixer_;

  speaker::Speaker *speaker_{nullptr};

  // Monitors the mixer task
  void watch_mixer_();

  // Starts the ``type`` pipeline with a ``url`` or file. Starts the mixer, pipeline, and speaker tasks if necessary.
  // Unpauses if starting media in a paused state
  esp_err_t start_pipeline_(AudioPipelineType type, bool url);

  AudioPipelineState media_pipeline_state_{AudioPipelineState::STOPPED};
  AudioPipelineState announcement_pipeline_state_{AudioPipelineState::STOPPED};

  optional<std::string> media_url_{};          // only modified by the control function
  optional<std::string> announcement_url_{};   // only modified by the control function
  optional<MediaFile *> media_file_{};         // only modified by the play_file function
  optional<MediaFile *> announcement_file_{};  // only modified by the play_file function

  QueueHandle_t media_control_command_queue_;

  uint32_t sample_rate_;

  bool is_paused_{false};
  bool is_muted_{false};

  // The amount to change the volume on volume up/down commands
  float volume_increment_;

  float volume_max_;
  float volume_min_;

  // Used to save the volume/mute state for restoration on reboot
  ESPPreferenceObject pref_;

  Trigger<> *mute_trigger_ = new Trigger<>();
  Trigger<> *unmute_trigger_ = new Trigger<>();
  Trigger<float> *volume_trigger_ = new Trigger<float>();
};

}  // namespace nabu
}  // namespace esphome

#endif