Update RTTTL component to allow I2S (#5177)

Co-authored-by: Jesse Hills <3060199+jesserockz@users.noreply.github.com>
2025-04-04 15:49:05 +02:00 · 2023-11-02 05:14:05 +01:00 · 2023-11-02 05:14:05 +01:00 · 453600f18e
commit 453600f18e
parent 4edf3efdf3
3 changed files with 245 additions and 80 deletions
--- a/esphome/components/rtttl/init.py
+++ b/esphome/components/rtttl/init.py
@ -4,7 +4,15 @@ import esphome.config_validation as cv
 import esphome.final_validate as fv
 from esphome import automation
 from esphome.components.output import FloatOutput
-from esphome.const import CONF_ID, CONF_OUTPUT, CONF_PLATFORM, CONF_TRIGGER_ID
+from esphome.components.speaker import Speaker
+
+from esphome.const import (
+    CONF_ID,
+    CONF_OUTPUT,
+    CONF_PLATFORM,
+    CONF_TRIGGER_ID,
+    CONF_SPEAKER,
+)

 _LOGGER = logging.getLogger(__name__)

@ -24,17 +32,23 @@ IsPlayingCondition = rtttl_ns.class_("IsPlayingCondition", automation.Condition)

 MULTI_CONF = True

-CONFIG_SCHEMA = cv.Schema(
-    {
-        cv.GenerateID(CONF_ID): cv.declare_id(Rtttl),
-        cv.Required(CONF_OUTPUT): cv.use_id(FloatOutput),
-        cv.Optional(CONF_ON_FINISHED_PLAYBACK): automation.validate_automation(
-            {
-                cv.GenerateID(CONF_TRIGGER_ID): cv.declare_id(FinishedPlaybackTrigger),
-            }
-        ),
-    }
-).extend(cv.COMPONENT_SCHEMA)
+CONFIG_SCHEMA = cv.All(  # per-entry schema: the dict schema first, then the exactly-one-key check at the end
+    cv.Schema(
+        {
+            cv.GenerateID(CONF_ID): cv.declare_id(Rtttl),
+            cv.Optional(CONF_OUTPUT): cv.use_id(FloatOutput),  # each sink is optional on its own ...
+            cv.Optional(CONF_SPEAKER): cv.use_id(Speaker),
+            cv.Optional(CONF_ON_FINISHED_PLAYBACK): automation.validate_automation(
+                {
+                    cv.GenerateID(CONF_TRIGGER_ID): cv.declare_id(
+                        FinishedPlaybackTrigger
+                    ),
+                }
+            ),
+        }
+    ).extend(cv.COMPONENT_SCHEMA),
+    cv.has_exactly_one_key(CONF_OUTPUT, CONF_SPEAKER),  # ... but exactly one of output / speaker must be configured
+)


 def validate_parent_output_config(value):
@ -63,9 +77,9 @@ def validate_parent_output_config(value):

 FINAL_VALIDATE_SCHEMA = cv.Schema(
    {
-        cv.Required(CONF_OUTPUT): fv.id_declaration_match_schema(
+        cv.Optional(CONF_OUTPUT): fv.id_declaration_match_schema(
            validate_parent_output_config
-        )
+        ),
    },
    extra=cv.ALLOW_EXTRA,
 )
@ -75,8 +89,14 @@ async def to_code(config):
    var = cg.new_Pvariable(config[CONF_ID])
    await cg.register_component(var, config)

-    out = await cg.get_variable(config[CONF_OUTPUT])
-    cg.add(var.set_output(out))
+    if CONF_OUTPUT in config:
+        out = await cg.get_variable(config[CONF_OUTPUT])
+        cg.add(var.set_output(out))
+        cg.add_define("USE_OUTPUT")
+
+    if CONF_SPEAKER in config:
+        out = await cg.get_variable(config[CONF_SPEAKER])
+        cg.add(var.set_speaker(out))

    for conf in config.get(CONF_ON_FINISHED_PLAYBACK, []):
        trigger = cg.new_Pvariable(conf[CONF_TRIGGER_ID], var)
--- a/esphome/components/rtttl/rtttl.cpp
+++ b/esphome/components/rtttl/rtttl.cpp
@ -1,4 +1,5 @@
 #include "rtttl.h"
+#include <cmath>
 #include "esphome/core/hal.h"
 #include "esphome/core/log.h"

@ -15,104 +16,185 @@ static const uint16_t NOTES[] = {0,    262,  277,  294,  311,  330,  349,  370,
                                 1109, 1175, 1245, 1319, 1397, 1480, 1568, 1661, 1760, 1865, 1976, 2093, 2217,
                                 2349, 2489, 2637, 2794, 2960, 3136, 3322, 3520, 3729, 3951};

+static const uint16_t I2S_SPEED = 1600;  // divisor for sample_rate_ * <duration in ms> when loop() sizes a note or gap in samples; NOTE(review): 1000 would be an exact ms-to-samples conversion - confirm why 1600
+
+#undef HALF_PI  // drop any HALF_PI macro already in scope so the constant below can be declared
+static const double HALF_PI = 1.5707963267948966192313216916398;  // pi / 2; NOTE(review): not referenced in the hunks shown here - confirm it is still needed
+
+inline double deg2rad(double degrees) {  // Converts an angle given in degrees to radians.
+  static const double PI_ON_180 = 4.0 * atan(1.0) / 180.0;  // atan(1) is pi/4, so this is pi/180; initialised once on first call
+  return degrees * PI_ON_180;
+}
+
 void Rtttl::dump_config() { ESP_LOGCONFIG(TAG, "Rtttl"); }

 void Rtttl::play(std::string rtttl) {
-  rtttl_ = std::move(rtttl);
+  this->rtttl_ = std::move(rtttl);
+
+  this->default_duration_ = 4;
+  this->default_octave_ = 6;
+  this->note_duration_ = 0;

-  default_duration_ = 4;
-  default_octave_ = 6;
  int bpm = 63;
  uint8_t num;

  // Get name
-  position_ = rtttl_.find(':');
+  this->position_ = rtttl_.find(':');

  // it's somewhat documented to be up to 10 characters but let's be a bit flexible here
-  if (position_ == std::string::npos || position_ > 15) {
+  if (this->position_ == std::string::npos || this->position_ > 15) {
    ESP_LOGE(TAG, "Missing ':' when looking for name.");
    return;
  }

-  auto name = this->rtttl_.substr(0, position_);
+  auto name = this->rtttl_.substr(0, this->position_);
  ESP_LOGD(TAG, "Playing song %s", name.c_str());

  // get default duration
-  position_ = this->rtttl_.find("d=", position_);
-  if (position_ == std::string::npos) {
+  this->position_ = this->rtttl_.find("d=", this->position_);
+  if (this->position_ == std::string::npos) {
    ESP_LOGE(TAG, "Missing 'd='");
    return;
  }
-  position_ += 2;
+  this->position_ += 2;
  num = this->get_integer_();
  if (num > 0)
-    default_duration_ = num;
+    this->default_duration_ = num;

  // get default octave
-  position_ = rtttl_.find("o=", position_);
-  if (position_ == std::string::npos) {
+  this->position_ = this->rtttl_.find("o=", this->position_);
+  if (this->position_ == std::string::npos) {
    ESP_LOGE(TAG, "Missing 'o=");
    return;
  }
-  position_ += 2;
+  this->position_ += 2;
  num = get_integer_();
  if (num >= 3 && num <= 7)
-    default_octave_ = num;
+    this->default_octave_ = num;

  // get BPM
-  position_ = rtttl_.find("b=", position_);
-  if (position_ == std::string::npos) {
+  this->position_ = this->rtttl_.find("b=", this->position_);
+  if (this->position_ == std::string::npos) {
    ESP_LOGE(TAG, "Missing b=");
    return;
  }
-  position_ += 2;
+  this->position_ += 2;
  num = get_integer_();
  if (num != 0)
    bpm = num;

-  position_ = rtttl_.find(':', position_);
-  if (position_ == std::string::npos) {
+  this->position_ = this->rtttl_.find(':', this->position_);
+  if (this->position_ == std::string::npos) {
    ESP_LOGE(TAG, "Missing second ':'");
    return;
  }
-  position_++;
+  this->position_++;

  // BPM usually expresses the number of quarter notes per minute
-  wholenote_ = 60 * 1000L * 4 / bpm;  // this is the time for whole note (in milliseconds)
+  this->wholenote_ = 60 * 1000L * 4 / bpm;  // this is the time for whole note (in milliseconds)

-  output_freq_ = 0;
-  last_note_ = millis();
-  note_duration_ = 1;
+  this->output_freq_ = 0;
+  this->last_note_ = millis();
+  this->note_duration_ = 1;
+
+#ifdef USE_SPEAKER
+  this->samples_sent_ = 0;
+  this->samples_count_ = 0;
+#endif
+}
+
+void Rtttl::stop() {  // Stops playback and silences whichever sink was compiled in and set.
+  this->note_duration_ = 0;  // loop() returns immediately and is_playing() reports false while this is 0
+#ifdef USE_OUTPUT
+  if (this->output_ != nullptr) {
+    this->output_->set_level(0.0);  // drive the float output to level 0
+  }
+#endif
+#ifdef USE_SPEAKER
+  if (this->speaker_ != nullptr) {
+    if (this->speaker_->is_running()) {  // only ask the speaker to stop when it reports that it is running
+      this->speaker_->stop();
+    }
+  }
+#endif
 }

 void Rtttl::loop() {
-  if (note_duration_ == 0 || millis() - last_note_ < note_duration_)
+  if (this->note_duration_ == 0)
    return;

-  if (!rtttl_[position_]) {
-    output_->set_level(0.0);
+#ifdef USE_SPEAKER
+  if (this->speaker_ != nullptr) {
+    if (this->samples_sent_ != this->samples_count_) {
+      SpeakerSample sample[SAMPLE_BUFFER_SIZE + 1];
+      int x = 0;
+      double rem = 0.0;
+
+      while (true) {
+        // Try and send out the remainder of the existing note, one per loop()
+
+        if (this->samples_per_wave_ != 0 && this->samples_sent_ >= this->samples_gap_) {  // Play note//
+          rem = ((this->samples_sent_ << 10) % this->samples_per_wave_) * (360.0 / this->samples_per_wave_);
+
+          int16_t val = 8192 * sin(deg2rad(rem));
+
+          sample[x].left = val;
+          sample[x].right = val;
+
+        } else {
+          sample[x].left = 0;
+          sample[x].right = 0;
+        }
+
+        if (x >= SAMPLE_BUFFER_SIZE || this->samples_sent_ >= this->samples_count_) {
+          break;
+        }
+        this->samples_sent_++;
+        x++;
+      }
+      if (x > 0) {
+        int send = this->speaker_->play((uint8_t *) (&sample), x * 4);
+        if (send != x * 4) {
+          this->samples_sent_ -= (x - (send / 4));
+        }
+        return;
+      }
+    }
+  }
+#endif
+#ifdef USE_OUTPUT
+  if (this->output_ != nullptr && millis() - this->last_note_ < this->note_duration_)
+    return;
+#endif
+  if (!this->rtttl_[position_]) {
+    this->note_duration_ = 0;
+#ifdef USE_OUTPUT
+    if (this->output_ != nullptr) {
+      this->output_->set_level(0.0);
+    }
+#endif
    ESP_LOGD(TAG, "Playback finished");
    this->on_finished_playback_callback_.call();
-    note_duration_ = 0;
    return;
  }

  // align to note: most rtttl's out there does not add and space after the ',' separator but just in case...
-  while (rtttl_[position_] == ',' || rtttl_[position_] == ' ')
-    position_++;
+  while (this->rtttl_[this->position_] == ',' || this->rtttl_[this->position_] == ' ')
+    this->position_++;

  // first, get note duration, if available
  uint8_t num = this->get_integer_();

  if (num) {
-    note_duration_ = wholenote_ / num;
+    this->note_duration_ = this->wholenote_ / num;
  } else {
-    note_duration_ = wholenote_ / default_duration_;  // we will need to check if we are a dotted note after
+    this->note_duration_ =
+        this->wholenote_ / this->default_duration_;  // we will need to check if we are a dotted note after
  }

  uint8_t note;

-  switch (rtttl_[position_]) {
+  switch (this->rtttl_[this->position_]) {
    case 'c':
      note = 1;
      break;
@ -138,51 +220,81 @@ void Rtttl::loop() {
    default:
      note = 0;
  }
-  position_++;
+  this->position_++;

  // now, get optional '#' sharp
-  if (rtttl_[position_] == '#') {
+  if (this->rtttl_[this->position_] == '#') {
    note++;
-    position_++;
+    this->position_++;
  }

  // now, get optional '.' dotted note
-  if (rtttl_[position_] == '.') {
-    note_duration_ += note_duration_ / 2;
-    position_++;
+  if (this->rtttl_[this->position_] == '.') {
+    this->note_duration_ += this->note_duration_ / 2;
+    this->position_++;
  }

  // now, get scale
  uint8_t scale = get_integer_();
  if (scale == 0)
-    scale = default_octave_;
+    scale = this->default_octave_;
+  bool need_note_gap = false;

  // Now play the note
  if (note) {
    auto note_index = (scale - 4) * 12 + note;
    if (note_index < 0 || note_index >= (int) sizeof(NOTES)) {
      ESP_LOGE(TAG, "Note out of valid range");
+      this->note_duration_ = 0;
      return;
    }
    auto freq = NOTES[note_index];
+    need_note_gap = freq == this->output_freq_;

-    if (freq == output_freq_) {
-      // Add small silence gap between same note
-      output_->set_level(0.0);
-      delay(DOUBLE_NOTE_GAP_MS);
-      note_duration_ -= DOUBLE_NOTE_GAP_MS;
-    }
-    output_freq_ = freq;
+    // Add small silence gap between same note
+    this->output_freq_ = freq;

-    ESP_LOGVV(TAG, "playing note: %d for %dms", note, note_duration_);
-    output_->update_frequency(freq);
-    output_->set_level(0.5);
+    ESP_LOGVV(TAG, "playing note: %d for %dms", note, this->note_duration_);
  } else {
-    ESP_LOGVV(TAG, "waiting: %dms", note_duration_);
-    output_->set_level(0.0);
+    ESP_LOGVV(TAG, "waiting: %dms", this->note_duration_);
+    this->output_freq_ = 0;
  }

-  last_note_ = millis();
+#ifdef USE_OUTPUT
+  if (this->output_ != nullptr) {
+    if (need_note_gap) {
+      this->output_->set_level(0.0);
+      delay(DOUBLE_NOTE_GAP_MS);
+      this->note_duration_ -= DOUBLE_NOTE_GAP_MS;
+    }
+    if (this->output_freq_ != 0) {
+      this->output_->update_frequency(this->output_freq_);
+      this->output_->set_level(0.5);
+    } else {
+      this->output_->set_level(0.0);
+    }
+  }
+#endif
+#ifdef USE_SPEAKER
+  if (this->speaker_ != nullptr) {
+    this->samples_sent_ = 0;
+    this->samples_count_ = (this->sample_rate_ * this->note_duration_) / I2S_SPEED;
+    // Convert from frequency in Hz to high and low samples in fixed point
+    if (this->output_freq_ != 0) {
+      this->samples_per_wave_ = (this->sample_rate_ << 10) / this->output_freq_;
+    } else {
+      this->samples_per_wave_ = 0;
+    }
+    if (need_note_gap) {
+      this->samples_gap_ = (this->sample_rate_ * DOUBLE_NOTE_GAP_MS) / I2S_SPEED;
+    } else {
+      this->samples_gap_ = 0;
+    }
+  }
+#endif
+
+  this->last_note_ = millis();
 }
+
 }  // namespace rtttl
 }  // namespace esphome
--- a/esphome/components/rtttl/rtttl.h
+++ b/esphome/components/rtttl/rtttl.h
@ -1,23 +1,41 @@
 #pragma once

-#include "esphome/core/component.h"
 #include "esphome/core/automation.h"
+#include "esphome/core/component.h"
+
+#ifdef USE_OUTPUT
 #include "esphome/components/output/float_output.h"
+#endif
+
+#ifdef USE_SPEAKER
+#include "esphome/components/speaker/speaker.h"
+#endif

 namespace esphome {
 namespace rtttl {

+#ifdef USE_SPEAKER
+static const size_t SAMPLE_BUFFER_SIZE = 256;  // loop() hands at most this many samples to speaker_->play() per call; its local buffer holds one extra slot
+
+struct SpeakerSample {  // One stereo frame of signed 16-bit audio; loop() writes the same value to both channels.
+  int16_t left{0};
+  int16_t right{0};
+};
+#endif
+
 class Rtttl : public Component {
 public:
-  void set_output(output::FloatOutput *output) { output_ = output; }
+#ifdef USE_OUTPUT
+  void set_output(output::FloatOutput *output) { this->output_ = output; }
+#endif
+#ifdef USE_SPEAKER
+  void set_speaker(speaker::Speaker *speaker) { this->speaker_ = speaker; }
+#endif
  void play(std::string rtttl);
-  void stop() {
-    note_duration_ = 0;
-    output_->set_level(0.0);
-  }
+  void stop();
  void dump_config() override;

-  bool is_playing() { return note_duration_ != 0; }
+  bool is_playing() { return this->note_duration_ != 0; }
  void loop() override;

  void add_on_finished_playback_callback(std::function<void()> callback) {
@ -27,14 +45,14 @@ class Rtttl : public Component {
 protected:
  inline uint8_t get_integer_() {
    uint8_t ret = 0;
-    while (isdigit(rtttl_[position_])) {
-      ret = (ret * 10) + (rtttl_[position_++] - '0');
+    while (isdigit(this->rtttl_[this->position_])) {
+      ret = (ret * 10) + (this->rtttl_[this->position_++] - '0');
    }
    return ret;
  }

-  std::string rtttl_;
-  size_t position_;
+  std::string rtttl_{""};
+  size_t position_{0};
  uint16_t wholenote_;
  uint16_t default_duration_;
  uint16_t default_octave_;
@ -42,7 +60,22 @@ class Rtttl : public Component {
  uint16_t note_duration_;

  uint32_t output_freq_;
+
+#ifdef USE_OUTPUT
  output::FloatOutput *output_;
+#endif
+
+  void play_output_();
+
+#ifdef USE_SPEAKER
+  speaker::Speaker *speaker_;
+  void play_speaker_();
+  int sample_rate_{16000};
+  int samples_per_wave_{0};
+  int samples_sent_{0};
+  int samples_count_{0};
+  int samples_gap_{0};
+#endif

  CallbackManager<void()> on_finished_playback_callback_;
 };