Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 55ad36e0 authored by Yun-Hao Chung's avatar Yun-Hao Chung
Browse files

Floss: Fix cleanup procedure when chipset is down

In the current implementation, when the chipset is down, we raise SIGINT
directly, then the upper layer exit the process. The OS will then free
the resources but in an undetermined order. This could cause some
thread (usually caused by DoInThreadDelayed) to run in a bad state
during this cleanup and cause crashes.

This CL implements the following to prevent the above from happening.
1. instead of raise, only send the sigal so that the thread can continue
   to run and avoid fall into a bad state, which makes the module unable
   to be unregistered.
2. as soon as we know the chipset is unable to use, flag it and prevent
   any read and write operation.
3. when btadapterd receive the signal, don't terminate the process
   directly, instead, follow the normal terminating procedure to cleanup
   all resources.

NO_IFTTT= Floss only change.

Bug: 335146967, 304997914, 353900236
Tag: #floss
Test: mmm packages/modules/Bluetooth
Test: run below test mannually while a2dp and hogp is in used
      1. rmmod btusb && modprobe btusb while
      2. hcitool cmd 3f 4e (to trigger hardware error)
Flag: EXEMPT, Floss-only changes
Change-Id: I74ca9762d98a89a76f43e4fd78c457a4c97c0278
parent 1b652bd8
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -101,6 +101,9 @@ public:
  // Get the MSFT opcode (as specified in Microsoft-defined Bluetooth HCI
  // extensions)
  virtual uint16_t getMsftOpcode() { return 0; }

  // Mark the controller as broken to prevent further read / write operation.
  virtual void markControllerBroken() { return; }
};
// LINT.ThenChange(fuzz/fuzz_hci_hal.h)

+31 −7
Original line number Diff line number Diff line
@@ -252,6 +252,9 @@ public:

  void sendHciCommand(HciPacket command) override {
    std::lock_guard<std::mutex> lock(api_mutex_);
    if (controller_broken_) {
      return;
    }
    log::assert_that(sock_fd_ != INVALID_FD, "assert failed: sock_fd_ != INVALID_FD");
    std::vector<uint8_t> packet = std::move(command);
    btsnoop_logger_->Capture(packet, SnoopLogger::Direction::OUTGOING,
@@ -262,6 +265,9 @@ public:

  void sendAclData(HciPacket data) override {
    std::lock_guard<std::mutex> lock(api_mutex_);
    if (controller_broken_) {
      return;
    }
    log::assert_that(sock_fd_ != INVALID_FD, "assert failed: sock_fd_ != INVALID_FD");
    std::vector<uint8_t> packet = std::move(data);
    btsnoop_logger_->Capture(packet, SnoopLogger::Direction::OUTGOING,
@@ -272,6 +278,10 @@ public:

  void sendScoData(HciPacket data) override {
    std::lock_guard<std::mutex> lock(api_mutex_);
    if (controller_broken_) {
      return;
    }

    log::assert_that(sock_fd_ != INVALID_FD, "assert failed: sock_fd_ != INVALID_FD");
    std::vector<uint8_t> packet = std::move(data);
    btsnoop_logger_->Capture(packet, SnoopLogger::Direction::OUTGOING,
@@ -282,6 +292,9 @@ public:

  void sendIsoData(HciPacket data) override {
    std::lock_guard<std::mutex> lock(api_mutex_);
    if (controller_broken_) {
      return;
    }
    log::assert_that(sock_fd_ != INVALID_FD, "assert failed: sock_fd_ != INVALID_FD");
    std::vector<uint8_t> packet = std::move(data);
    btsnoop_logger_->Capture(packet, SnoopLogger::Direction::OUTGOING,
@@ -292,6 +305,15 @@ public:

  uint16_t getMsftOpcode() override { return Mgmt().get_vs_opcode(MGMT_VS_OPCODE_MSFT); }

  void markControllerBroken() override {
    std::lock_guard<std::mutex> lock(api_mutex_);
    if (controller_broken_) {
      log::error("Controller already marked as broken!");
      return;
    }
    controller_broken_ = true;
  }

protected:
  void ListDependencies(ModuleList* list) const {
    list->add<LinkClocker>();
@@ -307,7 +329,7 @@ protected:
    // We don't want to crash when the chipset is broken.
    if (sock_fd_ == INVALID_FD) {
      log::error("Failed to connect to HCI socket. Aborting HAL initialization process.");
      raise(SIGINT);
      kill(getpid(), SIGTERM);
      return;
    }

@@ -357,6 +379,7 @@ private:
  std::queue<std::vector<uint8_t>> hci_outgoing_queue_;
  SnoopLogger* btsnoop_logger_ = nullptr;
  LinkClocker* link_clocker_ = nullptr;
  bool controller_broken_ = false;

  void write_to_fd(HciPacket packet) {
    // TODO: replace this with new queue when it's ready
@@ -376,7 +399,9 @@ private:
    auto bytes_written = write(sock_fd_, (void*)packet_to_send.data(), packet_to_send.size());
    hci_outgoing_queue_.pop();
    if (bytes_written == -1) {
      abort();
      log::error("Can't write to socket: {}", strerror(errno));
      markControllerBroken();
      kill(getpid(), SIGTERM);
    }
    if (hci_outgoing_queue_.empty()) {
      hci_incoming_thread_.GetReactor()->ModifyRegistration(reactable_,
@@ -400,16 +425,15 @@ private:
    // we don't want crash when the chipset is broken.
    if (received_size == -1) {
      log::error("Can't receive from socket: {}", strerror(errno));
      close(sock_fd_);
      raise(SIGINT);
      markControllerBroken();
      kill(getpid(), SIGTERM);
      return;
    }

    if (received_size == 0) {
      log::warn("Can't read H4 header. EOF received");
      // First close sock fd before raising sigint
      close(sock_fd_);
      raise(SIGINT);
      markControllerBroken();
      kill(getpid(), SIGTERM);
      return;
    }

+4 −2
Original line number Diff line number Diff line
@@ -465,10 +465,12 @@ struct HciLayer::impl {
    log::assert_that(event_view.IsValid(), "assert failed: event_view.IsValid()");
#ifdef TARGET_FLOSS
    log::warn("Hardware Error Event with code 0x{:02x}", event_view.GetHardwareCode());
    // Sending SIGINT to process the exception from BT controller.
    // Sending SIGTERM to process the exception from BT controller.
    // The Floss daemon will be restarted. HCI reset during restart will clear the
    // error state of the BT controller.
    kill(getpid(), SIGINT);
    auto hal = module_.GetDependency<hal::HciHal>();
    hal->markControllerBroken();
    kill(getpid(), SIGTERM);
#else
    log::fatal("Hardware Error Event with code 0x{:02x}", event_view.GetHardwareCode());
#endif
+0 −14
Original line number Diff line number Diff line
@@ -262,15 +262,8 @@ fn main() -> Result<(), Box<dyn Error>> {
                signal::SigSet::empty(),
            );

            let sig_action_int = signal::SigAction::new(
                signal::SigHandler::Handler(handle_sigint),
                signal::SaFlags::empty(),
                signal::SigSet::empty(),
            );

            unsafe {
                signal::sigaction(signal::SIGTERM, &sig_action_term).unwrap();
                signal::sigaction(signal::SIGINT, &sig_action_int).unwrap();
            }
        }

@@ -318,10 +311,3 @@ extern "C" fn handle_sigterm(_signum: i32) {
    log::debug!("Sigterm completed");
    std::process::exit(0);
}

extern "C" fn handle_sigint(_signum: i32) {
    // Assumed this is from HAL Host, which is likely caused by chipset error.
    // In this case, don't crash the daemon and don't try to power off the adapter.
    log::debug!("Sigint completed");
    std::process::exit(0);
}