sandboxed-api/sandboxed_api/sandbox2/monitor_base.cc

// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Implementation file for the sandbox2::MonitorBase class.

#include "sandboxed_api/sandbox2/monitor_base.h"

#include <pthread.h>
#include <sched.h>
#include <signal.h>
#include <sys/resource.h>
#include <syscall.h>

#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <iomanip>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>

#include "absl/cleanup/cleanup.h"
#include "absl/flags/declare.h"
#include "absl/flags/flag.h"
#include "absl/log/check.h"
#include "absl/log/log.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/synchronization/notification.h"
#include "absl/time/time.h"
#include "sandboxed_api/sandbox2/client.h"
#include "sandboxed_api/sandbox2/comms.h"
#include "sandboxed_api/sandbox2/executor.h"
#include "sandboxed_api/sandbox2/limits.h"
#include "sandboxed_api/sandbox2/mounts.h"
#include "sandboxed_api/sandbox2/namespace.h"
#include "sandboxed_api/sandbox2/network_proxy/client.h"
#include "sandboxed_api/sandbox2/network_proxy/server.h"
#include "sandboxed_api/sandbox2/notify.h"
#include "sandboxed_api/sandbox2/policy.h"
#include "sandboxed_api/sandbox2/result.h"
#include "sandboxed_api/sandbox2/stack_trace.h"
#include "sandboxed_api/sandbox2/syscall.h"
#include "sandboxed_api/sandbox2/util.h"
#include "sandboxed_api/util/file_helpers.h"
#include "sandboxed_api/util/raw_logging.h"
#include "sandboxed_api/util/strerror.h"
#include "sandboxed_api/util/temp_file.h"

ABSL_FLAG(bool, sandbox2_report_on_sandboxee_signal, true,
          "Report sandbox2 sandboxee deaths caused by signals");

ABSL_FLAG(bool, sandbox2_report_on_sandboxee_timeout, true,
          "Report sandbox2 sandboxee timeouts");

ABSL_DECLARE_FLAG(bool, sandbox2_danger_danger_permit_all);
ABSL_DECLARE_FLAG(std::string, sandbox2_danger_danger_permit_all_and_log);

ABSL_DECLARE_FLAG(bool, sandbox_libunwind_crash_handler);

namespace sandbox2 {
namespace {

void MaybeEnableTomoyoLsmWorkaround(Mounts& mounts, std::string& comms_fd_dev) {
  static auto tomoyo_active = []() -> bool {
    std::string lsm_list;
    if (auto status = sapi::file::GetContents(
            "/sys/kernel/security/lsm", &lsm_list, sapi::file::Defaults());
        !status.ok() && !absl::IsNotFound(status)) {
      VLOG(1) << "Checking active LSMs failed: " << status.message() << ": "
              << sapi::StrError(errno);
      return false;
    }
    return absl::StrContains(lsm_list, "tomoyo");
  }();

  if (!tomoyo_active) {
    return;
  }
  VLOG(1) << "Tomoyo LSM active, enabling workaround";

  if (mounts.ResolvePath("/dev").ok() || mounts.ResolvePath("/dev/fd").ok()) {
    // Avoid shadowing /dev/fd/1022 below if /dev or /dev/fd is already mapped.
    VLOG(1) << "Parent dir already mapped, skipping";
    return;
  }

  auto temp_file = sapi::CreateNamedTempFileAndClose("/tmp/");
  if (!temp_file.ok()) {
    LOG(WARNING) << "Failed to create empty temp file: " << temp_file.status();
    return;
  }
  comms_fd_dev = std::move(*temp_file);

  // Ignore errors here, as the file itself might already be mapped.
  if (auto status = mounts.AddFileAt(
          comms_fd_dev, absl::StrCat("/dev/fd/", Comms::kSandbox2TargetExecFD),
          false);
      !status.ok()) {
    VLOG(1) << "Mapping comms FD: %s" << status.message();
  }
}

void LogContainer(const std::vector<std::string>& container) {
  for (size_t i = 0; i < container.size(); ++i) {
    LOG(INFO) << "[" << std::setfill('0') << std::setw(4) << i
              << "]=" << container[i];
  }
}

}  // namespace

MonitorBase::MonitorBase(Executor* executor, Policy* policy, Notify* notify)
    : executor_(executor),
      notify_(notify),
      policy_(policy),
      // NOLINTNEXTLINE clang-diagnostic-deprecated-declarations
      comms_(executor_->ipc()->comms()),
      ipc_(executor_->ipc()),
      uses_custom_forkserver_(executor_->fork_client_ != nullptr) {
  // It's a pre-connected Comms channel, no need to accept new connection.
  CHECK(comms_->IsConnected());
  std::string path =
      absl::GetFlag(FLAGS_sandbox2_danger_danger_permit_all_and_log);
  if (!path.empty()) {
    log_file_ = std::fopen(path.c_str(), "a+");
    PCHECK(log_file_ != nullptr) << "Failed to open log file '" << path << "'";
  }

  if (auto& ns = policy_->namespace_; ns) {
    // Check for the Tomoyo LSM, which is active by default in several common
    // distribution kernels (esp. Debian).
    MaybeEnableTomoyoLsmWorkaround(ns->mounts(), comms_fd_dev_);
  }
}

MonitorBase::~MonitorBase() {
  if (!comms_fd_dev_.empty()) {
    std::remove(comms_fd_dev_.c_str());
  }
  if (log_file_) {
    std::fclose(log_file_);
  }
  if (network_proxy_server_) {
    network_proxy_thread_.join();
  }
}

void MonitorBase::OnDone() {
  if (done_notification_.HasBeenNotified()) {
    return;
  }

  notify_->EventFinished(result_);
  ipc_->InternalCleanupFdMap();
  done_notification_.Notify();
}

void MonitorBase::Launch() {

  absl::Cleanup process_cleanup = [this] {
    if (process_.init_pid > 0) {
      kill(process_.init_pid, SIGKILL);
    } else if (process_.main_pid > 0) {
      kill(process_.main_pid, SIGKILL);
    }
  };
  absl::Cleanup monitor_done = [this] { OnDone(); };

  const Namespace* ns = policy_->GetNamespaceOrNull();
  if (SAPI_VLOG_IS_ON(1) && ns != nullptr) {
    std::vector<std::string> outside_entries;
    std::vector<std::string> inside_entries;
    ns->mounts().RecursivelyListMounts(
        /*outside_entries=*/&outside_entries,
        /*inside_entries=*/&inside_entries);
    VLOG(1) << "Outside entries mapped to chroot:";
    LogContainer(outside_entries);
    VLOG(1) << "Inside entries as they appear in chroot:";
    LogContainer(inside_entries);
  }

  // Don't trace the child: it will allow to use 'strace -f' with the whole
  // sandbox master/monitor, which ptrace_attach'es to the child.
  int clone_flags = CLONE_UNTRACED;

  if (policy_->allowed_hosts_) {
    EnableNetworkProxyServer();
  }

  // Get PID of the sandboxee.
  bool should_have_init = ns && (ns->clone_flags() & CLONE_NEWPID);
  absl::StatusOr<SandboxeeProcess> process =
      executor_->StartSubProcess(clone_flags, ns, type_);

  if (!process.ok()) {
    LOG(ERROR) << "Starting sandboxed subprocess failed: " << process.status();
    SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_SUBPROCESS);
    return;
  }

  process_ = *std::move(process);

  if (process_.main_pid <= 0 || (should_have_init && process_.init_pid <= 0)) {
    SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_SUBPROCESS);
    return;
  }

  if (!notify_->EventStarted(process_.main_pid, comms_)) {
    SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_NOTIFY);
    return;
  }
  if (!InitSendIPC()) {
    SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_IPC);
    return;
  }
  if (!InitSendCwd()) {
    SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_CWD);
    return;
  }
  if (!InitSendPolicy()) {
    SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_POLICY);
    return;
  }
  if (!WaitForSandboxReady()) {
    SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_WAIT);
    return;
  }
  if (!InitApplyLimits()) {
    SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_LIMITS);
    return;
  }
  std::move(process_cleanup).Cancel();

  RunInternal();
  std::move(monitor_done).Cancel();
}

absl::StatusOr<Result> MonitorBase::AwaitResultWithTimeout(
    absl::Duration timeout) {
  auto done = done_notification_.WaitForNotificationWithTimeout(timeout);
  if (!done) {
    return absl::DeadlineExceededError("Sandbox did not finish within timeout");
  }

  Join();
  return result_;
}

void MonitorBase::SetExitStatusCode(Result::StatusEnum final_status,
                                    uintptr_t reason_code) {
  CHECK(result_.final_status() == Result::UNSET);
  result_.SetExitStatusCode(final_status, reason_code);
}

bool MonitorBase::InitSendPolicy() {
  if (!policy_->SendPolicy(comms_, type_ == FORKSERVER_MONITOR_UNOTIFY)) {
    LOG(ERROR) << "Couldn't send policy";
    return false;
  }

  return true;
}

bool MonitorBase::InitSendCwd() {
  if (!comms_->SendString(executor_->cwd_)) {
    PLOG(ERROR) << "Couldn't send cwd";
    return false;
  }

  return true;
}

bool MonitorBase::InitApplyLimit(pid_t pid, int resource,
                                 const rlimit64& rlim) const {
#if defined(__ANDROID__)
  using RlimitResource = int;
#else
  using RlimitResource = __rlimit_resource;
#endif

  rlimit64 curr_limit;
  if (prlimit64(pid, static_cast<RlimitResource>(resource), nullptr,
                &curr_limit) == -1) {
    PLOG(ERROR) << "prlimit64(" << pid << ", " << util::GetRlimitName(resource)
                << ")";
  } else if (rlim.rlim_cur > curr_limit.rlim_max) {
    // In such case, don't update the limits, as it will fail. Just stick to the
    // current ones (which are already lower than intended).
    LOG(ERROR) << util::GetRlimitName(resource)
               << ": new.current > current.max (" << rlim.rlim_cur << " > "
               << curr_limit.rlim_max << "), skipping";
    return true;
  }

  if (prlimit64(pid, static_cast<RlimitResource>(resource), &rlim, nullptr) ==
      -1) {
    PLOG(ERROR) << "prlimit64(" << pid << ", " << util::GetRlimitName(resource)
                << ", " << rlim.rlim_cur << ")";
    return false;
  }

  return true;
}

bool MonitorBase::InitApplyLimits() {
  Limits* limits = executor_->limits();
  return InitApplyLimit(process_.main_pid, RLIMIT_AS, limits->rlimit_as()) &&
         InitApplyLimit(process_.main_pid, RLIMIT_CPU, limits->rlimit_cpu()) &&
         InitApplyLimit(process_.main_pid, RLIMIT_FSIZE,
                        limits->rlimit_fsize()) &&
         InitApplyLimit(process_.main_pid, RLIMIT_NOFILE,
                        limits->rlimit_nofile()) &&
         InitApplyLimit(process_.main_pid, RLIMIT_CORE, limits->rlimit_core());
}

bool MonitorBase::InitSendIPC() { return ipc_->SendFdsOverComms(); }

bool MonitorBase::WaitForSandboxReady() {
  uint32_t tmp;
  if (!comms_->RecvUint32(&tmp)) {
    LOG(ERROR) << "Couldn't receive 'Client::kClient2SandboxReady' message";
    return false;
  }
  if (tmp != Client::kClient2SandboxReady) {
    LOG(ERROR) << "Received " << tmp << " != Client::kClient2SandboxReady ("
               << Client::kClient2SandboxReady << ")";
    return false;
  }
  return true;
}

void MonitorBase::LogSyscallViolation(const Syscall& syscall) const {
  // Do not unwind libunwind.
  if (executor_->libunwind_sbox_for_pid_ != 0) {
    LOG(ERROR) << "Sandbox violation during execution of libunwind: "
               << syscall.GetDescription();
    return;
  }

  // So, this is an invalid syscall. Will be killed by seccomp-bpf policies as
  // well, but we should be on a safe side here as well.
  LOG(ERROR) << "SANDBOX VIOLATION : PID: " << syscall.pid() << ", PROG: '"
             << util::GetProgName(syscall.pid())
             << "' : " << syscall.GetDescription();
  if (SAPI_VLOG_IS_ON(1)) {
    VLOG(1) << "Cmdline: " << util::GetCmdLine(syscall.pid());
    VLOG(1) << "Task Name: " << util::GetProcStatusLine(syscall.pid(), "Name");
    VLOG(1) << "Tgid: " << util::GetProcStatusLine(syscall.pid(), "Tgid");
  }

  LogSyscallViolationExplanation(syscall);
}

void MonitorBase::LogSyscallViolationExplanation(const Syscall& syscall) const {
  const uintptr_t syscall_nr = syscall.nr();
  const uintptr_t arg0 = syscall.args()[0];

  // This follows policy in Policy::GetDefaultPolicy - keep it in sync.
  if (syscall.arch() != Syscall::GetHostArch()) {
    LOG(ERROR)
        << "This is a violation because the syscall was issued because the"
        << " sandboxee and executor architectures are different.";
    return;
  }
  if (syscall_nr == __NR_ptrace) {
    LOG(ERROR)
        << "This is a violation because the ptrace syscall would be unsafe in"
        << " sandbox2, so it has been blocked.";
    return;
  }
  if (syscall_nr == __NR_bpf) {
    LOG(ERROR)
        << "This is a violation because the bpf syscall would be risky in"
        << " a sandbox, so it has been blocked.";
    return;
  }
  if (syscall_nr == __NR_clone && ((arg0 & CLONE_UNTRACED) != 0)) {
    LOG(ERROR) << "This is a violation because calling clone with CLONE_UNTRACE"
               << " would be unsafe in sandbox2, so it has been blocked.";
    return;
  }
}

bool MonitorBase::StackTraceCollectionPossible() const {
  // Only get the stacktrace if we are not in the libunwind sandbox (avoid
  // recursion).
  if ((policy_->GetNamespace() ||
       absl::GetFlag(FLAGS_sandbox_libunwind_crash_handler) == false) &&
      executor_->libunwind_recursion_depth() <= 1) {
    return true;
  }
  LOG(ERROR) << "Cannot collect stack trace. Unwind pid "
             << executor_->libunwind_sbox_for_pid_ << ", namespace "
             << policy_->GetNamespaceOrNull();
  return false;
}

void MonitorBase::EnableNetworkProxyServer() {
  int fd = ipc_->ReceiveFd(NetworkProxyClient::kFDName);

  network_proxy_server_ = std::make_unique<NetworkProxyServer>(
      fd, &policy_->allowed_hosts_.value(), pthread_self());

  network_proxy_thread_ = std::thread(&NetworkProxyServer::Run,
  network_proxy_server_.get());
}

bool MonitorBase::ShouldCollectStackTrace(Result::StatusEnum status) const {
  if (!StackTraceCollectionPossible()) {
    return false;
  }
  switch (status) {
    case Result::EXTERNAL_KILL:
      return policy_->collect_stacktrace_on_kill_;
    case Result::TIMEOUT:
      return policy_->collect_stacktrace_on_timeout_;
    case Result::SIGNALED:
      return policy_->collect_stacktrace_on_signal_;
    case Result::VIOLATION:
      return policy_->collect_stacktrace_on_violation_;
    case Result::OK:
      return policy_->collect_stacktrace_on_exit_;
    default:
      return false;
  }
}

absl::StatusOr<std::vector<std::string>> MonitorBase::GetStackTrace(
    const Regs* regs) {
  return sandbox2::GetStackTrace(regs, policy_->GetNamespaceOrNull(),
                                 uses_custom_forkserver_,
                                 executor_->libunwind_recursion_depth() + 1);
}

absl::StatusOr<std::vector<std::string>> MonitorBase::GetAndLogStackTrace(
    const Regs* regs) {
  auto stack_trace = GetStackTrace(regs);
  if (!stack_trace.ok()) {
    return stack_trace.status();
  }

  LOG(INFO) << "Stack trace: [";
  for (const auto& frame : CompactStackTrace(*stack_trace)) {
    LOG(INFO) << "  " << frame;
  }
  LOG(INFO) << "]";

  return stack_trace;
}
}  // namespace sandbox2