mirror of
https://github.com/google/sandboxed-api.git
synced 2024-03-22 13:11:30 +08:00
5aa13876a4
PiperOrigin-RevId: 239159980 Change-Id: Ic6185368392622bf3f4c661e37f6b9fcca0d60a6
1060 lines
36 KiB
C++
1060 lines
36 KiB
C++
// Copyright 2019 Google LLC. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
// Implementation file for the sandbox2::Monitor class.
|
|
|
|
#include "sandboxed_api/sandbox2/monitor.h"
|
|
|
|
#include <linux/posix_types.h> // NOLINT: Needs to come before linux/ipc.h
|
|
|
|
#include <linux/ipc.h>
|
|
#include <sched.h>
|
|
#include <sys/mman.h>
|
|
#include <sys/ptrace.h>
|
|
#include <sys/time.h>
|
|
#include <sys/wait.h>
|
|
#include <syscall.h>
|
|
#include <unistd.h>
|
|
|
|
#include <algorithm>
|
|
#include <atomic>
|
|
#include <cerrno>
|
|
#include <csignal>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <ctime>
|
|
#include <fstream>
|
|
#include <memory>
|
|
#include <set>
|
|
#include <sstream>
|
|
#include <string>
|
|
|
|
#include <glog/logging.h>
|
|
#include "sandboxed_api/util/flag.h"
|
|
#include "absl/memory/memory.h"
|
|
#include "absl/strings/str_cat.h"
|
|
#include "absl/strings/str_format.h"
|
|
#include "absl/time/time.h"
|
|
#include "sandboxed_api/sandbox2/client.h"
|
|
#include "sandboxed_api/sandbox2/comms.h"
|
|
#include "sandboxed_api/sandbox2/executor.h"
|
|
#include "sandboxed_api/sandbox2/limits.h"
|
|
#include "sandboxed_api/sandbox2/mounts.h"
|
|
#include "sandboxed_api/sandbox2/namespace.h"
|
|
#include "sandboxed_api/sandbox2/policy.h"
|
|
#include "sandboxed_api/sandbox2/regs.h"
|
|
#include "sandboxed_api/sandbox2/result.h"
|
|
#include "sandboxed_api/sandbox2/sanitizer.h"
|
|
#include "sandboxed_api/sandbox2/stack-trace.h"
|
|
#include "sandboxed_api/sandbox2/syscall.h"
|
|
#include "sandboxed_api/sandbox2/util.h"
|
|
|
|
ABSL_FLAG(bool, sandbox2_report_on_sandboxee_signal, true,
|
|
"Report sandbox2 sandboxee deaths caused by signals");
|
|
|
|
ABSL_FLAG(bool, sandbox2_report_on_sandboxee_timeout, true,
|
|
"Report sandbox2 sandboxee timeouts");
|
|
|
|
ABSL_DECLARE_FLAG(bool, sandbox2_danger_danger_permit_all);
|
|
ABSL_DECLARE_FLAG(string, sandbox2_danger_danger_permit_all_and_log);
|
|
|
|
namespace sandbox2 {
|
|
|
|
namespace {
|
|
|
|
// We could use the ProcMapsIterator, however we want the full file content.
|
|
std::string ReadProcMaps(pid_t pid) {
|
|
std::ifstream input(absl::StrCat("/proc/", pid, "/maps"),
|
|
std::ios_base::in | std::ios_base::binary);
|
|
std::ostringstream contents;
|
|
contents << input.rdbuf();
|
|
return contents.str();
|
|
}
|
|
|
|
} // namespace
|
|
|
|
// Wires the monitor to its executor, policy and notification sink. The Comms
// channel and IPC object are taken from the executor. When the
// sandbox2_danger_danger_permit_all_and_log flag names a file, that file is
// opened for appending; failing to open it is fatal (PCHECK).
Monitor::Monitor(Executor* executor, Policy* policy, Notify* notify)
    : executor_(executor),
      notify_(notify),
      policy_(policy),
      comms_(executor_->ipc()->comms()),
      ipc_(executor_->ipc()),
      setup_counter_(new absl::BlockingCounter(1)),
      done_(false),
      wait_for_execve_(executor->enable_sandboxing_pre_execve_) {
  const std::string log_path =
      absl::GetFlag(FLAGS_sandbox2_danger_danger_permit_all_and_log);
  if (log_path.empty()) {
    return;
  }
  log_file_ = std::fopen(log_path.c_str(), "a+");
  PCHECK(log_file_ != nullptr)
      << "Failed to open log file '" << log_path << "'";
}
|
|
|
|
// Deletes the wall-time timer (if one was created) and closes the syscall
// log file (if one was opened by the constructor).
Monitor::~Monitor() {
  CleanUpTimer();
  if (log_file_ != nullptr) {
    std::fclose(log_file_);
  }
}
|
|
|
|
void Monitor::Run() {
|
|
using DecrementCounter = decltype(setup_counter_);
|
|
std::unique_ptr<DecrementCounter, std::function<void(DecrementCounter*)>>
|
|
decrement_count{&setup_counter_, [](DecrementCounter* counter) {
|
|
(*counter)->DecrementCount();
|
|
}};
|
|
|
|
struct MonitorCleanup {
|
|
~MonitorCleanup() {
|
|
getrusage(RUSAGE_THREAD, capture->result_.GetRUsageMonitor());
|
|
capture->notify_->EventFinished(capture->result_);
|
|
capture->ipc_->InternalCleanupFdMap();
|
|
absl::MutexLock lock(&capture->done_mutex_);
|
|
capture->done_.store(true, std::memory_order_release);
|
|
}
|
|
Monitor* capture;
|
|
} monitor_cleanup{this};
|
|
|
|
if (!InitSetupTimer()) {
|
|
result_.SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_TIMERS);
|
|
return;
|
|
}
|
|
|
|
// It'd be costly to initialize the sigset_t for each sigtimedwait()
|
|
// invocation, so do it once per Monitor.
|
|
sigset_t sigtimedwait_sset;
|
|
if (!InitSetupSignals(&sigtimedwait_sset)) {
|
|
result_.SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_SIGNALS);
|
|
return;
|
|
}
|
|
|
|
// Don't trace the child: it will allow to use 'strace -f' with the whole
|
|
// sandbox master/monitor, which ptrace_attach'es to the child.
|
|
int clone_flags = CLONE_UNTRACED;
|
|
|
|
// Get PID of the sandboxee.
|
|
pid_t init_pid = 0;
|
|
pid_ = executor_->StartSubProcess(clone_flags, policy_->GetNamespace(),
|
|
policy_->GetCapabilities(), &init_pid);
|
|
|
|
if (init_pid < 0) {
|
|
// TODO(hamacher): does this require additional handling here?
|
|
LOG(ERROR) << "Spawning init process failed";
|
|
} else if (init_pid > 0) {
|
|
PCHECK(ptrace(PTRACE_SEIZE, init_pid, 0, PTRACE_O_EXITKILL) == 0);
|
|
}
|
|
|
|
if (pid_ < 0) {
|
|
result_.SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_SUBPROCESS);
|
|
return;
|
|
}
|
|
|
|
if (!notify_->EventStarted(pid_, comms_)) {
|
|
result_.SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_NOTIFY);
|
|
return;
|
|
}
|
|
if (!InitAcceptConnection()) {
|
|
result_.SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_CONNECTION);
|
|
return;
|
|
}
|
|
if (!InitSendIPC()) {
|
|
result_.SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_IPC);
|
|
return;
|
|
}
|
|
if (!InitSendCwd()) {
|
|
result_.SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_CWD);
|
|
return;
|
|
}
|
|
if (!InitSendPolicy()) {
|
|
result_.SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_POLICY);
|
|
return;
|
|
}
|
|
if (!WaitForSandboxReady()) {
|
|
result_.SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_WAIT);
|
|
return;
|
|
}
|
|
if (!InitApplyLimits()) {
|
|
result_.SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_LIMITS);
|
|
return;
|
|
}
|
|
// This call should be the last in the init sequence, because it can cause the
|
|
// sandboxee to enter ptrace-stopped state, in which it will not be able to
|
|
// send any messages over the Comms channel.
|
|
if (!InitPtraceAttach()) {
|
|
result_.SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_PTRACE);
|
|
return;
|
|
}
|
|
|
|
// Tell the parent thread (Sandbox2 object) that we're done with the initial
|
|
// set-up process of the sandboxee.
|
|
decrement_count.reset();
|
|
|
|
MainLoop(&sigtimedwait_sset);
|
|
|
|
// Disarm the timer: it will be deleted in ~Monitor, but the Monitor object
|
|
// lifetime is controlled by owner of Sandbox2, and we don't want to leave any
|
|
// timers behind (esp. armed ones) in the meantime.
|
|
TimerArm(absl::ZeroDuration());
|
|
}
|
|
|
|
bool Monitor::IsActivelyMonitoring() {
|
|
// If we're still waiting for execve(), then we allow all syscalls.
|
|
return !wait_for_execve_;
|
|
}
|
|
|
|
// Marks the wait for execve() as over, so IsActivelyMonitoring() returns true
// from now on.
void Monitor::SetActivelyMonitoring() {
  wait_for_execve_ = false;
}
|
|
|
|
void Monitor::MainSignals(int signo, siginfo_t* si) {
|
|
VLOG(3) << "Signal '" << strsignal(signo) << "' (" << signo
|
|
<< ") received from PID: " << si->si_pid;
|
|
|
|
// SIGCHLD is received frequently due to ptrace() events being sent by child
|
|
// processes; return early to avoid costly syscalls.
|
|
if (signo == SIGCHLD) {
|
|
return;
|
|
}
|
|
|
|
// We should only receive signals from the same process (thread group). Other
|
|
// signals are suspicious (esp. if coming from a sandboxed process) Using
|
|
// syscall(__NR_getpid) here because getpid() is cached in glibc, and it
|
|
// might return previous pid if bare syscall(__NR_fork) was used instead of
|
|
// fork().
|
|
//
|
|
// The notable exception are signals caused by timer_settime which are sent
|
|
// by the kernel.
|
|
if (signo != Monitor::kTimerWallTimeSignal &&
|
|
si->si_pid != util::Syscall(__NR_getpid)) {
|
|
LOG(ERROR) << "Monitor received signal '" << strsignal(signo) << "' ("
|
|
<< signo << ") from PID " << si->si_pid
|
|
<< " which is not in the current thread group";
|
|
return;
|
|
}
|
|
|
|
switch (signo) {
|
|
case Monitor::kExternalKillSignal:
|
|
VLOG(1) << "Will kill the main pid";
|
|
ActionProcessKill(pid_, Result::EXTERNAL_KILL, 0);
|
|
break;
|
|
case Monitor::kTimerWallTimeSignal:
|
|
VLOG(1) << "Sandbox process hit timeout due to the walltime timer";
|
|
ActionProcessKill(pid_, Result::TIMEOUT, 0);
|
|
break;
|
|
case Monitor::kTimerSetSignal:
|
|
VLOG(1) << "Will set the walltime timer to " << si->si_value.sival_int
|
|
<< " seconds";
|
|
TimerArm(absl::Seconds(si->si_value.sival_int));
|
|
break;
|
|
case Monitor::kDumpStackSignal:
|
|
VLOG(1) << "Dump the main pid's stack";
|
|
should_dump_stack_ = true;
|
|
PidInterrupt(pid_);
|
|
break;
|
|
default:
|
|
LOG(ERROR) << "Unknown signal received: " << signo;
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Not defined in glibc. Extracts the ptrace event number, which is stored in
// bits 16-23 of a waitpid() status. The argument is parenthesized so that
// expression arguments (e.g. `a | b`) are masked as a whole.
#define __WPTRACEEVENT(x) (((x) & 0xff0000) >> 16)
|
|
bool Monitor::MainWait() {
|
|
// All possible process status change event must be checked as SIGCHLD
|
|
// is reported once only for all events that arrived at the same time.
|
|
for (;;) {
|
|
int status;
|
|
// It should be a non-blocking operation (hence WNOHANG), so this function
|
|
// returns quickly if there are no events to be processed.
|
|
int ret = waitpid(-1, &status, __WNOTHREAD | __WALL | WUNTRACED | WNOHANG);
|
|
|
|
// No traced processes have changed their status yet.
|
|
if (ret == 0) {
|
|
return false;
|
|
}
|
|
|
|
if (ret == -1 && errno == ECHILD) {
|
|
LOG(ERROR) << "PANIC(). The main process has not exited yet, "
|
|
<< "yet we haven't seen its exit event";
|
|
// We'll simply exit which will kill all remaining processes (if
|
|
// there are any) because of the PTRACE_O_EXITKILL ptrace() flag.
|
|
return true;
|
|
}
|
|
if (ret == -1 && errno == EINTR) {
|
|
VLOG(3) << "waitpid() interruped with EINTR";
|
|
continue;
|
|
}
|
|
if (ret == -1) {
|
|
PLOG(ERROR) << "waitpid() failed";
|
|
continue;
|
|
}
|
|
|
|
VLOG(3) << "waitpid() returned with PID: " << ret << ", status: " << status;
|
|
|
|
if (WIFEXITED(status)) {
|
|
VLOG(1) << "PID: " << ret
|
|
<< " finished with code: " << WEXITSTATUS(status);
|
|
// That's the main process, set the exit code, and exit. It will kill
|
|
// all remaining processes (if there are any) because of the
|
|
// PTRACE_O_EXITKILL ptrace() flag.
|
|
if (ret == pid_) {
|
|
if (IsActivelyMonitoring()) {
|
|
result_.SetExitStatusCode(Result::OK, WEXITSTATUS(status));
|
|
} else {
|
|
result_.SetExitStatusCode(Result::SETUP_ERROR,
|
|
Result::FAILED_MONITOR);
|
|
}
|
|
return true;
|
|
}
|
|
} else if (WIFSIGNALED(status)) {
|
|
VLOG(1) << "PID: " << ret << " terminated with signal: "
|
|
<< util::GetSignalName(WTERMSIG(status));
|
|
if (ret == pid_) {
|
|
// That's the main process, depending on the result of the process take
|
|
// the register content and/or the stack trace. The death of this
|
|
// process will cause all remaining processes to be killed (if there are
|
|
// any), see the PTRACE_O_EXITKILL ptrace() flag.
|
|
|
|
// When the process is killed from a signal from within the result
|
|
// status will be still unset, fix this.
|
|
// The other cases should either be already handled, or (in the case of
|
|
// Result::OK) should be impossible to reach.
|
|
if (result_.final_status() == Result::UNSET) {
|
|
result_.SetExitStatusCode(Result::SIGNALED, WTERMSIG(status));
|
|
} else if (result_.final_status() == Result::OK) {
|
|
LOG(ERROR) << "Unexpected codepath taken";
|
|
}
|
|
return true;
|
|
}
|
|
} else if (WIFSTOPPED(status)) {
|
|
VLOG(2) << "PID: " << ret
|
|
<< " received signal: " << util::GetSignalName(WSTOPSIG(status))
|
|
<< " with event: " << __WPTRACEEVENT(status);
|
|
StateProcessStopped(ret, status);
|
|
} else if (WIFCONTINUED(status)) {
|
|
VLOG(2) << "PID: " << ret << " is being continued";
|
|
}
|
|
}
|
|
}
|
|
|
|
void Monitor::MainLoop(sigset_t* sset) {
|
|
for (;;) {
|
|
// Use a time-out, so we can check for missed waitpid() events. It should
|
|
// not happen during regular operations, so it's a defense-in-depth
|
|
// mechanism against SIGCHLD signals being lost by the kernel (since these
|
|
// are not-RT signals - i.e. not queued).
|
|
static const timespec ts = {kWakeUpPeriodSec, kWakeUpPeriodNSec};
|
|
|
|
// Wait for any kind of events, e.g. signals sent from the parent process,
|
|
// or SIGCHLD sent by kernel indicating that state of one of the traced
|
|
// processes has changed.
|
|
siginfo_t si;
|
|
int ret = sigtimedwait(sset, &si, &ts);
|
|
if (ret > 0) {
|
|
// Process signals which arrived.
|
|
MainSignals(ret, &si);
|
|
}
|
|
|
|
// If CheckWait reported no more traced processes, or that
|
|
// the main pid had exited, we should break this loop (i.e. our job is
|
|
// done here).
|
|
//
|
|
// MainWait() should use a not-blocking (e.g. WNOHANG with waitpid())
|
|
// syntax, so it returns quickly if there are not status changes in
|
|
// traced processes.
|
|
if (MainWait()) {
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
bool Monitor::InitSetupTimer() {
|
|
walltime_timer_ = absl::make_unique<timer_t>();
|
|
|
|
// Set the wall-time timer.
|
|
sigevent sevp;
|
|
sevp.sigev_value.sival_ptr = walltime_timer_.get();
|
|
sevp.sigev_signo = kTimerWallTimeSignal;
|
|
sevp.sigev_notify = SIGEV_THREAD_ID | SIGEV_SIGNAL;
|
|
sevp._sigev_un._tid = static_cast<pid_t>(util::Syscall(__NR_gettid));
|
|
// GLibc's implementation seem to mis-behave during timer_delete, as it's
|
|
// trying to find out whether POSIX TIMERs are available. So, we stick to
|
|
// syscalls for this class of calls.
|
|
if (util::Syscall(__NR_timer_create, CLOCK_REALTIME,
|
|
reinterpret_cast<uintptr_t>(&sevp),
|
|
reinterpret_cast<uintptr_t>(walltime_timer_.get())) == -1) {
|
|
walltime_timer_ = nullptr;
|
|
PLOG(ERROR) << "timer_create(CLOCK_REALTIME, walltime_timer_)";
|
|
return false;
|
|
}
|
|
return TimerArm(executor_->limits()->wall_time_limit());
|
|
}
|
|
|
|
// Can be used from a signal handler. Avoid non-reentrant functions.
|
|
bool Monitor::TimerArm(absl::Duration duration) {
|
|
VLOG(2) << (duration == absl::ZeroDuration() ? "Disarming" : "Arming")
|
|
<< " the walltime timer with " << absl::FormatDuration(duration);
|
|
|
|
itimerspec ts;
|
|
absl::Duration rem;
|
|
ts.it_value.tv_sec = absl::IDivDuration(duration, absl::Seconds(1), &rem);
|
|
ts.it_value.tv_nsec = absl::ToInt64Nanoseconds(rem);
|
|
ts.it_interval.tv_sec =
|
|
duration != absl::ZeroDuration() ? 1L : 0L; // Re-fire every 1 sec.
|
|
ts.it_interval.tv_nsec = 0UL;
|
|
itimerspec* null_ts = nullptr;
|
|
if (util::Syscall(__NR_timer_settime,
|
|
reinterpret_cast<uintptr_t>(*walltime_timer_), 0,
|
|
reinterpret_cast<uintptr_t>(&ts),
|
|
reinterpret_cast<uintptr_t>(null_ts)) == -1) {
|
|
PLOG(ERROR) << "timer_settime(): time: " << absl::FormatDuration(duration);
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void Monitor::CleanUpTimer() {
|
|
if (walltime_timer_) {
|
|
if (util::Syscall(__NR_timer_delete,
|
|
reinterpret_cast<uintptr_t>(*walltime_timer_)) == -1) {
|
|
PLOG(ERROR) << "timer_delete()";
|
|
}
|
|
}
|
|
}
|
|
|
|
bool Monitor::InitSetupSig(int signo, sigset_t* sset) {
|
|
// sigtimedwait will react (wake-up) to arrival of this signal.
|
|
sigaddset(sset, signo);
|
|
|
|
// Block this specific signal, so only sigtimedwait reacts to it.
|
|
sigset_t block_set;
|
|
if (sigemptyset(&block_set) == -1) {
|
|
PLOG(ERROR) << "sigemptyset()";
|
|
return false;
|
|
}
|
|
if (sigaddset(&block_set, signo) == -1) {
|
|
PLOG(ERROR) << "sigaddset(" << signo << ")";
|
|
return false;
|
|
}
|
|
if (pthread_sigmask(SIG_BLOCK, &block_set, nullptr) == -1) {
|
|
PLOG(ERROR) << "pthread_sigmask(SIG_BLOCK, " << signo << ")";
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool Monitor::InitSetupSignals(sigset_t* sset) {
|
|
sigemptyset(sset);
|
|
|
|
return Monitor::InitSetupSig(kExternalKillSignal, sset) &&
|
|
Monitor::InitSetupSig(kTimerWallTimeSignal, sset) &&
|
|
Monitor::InitSetupSig(kTimerSetSignal, sset) &&
|
|
Monitor::InitSetupSig(kDumpStackSignal, sset) &&
|
|
// SIGCHLD means that a new children process status change event
|
|
// has been delivered (e.g. due ptrace notification).
|
|
Monitor::InitSetupSig(SIGCHLD, sset);
|
|
}
|
|
|
|
bool Monitor::InitSendPolicy() {
|
|
if (!policy_->SendPolicy(comms_)) {
|
|
LOG(ERROR) << "Couldn't send policy";
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool Monitor::InitSendCwd() {
|
|
if (!comms_->SendString(executor_->cwd_)) {
|
|
PLOG(ERROR) << "Couldn't send cwd";
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool Monitor::InitApplyLimit(pid_t pid, __rlimit_resource resource,
|
|
const rlimit64& rlim) const {
|
|
std::string rlim_name = absl::StrCat("UNKNOWN: ", resource);
|
|
switch (resource) {
|
|
case RLIMIT_AS:
|
|
rlim_name = "RLIMIT_AS";
|
|
break;
|
|
case RLIMIT_FSIZE:
|
|
rlim_name = "RLIMIT_FSIZE";
|
|
break;
|
|
case RLIMIT_NOFILE:
|
|
rlim_name = "RLIMIT_NOFILE";
|
|
break;
|
|
case RLIMIT_CPU:
|
|
rlim_name = "RLIMIT_CPU";
|
|
break;
|
|
case RLIMIT_CORE:
|
|
rlim_name = "RLIMIT_CORE";
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
rlimit64 curr_limit;
|
|
if (prlimit64(pid, resource, nullptr, &curr_limit) == -1) {
|
|
PLOG(ERROR) << "prlimit64(" << pid << ", " << rlim_name << ")";
|
|
} else {
|
|
// In such case, don't update the limits, as it will fail. Just stick to the
|
|
// current ones (which are already lower than intended).
|
|
if (rlim.rlim_cur > curr_limit.rlim_max) {
|
|
LOG(ERROR) << rlim_name << ": new.current > current.max ("
|
|
<< rlim.rlim_cur << " > " << curr_limit.rlim_max
|
|
<< "), skipping";
|
|
return true;
|
|
}
|
|
}
|
|
if (prlimit64(pid, resource, &rlim, nullptr) == -1) {
|
|
PLOG(ERROR) << "prlimit64(RLIMIT_AS, " << rlim.rlim_cur << ")";
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool Monitor::InitApplyLimits() {
|
|
Limits* limits = executor_->limits();
|
|
return InitApplyLimit(pid_, RLIMIT_AS, limits->rlimit_as()) &&
|
|
InitApplyLimit(pid_, RLIMIT_CPU, limits->rlimit_cpu()) &&
|
|
InitApplyLimit(pid_, RLIMIT_FSIZE, limits->rlimit_fsize()) &&
|
|
InitApplyLimit(pid_, RLIMIT_NOFILE, limits->rlimit_nofile()) &&
|
|
InitApplyLimit(pid_, RLIMIT_CORE, limits->rlimit_core());
|
|
}
|
|
|
|
// Hands the IPC file descriptors to the sandboxee over Comms; returns
// whatever IPC::SendFdsOverComms() reports.
bool Monitor::InitSendIPC() {
  return ipc_->SendFdsOverComms();
}
|
|
|
|
bool Monitor::WaitForSandboxReady() {
|
|
uint32_t tmp;
|
|
if (!comms_->RecvUint32(&tmp)) {
|
|
LOG(ERROR) << "Couldn't receive 'Client::kClient2SandboxReady' message";
|
|
return false;
|
|
}
|
|
if (tmp != Client::kClient2SandboxReady) {
|
|
LOG(ERROR) << "Received " << tmp << " != Client::kClient2SandboxReady ("
|
|
<< Client::kClient2SandboxReady << ")";
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool Monitor::InitPtraceAttach() {
|
|
sanitizer::WaitForTsan();
|
|
|
|
// Get a list of tasks.
|
|
std::set<int> tasks;
|
|
if (!sanitizer::GetListOfTasks(pid_, &tasks)) {
|
|
LOG(ERROR) << "Could not get list of tasks";
|
|
return false;
|
|
}
|
|
|
|
// With TSYNC, we can allow threads: seccomp applies to all threads.
|
|
|
|
if (tasks.size() > 1) {
|
|
LOG(WARNING) << "PID " << pid_ << " has " << tasks.size() << " threads,"
|
|
<< " at the time of call to SandboxMeHere. If you are seeing"
|
|
<< " more sandbox violations than expected, this might be"
|
|
<< " the reason why"
|
|
<< ".";
|
|
}
|
|
|
|
intptr_t ptrace_opts =
|
|
PTRACE_O_TRACESYSGOOD | PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK |
|
|
PTRACE_O_TRACEVFORKDONE | PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC |
|
|
PTRACE_O_TRACEEXIT | PTRACE_O_TRACESECCOMP | PTRACE_O_EXITKILL;
|
|
|
|
bool main_pid_found = false;
|
|
for (auto task : tasks) {
|
|
if (task == pid_) {
|
|
main_pid_found = true;
|
|
}
|
|
|
|
// In some situations we allow ptrace to try again when it fails.
|
|
bool ptrace_succeeded = false;
|
|
int retries = 0;
|
|
auto deadline = absl::Now() + absl::Seconds(2);
|
|
while (absl::Now() < deadline) {
|
|
int ret = ptrace(PTRACE_SEIZE, task, 0, ptrace_opts);
|
|
if (ret == 0) {
|
|
ptrace_succeeded = true;
|
|
break;
|
|
}
|
|
if (ret != 0 && errno == ESRCH) {
|
|
// A task may have exited since we captured the task list, we will allow
|
|
// things to continue after we log a warning.
|
|
PLOG(WARNING) << "ptrace(PTRACE_SEIZE, " << task << ", "
|
|
<< absl::StrCat("0x", absl::Hex(ptrace_opts))
|
|
<< ") skipping exited task. Continuing with other tasks.";
|
|
ptrace_succeeded = true;
|
|
break;
|
|
}
|
|
if (ret != 0 && errno == EPERM) {
|
|
// Sometimes when a task is exiting we can get an EPERM from ptrace.
|
|
// Let's try again up until the timeout in this situation.
|
|
PLOG(WARNING) << "ptrace(PTRACE_SEIZE, " << task << ", "
|
|
<< absl::StrCat("0x", absl::Hex(ptrace_opts))
|
|
<< "), trying again...";
|
|
|
|
// Exponential Backoff.
|
|
constexpr auto kInitialRetry = absl::Milliseconds(1);
|
|
constexpr auto kMaxRetry = absl::Milliseconds(20);
|
|
const auto retry_interval =
|
|
kInitialRetry * (1 << std::min(10, retries++));
|
|
absl::SleepFor(std::min(retry_interval, kMaxRetry));
|
|
continue;
|
|
}
|
|
|
|
// Any other errno will be considered a failure.
|
|
PLOG(ERROR) << "ptrace(PTRACE_SEIZE, " << task << ", "
|
|
<< absl::StrCat("0x", absl::Hex(ptrace_opts)) << ") failed.";
|
|
return false;
|
|
}
|
|
|
|
if (!ptrace_succeeded) {
|
|
LOG(ERROR) << "ptrace(PTRACE_SEIZE, " << task << ", "
|
|
<< absl::StrCat("0x", absl::Hex(ptrace_opts))
|
|
<< ") failed after retrying until the timeout.";
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if (!main_pid_found) {
|
|
LOG(ERROR) << "The pid " << pid_ << " was not found in its own tasklist.";
|
|
return false;
|
|
}
|
|
|
|
// Get a list of tasks after attaching.
|
|
std::set<int> tasks_after;
|
|
if (!sanitizer::GetListOfTasks(pid_, &tasks_after)) {
|
|
LOG(ERROR) << "Could not get list of tasks";
|
|
return false;
|
|
}
|
|
|
|
// Check that no new threads have shown up. Note: tasks_after can have fewer
|
|
// tasks than before but no new tasks can be added as they would be missing
|
|
// from the initial task list.
|
|
if (!std::includes(tasks.begin(), tasks.end(), tasks_after.begin(),
|
|
tasks_after.end())) {
|
|
LOG(ERROR) << "The pid " << pid_
|
|
<< " spawned new threads while we were trying to attach to it.";
|
|
return false;
|
|
}
|
|
|
|
// No glibc wrapper for gettid - see 'man gettid'.
|
|
VLOG(1) << "Monitor (PID: " << getpid()
|
|
<< ", TID: " << util::Syscall(__NR_gettid)
|
|
<< ") attached to PID: " << pid_;
|
|
|
|
// Technically, the sandboxee can be in a ptrace-stopped state right now,
|
|
// because some signal could have arrived in the meantime. Yet, this
|
|
// Comms::SendUint32 call shouldn't lock our process, because the underlying
|
|
// socketpair() channel is buffered, hence it will accept the uint32_t message
|
|
// no matter what is the current state of the sandboxee, and it will allow for
|
|
// our process to continue and unlock the sandboxee with the proper ptrace
|
|
// event handling.
|
|
if (!comms_->SendUint32(Client::kSandbox2ClientDone)) {
|
|
LOG(ERROR) << "Couldn't send Client::kSandbox2ClientDone message";
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool Monitor::InitAcceptConnection() {
|
|
// It's a pre-connected Comms channel, no need to accept new connection or
|
|
// verify the peer (sandboxee).
|
|
if (comms_->IsConnected()) {
|
|
return true;
|
|
}
|
|
|
|
if (!comms_->Accept()) {
|
|
return false;
|
|
}
|
|
|
|
// Check whether the PID which has connected to us, is the PID we're
|
|
// expecting.
|
|
pid_t cred_pid;
|
|
uid_t cred_uid;
|
|
gid_t cred_gid;
|
|
if (!comms_->RecvCreds(&cred_pid, &cred_uid, &cred_gid)) {
|
|
LOG(ERROR) << "Couldn't receive credentials";
|
|
return false;
|
|
}
|
|
|
|
if (pid_ != cred_pid) {
|
|
LOG(ERROR) << "Initial PID (" << pid_ << ") differs from the PID received "
|
|
<< "from the peer (" << cred_pid << ")";
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void Monitor::ActionProcessContinue(pid_t pid, int signo) {
|
|
if (ptrace(PTRACE_CONT, pid, 0, signo) == -1) {
|
|
PLOG(ERROR) << "ptrace(PTRACE_CONT, pid=" << pid << ", sig=" << signo
|
|
<< ")";
|
|
}
|
|
}
|
|
|
|
void Monitor::ActionProcessStop(pid_t pid, int signo) {
|
|
if (ptrace(PTRACE_LISTEN, pid, 0, signo) == -1) {
|
|
PLOG(ERROR) << "ptrace(PTRACE_LISTEN, pid=" << pid << ", sig=" << signo
|
|
<< ")";
|
|
}
|
|
}
|
|
|
|
// Decides what happens to a syscall that the seccomp policy handed to the
// tracer. The syscall is let through when (checked in this order) it is an
// execveat issued before sandboxing became active, the Notify object permits
// it, a permit-all log file is open, or the permit-all flag is set. Anything
// else is treated as a policy violation.
void Monitor::ActionProcessSyscall(Regs* regs, const Syscall& syscall) {
  const auto let_it_run = [this, regs]() {
    ActionProcessContinue(regs->pid(), 0);
  };

  // If the sandboxing is not enabled yet, allow the first __NR_execveat.
  if (syscall.nr() == __NR_execveat && !IsActivelyMonitoring()) {
    VLOG(1) << "[PERMITTED/BEFORE_EXECVEAT]: "
            << "SYSCALL ::: PID: " << regs->pid() << ", PROG: '"
            << util::GetProgName(regs->pid())
            << "' : " << syscall.GetDescription();
    let_it_run();
    return;
  }

  // Notify gets a say in whether this syscall is allowed. That is useful for
  // set-ups where some syscalls should be logged but nonetheless permitted
  // ('permissible syscalls' in sandbox v1).
  if (notify_->EventSyscallTrap(syscall)) {
    LOG(WARNING) << "[PERMITTED]: SYSCALL ::: PID: " << regs->pid()
                 << ", PROG: '" << util::GetProgName(regs->pid())
                 << "' : " << syscall.GetDescription();
    let_it_run();
    return;
  }

  // TODO(wiktorg): Further clean that up, probably while doing monitor cleanup
  // log_file_ not null iff FLAGS_sandbox2_danger_danger_permit_all_and_log is
  // set.
  if (log_file_) {
    const std::string description = syscall.GetDescription();
    PCHECK(absl::FPrintF(log_file_, "PID: %d %s\n", regs->pid(),
                         description) >= 0);
    let_it_run();
    return;
  }

  if (absl::GetFlag(FLAGS_sandbox2_danger_danger_permit_all)) {
    let_it_run();
    return;
  }

  ActionProcessSyscallViolation(regs, syscall, kSyscallViolation);
}
|
|
|
|
// Handles a syscall that violates the policy: logs it, notifies the Notify
// object, records the violation details (syscall, optional stack trace,
// registers, program name, memory maps) in result_ and kills the sandboxee.
//
// regs:           registers of the offending task.
// syscall:        the offending syscall.
// violation_type: forwarded to Notify::EventSyscallViolation().
void Monitor::ActionProcessSyscallViolation(Regs* regs, const Syscall& syscall,
                                            ViolationType violation_type) {
  const pid_t offender = regs->pid();

  LogAccessViolation(syscall);
  notify_->EventSyscallViolation(syscall, violation_type);
  result_.SetExitStatusCode(Result::VIOLATION, syscall.nr());
  result_.SetSyscall(absl::make_unique<Syscall>(syscall));

  // A stack trace is only attempted outside of the libunwind sandbox (to
  // avoid recursion) and when the policy has a namespace.
  const bool can_unwind =
      executor_->libunwind_sbox_for_pid_ == 0 && policy_->GetNamespace();
  if (can_unwind) {
    if (!policy_->collect_stacktrace_on_violation_) {
      LOG(ERROR) << "Stack traces have been disabled";
    } else {
      result_.SetStackTrace(
          GetStackTrace(regs, policy_->GetNamespace()->mounts()));
      LOG(ERROR) << "Stack trace: " << result_.GetStackTrace();
    }
  }

  // The result object loads its own copy of the registers: `regs` points to
  // a stack variable which might not live long enough.
  result_.LoadRegs(offender);
  result_.SetProgName(util::GetProgName(offender));
  result_.SetProcMaps(ReadProcMaps(pid_));

  // Make the syscall be skipped with -ENOSYS as its return value. The
  // process is killed by ActionProcessKill() right below, so this is just a
  // precaution.
  const auto skip_status = regs->SkipSyscallReturnValue(-ENOSYS);
  if (!skip_status.ok()) {
    LOG(ERROR) << skip_status;
  }

  ActionProcessKill(offender, Result::VIOLATION, syscall.nr());
}
|
|
|
|
void Monitor::LogAccessViolation(const Syscall& syscall) {
|
|
// Do not unwind libunwind.
|
|
if (executor_->libunwind_sbox_for_pid_ != 0) {
|
|
LOG(ERROR) << "Sandbox violation during execution of libunwind: "
|
|
<< syscall.GetDescription();
|
|
return;
|
|
}
|
|
|
|
uintptr_t syscall_nr = syscall.nr();
|
|
uintptr_t arg0 = syscall.args()[0];
|
|
|
|
// So, this is an invalid syscall. Will be killed by seccomp-bpf policies as
|
|
// well, but we should be on a safe side here as well.
|
|
LOG(ERROR) << "SANDBOX VIOLATION : PID: " << syscall.pid() << ", PROG: '"
|
|
<< util::GetProgName(syscall.pid())
|
|
<< "' : " << syscall.GetDescription();
|
|
|
|
// This follows policy in Policy::GetDefaultPolicy - keep it in sync.
|
|
if (syscall.arch() != Syscall::GetHostArch()) {
|
|
LOG(ERROR)
|
|
<< "This is a violation because the syscall was issued because the"
|
|
<< " sandboxee and executor architectures are different.";
|
|
return;
|
|
}
|
|
|
|
if (syscall_nr == __NR_ptrace) {
|
|
LOG(ERROR)
|
|
<< "This is a violation because the ptrace syscall would be unsafe in"
|
|
<< " sandbox2, so it has been blocked.";
|
|
return;
|
|
}
|
|
if (syscall_nr == __NR_bpf) {
|
|
LOG(ERROR)
|
|
<< "This is a violation because the bpf syscall would be risky in"
|
|
<< " a sandbox, so it has been blocked.";
|
|
return;
|
|
}
|
|
|
|
if (syscall_nr == __NR_clone && ((arg0 & CLONE_UNTRACED) != 0)) {
|
|
LOG(ERROR) << "This is a violation because calling clone with CLONE_UNTRACE"
|
|
<< " would be unsafe in sandbox2, so it has been blocked.";
|
|
return;
|
|
}
|
|
}
|
|
|
|
void Monitor::ActionProcessKill(pid_t pid, Result::StatusEnum status,
|
|
uintptr_t code) {
|
|
// Avoid overwriting result if we set it for instance after a violation.
|
|
if (result_.final_status() == Result::UNSET) {
|
|
result_.SetExitStatusCode(status, code);
|
|
}
|
|
|
|
VLOG(1) << "Sending SIGKILL to the PID: " << pid_;
|
|
if (kill(pid_, SIGKILL) != 0) {
|
|
LOG(FATAL) << "Could not send SIGKILL to PID " << pid_;
|
|
}
|
|
}
|
|
|
|
void Monitor::EventPtraceSeccomp(pid_t pid, int event_msg) {
|
|
VLOG(1) << "PID: " << pid << " violation uncovered via the SECCOMP_EVENT";
|
|
// If the seccomp-policy is using RET_TRACE, we request that it returns the
|
|
// syscall architecture identifier in the SECCOMP_RET_DATA.
|
|
const auto syscall_arch = static_cast<Syscall::CpuArch>(event_msg);
|
|
Regs regs(pid);
|
|
auto status = regs.Fetch();
|
|
if (!status.ok()) {
|
|
LOG(ERROR) << status;
|
|
ActionProcessKill(pid, Result::INTERNAL_ERROR, Result::FAILED_FETCH);
|
|
return;
|
|
}
|
|
|
|
Syscall syscall = regs.ToSyscall(syscall_arch);
|
|
// If the architecture of the syscall used is different that the current host
|
|
// architecture, report a violation.
|
|
if (syscall_arch != Syscall::GetHostArch()) {
|
|
ActionProcessSyscallViolation(®s, syscall, kArchitectureSwitchViolation);
|
|
return;
|
|
}
|
|
|
|
ActionProcessSyscall(®s, syscall);
|
|
}
|
|
|
|
void Monitor::EventPtraceExec(pid_t pid, int event_msg) {
|
|
if (!IsActivelyMonitoring()) {
|
|
VLOG(1) << "PTRACE_EVENT_EXEC seen from PID: " << event_msg
|
|
<< ". SANDBOX ENABLED!";
|
|
SetActivelyMonitoring();
|
|
}
|
|
ActionProcessContinue(pid, 0);
|
|
}
|
|
|
|
void Monitor::EventPtraceExit(pid_t pid, int event_msg) {
|
|
// A regular exit, let it continue.
|
|
if (WIFEXITED(event_msg)) {
|
|
ActionProcessContinue(pid, 0);
|
|
return;
|
|
}
|
|
|
|
// Everything except the SECCOMP violation can continue.
|
|
if (!WIFSIGNALED(event_msg) || WTERMSIG(event_msg) != SIGSYS) {
|
|
// Process is dying because it received a signal.
|
|
// This can occur in three cases:
|
|
// 1) Process was killed from the sandbox, in this case the result status
|
|
// was already set to Result::EXTERNAL_KILL. We do not get the stack
|
|
// trace in this case.
|
|
// 2) Process was killed because it hit a timeout. The result status is
|
|
// also already set, however we are interested in the stack trace.
|
|
// 3) Regular signal. We need to obtain everything. The status will be set
|
|
// upon the process exit handler.
|
|
if (pid == pid_) {
|
|
result_.LoadRegs(pid_);
|
|
result_.SetProgName(util::GetProgName(pid_));
|
|
result_.SetProcMaps(ReadProcMaps(pid_));
|
|
bool stacktrace_collection_possible =
|
|
policy_->GetNamespace() && executor_->libunwind_sbox_for_pid_ == 0;
|
|
auto collect_stacktrace = [this]() {
|
|
result_.SetStackTrace(GetStackTrace(result_.GetRegs(),
|
|
policy_->GetNamespace()->mounts()));
|
|
};
|
|
switch (result_.final_status()) {
|
|
case Result::EXTERNAL_KILL:
|
|
if (stacktrace_collection_possible &&
|
|
policy_->collect_stacktrace_on_kill_) {
|
|
collect_stacktrace();
|
|
}
|
|
break;
|
|
case Result::TIMEOUT:
|
|
if (stacktrace_collection_possible &&
|
|
policy_->collect_stacktrace_on_timeout_) {
|
|
collect_stacktrace();
|
|
}
|
|
break;
|
|
case Result::VIOLATION:
|
|
break;
|
|
case Result::UNSET:
|
|
// Regular signal.
|
|
if (stacktrace_collection_possible &&
|
|
policy_->collect_stacktrace_on_signal_) {
|
|
collect_stacktrace();
|
|
}
|
|
break;
|
|
default:
|
|
LOG(ERROR) << "Unexpected codepath taken";
|
|
break;
|
|
}
|
|
}
|
|
|
|
ActionProcessContinue(pid, 0);
|
|
return;
|
|
}
|
|
|
|
VLOG(1) << "PID: " << pid << " violation uncovered via the EXIT_EVENT";
|
|
|
|
// We do not generate the stack trace in the SECCOMP case as it will be
|
|
// generated during ActionProcessSyscallViolation anyway.
|
|
Regs regs(pid);
|
|
auto status = regs.Fetch();
|
|
if (!status.ok()) {
|
|
LOG(ERROR) << status;
|
|
ActionProcessKill(pid, Result::INTERNAL_ERROR, Result::FAILED_FETCH);
|
|
return;
|
|
}
|
|
|
|
auto syscall = regs.ToSyscall(Syscall::GetHostArch());
|
|
|
|
ActionProcessSyscallViolation(®s, syscall, kSyscallViolation);
|
|
}
|
|
|
|
void Monitor::EventPtraceStop(pid_t pid, int stopsig) {
|
|
// It's not a real stop signal. For example PTRACE_O_TRACECLONE and similar
|
|
// flags to ptrace(PTRACE_SEIZE) might generate this event with SIGTRAP.
|
|
if (stopsig != SIGSTOP && stopsig != SIGTSTP && stopsig != SIGTTIN &&
|
|
stopsig != SIGTTOU) {
|
|
ActionProcessContinue(pid, 0);
|
|
return;
|
|
}
|
|
// It's our PID stop signal. Stop it.
|
|
VLOG(2) << "PID: " << pid << " stopped due to "
|
|
<< util::GetSignalName(stopsig);
|
|
ActionProcessStop(pid, 0);
|
|
}
|
|
|
|
void Monitor::StateProcessStopped(pid_t pid, int status) {
|
|
int stopsig = WSTOPSIG(status);
|
|
if (__WPTRACEEVENT(status) == 0) {
|
|
// Must be a regular signal delivery.
|
|
VLOG(2) << "PID: " << pid
|
|
<< " received signal: " << util::GetSignalName(stopsig);
|
|
notify_->EventSignal(pid, stopsig);
|
|
ActionProcessContinue(pid, stopsig);
|
|
return;
|
|
}
|
|
|
|
unsigned long event_msg; // NOLINT
|
|
if (ptrace(PTRACE_GETEVENTMSG, pid, 0, &event_msg) == -1) {
|
|
if (errno == ESRCH) {
|
|
// This happens from time to time, the kernel does not guarantee us that
|
|
// we get the event in time.
|
|
PLOG(INFO) << "ptrace(PTRACE_GETEVENTMSG, " << pid << ")";
|
|
return;
|
|
}
|
|
PLOG(ERROR) << "ptrace(PTRACE_GETEVENTMSG, " << pid << ")";
|
|
ActionProcessKill(pid, Result::INTERNAL_ERROR, Result::FAILED_GETEVENT);
|
|
return;
|
|
}
|
|
|
|
if (pid == pid_ && should_dump_stack_ &&
|
|
executor_->libunwind_sbox_for_pid_ == 0 && policy_->GetNamespace()) {
|
|
Regs regs(pid);
|
|
auto status = regs.Fetch();
|
|
if (status.ok()) {
|
|
VLOG(0) << "SANDBOX STACK : PID: " << pid << ", ["
|
|
<< GetStackTrace(®s, policy_->GetNamespace()->mounts()) << "]";
|
|
} else {
|
|
LOG(WARNING) << "FAILED TO GET SANDBOX STACK : " << status;
|
|
}
|
|
should_dump_stack_ = false;
|
|
}
|
|
|
|
#if !defined(PTRACE_EVENT_STOP)
|
|
#define PTRACE_EVENT_STOP 128
|
|
#endif
|
|
|
|
switch (__WPTRACEEVENT(status)) {
|
|
case PTRACE_EVENT_FORK:
|
|
/* fall through */
|
|
case PTRACE_EVENT_VFORK:
|
|
/* fall through */
|
|
case PTRACE_EVENT_CLONE:
|
|
/* fall through */
|
|
case PTRACE_EVENT_VFORK_DONE:
|
|
ActionProcessContinue(pid, 0);
|
|
break;
|
|
case PTRACE_EVENT_EXEC:
|
|
VLOG(2) << "PID: " << pid << " PTRACE_EVENT_EXEC, PID: " << event_msg;
|
|
EventPtraceExec(pid, event_msg);
|
|
break;
|
|
case PTRACE_EVENT_EXIT:
|
|
VLOG(2) << "PID: " << pid << " PTRACE_EVENT_EXIT: " << event_msg;
|
|
EventPtraceExit(pid, event_msg);
|
|
break;
|
|
case PTRACE_EVENT_STOP:
|
|
VLOG(2) << "PID: " << pid << " PTRACE_EVENT_STOP: " << event_msg;
|
|
EventPtraceStop(pid, stopsig);
|
|
break;
|
|
case PTRACE_EVENT_SECCOMP:
|
|
VLOG(2) << "PID: " << pid << " PTRACE_EVENT_SECCOMP: " << event_msg;
|
|
EventPtraceSeccomp(pid, event_msg);
|
|
break;
|
|
default:
|
|
LOG(ERROR) << "Unknown ptrace event: " << __WPTRACEEVENT(status)
|
|
<< " with data: " << event_msg;
|
|
break;
|
|
}
|
|
}
|
|
|
|
void Monitor::PidInterrupt(pid_t pid) {
|
|
if (ptrace(PTRACE_INTERRUPT, pid, 0, 0) == -1) {
|
|
PLOG(WARNING) << "ptrace(PTRACE_INTERRUPT, pid=" << pid << ")";
|
|
}
|
|
}
|
|
|
|
} // namespace sandbox2
|