stack_trace: pass fd to sandboxee's memory instead of using process_vm_readv

Libunwind sandbox no longer needs to join sandboxee's userns.
This cleans up a lot of special handling for the libunwind sandbox.

PiperOrigin-RevId: 503140778
Change-Id: I020ea3adda05ae6ff74137b668a5fa7509c138f8
This commit is contained in:
Wiktor Garbacz 2023-01-19 05:44:11 -08:00 committed by Copybara-Service
parent f87b6feb18
commit 2f64d3d925
19 changed files with 118 additions and 98 deletions

View File

@ -154,27 +154,14 @@ absl::StatusOr<Executor::Process> Executor::StartSubProcess(
request.add_capabilities(cap);
}
file_util::fileops::FDCloser ns_fd;
if (libunwind_sbox_for_pid_ != 0) {
const std::string ns_path =
absl::StrCat("/proc/", libunwind_sbox_for_pid_, "/ns/user");
ns_fd = file_util::fileops::FDCloser(open(ns_path.c_str(), O_RDONLY));
if (ns_fd.get() == -1) {
return absl::ErrnoToStatus(
errno, absl::StrCat("Could not open user ns fd (", ns_path, ")"));
}
}
Process process;
if (fork_client_) {
process.main_pid = fork_client_->SendRequest(
request, exec_fd_.get(), client_comms_fd_.get(), ns_fd.get(),
&process.init_pid);
request, exec_fd_.get(), client_comms_fd_.get(), &process.init_pid);
} else {
process.main_pid = GlobalForkClient::SendRequest(
request, exec_fd_.get(), client_comms_fd_.get(), ns_fd.get(),
&process.init_pid);
request, exec_fd_.get(), client_comms_fd_.get(), &process.init_pid);
}
started_ = true;

View File

@ -22,7 +22,7 @@
namespace sandbox2 {
pid_t ForkClient::SendRequest(const ForkRequest& request, int exec_fd,
int comms_fd, int user_ns_fd, pid_t* init_pid) {
int comms_fd, pid_t* init_pid) {
// Acquire the channel ownership for this request (transaction).
absl::MutexLock l(&comms_mutex_);
@ -46,15 +46,6 @@ pid_t ForkClient::SendRequest(const ForkRequest& request, int exec_fd,
}
}
if (request.mode() == FORKSERVER_FORK_JOIN_SANDBOX_UNWIND) {
CHECK(user_ns_fd != -1) << "user_ns_fd cannot be -1 in unwind mode";
if (!comms_->SendFD(user_ns_fd)) {
LOG(ERROR) << "Sending user ns FD (" << user_ns_fd
<< ") to the ForkServer failed";
return -1;
}
}
int32_t pid;
// Receive init process ID.
if (!comms_->RecvInt32(&pid)) {

View File

@ -36,7 +36,7 @@ class ForkClient {
// Sends the fork request over the supplied Comms channel.
pid_t SendRequest(const ForkRequest& request, int exec_fd, int comms_fd,
int user_ns_fd = -1, pid_t* init_pid = nullptr);
pid_t* init_pid = nullptr);
pid_t pid() { return pid_; }

View File

@ -269,20 +269,13 @@ void ForkServer::PrepareExecveArgs(const ForkRequest& request,
void ForkServer::LaunchChild(const ForkRequest& request, int execve_fd,
int client_fd, uid_t uid, gid_t gid,
int user_ns_fd, int signaling_fd,
bool avoid_pivot_root) const {
int signaling_fd, bool avoid_pivot_root) const {
SAPI_RAW_CHECK(request.mode() != FORKSERVER_FORK_UNSPECIFIED,
"Forkserver mode is unspecified");
bool will_execve = (request.mode() == FORKSERVER_FORK_EXECVE ||
request.mode() == FORKSERVER_FORK_EXECVE_SANDBOX);
if (request.mode() == FORKSERVER_FORK_JOIN_SANDBOX_UNWIND) {
SAPI_RAW_CHECK(setns(user_ns_fd, CLONE_NEWUSER) == 0,
"Could not join user NS");
close(user_ns_fd);
}
// Prepare the arguments before sandboxing (if needed), as doing it after
// sandoxing can cause syscall violations (e.g. related to memory management).
std::vector<std::string> args;
@ -414,12 +407,6 @@ pid_t ForkServer::ServeRequest() {
// this to make sure the zombie process is reaped immediately.
int clone_flags = fork_request.clone_flags() | SIGCHLD;
int user_ns_fd = -1;
if (fork_request.mode() == FORKSERVER_FORK_JOIN_SANDBOX_UNWIND) {
SAPI_RAW_CHECK(comms_->RecvFD(&user_ns_fd),
"Failed to receive user namespace fd");
}
// Store uid and gid since they will change if CLONE_NEWUSER is set.
uid_t uid = getuid();
uid_t gid = getgid();
@ -493,8 +480,8 @@ pid_t ForkServer::ServeRequest() {
// Child.
if (sandboxee_pid == 0) {
LaunchChild(fork_request, exec_fd, comms_fd, uid, gid, user_ns_fd,
fd_closer1.get(), avoid_pivot_root);
LaunchChild(fork_request, exec_fd, comms_fd, uid, gid, fd_closer1.get(),
avoid_pivot_root);
return sandboxee_pid;
}
@ -521,9 +508,6 @@ pid_t ForkServer::ServeRequest() {
if (exec_fd >= 0) {
close(exec_fd);
}
if (user_ns_fd >= 0) {
close(user_ns_fd);
}
SAPI_RAW_CHECK(comms_->SendInt32(init_pid),
absl::StrCat("Failed to send init PID: ", init_pid).c_str());
SAPI_RAW_CHECK(
@ -646,17 +630,9 @@ void ForkServer::InitializeNamespaces(const ForkRequest& request, uid_t uid,
if (!request.has_mount_tree()) {
return;
}
int32_t clone_flags = request.clone_flags();
if (request.mode() == FORKSERVER_FORK_JOIN_SANDBOX_UNWIND) {
clone_flags = CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC;
SAPI_RAW_PCHECK(!unshare(clone_flags),
"Could not create new namespaces for libunwind");
}
Namespace::InitializeNamespaces(
uid, gid, clone_flags, Mounts(request.mount_tree()),
request.mode() != FORKSERVER_FORK_JOIN_SANDBOX_UNWIND, request.hostname(),
avoid_pivot_root, request.allow_mount_propagation());
uid, gid, request.clone_flags(), Mounts(request.mount_tree()),
request.hostname(), avoid_pivot_root, request.allow_mount_propagation());
}
} // namespace sandbox2

View File

@ -50,7 +50,7 @@ class ForkServer {
private:
// Creates and launched the child process.
void LaunchChild(const ForkRequest& request, int execve_fd, int client_fd,
uid_t uid, gid_t gid, int user_ns_fd, int signaling_fd,
uid_t uid, gid_t gid, int signaling_fd,
bool avoid_pivot_root) const;
// Prepares the Fork-Server (worker side, not the requester side) for work by

View File

@ -50,7 +50,7 @@ int GetMinimalTestcaseFd() {
return open(path.c_str(), O_RDONLY);
}
pid_t TestSingleRequest(Mode mode, int exec_fd, int userns_fd) {
pid_t TestSingleRequest(Mode mode, int exec_fd) {
ForkRequest fork_req;
IPC ipc;
int sv[2];
@ -62,8 +62,7 @@ pid_t TestSingleRequest(Mode mode, int exec_fd, int userns_fd) {
fork_req.add_args("/binary");
fork_req.add_envs("FOO=1");
pid_t pid =
GlobalForkClient::SendRequest(fork_req, exec_fd, sv[0], userns_fd);
pid_t pid = GlobalForkClient::SendRequest(fork_req, exec_fd, sv[0]);
if (pid != -1) {
VLOG(1) << "TestSingleRequest: Waiting for pid=" << pid;
waitpid(pid, nullptr, 0);
@ -75,12 +74,12 @@ pid_t TestSingleRequest(Mode mode, int exec_fd, int userns_fd) {
TEST(ForkserverTest, SimpleFork) {
// Make sure that the regular fork request works.
ASSERT_NE(TestSingleRequest(FORKSERVER_FORK, -1, -1), -1);
ASSERT_NE(TestSingleRequest(FORKSERVER_FORK, -1), -1);
}
TEST(ForkserverTest, SimpleForkNoZombie) {
// Make sure that we don't create zombies.
pid_t child = TestSingleRequest(FORKSERVER_FORK, -1, -1);
pid_t child = TestSingleRequest(FORKSERVER_FORK, -1);
ASSERT_NE(child, -1);
std::string proc = absl::StrCat("/proc/", child, "/cmdline");
@ -101,7 +100,7 @@ TEST(ForkserverTest, ForkExecveWorks) {
// Run a test binary through the FORK_EXECVE request.
int exec_fd = GetMinimalTestcaseFd();
PCHECK(exec_fd != -1) << "Could not open test binary";
ASSERT_NE(TestSingleRequest(FORKSERVER_FORK_EXECVE, exec_fd, -1), -1);
ASSERT_NE(TestSingleRequest(FORKSERVER_FORK_EXECVE, exec_fd), -1);
}
TEST(ForkserverTest, ForkExecveSandboxWithoutPolicy) {

View File

@ -290,15 +290,14 @@ void GlobalForkClient::Shutdown() {
}
pid_t GlobalForkClient::SendRequest(const ForkRequest& request, int exec_fd,
int comms_fd, int user_ns_fd,
pid_t* init_pid) {
int comms_fd, pid_t* init_pid) {
absl::ReleasableMutexLock lock(&GlobalForkClient::instance_mutex_);
EnsureStartedLocked(GlobalForkserverStartMode::kOnDemand);
if (!instance_) {
return -1;
}
pid_t pid = instance_->fork_client_.SendRequest(request, exec_fd, comms_fd,
user_ns_fd, init_pid);
pid_t pid =
instance_->fork_client_.SendRequest(request, exec_fd, comms_fd, init_pid);
if (instance_->comms_.IsTerminated()) {
LOG(ERROR) << "Global forkserver connection terminated";
pid_t server_pid = instance_->fork_client_.pid();

View File

@ -45,8 +45,7 @@ class GlobalForkClient {
: comms_(fd), fork_client_(pid, &comms_) {}
static pid_t SendRequest(const ForkRequest& request, int exec_fd,
int comms_fd, int user_ns_fd = -1,
pid_t* init_pid = nullptr)
int comms_fd, pid_t* init_pid = nullptr)
ABSL_LOCKS_EXCLUDED(instance_mutex_);
static pid_t GetPid() ABSL_LOCKS_EXCLUDED(instance_mutex_);

View File

@ -173,6 +173,41 @@ bool IsEquivalentNode(const MountTree::Node& n1, const MountTree::Node& n2) {
} // namespace internal
absl::Status Mounts::Remove(absl::string_view path) {
if (PathContainsNullByte(path)) {
return absl::InvalidArgumentError(
absl::StrCat("Path contains a null byte: ", path));
}
std::string fixed_path = sapi::file::CleanPath(path);
if (!sapi::file::IsAbsolutePath(fixed_path)) {
return absl::InvalidArgumentError("Only absolute paths are supported");
}
if (fixed_path == "/") {
return absl::InvalidArgumentError("Cannot remove root");
}
std::vector<absl::string_view> parts =
absl::StrSplit(absl::StripPrefix(fixed_path, "/"), '/');
MountTree* curtree = &mount_tree_;
for (absl::string_view part : parts) {
if (curtree->has_node() && curtree->node().has_file_node()) {
return absl::NotFoundError(
absl::StrCat("File node is mounted at parent of: ", path));
}
auto it = curtree->mutable_entries()->find(part);
if (it == curtree->mutable_entries()->end()) {
return absl::NotFoundError(
absl::StrCat("Path does not exist in mounts: ", path));
}
curtree = &it->second;
}
curtree->clear_node();
curtree->clear_entries();
return absl::OkStatus();
}
absl::Status Mounts::Insert(absl::string_view path,
const MountTree::Node& new_node) {
// Some sandboxes allow the inside/outside paths to be partially

View File

@ -60,6 +60,8 @@ class Mounts {
absl::Status AddTmpfs(absl::string_view inside, size_t sz);
absl::Status Remove(absl::string_view path);
void CreateMounts(const std::string& root_path) const;
MountTree GetMountTree() const { return mount_tree_; }

View File

@ -60,6 +60,7 @@ TEST(MountTreeTest, TestInvalidFilenames) {
StatusIs(absl::StatusCode::kInvalidArgument));
EXPECT_THAT(mounts.AddFileAt("/a", "/"),
StatusIs(absl::StatusCode::kInvalidArgument));
EXPECT_THAT(mounts.Remove("/"), StatusIs(absl::StatusCode::kInvalidArgument));
}
TEST(MountTreeTest, TestAddFile) {
@ -91,6 +92,25 @@ TEST(MountTreeTest, TestAddTmpFs) {
EXPECT_THAT(mounts.AddDirectoryAt("/a/b/d", "/a/b/d"), IsOk());
}
TEST(MountTreeTest, TestRemove) {
Mounts mounts;
EXPECT_THAT(mounts.AddTmpfs("/a", kTmpfsSize), IsOk());
EXPECT_THAT(mounts.AddFile("/b/c/d"), IsOk());
EXPECT_THAT(mounts.AddFile("/c/c/d"), IsOk());
EXPECT_THAT(mounts.AddDirectoryAt("/d/b/d", "/d/b/d"), IsOk());
EXPECT_THAT(mounts.AddDirectoryAt("/e/b/d", "/e/b/d"), IsOk());
EXPECT_THAT(mounts.Remove("/a/b"), StatusIs(absl::StatusCode::kNotFound));
EXPECT_THAT(mounts.Remove("/a"), IsOk());
EXPECT_THAT(mounts.Remove("/b/c/d/e"), StatusIs(absl::StatusCode::kNotFound));
EXPECT_THAT(mounts.Remove("/b/c/e"), StatusIs(absl::StatusCode::kNotFound));
EXPECT_THAT(mounts.Remove("/b/c/d"), IsOk());
EXPECT_THAT(mounts.Remove("/c"), IsOk());
EXPECT_THAT(mounts.Remove("/d/b/d/e"), StatusIs(absl::StatusCode::kNotFound));
EXPECT_THAT(mounts.Remove("/d/b/d"), IsOk());
EXPECT_THAT(mounts.Remove("/e"), IsOk());
EXPECT_THAT(mounts.Remove("/f"), StatusIs(absl::StatusCode::kNotFound));
}
TEST(MountTreeTest, TestMultipleInsertionFileSymlink) {
Mounts mounts;
@ -166,6 +186,8 @@ TEST(MountTreeTest, TestEvilNullByte) {
StatusIs(absl::StatusCode::kInvalidArgument));
EXPECT_THAT(mounts.AddTmpfs(filename, kTmpfsSize),
StatusIs(absl::StatusCode::kInvalidArgument));
EXPECT_THAT(mounts.Remove(filename),
StatusIs(absl::StatusCode::kInvalidArgument));
}
TEST(MountTreeTest, TestMinimalDynamicBinary) {

View File

@ -213,7 +213,7 @@ void Namespace::DisableUserNamespace() { clone_flags_ &= ~CLONE_NEWUSER; }
int32_t Namespace::GetCloneFlags() const { return clone_flags_; }
void Namespace::InitializeNamespaces(uid_t uid, gid_t gid, int32_t clone_flags,
const Mounts& mounts, bool mount_proc,
const Mounts& mounts,
const std::string& hostname,
bool avoid_pivot_root,
bool allow_mount_propagation) {
@ -238,10 +238,9 @@ void Namespace::InitializeNamespaces(uid_t uid, gid_t gid, int32_t clone_flags,
SAPI_RAW_PCHECK(chdir("/") != -1, "chdir / after chrooting real root");
}
SAPI_RAW_PCHECK(
!mount_proc || mount("", "/proc", "proc",
MS_NODEV | MS_NOEXEC | MS_NOSUID, nullptr) != -1,
"Could not mount a new /proc"
SAPI_RAW_PCHECK(mount("", "/proc", "proc", MS_NODEV | MS_NOEXEC | MS_NOSUID,
nullptr) != -1,
"Could not mount a new /proc"
);
if (clone_flags & CLONE_NEWNET) {

View File

@ -34,7 +34,7 @@ class Namespace final {
public:
// Performs the namespace setup (mounts, write the uid_map, etc.).
static void InitializeNamespaces(uid_t uid, gid_t gid, int32_t clone_flags,
const Mounts& mounts, bool mount_proc,
const Mounts& mounts,
const std::string& hostname,
bool avoid_pivot_root,
bool allow_mount_propagation);

View File

@ -16,6 +16,7 @@
#include "sandboxed_api/sandbox2/stack_trace.h"
#include <fcntl.h>
#include <sys/resource.h>
#include <syscall.h>
@ -109,7 +110,10 @@ absl::StatusOr<std::unique_ptr<Policy>> StackTracePeer::GetPolicy(
} else {
// Use the mounttree of the original executable.
CHECK(ns != nullptr);
builder.SetMounts(ns->mounts());
Mounts mounts = ns->mounts();
mounts.Remove("/proc").IgnoreError();
mounts.Remove(app_path).IgnoreError();
builder.SetMounts(std::move(mounts));
}
builder.AllowOpen()
.AllowRead()
@ -119,6 +123,8 @@ absl::StatusOr<std::unique_ptr<Policy>> StackTracePeer::GetPolicy(
.AllowHandleSignals()
.AllowTcMalloc()
.AllowSystemMalloc()
// for Comms:RecvFD
.AllowSyscall(__NR_recvmsg)
// libunwind
.AllowMmap()
@ -142,15 +148,6 @@ absl::StatusOr<std::unique_ptr<Policy>> StackTracePeer::GetPolicy(
// Required for our ptrace replacement.
.TrapPtrace()
.AddPolicyOnSyscall(
__NR_process_vm_readv,
{
// The pid technically is a 64bit int, however Linux usually uses
// max 16 bit, so we are fine with comparing only 32 bits here.
ARG_32(0),
JEQ32(static_cast<unsigned int>(target_pid), ALLOW),
JEQ32(static_cast<unsigned int>(1), ALLOW),
})
// Add proc maps.
.AddFileAt(maps_file,
@ -163,10 +160,6 @@ absl::StatusOr<std::unique_ptr<Policy>> StackTracePeer::GetPolicy(
.AddFileAt(exe_path, app_path);
SAPI_ASSIGN_OR_RETURN(std::unique_ptr<Policy> policy, builder.TryBuild());
policy->AllowUnsafeKeepCapabilities({CAP_SYS_PTRACE});
// Use no special namespace flags when cloning. We will join an existing
// user namespace and will unshare() afterwards (See forkserver.cc).
policy->GetNamespace()->clone_flags_ = 0;
return std::move(policy);
}
@ -174,6 +167,11 @@ absl::StatusOr<UnwindResult> StackTracePeer::LaunchLibunwindSandbox(
const Regs* regs, const Namespace* ns, bool uses_custom_forkserver) {
const pid_t pid = regs->pid();
sapi::file_util::fileops::FDCloser memory_fd(
open(absl::StrCat("/proc/", pid, "/mem").c_str(), O_RDONLY));
if (memory_fd.get() == -1) {
return absl::InternalError("Opening sandboxee process memory failed");
}
// Tell executor to use this special internal mode. Using `new` to access a
// non-public constructor.
auto executor = absl::WrapUnique(new Executor(pid));
@ -262,6 +260,9 @@ absl::StatusOr<UnwindResult> StackTracePeer::LaunchLibunwindSandbox(
if (!comms->SendProtoBuf(msg)) {
return absl::InternalError("Sending libunwind setup message failed");
}
if (!comms->SendFD(memory_fd.get())) {
return absl::InternalError("Sending sandboxee's memory fd failed");
}
absl::Status status;
if (!comms->RecvStatus(&status)) {
return absl::InternalError(

View File

@ -53,7 +53,6 @@ cc_library(
"//sandboxed_api/util:file_helpers",
"//sandboxed_api/util:raw_logging",
"//sandboxed_api/util:status",
"//sandboxed_api/util:strerror",
"@com_google_absl//absl/cleanup",
"@com_google_absl//absl/status:statusor",
"@com_google_absl//absl/strings",

View File

@ -38,7 +38,6 @@ target_link_libraries(sandbox2_unwind PRIVATE
sandbox2::maps_parser
sandbox2::minielf
sandbox2::ptrace_hook
sapi::strerror
sandbox2::unwind_proto
sapi::base
sapi::config

View File

@ -47,6 +47,8 @@ constexpr size_t kRegSize = sizeof(RegType);
// pretty opaque which is why we just forward the raw bytes (up to a certain
// limit).
auto* g_registers = new std::vector<RegType>();
pid_t g_pid;
int g_mem_fd;
// Hooks ptrace.
// This wrapper makes use of process_vm_readv to read process memory instead of
@ -56,11 +58,12 @@ long int ptrace_hook( // NOLINT
PtraceRequest request, pid_t pid, void* addr, void* data) {
switch (request) {
case PTRACE_PEEKDATA: {
if (pid != g_pid) {
return -1;
}
RegType read_data;
iovec local = {.iov_base = &read_data, .iov_len = sizeof(read_data)};
iovec remote = {.iov_base = addr, .iov_len = sizeof(read_data)};
if (process_vm_readv(pid, &local, 1, &remote, 1, 0) <= 0) {
if (pread(g_mem_fd, &read_data, sizeof(read_data),
reinterpret_cast<uintptr_t>(addr)) != sizeof(read_data)) {
return -1;
}
*reinterpret_cast<RegType*>(data) = read_data;
@ -97,7 +100,10 @@ long int ptrace_hook( // NOLINT
} // namespace
void EnablePtraceEmulationWithUserRegs(absl::string_view regs) {
void EnablePtraceEmulationWithUserRegs(pid_t pid, absl::string_view regs,
int mem_fd) {
g_pid = pid;
g_mem_fd = mem_fd;
g_registers->resize((regs.size() + 1) / kRegSize);
memcpy(&g_registers->front(), regs.data(), regs.size());
SyscallTrap::Install([](int nr, SyscallTrap::Args args, uintptr_t* rv) {

View File

@ -15,12 +15,15 @@
#ifndef SANDBOXED_API_SANDBOX2_UNWIND_PTRACE_HOOK_H_
#define SANDBOXED_API_SANDBOX2_UNWIND_PTRACE_HOOK_H_
#include <unistd.h>
#include "absl/strings/string_view.h"
namespace sandbox2 {
// Sets the register values that the ptrace emulation will return.
void EnablePtraceEmulationWithUserRegs(absl::string_view regs);
void EnablePtraceEmulationWithUserRegs(pid_t pid, absl::string_view regs,
int mem_fd);
} // namespace sandbox2

View File

@ -39,7 +39,6 @@
#include "sandboxed_api/util/file_helpers.h"
#include "sandboxed_api/util/raw_logging.h"
#include "sandboxed_api/util/status_macros.h"
#include "sandboxed_api/util/strerror.h"
namespace sandbox2 {
namespace {
@ -206,8 +205,12 @@ bool RunLibUnwindAndSymbolizer(Comms* comms) {
if (!comms->RecvProtoBuf(&setup)) {
return false;
}
int mem_fd;
if (!comms->RecvFD(&mem_fd)) {
return false;
}
EnablePtraceEmulationWithUserRegs(setup.regs());
EnablePtraceEmulationWithUserRegs(setup.pid(), setup.regs(), mem_fd);
absl::StatusOr<std::vector<uintptr_t>> ips =
RunLibUnwind(setup.pid(), setup.default_max_frames());