sandboxed-api/sandboxed_api/sandbox2/policybuilder.h
Paul Wankadia bb6ae1d4ab Introduce AllowRestartableSequencesWithProcFiles() and tidy up.
1. In many cases, sandboxes need to allow /proc/stat and /proc/cpuinfo so that
get_nprocs(3) will work; otherwise, per-CPU logic can't determine how many CPUs
there are. Unfortunately, some of those sandboxes also disable namespaces. The
solution is to provide two functions: AllowRestartableSequencesWithProcFiles(),
which allows syscalls and files; and AllowRestartableSequences(), which allows
syscalls only. Sandboxes should usually call the former; sandboxes that disable
namespaces should instead call the latter and are responsible for allowing the
files via the deprecated Fs mechanism.

2. Make the mmap(2) policy evaluate prot AND flags, not prot OR flags.

3. Order the code and the comments identically for better readability.

PiperOrigin-RevId: 386414028
Change-Id: I016b1854ed1da9c9bcff7b351c5e0041093b8193
2021-07-23 02:23:22 -07:00

613 lines
21 KiB
C++

// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SANDBOXED_API_SANDBOX2_POLICYBUILDER_H_
#define SANDBOXED_API_SANDBOX2_POLICYBUILDER_H_
#include <linux/filter.h>

#include <cstddef>
#include <cstdint>
#include <functional>
#include <initializer_list>
#include <memory>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include <glog/logging.h>
#include "absl/base/macros.h"
#include "absl/memory/memory.h"
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "sandboxed_api/sandbox2/mounts.h"
#include "sandboxed_api/sandbox2/network_proxy/filtering.h"
#include "sandboxed_api/sandbox2/policy.h"
struct bpf_labels;
namespace sandbox2 {
// PolicyBuilder is a helper class to simplify creation of policies. The builder
// uses fluent interface for convenience and increased readability of policies.
//
// To build a policy you simply create a new builder object, call methods on it
// specifying what you want and finally call BuildOrDie() to generate your
// policy.
//
// For instance this would generate a simple policy suitable for binaries doing
// only computations:
//
// std::unique_ptr<Policy> policy =
// PolicyBuilder()
// .AllowRead()
// .AllowWrite()
// .AllowExit()
// .AllowSystemMalloc()
// .BuildOrDie();
//
// Note that operations are executed in the order they are dictated, though in
// most cases this has no influence since the operations themselves commute.
//
// For instance these two policies are equivalent:
//
// auto policy = PolicyBuilder().AllowRead().AllowWrite().BuildOrDie();
// auto policy = PolicyBuilder().AllowWrite().AllowRead().BuildOrDie();
//
// While these two are not:
//
// auto policy = PolicyBuilder()
//     .AllowRead()
//     .BlockSyscallWithErrno(__NR_read, EIO)
//     .BuildOrDie();
// auto policy = PolicyBuilder()
//     .BlockSyscallWithErrno(__NR_read, EIO)
//     .AllowRead()
//     .BuildOrDie();
//
// In fact the first one is equivalent to:
//
// auto policy = PolicyBuilder().AllowRead().BuildOrDie();
//
// If you dislike the chained style, it is also possible to write the first
// example as this:
//
// PolicyBuilder builder;
// builder.AllowRead();
// builder.AllowWrite();
// builder.AllowExit();
// builder.AllowSystemMalloc();
// auto policy = builder.BuildOrDie();
//
// For a more complicated example, see examples/persistent/persistent_sandbox.cc
class PolicyBuilder final {
public:
// CPU fence strategies that can be selected when enabling restartable
// sequences via `AllowRestartableSequences()`.
enum CpuFenceMode {
  // Only fast fences are permitted for restartable sequences.
  kRequireFastFences = 0,
  // Fast fences are permitted, and slow fences are permitted as a fallback
  // when fast fences are unavailable.
  kAllowSlowFences = 1,
};
static constexpr absl::string_view kDefaultHostname = "sandbox2";
using BpfInitializer = std::initializer_list<sock_filter>;
using BpfFunc = const std::function<std::vector<sock_filter>(bpf_labels&)>&;
using SyscallInitializer = std::initializer_list<unsigned int>;
// Appends code to allow a specific syscall
PolicyBuilder& AllowSyscall(unsigned int num);
// Appends code to allow a number of syscalls
PolicyBuilder& AllowSyscalls(const std::vector<uint32_t>& nums);
PolicyBuilder& AllowSyscalls(SyscallInitializer nums);
// Appends code to block a specific syscall while setting errno to the error
// given
PolicyBuilder& BlockSyscallWithErrno(unsigned int num, int error);
// Appends code to allow exiting.
// Allows these syscalls:
// - exit
// - exit_group
PolicyBuilder& AllowExit();
// Appends code to allow restartable sequences and necessary /proc files.
// Allows these syscalls:
// - rseq
// - mmap(..., PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, ...)
// - getcpu
// - membarrier
// - futex(WAIT)
// - futex(WAKE)
// - rt_sigprocmask(SIG_SETMASK)
// Allows these files:
// - "/proc/cpuinfo"
// - "/proc/stat"
//
// If `cpu_fence_mode` is `kAllowSlowFences`, also permits slow CPU fences.
// Allows these syscalls:
// - sched_getaffinity
// - sched_setaffinity
// Allows these files:
// - "/proc/self/cpuset"
//
// If `cpu_fence_mode` is `kRequireFastFences`, RSEQ functionality may not
// be enabled if fast CPU fences are not available.
//
// This function enables namespaces! If your policy disables namespaces,
// the conflict will cause an error when the policy is built. You should
// call AllowRestartableSequences() instead; see below for instructions.
PolicyBuilder& AllowRestartableSequencesWithProcFiles(
CpuFenceMode cpu_fence_mode);
// Appends code to allow restartable sequences.
// See above for the allowed syscalls and, more importantly, for the files
// that you are responsible for allowing via the deprecated `Fs` mechanism.
PolicyBuilder& AllowRestartableSequences(CpuFenceMode cpu_fence_mode);
// Appends code to allow the scudo version of malloc, free and
// friends. This should be used in conjunction with namespaces. If scudo
// options are passed to the sandboxee through an environment variable, access
// to "/proc/self/environ" will have to be allowed by the policy.
//
// Note: This function is tuned towards the secure scudo allocator. If you are
// using another implementation, this function might not be the most
// suitable.
PolicyBuilder& AllowScudoMalloc();
// Appends code to allow the system-allocator version of malloc, free and
// friends.
//
// Note: This function is tuned towards the malloc implementation in glibc. If
// you are using another implementation, this function might not be the
// most suitable.
PolicyBuilder& AllowSystemMalloc();
// Appends code to allow the tcmalloc version of malloc, free and
// friends.
PolicyBuilder& AllowTcMalloc();
// Allows system calls typically used by the LLVM sanitizers (address
// sanitizer, memory sanitizer, and thread sanitizer). This method is
// intended as a best effort for adding system calls that are common to many
// binaries. It may not be fully inclusive of all potential system calls for
// all binaries.
PolicyBuilder& AllowLlvmSanitizers();
// Appends code to allow mmap. Specifically this allows the mmap and mmap2
// syscalls on architectures where these syscalls exist.
PolicyBuilder& AllowMmap();
// Appends code to allow calling futex with the given operation.
PolicyBuilder& AllowFutexOp(int op);
// Appends code to allow opening files or directories. Specifically it allows
// these syscalls:
//
// - open
// - openat
PolicyBuilder& AllowOpen();
// Appends code to allow calling stat, fstat and lstat.
// Allows these syscalls:
// - fstat
// - fstat64
// - fstatat
// - fstatat64
// - fstatfs
// - fstatfs64
// - lstat
// - lstat64
// - newfstatat
// - oldfstat
// - oldlstat
// - oldstat
// - stat
// - stat64
// - statfs
// - statfs64
// - ustat
PolicyBuilder& AllowStat();
// Appends code to allow checking file permissions.
// Allows these syscalls:
// - access
// - faccessat
PolicyBuilder& AllowAccess();
// Appends code to the policy to allow reading from file descriptors.
// Allows these syscalls:
// - read
// - readv
// - preadv
// - pread64
PolicyBuilder& AllowRead();
// Appends code to the policy to allow writing to file descriptors.
// Allows these syscalls:
// - write
// - writev
// - pwritev
// - pwrite64
PolicyBuilder& AllowWrite();
// Appends code to allow reading directories.
// Allows these syscalls:
// - getdents
// - getdents64
PolicyBuilder& AllowReaddir();
// Appends code to allow safe calls to fcntl.
// Allows these syscalls:
// - fcntl
// - fcntl64 (on architectures where it exists)
//
// The above are only allowed when the cmd is one of:
// F_GETFD, F_SETFD, F_GETFL, F_SETFL, F_GETLK, F_SETLKW, F_SETLK,
// F_DUPFD, F_DUPFD_CLOEXEC
PolicyBuilder& AllowSafeFcntl();
// Appends code to allow creating new processes.
// Allows these syscalls:
// - fork
// - vfork
// - clone
//
// Note: while this function allows the calls, the default policy is run first
// and it has checks for dangerous flags which can create a violation. See
// sandbox2/policy.cc for more details.
PolicyBuilder& AllowFork();
// Appends code to allow waiting for processes.
// Allows these syscalls:
// - waitpid (on architectures where it exists)
// - wait4
PolicyBuilder& AllowWait();
// Appends code to allow setting up signal handlers, returning from them, etc.
// Allows these syscalls:
// - rt_sigaction
// - rt_sigreturn
// - rt_sigprocmask
// - signal (on architectures where it exists)
// - sigaction (on architectures where it exists)
// - sigreturn (on architectures where it exists)
// - sigprocmask (on architectures where it exists)
PolicyBuilder& AllowHandleSignals();
// Appends code to allow doing the TCGETS ioctl.
// Allows these syscalls:
// - ioctl (when the request argument is TCGETS)
PolicyBuilder& AllowTCGETS();
// Appends code to allow getting the current time.
// Allows these syscalls:
// - time
// - gettimeofday
// - clock_gettime
PolicyBuilder& AllowTime();
// Appends code to allow sleeping in the current thread.
// Allows these syscalls:
// - clock_nanosleep
// - nanosleep
PolicyBuilder& AllowSleep();
// Appends code to allow getting the uid, euid, gid, etc.
// - getuid + geteuid + getresuid
// - getgid + getegid + getresgid
// - getuid32 + geteuid32 + getresuid32 (on architectures where they exist)
// - getgid32 + getegid32 + getresgid32 (on architectures where they exist)
// - getgroups
PolicyBuilder& AllowGetIDs();
// Appends code to allow getting the pid, ppid and tid.
// Allows these syscalls:
// - getpid
// - getppid
// - gettid
PolicyBuilder& AllowGetPIDs();
// Appends code to allow getting the rlimits.
// Allows these syscalls:
// - getrlimit
// - ugetrlimit (on architectures where it exists)
PolicyBuilder& AllowGetRlimit();
// Appends code to allow setting the rlimits.
// Allows these syscalls:
// - setrlimit
// - usetrlimit (on architectures where it exists)
PolicyBuilder& AllowSetRlimit();
// Appends code to allow reading random bytes.
// Allows these syscalls:
// - getrandom (with no flags or GRND_NONBLOCK)
//
PolicyBuilder& AllowGetRandom();
// Appends code to allow configuring wipe-on-fork memory
// Allows these syscalls:
// - madvise (with advice equal to -1 or MADV_WIPEONFORK).
PolicyBuilder& AllowWipeOnFork();
// Enables syscalls required to use the logging support enabled via
// Client::SendLogsToSupervisor()
// Allows the following:
// - Writes
// - kill(0, SIGABRT) (for LOG(FATAL))
// - clock_gettime
// - gettid
// - close
PolicyBuilder& AllowLogForwarding();
// Enables the syscalls necessary to start a statically linked binary
//
// NOTE: This will call BlockSyscallWithErrno(__NR_readlink, ENOENT). If you
// do not want readlink blocked, put a different call before this call.
//
// The current list of allowed syscalls are below. However you should *not*
// depend on the specifics, as these will change whenever the startup code
// changes.
//
// - uname,
// - brk,
// - set_tid_address,
// - set_robust_list,
// - futex(FUTEX_WAIT_BITSET, ...)
// - rt_sigaction(0x20, ...)
// - rt_sigaction(0x21, ...)
// - rt_sigprocmask(SIG_UNBLOCK, ...)
// - arch_prctl(ARCH_SET_FS)
//
// Additionally it will block calls to readlink.
PolicyBuilder& AllowStaticStartup();
// In addition to syscalls allowed by AllowStaticStartup, also allow reading,
// seeking, mmapping and closing files. It does not allow opening them, as
// the mechanism for doing so depends on whether GetFs-checks are used or not.
PolicyBuilder& AllowDynamicStartup();
// Appends a policy, which will be run on the specified syscall.
// This policy must be written without labels. If you need labels, use the
// next function.
PolicyBuilder& AddPolicyOnSyscall(unsigned int num, BpfInitializer policy);
PolicyBuilder& AddPolicyOnSyscall(unsigned int num,
const std::vector<sock_filter>& policy);
// Appends a policy, which will be run on the specified syscall.
// This policy may use labels.
// Example of how to use it:
// builder.AddPolicyOnSyscall(
// __NR_socket, [](bpf_labels& labels) -> std::vector<sock_filter> {
// return {
// ARG(0), // domain is first argument of socket
// JEQ(AF_UNIX, JUMP(&labels, af_unix)),
// JEQ(AF_NETLINK, JUMP(&labels, af_netlink)),
// KILL,
//
// LABEL(&labels, af_unix),
// ARG(1),
// JEQ(SOCK_STREAM | SOCK_NONBLOCK, ALLOW),
// KILL,
//
// LABEL(&labels, af_netlink),
// ARG(2),
// JEQ(NETLINK_ROUTE, ALLOW),
// };
// });
PolicyBuilder& AddPolicyOnSyscall(unsigned int num, BpfFunc f);
// Appends a policy, which will be run on the specified syscalls.
// This policy must be written without labels.
PolicyBuilder& AddPolicyOnSyscalls(SyscallInitializer nums,
BpfInitializer policy);
PolicyBuilder& AddPolicyOnSyscalls(SyscallInitializer nums,
const std::vector<sock_filter>& policy);
// Appends a policy, which will be run on the specified syscalls.
// This policy may use labels.
PolicyBuilder& AddPolicyOnSyscalls(SyscallInitializer nums, BpfFunc f);
// Equivalent to AddPolicyOnSyscalls(mmap_syscalls, policy), where
// mmap_syscalls is a subset of {__NR_mmap, __NR_mmap2}, which exists on the
// target architecture.
PolicyBuilder& AddPolicyOnMmap(BpfInitializer policy);
PolicyBuilder& AddPolicyOnMmap(const std::vector<sock_filter>& policy);
// Equivalent to AddPolicyOnSyscalls(mmap_syscalls, f), where mmap_syscalls is
// a subset of {__NR_mmap, __NR_mmap2}, which exists on the target
// architecture.
PolicyBuilder& AddPolicyOnMmap(BpfFunc f);
// Builds the policy returning a unique_ptr to it. This should only be called
// once.
absl::StatusOr<std::unique_ptr<Policy>> TryBuild();
// Builds the policy returning a unique_ptr to it. This should only be called
// once.
// This function will abort if an error happened in any off the PolicyBuilder
// methods.
std::unique_ptr<Policy> BuildOrDie() { return TryBuild().value(); }
// Adds a bind-mount for a file from outside the namespace to inside. This
// will also create parent directories inside the namespace if needed.
//
// Calling these functions will enable use of namespaces.
PolicyBuilder& AddFile(absl::string_view path, bool is_ro = true);
PolicyBuilder& AddFileAt(absl::string_view outside, absl::string_view inside,
bool is_ro = true);
// Best-effort function that adds the libraries and linker required by a
// binary.
//
// This does not add the binary itself, only the libraries it depends on.
//
// This function should work correctly for most binaries, but you might need
// to tweak it in some cases.
//
// This function is safe even for untrusted/potentially malicious binaries.
// It adds libraries only from standard library dirs and ld_library_path.
//
// If it does not pick up everything the binary needs, run `ldd` yourself and
// use AddFile or AddDirectory.
PolicyBuilder& AddLibrariesForBinary(absl::string_view path,
absl::string_view ld_library_path = {});
// Similar to AddLibrariesForBinary, but binary is specified with an open fd.
PolicyBuilder& AddLibrariesForBinary(int fd,
absl::string_view ld_library_path = {});
// Adds a bind-mount for a directory from outside the namespace to
// inside. This will also create parent directories inside the namespace if
// needed.
//
// Calling these functions will enable use of namespaces.
PolicyBuilder& AddDirectory(absl::string_view path, bool is_ro = true);
PolicyBuilder& AddDirectoryAt(absl::string_view outside,
absl::string_view inside, bool is_ro = true);
// Adds a tmpfs at `inside` within the namespace, creating parent directories
// inside the namespace if needed.
//
// Calling this function will enable use of namespaces.
//
// The single-argument overload is deprecated: it logs a warning and forwards
// to AddTmpfs(inside, sz) with a default size of 4 MiB.
ABSL_DEPRECATED(
    "Explicitly specify tmpfs size by using AddTmpfs(inside, sz) instead")
PolicyBuilder& AddTmpfs(absl::string_view inside) {
  constexpr size_t kDefaultTmpfsSize = 4 << 20;  // 4 MiB
  LOG(WARNING) << "Tmpfs size not specified, defaulting to 4 MiB";
  return AddTmpfs(inside, kDefaultTmpfsSize);
}
PolicyBuilder& AddTmpfs(absl::string_view inside, size_t sz);
// Allows unrestricted access to the network by *not* creating a network
// namespace. Note that this only disables the network namespace. To actually
// allow networking, you would also need to allow networking syscalls.
// Calling this function will enable use of namespaces.
PolicyBuilder& AllowUnrestrictedNetworking();
// Marks namespaces as required by this policy.
//
// Namespaces are enabled by default, so calling this is not needed to turn
// them on. It does, however, set requires_namespaces_, which makes a later
// DisableNamespaces() call fail its CHECK.
// CHECK-fails if namespaces have already been disabled (use_namespaces_ is
// false).
ABSL_DEPRECATED("Namespaces are enabled by default; no need to call this")
PolicyBuilder& EnableNamespaces() {
CHECK(use_namespaces_) << "Namespaces cannot be both disabled and enabled";
requires_namespaces_ = true;
return *this;
}
// Disables the use of namespaces.
//
// Call in order to use Sandbox2 without namespaces.
// This is not recommended.
//
// CHECK-fails if requires_namespaces_ is already set, e.g. because
// EnableNamespaces() was called earlier. The failure message notes that other
// builder features can also implicitly require namespaces.
PolicyBuilder& DisableNamespaces() {
CHECK(!requires_namespaces_)
<< "Namespaces cannot be both disabled and enabled. You're probably "
"using features that implicitly enable namespaces (SetHostname, "
"AddFile, AddDirectory, AddDataDependency, AddLibrariesForBinary or "
"similar)";
use_namespaces_ = false;
return *this;
}
// Set hostname in the network namespace instead of default "sandbox2".
//
// Calling this function will enable use of namespaces.
// It is an error to also call AllowUnrestrictedNetworking.
PolicyBuilder& SetHostname(absl::string_view hostname);
// Enables/disables stack trace collection on violations.
PolicyBuilder& CollectStacktracesOnViolation(bool enable);
// Enables/disables stack trace collection on signals (e.g. crashes / killed
// from a signal).
PolicyBuilder& CollectStacktracesOnSignal(bool enable);
// Enables/disables stack trace collection on hitting a timeout.
PolicyBuilder& CollectStacktracesOnTimeout(bool enable);
// Enables/disables stack trace collection on getting killed by the sandbox
// monitor / the user.
PolicyBuilder& CollectStacktracesOnKill(bool enable);
// Appends an unconditional ALLOW action for all syscalls.
// Do not use in environment with untrusted code and/or data, ask
// sandbox-team@ first if unsure.
PolicyBuilder& DangerDefaultAllowAll();
// Allows syscalls that are necessary for the NetworkProxyClient
PolicyBuilder& AddNetworkProxyPolicy();
// Allows syscalls that are necessary for the NetworkProxyClient and
// the NetworkProxyHandler
PolicyBuilder& AddNetworkProxyHandlerPolicy();
// Makes root of the filesystem writeable
// Not recommended
PolicyBuilder& SetRootWritable();
// Allows connections to this IP.
PolicyBuilder& AllowIPv4(const std::string& ip_and_mask, uint32_t port = 0);
PolicyBuilder& AllowIPv6(const std::string& ip_and_mask, uint32_t port = 0);
private:
friend class PolicyBuilderPeer; // For testing
friend class StackTracePeer;
// Allows a limited version of madvise
PolicyBuilder& AllowLimitedMadvise();
// Replaces the builder's mount configuration (mounts_) with `mounts`.
// `mounts` is taken by value and moved into the member. Returns *this so the
// call can be chained.
PolicyBuilder& SetMounts(Mounts mounts) {
mounts_ = std::move(mounts);
return *this;
}
std::vector<sock_filter> ResolveBpfFunc(BpfFunc f);
static absl::StatusOr<std::string> ValidateAbsolutePath(
absl::string_view path);
static absl::StatusOr<std::string> ValidatePath(absl::string_view path);
void StoreDescription(PolicyBuilderDescription* pb_description);
Mounts mounts_;
bool use_namespaces_ = true;
bool requires_namespaces_ = false;
bool allow_unrestricted_networking_ = false;
std::string hostname_ = std::string(kDefaultHostname);
bool collect_stacktrace_on_violation_ = true;
bool collect_stacktrace_on_signal_ = true;
bool collect_stacktrace_on_timeout_ = true;
bool collect_stacktrace_on_kill_ = false;
// Seccomp fields
std::vector<sock_filter> user_policy_;
bool user_policy_handles_bpf_ = false;
std::set<unsigned int> handled_syscalls_;
// Error handling
absl::Status last_status_;
bool already_built_ = false;
// This function returns a PolicyBuilder so that we can use it in the status
// macros
PolicyBuilder& SetError(const absl::Status& status);
// Contains list of allowed hosts.
absl::optional<AllowedHosts> allowed_hosts_;
};
} // namespace sandbox2
#endif // SANDBOXED_API_SANDBOX2_POLICYBUILDER_H_