// Copyright 2019 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Implementation file for the sandbox2::Client class. #include "sandboxed_api/sandbox2/client.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // NOLINT(build/c++11) #include #include #include "absl/base/attributes.h" #include "absl/base/macros.h" #include "absl/container/flat_hash_map.h" #include "absl/status/status.h" #include "absl/strings/numbers.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" #include "absl/strings/str_split.h" #include "absl/strings/string_view.h" #include "sandboxed_api/sandbox2/comms.h" #include "sandboxed_api/sandbox2/policy.h" #include "sandboxed_api/sandbox2/sanitizer.h" #include "sandboxed_api/sandbox2/syscall.h" #include "sandboxed_api/sandbox2/util/bpf_helper.h" #include "sandboxed_api/util/raw_logging.h" #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER #define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) #endif namespace sandbox2 { namespace { void InitSeccompUnotify(sock_fprog prog, Comms* comms) { // The policy might not allow sending the notify FD. // Create a separate thread that won't get the seccomp policy to send the FD. // Synchronize with it using plain atomics + seccomp TSYNC, so we don't need // any additional syscalls. std::atomic fd(-1); std::atomic tid(-1); std::thread th([comms, &fd, &tid]() { int notify_fd = -1; while (notify_fd == -1) { notify_fd = fd.load(std::memory_order_seq_cst); } SAPI_RAW_CHECK(comms->SendFD(notify_fd), "sending unotify fd"); SAPI_RAW_CHECK(close(notify_fd) == 0, "closing unotify fd"); sock_filter filter = ALLOW; struct sock_fprog allow_prog = { .len = 1, .filter = &filter, }; int result = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, 0, reinterpret_cast(&allow_prog)); SAPI_RAW_PCHECK(result != -1, "setting seccomp filter"); tid.store(syscall(__NR_gettid), std::memory_order_seq_cst); }); th.detach(); int result = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_NEW_LISTENER, reinterpret_cast(&prog)); SAPI_RAW_PCHECK(result != -1, "setting seccomp filter"); fd.store(result, std::memory_order_seq_cst); pid_t child = -1; while (child == -1) { child = tid.load(std::memory_order_seq_cst); } // Apply seccomp. struct sock_filter code[] = { LOAD_ARCH, JNE32(sandbox2::Syscall::GetHostAuditArch(), ALLOW), LOAD_SYSCALL_NR, BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR_seccomp, 0, 3), ARG_32(3), BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, internal::kExecveMagic, 0, 1), DENY, ALLOW, }; prog.len = ABSL_ARRAYSIZE(code); prog.filter = code; do { result = syscall( __NR_seccomp, SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, reinterpret_cast(&prog), internal::kExecveMagic); } while (result == child); SAPI_RAW_CHECK(result == 0, "Enabling seccomp filter"); } void InitSeccompRegular(sock_fprog prog) { int result = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, reinterpret_cast(&prog)); SAPI_RAW_PCHECK(result != -1, "setting seccomp filter"); SAPI_RAW_PCHECK(result == 0, "synchronizing threads using SECCOMP_FILTER_FLAG_TSYNC flag " "for thread=%d", result); } } // namespace Client::Client(Comms* comms) : comms_(comms) { char* fdmap_envvar = getenv(kFDMapEnvVar); if (!fdmap_envvar) { return; } absl::flat_hash_map vars = absl::StrSplit(fdmap_envvar, ',', absl::SkipEmpty()); for (const auto& [name, mapped_fd] : vars) { int fd; SAPI_RAW_CHECK(absl::SimpleAtoi(mapped_fd, &fd), "failed to parse fd map"); SAPI_RAW_CHECK(fd_map_.emplace(std::string(name), fd).second, "could not insert mapping into fd map (duplicate)"); } unsetenv(kFDMapEnvVar); } std::string Client::GetFdMapEnvVar() const { return absl::StrCat(kFDMapEnvVar, "=", absl::StrJoin(fd_map_, ",", absl::PairFormatter(","))); } void Client::PrepareEnvironment(int* preserved_fd) { SetUpIPC(preserved_fd); SetUpCwd(); } void Client::EnableSandbox() { ReceivePolicy(); ApplyPolicyAndBecomeTracee(); } void Client::SandboxMeHere() { PrepareEnvironment(); EnableSandbox(); } void Client::SetUpCwd() { { // Get the current working directory to check if we are in a mount // namespace. // Note: glibc 2.27 no longer returns a relative path in that case, but // fails with ENOENT and returns a nullptr instead. The code still // needs to run on lower version for the time being. char cwd_buf[PATH_MAX + 1] = {0}; char* cwd = getcwd(cwd_buf, ABSL_ARRAYSIZE(cwd_buf)); SAPI_RAW_PCHECK(cwd != nullptr || errno == ENOENT, "no current working directory"); // Outside of the mount namespace, the path is of the form // '(unreachable)/...'. Only check for the slash, since Linux might make up // other prefixes in the future. if (errno == ENOENT || cwd_buf[0] != '/') { SAPI_RAW_VLOG(1, "chdir into mount namespace, cwd was '%s'", cwd_buf); // If we are in a mount namespace but fail to chdir, then it can lead to a // sandbox escape -- we need to fail with FATAL if the chdir fails. SAPI_RAW_PCHECK(chdir("/") != -1, "corrective chdir"); } } // Receive the user-supplied current working directory and change into it. std::string cwd; SAPI_RAW_CHECK(comms_->RecvString(&cwd), "receiving working directory"); if (!cwd.empty()) { // On the other hand this chdir can fail without a sandbox escape. It will // probably not have the intended behavior though. if (chdir(cwd.c_str()) == -1 && SAPI_RAW_VLOG_IS_ON(1)) { SAPI_RAW_PLOG( INFO, "chdir(%s) failed, falling back to previous cwd or / (with " "namespaces). Use Executor::SetCwd() to set a working directory", cwd.c_str()); } } } void Client::SetUpIPC(int* preserved_fd) { uint32_t num_of_fd_pairs; SAPI_RAW_CHECK(comms_->RecvUint32(&num_of_fd_pairs), "receiving number of fd pairs"); SAPI_RAW_CHECK(fd_map_.empty(), "fd map not empty"); SAPI_RAW_VLOG(1, "Will receive %d file descriptor pairs", num_of_fd_pairs); absl::flat_hash_map preserve_fds_map; if (preserved_fd) { preserve_fds_map.emplace(*preserved_fd, preserved_fd); } for (uint32_t i = 0; i < num_of_fd_pairs; ++i) { int32_t requested_fd; int32_t fd; std::string name; SAPI_RAW_CHECK(comms_->RecvInt32(&requested_fd), "receiving requested fd"); SAPI_RAW_CHECK(comms_->RecvFD(&fd), "receiving current fd"); SAPI_RAW_CHECK(comms_->RecvString(&name), "receiving name string"); if (auto it = preserve_fds_map.find(requested_fd); it != preserve_fds_map.end()) { int old_fd = it->first; int new_fd = dup(old_fd); SAPI_RAW_PCHECK(new_fd != -1, "Failed to duplicate preserved fd=%d", old_fd); SAPI_RAW_LOG(INFO, "Moved preserved fd=%d to %d", old_fd, new_fd); close(old_fd); int* pfd = it->second; *pfd = new_fd; preserve_fds_map.erase(it); preserve_fds_map.emplace(new_fd, pfd); } if (requested_fd == comms_->GetConnectionFD()) { comms_->MoveToAnotherFd(); SAPI_RAW_LOG(INFO, "Trying to map over comms fd (%d). Remapped comms to %d", requested_fd, comms_->GetConnectionFD()); } if (requested_fd != -1 && fd != requested_fd) { if (requested_fd > STDERR_FILENO && fcntl(requested_fd, F_GETFD) != -1) { // Dup2 will silently close the FD if one is already at requested_fd. // If someone is using the deferred sandbox entry, ie. SandboxMeHere, // the application might have something actually using that fd. // Therefore let's log a big warning if that FD is already in use. // Note: this check doesn't happen for STDIN,STDOUT,STDERR. SAPI_RAW_LOG( WARNING, "Cloning received fd %d over %d which is already open and will " "be silently closed. This may lead to unexpected behavior!", fd, requested_fd); } SAPI_RAW_VLOG(1, "Cloning received fd=%d onto fd=%d", fd, requested_fd); SAPI_RAW_PCHECK(dup2(fd, requested_fd) != -1, ""); // Close the newly received FD if it differs from the new one. close(fd); fd = requested_fd; } if (!name.empty()) { SAPI_RAW_CHECK(fd_map_.emplace(name, fd).second, "duplicate fd mapping"); } } } void Client::ReceivePolicy() { std::vector bytes; SAPI_RAW_CHECK(comms_->RecvBytes(&bytes), "receive bytes"); policy_ = std::move(bytes); } void Client::ApplyPolicyAndBecomeTracee() { // When running under *SAN, we need to notify *SANs background thread that we // want it to exit and wait for it to be done. When not running under *SAN, // this function does nothing. sanitizer::WaitForSanitizer(); // Creds can be received w/o synchronization, once the connection is // established. pid_t cred_pid; uid_t cred_uid ABSL_ATTRIBUTE_UNUSED; gid_t cred_gid ABSL_ATTRIBUTE_UNUSED; SAPI_RAW_CHECK(comms_->RecvCreds(&cred_pid, &cred_uid, &cred_gid), "receiving credentials"); SAPI_RAW_CHECK(prctl(PR_SET_DUMPABLE, 1) == 0, "setting PR_SET_DUMPABLE flag"); if (prctl(PR_SET_PTRACER, cred_pid) == -1) { SAPI_RAW_VLOG(1, "No YAMA on this system. Continuing"); } SAPI_RAW_CHECK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == 0, "setting PR_SET_NO_NEW_PRIVS flag"); SAPI_RAW_CHECK(prctl(PR_SET_KEEPCAPS, 0) == 0, "setting PR_SET_KEEPCAPS flag"); sock_fprog prog; SAPI_RAW_CHECK(policy_.size() / sizeof(sock_filter) <= std::numeric_limits::max(), "seccomp policy too long"); prog.len = static_cast(policy_.size() / sizeof(sock_filter)); prog.filter = reinterpret_cast(&policy_.front()); SAPI_RAW_VLOG(1, "Applying policy in PID %zd, sock_fprog.len: %" PRId16 " entries (%" PRIuPTR " bytes)", syscall(__NR_gettid), prog.len, policy_.size()); // Signal executor we are ready to have limits applied on us and be ptraced. // We want limits at the last moment to avoid triggering them too early and we // want ptrace at the last moment to avoid synchronization deadlocks. SAPI_RAW_CHECK(comms_->SendUint32(kClient2SandboxReady), "receiving ready signal from executor"); uint32_t ret; // wait for confirmation SAPI_RAW_CHECK(comms_->RecvUint32(&ret), "receving confirmation from executor"); if (ret == kSandbox2ClientUnotify) { InitSeccompUnotify(prog, comms_); } else { SAPI_RAW_CHECK(ret == kSandbox2ClientDone, "invalid confirmation from executor"); InitSeccompRegular(prog); } } int Client::GetMappedFD(const std::string& name) { auto it = fd_map_.find(name); SAPI_RAW_CHECK(it != fd_map_.end(), "mapped fd not found (function called twice?)"); int fd = it->second; fd_map_.erase(it); return fd; } bool Client::HasMappedFD(const std::string& name) { return fd_map_.find(name) != fd_map_.end(); } void Client::SendLogsToSupervisor() { // This LogSink will register itself and send all logs to the executor until // the object is destroyed. logsink_ = std::make_unique(GetMappedFD(LogSink::kLogFDName)); } NetworkProxyClient* Client::GetNetworkProxyClient() { if (proxy_client_ == nullptr) { proxy_client_ = std::make_unique( GetMappedFD(NetworkProxyClient::kFDName)); } return proxy_client_.get(); } absl::Status Client::InstallNetworkProxyHandler() { if (fd_map_.find(NetworkProxyClient::kFDName) == fd_map_.end()) { return absl::FailedPreconditionError( "InstallNetworkProxyHandler() must be called at most once after the " "sandbox is installed. Also, the NetworkProxyServer needs to be " "enabled."); } return NetworkProxyHandler::InstallNetworkProxyHandler( GetNetworkProxyClient()); } } // namespace sandbox2