// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Implementation of the sandbox2::Policy class.

#include "sandboxed_api/sandbox2/policy.h"

#include <fcntl.h>
#include <linux/audit.h>
#include <linux/bpf_common.h>
#include <linux/filter.h>
#include <linux/seccomp.h>
#include <sched.h>
#include <syscall.h>

#include <cstdint>
#include <limits>
#include <string>
#include <vector>

#include "absl/flags/flag.h"
#include "absl/log/log.h"
#include "absl/strings/string_view.h"
#include "sandboxed_api/config.h"
#include "sandboxed_api/sandbox2/bpfdisassembler.h"
#include "sandboxed_api/sandbox2/comms.h"
#include "sandboxed_api/sandbox2/syscall.h"
#include "sandboxed_api/sandbox2/util/bpf_helper.h"
#include "sandboxed_api/util/raw_logging.h"

#ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
#endif

#ifndef SECCOMP_RET_USER_NOTIF
#define SECCOMP_RET_USER_NOTIF 0x7fc00000U /* notifies userspace */
#endif

#define DO_USER_NOTIF BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_USER_NOTIF)

ABSL_FLAG(bool, sandbox2_danger_danger_permit_all, false,
          "Allow all syscalls, useful for testing");
ABSL_FLAG(std::string, sandbox2_danger_danger_permit_all_and_log, "",
          "Allow all syscalls and log them into specified file");

namespace sandbox2 {

// The final policy is the concatenation of:
//   1. default policy (GetDefaultPolicy, private),
//   2. user policy (user_policy_, public),
//   3. default KILL action (avoid failing open if user policy did not do it).
std::vector Policy::GetPolicy(bool user_notif) const { if (absl::GetFlag(FLAGS_sandbox2_danger_danger_permit_all) || !absl::GetFlag(FLAGS_sandbox2_danger_danger_permit_all_and_log).empty()) { return GetTrackingPolicy(); } // Now we can start building the policy. // 1. Start with the default policy (e.g. syscall architecture checks). auto policy = GetDefaultPolicy(user_notif); VLOG(3) << "Default policy:\n" << bpf::Disasm(policy); // 2. Append user policy. VLOG(3) << "User policy:\n" << bpf::Disasm(user_policy_); // Add default syscall_nr loading in case the user forgets. policy.push_back(LOAD_SYSCALL_NR); policy.insert(policy.end(), user_policy_.begin(), user_policy_.end()); // 3. Finish with default KILL action. policy.push_back(KILL); // In seccomp_unotify mode replace all KILLS with unotify if (user_notif) { for (sock_filter& filter : policy) { if (filter.code == BPF_RET + BPF_K && filter.k == SECCOMP_RET_KILL) { filter = DO_USER_NOTIF; } } } VLOG(2) << "Final policy:\n" << bpf::Disasm(policy); return policy; } // If you modify this function, you should also modify. // Monitor::LogAccessViolation to keep them in sync. // // Produces a policy which returns SECCOMP_RET_TRACE instead of SECCOMP_RET_KILL // for the __NR_execve syscall, so the tracer can make a decision to allow or // disallow it depending on which occurrence of __NR_execve it was. // LINT.IfChange std::vector Policy::GetDefaultPolicy(bool user_notif) const { bpf_labels l = {0}; std::vector policy; if (user_notif) { policy = { // If compiled arch is different from the runtime one, inform the // Monitor. 
LOAD_ARCH, JNE32(Syscall::GetHostAuditArch(), DENY), LOAD_SYSCALL_NR, // TODO(b/271400371) Use NOTIF_FLAG_CONTINUE once generally available JNE32(__NR_seccomp, JUMP(&l, past_seccomp_l)), ARG_32(3), JNE32(internal::kExecveMagic, JUMP(&l, past_seccomp_l)), ALLOW, LABEL(&l, past_seccomp_l), LOAD_SYSCALL_NR, JNE32(__NR_execveat, JUMP(&l, past_execveat_l)), ARG_32(4), JNE32(AT_EMPTY_PATH, JUMP(&l, past_execveat_l)), ARG_32(5), JNE32(internal::kExecveMagic, JUMP(&l, past_execveat_l)), ALLOW, LABEL(&l, past_execveat_l), LOAD_SYSCALL_NR, }; } else { policy = { // If compiled arch is different from the runtime one, inform the Monitor. LOAD_ARCH, JEQ32(Syscall::GetHostAuditArch(), JUMP(&l, past_arch_check_l)), #if defined(SAPI_X86_64) JEQ32(AUDIT_ARCH_I386, TRACE(sapi::cpu::kX86)), // 32-bit sandboxee #endif TRACE(sapi::cpu::kUnknown), LABEL(&l, past_arch_check_l), // After the policy is uploaded, forkserver will execve the sandboxee. We // need to allow this execve but not others. Since BPF does not have // state, we need to inform the Monitor to decide, and for that we use a // magic value in syscall args 5. Note that this value is not supposed to // be secret, but just an optimization so that the monitor is not // triggered on every call to execveat. LOAD_SYSCALL_NR, JNE32(__NR_execveat, JUMP(&l, past_execveat_l)), ARG_32(4), JNE32(AT_EMPTY_PATH, JUMP(&l, past_execveat_l)), ARG_32(5), JNE32(internal::kExecveMagic, JUMP(&l, past_execveat_l)), SANDBOX2_TRACE, LABEL(&l, past_execveat_l), LOAD_SYSCALL_NR, }; } // Forbid ptrace because it's unsafe or too risky. The user policy can only // block (i.e. return an error instead of killing the process) but not allow // ptrace. This uses LOAD_SYSCALL_NR from above. if (!user_policy_handles_ptrace_) { policy.insert(policy.end(), {JEQ32(__NR_ptrace, DENY)}); } // If user policy doesn't mention it, then forbid bpf because it's unsafe or // too risky. This uses LOAD_SYSCALL_NR from above. 
if (!user_policy_handles_bpf_) { policy.insert(policy.end(), {JEQ32(__NR_bpf, DENY)}); } #ifndef CLONE_NEWCGROUP #define CLONE_NEWCGROUP 0x02000000 #endif constexpr uintptr_t kNewNamespacesFlags = CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWUTS | CLONE_NEWCGROUP | CLONE_NEWIPC | CLONE_NEWPID; static_assert(kNewNamespacesFlags <= std::numeric_limits::max()); constexpr uintptr_t kUnsafeCloneFlags = kNewNamespacesFlags | CLONE_UNTRACED; static_assert(kUnsafeCloneFlags <= std::numeric_limits::max()); policy.insert(policy.end(), { #ifdef __NR_clone3 // Disallow clone3 JEQ32(__NR_clone3, DENY), #endif // Disallow clone3 and clone with unsafe flags. This uses // LOAD_SYSCALL_NR from above. JNE32(__NR_clone, JUMP(&l, past_clone_unsafe_l)), // Regardless of arch, we only care about the lower 32-bits // of the flags. ARG_32(0), JA32(kUnsafeCloneFlags, DENY), LABEL(&l, past_clone_unsafe_l), // Disallow unshare with unsafe flags. LOAD_SYSCALL_NR, JNE32(__NR_unshare, JUMP(&l, past_unshare_unsafe_l)), // Regardless of arch, we only care about the lower 32-bits // of the flags. ARG_32(0), JA32(kNewNamespacesFlags, DENY), LABEL(&l, past_unshare_unsafe_l), // Disallow seccomp with SECCOMP_FILTER_FLAG_NEW_LISTENER // flag. LOAD_SYSCALL_NR, JNE32(__NR_seccomp, JUMP(&l, past_seccomp_new_listener)), // Regardless of arch, we only care about the lower 32-bits // of the flags. 
ARG_32(1), JA32(SECCOMP_FILTER_FLAG_NEW_LISTENER, DENY), LABEL(&l, past_seccomp_new_listener), }); if (bpf_resolve_jumps(&l, policy.data(), policy.size()) != 0) { LOG(FATAL) << "Cannot resolve bpf jumps"; } return policy; } // LINT.ThenChange(monitor_ptrace.cc) std::vector Policy::GetTrackingPolicy() const { return { LOAD_ARCH, #if defined(SAPI_X86_64) JEQ32(AUDIT_ARCH_X86_64, TRACE(sapi::cpu::kX8664)), JEQ32(AUDIT_ARCH_I386, TRACE(sapi::cpu::kX86)), #elif defined(SAPI_PPC64_LE) JEQ32(AUDIT_ARCH_PPC64LE, TRACE(sapi::cpu::kPPC64LE)), #elif defined(SAPI_ARM64) JEQ32(AUDIT_ARCH_AARCH64, TRACE(sapi::cpu::kArm64)), #elif defined(SAPI_ARM) JEQ32(AUDIT_ARCH_ARM, TRACE(sapi::cpu::kArm)), #endif TRACE(sapi::cpu::kUnknown), }; } bool Policy::SendPolicy(Comms* comms, bool user_notif) const { auto policy = GetPolicy(user_notif); if (!comms->SendBytes( reinterpret_cast(policy.data()), static_cast(policy.size()) * sizeof(sock_filter))) { LOG(ERROR) << "Couldn't send policy"; return false; } return true; } void Policy::GetPolicyDescription(PolicyDescription* policy) const { policy->set_user_bpf_policy(user_policy_.data(), user_policy_.size() * sizeof(sock_filter)); if (policy_builder_description_) { *policy->mutable_policy_builder_description() = *policy_builder_description_; } if (namespace_) { namespace_->GetNamespaceDescription( policy->mutable_namespace_description()); } } } // namespace sandbox2