Added more descriptive Syscall argument types, and an API for introspecting arguments.

PiperOrigin-RevId: 612904089
Change-Id: Ia0ef7b0559f7eed923981b13fa8224bc891e8c37
This commit is contained in:
Sandboxed API Team 2024-03-05 11:06:15 -08:00 committed by Copybara-Service
parent 1f390c279e
commit c6bab97690
8 changed files with 1855 additions and 1521 deletions

View File

@ -136,6 +136,7 @@ cc_library(
deps = [
":util",
"//sandboxed_api:config",
"//sandboxed_api/util:status",
"@com_google_absl//absl/algorithm:container",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",
@ -799,6 +800,8 @@ cc_library(
"//sandboxed_api/util:file_helpers",
"//sandboxed_api/util:fileops",
"//sandboxed_api/util:raw_logging",
"//sandboxed_api/util:status",
"@com_google_absl//absl/algorithm:container",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/status",
"@com_google_absl//absl/status:statusor",

View File

@ -90,6 +90,7 @@ target_link_libraries(sandbox2_syscall
absl::strings
sandbox2::util
sapi::base
sapi::status
PUBLIC absl::log
)
@ -687,7 +688,8 @@ add_library(sandbox2_util ${SAPI_LIB_TYPE}
)
add_library(sandbox2::util ALIAS sandbox2_util)
target_link_libraries(sandbox2_util
PRIVATE absl::core_headers
PRIVATE absl::algorithm_container
absl::core_headers
absl::str_format
absl::strings
sapi::config
@ -696,6 +698,7 @@ target_link_libraries(sandbox2_util
sapi::fileops
sapi::base
sapi::raw_logging
sapi::status
PUBLIC absl::status
absl::statusor
)

View File

@ -74,6 +74,10 @@ std::string Syscall::GetName() const {
return absl::StrFormat("UNKNOWN[%d/0x%x]", nr_, nr_);
}
// Looks up the syscall table for this syscall's architecture and asks it for
// the argument data matching the stored syscall number, raw argument values
// and PID.
std::vector<syscalls::ArgData> Syscall::GetArgumentsData() const {
  const auto& table = SyscallTable::get(arch_);
  const uint64_t* raw_values = args_.data();
  return table.GetArgumentsData(nr_, raw_values, pid_);
}
std::vector<std::string> Syscall::GetArgumentsDescription() const {
return SyscallTable::get(arch_).GetArgumentsDescription(nr_, args_.data(),
pid_);

View File

@ -27,6 +27,7 @@
#include <vector>
#include "sandboxed_api/config.h" // IWYU pragma: export
#include "sandboxed_api/sandbox2/syscall_defs.h"
namespace sandbox2 {
@ -59,7 +60,7 @@ class Syscall {
// Returns the stored instruction pointer value (`ip_`).
uint64_t instruction_pointer() const { return ip_; }
// Returns the syscall's name. The fallback visible in the implementation
// formats the syscall number as "UNKNOWN[<nr>/0x<nr>]".
std::string GetName() const;
// Returns the argument data that the architecture's SyscallTable produces for
// this syscall's number, raw argument values and PID.
std::vector<syscalls::ArgData> GetArgumentsData() const;
// Returns the argument descriptions that the architecture's SyscallTable
// produces for this syscall's number, raw argument values and PID.
std::vector<std::string> GetArgumentsDescription() const;
// Returns a textual description of this syscall.
// NOTE(review): the exact format is defined in the implementation, which is
// not visible here.
std::string GetDescription() const;

File diff suppressed because it is too large Load Diff

View File

@ -4,36 +4,200 @@
#include <sys/types.h>
#include <array>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <string>
#include <type_traits>
#include <vector>
#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
#include "sandboxed_api/config.h"
#include "sandboxed_api/sandbox2/syscall.h"
#include "sandboxed_api/sandbox2/util.h"
#include "sandboxed_api/util/status_macros.h"
namespace sandbox2 {
namespace syscalls {
constexpr int kMaxArgs = 6;
// Type of a given syscall argument. Used with argument conversion routines.
// NOTE(review): kGen, kInt and kSignal each appear twice in this listing, so it
// cannot compile as a single enum as shown. It looks like removed and added
// diff lines are displayed together -- confirm the final enumerator list (and
// whether any enumerator still carries an explicit value) against the real
// header before relying on it.
enum ArgType {
kGen = 1,
kInt,
kPath,
kHex,
kOct,
kSocketCall,
kSocketCallPtr,
kSignal,
kString,
kGen,
kStruct,
kPtr,
kArray,
kInt,
kFlags,
kResource,
kPid,
kSignal,
kIpcResource,
kSharedAddress,
kGid,
kUid,
// These kLenN types indicate that the argument is a length, and which
// other argument it applies to. I.e., if parameter 3 is type kPollFdArray,
// and parameter 4 has the length, then parameter 4 is of type kLen3.
kLen0,
kLen1,
kLen2,
kLen3,
kLen4,
kLen5,
kAddressFamily,
kGidArray,
kPollFdArray,
kSockaddr,
kSockmsghdr,
kCloneFlag,
};
// Returns a short human-readable label for `type`.
//
// Every argument type handled below maps to its own label, including
// kSockmsghdr and kCloneFlag, which previously fell through to the default
// branch and were reported as "invalid type" even though they are valid
// enumerators. Any value without a case of its own yields "invalid type".
constexpr absl::string_view ArgTypeToString(ArgType type) {
  switch (type) {
    case kPath:
      return "path";
    case kString:
      return "string";
    case kGen:
      return "generic type";
    case kStruct:
      return "struct";
    case kPtr:
      return "pointer";
    case kArray:
      return "array";
    case kInt:
      return "int";
    case kFlags:
      return "flags";
    case kResource:
      return "resource";
    case kPid:
      return "pid";
    case kSignal:
      return "signal";
    case kIpcResource:
      return "ipc resource";
    case kSharedAddress:
      return "shared address";
    case kGid:
      return "gid";
    case kUid:
      return "uid";
    case kLen0:
      return "length of parameter 0";
    case kLen1:
      return "length of parameter 1";
    case kLen2:
      return "length of parameter 2";
    case kLen3:
      return "length of parameter 3";
    case kLen4:
      return "length of parameter 4";
    case kLen5:
      return "length of parameter 5";
    case kAddressFamily:
      return "address family";
    case kGidArray:
      return "gid array";
    case kPollFdArray:
      return "poll fd array";
    case kSockaddr:
      return "sockaddr struct";
    case kSockmsghdr:
      return "sockmsghdr struct";
    case kCloneFlag:
      return "clone flag";
    default:
      return "invalid type";
  }
}
// Describes one syscall argument: its ArgType, the PID it was observed in, the
// raw 64-bit argument value and, optionally, a length associated with it. The
// ReadAs*() helpers treat the raw value as an address in the memory of process
// `pid()` and read from it.
class ArgData {
 public:
  // Result of ReadAsStructArray(): the decoded elements, plus a flag that is
  // set when the requested array was larger than the read cap and only a
  // prefix of it was read.
  template <typename T>
  struct StructArray {
    std::vector<T> array;
    bool truncated;
  };

  ArgData(syscalls::ArgType type, pid_t pid, uint64_t value,
          std::optional<uint64_t> length = std::nullopt)
      : type_(type), pid_(pid), value_(value), length_(length) {}

  ArgType type() const { return type_; }
  pid_t pid() const { return pid_; }
  uint64_t value() const { return value_; }
  std::optional<uint64_t> length() const { return length_; }

  std::string GetDescription() const;

  // Reads a path string from address `value()` in process `pid()` using
  // util::ReadCPathFromPid().
  absl::StatusOr<std::string> ReadAsString() const {
    return util::ReadCPathFromPid(pid_, value_);
  }

  // Reads sizeof(T) bytes from address `value()` in process `pid()` and copies
  // them into a T. Fails without reading if a length was supplied and it is
  // smaller than sizeof(T), and fails if fewer than sizeof(T) bytes could be
  // read.
  template <typename T>
  absl::StatusOr<T> ReadAsStruct() const {
    if (length_.has_value() && *length_ < sizeof(T)) {
      return absl::InternalError(absl::StrFormat(
          "specified length [%llu] is not enough for to sizeof(%s) == %llu",
          *length_, typeid(T).name(), sizeof(T)));
    }
    SAPI_ASSIGN_OR_RETURN(std::vector<uint8_t> b,
                          util::ReadBytesFromPid(pid_, value_, sizeof(T)));
    return BytesToStruct<T>(b);
  }

  // Reads an array of T from address `value()` in process `pid()`, using
  // `length()` as the number of elements. At most 1 MiB worth of whole
  // elements is read; if the requested array is larger, only that prefix is
  // returned and `truncated` is set. Fails if no length is set or if the full
  // (possibly capped) range could not be read.
  template <typename T>
  absl::StatusOr<StructArray<T>> ReadAsStructArray() const {
    static constexpr uint64_t kMaxAllowedBytes = 1 << 20;  // 1 MiB
    static constexpr uint64_t kMaxElements = kMaxAllowedBytes / sizeof(T);
    if (!length_.has_value()) {
      return absl::InternalError("length is not set");
    }
    // Apply the cap to the element count rather than to the byte count. The
    // length comes from a syscall argument, so `*length_ * sizeof(T)` could
    // wrap around uint64_t and slip under a byte-based limit unnoticed.
    const bool truncated = *length_ > kMaxElements;
    const uint64_t element_count = truncated ? kMaxElements : *length_;
    const uint64_t byte_count = element_count * sizeof(T);  // <= 1 MiB
    SAPI_ASSIGN_OR_RETURN(std::vector<uint8_t> b,
                          util::ReadBytesFromPid(pid_, value_, byte_count));
    absl::Span<const uint8_t> bytes = absl::MakeSpan(b);
    if (bytes.size() < byte_count) {
      return absl::InternalError("could not read full struct array");
    }
    std::vector<T> ret;
    ret.reserve(element_count);
    for (size_t i = 0; i < bytes.size(); i += sizeof(T)) {
      SAPI_ASSIGN_OR_RETURN(T t, BytesToStruct<T>(bytes.subspan(i, sizeof(T))));
      ret.push_back(t);
    }
    return StructArray<T>{std::move(ret), truncated};
  }

 private:
  // Copies the first sizeof(T) bytes of `bytes` into a T. Fails if `bytes`
  // holds fewer than sizeof(T) bytes.
  template <typename T>
  static absl::StatusOr<T> BytesToStruct(absl::Span<const uint8_t> bytes) {
    static_assert(std::is_pod<T>(), "Can only cast bytes to POD structs");
    if (bytes.size() < sizeof(T)) {
      return absl::InternalError(absl::StrFormat(
          "bytes size [%llu] is not equal to sizeof(%s) == %llu", bytes.size(),
          typeid(T).name(), sizeof(T)));
    }
    T t;
    memcpy(&t, bytes.data(), sizeof(T));
    return t;
  }

  absl::StatusOr<std::string> GetDescriptionImpl() const;

  syscalls::ArgType type_;
  pid_t pid_;
  uint64_t value_;
  std::optional<uint64_t> length_;
};
} // namespace syscalls
@ -50,10 +214,6 @@ class SyscallTable {
return num_args;
}
static std::string GetArgumentDescription(uint64_t value,
syscalls::ArgType type,
pid_t pid);
static constexpr bool BySyscallNr(const SyscallTable::Entry& a,
const SyscallTable::Entry& b) {
return a.nr < b.nr;
@ -72,6 +232,10 @@ class SyscallTable {
// Returns the name for syscall number `syscall`.
// NOTE(review): behaviour for numbers missing from the table is not visible
// here -- confirm in the implementation.
absl::string_view GetName(int syscall) const;
// Returns ArgData entries for `syscall`, given the raw argument `values` and
// the `pid` they were observed in. Syscall::GetArgumentsData() forwards to
// this with its stored number, arguments and PID.
std::vector<syscalls::ArgData> GetArgumentsData(int syscall,
const uint64_t values[],
pid_t pid) const;
// Returns textual descriptions of the arguments of `syscall`, given the raw
// argument `values` and the `pid` they were observed in.
std::vector<std::string> GetArgumentsDescription(int syscall,
const uint64_t values[],
pid_t pid) const;

View File

@ -14,15 +14,18 @@
#include "sandboxed_api/sandbox2/util.h"
#include <linux/limits.h>
#include <sched.h>
#include <spawn.h>
#include <sys/ptrace.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <syscall.h>
#include <unistd.h>
#include <algorithm>
#include <cerrno>
#include <csetjmp>
#include <cstddef>
@ -33,6 +36,7 @@
#include <utility>
#include <vector>
#include "absl/algorithm/container.h"
#include "absl/base/attributes.h"
#include "absl/base/macros.h"
#include "absl/base/optimization.h"
@ -51,7 +55,7 @@
#include "sandboxed_api/util/fileops.h"
#include "sandboxed_api/util/path.h"
#include "sandboxed_api/util/raw_logging.h"
#include "sandboxed_api/util/status_macros.h"
namespace sandbox2::util {
namespace file = ::sapi::file;
@ -137,6 +141,19 @@ std::string GetProgName(pid_t pid) {
return file_util::fileops::Basename(file_util::fileops::ReadLink(fname));
}
// Resolves the symbolic link /proc/<pid>/fd/<fd> and returns its target.
//
// Returns the link target (at most PATH_MAX bytes) on success, or a status
// built from `errno` if readlink() fails.
absl::StatusOr<std::string> GetResolvedFdLink(pid_t pid, uint32_t fd) {
  // The proc/PID/fd directory contains links for all of that process' file
  // descriptors. They'll show up as more informative strings (paths, sockets).
  std::string fd_path = absl::StrFormat("/proc/%u/fd/%u", pid, fd);
  std::string result(PATH_MAX, '\0');
  ssize_t size = readlink(fd_path.c_str(), &result[0], PATH_MAX);
  if (size < 0) {
    // readlink() signals failure by returning -1 and storing the reason in
    // errno, so the status must be built from errno, not the return value.
    return absl::ErrnoToStatus(errno, "failed to read link");
  }
  // readlink() does not NUL-terminate; keep exactly the bytes it wrote.
  result.resize(size);
  return result;
}
std::string GetCmdLine(pid_t pid) {
std::string fname = file::JoinPath("/proc", absl::StrCat(pid), "cmdline");
std::string cmdline;
@ -326,6 +343,29 @@ std::string GetSignalName(int signo) {
return absl::StrFormat("%s [%d]", kSignalNames[signo], signo);
}
// Returns the symbolic name ("AF_INET", ...) of the socket address family
// `addr_family`, or "UNKNOWN_ADDRESS_FAMILY [<value>]" when the value is
// negative or beyond the last family listed below.
std::string GetAddressFamily(int addr_family) {
  // Taken from definitions in `socket.h`. Each family's index in the array is
  // also its integer value.
  constexpr absl::string_view kAddressFamilies[] = {
      "AF_UNSPEC",     "AF_UNIX",      "AF_INET",     "AF_AX25",
      "AF_IPX",        "AF_APPLETALK", "AF_NETROM",   "AF_BRIDGE",
      "AF_ATMPVC",     "AF_X25",       "AF_INET6",    "AF_ROSE",
      "AF_DECnet",     "AF_NETBEUI",   "AF_SECURITY", "AF_KEY",
      "AF_NETLINK",    "AF_PACKET",    "AF_ASH",      "AF_ECONET",
      "AF_ATMSVC",     "AF_RDS",       "AF_SNA",      "AF_IRDA",
      "AF_PPPOX",      "AF_WANPIPE",   "AF_LLC",      "AF_IB",
      "AF_MPLS",       "AF_CAN",       "AF_TIPC",     "AF_BLUETOOTH",
      "AF_IUCV",       "AF_RXRPC",     "AF_ISDN",     "AF_PHONET",
      "AF_IEEE802154", "AF_CAIF",      "AF_ALG",      "AF_NFC",
      "AF_VSOCK",      "AF_KCM",       "AF_QIPCRTR",  "AF_SMC",
      "AF_XDP",        "AF_MCTP"};
  // Either condition alone makes the index invalid, so they are combined with
  // `||`. With `&&` the check could never be true and an out-of-range value
  // would index past the array.
  if (addr_family < 0 ||
      static_cast<size_t>(addr_family) >= ABSL_ARRAYSIZE(kAddressFamilies)) {
    return absl::StrFormat("UNKNOWN_ADDRESS_FAMILY [%d]", addr_family);
  }
  return std::string(kAddressFamilies[addr_family]);
}
std::string GetRlimitName(int resource) {
switch (resource) {
case RLIMIT_AS:
@ -370,45 +410,53 @@ std::string GetPtraceEventName(int event) {
}
}
// NOTE(review): this span shows lines of the previous ReadCPathFromPid body
// interleaved with the new ReadBytesFromPid / ReadCPathFromPid definitions
// (e.g. two `page_mask` declarations, two `remote_iov` declarations and two
// process_vm_readv() calls). It is not compilable as displayed; confirm the
// final code against the real file before changing anything here.
absl::StatusOr<std::string> ReadCPathFromPid(pid_t pid, uintptr_t ptr) {
std::string path(PATH_MAX, '\0');
iovec local_iov[] = {{&path[0], path.size()}};
// Reads up to `size` bytes starting at address `ptr` in process `pid` with
// process_vm_readv(). Returns an empty vector when `size` is 0, an errno-based
// status when the call fails, and otherwise only the bytes actually read.
absl::StatusOr<std::vector<uint8_t>> ReadBytesFromPid(pid_t pid, uintptr_t ptr,
uint64_t size) {
static const uintptr_t page_size = getpagesize();
static const uintptr_t page_mask = ~(page_size - 1);
// See 'man process_vm_readv' for details on how to read NUL-terminated
// strings with this syscall.
size_t len1 = ((ptr + page_size) & page_mask) - ptr;
len1 = (len1 > path.size()) ? path.size() : len1;
size_t len2 = (path.size() <= len1) ? 0UL : path.size() - len1;
// Second iov is wrapping around to NULL ptr.
if ((ptr + len1) < ptr) {
len2 = 0UL;
static const uintptr_t page_mask = page_size - 1;
// Input sanity checks.
if (size == 0) {
return std::vector<uint8_t>();
}
iovec remote_iov[] = {
{reinterpret_cast<void*>(ptr), len1},
{reinterpret_cast<void*>(ptr + len1), len2},
};
// Allocate enough bytes to hold the entire size.
std::vector<uint8_t> bytes(size, 0);
iovec local_iov[] = {{bytes.data(), bytes.size()}};
// Stores all the necessary iovecs to move memory.
std::vector<iovec> remote_iov;
// Each iovec should be contained to a single page.
size_t consumed = 0;
while (consumed < size) {
// Read till the end of the page, at most the remaining number of bytes.
size_t chunk_size =
std::min(size - consumed, page_size - ((ptr + consumed) & page_mask));
remote_iov.push_back({reinterpret_cast<void*>(ptr + consumed), chunk_size});
consumed += chunk_size;
}
SAPI_RAW_VLOG(4, "ReadCPathFromPid (iovec): len1: %zu, len2: %zu", len1,
len2);
if (process_vm_readv(pid, local_iov, ABSL_ARRAYSIZE(local_iov), remote_iov,
ABSL_ARRAYSIZE(remote_iov), 0) < 0) {
// One call gathers every per-page remote chunk into the single local buffer.
ssize_t result = process_vm_readv(pid, local_iov, ABSL_ARRAYSIZE(local_iov),
remote_iov.data(), remote_iov.size(), 0);
if (result < 0) {
return absl::ErrnoToStatus(
errno,
absl::StrFormat("process_vm_readv() failed for PID: %d at address: %#x",
pid, reinterpret_cast<uintptr_t>(ptr)));
pid, ptr));
}
// Ensure only successfully read bytes are returned.
bytes.resize(result);
return bytes;
}
// Check for whether there's a NUL byte in the buffer. If not, it's an
// incorrect path (or >PATH_MAX).
auto pos = path.find('\0');
if (pos == std::string::npos) {
return absl::FailedPreconditionError(absl::StrCat(
"No NUL-byte inside the C string '", absl::CHexEscape(path), "'"));
// Reads at most PATH_MAX bytes at `ptr` in process `pid` via ReadBytesFromPid()
// and returns the bytes preceding the first NUL. Fails with
// FailedPrecondition when no NUL byte is found in what was read.
absl::StatusOr<std::string> ReadCPathFromPid(pid_t pid, uintptr_t ptr) {
SAPI_ASSIGN_OR_RETURN(std::vector<uint8_t> bytes,
ReadBytesFromPid(pid, ptr, PATH_MAX));
auto null_pos = absl::c_find(bytes, '\0');
std::string path(bytes.begin(), null_pos);
if (null_pos == bytes.end()) {
return absl::FailedPreconditionError(
absl::StrFormat("path '%s' is too long", absl::CHexEscape(path)));
}
path.resize(pos);
return path;
}

View File

@ -63,6 +63,9 @@ inline void CharPtrArrToVecString(char* const* arr,
// Returns the program name (via /proc/self/comm) for a given PID.
std::string GetProgName(pid_t pid);
// Given a file descriptor FD and a PID, returns the target of the link
// /proc/PID/fd/FD.
absl::StatusOr<std::string> GetResolvedFdLink(pid_t pid, uint32_t fd);
// Returns the command line (via /proc/self/cmdline) for a given PID. The
// argument separators '\0' are converted to spaces.
std::string GetCmdLine(pid_t pid);
@ -95,12 +98,19 @@ absl::StatusOr<int> Communicate(const std::vector<std::string>& argv,
// Returns signal description.
std::string GetSignalName(int signo);
// Returns the socket address family as a string ("AF_INET", ...)
std::string GetAddressFamily(int addr_family);
// Returns rlimit resource name
std::string GetRlimitName(int resource);
// Returns ptrace event name
std::string GetPtraceEventName(int event);
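// Reads up to `size` bytes starting at address `ptr` in the memory of process
// `pid`. Returns the bytes that were actually read (possibly fewer than
// `size`), or an error if the read fails.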
absl::StatusOr<std::vector<uint8_t>> ReadBytesFromPid(pid_t pid, uintptr_t ptr,
uint64_t size);
// Reads a path string (NUL-terminated, shorter than PATH_MAX) from another
// process memory
absl::StatusOr<std::string> ReadCPathFromPid(pid_t pid, uintptr_t ptr);