summary refs log tree commit diff
path: root/sandbox-seccomp-filter.c
diff options
context:
space:
mode:
Diffstat (limited to 'sandbox-seccomp-filter.c')
-rw-r--r-- sandbox-seccomp-filter.c 250
1 files changed, 216 insertions, 34 deletions
diff --git a/sandbox-seccomp-filter.c b/sandbox-seccomp-filter.c
index b6f6258f..e0768c06 100644
--- a/sandbox-seccomp-filter.c
+++ b/sandbox-seccomp-filter.c
@@ -42,13 +42,19 @@
#include <sys/types.h>
#include <sys/resource.h>
#include <sys/prctl.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <linux/net.h>
#include <linux/audit.h>
#include <linux/filter.h>
#include <linux/seccomp.h>
#include <elf.h>
#include <asm/unistd.h>
+#ifdef __s390__
+#include <asm/zcrypt.h>
+#endif
#include <errno.h>
#include <signal.h>
@@ -72,13 +78,57 @@
# define SECCOMP_FILTER_FAIL SECCOMP_RET_TRAP
#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
+#if __BYTE_ORDER == __LITTLE_ENDIAN /* byte offsets of the 32-bit words that SC_ALLOW_ARG* read from an args[] entry */
+# define ARG_LO_OFFSET 0 /* little-endian: low word sits at the start of the entry */
+# define ARG_HI_OFFSET sizeof(uint32_t) /* high word follows the low word */
+#elif __BYTE_ORDER == __BIG_ENDIAN
+# define ARG_LO_OFFSET sizeof(uint32_t) /* big-endian: low word sits after the high word */
+# define ARG_HI_OFFSET 0 /* high word sits at the start of the entry */
+#else
+#error "Unknown endianness"
+#endif
+
/*
 * Simple helpers to avoid manual errors (but larger BPF programs).
 * Every helper expects the syscall number to be in the accumulator already
 * and, when it does not equal _nr, skips ahead to the next rule.
 *
 * SC_DENY: when the syscall number equals _nr, return SECCOMP_RET_ERRNO
 * combined with _errno.  In the '+' form _nr is used as given, so callers
 * pass the complete constant (e.g. __NR_open).
 */
#define SC_DENY(_nr, _errno) \
- BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_ ## _nr, 0, 1), \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 1), \
 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO|(_errno))
/*
 * SC_ALLOW: when the syscall number in the accumulator equals _nr, return
 * SECCOMP_RET_ALLOW; otherwise skip the return and continue with the next
 * rule.  In the '+' form _nr is the complete constant (e.g. __NR_read).
 */
#define SC_ALLOW(_nr) \
- BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_ ## _nr, 0, 1), \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 1), \
 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
+#define SC_ALLOW_ARG(_nr, _arg_nr, _arg_val) /* allow syscall _nr only when argument _arg_nr equals _arg_val */ \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 6), /* different syscall: skip all 6 remaining instructions of this rule */ \
+ /* load and test syscall argument, low word; on mismatch jump to the reload below */ \
+ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
+ offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_LO_OFFSET), \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \
+ ((_arg_val) & 0xFFFFFFFF), 0, 3), \
+ /* load and test syscall argument, high word; on mismatch jump to the reload below */ \
+ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
+ offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_HI_OFFSET), \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \
+ (((uint32_t)((uint64_t)(_arg_val) >> 32)) & 0xFFFFFFFF), 0, 1), \
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), \
+ /* reload syscall number (the argument loads overwrote it); all rules expect it in accumulator */ \
+ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
+ offsetof(struct seccomp_data, nr))
+/*
+ * Allow syscall _nr only if argument _arg_nr has no bits set outside
+ * _arg_mask.  The argument is tested as two 32-bit words: each word is
+ * ANDed with the complement of the matching word of the mask and the
+ * result must be zero.  If either word fails, control jumps to the final
+ * reload so the next rule again finds the syscall number in the
+ * accumulator.  A different syscall number skips all 8 remaining
+ * instructions of this rule.
+ */
+#define SC_ALLOW_ARG_MASK(_nr, _arg_nr, _arg_mask) \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 8), \
+ /* load, mask and test syscall argument, low word */ \
+ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
+ offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_LO_OFFSET), \
+ BPF_STMT(BPF_ALU+BPF_AND+BPF_K, ~((_arg_mask) & 0xFFFFFFFF)), \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, 0, 4), \
+ /* load, mask and test syscall argument, high word */ \
+ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
+ offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_HI_OFFSET), \
+ BPF_STMT(BPF_ALU+BPF_AND+BPF_K, \
+ ~(((uint32_t)((uint64_t)(_arg_mask) >> 32)) & 0xFFFFFFFF)), \
+ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, 0, 1), \
+ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), \
+ /* reload syscall number (the argument loads overwrote it); all rules expect it in accumulator */ \
+ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
+ offsetof(struct seccomp_data, nr))
/* Syscall filtering set for preauth. */
static const struct sock_filter preauth_insns[] = {
@@ -90,45 +140,177 @@ static const struct sock_filter preauth_insns[] = {
/* Load the syscall number for checking. */
BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
offsetof(struct seccomp_data, nr)),
- SC_DENY(open, EACCES),
- SC_DENY(stat, EACCES),
- SC_ALLOW(getpid),
- SC_ALLOW(gettimeofday),
- SC_ALLOW(clock_gettime),
-#ifdef __NR_time /* not defined on EABI ARM */
- SC_ALLOW(time),
-#endif
- SC_ALLOW(read),
- SC_ALLOW(write),
- SC_ALLOW(close),
-#ifdef __NR_shutdown /* not defined on archs that go via socketcall(2) */
- SC_ALLOW(shutdown),
-#endif
- SC_ALLOW(brk),
- SC_ALLOW(poll),
-#ifdef __NR__newselect
- SC_ALLOW(_newselect),
-#else
- SC_ALLOW(select),
+
+ /* Syscalls to non-fatally deny */
+#ifdef __NR_lstat
+ SC_DENY(__NR_lstat, EACCES),
+#endif
+#ifdef __NR_lstat64
+ SC_DENY(__NR_lstat64, EACCES),
+#endif
+#ifdef __NR_fstat
+ SC_DENY(__NR_fstat, EACCES),
+#endif
+#ifdef __NR_fstat64
+ SC_DENY(__NR_fstat64, EACCES),
+#endif
+#ifdef __NR_open
+ SC_DENY(__NR_open, EACCES),
+#endif
+#ifdef __NR_openat
+ SC_DENY(__NR_openat, EACCES),
+#endif
+#ifdef __NR_newfstatat
+ SC_DENY(__NR_newfstatat, EACCES),
+#endif
+#ifdef __NR_stat
+ SC_DENY(__NR_stat, EACCES),
+#endif
+#ifdef __NR_stat64
+ SC_DENY(__NR_stat64, EACCES),
+#endif
+#ifdef __NR_shmget
+ SC_DENY(__NR_shmget, EACCES),
+#endif
+#ifdef __NR_shmat
+ SC_DENY(__NR_shmat, EACCES),
+#endif
+#ifdef __NR_shmdt
+ SC_DENY(__NR_shmdt, EACCES),
+#endif
+#ifdef __NR_ipc
+ SC_DENY(__NR_ipc, EACCES),
+#endif
+
+ /* Syscalls to permit */
+#ifdef __NR_brk
+ SC_ALLOW(__NR_brk),
+#endif
+#ifdef __NR_clock_gettime
+ SC_ALLOW(__NR_clock_gettime),
+#endif
+#ifdef __NR_clock_gettime64
+ SC_ALLOW(__NR_clock_gettime64),
+#endif
+#ifdef __NR_close
+ SC_ALLOW(__NR_close),
+#endif
+#ifdef __NR_exit
+ SC_ALLOW(__NR_exit),
+#endif
+#ifdef __NR_exit_group
+ SC_ALLOW(__NR_exit_group),
+#endif
+#ifdef __NR_futex
+ SC_ALLOW(__NR_futex),
+#endif
+#ifdef __NR_geteuid
+ SC_ALLOW(__NR_geteuid),
+#endif
+#ifdef __NR_geteuid32
+ SC_ALLOW(__NR_geteuid32),
+#endif
+#ifdef __NR_getpgid
+ SC_ALLOW(__NR_getpgid),
#endif
- SC_ALLOW(madvise),
-#ifdef __NR_mmap2 /* EABI ARM only has mmap2() */
- SC_ALLOW(mmap2),
+#ifdef __NR_getpid
+ SC_ALLOW(__NR_getpid),
+#endif
+#ifdef __NR_getrandom
+ SC_ALLOW(__NR_getrandom),
+#endif
+#ifdef __NR_gettimeofday
+ SC_ALLOW(__NR_gettimeofday),
+#endif
+#ifdef __NR_getuid
+ SC_ALLOW(__NR_getuid),
+#endif
+#ifdef __NR_getuid32
+ SC_ALLOW(__NR_getuid32),
+#endif
+#ifdef __NR_madvise
+ SC_ALLOW(__NR_madvise),
#endif
#ifdef __NR_mmap
- SC_ALLOW(mmap),
+ SC_ALLOW_ARG_MASK(__NR_mmap, 2, PROT_READ|PROT_WRITE|PROT_NONE),
+#endif
+#ifdef __NR_mmap2
+ SC_ALLOW_ARG_MASK(__NR_mmap2, 2, PROT_READ|PROT_WRITE|PROT_NONE),
+#endif
+#ifdef __NR_mprotect
+ SC_ALLOW_ARG_MASK(__NR_mprotect, 2, PROT_READ|PROT_WRITE|PROT_NONE),
+#endif
+#ifdef __NR_mremap
+ SC_ALLOW(__NR_mremap),
+#endif
+#ifdef __NR_munmap
+ SC_ALLOW(__NR_munmap),
+#endif
+#ifdef __NR_nanosleep
+ SC_ALLOW(__NR_nanosleep),
+#endif
+#ifdef __NR_clock_nanosleep
+ SC_ALLOW(__NR_clock_nanosleep),
+#endif
+#ifdef __NR_clock_nanosleep_time64
+ SC_ALLOW(__NR_clock_nanosleep_time64),
+#endif
+#ifdef __NR_clock_gettime64
+ SC_ALLOW(__NR_clock_gettime64),
+#endif
+#ifdef __NR__newselect
+ SC_ALLOW(__NR__newselect),
#endif
-#ifdef __dietlibc__
- SC_ALLOW(mremap),
- SC_ALLOW(exit),
+#ifdef __NR_poll
+ SC_ALLOW(__NR_poll),
+#endif
+#ifdef __NR_pselect6
+ SC_ALLOW(__NR_pselect6),
+#endif
+#ifdef __NR_read
+ SC_ALLOW(__NR_read),
#endif
- SC_ALLOW(munmap),
- SC_ALLOW(exit_group),
#ifdef __NR_rt_sigprocmask
- SC_ALLOW(rt_sigprocmask),
-#else
- SC_ALLOW(sigprocmask),
+ SC_ALLOW(__NR_rt_sigprocmask),
+#endif
+#ifdef __NR_select
+ SC_ALLOW(__NR_select),
+#endif
+#ifdef __NR_shutdown
+ SC_ALLOW(__NR_shutdown),
+#endif
+#ifdef __NR_sigprocmask
+ SC_ALLOW(__NR_sigprocmask),
+#endif
+#ifdef __NR_time
+ SC_ALLOW(__NR_time),
#endif
+#ifdef __NR_write
+ SC_ALLOW(__NR_write),
+#endif
+#ifdef __NR_socketcall
+ SC_ALLOW_ARG(__NR_socketcall, 0, SYS_SHUTDOWN),
+ SC_DENY(__NR_socketcall, EACCES),
+#endif
+#if defined(__NR_ioctl) && defined(__s390__)
+ /* Allow ioctls for ICA crypto card on s390 */
+ SC_ALLOW_ARG(__NR_ioctl, 1, Z90STAT_STATUS_MASK),
+ SC_ALLOW_ARG(__NR_ioctl, 1, ICARSAMODEXPO),
+ SC_ALLOW_ARG(__NR_ioctl, 1, ICARSACRT),
+ SC_ALLOW_ARG(__NR_ioctl, 1, ZSECSENDCPRB),
+ /* Allow ioctls for EP11 crypto card on s390 */
+ SC_ALLOW_ARG(__NR_ioctl, 1, ZSENDEP11CPRB),
+#endif
+#if defined(__x86_64__) && defined(__ILP32__) && defined(__X32_SYSCALL_BIT)
+ /*
+ * On Linux x32, the clock_gettime VDSO falls back to the
+ * x86-64 syscall under some circumstances, e.g.
+ * https://bugs.debian.org/849923
+ */
+ SC_ALLOW(__NR_clock_gettime & ~__X32_SYSCALL_BIT),
+#endif
+
+ /* Default deny */
BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL),
};