diff options
Diffstat (limited to 'sandbox-seccomp-filter.c')
-rw-r--r-- | sandbox-seccomp-filter.c | 250 |
1 files changed, 216 insertions, 34 deletions
diff --git a/sandbox-seccomp-filter.c b/sandbox-seccomp-filter.c index b6f6258f..e0768c06 100644 --- a/sandbox-seccomp-filter.c +++ b/sandbox-seccomp-filter.c @@ -42,13 +42,19 @@ #include <sys/types.h> #include <sys/resource.h> #include <sys/prctl.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <linux/net.h> #include <linux/audit.h> #include <linux/filter.h> #include <linux/seccomp.h> #include <elf.h> #include <asm/unistd.h> +#ifdef __s390__ +#include <asm/zcrypt.h> +#endif #include <errno.h> #include <signal.h> @@ -72,13 +78,57 @@ # define SECCOMP_FILTER_FAIL SECCOMP_RET_TRAP #endif /* SANDBOX_SECCOMP_FILTER_DEBUG */ +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define ARG_LO_OFFSET 0 +# define ARG_HI_OFFSET sizeof(uint32_t) +#elif __BYTE_ORDER == __BIG_ENDIAN +# define ARG_LO_OFFSET sizeof(uint32_t) +# define ARG_HI_OFFSET 0 +#else +#error "Unknown endianness" +#endif + /* Simple helpers to avoid manual errors (but larger BPF programs). */ #define SC_DENY(_nr, _errno) \ - BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_ ## _nr, 0, 1), \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 1), \ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO|(_errno)) #define SC_ALLOW(_nr) \ - BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_ ## _nr, 0, 1), \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 1), \ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW) +#define SC_ALLOW_ARG(_nr, _arg_nr, _arg_val) \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 6), \ + /* load and test syscall argument, low word */ \ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ + offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_LO_OFFSET), \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \ + ((_arg_val) & 0xFFFFFFFF), 0, 3), \ + /* load and test syscall argument, high word */ \ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ + offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_HI_OFFSET), \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \ + (((uint32_t)((uint64_t)(_arg_val) >> 32)) & 0xFFFFFFFF), 0, 1), \ + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), \ + /* reload syscall number; all rules expect it in accumulator */ \ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ + offsetof(struct seccomp_data, nr)) +/* Allow if syscall argument contains only values in mask */ +#define SC_ALLOW_ARG_MASK(_nr, _arg_nr, _arg_mask) \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 8), \ + /* load, mask and test syscall argument, low word */ \ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ + offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_LO_OFFSET), \ + BPF_STMT(BPF_ALU+BPF_AND+BPF_K, ~((_arg_mask) & 0xFFFFFFFF)), \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, 0, 4), \ + /* load, mask and test syscall argument, high word */ \ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ + offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_HI_OFFSET), \ + BPF_STMT(BPF_ALU+BPF_AND+BPF_K, \ + ~(((uint32_t)((uint64_t)(_arg_mask) >> 32)) & 0xFFFFFFFF)), \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, 0, 1), \ + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), \ + /* reload syscall number; all rules expect it in accumulator */ \ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ + offsetof(struct seccomp_data, nr)) /* Syscall filtering set for preauth. */ static const struct sock_filter preauth_insns[] = { @@ -90,45 +140,177 @@ static const struct sock_filter preauth_insns[] = { /* Load the syscall number for checking. */ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr)), - SC_DENY(open, EACCES), - SC_DENY(stat, EACCES), - SC_ALLOW(getpid), - SC_ALLOW(gettimeofday), - SC_ALLOW(clock_gettime), -#ifdef __NR_time /* not defined on EABI ARM */ - SC_ALLOW(time), -#endif - SC_ALLOW(read), - SC_ALLOW(write), - SC_ALLOW(close), -#ifdef __NR_shutdown /* not defined on archs that go via socketcall(2) */ - SC_ALLOW(shutdown), -#endif - SC_ALLOW(brk), - SC_ALLOW(poll), -#ifdef __NR__newselect - SC_ALLOW(_newselect), -#else - SC_ALLOW(select), + + /* Syscalls to non-fatally deny */ +#ifdef __NR_lstat + SC_DENY(__NR_lstat, EACCES), +#endif +#ifdef __NR_lstat64 + SC_DENY(__NR_lstat64, EACCES), +#endif +#ifdef __NR_fstat + SC_DENY(__NR_fstat, EACCES), +#endif +#ifdef __NR_fstat64 + SC_DENY(__NR_fstat64, EACCES), +#endif +#ifdef __NR_open + SC_DENY(__NR_open, EACCES), +#endif +#ifdef __NR_openat + SC_DENY(__NR_openat, EACCES), +#endif +#ifdef __NR_newfstatat + SC_DENY(__NR_newfstatat, EACCES), +#endif +#ifdef __NR_stat + SC_DENY(__NR_stat, EACCES), +#endif +#ifdef __NR_stat64 + SC_DENY(__NR_stat64, EACCES), +#endif +#ifdef __NR_shmget + SC_DENY(__NR_shmget, EACCES), +#endif +#ifdef __NR_shmat + SC_DENY(__NR_shmat, EACCES), +#endif +#ifdef __NR_shmdt + SC_DENY(__NR_shmdt, EACCES), +#endif +#ifdef __NR_ipc + SC_DENY(__NR_ipc, EACCES), +#endif + + /* Syscalls to permit */ +#ifdef __NR_brk + SC_ALLOW(__NR_brk), +#endif +#ifdef __NR_clock_gettime + SC_ALLOW(__NR_clock_gettime), +#endif +#ifdef __NR_clock_gettime64 + SC_ALLOW(__NR_clock_gettime64), +#endif +#ifdef __NR_close + SC_ALLOW(__NR_close), +#endif +#ifdef __NR_exit + SC_ALLOW(__NR_exit), +#endif +#ifdef __NR_exit_group + SC_ALLOW(__NR_exit_group), +#endif +#ifdef __NR_futex + SC_ALLOW(__NR_futex), +#endif +#ifdef __NR_geteuid + SC_ALLOW(__NR_geteuid), +#endif +#ifdef __NR_geteuid32 + SC_ALLOW(__NR_geteuid32), +#endif +#ifdef __NR_getpgid + SC_ALLOW(__NR_getpgid), #endif - SC_ALLOW(madvise), -#ifdef __NR_mmap2 /* EABI ARM only has mmap2() */ - SC_ALLOW(mmap2), +#ifdef __NR_getpid + SC_ALLOW(__NR_getpid), +#endif +#ifdef __NR_getrandom + SC_ALLOW(__NR_getrandom), +#endif +#ifdef __NR_gettimeofday + SC_ALLOW(__NR_gettimeofday), +#endif +#ifdef __NR_getuid + SC_ALLOW(__NR_getuid), +#endif +#ifdef __NR_getuid32 + SC_ALLOW(__NR_getuid32), +#endif +#ifdef __NR_madvise + SC_ALLOW(__NR_madvise), #endif #ifdef __NR_mmap - SC_ALLOW(mmap), + SC_ALLOW_ARG_MASK(__NR_mmap, 2, PROT_READ|PROT_WRITE|PROT_NONE), +#endif +#ifdef __NR_mmap2 + SC_ALLOW_ARG_MASK(__NR_mmap2, 2, PROT_READ|PROT_WRITE|PROT_NONE), +#endif +#ifdef __NR_mprotect + SC_ALLOW_ARG_MASK(__NR_mprotect, 2, PROT_READ|PROT_WRITE|PROT_NONE), +#endif +#ifdef __NR_mremap + SC_ALLOW(__NR_mremap), +#endif +#ifdef __NR_munmap + SC_ALLOW(__NR_munmap), +#endif +#ifdef __NR_nanosleep + SC_ALLOW(__NR_nanosleep), +#endif +#ifdef __NR_clock_nanosleep + SC_ALLOW(__NR_clock_nanosleep), +#endif +#ifdef __NR_clock_nanosleep_time64 + SC_ALLOW(__NR_clock_nanosleep_time64), +#endif +#ifdef __NR_clock_gettime64 + SC_ALLOW(__NR_clock_gettime64), +#endif +#ifdef __NR__newselect + SC_ALLOW(__NR__newselect), #endif -#ifdef __dietlibc__ - SC_ALLOW(mremap), - SC_ALLOW(exit), +#ifdef __NR_poll + SC_ALLOW(__NR_poll), +#endif +#ifdef __NR_pselect6 + SC_ALLOW(__NR_pselect6), +#endif +#ifdef __NR_read + SC_ALLOW(__NR_read), #endif - SC_ALLOW(munmap), - SC_ALLOW(exit_group), #ifdef __NR_rt_sigprocmask - SC_ALLOW(rt_sigprocmask), -#else - SC_ALLOW(sigprocmask), + SC_ALLOW(__NR_rt_sigprocmask), +#endif +#ifdef __NR_select + SC_ALLOW(__NR_select), +#endif +#ifdef __NR_shutdown + SC_ALLOW(__NR_shutdown), +#endif +#ifdef __NR_sigprocmask + SC_ALLOW(__NR_sigprocmask), +#endif +#ifdef __NR_time + SC_ALLOW(__NR_time), #endif +#ifdef __NR_write + SC_ALLOW(__NR_write), +#endif +#ifdef __NR_socketcall + SC_ALLOW_ARG(__NR_socketcall, 0, SYS_SHUTDOWN), + SC_DENY(__NR_socketcall, EACCES), +#endif +#if defined(__NR_ioctl) && defined(__s390__) + /* Allow ioctls for ICA crypto card on s390 */ + SC_ALLOW_ARG(__NR_ioctl, 1, Z90STAT_STATUS_MASK), + SC_ALLOW_ARG(__NR_ioctl, 1, ICARSAMODEXPO), + SC_ALLOW_ARG(__NR_ioctl, 1, ICARSACRT), + SC_ALLOW_ARG(__NR_ioctl, 1, ZSECSENDCPRB), + /* Allow ioctls for EP11 crypto card on s390 */ + SC_ALLOW_ARG(__NR_ioctl, 1, ZSENDEP11CPRB), +#endif +#if defined(__x86_64__) && defined(__ILP32__) && defined(__X32_SYSCALL_BIT) + /* + * On Linux x32, the clock_gettime VDSO falls back to the + * x86-64 syscall under some circumstances, e.g. + * https://bugs.debian.org/849923 + */ + SC_ALLOW(__NR_clock_gettime & ~__X32_SYSCALL_BIT), +#endif + + /* Default deny */ BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL), }; |