Netdev Archive on lore.kernel.org
help / color / mirror / Atom feed
* [BUG HTB offload] syzbot: C repro for b/213075475
@ 2022-01-05 10:27 Eric Dumazet
2022-01-10 11:10 ` Maxim Mikityanskiy
0 siblings, 1 reply; 4+ messages in thread
From: Eric Dumazet @ 2022-01-05 10:27 UTC (permalink / raw)
To: David S . Miller, Jakub Kicinski
Cc: netdev, Eric Dumazet, Eric Dumazet, Maxim Mikityanskiy,
Tariq Toukan, Jamal Hadi Salim, Cong Wang, Jiri Pirko
From: Eric Dumazet <edumazet@google.com>
I had an internal syzbot report with a repro, leading to the infamous:
unregister_netdevice: waiting for DEV to become free
This repro also triggers the problem on upstream kernels, so I ran
a bisection, which led to this roughly one-year-old commit:
commit d03b195b5aa015f6c11988b86a3625f8d5dbac52 (HEAD, refs/bisect/bad)
Author: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Tue Jan 19 14:08:13 2021 +0200
sch_htb: Hierarchical QoS hardware offload
The repro seems to install an HTB qdisc on the lo device, attached at TC_H_INGRESS.
It appears your patches were focused on egress, so there is probably
a missing check to avoid bad things.
I have already spent too much time bisecting this issue,
so I am simply sharing a copy of the C repro.
gcc -static -o b213075475 b213075475.c -lpthread
Run the program, observe the unregister_netdevice messages in
dmesg/console in less than 20 seconds.
Reported-by: Eric Dumazet <edumazet@google.com>
Cc: Maxim Mikityanskiy <maximmi@mellanox.com>
Cc: Tariq Toukan <tariqt@nvidia.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Jiri Pirko <jiri@resnulli.us>
---
b213075475.c | 669 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 669 insertions(+)
create mode 100644 b213075475.c
diff --git a/b213075475.c b/b213075475.c
new file mode 100644
index 0000000000000000000000000000000000000000..a6bf5462d15f05ff66c66883ac5df3edd18df0bc
--- /dev/null
+++ b/b213075475.c
@@ -0,0 +1,669 @@
+// autogenerated by syzkaller (https://github.com/google/syzkaller)
+
+#define _GNU_SOURCE
+
+#include <dirent.h>
+#include <endian.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <linux/capability.h>
+#include <linux/futex.h>
+
+static unsigned long long procid;
+
+static void sleep_ms(uint64_t ms) // Sleep for `ms` milliseconds.
+{
+ usleep(ms * 1000); // usleep() takes microseconds
+}
+
+static uint64_t current_time_ms(void) // Return the CLOCK_MONOTONIC time in milliseconds.
+{
+ struct timespec ts;
+ if (clock_gettime(CLOCK_MONOTONIC, &ts))
+ exit(1); // no usable clock: terminate the process with status 1
+ return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000; // s -> ms plus ns -> ms
+}
+
+static void use_temporary_dir(void) // Create a fresh "./syzkaller.XXXXXX" directory and chdir into it.
+{ // Any failure (mkdtemp, chmod, chdir) terminates the process with exit(1).
+ char tmpdir_template[] = "./syzkaller.XXXXXX"; // writable array: mkdtemp() rewrites the XXXXXX suffix in place
+ char* tmpdir = mkdtemp(tmpdir_template);
+ if (!tmpdir)
+ exit(1);
+ if (chmod(tmpdir, 0777)) // make the directory accessible to every user
+ exit(1);
+ if (chdir(tmpdir))
+ exit(1);
+}
+
+static void thread_start(void* (*fn)(void*), void* arg) // Start a thread running fn(arg); exit(1) if it cannot be created.
+{ // The thread handle is not kept, so the thread is never joined by this code.
+ pthread_t th;
+ pthread_attr_t attr;
+ pthread_attr_init(&attr);
+ pthread_attr_setstacksize(&attr, 128 << 10); // 128 KiB stack per thread
+ int i = 0;
+ for (; i < 100; i++) { // at most 100 creation attempts
+ if (pthread_create(&th, &attr, fn, arg) == 0) {
+ pthread_attr_destroy(&attr);
+ return;
+ }
+ if (errno == EAGAIN) { // NOTE(review): pthread_create() returns its error code and is not specified to set errno, so this retry may not fire as intended - confirm
+ usleep(50); // brief back-off before retrying
+ continue;
+ }
+ break; // any other error: stop retrying
+ }
+ exit(1);
+}
+
+typedef struct { // One-shot event flag; the int doubles as the futex word used by event_set/event_wait.
+ int state; // 0 = not set, 1 = set
+} event_t;
+
+static void event_init(event_t* ev) // Put the event into the "not set" state.
+{
+ ev->state = 0; // plain (non-atomic) store
+}
+
+static void event_reset(event_t* ev) // Clear the event so it can be set again.
+{
+ ev->state = 0; // plain (non-atomic) store, same as event_init
+}
+
+static void event_set(event_t* ev) // Mark the event as set and wake futex waiters.
+{
+ if (ev->state) // setting an already-set event is treated as fatal
+ exit(1);
+ __atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE); // release store pairs with the acquire loads in the wait/isset helpers
+ syscall(SYS_futex, &ev->state, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1000000); // wake up to 1000000 waiters on this word
+}
+
+static void event_wait(event_t* ev) // Block until the event is set (no timeout).
+{
+ while (!__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE)) // re-check after every wake-up
+ syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0); // wait while the word still equals 0; last argument 0 = no timeout pointer
+}
+
+static int event_isset(event_t* ev) // Return non-zero if the event is currently set; never blocks.
+{
+ return __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE);
+}
+
+static int event_timedwait(event_t* ev, uint64_t timeout) // Wait up to `timeout` ms for the event; returns 1 if set, 0 on timeout.
+{
+ uint64_t start = current_time_ms();
+ uint64_t now = start;
+ for (;;) {
+ uint64_t remain = timeout - (now - start); // ms left; cannot underflow because the loop returns once now - start > timeout
+ struct timespec ts;
+ ts.tv_sec = remain / 1000;
+ ts.tv_nsec = (remain % 1000) * 1000 * 1000; // leftover milliseconds converted to nanoseconds
+ syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts); // wait while the word equals 0, bounded by ts
+ if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
+ return 1;
+ now = current_time_ms();
+ if (now - start > timeout)
+ return 0;
+ }
+}
+
+static bool write_file(const char* file, const char* what, ...) // printf-style: format `what` and write it to the existing file `file`.
+{ // Returns true only if the whole formatted string was written by a single write(); on a failed write errno is restored after close().
+ char buf[1024]; // output longer than 1023 characters is truncated by vsnprintf
+ va_list args;
+ va_start(args, what);
+ vsnprintf(buf, sizeof(buf), what, args);
+ va_end(args);
+ buf[sizeof(buf) - 1] = 0; // defensive terminator
+ int len = strlen(buf);
+ int fd = open(file, O_WRONLY | O_CLOEXEC); // no O_CREAT: the file must already exist
+ if (fd == -1)
+ return false;
+ if (write(fd, buf, len) != len) { // a short write counts as failure
+ int err = errno; // save errno: close() may overwrite it
+ close(fd);
+ errno = err;
+ return false;
+ }
+ close(fd);
+ return true;
+}
+
+#define MAX_FDS 30 // exclusive upper bound of the descriptors closed by close_fds()
+
+static void mount_cgroups(const char* dir, const char** controllers, int count) // Mount a "cgroup" filesystem at `dir` with every controller from the list that mounts successfully.
+{ // Best effort: mkdir/mount/chmod failures are deliberately ignored (empty if-bodies).
+ if (mkdir(dir, 0777)) {
+ }
+ char enabled[128] = {0}; // accumulates ",name" for each usable controller; appended with unbounded strcat
+ int i = 0;
+ for (; i < count; i++) {
+ if (mount("none", dir, "cgroup", 0, controllers[i])) { // probe: try mounting with this single controller
+ continue; // mount failed: leave the controller out
+ }
+ umount(dir); // probe succeeded: undo it and remember the controller
+ strcat(enabled, ",");
+ strcat(enabled, controllers[i]);
+ }
+ if (enabled[0] == 0) // nothing usable: leave dir unmounted
+ return;
+ if (mount("none", dir, "cgroup", 0, enabled + 1)) { // +1 skips the leading comma
+ }
+ if (chmod(dir, 0777)) {
+ }
+}
+
+static void setup_cgroups() // One-time setup of the /syzcgroup hierarchy: a cgroup2 mount plus "net" and "cpu" cgroup mounts.
+{ // Every step is best effort: failures are ignored (empty if-bodies / unchecked write_file results).
+ const char* unified_controllers[] = {"+cpu", "+memory", "+io", "+pids"}; // strings written to cgroup.subtree_control below
+ const char* net_controllers[] = {"net", "net_prio", "devices", "blkio",
+ "freezer"};
+ const char* cpu_controllers[] = {"cpuset", "cpuacct", "hugetlb", "rlimit"};
+ if (mkdir("/syzcgroup", 0777)) {
+ }
+ if (mkdir("/syzcgroup/unified", 0777)) {
+ }
+ if (mount("none", "/syzcgroup/unified", "cgroup2", 0, NULL)) {
+ }
+ if (chmod("/syzcgroup/unified", 0777)) {
+ }
+ int unified_control =
+ open("/syzcgroup/unified/cgroup.subtree_control", O_WRONLY);
+ if (unified_control != -1) {
+ unsigned i;
+ for (i = 0;
+ i < sizeof(unified_controllers) / sizeof(unified_controllers[0]); i++)
+ if (write(unified_control, unified_controllers[i], // one write() per controller string; the result is ignored
+ strlen(unified_controllers[i])) < 0) {
+ }
+ close(unified_control);
+ }
+ mount_cgroups("/syzcgroup/net", net_controllers,
+ sizeof(net_controllers) / sizeof(net_controllers[0]));
+ mount_cgroups("/syzcgroup/cpu", cpu_controllers,
+ sizeof(cpu_controllers) / sizeof(cpu_controllers[0]));
+ write_file("/syzcgroup/cpu/cgroup.clone_children", "1");
+ write_file("/syzcgroup/cpu/cpuset.memory_pressure_enabled", "1");
+}
+
+static void setup_cgroups_loop() // Create the per-procid "syz<procid>" cgroups, set limits, and move the calling process into them.
+{ // Best effort: mkdir and write_file failures are ignored.
+ int pid = getpid();
+ char file[128];
+ char cgroupdir[64];
+ snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid);
+ if (mkdir(cgroupdir, 0777)) {
+ }
+ snprintf(file, sizeof(file), "%s/pids.max", cgroupdir);
+ write_file(file, "32"); // writes the literal "32" to pids.max
+ snprintf(file, sizeof(file), "%s/memory.low", cgroupdir);
+ write_file(file, "%d", 298 << 20); // 298 MiB
+ snprintf(file, sizeof(file), "%s/memory.high", cgroupdir);
+ write_file(file, "%d", 299 << 20); // 299 MiB
+ snprintf(file, sizeof(file), "%s/memory.max", cgroupdir);
+ write_file(file, "%d", 300 << 20); // 300 MiB
+ snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
+ write_file(file, "%d", pid); // write our pid into the unified group's cgroup.procs
+ snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid);
+ if (mkdir(cgroupdir, 0777)) {
+ }
+ snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
+ write_file(file, "%d", pid); // same for the cpu hierarchy
+ snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid);
+ if (mkdir(cgroupdir, 0777)) {
+ }
+ snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
+ write_file(file, "%d", pid); // same for the net hierarchy
+}
+
+static void setup_cgroups_test() // Symlink the three per-procid cgroup directories into the current directory.
+{ // symlink() failures are ignored.
+ char cgroupdir[64];
+ snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid);
+ if (symlink(cgroupdir, "./cgroup")) {
+ }
+ snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid);
+ if (symlink(cgroupdir, "./cgroup.cpu")) {
+ }
+ snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid);
+ if (symlink(cgroupdir, "./cgroup.net")) {
+ }
+}
+
+static void setup_common() // Mount fusectl at /sys/fs/fuse/connections (the directory kill_and_wait() scans); failure is ignored.
+{
+ if (mount(0, "/sys/fs/fuse/connections", "fusectl", 0, 0)) {
+ }
+}
+
+static void setup_binderfs() // Create /dev/binderfs and mount a "binder" filesystem there; failures are ignored.
+{
+ if (mkdir("/dev/binderfs", 0777)) {
+ }
+ if (mount("binder", "/dev/binderfs", "binder", 0, NULL)) {
+ }
+}
+
+static void loop(); // forward declaration: defined further down, called from do_sandbox_none()
+
+static void sandbox_common() // Apply resource limits, unshare namespaces and set SysV IPC sysctls for the sandboxed process.
+{ // Return values of setrlimit/unshare/mount/write_file are not acted upon.
+ prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); // request SIGKILL when the parent dies
+ setsid();
+ struct rlimit rlim;
+ rlim.rlim_cur = rlim.rlim_max = (200 << 20); // 200 MiB
+ setrlimit(RLIMIT_AS, &rlim);
+ rlim.rlim_cur = rlim.rlim_max = 32 << 20; // 32 MiB
+ setrlimit(RLIMIT_MEMLOCK, &rlim);
+ rlim.rlim_cur = rlim.rlim_max = 136 << 20; // 136 MiB
+ setrlimit(RLIMIT_FSIZE, &rlim);
+ rlim.rlim_cur = rlim.rlim_max = 1 << 20; // 1 MiB
+ setrlimit(RLIMIT_STACK, &rlim);
+ rlim.rlim_cur = rlim.rlim_max = 0; // zero core-file size limit
+ setrlimit(RLIMIT_CORE, &rlim);
+ rlim.rlim_cur = rlim.rlim_max = 256; // at most 256 open descriptors
+ setrlimit(RLIMIT_NOFILE, &rlim);
+ if (unshare(CLONE_NEWNS)) {
+ }
+ if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) { // mark "/" recursively private
+ }
+ if (unshare(CLONE_NEWIPC)) {
+ }
+ if (unshare(0x02000000)) { // presumably CLONE_NEWCGROUP spelled numerically - confirm against <sched.h>
+ }
+ if (unshare(CLONE_NEWUTS)) {
+ }
+ if (unshare(CLONE_SYSVSEM)) {
+ }
+ typedef struct {
+ const char* name; // sysctl file path
+ const char* value; // text written to it
+ } sysctl_t;
+ static const sysctl_t sysctls[] = {
+ {"/proc/sys/kernel/shmmax", "16777216"},
+ {"/proc/sys/kernel/shmall", "536870912"},
+ {"/proc/sys/kernel/shmmni", "1024"},
+ {"/proc/sys/kernel/msgmax", "8192"},
+ {"/proc/sys/kernel/msgmni", "1024"},
+ {"/proc/sys/kernel/msgmnb", "1024"},
+ {"/proc/sys/kernel/sem", "1024 1048576 500 1024"},
+ };
+ unsigned i;
+ for (i = 0; i < sizeof(sysctls) / sizeof(sysctls[0]); i++)
+ write_file(sysctls[i].name, sysctls[i].value); // value is passed as the format string; none of the table entries contains '%'
+}
+
+static int wait_for_loop(int pid) // Block until child `pid` is reported by waitpid() and return its WEXITSTATUS.
+{
+ if (pid < 0) // caller passes the fork() result; a negative value means fork failed
+ exit(1);
+ int status = 0;
+ while (waitpid(-1, &status, __WALL) != pid) { // waits on any child; keeps looping on other children or errors
+ }
+ return WEXITSTATUS(status);
+}
+
+static void drop_caps(void) // Remove CAP_SYS_PTRACE and CAP_SYS_NICE from the calling process; exit(1) if capget/capset fails.
+{
+ struct __user_cap_header_struct cap_hdr = {};
+ struct __user_cap_data_struct cap_data[2] = {}; // two data elements are used with the VERSION_3 header
+ cap_hdr.version = _LINUX_CAPABILITY_VERSION_3;
+ cap_hdr.pid = getpid();
+ if (syscall(SYS_capget, &cap_hdr, &cap_data))
+ exit(1);
+ const int drop = (1 << CAP_SYS_PTRACE) | (1 << CAP_SYS_NICE); // bit mask of the two capabilities to clear
+ cap_data[0].effective &= ~drop; // only element 0 is modified
+ cap_data[0].permitted &= ~drop;
+ cap_data[0].inheritable &= ~drop;
+ if (syscall(SYS_capset, &cap_hdr, &cap_data))
+ exit(1);
+}
+
+static int do_sandbox_none(void) // Fork a child that sets up the sandbox and runs loop(); the parent returns the child's exit status.
+{
+ if (unshare(CLONE_NEWPID)) { // failure ignored
+ }
+ int pid = fork();
+ if (pid != 0) // parent, or fork failure (wait_for_loop() exits on pid < 0)
+ return wait_for_loop(pid);
+ setup_common(); // from here on: child only
+ sandbox_common();
+ drop_caps();
+ if (unshare(CLONE_NEWNET)) { // failure ignored
+ }
+ setup_binderfs();
+ loop(); // loop() iterates forever (for (;;))
+ exit(1); // reached only if loop() ever returns
+}
+
+#define FS_IOC_SETFLAGS _IOW('f', 2, long) // ioctl request used below to reset inode flags to 0
+static void remove_dir(const char* dir) // Recursively delete `dir`, detaching mounts on the way; exit(1) on unrecoverable errors.
+{
+ int iter = 0; // number of ENOTEMPTY restarts so far (capped at 100)
+ DIR* dp = 0;
+retry: // restarted from here when rmdir() reports ENOTEMPTY
+ while (umount2(dir, MNT_DETACH | UMOUNT_NOFOLLOW) == 0) { // keep detaching until umount2 fails, i.e. nothing more to unmount
+ }
+ dp = opendir(dir);
+ if (dp == NULL) {
+ if (errno == EMFILE) { // both branches terminate with the same status
+ exit(1);
+ }
+ exit(1);
+ }
+ struct dirent* ep = 0;
+ while ((ep = readdir(dp))) {
+ if (strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0)
+ continue;
+ char filename[FILENAME_MAX];
+ snprintf(filename, sizeof(filename), "%s/%s", dir, ep->d_name);
+ while (umount2(filename, MNT_DETACH | UMOUNT_NOFOLLOW) == 0) { // detach anything mounted on the entry
+ }
+ struct stat st;
+ if (lstat(filename, &st)) // lstat: do not follow symlinks
+ exit(1);
+ if (S_ISDIR(st.st_mode)) {
+ remove_dir(filename); // recurse into subdirectories
+ continue;
+ }
+ int i;
+ for (i = 0;; i++) { // retry loop for unlink()
+ if (unlink(filename) == 0)
+ break;
+ if (errno == EPERM) { // try resetting the inode flags to 0, then retry the unlink
+ int fd = open(filename, O_RDONLY);
+ if (fd != -1) {
+ long flags = 0;
+ if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) {
+ }
+ close(fd);
+ continue;
+ }
+ }
+ if (errno == EROFS) { // read-only filesystem: give up on this entry
+ break;
+ }
+ if (errno != EBUSY || i > 100) // unexpected error, or too many attempts
+ exit(1);
+ if (umount2(filename, MNT_DETACH | UMOUNT_NOFOLLOW)) // EBUSY: try detaching a mount, then loop again
+ exit(1);
+ }
+ }
+ closedir(dp);
+ for (int i = 0;; i++) { // retry loop for rmdir()
+ if (rmdir(dir) == 0)
+ break;
+ if (i < 100) { // recovery is attempted only for the first 100 iterations
+ if (errno == EPERM) { // same flag-reset attempt as for files
+ int fd = open(dir, O_RDONLY);
+ if (fd != -1) {
+ long flags = 0;
+ if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) {
+ }
+ close(fd);
+ continue;
+ }
+ }
+ if (errno == EROFS) { // read-only filesystem: leave the directory in place
+ break;
+ }
+ if (errno == EBUSY) {
+ if (umount2(dir, MNT_DETACH | UMOUNT_NOFOLLOW))
+ exit(1);
+ continue;
+ }
+ if (errno == ENOTEMPTY) { // directory still has entries: rescan from the top
+ if (iter < 100) {
+ iter++;
+ goto retry;
+ }
+ }
+ }
+ exit(1); // no recovery path applied
+ }
+}
+
+static void kill_and_wait(int pid, int* status) // SIGKILL `pid` and its process group, then reap it, storing the wait status in *status.
+{
+ kill(-pid, SIGKILL); // negative pid: signal the whole process group
+ kill(pid, SIGKILL);
+ for (int i = 0; i < 100; i++) { // poll for up to 100 iterations, 1 ms apart
+ if (waitpid(-1, status, WNOHANG | __WALL) == pid)
+ return;
+ usleep(1000);
+ }
+ DIR* dir = opendir("/sys/fs/fuse/connections"); // child not reaped yet: write to every FUSE connection's "abort" file
+ if (dir) {
+ for (;;) {
+ struct dirent* ent = readdir(dir);
+ if (!ent)
+ break;
+ if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
+ continue;
+ char abort[300]; // path buffer (local name shadows abort() within this scope)
+ snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort",
+ ent->d_name);
+ int fd = open(abort, O_WRONLY);
+ if (fd == -1) {
+ continue;
+ }
+ if (write(fd, abort, 1) < 0) { // writes one byte (the first byte of the path buffer); result ignored
+ }
+ close(fd);
+ }
+ closedir(dir);
+ } else {
+ }
+ while (waitpid(-1, status, __WALL) != pid) { // finally block until the child is reaped
+ }
+}
+
+static void setup_loop() // Setup performed once by loop() before it starts iterating: only the cgroup setup.
+{
+ setup_cgroups_loop();
+}
+
+static void setup_test() // Setup run in each forked test child before execute_one().
+{
+ prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); // request SIGKILL when the parent dies
+ setpgrp(); // own process group, so kill(-pid, ...) in kill_and_wait() can target it
+ setup_cgroups_test();
+ write_file("/proc/self/oom_score_adj", "1000"); // writes "1000" to this process's oom_score_adj
+ if (symlink("/dev/binderfs", "./binderfs")) { // failure ignored
+ }
+}
+
+static void close_fds() // Close descriptors 3 .. MAX_FDS-1; descriptors 0-2 are left open.
+{
+ for (int fd = 3; fd < MAX_FDS; fd++)
+ close(fd); // errors (e.g. descriptor not open) are ignored
+}
+
+struct thread_t { // Per-worker state shared between execute_one() and thr().
+ int created, call; // created: worker thread already started; call: index passed to execute_call()
+ event_t ready, done; // ready: set by execute_one() to start a call; done: set by thr() when the call returns
+};
+
+static struct thread_t threads[16]; // fixed pool of 16 worker slots
+static void execute_call(int call); // forward declaration: defined near the end of the file
+static int running; // number of calls in flight; updated with relaxed atomics
+
+static void* thr(void* arg) // Worker thread body: repeatedly wait for `ready`, run the assigned call, signal `done`.
+{
+ struct thread_t* th = (struct thread_t*)arg;
+ for (;;) { // never leaves the loop
+ event_wait(&th->ready);
+ event_reset(&th->ready);
+ execute_call(th->call);
+ __atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED); // call finished
+ event_set(&th->done);
+ }
+ return 0; // not reached
+}
+
+static void execute_one(void) // Run calls 0..5 once, each handed to the first idle worker thread, then close descriptors.
+{
+ int i, call, thread;
+ for (call = 0; call < 6; call++) { // six calls, matching cases 0-5 of execute_call()
+ for (thread = 0; thread < (int)(sizeof(threads) / sizeof(threads[0]));
+ thread++) {
+ struct thread_t* th = &threads[thread];
+ if (!th->created) { // lazily start the worker for this slot
+ th->created = 1;
+ event_init(&th->ready);
+ event_init(&th->done);
+ event_set(&th->done); // a new worker starts out idle
+ thread_start(thr, th);
+ }
+ if (!event_isset(&th->done)) // worker still busy with an earlier call: try the next slot
+ continue;
+ event_reset(&th->done);
+ th->call = call;
+ __atomic_fetch_add(&running, 1, __ATOMIC_RELAXED);
+ event_set(&th->ready); // hand the call to the worker
+ event_timedwait(&th->done, 50); // wait at most 50 ms, then move on even if the call has not returned
+ break;
+ }
+ }
+ for (i = 0; i < 100 && __atomic_load_n(&running, __ATOMIC_RELAXED); i++) // up to 100 x 1 ms for in-flight calls to finish
+ sleep_ms(1);
+ close_fds();
+}
+
+static void execute_one(void); // redundant re-declaration (the definition appears just above)
+
+#define WAIT_FLAGS __WALL // waitpid() flag used by loop()
+
+static void loop(void) // Endless driver: each iteration runs execute_one() in a forked child inside a fresh "./<iter>" directory.
+{
+ setup_loop();
+ int iter = 0;
+ for (;; iter++) {
+ char cwdbuf[32];
+ sprintf(cwdbuf, "./%d", iter); // per-iteration working directory name
+ if (mkdir(cwdbuf, 0777))
+ exit(1);
+ int pid = fork();
+ if (pid < 0)
+ exit(1);
+ if (pid == 0) { // child: run the test program once and exit
+ if (chdir(cwdbuf))
+ exit(1);
+ setup_test();
+ execute_one();
+ exit(0);
+ }
+ int status = 0;
+ uint64_t start = current_time_ms();
+ for (;;) { // parent: poll for the child every 1 ms
+ if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid)
+ break;
+ sleep_ms(1);
+ if (current_time_ms() - start < 5000) // allow the child up to 5000 ms
+ continue;
+ kill_and_wait(pid, &status); // timed out: kill and reap it
+ break;
+ }
+ remove_dir(cwdbuf); // clean up the iteration directory
+ }
+}
+
+uint64_t r[3] = {0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff}; // results shared between calls: r[0], r[1] = socket fds, r[2] = value read back after getsockname(); all-ones = not yet set
+
+void execute_call(int call) // Execute syscall number `call` (0..5) of the reproducer; arguments live in the region mapped at 0x20000000 by main().
+{
+ intptr_t res = 0;
+ switch (call) {
+ case 0:
+ res = syscall(__NR_socket, 0x10ul, 3ul, 0); // domain 0x10, type 3, protocol 0 - on Linux presumably AF_NETLINK/SOCK_RAW/NETLINK_ROUTE
+ if (res != -1)
+ r[0] = res;
+ break;
+ case 1:
+ res = syscall(__NR_socket, 0x11ul, 2ul, 0); // domain 0x11, type 2 - on Linux presumably AF_PACKET/SOCK_DGRAM
+ if (res != -1)
+ r[1] = res;
+ break;
+ case 2:
+ *(uint16_t*)0x20000080 = 0x11; // first 16-bit field of the address = 0x11 (same number as the socket domain in case 1)
+ memcpy((void*)0x20000082, // remaining 126 bytes of the address; bytes at offset 4..7 are 01 00 00 00 (presumably sll_ifindex = 1)
+ "\x00\x00\x01\x00\x00\x00\x00\x00\x08\xfc\x9d\x71\xfc\x00\x00\x00"
+ "\x00\x00\x00\x00\xf8\xff\xff\x00\x2e\x0b\x38\x36\x00\x54\x04\xb0"
+ "\xd6\x30\x1a\x4c\xe8\x75\xf2\xe3\xff\x5f\x16\x3e\xe3\x40\xb7\x67"
+ "\x95\x00\x80\x00\xf8\x00\x00\x00\x00\x01\x04\x00\x3c\x58\x11\x03"
+ "\x9e\x15\x77\x50\x27\xec\xce\x66\xfd\x79\x2b\xbf\x0e\x5b\xf5\xff"
+ "\x9b\x08\x16\xf3\xf6\xdb\x1c\x00\x01\x00\x00\x00\x00\x00\x00\x00"
+ "\x49\x74\x00\x00\x00\x00\x00\x00\x00\x06\xad\x8e\x5e\xcc\x32\x6d"
+ "\x3a\x09\xff\x42\xc6\x54\x00\x00\x00\x00\x00\x00\x00\x00",
+ 126);
+ syscall(__NR_bind, r[1], 0x20000080ul, 0x80ul); // bind the case-1 socket to that 0x80-byte address
+ break;
+ case 3:
+ *(uint32_t*)0x200003c0 = 0x14; // in/out address length = 20
+ res = syscall(__NR_getsockname, r[1], 0x200004c0ul, 0x200003c0ul);
+ if (res != -1)
+ r[2] = *(uint32_t*)0x200004c4; // 32-bit field at offset 4 of the returned address (presumably the interface index)
+ break;
+ case 4: // build a msghdr at 0x20000240 with one iovec at 0x20000080 and send it on the case-0 socket
+ *(uint64_t*)0x20000240 = 0; // presumably msg_name = NULL
+ *(uint32_t*)0x20000248 = 0; // presumably msg_namelen = 0
+ *(uint64_t*)0x20000250 = 0x20000080; // presumably msg_iov
+ *(uint64_t*)0x20000080 = 0x20000380; // iovec base -> message payload
+ memcpy((void*)0x20000380, // first 20 payload bytes: length 0x48, 16-bit type 0x24 (presumably RTM_NEWQDISC), flags 0x0507, ...
+ "\x48\x00\x00\x00\x24\x00\x07\x05\x00\x00\x00\x00\x00\x00\x10\x00"
+ "\x00\x00\x1f\x00",
+ 20);
+ *(uint32_t*)0x20000394 = r[2]; // value captured in case 3 (presumably tcm_ifindex)
+ memcpy((void*)0x20000398, // ... f1 ff ff ff (0xfffffff1, presumably tcm_parent = TC_H_INGRESS), then an attribute whose value is the string "htb"
+ "\x00\x00\x04\x00\xf1\xff\xff\xff\x00\x00\x00\x00\x08\x00\x01\x00"
+ "\x68\x74\x62\x00\x1c\x00\x02\x00\x18\x00\x02\x00\x03",
+ 29);
+ *(uint64_t*)0x20000088 = 0x48; // iovec length = 0x48, matching the length field at the start of the payload
+ *(uint64_t*)0x20000258 = 1; // presumably msg_iovlen = 1
+ *(uint64_t*)0x20000260 = 0; // presumably msg_control = NULL
+ *(uint64_t*)0x20000268 = 0; // presumably msg_controllen = 0
+ *(uint32_t*)0x20000270 = 0; // presumably msg_flags = 0
+ syscall(__NR_sendmsg, r[0], 0x20000240ul, 0ul);
+ break;
+ case 5:
+ syscall(__NR_clone, 0xbb002100ul, 0ul, 0x9999999999999999ul, 0ul, -1ul); // raw clone() with fuzzer-chosen flags and pointer arguments
+ break;
+ }
+}
+int main(void) // Map the fixed data region, set up cgroups, fork 6 sandboxed executor processes, then sleep.
+{
+ syscall(__NR_mmap, 0x1ffff000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul); // one page with prot 0 just below the data region
+ syscall(__NR_mmap, 0x20000000ul, 0x1000000ul, 7ul, 0x32ul, -1, 0ul); // 16 MiB at 0x20000000 with prot 7; execute_call() reads/writes fixed addresses here
+ syscall(__NR_mmap, 0x21000000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul); // one page with prot 0 just above the data region
+ setup_cgroups();
+ for (procid = 0; procid < 6; procid++) { // procid selects the per-process "syz<procid>" cgroup names
+ if (fork() == 0) {
+ use_temporary_dir();
+ do_sandbox_none();
+ }
+ }
+ sleep(1000000); // keep the process alive while the children run
+ return 0;
+}
--
2.34.1.448.ga2b2bfdf31-goog
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [BUG HTB offload] syzbot: C repro for b/213075475
2022-01-05 10:27 [BUG HTB offload] syzbot: C repro for b/213075475 Eric Dumazet
@ 2022-01-10 11:10 ` Maxim Mikityanskiy
2022-01-10 13:14 ` Eric Dumazet
0 siblings, 1 reply; 4+ messages in thread
From: Maxim Mikityanskiy @ 2022-01-10 11:10 UTC (permalink / raw)
To: Eric Dumazet
Cc: David S . Miller, Jakub Kicinski, netdev, Eric Dumazet,
Tariq Toukan, Jamal Hadi Salim, Cong Wang, Jiri Pirko
On 2022-01-05 12:27, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
>
> I had an internal syzbot report with a repro, leading to the infamous:
>
> unregister_netdevice: waiting for DEV to become free
>
> This repro was also working on upstream kernels, so I started
> a bisection leading to this ~one year old commit
>
> commit d03b195b5aa015f6c11988b86a3625f8d5dbac52 (HEAD, refs/bisect/bad)
> Author: Maxim Mikityanskiy <maximmi@mellanox.com>
> Date: Tue Jan 19 14:08:13 2021 +0200
>
> sch_htb: Hierarchical QoS hardware offload
>
> The repro seems to install a HTB qdisc on lo device, on TC_H_INGRESS
I don't see anything related to qdiscs in this program. Could you point
me at the place where it installs HTB on lo ingress?
> It appears your patches were focused on egress, so there is probably
> a missing check to avoid bad things.
>
> I spent already too much time to bisect the issue
> I am thus giving a copy of the C repro.
>
> gcc -static -o b213075475 b213075475.c -lpthread
>
> Run the program, observe the unregister_netdevice messages in
> dmesg/console in less than 20 seconds.
It didn't reproduce for me. All I see is these messages in dmesg:
cgroup: Unknown subsys name 'net'
cgroup: Unknown subsys name 'rlimit'
and the program hangs seemingly forever.
Do I need any specific kernel config options? Maybe you could share your
config? Are there any other prerequisites for reproduction?
Is this the right program, by the way?
> Reported-by: Eric Dumazet <edumazet@google.com>
> Cc: Maxim Mikityanskiy <maximmi@mellanox.com>
> Cc: Tariq Toukan <tariqt@nvidia.com>
> Cc: Jakub Kicinski <kuba@kernel.org>
> Cc: Jamal Hadi Salim <jhs@mojatatu.com>
> Cc: Cong Wang <xiyou.wangcong@gmail.com>
> Cc: Jiri Pirko <jiri@resnulli.us>
> ---
> b213075475.c | 669 +++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 669 insertions(+)
> create mode 100644 b213075475.c
>
> diff --git a/b213075475.c b/b213075475.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..a6bf5462d15f05ff66c66883ac5df3edd18df0bc
> --- /dev/null
> +++ b/b213075475.c
> @@ -0,0 +1,669 @@
> +// autogenerated by syzkaller (https://github.com/google/syzkaller)
> +
> +#define _GNU_SOURCE
> +
> +#include <dirent.h>
> +#include <endian.h>
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <pthread.h>
> +#include <sched.h>
> +#include <signal.h>
> +#include <stdarg.h>
> +#include <stdbool.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <sys/ioctl.h>
> +#include <sys/mount.h>
> +#include <sys/prctl.h>
> +#include <sys/resource.h>
> +#include <sys/stat.h>
> +#include <sys/syscall.h>
> +#include <sys/time.h>
> +#include <sys/types.h>
> +#include <sys/wait.h>
> +#include <time.h>
> +#include <unistd.h>
> +
> +#include <linux/capability.h>
> +#include <linux/futex.h>
> +
> +static unsigned long long procid;
> +
> +static void sleep_ms(uint64_t ms)
> +{
> + usleep(ms * 1000);
> +}
> +
> +static uint64_t current_time_ms(void)
> +{
> + struct timespec ts;
> + if (clock_gettime(CLOCK_MONOTONIC, &ts))
> + exit(1);
> + return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
> +}
> +
> +static void use_temporary_dir(void)
> +{
> + char tmpdir_template[] = "./syzkaller.XXXXXX";
> + char* tmpdir = mkdtemp(tmpdir_template);
> + if (!tmpdir)
> + exit(1);
> + if (chmod(tmpdir, 0777))
> + exit(1);
> + if (chdir(tmpdir))
> + exit(1);
> +}
> +
> +static void thread_start(void* (*fn)(void*), void* arg)
> +{
> + pthread_t th;
> + pthread_attr_t attr;
> + pthread_attr_init(&attr);
> + pthread_attr_setstacksize(&attr, 128 << 10);
> + int i = 0;
> + for (; i < 100; i++) {
> + if (pthread_create(&th, &attr, fn, arg) == 0) {
> + pthread_attr_destroy(&attr);
> + return;
> + }
> + if (errno == EAGAIN) {
> + usleep(50);
> + continue;
> + }
> + break;
> + }
> + exit(1);
> +}
> +
> +typedef struct {
> + int state;
> +} event_t;
> +
> +static void event_init(event_t* ev)
> +{
> + ev->state = 0;
> +}
> +
> +static void event_reset(event_t* ev)
> +{
> + ev->state = 0;
> +}
> +
> +static void event_set(event_t* ev)
> +{
> + if (ev->state)
> + exit(1);
> + __atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE);
> + syscall(SYS_futex, &ev->state, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1000000);
> +}
> +
> +static void event_wait(event_t* ev)
> +{
> + while (!__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
> + syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0);
> +}
> +
> +static int event_isset(event_t* ev)
> +{
> + return __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE);
> +}
> +
> +static int event_timedwait(event_t* ev, uint64_t timeout)
> +{
> + uint64_t start = current_time_ms();
> + uint64_t now = start;
> + for (;;) {
> + uint64_t remain = timeout - (now - start);
> + struct timespec ts;
> + ts.tv_sec = remain / 1000;
> + ts.tv_nsec = (remain % 1000) * 1000 * 1000;
> + syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts);
> + if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
> + return 1;
> + now = current_time_ms();
> + if (now - start > timeout)
> + return 0;
> + }
> +}
> +
> +static bool write_file(const char* file, const char* what, ...)
> +{
> + char buf[1024];
> + va_list args;
> + va_start(args, what);
> + vsnprintf(buf, sizeof(buf), what, args);
> + va_end(args);
> + buf[sizeof(buf) - 1] = 0;
> + int len = strlen(buf);
> + int fd = open(file, O_WRONLY | O_CLOEXEC);
> + if (fd == -1)
> + return false;
> + if (write(fd, buf, len) != len) {
> + int err = errno;
> + close(fd);
> + errno = err;
> + return false;
> + }
> + close(fd);
> + return true;
> +}
> +
> +#define MAX_FDS 30
> +
> +static void mount_cgroups(const char* dir, const char** controllers, int count)
> +{
> + if (mkdir(dir, 0777)) {
> + }
> + char enabled[128] = {0};
> + int i = 0;
> + for (; i < count; i++) {
> + if (mount("none", dir, "cgroup", 0, controllers[i])) {
> + continue;
> + }
> + umount(dir);
> + strcat(enabled, ",");
> + strcat(enabled, controllers[i]);
> + }
> + if (enabled[0] == 0)
> + return;
> + if (mount("none", dir, "cgroup", 0, enabled + 1)) {
> + }
> + if (chmod(dir, 0777)) {
> + }
> +}
> +
> +static void setup_cgroups()
> +{
> + const char* unified_controllers[] = {"+cpu", "+memory", "+io", "+pids"};
> + const char* net_controllers[] = {"net", "net_prio", "devices", "blkio",
> + "freezer"};
> + const char* cpu_controllers[] = {"cpuset", "cpuacct", "hugetlb", "rlimit"};
> + if (mkdir("/syzcgroup", 0777)) {
> + }
> + if (mkdir("/syzcgroup/unified", 0777)) {
> + }
> + if (mount("none", "/syzcgroup/unified", "cgroup2", 0, NULL)) {
> + }
> + if (chmod("/syzcgroup/unified", 0777)) {
> + }
> + int unified_control =
> + open("/syzcgroup/unified/cgroup.subtree_control", O_WRONLY);
> + if (unified_control != -1) {
> + unsigned i;
> + for (i = 0;
> + i < sizeof(unified_controllers) / sizeof(unified_controllers[0]); i++)
> + if (write(unified_control, unified_controllers[i],
> + strlen(unified_controllers[i])) < 0) {
> + }
> + close(unified_control);
> + }
> + mount_cgroups("/syzcgroup/net", net_controllers,
> + sizeof(net_controllers) / sizeof(net_controllers[0]));
> + mount_cgroups("/syzcgroup/cpu", cpu_controllers,
> + sizeof(cpu_controllers) / sizeof(cpu_controllers[0]));
> + write_file("/syzcgroup/cpu/cgroup.clone_children", "1");
> + write_file("/syzcgroup/cpu/cpuset.memory_pressure_enabled", "1");
> +}
> +
> +static void setup_cgroups_loop()
> +{
> + int pid = getpid();
> + char file[128];
> + char cgroupdir[64];
> + snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid);
> + if (mkdir(cgroupdir, 0777)) {
> + }
> + snprintf(file, sizeof(file), "%s/pids.max", cgroupdir);
> + write_file(file, "32");
> + snprintf(file, sizeof(file), "%s/memory.low", cgroupdir);
> + write_file(file, "%d", 298 << 20);
> + snprintf(file, sizeof(file), "%s/memory.high", cgroupdir);
> + write_file(file, "%d", 299 << 20);
> + snprintf(file, sizeof(file), "%s/memory.max", cgroupdir);
> + write_file(file, "%d", 300 << 20);
> + snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
> + write_file(file, "%d", pid);
> + snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid);
> + if (mkdir(cgroupdir, 0777)) {
> + }
> + snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
> + write_file(file, "%d", pid);
> + snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid);
> + if (mkdir(cgroupdir, 0777)) {
> + }
> + snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
> + write_file(file, "%d", pid);
> +}
> +
> +static void setup_cgroups_test()
> +{
> + char cgroupdir[64];
> + snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid);
> + if (symlink(cgroupdir, "./cgroup")) {
> + }
> + snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid);
> + if (symlink(cgroupdir, "./cgroup.cpu")) {
> + }
> + snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid);
> + if (symlink(cgroupdir, "./cgroup.net")) {
> + }
> +}
> +
> +static void setup_common()
> +{
> + if (mount(0, "/sys/fs/fuse/connections", "fusectl", 0, 0)) {
> + }
> +}
> +
> +static void setup_binderfs()
> +{
> + if (mkdir("/dev/binderfs", 0777)) {
> + }
> + if (mount("binder", "/dev/binderfs", "binder", 0, NULL)) {
> + }
> +}
> +
> +static void loop();
> +
> +static void sandbox_common()
> +{
> + prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
> + setsid();
> + struct rlimit rlim;
> + rlim.rlim_cur = rlim.rlim_max = (200 << 20);
> + setrlimit(RLIMIT_AS, &rlim);
> + rlim.rlim_cur = rlim.rlim_max = 32 << 20;
> + setrlimit(RLIMIT_MEMLOCK, &rlim);
> + rlim.rlim_cur = rlim.rlim_max = 136 << 20;
> + setrlimit(RLIMIT_FSIZE, &rlim);
> + rlim.rlim_cur = rlim.rlim_max = 1 << 20;
> + setrlimit(RLIMIT_STACK, &rlim);
> + rlim.rlim_cur = rlim.rlim_max = 0;
> + setrlimit(RLIMIT_CORE, &rlim);
> + rlim.rlim_cur = rlim.rlim_max = 256;
> + setrlimit(RLIMIT_NOFILE, &rlim);
> + if (unshare(CLONE_NEWNS)) {
> + }
> + if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
> + }
> + if (unshare(CLONE_NEWIPC)) {
> + }
> + if (unshare(0x02000000)) {
> + }
> + if (unshare(CLONE_NEWUTS)) {
> + }
> + if (unshare(CLONE_SYSVSEM)) {
> + }
> + typedef struct {
> + const char* name;
> + const char* value;
> + } sysctl_t;
> + static const sysctl_t sysctls[] = {
> + {"/proc/sys/kernel/shmmax", "16777216"},
> + {"/proc/sys/kernel/shmall", "536870912"},
> + {"/proc/sys/kernel/shmmni", "1024"},
> + {"/proc/sys/kernel/msgmax", "8192"},
> + {"/proc/sys/kernel/msgmni", "1024"},
> + {"/proc/sys/kernel/msgmnb", "1024"},
> + {"/proc/sys/kernel/sem", "1024 1048576 500 1024"},
> + };
> + unsigned i;
> + for (i = 0; i < sizeof(sysctls) / sizeof(sysctls[0]); i++)
> + write_file(sysctls[i].name, sysctls[i].value);
> +}
> +
> +static int wait_for_loop(int pid)
> +{
> + if (pid < 0)
> + exit(1);
> + int status = 0;
> + while (waitpid(-1, &status, __WALL) != pid) {
> + }
> + return WEXITSTATUS(status);
> +}
> +
> +static void drop_caps(void)
> +{
> + struct __user_cap_header_struct cap_hdr = {};
> + struct __user_cap_data_struct cap_data[2] = {};
> + cap_hdr.version = _LINUX_CAPABILITY_VERSION_3;
> + cap_hdr.pid = getpid();
> + if (syscall(SYS_capget, &cap_hdr, &cap_data))
> + exit(1);
> + const int drop = (1 << CAP_SYS_PTRACE) | (1 << CAP_SYS_NICE);
> + cap_data[0].effective &= ~drop;
> + cap_data[0].permitted &= ~drop;
> + cap_data[0].inheritable &= ~drop;
> + if (syscall(SYS_capset, &cap_hdr, &cap_data))
> + exit(1);
> +}
> +
> +static int do_sandbox_none(void)
> +{
> + if (unshare(CLONE_NEWPID)) {
> + }
> + int pid = fork();
> + if (pid != 0)
> + return wait_for_loop(pid);
> + setup_common();
> + sandbox_common();
> + drop_caps();
> + if (unshare(CLONE_NEWNET)) {
> + }
> + setup_binderfs();
> + loop();
> + exit(1);
> +}
> +
> +#define FS_IOC_SETFLAGS _IOW('f', 2, long)
> +static void remove_dir(const char* dir)
> +{
> + int iter = 0;
> + DIR* dp = 0;
> +retry:
> + while (umount2(dir, MNT_DETACH | UMOUNT_NOFOLLOW) == 0) {
> + }
> + dp = opendir(dir);
> + if (dp == NULL) {
> + if (errno == EMFILE) {
> + exit(1);
> + }
> + exit(1);
> + }
> + struct dirent* ep = 0;
> + while ((ep = readdir(dp))) {
> + if (strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0)
> + continue;
> + char filename[FILENAME_MAX];
> + snprintf(filename, sizeof(filename), "%s/%s", dir, ep->d_name);
> + while (umount2(filename, MNT_DETACH | UMOUNT_NOFOLLOW) == 0) {
> + }
> + struct stat st;
> + if (lstat(filename, &st))
> + exit(1);
> + if (S_ISDIR(st.st_mode)) {
> + remove_dir(filename);
> + continue;
> + }
> + int i;
> + for (i = 0;; i++) {
> + if (unlink(filename) == 0)
> + break;
> + if (errno == EPERM) {
> + int fd = open(filename, O_RDONLY);
> + if (fd != -1) {
> + long flags = 0;
> + if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) {
> + }
> + close(fd);
> + continue;
> + }
> + }
> + if (errno == EROFS) {
> + break;
> + }
> + if (errno != EBUSY || i > 100)
> + exit(1);
> + if (umount2(filename, MNT_DETACH | UMOUNT_NOFOLLOW))
> + exit(1);
> + }
> + }
> + closedir(dp);
> + for (int i = 0;; i++) {
> + if (rmdir(dir) == 0)
> + break;
> + if (i < 100) {
> + if (errno == EPERM) {
> + int fd = open(dir, O_RDONLY);
> + if (fd != -1) {
> + long flags = 0;
> + if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) {
> + }
> + close(fd);
> + continue;
> + }
> + }
> + if (errno == EROFS) {
> + break;
> + }
> + if (errno == EBUSY) {
> + if (umount2(dir, MNT_DETACH | UMOUNT_NOFOLLOW))
> + exit(1);
> + continue;
> + }
> + if (errno == ENOTEMPTY) {
> + if (iter < 100) {
> + iter++;
> + goto retry;
> + }
> + }
> + }
> + exit(1);
> + }
> +}
> +
> +static void kill_and_wait(int pid, int* status)
> +{
> + kill(-pid, SIGKILL);
> + kill(pid, SIGKILL);
> + for (int i = 0; i < 100; i++) {
> + if (waitpid(-1, status, WNOHANG | __WALL) == pid)
> + return;
> + usleep(1000);
> + }
> + DIR* dir = opendir("/sys/fs/fuse/connections");
> + if (dir) {
> + for (;;) {
> + struct dirent* ent = readdir(dir);
> + if (!ent)
> + break;
> + if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
> + continue;
> + char abort[300];
> + snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort",
> + ent->d_name);
> + int fd = open(abort, O_WRONLY);
> + if (fd == -1) {
> + continue;
> + }
> + if (write(fd, abort, 1) < 0) {
> + }
> + close(fd);
> + }
> + closedir(dir);
> + } else {
> + }
> + while (waitpid(-1, status, __WALL) != pid) {
> + }
> +}
> +
> +static void setup_loop()
> +{
> + setup_cgroups_loop();
> +}
> +
> +static void setup_test()
> +{
> + prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
> + setpgrp();
> + setup_cgroups_test();
> + write_file("/proc/self/oom_score_adj", "1000");
> + if (symlink("/dev/binderfs", "./binderfs")) {
> + }
> +}
> +
> +static void close_fds()
> +{
> + for (int fd = 3; fd < MAX_FDS; fd++)
> + close(fd);
> +}
> +
> +struct thread_t {
> + int created, call;
> + event_t ready, done;
> +};
> +
> +static struct thread_t threads[16];
> +static void execute_call(int call);
> +static int running;
> +
> +static void* thr(void* arg)
> +{
> + struct thread_t* th = (struct thread_t*)arg;
> + for (;;) {
> + event_wait(&th->ready);
> + event_reset(&th->ready);
> + execute_call(th->call);
> + __atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED);
> + event_set(&th->done);
> + }
> + return 0;
> +}
> +
> +static void execute_one(void)
> +{
> + int i, call, thread;
> + for (call = 0; call < 6; call++) {
> + for (thread = 0; thread < (int)(sizeof(threads) / sizeof(threads[0]));
> + thread++) {
> + struct thread_t* th = &threads[thread];
> + if (!th->created) {
> + th->created = 1;
> + event_init(&th->ready);
> + event_init(&th->done);
> + event_set(&th->done);
> + thread_start(thr, th);
> + }
> + if (!event_isset(&th->done))
> + continue;
> + event_reset(&th->done);
> + th->call = call;
> + __atomic_fetch_add(&running, 1, __ATOMIC_RELAXED);
> + event_set(&th->ready);
> + event_timedwait(&th->done, 50);
> + break;
> + }
> + }
> + for (i = 0; i < 100 && __atomic_load_n(&running, __ATOMIC_RELAXED); i++)
> + sleep_ms(1);
> + close_fds();
> +}
> +
> +static void execute_one(void);
> +
> +#define WAIT_FLAGS __WALL
> +
> +static void loop(void)
> +{
> + setup_loop();
> + int iter = 0;
> + for (;; iter++) {
> + char cwdbuf[32];
> + sprintf(cwdbuf, "./%d", iter);
> + if (mkdir(cwdbuf, 0777))
> + exit(1);
> + int pid = fork();
> + if (pid < 0)
> + exit(1);
> + if (pid == 0) {
> + if (chdir(cwdbuf))
> + exit(1);
> + setup_test();
> + execute_one();
> + exit(0);
> + }
> + int status = 0;
> + uint64_t start = current_time_ms();
> + for (;;) {
> + if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid)
> + break;
> + sleep_ms(1);
> + if (current_time_ms() - start < 5000)
> + continue;
> + kill_and_wait(pid, &status);
> + break;
> + }
> + remove_dir(cwdbuf);
> + }
> +}
> +
> +uint64_t r[3] = {0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff};
> +
> +void execute_call(int call)
> +{
> + intptr_t res = 0;
> + switch (call) {
> + case 0:
> + res = syscall(__NR_socket, 0x10ul, 3ul, 0);
> + if (res != -1)
> + r[0] = res;
> + break;
> + case 1:
> + res = syscall(__NR_socket, 0x11ul, 2ul, 0);
> + if (res != -1)
> + r[1] = res;
> + break;
> + case 2:
> + *(uint16_t*)0x20000080 = 0x11;
> + memcpy((void*)0x20000082,
> + "\x00\x00\x01\x00\x00\x00\x00\x00\x08\xfc\x9d\x71\xfc\x00\x00\x00"
> + "\x00\x00\x00\x00\xf8\xff\xff\x00\x2e\x0b\x38\x36\x00\x54\x04\xb0"
> + "\xd6\x30\x1a\x4c\xe8\x75\xf2\xe3\xff\x5f\x16\x3e\xe3\x40\xb7\x67"
> + "\x95\x00\x80\x00\xf8\x00\x00\x00\x00\x01\x04\x00\x3c\x58\x11\x03"
> + "\x9e\x15\x77\x50\x27\xec\xce\x66\xfd\x79\x2b\xbf\x0e\x5b\xf5\xff"
> + "\x9b\x08\x16\xf3\xf6\xdb\x1c\x00\x01\x00\x00\x00\x00\x00\x00\x00"
> + "\x49\x74\x00\x00\x00\x00\x00\x00\x00\x06\xad\x8e\x5e\xcc\x32\x6d"
> + "\x3a\x09\xff\x42\xc6\x54\x00\x00\x00\x00\x00\x00\x00\x00",
> + 126);
> + syscall(__NR_bind, r[1], 0x20000080ul, 0x80ul);
> + break;
> + case 3:
> + *(uint32_t*)0x200003c0 = 0x14;
> + res = syscall(__NR_getsockname, r[1], 0x200004c0ul, 0x200003c0ul);
> + if (res != -1)
> + r[2] = *(uint32_t*)0x200004c4;
> + break;
> + case 4:
> + *(uint64_t*)0x20000240 = 0;
> + *(uint32_t*)0x20000248 = 0;
> + *(uint64_t*)0x20000250 = 0x20000080;
> + *(uint64_t*)0x20000080 = 0x20000380;
> + memcpy((void*)0x20000380,
> + "\x48\x00\x00\x00\x24\x00\x07\x05\x00\x00\x00\x00\x00\x00\x10\x00"
> + "\x00\x00\x1f\x00",
> + 20);
> + *(uint32_t*)0x20000394 = r[2];
> + memcpy((void*)0x20000398,
> + "\x00\x00\x04\x00\xf1\xff\xff\xff\x00\x00\x00\x00\x08\x00\x01\x00"
> + "\x68\x74\x62\x00\x1c\x00\x02\x00\x18\x00\x02\x00\x03",
> + 29);
> + *(uint64_t*)0x20000088 = 0x48;
> + *(uint64_t*)0x20000258 = 1;
> + *(uint64_t*)0x20000260 = 0;
> + *(uint64_t*)0x20000268 = 0;
> + *(uint32_t*)0x20000270 = 0;
> + syscall(__NR_sendmsg, r[0], 0x20000240ul, 0ul);
> + break;
> + case 5:
> + syscall(__NR_clone, 0xbb002100ul, 0ul, 0x9999999999999999ul, 0ul, -1ul);
> + break;
> + }
> +}
> +int main(void)
> +{
> + syscall(__NR_mmap, 0x1ffff000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);
> + syscall(__NR_mmap, 0x20000000ul, 0x1000000ul, 7ul, 0x32ul, -1, 0ul);
> + syscall(__NR_mmap, 0x21000000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);
> + setup_cgroups();
> + for (procid = 0; procid < 6; procid++) {
> + if (fork() == 0) {
> + use_temporary_dir();
> + do_sandbox_none();
> + }
> + }
> + sleep(1000000);
> + return 0;
> +}
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [BUG HTB offload] syzbot: C repro for b/213075475
2022-01-10 11:10 ` Maxim Mikityanskiy
@ 2022-01-10 13:14 ` Eric Dumazet
2022-01-10 13:17 ` Eric Dumazet
0 siblings, 1 reply; 4+ messages in thread
From: Eric Dumazet @ 2022-01-10 13:14 UTC (permalink / raw)
To: Maxim Mikityanskiy
Cc: Eric Dumazet, David S . Miller, Jakub Kicinski, netdev,
Tariq Toukan, Jamal Hadi Salim, Cong Wang, Jiri Pirko
On Mon, Jan 10, 2022 at 3:10 AM Maxim Mikityanskiy <maximmi@nvidia.com> wrote:
>
> On 2022-01-05 12:27, Eric Dumazet wrote:
> > From: Eric Dumazet <edumazet@google.com>
> >
> > I had an internal syzbot report with a repro, leading to the infamous:
> >
> > unregister_netdevice: waiting for DEV to become free
> >
> > This repro was also working on upstream kernels, so I started
> > a bisection leading to this ~one year old commit
> >
> > commit d03b195b5aa015f6c11988b86a3625f8d5dbac52 (HEAD, refs/bisect/bad)
> > Author: Maxim Mikityanskiy <maximmi@mellanox.com>
> > Date: Tue Jan 19 14:08:13 2021 +0200
> >
> > sch_htb: Hierarchical QoS hardware offload
> >
> > The repro seems to install a HTB qdisc on lo device, on TC_H_INGRESS
>
> I don't see anything related to qdiscs in this program. Could you point
> me at the place where it installs HTB on lo ingress?
>
> > It appears your patches were focused on egress, so there is probably
> > a missing check to avoid bad things.
> >
> > I spent already too much time to bisect the issue
> > I am thus giving a copy of the C repro.
> >
> > gcc -static -o b213075475 b213075475.c -lpthread
> >
> > Run the program, observe the unregister_netdevice messages in
> > dmesg/console in less than 20 seconds.
>
> It didn't reproduce for me. All I see is these messages in dmesg:
>
> cgroup: Unknown subsys name 'net'
> cgroup: Unknown subsys name 'rlimit'
You can ignore these messages.
>
> and the program hangs seemingly forever.
It runs, until you interrupt it.
>
> Do I need any specific kernel config options? Maybe you could share your
> config? Are there any other prerequisites for reproduction?
Maybe the relevant net/sched options?
CONFIG_NET_CLS_ACT=y
>
> Is this the right program, by the way?
Yes it is.
If you look at it, you will find the "htb" string (bytes \x68\x74\x62\x00) embedded in
memcpy((void*)0x20000398,
+ "\x00\x00\x04\x00\xf1\xff\xff\xff\x00\x00\x00\x00\x08\x00\x01\x00"
+ "\x68\x74\x62\x00\x1c\x00\x02\x00\x18\x00\x02\x00\x03",
+ 29);
I can provide a .config file to you if needed.
>
> > Reported-by: Eric Dumazet <edumazet@google.com>
> > Cc: Maxim Mikityanskiy <maximmi@mellanox.com>
> > Cc: Tariq Toukan <tariqt@nvidia.com>
> > Cc: Jakub Kicinski <kuba@kernel.org>
> > Cc: Jamal Hadi Salim <jhs@mojatatu.com>
> > Cc: Cong Wang <xiyou.wangcong@gmail.com>
> > Cc: Jiri Pirko <jiri@resnulli.us>
> > ---
> > b213075475.c | 669 +++++++++++++++++++++++++++++++++++++++++++++++++++
> > 1 file changed, 669 insertions(+)
> > create mode 100644 b213075475.c
> >
> > diff --git a/b213075475.c b/b213075475.c
> > new file mode 100644
> > index 0000000000000000000000000000000000000000..a6bf5462d15f05ff66c66883ac5df3edd18df0bc
> > --- /dev/null
> > +++ b/b213075475.c
> > @@ -0,0 +1,669 @@
> > +// autogenerated by syzkaller (https://github.com/google/syzkaller)
> > +
> > +#define _GNU_SOURCE
> > +
> > +#include <dirent.h>
> > +#include <endian.h>
> > +#include <errno.h>
> > +#include <fcntl.h>
> > +#include <pthread.h>
> > +#include <sched.h>
> > +#include <signal.h>
> > +#include <stdarg.h>
> > +#include <stdbool.h>
> > +#include <stdint.h>
> > +#include <stdio.h>
> > +#include <stdlib.h>
> > +#include <string.h>
> > +#include <sys/ioctl.h>
> > +#include <sys/mount.h>
> > +#include <sys/prctl.h>
> > +#include <sys/resource.h>
> > +#include <sys/stat.h>
> > +#include <sys/syscall.h>
> > +#include <sys/time.h>
> > +#include <sys/types.h>
> > +#include <sys/wait.h>
> > +#include <time.h>
> > +#include <unistd.h>
> > +
> > +#include <linux/capability.h>
> > +#include <linux/futex.h>
> > +
> > +static unsigned long long procid;
> > +
> > +static void sleep_ms(uint64_t ms)
> > +{
> > + usleep(ms * 1000);
> > +}
> > +
> > +static uint64_t current_time_ms(void)
> > +{
> > + struct timespec ts;
> > + if (clock_gettime(CLOCK_MONOTONIC, &ts))
> > + exit(1);
> > + return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
> > +}
> > +
> > +static void use_temporary_dir(void)
> > +{
> > + char tmpdir_template[] = "./syzkaller.XXXXXX";
> > + char* tmpdir = mkdtemp(tmpdir_template);
> > + if (!tmpdir)
> > + exit(1);
> > + if (chmod(tmpdir, 0777))
> > + exit(1);
> > + if (chdir(tmpdir))
> > + exit(1);
> > +}
> > +
> > +static void thread_start(void* (*fn)(void*), void* arg)
> > +{
> > + pthread_t th;
> > + pthread_attr_t attr;
> > + pthread_attr_init(&attr);
> > + pthread_attr_setstacksize(&attr, 128 << 10);
> > + int i = 0;
> > + for (; i < 100; i++) {
> > + if (pthread_create(&th, &attr, fn, arg) == 0) {
> > + pthread_attr_destroy(&attr);
> > + return;
> > + }
> > + if (errno == EAGAIN) {
> > + usleep(50);
> > + continue;
> > + }
> > + break;
> > + }
> > + exit(1);
> > +}
> > +
> > +typedef struct {
> > + int state;
> > +} event_t;
> > +
> > +static void event_init(event_t* ev)
> > +{
> > + ev->state = 0;
> > +}
> > +
> > +static void event_reset(event_t* ev)
> > +{
> > + ev->state = 0;
> > +}
> > +
> > +static void event_set(event_t* ev)
> > +{
> > + if (ev->state)
> > + exit(1);
> > + __atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE);
> > + syscall(SYS_futex, &ev->state, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1000000);
> > +}
> > +
> > +static void event_wait(event_t* ev)
> > +{
> > + while (!__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
> > + syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0);
> > +}
> > +
> > +static int event_isset(event_t* ev)
> > +{
> > + return __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE);
> > +}
> > +
> > +static int event_timedwait(event_t* ev, uint64_t timeout)
> > +{
> > + uint64_t start = current_time_ms();
> > + uint64_t now = start;
> > + for (;;) {
> > + uint64_t remain = timeout - (now - start);
> > + struct timespec ts;
> > + ts.tv_sec = remain / 1000;
> > + ts.tv_nsec = (remain % 1000) * 1000 * 1000;
> > + syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts);
> > + if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
> > + return 1;
> > + now = current_time_ms();
> > + if (now - start > timeout)
> > + return 0;
> > + }
> > +}
> > +
> > +static bool write_file(const char* file, const char* what, ...)
> > +{
> > + char buf[1024];
> > + va_list args;
> > + va_start(args, what);
> > + vsnprintf(buf, sizeof(buf), what, args);
> > + va_end(args);
> > + buf[sizeof(buf) - 1] = 0;
> > + int len = strlen(buf);
> > + int fd = open(file, O_WRONLY | O_CLOEXEC);
> > + if (fd == -1)
> > + return false;
> > + if (write(fd, buf, len) != len) {
> > + int err = errno;
> > + close(fd);
> > + errno = err;
> > + return false;
> > + }
> > + close(fd);
> > + return true;
> > +}
> > +
> > +#define MAX_FDS 30
> > +
> > +static void mount_cgroups(const char* dir, const char** controllers, int count)
> > +{
> > + if (mkdir(dir, 0777)) {
> > + }
> > + char enabled[128] = {0};
> > + int i = 0;
> > + for (; i < count; i++) {
> > + if (mount("none", dir, "cgroup", 0, controllers[i])) {
> > + continue;
> > + }
> > + umount(dir);
> > + strcat(enabled, ",");
> > + strcat(enabled, controllers[i]);
> > + }
> > + if (enabled[0] == 0)
> > + return;
> > + if (mount("none", dir, "cgroup", 0, enabled + 1)) {
> > + }
> > + if (chmod(dir, 0777)) {
> > + }
> > +}
> > +
> > +static void setup_cgroups()
> > +{
> > + const char* unified_controllers[] = {"+cpu", "+memory", "+io", "+pids"};
> > + const char* net_controllers[] = {"net", "net_prio", "devices", "blkio",
> > + "freezer"};
> > + const char* cpu_controllers[] = {"cpuset", "cpuacct", "hugetlb", "rlimit"};
> > + if (mkdir("/syzcgroup", 0777)) {
> > + }
> > + if (mkdir("/syzcgroup/unified", 0777)) {
> > + }
> > + if (mount("none", "/syzcgroup/unified", "cgroup2", 0, NULL)) {
> > + }
> > + if (chmod("/syzcgroup/unified", 0777)) {
> > + }
> > + int unified_control =
> > + open("/syzcgroup/unified/cgroup.subtree_control", O_WRONLY);
> > + if (unified_control != -1) {
> > + unsigned i;
> > + for (i = 0;
> > + i < sizeof(unified_controllers) / sizeof(unified_controllers[0]); i++)
> > + if (write(unified_control, unified_controllers[i],
> > + strlen(unified_controllers[i])) < 0) {
> > + }
> > + close(unified_control);
> > + }
> > + mount_cgroups("/syzcgroup/net", net_controllers,
> > + sizeof(net_controllers) / sizeof(net_controllers[0]));
> > + mount_cgroups("/syzcgroup/cpu", cpu_controllers,
> > + sizeof(cpu_controllers) / sizeof(cpu_controllers[0]));
> > + write_file("/syzcgroup/cpu/cgroup.clone_children", "1");
> > + write_file("/syzcgroup/cpu/cpuset.memory_pressure_enabled", "1");
> > +}
> > +
> > +static void setup_cgroups_loop()
> > +{
> > + int pid = getpid();
> > + char file[128];
> > + char cgroupdir[64];
> > + snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid);
> > + if (mkdir(cgroupdir, 0777)) {
> > + }
> > + snprintf(file, sizeof(file), "%s/pids.max", cgroupdir);
> > + write_file(file, "32");
> > + snprintf(file, sizeof(file), "%s/memory.low", cgroupdir);
> > + write_file(file, "%d", 298 << 20);
> > + snprintf(file, sizeof(file), "%s/memory.high", cgroupdir);
> > + write_file(file, "%d", 299 << 20);
> > + snprintf(file, sizeof(file), "%s/memory.max", cgroupdir);
> > + write_file(file, "%d", 300 << 20);
> > + snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
> > + write_file(file, "%d", pid);
> > + snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid);
> > + if (mkdir(cgroupdir, 0777)) {
> > + }
> > + snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
> > + write_file(file, "%d", pid);
> > + snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid);
> > + if (mkdir(cgroupdir, 0777)) {
> > + }
> > + snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
> > + write_file(file, "%d", pid);
> > +}
> > +
> > +static void setup_cgroups_test()
> > +{
> > + char cgroupdir[64];
> > + snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid);
> > + if (symlink(cgroupdir, "./cgroup")) {
> > + }
> > + snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid);
> > + if (symlink(cgroupdir, "./cgroup.cpu")) {
> > + }
> > + snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid);
> > + if (symlink(cgroupdir, "./cgroup.net")) {
> > + }
> > +}
> > +
> > +static void setup_common()
> > +{
> > + if (mount(0, "/sys/fs/fuse/connections", "fusectl", 0, 0)) {
> > + }
> > +}
> > +
> > +static void setup_binderfs()
> > +{
> > + if (mkdir("/dev/binderfs", 0777)) {
> > + }
> > + if (mount("binder", "/dev/binderfs", "binder", 0, NULL)) {
> > + }
> > +}
> > +
> > +static void loop();
> > +
> > +static void sandbox_common()
> > +{
> > + prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
> > + setsid();
> > + struct rlimit rlim;
> > + rlim.rlim_cur = rlim.rlim_max = (200 << 20);
> > + setrlimit(RLIMIT_AS, &rlim);
> > + rlim.rlim_cur = rlim.rlim_max = 32 << 20;
> > + setrlimit(RLIMIT_MEMLOCK, &rlim);
> > + rlim.rlim_cur = rlim.rlim_max = 136 << 20;
> > + setrlimit(RLIMIT_FSIZE, &rlim);
> > + rlim.rlim_cur = rlim.rlim_max = 1 << 20;
> > + setrlimit(RLIMIT_STACK, &rlim);
> > + rlim.rlim_cur = rlim.rlim_max = 0;
> > + setrlimit(RLIMIT_CORE, &rlim);
> > + rlim.rlim_cur = rlim.rlim_max = 256;
> > + setrlimit(RLIMIT_NOFILE, &rlim);
> > + if (unshare(CLONE_NEWNS)) {
> > + }
> > + if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
> > + }
> > + if (unshare(CLONE_NEWIPC)) {
> > + }
> > + if (unshare(0x02000000)) {
> > + }
> > + if (unshare(CLONE_NEWUTS)) {
> > + }
> > + if (unshare(CLONE_SYSVSEM)) {
> > + }
> > + typedef struct {
> > + const char* name;
> > + const char* value;
> > + } sysctl_t;
> > + static const sysctl_t sysctls[] = {
> > + {"/proc/sys/kernel/shmmax", "16777216"},
> > + {"/proc/sys/kernel/shmall", "536870912"},
> > + {"/proc/sys/kernel/shmmni", "1024"},
> > + {"/proc/sys/kernel/msgmax", "8192"},
> > + {"/proc/sys/kernel/msgmni", "1024"},
> > + {"/proc/sys/kernel/msgmnb", "1024"},
> > + {"/proc/sys/kernel/sem", "1024 1048576 500 1024"},
> > + };
> > + unsigned i;
> > + for (i = 0; i < sizeof(sysctls) / sizeof(sysctls[0]); i++)
> > + write_file(sysctls[i].name, sysctls[i].value);
> > +}
> > +
> > +static int wait_for_loop(int pid)
> > +{
> > + if (pid < 0)
> > + exit(1);
> > + int status = 0;
> > + while (waitpid(-1, &status, __WALL) != pid) {
> > + }
> > + return WEXITSTATUS(status);
> > +}
> > +
> > +static void drop_caps(void)
> > +{
> > + struct __user_cap_header_struct cap_hdr = {};
> > + struct __user_cap_data_struct cap_data[2] = {};
> > + cap_hdr.version = _LINUX_CAPABILITY_VERSION_3;
> > + cap_hdr.pid = getpid();
> > + if (syscall(SYS_capget, &cap_hdr, &cap_data))
> > + exit(1);
> > + const int drop = (1 << CAP_SYS_PTRACE) | (1 << CAP_SYS_NICE);
> > + cap_data[0].effective &= ~drop;
> > + cap_data[0].permitted &= ~drop;
> > + cap_data[0].inheritable &= ~drop;
> > + if (syscall(SYS_capset, &cap_hdr, &cap_data))
> > + exit(1);
> > +}
> > +
> > +static int do_sandbox_none(void)
> > +{
> > + if (unshare(CLONE_NEWPID)) {
> > + }
> > + int pid = fork();
> > + if (pid != 0)
> > + return wait_for_loop(pid);
> > + setup_common();
> > + sandbox_common();
> > + drop_caps();
> > + if (unshare(CLONE_NEWNET)) {
> > + }
> > + setup_binderfs();
> > + loop();
> > + exit(1);
> > +}
> > +
> > +#define FS_IOC_SETFLAGS _IOW('f', 2, long)
> > +static void remove_dir(const char* dir)
> > +{
> > + int iter = 0;
> > + DIR* dp = 0;
> > +retry:
> > + while (umount2(dir, MNT_DETACH | UMOUNT_NOFOLLOW) == 0) {
> > + }
> > + dp = opendir(dir);
> > + if (dp == NULL) {
> > + if (errno == EMFILE) {
> > + exit(1);
> > + }
> > + exit(1);
> > + }
> > + struct dirent* ep = 0;
> > + while ((ep = readdir(dp))) {
> > + if (strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0)
> > + continue;
> > + char filename[FILENAME_MAX];
> > + snprintf(filename, sizeof(filename), "%s/%s", dir, ep->d_name);
> > + while (umount2(filename, MNT_DETACH | UMOUNT_NOFOLLOW) == 0) {
> > + }
> > + struct stat st;
> > + if (lstat(filename, &st))
> > + exit(1);
> > + if (S_ISDIR(st.st_mode)) {
> > + remove_dir(filename);
> > + continue;
> > + }
> > + int i;
> > + for (i = 0;; i++) {
> > + if (unlink(filename) == 0)
> > + break;
> > + if (errno == EPERM) {
> > + int fd = open(filename, O_RDONLY);
> > + if (fd != -1) {
> > + long flags = 0;
> > + if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) {
> > + }
> > + close(fd);
> > + continue;
> > + }
> > + }
> > + if (errno == EROFS) {
> > + break;
> > + }
> > + if (errno != EBUSY || i > 100)
> > + exit(1);
> > + if (umount2(filename, MNT_DETACH | UMOUNT_NOFOLLOW))
> > + exit(1);
> > + }
> > + }
> > + closedir(dp);
> > + for (int i = 0;; i++) {
> > + if (rmdir(dir) == 0)
> > + break;
> > + if (i < 100) {
> > + if (errno == EPERM) {
> > + int fd = open(dir, O_RDONLY);
> > + if (fd != -1) {
> > + long flags = 0;
> > + if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) {
> > + }
> > + close(fd);
> > + continue;
> > + }
> > + }
> > + if (errno == EROFS) {
> > + break;
> > + }
> > + if (errno == EBUSY) {
> > + if (umount2(dir, MNT_DETACH | UMOUNT_NOFOLLOW))
> > + exit(1);
> > + continue;
> > + }
> > + if (errno == ENOTEMPTY) {
> > + if (iter < 100) {
> > + iter++;
> > + goto retry;
> > + }
> > + }
> > + }
> > + exit(1);
> > + }
> > +}
> > +
> > +static void kill_and_wait(int pid, int* status)
> > +{
> > + kill(-pid, SIGKILL);
> > + kill(pid, SIGKILL);
> > + for (int i = 0; i < 100; i++) {
> > + if (waitpid(-1, status, WNOHANG | __WALL) == pid)
> > + return;
> > + usleep(1000);
> > + }
> > + DIR* dir = opendir("/sys/fs/fuse/connections");
> > + if (dir) {
> > + for (;;) {
> > + struct dirent* ent = readdir(dir);
> > + if (!ent)
> > + break;
> > + if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
> > + continue;
> > + char abort[300];
> > + snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort",
> > + ent->d_name);
> > + int fd = open(abort, O_WRONLY);
> > + if (fd == -1) {
> > + continue;
> > + }
> > + if (write(fd, abort, 1) < 0) {
> > + }
> > + close(fd);
> > + }
> > + closedir(dir);
> > + } else {
> > + }
> > + while (waitpid(-1, status, __WALL) != pid) {
> > + }
> > +}
> > +
> > +static void setup_loop()
> > +{
> > + setup_cgroups_loop();
> > +}
> > +
> > +static void setup_test()
> > +{
> > + prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
> > + setpgrp();
> > + setup_cgroups_test();
> > + write_file("/proc/self/oom_score_adj", "1000");
> > + if (symlink("/dev/binderfs", "./binderfs")) {
> > + }
> > +}
> > +
> > +static void close_fds()
> > +{
> > + for (int fd = 3; fd < MAX_FDS; fd++)
> > + close(fd);
> > +}
> > +
> > +struct thread_t {
> > + int created, call;
> > + event_t ready, done;
> > +};
> > +
> > +static struct thread_t threads[16];
> > +static void execute_call(int call);
> > +static int running;
> > +
> > +static void* thr(void* arg)
> > +{
> > + struct thread_t* th = (struct thread_t*)arg;
> > + for (;;) {
> > + event_wait(&th->ready);
> > + event_reset(&th->ready);
> > + execute_call(th->call);
> > + __atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED);
> > + event_set(&th->done);
> > + }
> > + return 0;
> > +}
> > +
> > +static void execute_one(void)
> > +{
> > + int i, call, thread;
> > + for (call = 0; call < 6; call++) {
> > + for (thread = 0; thread < (int)(sizeof(threads) / sizeof(threads[0]));
> > + thread++) {
> > + struct thread_t* th = &threads[thread];
> > + if (!th->created) {
> > + th->created = 1;
> > + event_init(&th->ready);
> > + event_init(&th->done);
> > + event_set(&th->done);
> > + thread_start(thr, th);
> > + }
> > + if (!event_isset(&th->done))
> > + continue;
> > + event_reset(&th->done);
> > + th->call = call;
> > + __atomic_fetch_add(&running, 1, __ATOMIC_RELAXED);
> > + event_set(&th->ready);
> > + event_timedwait(&th->done, 50);
> > + break;
> > + }
> > + }
> > + for (i = 0; i < 100 && __atomic_load_n(&running, __ATOMIC_RELAXED); i++)
> > + sleep_ms(1);
> > + close_fds();
> > +}
> > +
> > +static void execute_one(void);
> > +
> > +#define WAIT_FLAGS __WALL
> > +
> > +static void loop(void)
> > +{
> > + setup_loop();
> > + int iter = 0;
> > + for (;; iter++) {
> > + char cwdbuf[32];
> > + sprintf(cwdbuf, "./%d", iter);
> > + if (mkdir(cwdbuf, 0777))
> > + exit(1);
> > + int pid = fork();
> > + if (pid < 0)
> > + exit(1);
> > + if (pid == 0) {
> > + if (chdir(cwdbuf))
> > + exit(1);
> > + setup_test();
> > + execute_one();
> > + exit(0);
> > + }
> > + int status = 0;
> > + uint64_t start = current_time_ms();
> > + for (;;) {
> > + if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid)
> > + break;
> > + sleep_ms(1);
> > + if (current_time_ms() - start < 5000)
> > + continue;
> > + kill_and_wait(pid, &status);
> > + break;
> > + }
> > + remove_dir(cwdbuf);
> > + }
> > +}
> > +
> > +uint64_t r[3] = {0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff};
> > +
> > +void execute_call(int call)
> > +{
> > + intptr_t res = 0;
> > + switch (call) {
> > + case 0:
> > + res = syscall(__NR_socket, 0x10ul, 3ul, 0);
> > + if (res != -1)
> > + r[0] = res;
> > + break;
> > + case 1:
> > + res = syscall(__NR_socket, 0x11ul, 2ul, 0);
> > + if (res != -1)
> > + r[1] = res;
> > + break;
> > + case 2:
> > + *(uint16_t*)0x20000080 = 0x11;
> > + memcpy((void*)0x20000082,
> > + "\x00\x00\x01\x00\x00\x00\x00\x00\x08\xfc\x9d\x71\xfc\x00\x00\x00"
> > + "\x00\x00\x00\x00\xf8\xff\xff\x00\x2e\x0b\x38\x36\x00\x54\x04\xb0"
> > + "\xd6\x30\x1a\x4c\xe8\x75\xf2\xe3\xff\x5f\x16\x3e\xe3\x40\xb7\x67"
> > + "\x95\x00\x80\x00\xf8\x00\x00\x00\x00\x01\x04\x00\x3c\x58\x11\x03"
> > + "\x9e\x15\x77\x50\x27\xec\xce\x66\xfd\x79\x2b\xbf\x0e\x5b\xf5\xff"
> > + "\x9b\x08\x16\xf3\xf6\xdb\x1c\x00\x01\x00\x00\x00\x00\x00\x00\x00"
> > + "\x49\x74\x00\x00\x00\x00\x00\x00\x00\x06\xad\x8e\x5e\xcc\x32\x6d"
> > + "\x3a\x09\xff\x42\xc6\x54\x00\x00\x00\x00\x00\x00\x00\x00",
> > + 126);
> > + syscall(__NR_bind, r[1], 0x20000080ul, 0x80ul);
> > + break;
> > + case 3:
> > + *(uint32_t*)0x200003c0 = 0x14;
> > + res = syscall(__NR_getsockname, r[1], 0x200004c0ul, 0x200003c0ul);
> > + if (res != -1)
> > + r[2] = *(uint32_t*)0x200004c4;
> > + break;
> > + case 4:
> > + *(uint64_t*)0x20000240 = 0;
> > + *(uint32_t*)0x20000248 = 0;
> > + *(uint64_t*)0x20000250 = 0x20000080;
> > + *(uint64_t*)0x20000080 = 0x20000380;
> > + memcpy((void*)0x20000380,
> > + "\x48\x00\x00\x00\x24\x00\x07\x05\x00\x00\x00\x00\x00\x00\x10\x00"
> > + "\x00\x00\x1f\x00",
> > + 20);
> > + *(uint32_t*)0x20000394 = r[2];
> > + memcpy((void*)0x20000398,
> > + "\x00\x00\x04\x00\xf1\xff\xff\xff\x00\x00\x00\x00\x08\x00\x01\x00"
> > + "\x68\x74\x62\x00\x1c\x00\x02\x00\x18\x00\x02\x00\x03",
> > + 29);
> > + *(uint64_t*)0x20000088 = 0x48;
> > + *(uint64_t*)0x20000258 = 1;
> > + *(uint64_t*)0x20000260 = 0;
> > + *(uint64_t*)0x20000268 = 0;
> > + *(uint32_t*)0x20000270 = 0;
> > + syscall(__NR_sendmsg, r[0], 0x20000240ul, 0ul);
> > + break;
> > + case 5:
> > + syscall(__NR_clone, 0xbb002100ul, 0ul, 0x9999999999999999ul, 0ul, -1ul);
> > + break;
> > + }
> > +}
> > +int main(void)
> > +{
> > + syscall(__NR_mmap, 0x1ffff000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);
> > + syscall(__NR_mmap, 0x20000000ul, 0x1000000ul, 7ul, 0x32ul, -1, 0ul);
> > + syscall(__NR_mmap, 0x21000000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);
> > + setup_cgroups();
> > + for (procid = 0; procid < 6; procid++) {
> > + if (fork() == 0) {
> > + use_temporary_dir();
> > + do_sandbox_none();
> > + }
> > + }
> > + sleep(1000000);
> > + return 0;
> > +}
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [BUG HTB offload] syzbot: C repro for b/213075475
2022-01-10 13:14 ` Eric Dumazet
@ 2022-01-10 13:17 ` Eric Dumazet
0 siblings, 0 replies; 4+ messages in thread
From: Eric Dumazet @ 2022-01-10 13:17 UTC (permalink / raw)
To: Maxim Mikityanskiy
Cc: Eric Dumazet, David S . Miller, Jakub Kicinski, netdev,
Tariq Toukan, Jamal Hadi Salim, Cong Wang, Jiri Pirko
On Mon, Jan 10, 2022 at 5:14 AM Eric Dumazet <edumazet@google.com> wrote:
>
> On Mon, Jan 10, 2022 at 3:10 AM Maxim Mikityanskiy <maximmi@nvidia.com> wrote:
> > Is this the right program, by the way?
>
> Yes it is.
>
> If you look at it, you find htb string embedded in
>
> memcpy((void*)0x20000398,
> + "\x00\x00\x04\x00\xf1\xff\xff\xff\x00\x00\x00\x00\x08\x00\x01\x00"
> + "\x68\x74\x62\x00\x1c\x00\x02\x00\x18\x00\x02\x00\x03",
> + 29);
>
Also embedded in this memcpy, you can find 0xFFFFFFF1 (the little-endian bytes \xf1\xff\xff\xff), which is TC_H_INGRESS:
#define TC_H_INGRESS (0xFFFFFFF1U)
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2022-01-10 13:17 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-05 10:27 [BUG HTB offload] syzbot: C repro for b/213075475 Eric Dumazet
2022-01-10 11:10 ` Maxim Mikityanskiy
2022-01-10 13:14 ` Eric Dumazet
2022-01-10 13:17 ` Eric Dumazet
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).