LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH 1/5] bpf: add map_copy_value hook
[not found] <20200310174711.7490-1-lmb@cloudflare.com>
@ 2020-03-10 17:47 ` Lorenz Bauer
2020-03-10 17:47 ` [PATCH 2/5] bpf: convert queue and stack map to map_copy_value Lorenz Bauer
` (3 subsequent siblings)
4 siblings, 0 replies; 14+ messages in thread
From: Lorenz Bauer @ 2020-03-10 17:47 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann
Cc: kernel-team, Lorenz Bauer, netdev, bpf, linux-kernel
bpf_map_copy_value has a lot of special cases for different map types
that want more control than map_lookup_elem provides. On closer
inspection, almost all of them follow the pattern
int func(struct bpf_map *, void *, void *)
Introduce a new member map_copy_value to struct bpf_map_ops, and
convert the current special cases to use it.
Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
---
include/linux/bpf-cgroup.h | 5 -----
include/linux/bpf.h | 21 +--------------------
include/linux/bpf_types.h | 2 +-
kernel/bpf/arraymap.c | 13 ++++++++++---
kernel/bpf/bpf_struct_ops.c | 7 ++++---
kernel/bpf/hashtab.c | 10 +++++++---
kernel/bpf/local_storage.c | 14 +++++++++++++-
kernel/bpf/reuseport_array.c | 5 +++--
kernel/bpf/syscall.c | 24 ++++--------------------
9 files changed, 43 insertions(+), 58 deletions(-)
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index a7cd5c7a2509..6741a6c460f6 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -162,7 +162,6 @@ void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);
int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map);
void bpf_cgroup_storage_release(struct bpf_prog_aux *aux, struct bpf_map *map);
-int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
void *value, u64 flags);
@@ -370,10 +369,6 @@ static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return NULL; }
static inline void bpf_cgroup_storage_free(
struct bpf_cgroup_storage *storage) {}
-static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key,
- void *value) {
- return 0;
-}
static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
void *key, void *value, u64 flags) {
return 0;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 94a329b9da81..ad9f3be830f0 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -44,6 +44,7 @@ struct bpf_map_ops {
int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
void (*map_release_uref)(struct bpf_map *map);
void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);
+ int (*map_copy_value)(struct bpf_map *map, void *key, void *value);
int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr,
union bpf_attr __user *uattr);
int (*map_lookup_and_delete_batch)(struct bpf_map *map,
@@ -741,8 +742,6 @@ const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id);
void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log);
bool bpf_struct_ops_get(const void *kdata);
void bpf_struct_ops_put(const void *kdata);
-int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
- void *value);
static inline bool bpf_try_module_get(const void *data, struct module *owner)
{
if (owner == BPF_MODULE_OWNER)
@@ -774,12 +773,6 @@ static inline void bpf_module_put(const void *data, struct module *owner)
{
module_put(owner);
}
-static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map,
- void *key,
- void *value)
-{
- return -EINVAL;
-}
#endif
struct bpf_array {
@@ -1082,8 +1075,6 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd);
int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
int bpf_obj_get_user(const char __user *pathname, int flags);
-int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
-int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
u64 flags);
int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
@@ -1093,10 +1084,8 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
void *key, void *value, u64 map_flags);
-int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
void *key, void *value, u64 map_flags);
-int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
int bpf_get_file_flag(int flags);
int bpf_check_uarg_tail_zero(void __user *uaddr, size_t expected_size,
@@ -1437,8 +1426,6 @@ static inline int sock_map_get_from_fd(const union bpf_attr *attr,
#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
void bpf_sk_reuseport_detach(struct sock *sk);
-int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
- void *value);
int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
void *value, u64 map_flags);
#else
@@ -1447,12 +1434,6 @@ static inline void bpf_sk_reuseport_detach(struct sock *sk)
}
#ifdef CONFIG_BPF_SYSCALL
-static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map,
- void *key, void *value)
-{
- return -EOPNOTSUPP;
-}
-
static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
void *key, void *value,
u64 map_flags)
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index c81d4ece79a4..4949638cd049 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -81,7 +81,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops)
#endif
#ifdef CONFIG_CGROUP_BPF
BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, percpu_cgroup_storage_map_ops)
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops)
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 95d77770353c..58a0a8b3abe3 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -249,7 +249,8 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}
-int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
+static int percpu_array_map_copy_value(struct bpf_map *map, void *key,
+ void *value)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = *(u32 *)key;
@@ -513,6 +514,7 @@ const struct bpf_map_ops percpu_array_map_ops = {
.map_free = array_map_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = percpu_array_map_lookup_elem,
+ .map_copy_value = percpu_array_map_copy_value,
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
.map_seq_show_elem = percpu_array_map_seq_show_elem,
@@ -550,7 +552,8 @@ static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
}
/* only called from syscall */
-int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
+static int fd_array_map_lookup_elem_sys_copy(struct bpf_map *map, void *key,
+ void *value)
{
void **elem, *ptr;
int ret = 0;
@@ -561,7 +564,7 @@ int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
rcu_read_lock();
elem = array_map_lookup_elem(map, key);
if (elem && (ptr = READ_ONCE(*elem)))
- *value = map->ops->map_fd_sys_lookup_elem(ptr);
+ *(u32 *)value = map->ops->map_fd_sys_lookup_elem(ptr);
else
ret = -ENOENT;
rcu_read_unlock();
@@ -872,6 +875,7 @@ const struct bpf_map_ops prog_array_map_ops = {
.map_poke_run = prog_array_map_poke_run,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
+ .map_copy_value = fd_array_map_lookup_elem_sys_copy,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = prog_fd_array_get_ptr,
.map_fd_put_ptr = prog_fd_array_put_ptr,
@@ -962,6 +966,7 @@ const struct bpf_map_ops perf_event_array_map_ops = {
.map_free = fd_array_map_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
+ .map_copy_value = fd_array_map_lookup_elem_sys_copy,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = perf_event_fd_array_get_ptr,
.map_fd_put_ptr = perf_event_fd_array_put_ptr,
@@ -995,6 +1000,7 @@ const struct bpf_map_ops cgroup_array_map_ops = {
.map_free = cgroup_fd_array_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
+ .map_copy_value = fd_array_map_lookup_elem_sys_copy,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = cgroup_fd_array_get_ptr,
.map_fd_put_ptr = cgroup_fd_array_put_ptr,
@@ -1078,6 +1084,7 @@ const struct bpf_map_ops array_of_maps_map_ops = {
.map_free = array_of_map_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = array_of_map_lookup_elem,
+ .map_copy_value = fd_array_map_lookup_elem_sys_copy,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = bpf_map_fd_get_ptr,
.map_fd_put_ptr = bpf_map_fd_put_ptr,
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index ca5cc8cdb6eb..cc1d7d1077c1 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -238,8 +238,8 @@ static int bpf_struct_ops_map_get_next_key(struct bpf_map *map, void *key,
return 0;
}
-int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
- void *value)
+static int bpf_struct_ops_map_copy_value(struct bpf_map *map, void *key,
+ void *value)
{
struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
struct bpf_struct_ops_value *uvalue, *kvalue;
@@ -509,7 +509,7 @@ static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key,
if (!value)
return;
- err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
+ err = bpf_struct_ops_map_copy_value(map, key, value);
if (!err) {
btf_type_seq_show(btf_vmlinux, map->btf_vmlinux_value_type_id,
value, m);
@@ -609,6 +609,7 @@ const struct bpf_map_ops bpf_struct_ops_map_ops = {
.map_free = bpf_struct_ops_map_free,
.map_get_next_key = bpf_struct_ops_map_get_next_key,
.map_lookup_elem = bpf_struct_ops_map_lookup_elem,
+ .map_copy_value = bpf_struct_ops_map_copy_value,
.map_delete_elem = bpf_struct_ops_map_delete_elem,
.map_update_elem = bpf_struct_ops_map_update_elem,
.map_seq_show_elem = bpf_struct_ops_map_seq_show_elem,
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index d541c8486c95..f5452a8a5177 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1664,7 +1664,8 @@ static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key)
return NULL;
}
-int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
+static int htab_percpu_map_copy_value(struct bpf_map *map, void *key,
+ void *value)
{
struct htab_elem *l;
void __percpu *pptr;
@@ -1749,6 +1750,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
.map_free = htab_map_free,
.map_get_next_key = htab_map_get_next_key,
.map_lookup_elem = htab_percpu_map_lookup_elem,
+ .map_copy_value = htab_percpu_map_copy_value,
.map_update_elem = htab_percpu_map_update_elem,
.map_delete_elem = htab_map_delete_elem,
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
@@ -1761,6 +1763,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
.map_free = htab_map_free,
.map_get_next_key = htab_map_get_next_key,
.map_lookup_elem = htab_lru_percpu_map_lookup_elem,
+ .map_copy_value = htab_percpu_map_copy_value,
.map_update_elem = htab_lru_percpu_map_update_elem,
.map_delete_elem = htab_lru_map_delete_elem,
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
@@ -1796,7 +1799,7 @@ static void fd_htab_map_free(struct bpf_map *map)
}
/* only called from syscall */
-int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
+static int fd_htab_map_copy_value(struct bpf_map *map, void *key, void *value)
{
void **ptr;
int ret = 0;
@@ -1807,7 +1810,7 @@ int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
rcu_read_lock();
ptr = htab_map_lookup_elem(map, key);
if (ptr)
- *value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr));
+ *(u32 *)value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr));
else
ret = -ENOENT;
rcu_read_unlock();
@@ -1893,6 +1896,7 @@ const struct bpf_map_ops htab_of_maps_map_ops = {
.map_free = htab_of_map_free,
.map_get_next_key = htab_map_get_next_key,
.map_lookup_elem = htab_of_map_lookup_elem,
+ .map_copy_value = fd_htab_map_copy_value,
.map_delete_elem = htab_map_delete_elem,
.map_fd_get_ptr = bpf_map_fd_get_ptr,
.map_fd_put_ptr = bpf_map_fd_put_ptr,
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 33d01866bcc2..fcc0b168dad2 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -167,7 +167,7 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
return 0;
}
-int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key,
+static int percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key,
void *value)
{
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
@@ -420,6 +420,18 @@ const struct bpf_map_ops cgroup_storage_map_ops = {
.map_seq_show_elem = cgroup_storage_seq_show_elem,
};
+const struct bpf_map_ops percpu_cgroup_storage_map_ops = {
+ .map_alloc = cgroup_storage_map_alloc,
+ .map_free = cgroup_storage_map_free,
+ .map_get_next_key = cgroup_storage_get_next_key,
+ .map_lookup_elem = cgroup_storage_lookup_elem,
+ .map_copy_value = percpu_cgroup_storage_copy,
+ .map_update_elem = cgroup_storage_update_elem,
+ .map_delete_elem = cgroup_storage_delete_elem,
+ .map_check_btf = cgroup_storage_check_btf,
+ .map_seq_show_elem = cgroup_storage_seq_show_elem,
+};
+
int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *_map)
{
enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
index 01badd3eda7a..f36ccbf2612e 100644
--- a/kernel/bpf/reuseport_array.c
+++ b/kernel/bpf/reuseport_array.c
@@ -178,8 +178,8 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
return &array->map;
}
-int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
- void *value)
+static int reuseport_array_copy_value(struct bpf_map *map, void *key,
+ void *value)
{
struct sock *sk;
int err;
@@ -350,6 +350,7 @@ const struct bpf_map_ops reuseport_array_ops = {
.map_alloc = reuseport_array_alloc,
.map_free = reuseport_array_free,
.map_lookup_elem = reuseport_array_lookup_elem,
+ .map_copy_value = reuseport_array_copy_value,
.map_get_next_key = reuseport_array_get_next_key,
.map_delete_elem = reuseport_array_delete_elem,
};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7ce0815793dd..6503824e81e9 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -218,27 +218,11 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
return bpf_map_offload_lookup_elem(map, key, value);
bpf_disable_instrumentation();
- if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
- map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
- err = bpf_percpu_hash_copy(map, key, value);
- } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
- err = bpf_percpu_array_copy(map, key, value);
- } else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
- err = bpf_percpu_cgroup_storage_copy(map, key, value);
- } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
- err = bpf_stackmap_copy(map, key, value);
- } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
- err = bpf_fd_array_map_lookup_elem(map, key, value);
- } else if (IS_FD_HASH(map)) {
- err = bpf_fd_htab_map_lookup_elem(map, key, value);
- } else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
- err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
- } else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
- map->map_type == BPF_MAP_TYPE_STACK) {
+ if (map->map_type == BPF_MAP_TYPE_QUEUE ||
+ map->map_type == BPF_MAP_TYPE_STACK) {
err = map->ops->map_peek_elem(map, value);
- } else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
- /* struct_ops map requires directly updating "value" */
- err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
+ } else if (map->ops->map_copy_value) {
+ err = map->ops->map_copy_value(map, key, value);
} else {
rcu_read_lock();
if (map->ops->map_lookup_elem_sys_only)
--
2.20.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 2/5] bpf: convert queue and stack map to map_copy_value
[not found] <20200310174711.7490-1-lmb@cloudflare.com>
2020-03-10 17:47 ` [PATCH 1/5] bpf: add map_copy_value hook Lorenz Bauer
@ 2020-03-10 17:47 ` Lorenz Bauer
2020-03-11 14:00 ` Jakub Sitnicki
2020-03-10 17:47 ` [PATCH 3/5] bpf: convert sock map and hash " Lorenz Bauer
` (2 subsequent siblings)
4 siblings, 1 reply; 14+ messages in thread
From: Lorenz Bauer @ 2020-03-10 17:47 UTC (permalink / raw)
To: Alexei Starovoitov, Daniel Borkmann
Cc: kernel-team, Lorenz Bauer, netdev, bpf, linux-kernel
Migrate BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK to map_copy_value,
by introducing small wrappers that discard the (unused) key argument.
Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
---
kernel/bpf/queue_stack_maps.c | 18 ++++++++++++++++++
kernel/bpf/syscall.c | 5 +----
2 files changed, 19 insertions(+), 4 deletions(-)
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index f697647ceb54..5c89b7583cd2 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -262,11 +262,28 @@ static int queue_stack_map_get_next_key(struct bpf_map *map, void *key,
return -EINVAL;
}
+/* Called from syscall */
+static int queue_map_copy_value(struct bpf_map *map, void *key, void *value)
+{
+ (void)key;
+
+ return queue_map_peek_elem(map, value);
+}
+
+/* Called from syscall */
+static int stack_map_copy_value(struct bpf_map *map, void *key, void *value)
+{
+ (void)key;
+
+ return stack_map_peek_elem(map, value);
+}
+
const struct bpf_map_ops queue_map_ops = {
.map_alloc_check = queue_stack_map_alloc_check,
.map_alloc = queue_stack_map_alloc,
.map_free = queue_stack_map_free,
.map_lookup_elem = queue_stack_map_lookup_elem,
+ .map_copy_value = queue_map_copy_value,
.map_update_elem = queue_stack_map_update_elem,
.map_delete_elem = queue_stack_map_delete_elem,
.map_push_elem = queue_stack_map_push_elem,
@@ -280,6 +297,7 @@ const struct bpf_map_ops stack_map_ops = {
.map_alloc = queue_stack_map_alloc,
.map_free = queue_stack_map_free,
.map_lookup_elem = queue_stack_map_lookup_elem,
+ .map_copy_value = stack_map_copy_value,
.map_update_elem = queue_stack_map_update_elem,
.map_delete_elem = queue_stack_map_delete_elem,
.map_push_elem = queue_stack_map_push_elem,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 6503824e81e9..20c6cdace275 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -218,10 +218,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
return bpf_map_offload_lookup_elem(map, key, value);
bpf_disable_instrumentation();
- if (map->map_type == BPF_MAP_TYPE_QUEUE ||
- map->map_type == BPF_MAP_TYPE_STACK) {
- err = map->ops->map_peek_elem(map, value);
- } else if (map->ops->map_copy_value) {
+ if (map->ops->map_copy_value) {
err = map->ops->map_copy_value(map, key, value);
} else {
rcu_read_lock();
--
2.20.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 3/5] bpf: convert sock map and hash to map_copy_value
[not found] <20200310174711.7490-1-lmb@cloudflare.com>
2020-03-10 17:47 ` [PATCH 1/5] bpf: add map_copy_value hook Lorenz Bauer
2020-03-10 17:47 ` [PATCH 2/5] bpf: convert queue and stack map to map_copy_value Lorenz Bauer
@ 2020-03-10 17:47 ` Lorenz Bauer
2020-03-11 13:55 ` Jakub Sitnicki
2020-03-10 17:47 ` [PATCH 4/5] bpf: sockmap, sockhash: return file descriptors from privileged lookup Lorenz Bauer
2020-03-10 17:47 ` [PATCH 5/5] bpf: sockmap, sockhash: test looking up fds Lorenz Bauer
4 siblings, 1 reply; 14+ messages in thread
From: Lorenz Bauer @ 2020-03-10 17:47 UTC (permalink / raw)
To: John Fastabend, Daniel Borkmann, Jakub Sitnicki, Lorenz Bauer,
David S. Miller, Jakub Kicinski, Alexei Starovoitov
Cc: kernel-team, netdev, bpf, linux-kernel
Migrate sockmap and sockhash to use map_copy_value instead of
map_lookup_elem_sys_only.
Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
---
net/core/sock_map.c | 48 ++++++++++++++++++++++++++++++---------------
1 file changed, 32 insertions(+), 16 deletions(-)
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index a7075b3b4489..03e04426cd21 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -344,19 +344,34 @@ static void *sock_map_lookup(struct bpf_map *map, void *key)
return __sock_map_lookup_elem(map, *(u32 *)key);
}
-static void *sock_map_lookup_sys(struct bpf_map *map, void *key)
+static int __sock_map_copy_value(struct bpf_map *map, struct sock *sk,
+ void *value)
+{
+ switch (map->value_size) {
+ case sizeof(u64):
+ sock_gen_cookie(sk);
+ *(u64 *)value = atomic64_read(&sk->sk_cookie);
+ return 0;
+
+ default:
+ return -ENOSPC;
+ }
+}
+
+static int sock_map_copy_value(struct bpf_map *map, void *key, void *value)
{
struct sock *sk;
+ int ret = -ENOENT;
- if (map->value_size != sizeof(u64))
- return ERR_PTR(-ENOSPC);
-
+ rcu_read_lock();
sk = __sock_map_lookup_elem(map, *(u32 *)key);
if (!sk)
- return ERR_PTR(-ENOENT);
+ goto out;
- sock_gen_cookie(sk);
- return &sk->sk_cookie;
+ ret = __sock_map_copy_value(map, sk, value);
+out:
+ rcu_read_unlock();
+ return ret;
}
static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
@@ -636,7 +651,7 @@ const struct bpf_map_ops sock_map_ops = {
.map_alloc = sock_map_alloc,
.map_free = sock_map_free,
.map_get_next_key = sock_map_get_next_key,
- .map_lookup_elem_sys_only = sock_map_lookup_sys,
+ .map_copy_value = sock_map_copy_value,
.map_update_elem = sock_map_update_elem,
.map_delete_elem = sock_map_delete_elem,
.map_lookup_elem = sock_map_lookup,
@@ -1030,19 +1045,20 @@ static void sock_hash_free(struct bpf_map *map)
kfree(htab);
}
-static void *sock_hash_lookup_sys(struct bpf_map *map, void *key)
+static int sock_hash_copy_value(struct bpf_map *map, void *key, void *value)
{
struct sock *sk;
+ int ret = -ENOENT;
- if (map->value_size != sizeof(u64))
- return ERR_PTR(-ENOSPC);
-
+ rcu_read_lock();
sk = __sock_hash_lookup_elem(map, key);
if (!sk)
- return ERR_PTR(-ENOENT);
+ goto out;
- sock_gen_cookie(sk);
- return &sk->sk_cookie;
+ ret = __sock_map_copy_value(map, sk, value);
+out:
+ rcu_read_unlock();
+ return ret;
}
static void *sock_hash_lookup(struct bpf_map *map, void *key)
@@ -1139,7 +1155,7 @@ const struct bpf_map_ops sock_hash_ops = {
.map_update_elem = sock_hash_update_elem,
.map_delete_elem = sock_hash_delete_elem,
.map_lookup_elem = sock_hash_lookup,
- .map_lookup_elem_sys_only = sock_hash_lookup_sys,
+ .map_copy_value = sock_hash_copy_value,
.map_release_uref = sock_hash_release_progs,
.map_check_btf = map_check_no_btf,
};
--
2.20.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 4/5] bpf: sockmap, sockhash: return file descriptors from privileged lookup
[not found] <20200310174711.7490-1-lmb@cloudflare.com>
` (2 preceding siblings ...)
2020-03-10 17:47 ` [PATCH 3/5] bpf: convert sock map and hash " Lorenz Bauer
@ 2020-03-10 17:47 ` Lorenz Bauer
2020-03-11 23:27 ` John Fastabend
2020-03-17 15:18 ` Jakub Sitnicki
2020-03-10 17:47 ` [PATCH 5/5] bpf: sockmap, sockhash: test looking up fds Lorenz Bauer
4 siblings, 2 replies; 14+ messages in thread
From: Lorenz Bauer @ 2020-03-10 17:47 UTC (permalink / raw)
To: John Fastabend, Daniel Borkmann, Jakub Sitnicki, Lorenz Bauer,
David S. Miller, Jakub Kicinski, Alexei Starovoitov
Cc: kernel-team, netdev, bpf, linux-kernel
Allow callers with CAP_NET_ADMIN to retrieve file descriptors from a
sockmap and sockhash. O_CLOEXEC is enforced on all fds.
Without this, it's difficult to resize or otherwise rebuild existing
sockmap or sockhashes.
Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
---
net/core/sock_map.c | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 03e04426cd21..3228936aa31e 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -347,12 +347,31 @@ static void *sock_map_lookup(struct bpf_map *map, void *key)
static int __sock_map_copy_value(struct bpf_map *map, struct sock *sk,
void *value)
{
+ struct file *file;
+ int fd;
+
switch (map->value_size) {
case sizeof(u64):
sock_gen_cookie(sk);
*(u64 *)value = atomic64_read(&sk->sk_cookie);
return 0;
+ case sizeof(u32):
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ fd = get_unused_fd_flags(O_CLOEXEC);
+ if (unlikely(fd < 0))
+ return fd;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ file = get_file(sk->sk_socket->file);
+ read_unlock_bh(&sk->sk_callback_lock);
+
+ fd_install(fd, file);
+ *(u32 *)value = fd;
+ return 0;
+
default:
return -ENOSPC;
}
--
2.20.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 5/5] bpf: sockmap, sockhash: test looking up fds
[not found] <20200310174711.7490-1-lmb@cloudflare.com>
` (3 preceding siblings ...)
2020-03-10 17:47 ` [PATCH 4/5] bpf: sockmap, sockhash: return file descriptors from privileged lookup Lorenz Bauer
@ 2020-03-10 17:47 ` Lorenz Bauer
2020-03-11 13:52 ` Jakub Sitnicki
4 siblings, 1 reply; 14+ messages in thread
From: Lorenz Bauer @ 2020-03-10 17:47 UTC (permalink / raw)
To: Shuah Khan, Alexei Starovoitov, Daniel Borkmann
Cc: kernel-team, Lorenz Bauer, linux-kselftest, netdev, bpf, linux-kernel
Make sure that looking up an element from the map succeeds,
and that the fd is valid by using it in an fcntl call.
Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
---
.../selftests/bpf/prog_tests/sockmap_listen.c | 26 ++++++++++++++-----
1 file changed, 20 insertions(+), 6 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 52aa468bdccd..929e1e77ecc6 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -453,7 +453,7 @@ static void test_lookup_after_delete(int family, int sotype, int mapfd)
xclose(s);
}
-static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
+static void test_lookup_fd(int family, int sotype, int mapfd)
{
u32 key, value32;
int err, s;
@@ -466,7 +466,7 @@ static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
sizeof(value32), 1, 0);
if (mapfd < 0) {
FAIL_ERRNO("map_create");
- goto close;
+ goto close_sock;
}
key = 0;
@@ -475,11 +475,25 @@ static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
errno = 0;
err = bpf_map_lookup_elem(mapfd, &key, &value32);
- if (!err || errno != ENOSPC)
- FAIL_ERRNO("map_lookup: expected ENOSPC");
+ if (err) {
+ FAIL_ERRNO("map_lookup");
+ goto close_map;
+ }
+ if ((int)value32 == s) {
+ FAIL("return value is identical");
+ goto close;
+ }
+
+ err = fcntl(value32, F_GETFD);
+ if (err == -1)
+ FAIL_ERRNO("fcntl");
+
+close:
+ xclose(value32);
+close_map:
xclose(mapfd);
-close:
+close_sock:
xclose(s);
}
@@ -1456,7 +1470,7 @@ static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
/* lookup */
TEST(test_lookup_after_insert),
TEST(test_lookup_after_delete),
- TEST(test_lookup_32_bit_value),
+ TEST(test_lookup_fd),
/* update */
TEST(test_update_existing),
/* races with insert/delete */
--
2.20.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 5/5] bpf: sockmap, sockhash: test looking up fds
2020-03-10 17:47 ` [PATCH 5/5] bpf: sockmap, sockhash: test looking up fds Lorenz Bauer
@ 2020-03-11 13:52 ` Jakub Sitnicki
2020-03-11 17:24 ` Lorenz Bauer
0 siblings, 1 reply; 14+ messages in thread
From: Jakub Sitnicki @ 2020-03-11 13:52 UTC (permalink / raw)
To: Lorenz Bauer
Cc: Shuah Khan, Alexei Starovoitov, Daniel Borkmann, kernel-team,
linux-kselftest, netdev, bpf, linux-kernel
On Tue, Mar 10, 2020 at 06:47 PM CET, Lorenz Bauer wrote:
> Make sure that looking up an element from the map succeeds,
> and that the fd is valid by using it in an fcntl call.
>
> Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
> ---
> .../selftests/bpf/prog_tests/sockmap_listen.c | 26 ++++++++++++++-----
> 1 file changed, 20 insertions(+), 6 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
> index 52aa468bdccd..929e1e77ecc6 100644
> --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
> +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
> @@ -453,7 +453,7 @@ static void test_lookup_after_delete(int family, int sotype, int mapfd)
> xclose(s);
> }
>
> -static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
> +static void test_lookup_fd(int family, int sotype, int mapfd)
> {
> u32 key, value32;
> int err, s;
> @@ -466,7 +466,7 @@ static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
> sizeof(value32), 1, 0);
> if (mapfd < 0) {
> FAIL_ERRNO("map_create");
> - goto close;
> + goto close_sock;
> }
>
> key = 0;
> @@ -475,11 +475,25 @@ static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
>
> errno = 0;
> err = bpf_map_lookup_elem(mapfd, &key, &value32);
> - if (!err || errno != ENOSPC)
> - FAIL_ERRNO("map_lookup: expected ENOSPC");
> + if (err) {
> + FAIL_ERRNO("map_lookup");
> + goto close_map;
> + }
>
> + if ((int)value32 == s) {
> + FAIL("return value is identical");
> + goto close;
> + }
> +
> + err = fcntl(value32, F_GETFD);
> + if (err == -1)
> + FAIL_ERRNO("fcntl");
I would call getsockopt()/getsockname() to assert that the FD lookup
succeeded. We want to know not only that it's an FD (-EBADFD case), but
also that it's associated with a socket (-ENOTSOCK).
We can go even further, and compare socket cookies to ensure we got an
FD for the expected socket.
Also, I'm wondering if we could keep the -ENOSPC case test-covered by
temporarily dropping NET_ADMIN capability.
> +
> +close:
> + xclose(value32);
> +close_map:
> xclose(mapfd);
> -close:
> +close_sock:
> xclose(s);
> }
>
> @@ -1456,7 +1470,7 @@ static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
> /* lookup */
> TEST(test_lookup_after_insert),
> TEST(test_lookup_after_delete),
> - TEST(test_lookup_32_bit_value),
> + TEST(test_lookup_fd),
> /* update */
> TEST(test_update_existing),
> /* races with insert/delete */
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 3/5] bpf: convert sock map and hash to map_copy_value
2020-03-10 17:47 ` [PATCH 3/5] bpf: convert sock map and hash " Lorenz Bauer
@ 2020-03-11 13:55 ` Jakub Sitnicki
0 siblings, 0 replies; 14+ messages in thread
From: Jakub Sitnicki @ 2020-03-11 13:55 UTC (permalink / raw)
To: Lorenz Bauer
Cc: John Fastabend, Daniel Borkmann, David S. Miller, Jakub Kicinski,
Alexei Starovoitov, kernel-team, netdev, bpf, linux-kernel
On Tue, Mar 10, 2020 at 06:47 PM CET, Lorenz Bauer wrote:
> Migrate sockmap and sockhash to use map_copy_value instead of
> map_lookup_elem_sys_only.
>
> Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
> ---
> net/core/sock_map.c | 48 ++++++++++++++++++++++++++++++---------------
> 1 file changed, 32 insertions(+), 16 deletions(-)
>
> diff --git a/net/core/sock_map.c b/net/core/sock_map.c
> index a7075b3b4489..03e04426cd21 100644
> --- a/net/core/sock_map.c
> +++ b/net/core/sock_map.c
> @@ -344,19 +344,34 @@ static void *sock_map_lookup(struct bpf_map *map, void *key)
> return __sock_map_lookup_elem(map, *(u32 *)key);
> }
>
> -static void *sock_map_lookup_sys(struct bpf_map *map, void *key)
> +static int __sock_map_copy_value(struct bpf_map *map, struct sock *sk,
> + void *value)
> +{
> + switch (map->value_size) {
> + case sizeof(u64):
> + sock_gen_cookie(sk);
> + *(u64 *)value = atomic64_read(&sk->sk_cookie);
You could use sock_gen_cookie return value to merge the two above
statements into one. sock_gen_cookie also reads out the value.
> + return 0;
> +
> + default:
> + return -ENOSPC;
> + }
> +}
> +
> +static int sock_map_copy_value(struct bpf_map *map, void *key, void *value)
> {
> struct sock *sk;
> + int ret = -ENOENT;
>
> - if (map->value_size != sizeof(u64))
> - return ERR_PTR(-ENOSPC);
> -
> + rcu_read_lock();
> sk = __sock_map_lookup_elem(map, *(u32 *)key);
> if (!sk)
> - return ERR_PTR(-ENOENT);
> + goto out;
>
> - sock_gen_cookie(sk);
> - return &sk->sk_cookie;
> + ret = __sock_map_copy_value(map, sk, value);
> +out:
> + rcu_read_unlock();
> + return ret;
> }
>
> static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
> @@ -636,7 +651,7 @@ const struct bpf_map_ops sock_map_ops = {
> .map_alloc = sock_map_alloc,
> .map_free = sock_map_free,
> .map_get_next_key = sock_map_get_next_key,
> - .map_lookup_elem_sys_only = sock_map_lookup_sys,
> + .map_copy_value = sock_map_copy_value,
> .map_update_elem = sock_map_update_elem,
> .map_delete_elem = sock_map_delete_elem,
> .map_lookup_elem = sock_map_lookup,
> @@ -1030,19 +1045,20 @@ static void sock_hash_free(struct bpf_map *map)
> kfree(htab);
> }
>
> -static void *sock_hash_lookup_sys(struct bpf_map *map, void *key)
> +static int sock_hash_copy_value(struct bpf_map *map, void *key, void *value)
> {
> struct sock *sk;
> + int ret = -ENOENT;
>
> - if (map->value_size != sizeof(u64))
> - return ERR_PTR(-ENOSPC);
> -
> + rcu_read_lock();
> sk = __sock_hash_lookup_elem(map, key);
> if (!sk)
> - return ERR_PTR(-ENOENT);
> + goto out;
>
> - sock_gen_cookie(sk);
> - return &sk->sk_cookie;
> + ret = __sock_map_copy_value(map, sk, value);
> +out:
> + rcu_read_unlock();
> + return ret;
> }
>
> static void *sock_hash_lookup(struct bpf_map *map, void *key)
> @@ -1139,7 +1155,7 @@ const struct bpf_map_ops sock_hash_ops = {
> .map_update_elem = sock_hash_update_elem,
> .map_delete_elem = sock_hash_delete_elem,
> .map_lookup_elem = sock_hash_lookup,
> - .map_lookup_elem_sys_only = sock_hash_lookup_sys,
> + .map_copy_value = sock_hash_copy_value,
> .map_release_uref = sock_hash_release_progs,
> .map_check_btf = map_check_no_btf,
> };
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/5] bpf: convert queue and stack map to map_copy_value
2020-03-10 17:47 ` [PATCH 2/5] bpf: convert queue and stack map to map_copy_value Lorenz Bauer
@ 2020-03-11 14:00 ` Jakub Sitnicki
2020-03-11 22:31 ` John Fastabend
0 siblings, 1 reply; 14+ messages in thread
From: Jakub Sitnicki @ 2020-03-11 14:00 UTC (permalink / raw)
To: Lorenz Bauer
Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, netdev, bpf,
linux-kernel
On Tue, Mar 10, 2020 at 06:47 PM CET, Lorenz Bauer wrote:
> Migrate BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK to map_copy_value,
> by introducing small wrappers that discard the (unused) key argument.
>
> Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
> ---
> kernel/bpf/queue_stack_maps.c | 18 ++++++++++++++++++
> kernel/bpf/syscall.c | 5 +----
> 2 files changed, 19 insertions(+), 4 deletions(-)
>
> diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
> index f697647ceb54..5c89b7583cd2 100644
> --- a/kernel/bpf/queue_stack_maps.c
> +++ b/kernel/bpf/queue_stack_maps.c
> @@ -262,11 +262,28 @@ static int queue_stack_map_get_next_key(struct bpf_map *map, void *key,
> return -EINVAL;
> }
>
> +/* Called from syscall */
> +static int queue_map_copy_value(struct bpf_map *map, void *key, void *value)
> +{
> + (void)key;
Alternatively, there is the __always_unused compiler attribute from
include/linux/compiler_attributes.h that seems to be in wide use.
> +
> + return queue_map_peek_elem(map, value);
> +}
> +
> +/* Called from syscall */
> +static int stack_map_copy_value(struct bpf_map *map, void *key, void *value)
> +{
> + (void)key;
> +
> + return stack_map_peek_elem(map, value);
> +}
> +
> const struct bpf_map_ops queue_map_ops = {
> .map_alloc_check = queue_stack_map_alloc_check,
> .map_alloc = queue_stack_map_alloc,
> .map_free = queue_stack_map_free,
> .map_lookup_elem = queue_stack_map_lookup_elem,
> + .map_copy_value = queue_map_copy_value,
> .map_update_elem = queue_stack_map_update_elem,
> .map_delete_elem = queue_stack_map_delete_elem,
> .map_push_elem = queue_stack_map_push_elem,
> @@ -280,6 +297,7 @@ const struct bpf_map_ops stack_map_ops = {
> .map_alloc = queue_stack_map_alloc,
> .map_free = queue_stack_map_free,
> .map_lookup_elem = queue_stack_map_lookup_elem,
> + .map_copy_value = stack_map_copy_value,
> .map_update_elem = queue_stack_map_update_elem,
> .map_delete_elem = queue_stack_map_delete_elem,
> .map_push_elem = queue_stack_map_push_elem,
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 6503824e81e9..20c6cdace275 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -218,10 +218,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
> return bpf_map_offload_lookup_elem(map, key, value);
>
> bpf_disable_instrumentation();
> - if (map->map_type == BPF_MAP_TYPE_QUEUE ||
> - map->map_type == BPF_MAP_TYPE_STACK) {
> - err = map->ops->map_peek_elem(map, value);
> - } else if (map->ops->map_copy_value) {
> + if (map->ops->map_copy_value) {
> err = map->ops->map_copy_value(map, key, value);
> } else {
> rcu_read_lock();
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 5/5] bpf: sockmap, sockhash: test looking up fds
2020-03-11 13:52 ` Jakub Sitnicki
@ 2020-03-11 17:24 ` Lorenz Bauer
0 siblings, 0 replies; 14+ messages in thread
From: Lorenz Bauer @ 2020-03-11 17:24 UTC (permalink / raw)
To: Jakub Sitnicki
Cc: Shuah Khan, Alexei Starovoitov, Daniel Borkmann, kernel-team,
linux-kselftest, Networking, bpf, linux-kernel
On Wed, 11 Mar 2020 at 13:52, Jakub Sitnicki <jakub@cloudflare.com> wrote:
>
> On Tue, Mar 10, 2020 at 06:47 PM CET, Lorenz Bauer wrote:
> > Make sure that looking up an element from the map succeeds,
> > and that the fd is valid by using it in an fcntl call.
> >
> > Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
> > ---
> > .../selftests/bpf/prog_tests/sockmap_listen.c | 26 ++++++++++++++-----
> > 1 file changed, 20 insertions(+), 6 deletions(-)
> >
> > diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
> > index 52aa468bdccd..929e1e77ecc6 100644
> > --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
> > +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
> > @@ -453,7 +453,7 @@ static void test_lookup_after_delete(int family, int sotype, int mapfd)
> > xclose(s);
> > }
> >
> > -static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
> > +static void test_lookup_fd(int family, int sotype, int mapfd)
> > {
> > u32 key, value32;
> > int err, s;
> > @@ -466,7 +466,7 @@ static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
> > sizeof(value32), 1, 0);
> > if (mapfd < 0) {
> > FAIL_ERRNO("map_create");
> > - goto close;
> > + goto close_sock;
> > }
> >
> > key = 0;
> > @@ -475,11 +475,25 @@ static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
> >
> > errno = 0;
> > err = bpf_map_lookup_elem(mapfd, &key, &value32);
> > - if (!err || errno != ENOSPC)
> > - FAIL_ERRNO("map_lookup: expected ENOSPC");
> > + if (err) {
> > + FAIL_ERRNO("map_lookup");
> > + goto close_map;
> > + }
> >
> > + if ((int)value32 == s) {
> > + FAIL("return value is identical");
> > + goto close;
> > + }
> > +
> > + err = fcntl(value32, F_GETFD);
> > + if (err == -1)
> > + FAIL_ERRNO("fcntl");
>
> I would call getsockopt()/getsockname() to assert that the FD lookup
> succeeded. We want to know not only that it's an FD (-EBADFD case), but
> also that it's associated with a socket (-ENOTSOCK).
>
> We can go even further, and compare socket cookies to ensure we got an
> FD for the expected socket.
Good idea, thanks!
> Also, I'm wondering if we could keep the -ENOSPC case test-covered by
> temporarily dropping NET_ADMIN capability.
You mean EPERM? ENOSPC isn't reachable, since the map can only be created
with a map_value of 4 or 8.
>
> > +
> > +close:
> > + xclose(value32);
> > +close_map:
> > xclose(mapfd);
> > -close:
> > +close_sock:
> > xclose(s);
> > }
> >
> > @@ -1456,7 +1470,7 @@ static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
> > /* lookup */
> > TEST(test_lookup_after_insert),
> > TEST(test_lookup_after_delete),
> > - TEST(test_lookup_32_bit_value),
> > + TEST(test_lookup_fd),
> > /* update */
> > TEST(test_update_existing),
> > /* races with insert/delete */
--
Lorenz Bauer | Systems Engineer
6th Floor, County Hall/The Riverside Building, SE1 7PB, UK
www.cloudflare.com
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/5] bpf: convert queue and stack map to map_copy_value
2020-03-11 14:00 ` Jakub Sitnicki
@ 2020-03-11 22:31 ` John Fastabend
0 siblings, 0 replies; 14+ messages in thread
From: John Fastabend @ 2020-03-11 22:31 UTC (permalink / raw)
To: Jakub Sitnicki, Lorenz Bauer
Cc: Alexei Starovoitov, Daniel Borkmann, kernel-team, netdev, bpf,
linux-kernel
Jakub Sitnicki wrote:
> On Tue, Mar 10, 2020 at 06:47 PM CET, Lorenz Bauer wrote:
> > Migrate BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK to map_copy_value,
> > by introducing small wrappers that discard the (unused) key argument.
> >
> > Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
> > ---
> > kernel/bpf/queue_stack_maps.c | 18 ++++++++++++++++++
> > kernel/bpf/syscall.c | 5 +----
> > 2 files changed, 19 insertions(+), 4 deletions(-)
> >
> > diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
> > index f697647ceb54..5c89b7583cd2 100644
> > --- a/kernel/bpf/queue_stack_maps.c
> > +++ b/kernel/bpf/queue_stack_maps.c
> > @@ -262,11 +262,28 @@ static int queue_stack_map_get_next_key(struct bpf_map *map, void *key,
> > return -EINVAL;
> > }
> >
> > +/* Called from syscall */
> > +static int queue_map_copy_value(struct bpf_map *map, void *key, void *value)
> > +{
> > + (void)key;
>
> Alternatively, there is the __always_unused compiler attribute from
> include/linux/compiler_attributes.h that seems to be in wide use.
>
+1 use the attribute, it's much nicer imo.
> > +
> > + return queue_map_peek_elem(map, value);
> > +}
> > +
> > +/* Called from syscall */
> > +static int stack_map_copy_value(struct bpf_map *map, void *key, void *value)
> > +{
> > + (void)key;
> > +
> > + return stack_map_peek_elem(map, value);
> > +}
> > +
> > const struct bpf_map_ops queue_map_ops = {
> > .map_alloc_check = queue_stack_map_alloc_check,
> > .map_alloc = queue_stack_map_alloc,
> > .map_free = queue_stack_map_free,
> > .map_lookup_elem = queue_stack_map_lookup_elem,
> > + .map_copy_value = queue_map_copy_value,
> > .map_update_elem = queue_stack_map_update_elem,
> > .map_delete_elem = queue_stack_map_delete_elem,
> > .map_push_elem = queue_stack_map_push_elem,
> > @@ -280,6 +297,7 @@ const struct bpf_map_ops stack_map_ops = {
> > .map_alloc = queue_stack_map_alloc,
> > .map_free = queue_stack_map_free,
> > .map_lookup_elem = queue_stack_map_lookup_elem,
> > + .map_copy_value = stack_map_copy_value,
> > .map_update_elem = queue_stack_map_update_elem,
> > .map_delete_elem = queue_stack_map_delete_elem,
> > .map_push_elem = queue_stack_map_push_elem,
> > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> > index 6503824e81e9..20c6cdace275 100644
> > --- a/kernel/bpf/syscall.c
> > +++ b/kernel/bpf/syscall.c
> > @@ -218,10 +218,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
> > return bpf_map_offload_lookup_elem(map, key, value);
> >
> > bpf_disable_instrumentation();
> > - if (map->map_type == BPF_MAP_TYPE_QUEUE ||
> > - map->map_type == BPF_MAP_TYPE_STACK) {
> > - err = map->ops->map_peek_elem(map, value);
> > - } else if (map->ops->map_copy_value) {
> > + if (map->ops->map_copy_value) {
> > err = map->ops->map_copy_value(map, key, value);
> > } else {
> > rcu_read_lock();
^ permalink raw reply [flat|nested] 14+ messages in thread
* RE: [PATCH 4/5] bpf: sockmap, sockhash: return file descriptors from privileged lookup
2020-03-10 17:47 ` [PATCH 4/5] bpf: sockmap, sockhash: return file descriptors from privileged lookup Lorenz Bauer
@ 2020-03-11 23:27 ` John Fastabend
2020-03-17 10:17 ` Lorenz Bauer
2020-03-17 15:18 ` Jakub Sitnicki
1 sibling, 1 reply; 14+ messages in thread
From: John Fastabend @ 2020-03-11 23:27 UTC (permalink / raw)
To: Lorenz Bauer, John Fastabend, Daniel Borkmann, Jakub Sitnicki,
Lorenz Bauer, David S. Miller, Jakub Kicinski,
Alexei Starovoitov
Cc: kernel-team, netdev, bpf, linux-kernel
Lorenz Bauer wrote:
> Allow callers with CAP_NET_ADMIN to retrieve file descriptors from a
> sockmap and sockhash. O_CLOEXEC is enforced on all fds.
>
> Without this, it's difficult to resize or otherwise rebuild existing
> sockmap or sockhashes.
>
> Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
> Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
> ---
> net/core/sock_map.c | 19 +++++++++++++++++++
> 1 file changed, 19 insertions(+)
>
> diff --git a/net/core/sock_map.c b/net/core/sock_map.c
> index 03e04426cd21..3228936aa31e 100644
> --- a/net/core/sock_map.c
> +++ b/net/core/sock_map.c
> @@ -347,12 +347,31 @@ static void *sock_map_lookup(struct bpf_map *map, void *key)
> static int __sock_map_copy_value(struct bpf_map *map, struct sock *sk,
> void *value)
> {
> + struct file *file;
> + int fd;
> +
> switch (map->value_size) {
> case sizeof(u64):
> sock_gen_cookie(sk);
> *(u64 *)value = atomic64_read(&sk->sk_cookie);
> return 0;
>
> + case sizeof(u32):
> + if (!capable(CAP_NET_ADMIN))
> + return -EPERM;
> +
> + fd = get_unused_fd_flags(O_CLOEXEC);
> + if (unlikely(fd < 0))
> + return fd;
> +
> + read_lock_bh(&sk->sk_callback_lock);
> + file = get_file(sk->sk_socket->file);
> + read_unlock_bh(&sk->sk_callback_lock);
> +
> + fd_install(fd, file);
> + *(u32 *)value = fd;
> + return 0;
> +
Hi Lorenz, Can you say something about what happens if the sk
is deleted from the map or the sock is closed/unhashed ideally
in the commit message so we have it for later reference. I guess
because we are in an rcu block here the sk will be OK and psock
reference will exist until after the rcu block at least because
of call_rcu(). If the psock is destroyed from another path then
the fd will still point at the sock. correct?
Thanks.
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 4/5] bpf: sockmap, sockhash: return file descriptors from privileged lookup
2020-03-11 23:27 ` John Fastabend
@ 2020-03-17 10:17 ` Lorenz Bauer
0 siblings, 0 replies; 14+ messages in thread
From: Lorenz Bauer @ 2020-03-17 10:17 UTC (permalink / raw)
To: John Fastabend
Cc: Daniel Borkmann, Jakub Sitnicki, David S. Miller, Jakub Kicinski,
Alexei Starovoitov, kernel-team, Networking, bpf, linux-kernel
On Wed, 11 Mar 2020 at 23:27, John Fastabend <john.fastabend@gmail.com> wrote:
>
> Lorenz Bauer wrote:
> > Allow callers with CAP_NET_ADMIN to retrieve file descriptors from a
> > sockmap and sockhash. O_CLOEXEC is enforced on all fds.
> >
> > Without this, it's difficult to resize or otherwise rebuild existing
> > sockmap or sockhashes.
> >
> > Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
> > Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
> > ---
> > net/core/sock_map.c | 19 +++++++++++++++++++
> > 1 file changed, 19 insertions(+)
> >
> > diff --git a/net/core/sock_map.c b/net/core/sock_map.c
> > index 03e04426cd21..3228936aa31e 100644
> > --- a/net/core/sock_map.c
> > +++ b/net/core/sock_map.c
> > @@ -347,12 +347,31 @@ static void *sock_map_lookup(struct bpf_map *map, void *key)
> > static int __sock_map_copy_value(struct bpf_map *map, struct sock *sk,
> > void *value)
> > {
> > + struct file *file;
> > + int fd;
> > +
> > switch (map->value_size) {
> > case sizeof(u64):
> > sock_gen_cookie(sk);
> > *(u64 *)value = atomic64_read(&sk->sk_cookie);
> > return 0;
> >
> > + case sizeof(u32):
> > + if (!capable(CAP_NET_ADMIN))
> > + return -EPERM;
> > +
> > + fd = get_unused_fd_flags(O_CLOEXEC);
> > + if (unlikely(fd < 0))
> > + return fd;
> > +
> > + read_lock_bh(&sk->sk_callback_lock);
> > + file = get_file(sk->sk_socket->file);
> > + read_unlock_bh(&sk->sk_callback_lock);
> > +
> > + fd_install(fd, file);
> > + *(u32 *)value = fd;
> > + return 0;
> > +
>
> Hi Lorenz, Can you say something about what happens if the sk
> is deleted from the map or the sock is closed/unhashed ideally
> in the commit message so we have it for later reference. I guess
> because we are in an rcu block here the sk will be OK and psock
> reference will exist until after the rcu block at least because
> of call_rcu(). If the psock is destroyed from another path then
> the fd will still point at the sock. correct?
This is how I understand it:
* sk is protected by rcu_read_lock (as you point out)
* sk->sk_callback_lock protects against sk->sk_socket being
modified by sock_orphan, sock_graft, etc. via sk_set_socket
* get_file increments the refcount on the file
I'm not sure how the psock figures into this, maybe you can
elaborate a little?
--
Lorenz Bauer | Systems Engineer
6th Floor, County Hall/The Riverside Building, SE1 7PB, UK
www.cloudflare.com
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 4/5] bpf: sockmap, sockhash: return file descriptors from privileged lookup
2020-03-10 17:47 ` [PATCH 4/5] bpf: sockmap, sockhash: return file descriptors from privileged lookup Lorenz Bauer
2020-03-11 23:27 ` John Fastabend
@ 2020-03-17 15:18 ` Jakub Sitnicki
2020-03-17 18:16 ` John Fastabend
1 sibling, 1 reply; 14+ messages in thread
From: Jakub Sitnicki @ 2020-03-17 15:18 UTC (permalink / raw)
To: Lorenz Bauer
Cc: John Fastabend, Daniel Borkmann, David S. Miller, Jakub Kicinski,
Alexei Starovoitov, kernel-team, netdev, bpf, linux-kernel
On Tue, Mar 10, 2020 at 06:47 PM CET, Lorenz Bauer wrote:
> Allow callers with CAP_NET_ADMIN to retrieve file descriptors from a
> sockmap and sockhash. O_CLOEXEC is enforced on all fds.
>
> Without this, it's difficult to resize or otherwise rebuild existing
> sockmap or sockhashes.
>
> Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
> Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
> ---
> net/core/sock_map.c | 19 +++++++++++++++++++
> 1 file changed, 19 insertions(+)
>
> diff --git a/net/core/sock_map.c b/net/core/sock_map.c
> index 03e04426cd21..3228936aa31e 100644
> --- a/net/core/sock_map.c
> +++ b/net/core/sock_map.c
> @@ -347,12 +347,31 @@ static void *sock_map_lookup(struct bpf_map *map, void *key)
> static int __sock_map_copy_value(struct bpf_map *map, struct sock *sk,
> void *value)
> {
> + struct file *file;
> + int fd;
> +
> switch (map->value_size) {
> case sizeof(u64):
> sock_gen_cookie(sk);
> *(u64 *)value = atomic64_read(&sk->sk_cookie);
> return 0;
>
> + case sizeof(u32):
> + if (!capable(CAP_NET_ADMIN))
> + return -EPERM;
> +
> + fd = get_unused_fd_flags(O_CLOEXEC);
> + if (unlikely(fd < 0))
> + return fd;
> +
> + read_lock_bh(&sk->sk_callback_lock);
> + file = get_file(sk->sk_socket->file);
I think this deserves a second look.
We don't lock the sock, so what if tcp_close orphans it before we enter
this critical section? Looks like sk->sk_socket might be NULL.
I'd find a test that tries to trigger the race helpful, like:
thread A: loop in lookup FD from map
thread B: loop in insert FD into map, close FD
> + read_unlock_bh(&sk->sk_callback_lock);
> +
> + fd_install(fd, file);
> + *(u32 *)value = fd;
> + return 0;
> +
> default:
> return -ENOSPC;
> }
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 4/5] bpf: sockmap, sockhash: return file descriptors from privileged lookup
2020-03-17 15:18 ` Jakub Sitnicki
@ 2020-03-17 18:16 ` John Fastabend
0 siblings, 0 replies; 14+ messages in thread
From: John Fastabend @ 2020-03-17 18:16 UTC (permalink / raw)
To: Jakub Sitnicki, Lorenz Bauer
Cc: John Fastabend, Daniel Borkmann, David S. Miller, Jakub Kicinski,
Alexei Starovoitov, kernel-team, netdev, bpf, linux-kernel
Jakub Sitnicki wrote:
> On Tue, Mar 10, 2020 at 06:47 PM CET, Lorenz Bauer wrote:
> > Allow callers with CAP_NET_ADMIN to retrieve file descriptors from a
> > sockmap and sockhash. O_CLOEXEC is enforced on all fds.
> >
> > Without this, it's difficult to resize or otherwise rebuild existing
> > sockmap or sockhashes.
> >
> > Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
> > Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
> > ---
> > net/core/sock_map.c | 19 +++++++++++++++++++
> > 1 file changed, 19 insertions(+)
> >
> > diff --git a/net/core/sock_map.c b/net/core/sock_map.c
> > index 03e04426cd21..3228936aa31e 100644
> > --- a/net/core/sock_map.c
> > +++ b/net/core/sock_map.c
> > @@ -347,12 +347,31 @@ static void *sock_map_lookup(struct bpf_map *map, void *key)
> > static int __sock_map_copy_value(struct bpf_map *map, struct sock *sk,
> > void *value)
> > {
> > + struct file *file;
> > + int fd;
> > +
> > switch (map->value_size) {
> > case sizeof(u64):
> > sock_gen_cookie(sk);
> > *(u64 *)value = atomic64_read(&sk->sk_cookie);
> > return 0;
> >
> > + case sizeof(u32):
> > + if (!capable(CAP_NET_ADMIN))
> > + return -EPERM;
> > +
> > + fd = get_unused_fd_flags(O_CLOEXEC);
> > + if (unlikely(fd < 0))
> > + return fd;
> > +
> > + read_lock_bh(&sk->sk_callback_lock);
> > + file = get_file(sk->sk_socket->file);
>
> I think this deserves a second look.
>
> We don't lock the sock, so what if tcp_close orphans it before we enter
> this critical section? Looks like sk->sk_socket might be NULL.
>
> I'd find a test that tries to trigger the race helpful, like:
>
> thread A: loop in lookup FD from map
> thread B: loop in insert FD into map, close FD
Agreed, this was essentially my question above as well.
When the psock is created we call sock_hold() and will only do a sock_put()
after an rcu grace period when it's removed. So at least if you have the
sock here it should have a sk_refcnt. (Note the user data is set to NULL
so if you do reference psock you need to check that it's non-null.)
Is that enough to ensure sk_socket? Seems not to me, tcp_close for example
will still happen and call sock_orphan(sk) based on my admittedly quick
look.
Further, even if you do check sk->sk_socket is non-null what does it mean
to return a file with a socket that is closed, deleted from the sock_map
and psock removed? At this point is it just a dangling reference?
Still a bit confused as well what would or should happen when the sock is closed
after you have the file reference? I could probably dig up what exactly
would happen but I think we need it in the commit message so we understand
it. I also didn't dig up the details here but if the receiver of the
fd crashes or otherwise disappears, this hopefully all gets cleaned up?
>
> > + read_unlock_bh(&sk->sk_callback_lock);
> > +
> > + fd_install(fd, file);
> > + *(u32 *)value = fd;
> > + return 0;
> > +
> > default:
> > return -ENOSPC;
> > }
^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2020-03-17 18:16 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <20200310174711.7490-1-lmb@cloudflare.com>
2020-03-10 17:47 ` [PATCH 1/5] bpf: add map_copy_value hook Lorenz Bauer
2020-03-10 17:47 ` [PATCH 2/5] bpf: convert queue and stack map to map_copy_value Lorenz Bauer
2020-03-11 14:00 ` Jakub Sitnicki
2020-03-11 22:31 ` John Fastabend
2020-03-10 17:47 ` [PATCH 3/5] bpf: convert sock map and hash " Lorenz Bauer
2020-03-11 13:55 ` Jakub Sitnicki
2020-03-10 17:47 ` [PATCH 4/5] bpf: sockmap, sockhash: return file descriptors from privileged lookup Lorenz Bauer
2020-03-11 23:27 ` John Fastabend
2020-03-17 10:17 ` Lorenz Bauer
2020-03-17 15:18 ` Jakub Sitnicki
2020-03-17 18:16 ` John Fastabend
2020-03-10 17:47 ` [PATCH 5/5] bpf: sockmap, sockhash: test looking up fds Lorenz Bauer
2020-03-11 13:52 ` Jakub Sitnicki
2020-03-11 17:24 ` Lorenz Bauer
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).