LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH v3 1/3] Move *_ucounts functions above
@ 2019-05-31 19:50 Albert Vaca Cintora
  2019-05-31 19:50 ` [PATCH v3 2/3] kernel/ucounts: expose count of inotify watches in use Albert Vaca Cintora
  2019-05-31 19:50 ` [PATCH v3 3/3] Documentation for /proc/sys/user/*_inotify_* Albert Vaca Cintora
  0 siblings, 2 replies; 6+ messages in thread
From: Albert Vaca Cintora @ 2019-05-31 19:50 UTC (permalink / raw)
  To: albertvaka, akpm, rdunlap, mingo, jack, ebiederm, nsaenzjulienne,
	linux-kernel, corbet, linux-doc, mbrugger

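Move find_ucounts(), get_ucounts() and put_ucounts() above the
user_table definition so that proc_handler functions referenced from
user_table can call them. This is a pure code move with no functional
change.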

Signed-off-by: Albert Vaca Cintora <albertvaka@gmail.com>
---
 kernel/ucount.c | 122 ++++++++++++++++++++++++------------------------
 1 file changed, 61 insertions(+), 61 deletions(-)

diff --git a/kernel/ucount.c b/kernel/ucount.c
index f48d1b6376a4..909c856e809f 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -57,6 +57,67 @@ static struct ctl_table_root set_root = {
 	.permissions = set_permissions,
 };
 
+static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struct hlist_head *hashent)
+{
+	struct ucounts *ucounts;
+
+	hlist_for_each_entry(ucounts, hashent, node) {
+		if (uid_eq(ucounts->uid, uid) && (ucounts->ns == ns))
+			return ucounts;
+	}
+	return NULL;
+}
+
+static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid)
+{
+	struct hlist_head *hashent = ucounts_hashentry(ns, uid);
+	struct ucounts *ucounts, *new;
+
+	spin_lock_irq(&ucounts_lock);
+	ucounts = find_ucounts(ns, uid, hashent);
+	if (!ucounts) {
+		spin_unlock_irq(&ucounts_lock);
+
+		new = kzalloc(sizeof(*new), GFP_KERNEL);
+		if (!new)
+			return NULL;
+
+		new->ns = ns;
+		new->uid = uid;
+		new->count = 0;
+
+		spin_lock_irq(&ucounts_lock);
+		ucounts = find_ucounts(ns, uid, hashent);
+		if (ucounts) {
+			kfree(new);
+		} else {
+			hlist_add_head(&new->node, hashent);
+			ucounts = new;
+		}
+	}
+	if (ucounts->count == INT_MAX)
+		ucounts = NULL;
+	else
+		ucounts->count += 1;
+	spin_unlock_irq(&ucounts_lock);
+	return ucounts;
+}
+
+static void put_ucounts(struct ucounts *ucounts)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ucounts_lock, flags);
+	ucounts->count -= 1;
+	if (!ucounts->count)
+		hlist_del_init(&ucounts->node);
+	else
+		ucounts = NULL;
+	spin_unlock_irqrestore(&ucounts_lock, flags);
+
+	kfree(ucounts);
+}
+
 static int zero = 0;
 static int int_max = INT_MAX;
 #define UCOUNT_ENTRY(name)				\
@@ -118,67 +179,6 @@ void retire_userns_sysctls(struct user_namespace *ns)
 #endif
 }
 
-static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struct hlist_head *hashent)
-{
-	struct ucounts *ucounts;
-
-	hlist_for_each_entry(ucounts, hashent, node) {
-		if (uid_eq(ucounts->uid, uid) && (ucounts->ns == ns))
-			return ucounts;
-	}
-	return NULL;
-}
-
-static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid)
-{
-	struct hlist_head *hashent = ucounts_hashentry(ns, uid);
-	struct ucounts *ucounts, *new;
-
-	spin_lock_irq(&ucounts_lock);
-	ucounts = find_ucounts(ns, uid, hashent);
-	if (!ucounts) {
-		spin_unlock_irq(&ucounts_lock);
-
-		new = kzalloc(sizeof(*new), GFP_KERNEL);
-		if (!new)
-			return NULL;
-
-		new->ns = ns;
-		new->uid = uid;
-		new->count = 0;
-
-		spin_lock_irq(&ucounts_lock);
-		ucounts = find_ucounts(ns, uid, hashent);
-		if (ucounts) {
-			kfree(new);
-		} else {
-			hlist_add_head(&new->node, hashent);
-			ucounts = new;
-		}
-	}
-	if (ucounts->count == INT_MAX)
-		ucounts = NULL;
-	else
-		ucounts->count += 1;
-	spin_unlock_irq(&ucounts_lock);
-	return ucounts;
-}
-
-static void put_ucounts(struct ucounts *ucounts)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&ucounts_lock, flags);
-	ucounts->count -= 1;
-	if (!ucounts->count)
-		hlist_del_init(&ucounts->node);
-	else
-		ucounts = NULL;
-	spin_unlock_irqrestore(&ucounts_lock, flags);
-
-	kfree(ucounts);
-}
-
 static inline bool atomic_inc_below(atomic_t *v, int u)
 {
 	int c, old;
-- 
2.21.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v3 2/3] kernel/ucounts: expose count of inotify watches in use
  2019-05-31 19:50 [PATCH v3 1/3] Move *_ucounts functions above Albert Vaca Cintora
@ 2019-05-31 19:50 ` Albert Vaca Cintora
  2019-06-01  0:00   ` Andrew Morton
  2019-05-31 19:50 ` [PATCH v3 3/3] Documentation for /proc/sys/user/*_inotify_* Albert Vaca Cintora
  1 sibling, 1 reply; 6+ messages in thread
From: Albert Vaca Cintora @ 2019-05-31 19:50 UTC (permalink / raw)
  To: albertvaka, akpm, rdunlap, mingo, jack, ebiederm, nsaenzjulienne,
	linux-kernel, corbet, linux-doc, mbrugger

Adds a readonly 'current_inotify_watches' entry to the user sysctl table.
The handler for this entry is a custom function that ends up calling
proc_dointvec. Said sysctl table already contains 'max_inotify_watches'
and it gets mounted under /proc/sys/user/.

Inotify watches are a finite resource, in a similar way to available file
descriptors. The motivation for this patch is to be able to set up
monitoring and alerting before an application starts failing because
it runs out of inotify watches.

Signed-off-by: Albert Vaca Cintora <albertvaka@gmail.com>
Acked-by: Jan Kara <jack@suse.cz>
Reviewed-by: Nicolas Saenz Julienne <nsaenzjulienne@suse.de>
---
 kernel/ucount.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/kernel/ucount.c b/kernel/ucount.c
index 909c856e809f..05b0e76208d3 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -118,6 +118,26 @@ static void put_ucounts(struct ucounts *ucounts)
 	kfree(ucounts);
 }
 
+#ifdef CONFIG_INOTIFY_USER
+int proc_read_inotify_watches(struct ctl_table *table, int write,
+		     void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ucounts *ucounts;
+	struct ctl_table fake_table;
+	int count = -1;
+
+	ucounts = get_ucounts(current_user_ns(), current_euid());
+	if (ucounts != NULL) {
+		count = atomic_read(&ucounts->ucount[UCOUNT_INOTIFY_WATCHES]);
+		put_ucounts(ucounts);
+	}
+
+	fake_table.data = &count;
+	fake_table.maxlen = sizeof(count);
+	return proc_dointvec(&fake_table, write, buffer, lenp, ppos);
+}
+#endif
+
 static int zero = 0;
 static int int_max = INT_MAX;
 #define UCOUNT_ENTRY(name)				\
@@ -140,6 +160,12 @@ static struct ctl_table user_table[] = {
 #ifdef CONFIG_INOTIFY_USER
 	UCOUNT_ENTRY("max_inotify_instances"),
 	UCOUNT_ENTRY("max_inotify_watches"),
+	{
+		.procname	= "current_inotify_watches",
+		.maxlen		= sizeof(int),
+		.mode		= 0444,
+		.proc_handler	= proc_read_inotify_watches,
+	},
 #endif
 	{ }
 };
-- 
2.21.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v3 3/3] Documentation for /proc/sys/user/*_inotify_*
  2019-05-31 19:50 [PATCH v3 1/3] Move *_ucounts functions above Albert Vaca Cintora
  2019-05-31 19:50 ` [PATCH v3 2/3] kernel/ucounts: expose count of inotify watches in use Albert Vaca Cintora
@ 2019-05-31 19:50 ` Albert Vaca Cintora
  1 sibling, 0 replies; 6+ messages in thread
From: Albert Vaca Cintora @ 2019-05-31 19:50 UTC (permalink / raw)
  To: albertvaka, akpm, rdunlap, mingo, jack, ebiederm, nsaenzjulienne,
	linux-kernel, corbet, linux-doc, mbrugger

Add documentation for the existing and the new inotify-related files
in /proc/sys/user.

Signed-off-by: Albert Vaca Cintora <albertvaka@gmail.com>
---
 Documentation/sysctl/user.txt | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/Documentation/sysctl/user.txt b/Documentation/sysctl/user.txt
index a5882865836e..99c288d39cf6 100644
--- a/Documentation/sysctl/user.txt
+++ b/Documentation/sysctl/user.txt
@@ -30,11 +30,26 @@ user namespace does not allow a user to escape their current limits.
 
 Currently, these files are in /proc/sys/user:
 
+- current_inotify_watches
+
+  The number of inotify watches in use by the current user in the
+  current user namespace. Calling inotify_add_watch() increases this.
+
 - max_cgroup_namespaces
 
   The maximum number of cgroup namespaces that any user in the current
   user namespace may create.
 
+- max_inotify_instances
+
+  The maximum number of inotify instances that any user in the current
+  user namespace may create. Calling inotify_init() uses an instance.
+
+- max_inotify_watches
+
+  The maximum number of inotify watches that any user in the current
+  user namespace may create. Calling inotify_add_watch() uses a watch.
+
 - max_ipc_namespaces
 
   The maximum number of ipc namespaces that any user in the current
-- 
2.21.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3 2/3] kernel/ucounts: expose count of inotify watches in use
  2019-05-31 19:50 ` [PATCH v3 2/3] kernel/ucounts: expose count of inotify watches in use Albert Vaca Cintora
@ 2019-06-01  0:00   ` Andrew Morton
  2019-06-01 18:20     ` Albert Vaca Cintora
  0 siblings, 1 reply; 6+ messages in thread
From: Andrew Morton @ 2019-06-01  0:00 UTC (permalink / raw)
  To: Albert Vaca Cintora
  Cc: rdunlap, mingo, jack, ebiederm, nsaenzjulienne, linux-kernel,
	corbet, linux-doc, mbrugger

On Fri, 31 May 2019 21:50:15 +0200 Albert Vaca Cintora <albertvaka@gmail.com> wrote:

> Adds a readonly 'current_inotify_watches' entry to the user sysctl table.
> The handler for this entry is a custom function that ends up calling
> proc_dointvec. Said sysctl table already contains 'max_inotify_watches'
> and it gets mounted under /proc/sys/user/.
> 
> Inotify watches are a finite resource, in a similar way to available file
> descriptors. The motivation for this patch is to be able to set up
> monitoring and alerting before an application starts failing because
> it runs out of inotify watches.
> 
> ...
>
> --- a/kernel/ucount.c
> +++ b/kernel/ucount.c
> @@ -118,6 +118,26 @@ static void put_ucounts(struct ucounts *ucounts)
>  	kfree(ucounts);
>  }
>  
> +#ifdef CONFIG_INOTIFY_USER
> +int proc_read_inotify_watches(struct ctl_table *table, int write,
> +		     void __user *buffer, size_t *lenp, loff_t *ppos)
> +{
> +	struct ucounts *ucounts;
> +	struct ctl_table fake_table;

hmm.

> +	int count = -1;
> +
> +	ucounts = get_ucounts(current_user_ns(), current_euid());
> +	if (ucounts != NULL) {
> +		count = atomic_read(&ucounts->ucount[UCOUNT_INOTIFY_WATCHES]);
> +		put_ucounts(ucounts);
> +	}
> +
> +	fake_table.data = &count;
> +	fake_table.maxlen = sizeof(count);
> +	return proc_dointvec(&fake_table, write, buffer, lenp, ppos);

proc_dointvec
->do_proc_dointvec
  ->__do_proc_dointvec
    ->proc_first_pos_non_zero_ignore
      ->warn_sysctl_write
        ->pr_warn_once(..., table->procname)

and I think ->procname is uninitialized.

That's from a cursory check.  Perhaps other uninitialized members of
fake_table are accessed, dunno.

we could do

	{
		struct ctl_table fake_table = {
			.data = &count,
			.maxlen = sizeof(count),
		};

		return proc_dointvec(&fake_table, write, buffer, lenp, ppos);
	}

or whatever.  That will cause the pr_warn_once to print "(null)" but
that's OK I guess.

Are there other places in the kernel which do this temp ctl_table
trick?  If so, what do they do?  If not, what is special about this
code?



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3 2/3] kernel/ucounts: expose count of inotify watches in use
  2019-06-01  0:00   ` Andrew Morton
@ 2019-06-01 18:20     ` Albert Vaca Cintora
  2019-10-16 18:47       ` Albert Vaca Cintora
  0 siblings, 1 reply; 6+ messages in thread
From: Albert Vaca Cintora @ 2019-06-01 18:20 UTC (permalink / raw)
  To: Andrew Morton
  Cc: rdunlap, mingo, Jan Kara, ebiederm, Nicolas Saenz Julienne,
	linux-kernel, corbet, linux-doc, Matthias Brugger

On Sat, Jun 1, 2019 at 2:00 AM Andrew Morton <akpm@linux-foundation.org> wrote:
>
> On Fri, 31 May 2019 21:50:15 +0200 Albert Vaca Cintora <albertvaka@gmail.com> wrote:
>
> > Adds a readonly 'current_inotify_watches' entry to the user sysctl table.
> > The handler for this entry is a custom function that ends up calling
> > proc_dointvec. Said sysctl table already contains 'max_inotify_watches'
> > and it gets mounted under /proc/sys/user/.
> >
> > Inotify watches are a finite resource, in a similar way to available file
> > descriptors. The motivation for this patch is to be able to set up
> > monitoring and alerting before an application starts failing because
> > it runs out of inotify watches.
> >
> > ...
> >
> > --- a/kernel/ucount.c
> > +++ b/kernel/ucount.c
> > @@ -118,6 +118,26 @@ static void put_ucounts(struct ucounts *ucounts)
> >       kfree(ucounts);
> >  }
> >
> > +#ifdef CONFIG_INOTIFY_USER
> > +int proc_read_inotify_watches(struct ctl_table *table, int write,
> > +                  void __user *buffer, size_t *lenp, loff_t *ppos)
> > +{
> > +     struct ucounts *ucounts;
> > +     struct ctl_table fake_table;
>
> hmm.
>
> > +     int count = -1;
> > +
> > +     ucounts = get_ucounts(current_user_ns(), current_euid());
> > +     if (ucounts != NULL) {
> > +             count = atomic_read(&ucounts->ucount[UCOUNT_INOTIFY_WATCHES]);
> > +             put_ucounts(ucounts);
> > +     }
> > +
> > +     fake_table.data = &count;
> > +     fake_table.maxlen = sizeof(count);
> > +     return proc_dointvec(&fake_table, write, buffer, lenp, ppos);
>
> proc_dointvec
> ->do_proc_dointvec
>   ->__do_proc_dointvec
>     ->proc_first_pos_non_zero_ignore
>       ->warn_sysctl_write
>         ->pr_warn_once(..., table->procname)
>
> and I think ->procname is uninitialized.
>
> That's from a cursory check.  Perhaps other uninitialized members of
> fake_table are accessed, dunno.
>
> we could do
>
>         {
>                 struct ctl_table fake_table = {
>                         .data = &count,
>                         .maxlen = sizeof(count),
>                 };
>
>                 return proc_dointvec(&fake_table, write, buffer, lenp, ppos);
>         }
>
> or whatever.  That will cause the pr_warn_once to print "(null)" but
> that's OK I guess.
>
> Are there other places in the kernel which do this temp ctl_table
> trick?  If so, what do they do?  If not, what is special about this
> code?
>
>

I copied this 'fake_table' trick from proc_do_entropy in
drivers/char/random.c exactly as it is. It is also used in other
places with slight variations.

Note that, since we are creating a read-only proc file,
proc_first_pos_non_zero_ignore is not called from __do_proc_dointvec,
so the uninitialized ->procname is not accessed.

Albert

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3 2/3] kernel/ucounts: expose count of inotify watches in use
  2019-06-01 18:20     ` Albert Vaca Cintora
@ 2019-10-16 18:47       ` Albert Vaca Cintora
  0 siblings, 0 replies; 6+ messages in thread
From: Albert Vaca Cintora @ 2019-10-16 18:47 UTC (permalink / raw)
  To: Andrew Morton
  Cc: rdunlap, mingo, Jan Kara, ebiederm, Nicolas Saenz Julienne,
	linux-kernel, corbet, linux-doc, Matthias Brugger

 On Sat, Jun 1, 2019 at 8:20 PM Albert Vaca Cintora
<albertvaka@gmail.com> wrote:
>
> On Sat, Jun 1, 2019 at 2:00 AM Andrew Morton <akpm@linux-foundation.org> wrote:
> >
> > On Fri, 31 May 2019 21:50:15 +0200 Albert Vaca Cintora <albertvaka@gmail.com> wrote:
> >
> > > Adds a readonly 'current_inotify_watches' entry to the user sysctl table.
> > > The handler for this entry is a custom function that ends up calling
> > > proc_dointvec. Said sysctl table already contains 'max_inotify_watches'
> > > and it gets mounted under /proc/sys/user/.
> > >
> > > Inotify watches are a finite resource, in a similar way to available file
> > > descriptors. The motivation for this patch is to be able to set up
> > > monitoring and alerting before an application starts failing because
> > > it runs out of inotify watches.
> > >
> > > ...
> > >
> > > --- a/kernel/ucount.c
> > > +++ b/kernel/ucount.c
> > > @@ -118,6 +118,26 @@ static void put_ucounts(struct ucounts *ucounts)
> > >       kfree(ucounts);
> > >  }
> > >
> > > +#ifdef CONFIG_INOTIFY_USER
> > > +int proc_read_inotify_watches(struct ctl_table *table, int write,
> > > +                  void __user *buffer, size_t *lenp, loff_t *ppos)
> > > +{
> > > +     struct ucounts *ucounts;
> > > +     struct ctl_table fake_table;
> >
> > hmm.
> >
> > > +     int count = -1;
> > > +
> > > +     ucounts = get_ucounts(current_user_ns(), current_euid());
> > > +     if (ucounts != NULL) {
> > > +             count = atomic_read(&ucounts->ucount[UCOUNT_INOTIFY_WATCHES]);
> > > +             put_ucounts(ucounts);
> > > +     }
> > > +
> > > +     fake_table.data = &count;
> > > +     fake_table.maxlen = sizeof(count);
> > > +     return proc_dointvec(&fake_table, write, buffer, lenp, ppos);
> >
> > proc_dointvec
> > ->do_proc_dointvec
> >   ->__do_proc_dointvec
> >     ->proc_first_pos_non_zero_ignore
> >       ->warn_sysctl_write
> >         ->pr_warn_once(..., table->procname)
> >
> > and I think ->procname is uninitialized.
> >
> > That's from a cursory check.  Perhaps other uninitialized members of
> > fake_table are accessed, dunno.
> >
> > we could do
> >
> >         {
> >                 struct ctl_table fake_table = {
> >                         .data = &count,
> >                         .maxlen = sizeof(count),
> >                 };
> >
> >                 return proc_dointvec(&fake_table, write, buffer, lenp, ppos);
> >         }
> >
> > or whatever.  That will cause the pr_warn_once to print "(null)" but
> > that's OK I guess.
> >
> > Are there other places in the kernel which do this temp ctl_table
> > trick?  If so, what do they do?  If not, what is special about this
> > code?
> >
> >
>
> I copied this 'fake_table' trick from proc_do_entropy in
> drivers/char/random.c exactly as it is. It is also used in other
> places with slight variations.
>
> Note that, since we are creating a read-only proc file,
> proc_first_pos_non_zero_ignore is not called from __do_proc_dointvec,
> so the uninitialized ->procname is not accessed.
>

Friendly ping. I think the code is correct as it is for the reasons
explained above.

Best regards,
Albert

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-10-16 18:47 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-31 19:50 [PATCH v3 1/3] Move *_ucounts functions above Albert Vaca Cintora
2019-05-31 19:50 ` [PATCH v3 2/3] kernel/ucounts: expose count of inotify watches in use Albert Vaca Cintora
2019-06-01  0:00   ` Andrew Morton
2019-06-01 18:20     ` Albert Vaca Cintora
2019-10-16 18:47       ` Albert Vaca Cintora
2019-05-31 19:50 ` [PATCH v3 3/3] Documentation for /proc/sys/user/*_inotify_* Albert Vaca Cintora

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).