LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Vikas Shivappa <vikas.shivappa@linux.intel.com>
To: vikas.shivappa@intel.com
Cc: x86@kernel.org, linux-kernel@vger.kernel.org, hpa@zytor.com,
	tglx@linutronix.de, mingo@kernel.org, tj@kernel.org,
	peterz@infradead.org, matt.fleming@intel.com,
	will.auld@intel.com, glenn.p.williamson@intel.com,
	kanaka.d.juvva@intel.com, vikas.shivappa@linux.intel.com
Subject: [PATCH 3/7] x86/intel_rdt: Support cache bit mask for Intel CAT
Date: Thu, 12 Mar 2015 16:16:03 -0700	[thread overview]
Message-ID: <1426202167-30598-4-git-send-email-vikas.shivappa@linux.intel.com> (raw)
In-Reply-To: <1426202167-30598-1-git-send-email-vikas.shivappa@linux.intel.com>

Add support for cache bit mask manipulation. The change adds a file to
the RDT cgroup which represents the CBM(cache bit mask) for the cgroup.

The RDT cgroup follows the cgroup hierarchy; mkdir and adding tasks to the
cgroup never fails.  When a child cgroup is created it inherits the
CLOSid and the CBM from its parent.  When a user changes the default
CBM for a cgroup, a new CLOSid may be allocated if the CBM was not
used before. If the new CBM is the one that is already used, the
count for that CLOSid<->CBM is incremented. The changing of 'cbm'
may fail with -ENOSPC once the kernel runs out of maximum CLOSids it
can support.
Users can create as many cgroups as they want, but having different CBMs
at the same time is restricted by the maximum number of CLOSids
(multiple cgroups can have the same CBM).
Kernel maintains a CLOSid<->cbm mapping which keeps count
of cgroups using a CLOSid.

The tasks in the CAT cgroup would get to fill the LLC cache represented
by the cgroup's 'cbm' file.

Reuse of CLOSids for cgroups with same bitmask also has following
advantages:
- This helps to use the scant CLOSids optimally.
- This also implies that during context switch, write to PQR-MSR is done
only when a task with a different bitmask is scheduled in.

Signed-off-by: Vikas Shivappa <vikas.shivappa@linux.intel.com>
---
 arch/x86/include/asm/intel_rdt.h |   3 +
 arch/x86/kernel/cpu/intel_rdt.c  | 205 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 208 insertions(+)

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 87af1a5..0ed28d9 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -4,6 +4,9 @@
 #ifdef CONFIG_CGROUP_RDT
 
 #include <linux/cgroup.h>
+#define MAX_CBM_LENGTH			32
+#define IA32_L3_CBM_BASE		0xc90
+#define CBM_FROM_INDEX(x)		(IA32_L3_CBM_BASE + x)
 
 struct rdt_subsys_info {
 	/* Clos Bitmap to keep track of available CLOSids.*/
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 3726f41..495497a 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -33,6 +33,9 @@ static struct rdt_subsys_info rdtss_info;
 static DEFINE_MUTEX(rdt_group_mutex);
 struct intel_rdt rdt_root_group;
 
+#define rdt_for_each_child(pos_css, parent_ir)		\
+	css_for_each_child((pos_css), &(parent_ir)->css)
+
 static inline bool cat_supported(struct cpuinfo_x86 *c)
 {
 	if (cpu_has(c, X86_FEATURE_CAT_L3))
@@ -83,6 +86,31 @@ static int __init rdt_late_init(void)
 late_initcall(rdt_late_init);
 
 /*
+ * Allocates a new closid from unused closids.
+ * Called with the rdt_group_mutex held.
+ */
+
+static int rdt_alloc_closid(struct intel_rdt *ir)
+{
+	unsigned int id;
+	unsigned int maxid;
+
+	lockdep_assert_held(&rdt_group_mutex);
+
+	/* Scan the CLOSid bitmap for the first unused id. */
+	maxid = boot_cpu_data.x86_cat_closs;
+	id = find_next_zero_bit(rdtss_info.closmap, maxid, 0);
+	/* find_next_zero_bit() returns maxid when every bit is set. */
+	if (id == maxid)
+		return -ENOSPC;
+
+	set_bit(id, rdtss_info.closmap);
+	/* A freshly allocated CLOSid must not be referenced by any cgroup. */
+	WARN_ON(ccmap[id].cgrp_count);
+	ccmap[id].cgrp_count++;
+	ir->clos = id;
+
+	return 0;
+}
+
+/*
 * Called with the rdt_group_mutex held.
 */
 static int rdt_free_closid(struct intel_rdt *ir)
@@ -133,8 +161,185 @@ static void rdt_css_free(struct cgroup_subsys_state *css)
 	mutex_unlock(&rdt_group_mutex);
 }
 
+/*
+ * Tests that the mask has at least two bits set and that all set bits
+ * form one contiguous run (no holes), as required for a valid CBM.
+ */
+
+static inline bool cbm_is_contiguous(unsigned long var)
+{
+	unsigned long first_bit, zero_bit;
+	unsigned long maxcbm = MAX_CBM_LENGTH;
+
+	/* A valid cache bit mask needs a minimum of two set bits. */
+	if (bitmap_weight(&var, maxcbm) < 2)
+		return false;
+
+	/* Find the first set bit, then the first clear bit after it. */
+	first_bit = find_next_bit(&var, maxcbm, 0);
+	zero_bit = find_next_zero_bit(&var, maxcbm, first_bit);
+
+	/* A set bit beyond the first clear bit means the run is broken. */
+	if (find_next_bit(&var, maxcbm, zero_bit) < maxcbm)
+		return false;
+
+	return true;
+}
+
+/* seq_file show handler for the cgroup 'cbm' file: prints the CBM in hex. */
+static int cat_cbm_read(struct seq_file *m, void *v)
+{
+	struct intel_rdt *ir = css_rdt(seq_css(m));
+
+	seq_printf(m, "%08lx\n", ccmap[ir->clos].cbm);
+	return 0;
+}
+
+/*
+ * Validates a proposed CBM for cgroup @ir:
+ *  - the mask must be contiguous with at least two bits set,
+ *  - it must be a subset of the parent cgroup's mask,
+ *  - every child cgroup's current mask must be a subset of it.
+ * Returns 0 if the mask is acceptable, -EINVAL otherwise.
+ */
+static int validate_cbm(struct intel_rdt *ir, unsigned long cbmvalue)
+{
+	struct intel_rdt *par, *c;
+	struct cgroup_subsys_state *css;
+	unsigned long *cbm_tmp;
+
+	if (!cbm_is_contiguous(cbmvalue)) {
+		pr_info("cbm should have >= 2 bits and be contiguous\n");
+		return -EINVAL;
+	}
+
+	/* The new mask may not exceed what the parent cgroup allows. */
+	par = parent_rdt(ir);
+	cbm_tmp = &ccmap[par->clos].cbm;
+	if (!bitmap_subset(&cbmvalue, cbm_tmp, MAX_CBM_LENGTH))
+		return -EINVAL;
+
+	/* Children are walked under RCU; css_for_each_child requires it. */
+	rcu_read_lock();
+	rdt_for_each_child(css, ir) {
+		c = css_rdt(css);
+		cbm_tmp = &ccmap[c->clos].cbm;
+		if (!bitmap_subset(cbm_tmp, &cbmvalue, MAX_CBM_LENGTH)) {
+			pr_info("Children's mask not a subset\n");
+			rcu_read_unlock();
+			return -EINVAL;
+		}
+	}
+
+	rcu_read_unlock();
+	return 0;
+}
+
+/*
+ * Looks for an existing CLOSid whose CBM equals @cbm so the id can be
+ * shared. On a match, stores the id in *closid and returns true.
+ *
+ * NOTE(review): this scans all ccmap slots, including ones whose
+ * cgrp_count is 0 — if rdt_free_closid() does not clear the stale cbm,
+ * a freed entry could be matched here. TODO confirm that is intended
+ * (it would effectively re-use the freed id, which may be harmless).
+ */
+static bool cbm_search(unsigned long cbm, int *closid)
+{
+	int maxid = boot_cpu_data.x86_cat_closs;
+	unsigned int i;
+
+	for (i = 0; i < maxid; i++)
+		if (bitmap_equal(&cbm, &ccmap[i].cbm, MAX_CBM_LENGTH)) {
+			*closid = i;
+			return true;
+		}
+
+	return false;
+}
+
+/* Debug helper: dumps the whole CLOSid<->CBM map at pr_debug level. */
+static void cbmmap_dump(void)
+{
+	int i;
+
+	pr_debug("CBMMAP\n");
+	for (i = 0; i < boot_cpu_data.x86_cat_closs; i++)
+		pr_debug("cbm: 0x%x,cgrp_count: %u\n",
+		 (unsigned int)ccmap[i].cbm, ccmap[i].cgrp_count);
+}
+
+/*
+ * IPI callback: programs this CPU's IA32_L3_MASK_n MSR for the CLOSid
+ * passed via @info, using the CBM currently stored in ccmap.
+ */
+static void cpu_cbm_update(void *info)
+{
+	unsigned int closid = *((unsigned int *)info);
+
+	wrmsrl(CBM_FROM_INDEX(closid), ccmap[closid].cbm);
+}
+
+/*
+ * Pushes the CBM for @closid to one CPU in each physical package; the
+ * IA32_L3_MASK_n MSRs are package-scoped, so one write per package is
+ * sufficient.
+ *
+ * NOTE(review): the skip only compares against the previous CPU's
+ * package id, so it assumes for_each_online_cpu() visits each package
+ * in one consecutive run; a different ordering sends redundant (but
+ * harmless) IPIs. Also presumably the caller excludes CPU hotplug
+ * while iterating — TODO confirm get_online_cpus() is held.
+ */
+static inline void cbm_update(unsigned int closid)
+{
+	int pkg_id = -1;
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		if (pkg_id == topology_physical_package_id(cpu))
+			continue;
+		smp_call_function_single(cpu, cpu_cbm_update, &closid, 1);
+		pkg_id = topology_physical_package_id(cpu);
+
+	}
+}
+
+/*
+ * rdt_cbm_write() - Validates and writes the cache bit mask(cbm)
+ * to the IA32_L3_MASK_n and also store the same in the ccmap.
+ *
+ * CLOSids are reused for cgroups which have same bitmask.
+ * - This helps to use the scant CLOSids optimally.
+ * - This also implies that at context switch write
+ * to PQR-MSR is done only when a task with a
+ * different bitmask is scheduled in.
+ *
+ * Returns 0 on success, -EPERM for the root group, -EINVAL for an
+ * invalid mask, or -ENOSPC when no CLOSid is available.
+ */
+
+static int cat_cbm_write(struct cgroup_subsys_state *css,
+				 struct cftype *cft, u64 cbmvalue)
+{
+	struct intel_rdt *ir = css_rdt(css);
+	/*
+	 * NOTE(review): err is declared ssize_t but the function returns
+	 * int and only ever holds 0 or a small negative errno; plain int
+	 * would be the natural type here.
+	 */
+	ssize_t err = 0;
+	unsigned long cbm;
+	unsigned long *cbm_tmp;
+	unsigned int closid;
+	/*
+	 * Mask of all valid CBM bits.
+	 * NOTE(review): `1 << x86_cat_cbmlength` shifts an int; if
+	 * cbmlength can be 32 (MAX_CBM_LENGTH) this is undefined
+	 * behavior — should be `1ULL << ...`. TODO confirm the
+	 * enumerated cbmlength is always < 32 on supported CPUs.
+	 */
+	u32 cbm_mask =
+		(u32)((u64)(1 << boot_cpu_data.x86_cat_cbmlength) - 1);
+
+	/* The root group's mask (all cache) is fixed. */
+	if (ir == &rdt_root_group)
+		return -EPERM;
+
+	/*
+	* Need global mutex as cbm write may allocate a closid.
+	*/
+	mutex_lock(&rdt_group_mutex);
+	cbm = cbmvalue & cbm_mask;
+	cbm_tmp = &ccmap[ir->clos].cbm;
+
+	/* Writing the mask the cgroup already has is a no-op. */
+	if (bitmap_equal(&cbm, cbm_tmp, MAX_CBM_LENGTH))
+		goto out;
+
+	err = validate_cbm(ir, cbm);
+	if (err)
+		goto out;
+
+	/* Drop the reference on the old CLOSid before taking a new one. */
+	rdt_free_closid(ir);
+
+	if (cbm_search(cbm, &closid)) {
+		/* Another cgroup already uses this mask: share its CLOSid. */
+		ir->clos = closid;
+		ccmap[ir->clos].cgrp_count++;
+	} else {
+		/* New mask: allocate a CLOSid and program the MSRs. */
+		err = rdt_alloc_closid(ir);
+		if (err)
+			goto out;
+
+		ccmap[ir->clos].cbm = cbm;
+		cbm_update(ir->clos);
+	}
+
+	cbmmap_dump();
+
+out:
+
+	mutex_unlock(&rdt_group_mutex);
+	return err;
+}
+
+/*
+ * cgroup control files exposed by the RDT subsystem.
+ * NOTE(review): mode 0666 makes 'cbm' world-writable, letting any user
+ * change a cgroup's cache mask — confirm this is intended.
+ */
+static struct cftype rdt_files[] = {
+	{
+		.name = "cbm",
+		.seq_show = cat_cbm_read,
+		.write_u64 = cat_cbm_write,
+		.mode = 0666,
+	},
+	{ }	/* terminate */
+};
+
 struct cgroup_subsys rdt_cgrp_subsys = {
 	.css_alloc			= rdt_css_alloc,
 	.css_free			= rdt_css_free,
+	.legacy_cftypes			= rdt_files,
 	.early_init			= 0,
 };
-- 
1.9.1


  parent reply	other threads:[~2015-03-12 23:18 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-03-12 23:16 [PATCH V5 0/7] x86/intel_rdt: Intel Cache Allocation Technology Vikas Shivappa
2015-03-12 23:16 ` [PATCH 1/7] x86/intel_rdt: Intel Cache Allocation Technology detection Vikas Shivappa
2015-03-12 23:16 ` [PATCH 2/7] x86/intel_rdt: Adds support for Class of service management Vikas Shivappa
2015-03-12 23:16 ` Vikas Shivappa [this message]
2015-04-09 20:56   ` [PATCH 3/7] x86/intel_rdt: Support cache bit mask for Intel CAT Marcelo Tosatti
2015-04-13  2:36     ` Vikas Shivappa
2015-03-12 23:16 ` [PATCH 4/7] x86/intel_rdt: Implement scheduling support for Intel RDT Vikas Shivappa
2015-03-12 23:16 ` [PATCH 5/7] x86/intel_rdt: Software Cache for IA32_PQR_MSR Vikas Shivappa
2015-03-12 23:16 ` [PATCH 6/7] x86/intel_rdt: Intel haswell CAT enumeration Vikas Shivappa
2015-03-12 23:16 ` [PATCH 7/7] x86/intel_rdt: Add CAT documentation and usage guide Vikas Shivappa
2015-03-25 22:39   ` Marcelo Tosatti
2015-03-26 18:38     ` Vikas Shivappa
2015-03-27  1:29       ` Marcelo Tosatti
2015-03-31  1:17         ` Marcelo Tosatti
2015-03-31 17:27         ` Vikas Shivappa
2015-03-31 22:56           ` Marcelo Tosatti
2015-04-01 18:20             ` Vikas Shivappa
2015-07-28 23:37           ` Marcelo Tosatti
2015-07-29 21:20             ` Vikas Shivappa
2015-03-31 17:32         ` Vikas Shivappa
  -- strict thread matches above, loose matches on Subject: below --
2015-05-02  1:36 [PATCH V6 0/7] x86/intel_rdt: Intel Cache Allocation Technology Vikas Shivappa
2015-05-02  1:36 ` [PATCH 3/7] x86/intel_rdt: Support cache bit mask for Intel CAT Vikas Shivappa
2015-05-02 18:46   ` Peter Zijlstra
2015-05-04 17:30     ` Vikas Shivappa
2015-05-06  8:09       ` Peter Zijlstra
2015-05-06  8:30         ` Matt Fleming
2015-05-06 16:48         ` Vikas Shivappa
2015-05-06  8:11       ` Peter Zijlstra
2015-05-06 18:09         ` Vikas Shivappa
2015-02-24 23:16 [PATCH V4 0/7] x86/intel_rdt: Intel Cache Allocation Technology Vikas Shivappa
2015-02-24 23:16 ` [PATCH 3/7] x86/intel_rdt: Support cache bit mask for Intel CAT Vikas Shivappa
2015-02-27 12:12   ` Tejun Heo
2015-02-27 12:18     ` Tejun Heo
2015-02-27 19:34     ` Vikas Shivappa
2015-02-27 19:42       ` Tejun Heo
2015-02-27 21:38         ` Vikas Shivappa

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1426202167-30598-4-git-send-email-vikas.shivappa@linux.intel.com \
    --to=vikas.shivappa@linux.intel.com \
    --cc=glenn.p.williamson@intel.com \
    --cc=hpa@zytor.com \
    --cc=kanaka.d.juvva@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=matt.fleming@intel.com \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=tj@kernel.org \
    --cc=vikas.shivappa@intel.com \
    --cc=will.auld@intel.com \
    --cc=x86@kernel.org \
    --subject='Re: [PATCH 3/7] x86/intel_rdt: Support cache bit mask for Intel CAT' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).