LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
* [PATCH v1] gpu: host1x: Utilize IOMMU mapping for firewall-copied buffers
@ 2018-05-18 23:52 Dmitry Osipenko
  2018-05-19  0:54 ` Dmitry Osipenko
  0 siblings, 1 reply; 2+ messages in thread
From: Dmitry Osipenko @ 2018-05-18 23:52 UTC (permalink / raw)
  To: Thierry Reding, Mikko Perttunen; +Cc: linux-tegra, dri-devel, linux-kernel

Map firewall-copied buffers into Host1x's IOVA space, otherwise Host1x
CDMA can't access the command buffers and all submitted jobs fail if both
the IOMMU and the Host1x firewall are enabled in the kernel's config.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
---
 drivers/gpu/host1x/job.c | 58 +++++++++++++++++++++++++++++++++++-----
 include/linux/host1x.h   |  4 ++-
 2 files changed, 55 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index e2f4a4d93d20..57384a5b5059 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -449,10 +449,13 @@ static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
 
 static inline int copy_gathers(struct host1x_job *job, struct device *dev)
 {
+	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
 	struct host1x_firewall fw;
+	dma_addr_t dma_addr;
 	size_t size = 0;
 	size_t offset = 0;
 	unsigned int i;
+	int err;
 
 	fw.job = job;
 	fw.dev = dev;
@@ -466,23 +469,55 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev)
 		size += g->words * sizeof(u32);
 	}
 
+	if (host->domain)
+		size = iova_align(&host->iova, size);
+
 	/*
 	 * Try a non-blocking allocation from a higher priority pools first,
 	 * as awaiting for the allocation here is a major performance hit.
 	 */
-	job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy,
-					       GFP_NOWAIT);
+	job->gather_copy_mapped = dma_alloc_wc(dev, size,
+					       &job->gather_copy_phys,
+ 					       GFP_NOWAIT);
 
 	/* the higher priority allocation failed, try the generic-blocking */
 	if (!job->gather_copy_mapped)
 		job->gather_copy_mapped = dma_alloc_wc(dev, size,
-						       &job->gather_copy,
+						       &job->gather_copy_phys,
 						       GFP_KERNEL);
 	if (!job->gather_copy_mapped)
 		return -ENOMEM;
 
 	job->gather_copy_size = size;
 
+	if (host->domain) {
+		unsigned long shift;
+
+		shift = iova_shift(&host->iova);
+		job->gather_copy_iova_alloc = alloc_iova(
+					&host->iova, size >> shift,
+					host->iova_end >> shift, true);
+		if (!job->gather_copy_iova_alloc)
+			return -ENOMEM;
+
+		job->gather_copy_iova = iova_dma_addr(
+				&host->iova, job->gather_copy_iova_alloc);
+
+		err = iommu_map(host->domain,
+				job->gather_copy_iova,
+				job->gather_copy_phys,
+				size, IOMMU_READ);
+		if (err) {
+			__free_iova(&host->iova, job->gather_copy_iova_alloc);
+			job->gather_copy_iova_alloc = NULL;
+			return err;
+		}
+
+		dma_addr = job->gather_copy_iova;
+	} else {
+		dma_addr = job->gather_copy_phys;
+	}
+
 	for (i = 0; i < job->num_gathers; i++) {
 		struct host1x_job_gather *g = &job->gathers[i];
 		void *gather;
@@ -494,7 +529,7 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev)
 		host1x_bo_munmap(g->bo, gather);
 
 		/* Store the location in the buffer */
-		g->base = job->gather_copy;
+		g->base = dma_addr;
 		g->offset = offset;
 
 		/* Validate the job */
@@ -582,9 +617,20 @@ void host1x_job_unpin(struct host1x_job *job)
 
 	job->num_unpins = 0;
 
-	if (job->gather_copy_size)
+	if (job->gather_copy_size) {
 		dma_free_wc(job->channel->dev, job->gather_copy_size,
-			    job->gather_copy_mapped, job->gather_copy);
+			    job->gather_copy_mapped, job->gather_copy_phys);
+
+		if (job->gather_copy_iova_alloc) {
+			iommu_unmap(host->domain,
+				    job->gather_copy_iova,
+				    job->gather_copy_size);
+
+			__free_iova(&host->iova, job->gather_copy_iova_alloc);
+
+			job->gather_copy_iova_alloc = NULL;
+		}
+	}
 }
 EXPORT_SYMBOL(host1x_job_unpin);
 
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 57d26406bdfd..536a678f81d4 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -235,8 +235,10 @@ struct host1x_job {
 	unsigned int num_slots;
 
 	/* Copy of gathers */
+	struct iova *gather_copy_iova_alloc;
 	size_t gather_copy_size;
-	dma_addr_t gather_copy;
+	dma_addr_t gather_copy_iova;
+	dma_addr_t gather_copy_phys;
 	u8 *gather_copy_mapped;
 
 	/* Check if register is marked as an address reg */
-- 
2.17.0

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH v1] gpu: host1x: Utilize IOMMU mapping for firewall-copied buffers
  2018-05-18 23:52 [PATCH v1] gpu: host1x: Utilize IOMMU mapping for firewall-copied buffers Dmitry Osipenko
@ 2018-05-19  0:54 ` Dmitry Osipenko
  0 siblings, 0 replies; 2+ messages in thread
From: Dmitry Osipenko @ 2018-05-19  0:54 UTC (permalink / raw)
  To: Thierry Reding, Mikko Perttunen; +Cc: linux-tegra, dri-devel, linux-kernel

On 19.05.2018 02:52, Dmitry Osipenko wrote:
> Map firewall-copied buffers into Host1x's IOVA space, otherwise Host1x
> CDMA can't access the command buffers and all submitted jobs fail if IOMMU
> and Host1x firewall are enabled in the kernels config.
> 
> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
> ---
>  drivers/gpu/host1x/job.c | 58 +++++++++++++++++++++++++++++++++++-----
>  include/linux/host1x.h   |  4 ++-
>  2 files changed, 55 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
> index e2f4a4d93d20..57384a5b5059 100644
> --- a/drivers/gpu/host1x/job.c
> +++ b/drivers/gpu/host1x/job.c
> @@ -449,10 +449,13 @@ static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
>  
>  static inline int copy_gathers(struct host1x_job *job, struct device *dev)
>  {
> +	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
>  	struct host1x_firewall fw;
> +	dma_addr_t dma_addr;
>  	size_t size = 0;
>  	size_t offset = 0;
>  	unsigned int i;
> +	int err;
>  
>  	fw.job = job;
>  	fw.dev = dev;
> @@ -466,23 +469,55 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev)
>  		size += g->words * sizeof(u32);
>  	}
>  
> +	if (host->domain)
> +		size = iova_align(&host->iova, size);
> +
>  	/*
>  	 * Try a non-blocking allocation from a higher priority pools first,
>  	 * as awaiting for the allocation here is a major performance hit.
>  	 */
> -	job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy,
> -					       GFP_NOWAIT);
> +	job->gather_copy_mapped = dma_alloc_wc(dev, size,
> +					       &job->gather_copy_phys,
> + 					       GFP_NOWAIT);
>  
>  	/* the higher priority allocation failed, try the generic-blocking */
>  	if (!job->gather_copy_mapped)
>  		job->gather_copy_mapped = dma_alloc_wc(dev, size,
> -						       &job->gather_copy,
> +						       &job->gather_copy_phys,
>  						       GFP_KERNEL);
>  	if (!job->gather_copy_mapped)
>  		return -ENOMEM;
>  
>  	job->gather_copy_size = size;
>  
> +	if (host->domain) {
> +		unsigned long shift;
> +
> +		shift = iova_shift(&host->iova);
> +		job->gather_copy_iova_alloc = alloc_iova(
> +					&host->iova, size >> shift,
> +					host->iova_end >> shift, true);
> +		if (!job->gather_copy_iova_alloc)
> +			return -ENOMEM;
> +
> +		job->gather_copy_iova = iova_dma_addr(
> +				&host->iova, job->gather_copy_iova_alloc);
> +
> +		err = iommu_map(host->domain,
> +				job->gather_copy_iova,
> +				job->gather_copy_phys,
> +				size, IOMMU_READ);
> +		if (err) {
> +			__free_iova(&host->iova, job->gather_copy_iova_alloc);
> +			job->gather_copy_iova_alloc = NULL;
> +			return err;
> +		}
> +
> +		dma_addr = job->gather_copy_iova;
> +	} else {
> +		dma_addr = job->gather_copy_phys;
> +	}
> +
>  	for (i = 0; i < job->num_gathers; i++) {
>  		struct host1x_job_gather *g = &job->gathers[i];
>  		void *gather;
> @@ -494,7 +529,7 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev)
>  		host1x_bo_munmap(g->bo, gather);
>  
>  		/* Store the location in the buffer */
> -		g->base = job->gather_copy;
> +		g->base = dma_addr;
>  		g->offset = offset;
>  
>  		/* Validate the job */
> @@ -582,9 +617,20 @@ void host1x_job_unpin(struct host1x_job *job)
>  
>  	job->num_unpins = 0;
>  
> -	if (job->gather_copy_size)
> +	if (job->gather_copy_size) {
>  		dma_free_wc(job->channel->dev, job->gather_copy_size,
> -			    job->gather_copy_mapped, job->gather_copy);
> +			    job->gather_copy_mapped, job->gather_copy_phys);
> +
> +		if (job->gather_copy_iova_alloc) {
> +			iommu_unmap(host->domain,
> +				    job->gather_copy_iova,
> +				    job->gather_copy_size);
> +
> +			__free_iova(&host->iova, job->gather_copy_iova_alloc);
> +
> +			job->gather_copy_iova_alloc = NULL;
> +		}
> +	}
>  }
>  EXPORT_SYMBOL(host1x_job_unpin);
>  
> diff --git a/include/linux/host1x.h b/include/linux/host1x.h
> index 57d26406bdfd..536a678f81d4 100644
> --- a/include/linux/host1x.h
> +++ b/include/linux/host1x.h
> @@ -235,8 +235,10 @@ struct host1x_job {
>  	unsigned int num_slots;
>  
>  	/* Copy of gathers */
> +	struct iova *gather_copy_iova_alloc;
>  	size_t gather_copy_size;
> -	dma_addr_t gather_copy;
> +	dma_addr_t gather_copy_iova;
> +	dma_addr_t gather_copy_phys;
>  	u8 *gather_copy_mapped;
>  
>  	/* Check if register is marked as an address reg */
> 

Though it would be much better to just skip the IOMMU initialization if the
firewall is enabled. Please scratch this patch; I'll make another.

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2018-05-19  0:54 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-18 23:52 [PATCH v1] gpu: host1x: Utilize IOMMU mapping for firewall-copied buffers Dmitry Osipenko
2018-05-19  0:54 ` Dmitry Osipenko

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).