drm/etnaviv: implement per-process address spaces on MMUv2lst/etnaviv-ppas

This builds on top of the MMU contexts introduced earlier. Instead of having one context per GPU core, each GPU client receives its own context. On MMUv1 this still means a single shared pagetable set is used by all clients, but on MMUv2 there is now a distinct set of pagetables for each client. As the command fetch is also translated via the MMU on MMUv2 the kernel command ringbuffer is mapped into each of the client pagetables. As the MMU context switch is a bit of a heavy operation, due to the needed cache and TLB flushing, this patch implements a lazy way of switching the MMU context. The kernel does not have its own MMU context, but reuses the last client context for all of its operations. This has some visible impact, as the GPU can now only be started once a client has submitted some work and we got the client MMU context assigned. Also the MMU context has a different lifetime than the general client context, as the GPU might still execute the kernel command buffer in the context of a client even after the client has completed all GPU work and has been terminated. Only when the GPU is runtime suspended or switches to another clients MMU context is the old context freed up. Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
author: Lucas Stach <l.stach@pengutronix.de> 2018-11-23 19:00:11 +0100
committer: Lucas Stach <l.stach@pengutronix.de> 2019-07-09 10:21:39 +0200
commit: dc031a583a380d202315afebe68acde069f8f841 (patch)
tree: 1aa001bdd9a6b25e34ae6af858610b326746b366 /drivers/gpu/drm/etnaviv/etnaviv_gpu.c
parent: f34a43080208db31e152e579585737c3dccf704d (diff)
download: linux-0-day-dc031a583a380d202315afebe68acde069f8f841.tar.gz
linux-0-day-dc031a583a380d202315afebe68acde069f8f841.tar.xz
1 files changed, 39 insertions, 61 deletions
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index b46d8207f6e63..a3d0e7adc8bfb 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -602,7 +602,7 @@ void etnaviv_gpu_start_fe(struct etnaviv_gpu *gpu, u32 address, u16 prefetch)
 
 static void etnaviv_gpu_start_fe_idleloop(struct etnaviv_gpu *gpu)
 {
-	u32 address = etnaviv_cmdbuf_get_va(&gpu->buffer, &gpu->cmdbuf_mapping);
+	u32 address = etnaviv_cmdbuf_get_va(&gpu->buffer, &gpu->mmu->cmdbuf_mapping);
 	u16 prefetch;
 
 	/* setup the MMU */
@@ -693,8 +693,6 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 	etnaviv_gpu_setup_pulse_eater(gpu);
 
 	gpu_write(gpu, VIVS_HI_INTR_ENBL, ~0U);
-
-	etnaviv_gpu_start_fe_idleloop(gpu);
 }
 
 int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
@@ -725,28 +723,6 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	}
 
 	/*
-	 * Set the GPU linear window to be at the end of the DMA window, where
-	 * the CMA area is likely to reside. This ensures that we are able to
-	 * map the command buffers while having the linear window overlap as
-	 * much RAM as possible, so we can optimize mappings for other buffers.
-	 *
-	 * For 3D cores only do this if MC2.0 is present, as with MC1.0 it leads
-	 * to different views of the memory on the individual engines.
-	 */
-	if (!(gpu->identity.features & chipFeatures_PIPE_3D) ||
-	    (gpu->identity.minor_features0 & chipMinorFeatures0_MC20)) {
-		u32 dma_mask = (u32)dma_get_required_mask(gpu->dev);
-		if (dma_mask < PHYS_OFFSET + SZ_2G)
-			gpu->memory_base = PHYS_OFFSET;
-		else
-			gpu->memory_base = dma_mask - SZ_2G + 1;
-	} else if (PHYS_OFFSET >= SZ_2G) {
-		dev_info(gpu->dev, "Need to move linear window on MC1.0, disabling TS\n");
-		gpu->memory_base = PHYS_OFFSET;
-		gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
-	}
-
-	/*
 	 * On cores with security features supported, we claim control over the
 	 * security states.
 	 */
@@ -764,19 +740,26 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	if (ret)
 		goto fail;
 
-	gpu->mmu = etnaviv_iommu_context_init(priv->mmu_global);
-	if (IS_ERR(gpu->mmu)) {
-		dev_err(gpu->dev, "Failed to instantiate GPU IOMMU\n");
-		ret = PTR_ERR(gpu->mmu);
-		goto iommu_global_fini;
-	}
-
-	ret = etnaviv_cmdbuf_suballoc_map(priv->cmdbuf_suballoc, gpu->mmu,
-					  &gpu->cmdbuf_mapping,
-					  gpu->memory_base);
-	if (ret) {
-		dev_err(gpu->dev, "failed to map cmdbuf suballoc\n");
-		goto destroy_iommu;
+	/*
+	 * Set the GPU linear window to be at the end of the DMA window, where
+	 * the CMA area is likely to reside. This ensures that we are able to
+	 * map the command buffers while having the linear window overlap as
+	 * much RAM as possible, so we can optimize mappings for other buffers.
+	 *
+	 * For 3D cores only do this if MC2.0 is present, as with MC1.0 it leads
+	 * to different views of the memory on the individual engines.
+	 */
+	if (!(gpu->identity.features & chipFeatures_PIPE_3D) ||
+	    (gpu->identity.minor_features0 & chipMinorFeatures0_MC20)) {
+		u32 dma_mask = (u32)dma_get_required_mask(gpu->dev);
+		if (dma_mask < PHYS_OFFSET + SZ_2G)
+			priv->mmu_global->memory_base = PHYS_OFFSET;
+		else
+			priv->mmu_global->memory_base = dma_mask - SZ_2G + 1;
+	} else if (PHYS_OFFSET >= SZ_2G) {
+		dev_info(gpu->dev, "Need to move linear window on MC1.0, disabling TS\n");
+		priv->mmu_global->memory_base = PHYS_OFFSET;
+		gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
 	}
 
 	/* Create buffer: */
@@ -784,15 +767,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 				  PAGE_SIZE);
 	if (ret) {
 		dev_err(gpu->dev, "could not create command buffer\n");
-		goto unmap_suballoc;
-	}
-
-	if (!(gpu->identity.minor_features1 & chipMinorFeatures1_MMU_VERSION) &&
-	    etnaviv_cmdbuf_get_va(&gpu->buffer, &gpu->cmdbuf_mapping) > 0x80000000) {
-		ret = -EINVAL;
-		dev_err(gpu->dev,
-			"command buffer outside valid memory window\n");
-		goto free_buffer;
+		goto fail;
 	}
 
 	/* Setup event management */
@@ -815,14 +790,6 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 
 	return 0;
 
-free_buffer:
-	etnaviv_cmdbuf_free(&gpu->buffer);
-unmap_suballoc:
-	etnaviv_cmdbuf_suballoc_unmap(gpu->mmu, &gpu->cmdbuf_mapping);
-destroy_iommu:
-	etnaviv_iommu_context_put(gpu->mmu);
-iommu_global_fini:
-	etnaviv_iommu_global_fini(gpu);
 fail:
 	pm_runtime_mark_last_busy(gpu->dev);
 	pm_runtime_put_autosuspend(gpu->dev);
@@ -1016,6 +983,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
 
 	etnaviv_gpu_hw_init(gpu);
 	gpu->exec_state = -1;
+	gpu->mmu = NULL;
 
 	mutex_unlock(&gpu->lock);
 	pm_runtime_mark_last_busy(gpu->dev);
@@ -1322,6 +1290,15 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
 		goto out_unlock;
 	}
 
+	if (!gpu->mmu) {
+		etnaviv_iommu_context_get(submit->mmu);
+		gpu->mmu = submit->mmu;
+		etnaviv_gpu_start_fe_idleloop(gpu);
+	} else {
+		etnaviv_iommu_context_get(gpu->mmu);
+		submit->prev_mmu = gpu->mmu;
+	}
+
 	if (submit->nr_pmrs) {
 		gpu->event[event[1]].sync_point = &sync_point_perfmon_sample_pre;
 		kref_get(&submit->refcount);
@@ -1331,7 +1308,7 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
 
 	gpu->event[event[0]].fence = gpu_fence;
 	submit->cmdbuf.user_size = submit->cmdbuf.size - 8;
-	etnaviv_buffer_queue(gpu, submit->exec_state, event[0],
+	etnaviv_buffer_queue(gpu, submit->exec_state, submit->mmu, event[0],
 			     &submit->cmdbuf);
 
 	if (submit->nr_pmrs) {
@@ -1534,7 +1511,7 @@ int etnaviv_gpu_wait_idle(struct etnaviv_gpu *gpu, unsigned int timeout_ms)
 
 static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu)
 {
-	if (gpu->initialized) {
+	if (gpu->initialized && gpu->mmu) {
 		/* Replace the last WAIT with END */
 		mutex_lock(&gpu->lock);
 		etnaviv_buffer_end(gpu);
@@ -1546,8 +1523,13 @@ static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu)
 		 * we fail, just warn and continue.
 		 */
 		etnaviv_gpu_wait_idle(gpu, 100);
+
+		etnaviv_iommu_context_put(gpu->mmu);
+		gpu->mmu = NULL;
 	}
 
+	gpu->exec_state = -1;
+
 	return etnaviv_gpu_clk_disable(gpu);
 }
 
@@ -1563,8 +1545,6 @@ static int etnaviv_gpu_hw_resume(struct etnaviv_gpu *gpu)
 	etnaviv_gpu_update_clock(gpu);
 	etnaviv_gpu_hw_init(gpu);
 
-	gpu->exec_state = -1;
-
 	mutex_unlock(&gpu->lock);
 
 	return 0;
@@ -1695,8 +1675,6 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master,
 
 	if (gpu->initialized) {
 		etnaviv_cmdbuf_free(&gpu->buffer);
-		etnaviv_cmdbuf_suballoc_unmap(gpu->mmu, &gpu->cmdbuf_mapping);
-		etnaviv_iommu_context_put(gpu->mmu);
 		etnaviv_iommu_global_fini(gpu);
 		gpu->initialized = false;
 	}
author	Lucas Stach <l.stach@pengutronix.de>	2018-11-23 19:00:11 +0100
committer	Lucas Stach <l.stach@pengutronix.de>	2019-07-09 10:21:39 +0200
commit	dc031a583a380d202315afebe68acde069f8f841 (patch)
tree	1aa001bdd9a6b25e34ae6af858610b326746b366 /drivers/gpu/drm/etnaviv/etnaviv_gpu.c
parent	f34a43080208db31e152e579585737c3dccf704d (diff)
download	linux-0-day-dc031a583a380d202315afebe68acde069f8f841.tar.gz linux-0-day-dc031a583a380d202315afebe68acde069f8f841.tar.xz