*size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset);
- *addr = adev->gmc.gart_start;
- *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
- AMDGPU_GPU_PAGE_SIZE;
+ *addr = amdgpu_compute_gart_address(&adev->gmc, entity, window);
*addr += offset;
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
src_addr += job->ibs[0].gpu_addr;
dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
- dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
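+ /* The window offset is in bytes; convert to its PTE offset (one 8-byte entry per GPU page). */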
+ dst_addr += (entity->gart_window_offs[window] >> AMDGPU_GPU_PAGE_SHIFT) * 8;
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
dst_addr, num_bytes, 0);
amdgpu_res_first(src->mem, src->offset, size, &src_mm);
amdgpu_res_first(dst->mem, dst->offset, size, &dst_mm);
- mutex_lock(&adev->mman.gtt_window_lock);
+ mutex_lock(&entity->lock);
while (src_mm.remaining) {
uint64_t from, to, cur_size, tiling_flags;
uint32_t num_type, data_format, max_com, write_compress_disable;
amdgpu_res_next(&dst_mm, cur_size);
}
error:
- mutex_unlock(&adev->mman.gtt_window_lock);
+ mutex_unlock(&entity->lock);
*f = fence;
return r;
}
if (r)
goto out;
- mutex_lock(&adev->mman.gtt_window_lock);
+ mutex_lock(&adev->mman.default_entity.lock);
amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
src_mm.start;
PAGE_SIZE, 0);
fence = amdgpu_ttm_job_submit(adev, job, num_dw);
- mutex_unlock(&adev->mman.gtt_window_lock);
+ mutex_unlock(&adev->mman.default_entity.lock);
if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
r = -ETIMEDOUT;
adev->rmmio_remap.bo = NULL;
}
+static int amdgpu_ttm_buffer_entity_init(struct amdgpu_ttm_buffer_entity *entity,
+ int starting_gart_window,
+ u32 num_gart_windows)
+{
+ int i;
+
+ mutex_init(&entity->lock);
+
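+ /* Not enough slots in gart_window_offs; leave the entity without windows. */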
+ if (ARRAY_SIZE(entity->gart_window_offs) < num_gart_windows)
+ return starting_gart_window;
+
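+ /* Record each assigned window's byte offset from the start of the GART. */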
+ for (i = 0; i < num_gart_windows; i++) {
+ entity->gart_window_offs[i] =
+ (u64)starting_gart_window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
+ AMDGPU_GPU_PAGE_SIZE;
+ starting_gart_window++;
+ }
+
+ return starting_gart_window;
+}
+
/*
* amdgpu_ttm_init - Init the memory management (ttm) as well as various
* gtt/vram related fields.
uint64_t gtt_size;
int r;
- mutex_init(&adev->mman.gtt_window_lock);
-
dma_set_max_seg_size(adev->dev, UINT_MAX);
/* No others user of address space so set it to 0 */
r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
{
struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
+ u32 used_windows;
uint64_t size;
int r;
drm_sched_entity_destroy(&adev->mman.clear_entity.base);
goto error_free_entity;
}
+
+ /*
+ * Statically assign GART windows to each entity. At most
+ * AMDGPU_GTT_NUM_TRANSFER_WINDOWS windows are available in total.
+ */
+ used_windows = amdgpu_ttm_buffer_entity_init(&adev->mman.default_entity, 0, 0);
+ used_windows = amdgpu_ttm_buffer_entity_init(&adev->mman.move_entity,
+ used_windows, 2);
+ used_windows = amdgpu_ttm_buffer_entity_init(&adev->mman.clear_entity,
+ used_windows, 1);
} else {
drm_sched_entity_destroy(&adev->mman.default_entity.base);
drm_sched_entity_destroy(&adev->mman.clear_entity.base);
struct dma_fence **fence)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_ttm_buffer_entity *entity;
struct amdgpu_res_cursor cursor;
u64 addr;
int r = 0;
if (!fence)
return -EINVAL;
+ entity = &adev->mman.clear_entity;
*fence = dma_fence_get_stub();
amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
- mutex_lock(&adev->mman.gtt_window_lock);
+ mutex_lock(&entity->lock);
while (cursor.remaining) {
struct dma_fence *next = NULL;
u64 size;
/* Never clear more than 256MiB at once to avoid timeouts */
size = min(cursor.size, 256ULL << 20);
- r = amdgpu_ttm_map_buffer(&adev->mman.clear_entity,
- &bo->tbo, bo->tbo.resource, &cursor,
- 1, false, &size, &addr);
+ r = amdgpu_ttm_map_buffer(entity, &bo->tbo, bo->tbo.resource, &cursor,
+ 0, false, &size, &addr);
if (r)
goto err;
- r = amdgpu_ttm_fill_mem(adev, &adev->mman.clear_entity, 0, addr, size, resv,
+ r = amdgpu_ttm_fill_mem(adev, entity, 0, addr, size, resv,
&next, true,
AMDGPU_KERNEL_JOB_ID_TTM_CLEAR_BUFFER);
if (r)
amdgpu_res_next(&cursor, size);
}
err:
- mutex_unlock(&adev->mman.gtt_window_lock);
+ mutex_unlock(&entity->lock);
return r;
}
amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &dst);
- mutex_lock(&adev->mman.gtt_window_lock);
+ mutex_lock(&entity->lock);
while (dst.remaining) {
struct dma_fence *next;
uint64_t cur_size, to;
cur_size = min(dst.size, 256ULL << 20);
r = amdgpu_ttm_map_buffer(entity, &bo->tbo, bo->tbo.resource, &dst,
- 1, false, &cur_size, &to);
+ 0, false, &cur_size, &to);
if (r)
goto error;
amdgpu_res_next(&dst, cur_size);
}
error:
- mutex_unlock(&adev->mman.gtt_window_lock);
+ mutex_unlock(&entity->lock);
if (f)
*f = dma_fence_get(fence);
dma_fence_put(fence);
#include <drm/ttm/ttm_placement.h>
#include "amdgpu_vram_mgr.h"
#include "amdgpu_hmm.h"
+#include "amdgpu_gmc.h"
#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
#define AMDGPU_PL_GWS (TTM_PL_PRIV + 1)
#define __AMDGPU_PL_NUM (TTM_PL_PRIV + 6)
#define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
-#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
+#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 3
extern const struct attribute_group amdgpu_vram_mgr_attr_group;
extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
struct amdgpu_ttm_buffer_entity {
struct drm_sched_entity base;
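+ /* Serializes jobs submitted through this entity and its GART windows. */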
+ struct mutex lock;
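+ /* Byte offsets of this entity's GART windows, relative to gmc.gart_start. */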
+ u64 gart_window_offs[2];
};
struct amdgpu_mman {
struct amdgpu_ring *buffer_funcs_ring;
bool buffer_funcs_enabled;
- struct mutex gtt_window_lock;
-
+ /* @default_entity: used for workarounds; owns no GART windows */
struct amdgpu_ttm_buffer_entity default_entity;
struct amdgpu_ttm_buffer_entity clear_entity;
struct amdgpu_ttm_buffer_entity move_entity;
}
#endif
+/**
+ * amdgpu_compute_gart_address() - Returns GART address of an entity's window
+ * @gmc: The &struct amdgpu_gmc instance to use
+ * @entity: The &struct amdgpu_ttm_buffer_entity owning the GART window
+ * @index: The window to use (must be a window assigned to @entity)
+ *
+ * Return: The GART address of the entity's window at @index.
+ */
+static inline u64 amdgpu_compute_gart_address(struct amdgpu_gmc *gmc,
+ struct amdgpu_ttm_buffer_entity *entity,
+ int index)
+{
+ return gmc->gart_start + entity->gart_window_offs[index];
+}
+
void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct amdgpu_hmm_range *range);
int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
uint64_t *user_addr);
void *cpu_addr;
int r;
- /* use gart window 0 */
- *gart_addr = adev->gmc.gart_start;
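+ /* use the entity's first GART window */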
+ *gart_addr = amdgpu_compute_gart_address(&adev->gmc, entity, 0);
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
num_bytes = npages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
src_addr += job->ibs[0].gpu_addr;
dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
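+ /* Step over the PTEs of any windows placed before this entity's window 0. */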
+ dst_addr += (entity->gart_window_offs[0] >> AMDGPU_GPU_PAGE_SHIFT) * 8;
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
dst_addr, num_bytes, 0);
* multiple GTT_MAX_PAGES transfer, all sdma operations are serialized, wait for
* the last sdma finish fence which is returned to check copy memory is done.
*
- * Context: Process context, takes and releases gtt_window_lock
+ * Context: Process context, takes and releases the move entity's lock
*
* Return:
* 0 - OK, otherwise error code
u64 size;
int r;
- entity = &adev->mman.default_entity;
+ entity = &adev->mman.move_entity;
- mutex_lock(&adev->mman.gtt_window_lock);
+ mutex_lock(&entity->lock);
while (npages) {
size = min(GTT_MAX_PAGES, npages);
}
out_unlock:
- mutex_unlock(&adev->mman.gtt_window_lock);
+ mutex_unlock(&entity->lock);
return r;
}