entity->guilty = guilty;
entity->priority = priority;
entity->last_user = current->group_leader;
+ entity->rq_priority = drm_sched_policy == DRM_SCHED_POLICY_FAIR ?
+ DRM_SCHED_PRIORITY_KERNEL : priority;
entity->num_sched_list = num_sched_list;
entity->sched_list = num_sched_list > 1 ? sched_list : NULL;
RCU_INIT_POINTER(entity->last_scheduled, NULL);
*/
pr_warn("%s: called with uninitialized scheduler\n", __func__);
} else {
- /* The "priority" of an entity cannot exceed the number of run-queues of a
- * scheduler. Protect against num_rqs being 0, by converting to signed. Choose
- * the lowest priority available.
+ enum drm_sched_priority p = entity->priority;
+
+ /*
+ * The "priority" of an entity cannot exceed the number of
+ * run-queues of a scheduler. Protect against num_rqs being 0,
+ * by converting to signed. Choose the lowest priority
+ * available.
*/
- if (entity->priority >= sched_list[0]->num_rqs) {
- dev_err(sched_list[0]->dev, "entity has out-of-bounds priority: %u. num_rqs: %u\n",
- entity->priority, sched_list[0]->num_rqs);
- entity->priority = max_t(s32, (s32) sched_list[0]->num_rqs - 1,
- (s32) DRM_SCHED_PRIORITY_KERNEL);
+ if (p >= sched_list[0]->num_user_rqs) {
+ dev_err(sched_list[0]->dev, "entity with out-of-bounds priority:%u num_user_rqs:%u\n",
+ p, sched_list[0]->num_user_rqs);
+ p = max_t(s32,
+ (s32)sched_list[0]->num_user_rqs - 1,
+ (s32)DRM_SCHED_PRIORITY_KERNEL);
+ entity->priority = p;
}
- entity->rq = sched_list[0]->sched_rq[entity->priority];
+ entity->rq = sched_list[0]->sched_rq[entity->rq_priority];
}
init_completion(&entity->entity_idle);
spin_lock(&entity->lock);
sched = drm_sched_pick_best(entity->sched_list, entity->num_sched_list);
- rq = sched ? sched->sched_rq[entity->priority] : NULL;
+ rq = sched ? sched->sched_rq[entity->rq_priority] : NULL;
if (rq != entity->rq) {
drm_sched_rq_remove_entity(entity->rq, entity);
entity->rq = rq;
* DOC: sched_policy (int)
* Used to override default entities scheduling policy in a run queue.
*/
-MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default).");
+MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default), " __stringify(DRM_SCHED_POLICY_FAIR) " = Fair.");
module_param_named(sched_policy, drm_sched_policy, int, 0444);
static u32 drm_sched_available_credits(struct drm_gpu_scheduler *sched)
sched->own_submit_wq = true;
}
- sched->sched_rq = kmalloc_objs(*sched->sched_rq, args->num_rqs,
- GFP_KERNEL | __GFP_ZERO);
+ sched->num_user_rqs = args->num_rqs;
+ sched->num_rqs = drm_sched_policy != DRM_SCHED_POLICY_FAIR ?
+ args->num_rqs : 1;
+ sched->sched_rq = kzalloc_objs(*sched->sched_rq, args->num_rqs);
if (!sched->sched_rq)
goto Out_check_own;
- sched->num_rqs = args->num_rqs;
+
for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
sched->sched_rq[i] = kzalloc_obj(*sched->sched_rq[i]);
if (!sched->sched_rq[i])
if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
atomic_inc(&bad->karma);
- for (i = DRM_SCHED_PRIORITY_HIGH; i < sched->num_rqs; i++) {
+ for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
struct drm_sched_rq *rq = sched->sched_rq[i];
spin_lock(&rq->lock);
return ktime_before(ea->oldest_job_waiting, eb->oldest_job_waiting);
}
+static void drm_sched_rq_update_prio(struct drm_sched_rq *rq)
+{
+ enum drm_sched_priority prio = DRM_SCHED_PRIORITY_INVALID;
+ struct rb_node *rb;
+
+ lockdep_assert_held(&rq->lock);
+
+ rb = rb_first_cached(&rq->rb_tree_root);
+ if (rb) {
+ struct drm_sched_entity *entity =
+ rb_entry(rb, typeof(*entity), rb_tree_node);
+
+ /*
+ * The normal locking order is entity then run-queue so taking
+ * the entity lock here would be a locking inversion for the
+ * case when the current head of the run-queue is different from
+ * the one we already have locked. The unlocked read is fine
+ * though, because if the priority had just changed it is no big
+ * deal for our algorithm, but just a transient reachable only
+ * by drivers with userspace dynamic priority changes API. Equal
+ * in effect to the priority change becoming visible a few
+ * instructions later.
+ */
+ prio = READ_ONCE(entity->priority);
+ }
+
+ rq->head_prio = prio;
+}
+
static void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity,
struct drm_sched_rq *rq)
{
if (!RB_EMPTY_NODE(&entity->rb_tree_node)) {
rb_erase_cached(&entity->rb_tree_node, &rq->rb_tree_root);
RB_CLEAR_NODE(&entity->rb_tree_node);
+ drm_sched_rq_update_prio(rq);
}
}
rb_add_cached(&entity->rb_tree_node, &rq->rb_tree_root,
drm_sched_entity_compare_before);
+ drm_sched_rq_update_prio(rq);
}
/**
INIT_LIST_HEAD(&rq->entities);
rq->rb_tree_root = RB_ROOT_CACHED;
rq->sched = sched;
+ rq->head_prio = DRM_SCHED_PRIORITY_INVALID;
+}
+
+/*
+ * Core part of the CFS-like algorithm is that the virtual runtime of lower
+ * priority tasks should grow quicker than the higher priority ones, so that
+ * when we then schedule entities with the aim of keeping their accumulated
+ * virtual time balanced, we can approach fair distribution of GPU time.
+ *
+ * For converting the real GPU time into virtual we pick some multipliers with
+ * the idea to achieve the following GPU time distribution:
+ *
+ * - Kernel priority gets roughly 2x GPU time compared to high.
+ * - High gets ~4x relative to normal.
+ * - Normal gets ~8x relative to low.
+ */
+static const unsigned int vruntime_shift[] = {
+ [DRM_SCHED_PRIORITY_KERNEL] = 1,
+ [DRM_SCHED_PRIORITY_HIGH] = 2,
+ [DRM_SCHED_PRIORITY_NORMAL] = 4,
+ [DRM_SCHED_PRIORITY_LOW] = 7,
+};
+
+static ktime_t
+drm_sched_rq_get_min_vruntime(struct drm_sched_rq *rq)
+{
+ ktime_t vruntime = 0;
+ struct rb_node *rb;
+
+ lockdep_assert_held(&rq->lock);
+
+ rb = rb_first_cached(&rq->rb_tree_root);
+ if (rb) {
+ struct drm_sched_entity *entity =
+ rb_entry(rb, typeof(*entity), rb_tree_node);
+ struct drm_sched_entity_stats *stats = entity->stats;
+
+ spin_lock(&stats->lock);
+ vruntime = stats->vruntime;
+ spin_unlock(&stats->lock);
+ }
+
+ return vruntime;
+}
+
+static void
+drm_sched_entity_save_vruntime(struct drm_sched_entity *entity,
+ ktime_t min_vruntime)
+{
+ struct drm_sched_entity_stats *stats = entity->stats;
+ ktime_t vruntime;
+
+ spin_lock(&stats->lock);
+ vruntime = stats->vruntime;
+ if (min_vruntime && vruntime > min_vruntime)
+ vruntime = ktime_sub(vruntime, min_vruntime);
+ else
+ vruntime = 0;
+ stats->vruntime = vruntime;
+ spin_unlock(&stats->lock);
+}
+
+static ktime_t
+drm_sched_entity_restore_vruntime(struct drm_sched_entity *entity,
+ ktime_t min_vruntime,
+ enum drm_sched_priority rq_prio)
+{
+ struct drm_sched_entity_stats *stats = entity->stats;
+ enum drm_sched_priority prio = entity->priority;
+ ktime_t vruntime;
+
+ BUILD_BUG_ON(DRM_SCHED_PRIORITY_NORMAL < DRM_SCHED_PRIORITY_HIGH);
+
+ spin_lock(&stats->lock);
+ vruntime = stats->vruntime;
+
+ /*
+ * Special handling for entities which were picked from the top of the
+ * queue and are now re-joining the top with another one already there.
+ */
+ if (!vruntime && rq_prio != DRM_SCHED_PRIORITY_INVALID) {
+ if (prio > rq_prio) {
+ /*
+ * Lower priority should not overtake higher when re-
+ * joining at the top of the queue.
+ */
+ vruntime = ns_to_ktime(prio - rq_prio);
+ } else if (prio < rq_prio) {
+ /*
+ * Higher priority can go first.
+ */
+ vruntime = -ns_to_ktime(rq_prio - prio);
+ }
+ }
+
+ /*
+ * Restore saved relative position in the queue.
+ */
+ vruntime = ktime_add(min_vruntime, vruntime);
+
+ stats->vruntime = vruntime;
+ spin_unlock(&stats->lock);
+
+ return vruntime;
+}
+
+static ktime_t drm_sched_entity_update_vruntime(struct drm_sched_entity *entity)
+{
+ struct drm_sched_entity_stats *stats = entity->stats;
+ ktime_t runtime, prev;
+
+ spin_lock(&stats->lock);
+ prev = stats->prev_runtime;
+ runtime = stats->runtime;
+ stats->prev_runtime = runtime;
+ runtime = ktime_add_ns(stats->vruntime,
+ ktime_to_ns(ktime_sub(runtime, prev)) <<
+ vruntime_shift[entity->priority]);
+ stats->vruntime = runtime;
+ spin_unlock(&stats->lock);
+
+ return runtime;
+}
+
+static ktime_t drm_sched_entity_get_job_ts(struct drm_sched_entity *entity)
+{
+ return drm_sched_entity_update_vruntime(entity);
}
/**
list_add_tail(&entity->list, &rq->entities);
}
- if (drm_sched_policy == DRM_SCHED_POLICY_RR)
+ if (drm_sched_policy == DRM_SCHED_POLICY_FAIR) {
+ ts = drm_sched_rq_get_min_vruntime(rq);
+ ts = drm_sched_entity_restore_vruntime(entity, ts,
+ rq->head_prio);
+ } else if (drm_sched_policy == DRM_SCHED_POLICY_RR) {
ts = entity->rr_ts;
+ }
+
drm_sched_rq_update_fifo_locked(entity, rq, ts);
spin_unlock(&rq->lock);
if (next_job) {
ktime_t ts;
- if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
+ if (drm_sched_policy == DRM_SCHED_POLICY_FAIR)
+ ts = drm_sched_entity_get_job_ts(entity);
+ else if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
ts = next_job->submit_ts;
else
ts = drm_sched_rq_next_rr_ts(rq, entity);
drm_sched_rq_update_fifo_locked(entity, rq, ts);
} else {
drm_sched_rq_remove_fifo_locked(entity, rq);
+
+ if (drm_sched_policy == DRM_SCHED_POLICY_FAIR) {
+ ktime_t min_vruntime;
+
+ min_vruntime = drm_sched_rq_get_min_vruntime(rq);
+ drm_sched_entity_save_vruntime(entity, min_vruntime);
+ }
}
spin_unlock(&rq->lock);
spin_unlock(&entity->lock);
* to an array, and as such should start at 0.
*/
enum drm_sched_priority {
+ DRM_SCHED_PRIORITY_INVALID = -1, /* Internal marker - do not use. */
DRM_SCHED_PRIORITY_KERNEL,
DRM_SCHED_PRIORITY_HIGH,
DRM_SCHED_PRIORITY_NORMAL,
*/
enum drm_sched_priority priority;
+ /**
+ * @rq_priority: Run-queue priority
+ */
+ enum drm_sched_priority rq_priority;
+
/**
* @rr_ts:
*
* struct drm_sched_rq - queue of entities to be scheduled.
*
* @sched: the scheduler to which this rq belongs to.
- * @lock: protects @entities, @rb_tree_root and @rr_ts.
+ * @lock: protects @entities, @rb_tree_root, @rr_ts and @head_prio.
* @rr_ts: monotonically incrementing fake timestamp for RR mode.
* @entities: list of the entities to be scheduled.
* @rb_tree_root: root of time based priority queue of entities for FIFO scheduling
+ * @head_prio: priority of the top tree element.
*
* Run queue is a set of entities scheduling command submissions for
* one specific ring. It implements the scheduling policy that selects
ktime_t rr_ts;
struct list_head entities;
struct rb_root_cached rb_tree_root;
+ enum drm_sched_priority head_prio;
};
/**
* @credit_count: the current credit count of this scheduler
* @timeout: the time after which a job is removed from the scheduler.
* @name: name of the ring for which this scheduler is being used.
- * @num_rqs: Number of run-queues. This is at most DRM_SCHED_PRIORITY_COUNT,
- * as there's usually one run-queue per priority, but could be less.
+ * @num_user_rqs: Number of run-queues. This is at most
+ * DRM_SCHED_PRIORITY_COUNT, as there's usually one run-queue per
+ * priority, but could be less.
+ * @num_rqs: Equal to @num_user_rqs for FIFO and RR and 1 for the FAIR policy.
* @sched_rq: An allocated array of run-queues of size @num_rqs;
* @job_scheduled: once drm_sched_entity_flush() is called the scheduler
* waits on this wait queue until all the scheduled jobs are
long timeout;
const char *name;
u32 num_rqs;
+ u32 num_user_rqs;
struct drm_sched_rq **sched_rq;
wait_queue_head_t job_scheduled;
atomic64_t job_id_count;