kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
}
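+/* Forward process teardown for this device to the KFD core */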
+void amdgpu_amdkfd_teardown_processes(struct amdgpu_device *adev)
+{
+ kgd2kfd_teardown_processes(adev);
+}
+
void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc)
{
if (adev->kfd.dev) {
int amdgpu_amdkfd_init(void);
void amdgpu_amdkfd_fini(void);
+void amdgpu_amdkfd_teardown_processes(struct amdgpu_device *adev);
void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc);
int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc);
bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id);
bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
bool retry_fault);
+void kgd2kfd_lock_kfd(void);
+void kgd2kfd_teardown_processes(struct amdgpu_device *adev);
#else
static inline int kgd2kfd_init(void)
return false;
}
+static inline void kgd2kfd_lock_kfd(void)
+{
+}
+
+static inline void kgd2kfd_teardown_processes(struct amdgpu_device *adev)
+{
+}
+
#endif
#endif /* AMDGPU_AMDKFD_H_INCLUDED */
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
amdgpu_amdkfd_suspend(adev, true);
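+ /* tear down any KFD processes still using this device */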
+ amdgpu_amdkfd_teardown_processes(adev);
amdgpu_userq_suspend(adev);
/* Workaround for ASICs need to disable SMC first */
}
kfree(kfd);
+
+ /* after removing a kfd device, unlock the kfd driver */
+ kgd2kfd_unlock_kfd(NULL);
}
int kgd2kfd_pre_reset(struct kfd_dev *kfd,
return r;
}
+/* unlock a kfd device, or the whole kfd driver when kfd is NULL */
void kgd2kfd_unlock_kfd(struct kfd_dev *kfd)
{
mutex_lock(&kfd_processes_mutex);
- --kfd->kfd_dev_lock;
+ if (kfd)
+ --kfd->kfd_dev_lock;
+ else
+ --kfd_locked;
mutex_unlock(&kfd_processes_mutex);
}
return false;
}
+/* check whether any kfd process still uses adev */
+static bool kgd2kfd_check_device_idle(struct amdgpu_device *adev)
+{
+ struct kfd_process *p;
+ struct hlist_node *p_temp;
+ unsigned int temp;
+ struct kfd_node *dev;
+
+ mutex_lock(&kfd_processes_mutex);
+
+ if (hash_empty(kfd_processes_table)) {
+ mutex_unlock(&kfd_processes_mutex);
+ return true;
+ }
+
+ /* check if any process still has a device that uses adev */
+ hash_for_each_safe(kfd_processes_table, temp, p_temp, p, kfd_processes) {
+ for (int i = 0; i < p->n_pdds; i++) {
+ dev = p->pdds[i]->dev;
+ if (dev->adev == adev) {
+ mutex_unlock(&kfd_processes_mutex);
+ return false;
+ }
+ }
+ }
+
+ mutex_unlock(&kfd_processes_mutex);
+
+ return true;
+}
+
+/**
+ * kgd2kfd_teardown_processes - gracefully tear down existing kfd
+ * processes that use adev
+ *
+ * @adev: amdgpu_device that the kfd processes run on and that is
+ * about to be torn down
+ */
+void kgd2kfd_teardown_processes(struct amdgpu_device *adev)
+{
+ struct hlist_node *p_temp;
+ struct kfd_process *p;
+ struct kfd_node *dev;
+ unsigned int temp;
+
+ mutex_lock(&kfd_processes_mutex);
+
+ if (hash_empty(kfd_processes_table)) {
+ mutex_unlock(&kfd_processes_mutex);
+ return;
+ }
+
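+ /* signal terminate events and send SIGBUS to every process
+ * that has a device data (pdd) on this adev
+ */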
+ hash_for_each_safe(kfd_processes_table, temp, p_temp, p, kfd_processes) {
+ for (int i = 0; i < p->n_pdds; i++) {
+ dev = p->pdds[i]->dev;
+ if (dev->adev == adev)
+ kfd_signal_process_terminate_event(p);
+ }
+ }
+
+ mutex_unlock(&kfd_processes_mutex);
+
+ /* wait until every kfd process using adev has terminated;
+ * cond_resched() yields the CPU so those processes can exit
+ */
+ while (!kgd2kfd_check_device_idle(adev))
+ cond_resched();
+}
+
#if defined(CONFIG_DEBUG_FS)
/* This function will send a package to HIQ to hang the HWS
kfd_unref_process(p);
}
+
+/* signal all KFD_EVENT_TYPE_SIGNAL events of process p and
+ * send SIGBUS to the corresponding user space process
+ */
+void kfd_signal_process_terminate_event(struct kfd_process *p)
+{
+ struct kfd_event *ev;
+ u32 id;
+
+ rcu_read_lock();
+
+ /* iterate from id 1 for KFD_EVENT_TYPE_SIGNAL events */
+ id = 1;
+ idr_for_each_entry_continue(&p->event_idr, ev, id)
+ if (ev->type == KFD_EVENT_TYPE_SIGNAL) {
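+ /* set_event() marks the event signaled and wakes up any waiters */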
+ spin_lock(&ev->lock);
+ set_event(ev);
+ spin_unlock(&ev->lock);
+ }
+
+ /* Send SIGBUS to p->lead_thread */
+ dev_notice(kfd_device,
+ "Sending SIGBUS to process %d\n",
+ p->lead_thread->pid);
+
+ send_sig(SIGBUS, p->lead_thread, 0);
+
+ rcu_read_unlock();
+}
}
int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_node **kdev);
int kfd_numa_node_to_apic_id(int numa_node_id);
+uint32_t kfd_gpu_node_num(void);
/* Interrupts */
#define KFD_IRQ_FENCE_CLIENTID 0xff
void kfd_signal_reset_event(struct kfd_node *dev);
void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid);
+void kfd_signal_process_terminate_event(struct kfd_process *p);
static inline void kfd_flush_tlb(struct kfd_process_device *pdd,
enum TLB_FLUSH_TYPE type)
*/
mutex_lock(&kfd_processes_mutex);
+ if (kfd_gpu_node_num() == 0) {
+ pr_warn("No GPU node! Cannot create KFD process");
+ process = ERR_PTR(-EINVAL);
+ goto out;
+ }
+
if (kfd_is_locked(NULL)) {
pr_debug("KFD is locked! Cannot create process");
process = ERR_PTR(-EINVAL);
else
ida_destroy(&p->id_table);
- kfd_process_remove_sysfs(p);
kfd_debugfs_remove_process(p);
kfd_process_kunmap_signal_bo(p);
put_task_struct(p->lead_thread);
+ /* the last step is removing the process entries under /sys
+ * to indicate that the process has been terminated.
+ */
+ kfd_process_remove_sysfs(p);
+
kfree(p);
}
return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
}
+/* kfd_gpu_node_num - Return the number of kfd GPU nodes in the system */
+uint32_t kfd_gpu_node_num(void)
+{
+ struct kfd_node *dev;
+ u8 gpu_num = 0;
+ u8 id = 0;
+
+ while (kfd_topology_enum_kfd_devices(id, &dev) == 0) {
+ if (!dev || kfd_devcgroup_check_permission(dev)) {
+ /* Skip non-GPU devices and devices the current
+ * process has no access to
+ */
+ id++;
+ continue;
+ }
+ id++;
+ gpu_num++;
+ }
+
+ return gpu_num;
+}
+
#if defined(CONFIG_DEBUG_FS)
int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)