Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b1f389ec authored by Dave Airlie
Browse files

Merge tag 'drm-amdkfd-next-2015-03-25' of git://people.freedesktop.org/~gabbayo/linux into drm-next

- Allow amdkfd to work with multiple kgd instances. This is in preparation for
  AMD's new open source kernel graphics driver (amdgpu), and for the new
  AMD APU, Carrizo.

- Convert timestamping to use 64bit time accessors

- Three other minor changes.

* tag 'drm-amdkfd-next-2015-03-25' of git://people.freedesktop.org/~gabbayo/linux:
  drm/amdkfd: Add multiple kgd support
  drm/amdkfd: Convert timestamping to use 64bit time accessors
  drm/amdkfd: add debug prints for process teardown
  drm/amdkfd: Remove unused field from struct qcm_process_device
  drm/amdkfd: rename fence_wait_timeout
parents 517bc045 cea405b1
Loading
Loading
Loading
Loading
+7 −6
Original line number Original line Diff line number Diff line
@@ -435,21 +435,22 @@ static int kfd_ioctl_get_clock_counters(struct file *filep,
{
{
	struct kfd_ioctl_get_clock_counters_args *args = data;
	struct kfd_ioctl_get_clock_counters_args *args = data;
	struct kfd_dev *dev;
	struct kfd_dev *dev;
	struct timespec time;
	struct timespec64 time;


	dev = kfd_device_by_id(args->gpu_id);
	dev = kfd_device_by_id(args->gpu_id);
	if (dev == NULL)
	if (dev == NULL)
		return -EINVAL;
		return -EINVAL;


	/* Reading GPU clock counter from KGD */
	/* Reading GPU clock counter from KGD */
	args->gpu_clock_counter = kfd2kgd->get_gpu_clock_counter(dev->kgd);
	args->gpu_clock_counter =
		dev->kfd2kgd->get_gpu_clock_counter(dev->kgd);


	/* No access to rdtsc. Using raw monotonic time */
	/* No access to rdtsc. Using raw monotonic time */
	getrawmonotonic(&time);
	getrawmonotonic64(&time);
	args->cpu_clock_counter = (uint64_t)timespec_to_ns(&time);
	args->cpu_clock_counter = (uint64_t)timespec64_to_ns(&time);


	get_monotonic_boottime(&time);
	get_monotonic_boottime64(&time);
	args->system_clock_counter = (uint64_t)timespec_to_ns(&time);
	args->system_clock_counter = (uint64_t)timespec64_to_ns(&time);


	/* Since the counter is in nano-seconds we use 1GHz frequency */
	/* Since the counter is in nano-seconds we use 1GHz frequency */
	args->system_clock_freq = 1000000000;
	args->system_clock_freq = 1000000000;
+12 −5
Original line number Original line Diff line number Diff line
@@ -94,7 +94,8 @@ static const struct kfd_device_info *lookup_device_info(unsigned short did)
	return NULL;
	return NULL;
}
}


struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev)
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
	struct pci_dev *pdev, const struct kfd2kgd_calls *f2g)
{
{
	struct kfd_dev *kfd;
	struct kfd_dev *kfd;


@@ -112,6 +113,11 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev)
	kfd->device_info = device_info;
	kfd->device_info = device_info;
	kfd->pdev = pdev;
	kfd->pdev = pdev;
	kfd->init_complete = false;
	kfd->init_complete = false;
	kfd->kfd2kgd = f2g;

	mutex_init(&kfd->doorbell_mutex);
	memset(&kfd->doorbell_available_index, 0,
		sizeof(kfd->doorbell_available_index));


	return kfd;
	return kfd;
}
}
@@ -200,7 +206,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
	/* add another 512KB for all other allocations on gart (HPD, fences) */
	/* add another 512KB for all other allocations on gart (HPD, fences) */
	size += 512 * 1024;
	size += 512 * 1024;


	if (kfd2kgd->init_gtt_mem_allocation(kfd->kgd, size, &kfd->gtt_mem,
	if (kfd->kfd2kgd->init_gtt_mem_allocation(
			kfd->kgd, size, &kfd->gtt_mem,
			&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)){
			&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)){
		dev_err(kfd_device,
		dev_err(kfd_device,
			"Could not allocate %d bytes for device (%x:%x)\n",
			"Could not allocate %d bytes for device (%x:%x)\n",
@@ -270,7 +277,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd_topology_add_device_error:
kfd_topology_add_device_error:
	kfd_gtt_sa_fini(kfd);
	kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
kfd_gtt_sa_init_error:
	kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
	kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
	dev_err(kfd_device,
	dev_err(kfd_device,
		"device (%x:%x) NOT added due to errors\n",
		"device (%x:%x) NOT added due to errors\n",
		kfd->pdev->vendor, kfd->pdev->device);
		kfd->pdev->vendor, kfd->pdev->device);
@@ -285,7 +292,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
		amd_iommu_free_device(kfd->pdev);
		amd_iommu_free_device(kfd->pdev);
		kfd_topology_remove_device(kfd);
		kfd_topology_remove_device(kfd);
		kfd_gtt_sa_fini(kfd);
		kfd_gtt_sa_fini(kfd);
		kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
		kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
	}
	}


	kfree(kfd);
	kfree(kfd);
+11 −7
Original line number Original line Diff line number Diff line
@@ -82,7 +82,8 @@ static inline unsigned int get_pipes_num_cpsch(void)
void program_sh_mem_settings(struct device_queue_manager *dqm,
void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
					struct qcm_process_device *qpd)
{
{
	return kfd2kgd->program_sh_mem_settings(dqm->dev->kgd, qpd->vmid,
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_ape1_limit,
@@ -457,9 +458,12 @@ set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
{
{
	uint32_t pasid_mapping;
	uint32_t pasid_mapping;


	pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
	pasid_mapping = (pasid == 0) ? 0 :
		(uint32_t)pasid |
		ATC_VMID_PASID_MAPPING_VALID;
		ATC_VMID_PASID_MAPPING_VALID;
	return kfd2kgd->set_pasid_vmid_mapping(dqm->dev->kgd, pasid_mapping,

	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid_mapping,
						vmid);
						vmid);
}
}


@@ -511,7 +515,7 @@ int init_pipelines(struct device_queue_manager *dqm,
		pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_EOP_BYTES;
		pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_EOP_BYTES;
		pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr);
		pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr);
		/* = log2(bytes/4)-1 */
		/* = log2(bytes/4)-1 */
		kfd2kgd->init_pipeline(dqm->dev->kgd, inx,
		dqm->dev->kfd2kgd->init_pipeline(dqm->dev->kgd, inx,
				CIK_HPD_EOP_BYTES_LOG2 - 3, pipe_hpd_addr);
				CIK_HPD_EOP_BYTES_LOG2 - 3, pipe_hpd_addr);
	}
	}


@@ -897,7 +901,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
	return retval;
	return retval;
}
}


static int fence_wait_timeout(unsigned int *fence_addr,
static int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned int fence_value,
				unsigned long timeout)
				unsigned long timeout)
{
{
@@ -953,7 +957,7 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
				KFD_FENCE_COMPLETED);
	/* should be timed out */
	/* should be timed out */
	fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
	amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	pm_release_ib(&dqm->packets);
	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;
	dqm->active_runlist = false;
+7 −10
Original line number Original line Diff line number Diff line
@@ -32,9 +32,6 @@
 * and that assures that any user process won't get access to the
 * and that assures that any user process won't get access to the
 * kernel doorbells page
 * kernel doorbells page
 */
 */
static DEFINE_MUTEX(doorbell_mutex);
static unsigned long doorbell_available_index[
	DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)] = { 0 };


#define KERNEL_DOORBELL_PASID 1
#define KERNEL_DOORBELL_PASID 1
#define KFD_SIZE_OF_DOORBELL_IN_BYTES 4
#define KFD_SIZE_OF_DOORBELL_IN_BYTES 4
@@ -170,12 +167,12 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,


	BUG_ON(!kfd || !doorbell_off);
	BUG_ON(!kfd || !doorbell_off);


	mutex_lock(&doorbell_mutex);
	mutex_lock(&kfd->doorbell_mutex);
	inx = find_first_zero_bit(doorbell_available_index,
	inx = find_first_zero_bit(kfd->doorbell_available_index,
					KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
					KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);


	__set_bit(inx, doorbell_available_index);
	__set_bit(inx, kfd->doorbell_available_index);
	mutex_unlock(&doorbell_mutex);
	mutex_unlock(&kfd->doorbell_mutex);


	if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
	if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
		return NULL;
		return NULL;
@@ -203,9 +200,9 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)


	inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
	inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);


	mutex_lock(&doorbell_mutex);
	mutex_lock(&kfd->doorbell_mutex);
	__clear_bit(inx, doorbell_available_index);
	__clear_bit(inx, kfd->doorbell_available_index);
	mutex_unlock(&doorbell_mutex);
	mutex_unlock(&kfd->doorbell_mutex);
}
}


inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
+1 −11
Original line number Original line Diff line number Diff line
@@ -34,7 +34,6 @@
#define KFD_DRIVER_MINOR	7
#define KFD_DRIVER_MINOR	7
#define KFD_DRIVER_PATCHLEVEL	1
#define KFD_DRIVER_PATCHLEVEL	1


const struct kfd2kgd_calls *kfd2kgd;
static const struct kgd2kfd_calls kgd2kfd = {
static const struct kgd2kfd_calls kgd2kfd = {
	.exit		= kgd2kfd_exit,
	.exit		= kgd2kfd_exit,
	.probe		= kgd2kfd_probe,
	.probe		= kgd2kfd_probe,
@@ -55,9 +54,7 @@ module_param(max_num_of_queues_per_device, int, 0444);
MODULE_PARM_DESC(max_num_of_queues_per_device,
MODULE_PARM_DESC(max_num_of_queues_per_device,
	"Maximum number of supported queues per device (1 = Minimum, 4096 = default)");
	"Maximum number of supported queues per device (1 = Minimum, 4096 = default)");


bool kgd2kfd_init(unsigned interface_version,
bool kgd2kfd_init(unsigned interface_version, const struct kgd2kfd_calls **g2f)
		  const struct kfd2kgd_calls *f2g,
		  const struct kgd2kfd_calls **g2f)
{
{
	/*
	/*
	 * Only one interface version is supported,
	 * Only one interface version is supported,
@@ -66,11 +63,6 @@ bool kgd2kfd_init(unsigned interface_version,
	if (interface_version != KFD_INTERFACE_VERSION)
	if (interface_version != KFD_INTERFACE_VERSION)
		return false;
		return false;


	/* Protection against multiple amd kgd loads */
	if (kfd2kgd)
		return true;

	kfd2kgd = f2g;
	*g2f = &kgd2kfd;
	*g2f = &kgd2kfd;


	return true;
	return true;
@@ -85,8 +77,6 @@ static int __init kfd_module_init(void)
{
{
	int err;
	int err;


	kfd2kgd = NULL;

	/* Verify module parameters */
	/* Verify module parameters */
	if ((sched_policy < KFD_SCHED_POLICY_HWS) ||
	if ((sched_policy < KFD_SCHED_POLICY_HWS) ||
		(sched_policy > KFD_SCHED_POLICY_NO_HWS)) {
		(sched_policy > KFD_SCHED_POLICY_NO_HWS)) {
Loading