Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f6a8cd99 authored by Farrukh Qurashi
Browse files

msm: npu: Add driver functionality to support NPU



Add clock control, bandwidth monitor integration, thermal
integration, IPC and execution management for operation of the NPU
hardware block. The driver provides control of the NPU hardware for
execution of neural networks.

Change-Id: I6b7616ffd758e8660ac0855d37f53af732f49e48
Signed-off-by: Ken Zhang <kenz@codeaurora.org>
Signed-off-by: Farrukh Qurashi <fqurashi@codeaurora.org>
parent 452becbf
Loading
Loading
Loading
Loading
+164 −0
Original line number Original line Diff line number Diff line
Qualcomm Technologies, Inc. NPU powerlevels

Powerlevels are defined in sets by qcom,npu-pwrlevels. Each powerlevel defines
a series of clock frequencies. These frequencies are for the corresponding
clocks in the clocks property of the msm_npu device.

qcom,npu-pwrlevels bindings:

Required Properties:
- #address-cells: Should be set to 1
- #size-cells: Should be set to 0
- compatible: Must be qcom,npu-pwrlevels
- initial-pwrlevel: NPU initial wakeup power level. This is the index of the
	child power level node to use.

qcom,npu-pwrlevel: This is a child node defining power levels.
qcom,npu-pwrlevels must contain at least one power level node. Each child node
has the following properties:

Required Properties:
- reg: Index of the powerlevel (0 = lowest performance)
- clk-freq: List of clock frequencies (in Hz) of each clock for the current
	powerlevel. List of clocks and order described in:
	Documentation/devicetree/bindings/media/msm-npu.txt

Example:
	qcom,npu-pwrlevels {
		#address-cells = <1>;
		#size-cells = <0>;
		compatible = "qcom,npu-pwrlevels";
		initial-pwrlevel = <4>;
		qcom,npu-pwrlevel@0 {
			reg = <0>;
			clk-freq = <9600000
					9600000
					19200000
					19200000
					19200000
					19200000
					9600000
					60000000
					19200000
					19200000
					30000000
					19200000
					19200000
					19200000
					19200000
					19200000
					9600000
					19200000
					0>;
		};
		qcom,npu-pwrlevel@1 {
			reg = <1>;
			clk-freq = <300000000
					300000000
					19200000
					100000000
					19200000
					19200000
					300000000
					150000000
					19200000
					19200000
					60000000
					100000000
					100000000
					37500000
					100000000
					19200000
					300000000
					19200000
					0>;
		};
		qcom,npu-pwrlevel@2 {
			reg = <2>;
			clk-freq = <350000000
					350000000
					19200000
					150000000
					19200000
					19200000
					350000000
					200000000
					37500000
					19200000
					120000000
					150000000
					150000000
					75000000
					150000000
					19200000
					350000000
					19200000
					0>;
		};
		qcom,npu-pwrlevel@3 {
			reg = <3>;
			clk-freq = <400000000
					400000000
					19200000
					200000000
					19200000
					19200000
					400000000
					300000000
					37500000
					19200000
					120000000
					200000000
					200000000
					75000000
					200000000
					19200000
					400000000
					19200000
					0>;
		};
		qcom,npu-pwrlevel@4 {
			reg = <4>;
			clk-freq = <600000000
					600000000
					19200000
					300000000
					19200000
					19200000
					600000000
					403000000
					75000000
					19200000
					240000000
					300000000
					300000000
					150000000
					300000000
					19200000
					600000000
					19200000
					0>;
		};
		qcom,npu-pwrlevel@5 {
			reg = <5>;
			clk-freq = <715000000
					715000000
					19200000
					350000000
					19200000
					19200000
					715000000
					533000000
					75000000
					19200000
					240000000
					350000000
					350000000
					150000000
					350000000
					19200000
					715000000
					19200000
					0>;
		};
	};
+208 −23
Original line number Original line Diff line number Diff line
@@ -3,14 +3,18 @@
NPU (Neural Network Processing Unit) applies neural network processing
NPU (Neural Network Processing Unit) applies neural network processing


Required properties:
Required properties:
- compatible:
- compatible: Must be "qcom,msm-npu"
    - "qcom,msm-npu"
- reg: Specify offset and length of the device register sets.
- reg: Specify offset and length of the device register sets.
- reg-names: Names corresponding to the defined register sets.
- reg-names: Names corresponding to the defined register sets.
	- "npu_base": npu base registers
	- "npu_base": npu base registers
- interrupts: Specify the npu interrupts.
- interrupts: Specify the npu interrupts.
- interrupt-names: should specify relevant names to each interrupts
- interrupt-names: should specify relevant names to each interrupts
	property defined.
	property defined.
- cache-slice-names: A set of names that identify the usecase names of a
	client that uses cache slice. These strings are used to look up the
	cache slice entries by name
- cache-slices: The tuple has phandle to llcc device as the first argument
	and the second argument is the usecase id of the client
- clocks: clocks required for the device.
- clocks: clocks required for the device.
- clock-names: names of clocks required for the device.
- clock-names: names of clocks required for the device.
- vdd-supply: Phandle for vdd regulator device node
- vdd-supply: Phandle for vdd regulator device node
@@ -20,24 +24,205 @@ Required properties:
	during proxy voting/unvoting.
	during proxy voting/unvoting.
- qcom,vdd_'reg'-uV-uA: Voltage and current values for the 'reg' regulator,
- qcom,vdd_'reg'-uV-uA: Voltage and current values for the 'reg' regulator,
	e.g. qcom,vdd_cx-uV-uA.
	e.g. qcom,vdd_cx-uV-uA.

- mboxes: Phandle array for mailbox controllers to be used for IPC
- mbox-names: Names of the mailboxes
- #cooling-cells: Should be set to 2
- qcom,npubw-dev: a phandle to a device representing bus bandwidth requirements
	(see devbw.txt)
- qcom,npu-pwrlevels: Container for NPU power levels
	(see msm-npu-pwrlevels.txt)
Example:
Example:
	msm_npu: qcom,msm_npu {
	msm_npu: qcom,msm_npu@9800000 {
		compatible = "qcom,msm-npu";
		compatible = "qcom,msm-npu";
		status = "ok";
		reg = <0x9800000 0x800000>;
		reg = <0x9800000 0x800000>;
		reg-names = "npu_base";
		reg-names = "npu_base";
		interrupts = <0 346 0>;
		interrupts = <GIC_SPI 368 IRQ_TYPE_EDGE_RISING>;
		interrupt-names = "single";
		iommus = <&apps_smmu 0x1461 0x0>, <&apps_smmu 0x2061 0x0>;
		clocks = <&clock_npucc NPU_CC_XO_CLK>,
		cache-slice-names = "npu";
					<&clock_npucc NPU_CC_NPU_CORE_CLK>,
		cache-slices = <&llcc 23>;
					<&clock_npucc NPU_CC_CAL_DP_CLK>,
		clocks = <&clock_npucc NPU_CC_CAL_DP_CLK>,
				<&clock_npucc NPU_CC_CAL_DP_CLK_SRC>,
				<&clock_npucc NPU_CC_XO_CLK>,
				<&clock_npucc NPU_CC_ARMWIC_CORE_CLK>,
				<&clock_npucc NPU_CC_ARMWIC_CORE_CLK>,
				<&clock_npucc NPU_CC_BTO_CORE_CLK>,
				<&clock_npucc NPU_CC_BWMON_CLK>,
				<&clock_npucc NPU_CC_CAL_DP_CDC_CLK>,
				<&clock_npucc NPU_CC_COMP_NOC_AXI_CLK>,
				<&clock_npucc NPU_CC_COMP_NOC_AXI_CLK>,
					<&clock_npucc NPU_CC_CONF_NOC_AHB_CLK>;
				<&clock_npucc NPU_CC_CONF_NOC_AHB_CLK>,
		clock-names = "xo", "core", "cal_dp", "armwic",
				<&clock_npucc NPU_CC_NPU_CORE_APB_CLK>,
						"axi", "ahb";
				<&clock_npucc NPU_CC_NPU_CORE_ATB_CLK>,
				<&clock_npucc NPU_CC_NPU_CORE_CLK>,
				<&clock_npucc NPU_CC_NPU_CORE_CLK_SRC>,
				<&clock_npucc NPU_CC_NPU_CORE_CTI_CLK>,
				<&clock_npucc NPU_CC_NPU_CPC_CLK>,
				<&clock_npucc NPU_CC_NPU_CPC_TIMER_CLK>,
				<&clock_npucc NPU_CC_PERF_CNT_CLK>,
				<&clock_npucc NPU_CC_QTIMER_CORE_CLK>,
				<&clock_npucc NPU_CC_SLEEP_CLK>;
		clock-names = "cal_dp_clk",
				"cal_dp_clk_src",
				"xo_clk",
				"armwic_core_clk",
				"bto_core_clk",
				"bwmon_clk",
				"cal_dp_cdc_clk",
				"comp_noc_axi_clk",
				"conf_noc_ahb_clk",
				"npu_core_apb_clk",
				"npu_core_atb_clk",
				"npu_core_clk",
				"npu_core_clk_src",
				"npu_core_cti_clk",
				"npu_cpc_clk",
				"npu_cpc_timer_clk",
				"perf_cnt_clk",
				"qtimer_core_clk",
				"sleep_clk";
		vdd-supply = <&npu_core_gdsc>;
		vdd-supply = <&npu_core_gdsc>;
		vdd_cx-supply = <&pm855l_s6_level>;
		vdd_cx-supply = <&pm855l_s6_level>;
		qcom,proxy-reg-names ="vdd", "vdd_cx";
		qcom,proxy-reg-names ="vdd", "vdd_cx";
		qcom,vdd_cx-uV-uA = <RPMH_REGULATOR_LEVEL_TURBO 100000>;
		qcom,vdd_cx-uV-uA = <RPMH_REGULATOR_LEVEL_TURBO 100000>;
		mboxes = <&qmp_npu0 0>, <&qmp_npu1 0>;
		mbox-names = "npu_low", "npu_high";
		#cooling-cells = <2>;
		qcom,npubw-dev = <&npu_npu_ddr_bw>;
		qcom,npu-pwrlevels {
			#address-cells = <1>;
			#size-cells = <0>;
			compatible = "qcom,npu-pwrlevels";
			initial-pwrlevel = <4>;
			qcom,npu-pwrlevel@0 {
				reg = <0>;
				clk-freq = <9600000
						9600000
						19200000
						19200000
						19200000
						19200000
						9600000
						60000000
						19200000
						19200000
						30000000
						19200000
						19200000
						19200000
						19200000
						19200000
						9600000
						19200000
						0>;
			};
			qcom,npu-pwrlevel@1 {
				reg = <1>;
				clk-freq = <300000000
						300000000
						19200000
						100000000
						19200000
						19200000
						300000000
						150000000
						19200000
						19200000
						60000000
						100000000
						100000000
						37500000
						100000000
						19200000
						300000000
						19200000
						0>;
			};
			qcom,npu-pwrlevel@2 {
				reg = <2>;
				clk-freq = <350000000
						350000000
						19200000
						150000000
						19200000
						19200000
						350000000
						200000000
						37500000
						19200000
						120000000
						150000000
						150000000
						75000000
						150000000
						19200000
						350000000
						19200000
						0>;
			};
			qcom,npu-pwrlevel@3 {
				reg = <3>;
				clk-freq = <400000000
						400000000
						19200000
						200000000
						19200000
						19200000
						400000000
						300000000
						37500000
						19200000
						120000000
						200000000
						200000000
						75000000
						200000000
						19200000
						400000000
						19200000
						0>;
			};
			qcom,npu-pwrlevel@4 {
				reg = <4>;
				clk-freq = <600000000
						600000000
						19200000
						300000000
						19200000
						19200000
						600000000
						403000000
						75000000
						19200000
						240000000
						300000000
						300000000
						150000000
						300000000
						19200000
						600000000
						19200000
						0>;
			};
			qcom,npu-pwrlevel@5 {
				reg = <5>;
				clk-freq = <715000000
						715000000
						19200000
						350000000
						19200000
						19200000
						715000000
						533000000
						75000000
						19200000
						240000000
						350000000
						350000000
						150000000
						350000000
						19200000
						715000000
						19200000
						0>;
			};
		};
	};
	};
+7 −1
Original line number Original line Diff line number Diff line
obj-$(CONFIG_MSM_NPU)	    += npu_dev.o
obj-$(CONFIG_MSM_NPU)      := msm_npu.o
msm_npu-objs := npu_dbg.o \
                npu_dev.o \
                npu_debugfs.o \
                npu_host_ipc.o \
                npu_hw_access.o \
                npu_mgr.o
+151 −40
Original line number Original line Diff line number Diff line
/* Copyright (c) 2017, The Linux Foundation. All rights reserved.
/* Copyright (c) 2017-2018, The Linux Foundation. All rights reserved.
 *
 *
 * This program is free software; you can redistribute it and/or modify
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * it under the terms of the GNU General Public License version 2 and
@@ -12,35 +12,29 @@


#ifndef _NPU_COMMON_H
#ifndef _NPU_COMMON_H
#define _NPU_COMMON_H
#define _NPU_COMMON_H
#include <linux/list.h>

#include <linux/file.h>
/* -------------------------------------------------------------------------
#include <linux/mutex.h>
 * Includes
#include <linux/types.h>
 * -------------------------------------------------------------------------
 */
#include <asm/dma-iommu.h>
#include <linux/cdev.h>
#include <linux/cdev.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mod_devicetable.h>
#include <linux/platform_device.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/of.h>
#include <linux/clk.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/mailbox_controller.h>
#include <linux/regulator/consumer.h>
#include <linux/mailbox_client.h>
#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/msm-bus.h>
#include <linux/dma-buf.h>
#include <linux/msm_dma_iommu_mapping.h>
#include <asm/dma-iommu.h>
#include <stdarg.h>
#include <linux/msm_npu.h>
#include <linux/msm_npu.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>

#include "npu_mgr.h"


/* -------------------------------------------------------------------------
 * Defines
 * -------------------------------------------------------------------------
 */
/* get npu info */
/* get npu info */
#define MSM_NPU_GET_INFO_32 \
#define MSM_NPU_GET_INFO_32 \
	_IOWR(MSM_NPU_IOCTL_MAGIC, 1, compat_caddr_t)
	_IOWR(MSM_NPU_IOCTL_MAGIC, 1, compat_caddr_t)
@@ -65,27 +59,109 @@
#define MSM_NPU_EXEC_NETWORK_32 \
#define MSM_NPU_EXEC_NETWORK_32 \
	_IOWR(MSM_NPU_IOCTL_MAGIC, 6, compat_caddr_t)
	_IOWR(MSM_NPU_IOCTL_MAGIC, 6, compat_caddr_t)


#define NPU_MAX_CLK_NUM		8
#define NPU_MAX_MBOX_NUM	    2
#define NPU_MAX_REGULATOR_NUM	4
#define NPU_MBOX_LOW_PRI	    0
#define NPU_MAX_DT_NAME_LEN	16
#define NPU_MBOX_HIGH_PRI	    1


#define NPU_FIRMWARE_VERSION	0x1000
#define DEFAULT_REG_DUMP_NUM	64
#define ROW_BYTES 16
#define GROUP_BYTES 4

#define NUM_TOTAL_CLKS          19
#define NPU_MAX_REGULATOR_NUM	2
#define NPU_MAX_DT_NAME_LEN	    21
#define NPU_MAX_PWRLEVELS		7

/* -------------------------------------------------------------------------
 * Data Structures
 * -------------------------------------------------------------------------
 */
/*
 * SMMU/IOMMU context kept per NPU device (embedded in struct npu_device as
 * smmu_ctx).
 * NOTE(review): field meanings below are inferred from names and types only;
 * confirm against the code that attaches/detaches the mapping.
 */
struct npu_smmu_ctx {
	int domain;
	struct dma_iommu_mapping *mmu_mapping;	/* ARM DMA-IOMMU mapping handle */
	struct reg_bus_client *reg_bus_clt;
	int32_t attach_cnt;	/* presumably an attach reference count - TODO confirm */
};


struct npu_clk_t {
/*
 * Bookkeeping for one dma-buf backed buffer known to the driver.
 * struct npu_device embeds one instance as mapped_buffers; the list member
 * allows entries to be chained together.
 * NOTE(review): per-field semantics are inferred from names and types only;
 * confirm against the map/unmap code.
 */
struct npu_ion_buf {
	int fd;
	struct dma_buf *dma_buf;
	struct dma_buf_attachment *attachment;
	struct sg_table *table;
	dma_addr_t iova;
	uint32_t size;
	void *phys_addr;
	void *buf;
	struct list_head list;
};

struct npu_clk {
	struct clk *clk;
	struct clk *clk;
	char clk_name[NPU_MAX_DT_NAME_LEN];
	char clk_name[NPU_MAX_DT_NAME_LEN];
};
};


struct npu_regulator_t {
struct npu_regulator {
	struct regulator *regulator;
	struct regulator *regulator;
	char regulator_name[NPU_MAX_DT_NAME_LEN];
	char regulator_name[NPU_MAX_DT_NAME_LEN];
};
};


#define DEFAULT_REG_DUMP_NUM	0x100
struct npu_debugfs_ctx {
#define ROW_BYTES 16
	struct dentry *root;
#define GROUP_BYTES 4
	uint32_t reg_off;
	uint32_t reg_cnt;
	char *buf;
	size_t buf_len;
	uint8_t *log_buf;
	struct mutex log_lock;
	uint32_t log_num_bytes_buffered;
	uint32_t log_read_index;
	uint32_t log_write_index;
	uint32_t log_buf_size;
	bool sys_cache_disable;
};


struct npu_device_t {
/*
 * One mailbox channel used by the NPU driver. struct npu_device holds
 * NPU_MAX_MBOX_NUM of these in its mbox[] array.
 */
struct npu_mbox {
	struct mbox_client client;
	struct mbox_chan *chan;
	struct npu_device *npu_dev;	/* back-pointer to the owning device */
	uint32_t id;	/* presumably NPU_MBOX_LOW_PRI/NPU_MBOX_HIGH_PRI - TODO confirm */
};

/**
 * struct npu_pwrlevel - Struct holding different pwrlevel info obtained
 * from dtsi file
 * @clk_freq:            NPU frequency vote in Hz, one entry per clock
 *                       (NUM_TOTAL_CLKS entries)
 */
struct npu_pwrlevel {
	long clk_freq[NUM_TOTAL_CLKS];
};

/**
 * struct npu_pwrctrl - Power control settings for a NPU device
 * @pwr_vote_num: voting information for power enable
 * @pwrlevels: List of supported power levels
 * @active_pwrlevel: The currently active power level
 * @default_pwrlevel: device wake up power level
 * @max_pwrlevel: maximum allowable powerlevel per the user
 * @min_pwrlevel: minimum allowable powerlevel per the user
 * @num_pwrlevels: number of available power levels
 * @devbw: bw device
 * @bwmon_enabled: presumably non-zero while the bandwidth monitor is enabled
 *                 (NOTE(review): not established by this header - confirm)
 */
struct npu_pwrctrl {
	int32_t pwr_vote_num;

	struct npu_pwrlevel pwrlevels[NPU_MAX_PWRLEVELS];
	uint32_t active_pwrlevel;
	uint32_t default_pwrlevel;
	uint32_t max_pwrlevel;
	uint32_t min_pwrlevel;
	uint32_t num_pwrlevels;

	struct device *devbw;
	uint32_t bwmon_enabled;
};

struct npu_device {
	struct mutex ctx_lock;
	struct mutex ctx_lock;


	struct platform_device *pdev;
	struct platform_device *pdev;
@@ -97,14 +173,49 @@ struct npu_device_t {


	size_t reg_size;
	size_t reg_size;
	char __iomem *npu_base;
	char __iomem *npu_base;
	u32 npu_phys;
	uint32_t npu_phys;


	uint32_t core_clk_num;
	uint32_t core_clk_num;
	struct npu_clk_t core_clks[NPU_MAX_CLK_NUM];
	struct npu_clk core_clks[NUM_TOTAL_CLKS];


	uint32_t regulator_num;
	uint32_t regulator_num;
	struct npu_regulator_t regulators[NPU_MAX_DT_NAME_LEN];
	struct npu_regulator regulators[NPU_MAX_DT_NAME_LEN];

	uint32_t irq;

	struct npu_ion_buf mapped_buffers;

	struct device *cb_device;


	u32 irq;
	struct npu_host_ctx host_ctx;
	struct npu_smmu_ctx smmu_ctx;
	struct npu_debugfs_ctx debugfs_ctx;

	struct npu_mbox mbox[NPU_MAX_MBOX_NUM];

	struct thermal_cooling_device *tcdev;
	struct npu_pwrctrl pwrctrl;

	struct llcc_slice_desc *sys_cache;
};
};


/* -------------------------------------------------------------------------
 * Function Prototypes
 * -------------------------------------------------------------------------
 */
/* debugfs setup/teardown for an NPU device */
int npu_debugfs_init(struct npu_device *npu_dev);
void npu_debugfs_deinit(struct npu_device *npu_dev);

/*
 * Core power and clock control.
 * NOTE(review): exact enable/disable ordering requirements are not visible
 * in this header - confirm in the implementation.
 */
int npu_enable_core_power(struct npu_device *npu_dev);
void npu_disable_core_power(struct npu_device *npu_dev);
int npu_enable_post_pil_clocks(struct npu_device *npu_dev);

/* Interrupt handler; signature matches irq_handler_t (irq number, cookie) */
irqreturn_t npu_intr_hdler(int irq, void *ptr);

/* pwr_level is presumably an index into pwrctrl.pwrlevels[] - TODO confirm */
int npu_set_power_level(struct npu_device *npu_dev, uint32_t pwr_level);

/* Firmware bring-up and shutdown */
int fw_init(struct npu_device *npu_dev);
void fw_deinit(struct npu_device *npu_dev);

#endif /* _NPU_COMMON_H */
#endif /* _NPU_COMMON_H */
Loading