mirror of
				https://github.com/Ysurac/openmptcprouter.git
				synced 2025-03-09 15:40:20 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			805 lines
		
	
	
	
		
			27 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
			
		
		
	
	
			805 lines
		
	
	
	
		
			27 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
| From 9a52a9918306a3ff09f1c6e201d2fd6301c0eff6 Mon Sep 17 00:00:00 2001
 | |
| From: Phil Elwell <phil@raspberrypi.org>
 | |
| Date: Thu, 4 Apr 2019 13:33:47 +0100
 | |
| Subject: [PATCH 239/726] bcm2835-dma: Add proper 40-bit DMA support
 | |
| 
 | |
| BCM2711 has 4 DMA channels with a 40-bit address range, allowing them
 | |
| to access the full 4GB of memory on a Pi 4.
 | |
| 
 | |
| Signed-off-by: Phil Elwell <phil@raspberrypi.org>
 | |
| ---
 | |
|  drivers/dma/bcm2835-dma.c | 485 ++++++++++++++++++++++++++++++++------
 | |
|  1 file changed, 412 insertions(+), 73 deletions(-)
 | |
| 
 | |
| diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c
 | |
| index 87b5d0bef355..ac0458a8027f 100644
 | |
| --- a/drivers/dma/bcm2835-dma.c
 | |
| +++ b/drivers/dma/bcm2835-dma.c
 | |
| @@ -38,6 +38,11 @@
 | |
|  #define BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED 14
 | |
|  #define BCM2835_DMA_CHAN_NAME_SIZE 8
 | |
|  #define BCM2835_DMA_BULK_MASK  BIT(0)
 | |
| +#define BCM2711_DMA_MEMCPY_CHAN 14
 | |
| +
 | |
| +struct bcm2835_dma_cfg_data {
 | |
| +	u32	chan_40bit_mask;
 | |
| +};
 | |
|  
 | |
|  /**
 | |
|   * struct bcm2835_dmadev - BCM2835 DMA controller
 | |
| @@ -50,6 +55,7 @@ struct bcm2835_dmadev {
 | |
|  	struct dma_device ddev;
 | |
|  	void __iomem *base;
 | |
|  	dma_addr_t zero_page;
 | |
| +	const struct bcm2835_dma_cfg_data *cfg_data;
 | |
|  };
 | |
|  
 | |
|  struct bcm2835_dma_cb {
 | |
| @@ -62,6 +68,17 @@ struct bcm2835_dma_cb {
 | |
|  	uint32_t pad[2];
 | |
|  };
 | |
|  
 | |
| +struct bcm2711_dma40_scb {
 | |
| +	uint32_t ti;
 | |
| +	uint32_t src;
 | |
| +	uint32_t srci;
 | |
| +	uint32_t dst;
 | |
| +	uint32_t dsti;
 | |
| +	uint32_t len;
 | |
| +	uint32_t next_cb;
 | |
| +	uint32_t rsvd;
 | |
| +};
 | |
| +
 | |
|  struct bcm2835_cb_entry {
 | |
|  	struct bcm2835_dma_cb *cb;
 | |
|  	dma_addr_t paddr;
 | |
| @@ -82,6 +99,7 @@ struct bcm2835_chan {
 | |
|  	unsigned int irq_flags;
 | |
|  
 | |
|  	bool is_lite_channel;
 | |
| +	bool is_40bit_channel;
 | |
|  };
 | |
|  
 | |
|  struct bcm2835_desc {
 | |
| @@ -171,13 +189,118 @@ struct bcm2835_desc {
 | |
|  #define BCM2835_DMA_DATA_TYPE_S128	16
 | |
|  
 | |
|  /* Valid only for channels 0 - 14, 15 has its own base address */
 | |
| -#define BCM2835_DMA_CHAN(n)	((n) << 8) /* Base address */
 | |
| +#define BCM2835_DMA_CHAN_SIZE	0x100
 | |
| +#define BCM2835_DMA_CHAN(n)	((n) * BCM2835_DMA_CHAN_SIZE) /* Base address */
 | |
|  #define BCM2835_DMA_CHANIO(base, n) ((base) + BCM2835_DMA_CHAN(n))
 | |
|  
 | |
|  /* the max dma length for different channels */
 | |
|  #define MAX_DMA_LEN SZ_1G
 | |
|  #define MAX_LITE_DMA_LEN (SZ_64K - 4)
 | |
|  
 | |
| +/* 40-bit DMA support */
 | |
| +#define BCM2711_DMA40_CS	0x00
 | |
| +#define BCM2711_DMA40_CB	0x04
 | |
| +#define BCM2711_DMA40_DEBUG	0x0c
 | |
| +#define BCM2711_DMA40_TI	0x10
 | |
| +#define BCM2711_DMA40_SRC	0x14
 | |
| +#define BCM2711_DMA40_SRCI	0x18
 | |
| +#define BCM2711_DMA40_DEST	0x1c
 | |
| +#define BCM2711_DMA40_DESTI	0x20
 | |
| +#define BCM2711_DMA40_LEN	0x24
 | |
| +#define BCM2711_DMA40_NEXT_CB	0x28
 | |
| +#define BCM2711_DMA40_DEBUG2	0x2c
 | |
| +
 | |
| +#define BCM2711_DMA40_ACTIVE		BIT(0)
 | |
| +#define BCM2711_DMA40_END		BIT(1)
 | |
| +#define BCM2711_DMA40_INT		BIT(2)
 | |
| +#define BCM2711_DMA40_DREQ		BIT(3)  /* DREQ state */
 | |
| +#define BCM2711_DMA40_RD_PAUSED		BIT(4)  /* Reading is paused */
 | |
| +#define BCM2711_DMA40_WR_PAUSED		BIT(5)  /* Writing is paused */
 | |
| +#define BCM2711_DMA40_DREQ_PAUSED	BIT(6)  /* Is paused by DREQ flow control */
 | |
| +#define BCM2711_DMA40_WAITING_FOR_WRITES BIT(7)  /* Waiting for last write */
 | |
| +#define BCM2711_DMA40_ERR		BIT(10)
 | |
| +#define BCM2711_DMA40_QOS(x)		(((x) & 0x1f) << 16)
 | |
| +#define BCM2711_DMA40_PANIC_QOS(x)	(((x) & 0x1f) << 20)
 | |
| +#define BCM2711_DMA40_WAIT_FOR_WRITES	BIT(28)
 | |
| +#define BCM2711_DMA40_DISDEBUG		BIT(29)
 | |
| +#define BCM2711_DMA40_ABORT		BIT(30)
 | |
| +#define BCM2711_DMA40_HALT		BIT(31)
 | |
| +#define BCM2711_DMA40_CS_FLAGS(x) (x & (BCM2711_DMA40_QOS(15) | \
 | |
| +					BCM2711_DMA40_PANIC_QOS(15) | \
 | |
| +					BCM2711_DMA40_WAIT_FOR_WRITES |	\
 | |
| +					BCM2711_DMA40_DISDEBUG))
 | |
| +
 | |
| +/* Transfer information bits */
 | |
| +#define BCM2711_DMA40_INTEN		BIT(0)
 | |
| +#define BCM2711_DMA40_TDMODE		BIT(1) /* 2D-Mode */
 | |
| +#define BCM2711_DMA40_WAIT_RESP		BIT(2) /* wait for AXI write to be acked */
 | |
| +#define BCM2711_DMA40_WAIT_RD_RESP	BIT(3) /* wait for AXI read to complete */
 | |
| +#define BCM2711_DMA40_PER_MAP(x)	((x & 31) << 9) /* REQ source */
 | |
| +#define BCM2711_DMA40_S_DREQ		BIT(14) /* enable SREQ for source */
 | |
| +#define BCM2711_DMA40_D_DREQ		BIT(15) /* enable DREQ for destination */
 | |
| +#define BCM2711_DMA40_S_WAIT(x)		((x & 0xff) << 16) /* add DMA read-wait cycles */
 | |
| +#define BCM2711_DMA40_D_WAIT(x)		((x & 0xff) << 24) /* add DMA write-wait cycles */
 | |
| +
 | |
| +/* debug register bits */
 | |
| +#define BCM2711_DMA40_DEBUG_WRITE_ERR		BIT(0)
 | |
| +#define BCM2711_DMA40_DEBUG_FIFO_ERR		BIT(1)
 | |
| +#define BCM2711_DMA40_DEBUG_READ_ERR		BIT(2)
 | |
| +#define BCM2711_DMA40_DEBUG_READ_CB_ERR		BIT(3)
 | |
| +#define BCM2711_DMA40_DEBUG_IN_ON_ERR		BIT(8)
 | |
| +#define BCM2711_DMA40_DEBUG_ABORT_ON_ERR	BIT(9)
 | |
| +#define BCM2711_DMA40_DEBUG_HALT_ON_ERR		BIT(10)
 | |
| +#define BCM2711_DMA40_DEBUG_DISABLE_CLK_GATE	BIT(11)
 | |
| +#define BCM2711_DMA40_DEBUG_RSTATE_SHIFT	14
 | |
| +#define BCM2711_DMA40_DEBUG_RSTATE_BITS		4
 | |
| +#define BCM2711_DMA40_DEBUG_WSTATE_SHIFT	18
 | |
| +#define BCM2711_DMA40_DEBUG_WSTATE_BITS		4
 | |
| +#define BCM2711_DMA40_DEBUG_RESET		BIT(23)
 | |
| +#define BCM2711_DMA40_DEBUG_ID_SHIFT		24
 | |
| +#define BCM2711_DMA40_DEBUG_ID_BITS		4
 | |
| +#define BCM2711_DMA40_DEBUG_VERSION_SHIFT	28
 | |
| +#define BCM2711_DMA40_DEBUG_VERSION_BITS	4
 | |
| +
 | |
| +/* Valid only for 40-bit channels 0 - 3 (overall DMA channels 11 - 14) */
 | |
| +#define BCM2711_DMA40_CHAN(n)	(((n) + 11) << 8) /* Base address */
 | |
| +#define BCM2711_DMA40_CHANIO(base, n) ((base) + BCM2711_DMA40_CHAN(n))
 | |
| +
 | |
| +/* the max dma length for 40-bit channels */
 | |
| +#define MAX_DMA40_LEN SZ_1G
 | |
| +
 | |
| +#define BCM2711_DMA40_BURST_LEN(x)	((min(x,16) - 1) << 8)
 | |
| +#define BCM2711_DMA40_INC		BIT(12)
 | |
| +#define BCM2711_DMA40_SIZE_32		(0 << 13)
 | |
| +#define BCM2711_DMA40_SIZE_64		(1 << 13)
 | |
| +#define BCM2711_DMA40_SIZE_128		(2 << 13)
 | |
| +#define BCM2711_DMA40_SIZE_256		(3 << 13)
 | |
| +#define BCM2711_DMA40_IGNORE		BIT(15)
 | |
| +#define BCM2711_DMA40_STRIDE(x)		((x) << 16) /* For 2D mode */
 | |
| +
 | |
| +#define BCM2711_DMA40_MEMCPY_FLAGS \
 | |
| +	(BCM2711_DMA40_QOS(0) | \
 | |
| +	 BCM2711_DMA40_PANIC_QOS(0) | \
 | |
| +	 BCM2711_DMA40_WAIT_FOR_WRITES | \
 | |
| +	 BCM2711_DMA40_DISDEBUG)
 | |
| +
 | |
| +#define BCM2711_DMA40_MEMCPY_XFER_INFO \
 | |
| +	(BCM2711_DMA40_SIZE_128 | \
 | |
| +	 BCM2711_DMA40_INC | \
 | |
| +	 BCM2711_DMA40_BURST_LEN(16))
 | |
| +
 | |
| +struct bcm2835_dmadev *memcpy_parent;
 | |
| +static void __iomem *memcpy_chan;
 | |
| +static struct bcm2711_dma40_scb *memcpy_scb;
 | |
| +static dma_addr_t memcpy_scb_dma;
 | |
| +DEFINE_SPINLOCK(memcpy_lock);
 | |
| +
 | |
| +static const struct bcm2835_dma_cfg_data bcm2835_dma_cfg = {
 | |
| +	.chan_40bit_mask = 0,
 | |
| +};
 | |
| +
 | |
| +static const struct bcm2835_dma_cfg_data bcm2711_dma_cfg = {
 | |
| +	.chan_40bit_mask = BIT(11) | BIT(12) | BIT(13) | BIT(14),
 | |
| +};
 | |
| +
 | |
|  static inline size_t bcm2835_dma_max_frame_length(struct bcm2835_chan *c)
 | |
|  {
 | |
|  	/* lite and normal channels have different max frame length */
 | |
| @@ -207,6 +330,32 @@ static inline struct bcm2835_desc *to_bcm2835_dma_desc(
 | |
|  	return container_of(t, struct bcm2835_desc, vd.tx);
 | |
|  }
 | |
|  
 | |
| +static inline uint32_t to_bcm2711_ti(uint32_t info)
 | |
| +{
 | |
| +	return ((info & BCM2835_DMA_INT_EN) ? BCM2711_DMA40_INTEN : 0) |
 | |
| +		((info & BCM2835_DMA_WAIT_RESP) ? BCM2711_DMA40_WAIT_RESP : 0) |
 | |
| +		((info & BCM2835_DMA_S_DREQ) ?
 | |
| +		 (BCM2711_DMA40_S_DREQ | BCM2711_DMA40_WAIT_RD_RESP) : 0) |
 | |
| +		((info & BCM2835_DMA_D_DREQ) ? BCM2711_DMA40_D_DREQ : 0) |
 | |
| +		BCM2711_DMA40_PER_MAP((info >> 16) & 0x1f);
 | |
| +}
 | |
| +
 | |
| +static inline uint32_t to_bcm2711_srci(uint32_t info)
 | |
| +{
 | |
| +	return ((info & BCM2835_DMA_S_INC) ? BCM2711_DMA40_INC : 0);
 | |
| +}
 | |
| +
 | |
| +static inline uint32_t to_bcm2711_dsti(uint32_t info)
 | |
| +{
 | |
| +	return ((info & BCM2835_DMA_D_INC) ? BCM2711_DMA40_INC : 0);
 | |
| +}
 | |
| +
 | |
| +static inline uint32_t to_bcm2711_cbaddr(dma_addr_t addr)
 | |
| +{
 | |
| +	BUG_ON(addr & 0x1f);
 | |
| +	return (addr >> 5);
 | |
| +}
 | |
| +
 | |
|  static void bcm2835_dma_free_cb_chain(struct bcm2835_desc *desc)
 | |
|  {
 | |
|  	size_t i;
 | |
| @@ -225,45 +374,53 @@ static void bcm2835_dma_desc_free(struct virt_dma_desc *vd)
 | |
|  }
 | |
|  
 | |
|  static void bcm2835_dma_create_cb_set_length(
 | |
| -	struct bcm2835_chan *chan,
 | |
| +	struct bcm2835_chan *c,
 | |
|  	struct bcm2835_dma_cb *control_block,
 | |
|  	size_t len,
 | |
|  	size_t period_len,
 | |
|  	size_t *total_len,
 | |
|  	u32 finalextrainfo)
 | |
|  {
 | |
| -	size_t max_len = bcm2835_dma_max_frame_length(chan);
 | |
| +	size_t max_len = bcm2835_dma_max_frame_length(c);
 | |
| +	uint32_t cb_len;
 | |
|  
 | |
|  	/* set the length taking lite-channel limitations into account */
 | |
| -	control_block->length = min_t(u32, len, max_len);
 | |
| +	cb_len = min_t(u32, len, max_len);
 | |
|  
 | |
| -	/* finished if we have no period_length */
 | |
| -	if (!period_len)
 | |
| -		return;
 | |
| +	if (period_len) {
 | |
| +		/*
 | |
| +		 * a non-zero period_len means that we need to
 | |
| +		 * generate transfers that terminate at every
 | |
| +		 * multiple of period_len - this is typically
 | |
| +		 * used to set the interrupt flag in info,
 | |
| +		 * which is required during cyclic transfers
 | |
| +		 */
 | |
|  
 | |
| -	/*
 | |
| -	 * period_len means: that we need to generate
 | |
| -	 * transfers that are terminating at every
 | |
| -	 * multiple of period_len - this is typically
 | |
| -	 * used to set the interrupt flag in info
 | |
| -	 * which is required during cyclic transfers
 | |
| -	 */
 | |
| +		/* have we filled in period_length yet? */
 | |
| +		if (*total_len + cb_len < period_len) {
 | |
| +			/* update number of bytes in this period so far */
 | |
| +			*total_len += cb_len;
 | |
| +		} else {
 | |
| +			/* calculate the length that remains to reach period_len */
 | |
| +			cb_len = period_len - *total_len;
 | |
|  
 | |
| -	/* have we filled in period_length yet? */
 | |
| -	if (*total_len + control_block->length < period_len) {
 | |
| -		/* update number of bytes in this period so far */
 | |
| -		*total_len += control_block->length;
 | |
| -		return;
 | |
| +			/* reset total_length for next period */
 | |
| +			*total_len = 0;
 | |
| +		}
 | |
|  	}
 | |
|  
 | |
| -	/* calculate the length that remains to reach period_length */
 | |
| -	control_block->length = period_len - *total_len;
 | |
| -
 | |
| -	/* reset total_length for next period */
 | |
| -	*total_len = 0;
 | |
| +	if (c->is_40bit_channel) {
 | |
| +		struct bcm2711_dma40_scb *scb =
 | |
| +			(struct bcm2711_dma40_scb *)control_block;
 | |
|  
 | |
| -	/* add extrainfo bits in info */
 | |
| -	control_block->info |= finalextrainfo;
 | |
| +		scb->len = cb_len;
 | |
| +		/* add extrainfo bits to ti */
 | |
| +		scb->ti |= to_bcm2711_ti(finalextrainfo);
 | |
| +	} else {
 | |
| +		control_block->length = cb_len;
 | |
| +		/* add extrainfo bits to info */
 | |
| +		control_block->info |= finalextrainfo;
 | |
| +	}
 | |
|  }
 | |
|  
 | |
|  static inline size_t bcm2835_dma_count_frames_for_sg(
 | |
| @@ -286,7 +443,7 @@ static inline size_t bcm2835_dma_count_frames_for_sg(
 | |
|  /**
 | |
|   * bcm2835_dma_create_cb_chain - create a control block and fills data in
 | |
|   *
 | |
| - * @chan:           the @dma_chan for which we run this
 | |
| + * @c:              the @bcm2835_chan for which we run this
 | |
|   * @direction:      the direction in which we transfer
 | |
|   * @cyclic:         it is a cyclic transfer
 | |
|   * @info:           the default info bits to apply per controlblock
 | |
| @@ -304,12 +461,11 @@ static inline size_t bcm2835_dma_count_frames_for_sg(
 | |
|   * @gfp:            the GFP flag to use for allocation
 | |
|   */
 | |
|  static struct bcm2835_desc *bcm2835_dma_create_cb_chain(
 | |
| -	struct dma_chan *chan, enum dma_transfer_direction direction,
 | |
| +	struct bcm2835_chan *c, enum dma_transfer_direction direction,
 | |
|  	bool cyclic, u32 info, u32 finalextrainfo, size_t frames,
 | |
|  	dma_addr_t src, dma_addr_t dst, size_t buf_len,
 | |
|  	size_t period_len, gfp_t gfp)
 | |
|  {
 | |
| -	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
 | |
|  	size_t len = buf_len, total_len;
 | |
|  	size_t frame;
 | |
|  	struct bcm2835_desc *d;
 | |
| @@ -341,11 +497,23 @@ static struct bcm2835_desc *bcm2835_dma_create_cb_chain(
 | |
|  
 | |
|  		/* fill in the control block */
 | |
|  		control_block = cb_entry->cb;
 | |
| -		control_block->info = info;
 | |
| -		control_block->src = src;
 | |
| -		control_block->dst = dst;
 | |
| -		control_block->stride = 0;
 | |
| -		control_block->next = 0;
 | |
| +		if (c->is_40bit_channel) {
 | |
| +			struct bcm2711_dma40_scb *scb =
 | |
| +				(struct bcm2711_dma40_scb *)control_block;
 | |
| +			scb->ti = to_bcm2711_ti(info);
 | |
| +			scb->src = lower_32_bits(src);
 | |
| +			scb->srci= upper_32_bits(src) | to_bcm2711_srci(info);
 | |
| +			scb->dst = lower_32_bits(dst);
 | |
| +			scb->dsti = upper_32_bits(dst) | to_bcm2711_dsti(info);
 | |
| +			scb->next_cb = 0;
 | |
| +		} else {
 | |
| +			control_block->info = info;
 | |
| +			control_block->src = src;
 | |
| +			control_block->dst = dst;
 | |
| +			control_block->stride = 0;
 | |
| +			control_block->next = 0;
 | |
| +		}
 | |
| +
 | |
|  		/* set up length in control_block if requested */
 | |
|  		if (buf_len) {
 | |
|  			/* calculate length honoring period_length */
 | |
| @@ -359,7 +527,11 @@ static struct bcm2835_desc *bcm2835_dma_create_cb_chain(
 | |
|  		}
 | |
|  
 | |
|  		/* link this the last controlblock */
 | |
| -		if (frame)
 | |
| +		if (frame && c->is_40bit_channel)
 | |
| +			((struct bcm2711_dma40_scb *)
 | |
| +			 d->cb_list[frame - 1].cb)->next_cb =
 | |
| +				to_bcm2711_cbaddr(cb_entry->paddr);
 | |
| +		if (frame && !c->is_40bit_channel)
 | |
|  			d->cb_list[frame - 1].cb->next = cb_entry->paddr;
 | |
|  
 | |
|  		/* update src and dst and length */
 | |
| @@ -369,11 +541,21 @@ static struct bcm2835_desc *bcm2835_dma_create_cb_chain(
 | |
|  			dst += control_block->length;
 | |
|  
 | |
|  		/* Length of total transfer */
 | |
| -		d->size += control_block->length;
 | |
| +		if (c->is_40bit_channel)
 | |
| +			d->size += ((struct bcm2711_dma40_scb *)control_block)->len;
 | |
| +		else
 | |
| +			d->size += control_block->length;
 | |
|  	}
 | |
|  
 | |
|  	/* the last frame requires extra flags */
 | |
| -	d->cb_list[d->frames - 1].cb->info |= finalextrainfo;
 | |
| +	if (c->is_40bit_channel) {
 | |
| +		struct bcm2711_dma40_scb *scb =
 | |
| +			(struct bcm2711_dma40_scb *)d->cb_list[d->frames-1].cb;
 | |
| +
 | |
| +		scb->ti |= to_bcm2711_ti(finalextrainfo);
 | |
| +	} else {
 | |
| +		d->cb_list[d->frames - 1].cb->info |= finalextrainfo;
 | |
| +	}
 | |
|  
 | |
|  	/* detect a size missmatch */
 | |
|  	if (buf_len && (d->size != buf_len))
 | |
| @@ -387,13 +569,12 @@ static struct bcm2835_desc *bcm2835_dma_create_cb_chain(
 | |
|  }
 | |
|  
 | |
|  static void bcm2835_dma_fill_cb_chain_with_sg(
 | |
| -	struct dma_chan *chan,
 | |
| +	struct bcm2835_chan *c,
 | |
|  	enum dma_transfer_direction direction,
 | |
|  	struct bcm2835_cb_entry *cb,
 | |
|  	struct scatterlist *sgl,
 | |
|  	unsigned int sg_len)
 | |
|  {
 | |
| -	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
 | |
|  	size_t len, max_len;
 | |
|  	unsigned int i;
 | |
|  	dma_addr_t addr;
 | |
| @@ -401,14 +582,35 @@ static void bcm2835_dma_fill_cb_chain_with_sg(
 | |
|  
 | |
|  	max_len = bcm2835_dma_max_frame_length(c);
 | |
|  	for_each_sg(sgl, sgent, sg_len, i) {
 | |
| -		for (addr = sg_dma_address(sgent), len = sg_dma_len(sgent);
 | |
| -		     len > 0;
 | |
| -		     addr += cb->cb->length, len -= cb->cb->length, cb++) {
 | |
| -			if (direction == DMA_DEV_TO_MEM)
 | |
| -				cb->cb->dst = addr;
 | |
| -			else
 | |
| -				cb->cb->src = addr;
 | |
| -			cb->cb->length = min(len, max_len);
 | |
| +		if (c->is_40bit_channel) {
 | |
| +			struct bcm2711_dma40_scb *scb;
 | |
| +
 | |
| +			for (addr = sg_dma_address(sgent),
 | |
| +				     len = sg_dma_len(sgent);
 | |
| +				     len > 0;
 | |
| +			     addr += scb->len, len -= scb->len, cb++) {
 | |
| +				scb = (struct bcm2711_dma40_scb *)cb->cb;
 | |
| +				if (direction == DMA_DEV_TO_MEM) {
 | |
| +					scb->dst = lower_32_bits(addr);
 | |
| +					scb->dsti = upper_32_bits(addr) | BCM2711_DMA40_INC;
 | |
| +				} else {
 | |
| +					scb->src = lower_32_bits(addr);
 | |
| +					scb->srci = upper_32_bits(addr) | BCM2711_DMA40_INC;
 | |
| +				}
 | |
| +				scb->len = min(len, max_len);
 | |
| +			}
 | |
| +		} else {
 | |
| +			for (addr = sg_dma_address(sgent),
 | |
| +				     len = sg_dma_len(sgent);
 | |
| +			     len > 0;
 | |
| +			     addr += cb->cb->length, len -= cb->cb->length,
 | |
| +			     cb++) {
 | |
| +				if (direction == DMA_DEV_TO_MEM)
 | |
| +					cb->cb->dst = addr;
 | |
| +				else
 | |
| +					cb->cb->src = addr;
 | |
| +				cb->cb->length = min(len, max_len);
 | |
| +			}
 | |
|  		}
 | |
|  	}
 | |
|  }
 | |
| @@ -417,6 +619,10 @@ static void bcm2835_dma_abort(struct bcm2835_chan *c)
 | |
|  {
 | |
|  	void __iomem *chan_base = c->chan_base;
 | |
|  	long int timeout = 10000;
 | |
| +	u32 wait_mask = BCM2835_DMA_WAITING_FOR_WRITES;
 | |
| +
 | |
| +	if (c->is_40bit_channel)
 | |
| +		wait_mask = BCM2711_DMA40_WAITING_FOR_WRITES;
 | |
|  
 | |
|  	/*
 | |
|  	 * A zero control block address means the channel is idle.
 | |
| @@ -429,8 +635,7 @@ static void bcm2835_dma_abort(struct bcm2835_chan *c)
 | |
|  	writel(0, chan_base + BCM2835_DMA_CS);
 | |
|  
 | |
|  	/* Wait for any current AXI transfer to complete */
 | |
| -	while ((readl(chan_base + BCM2835_DMA_CS) &
 | |
| -		BCM2835_DMA_WAITING_FOR_WRITES) && --timeout)
 | |
| +	while ((readl(chan_base + BCM2835_DMA_CS) & wait_mask) && --timeout)
 | |
|  		cpu_relax();
 | |
|  
 | |
|  	/* Peripheral might be stuck and fail to signal AXI write responses */
 | |
| @@ -455,9 +660,16 @@ static void bcm2835_dma_start_desc(struct bcm2835_chan *c)
 | |
|  
 | |
|  	c->desc = d = to_bcm2835_dma_desc(&vd->tx);
 | |
|  
 | |
| -	writel(d->cb_list[0].paddr, c->chan_base + BCM2835_DMA_ADDR);
 | |
| -	writel(BCM2835_DMA_ACTIVE | BCM2835_DMA_CS_FLAGS(c->dreq),
 | |
| -	       c->chan_base + BCM2835_DMA_CS);
 | |
| +	if (c->is_40bit_channel) {
 | |
| +		writel(to_bcm2711_cbaddr(d->cb_list[0].paddr),
 | |
| +		       c->chan_base + BCM2711_DMA40_CB);
 | |
| +		writel(BCM2711_DMA40_ACTIVE | BCM2711_DMA40_CS_FLAGS(c->dreq),
 | |
| +		       c->chan_base + BCM2711_DMA40_CS);
 | |
| +	} else {
 | |
| +		writel(d->cb_list[0].paddr, c->chan_base + BCM2835_DMA_ADDR);
 | |
| +		writel(BCM2835_DMA_ACTIVE | BCM2835_DMA_CS_FLAGS(c->dreq),
 | |
| +		       c->chan_base + BCM2835_DMA_CS);
 | |
| +	}
 | |
|  }
 | |
|  
 | |
|  static irqreturn_t bcm2835_dma_callback(int irq, void *data)
 | |
| @@ -484,8 +696,7 @@ static irqreturn_t bcm2835_dma_callback(int irq, void *data)
 | |
|  	 * if this IRQ handler is threaded.) If the channel is finished, it
 | |
|  	 * will remain idle despite the ACTIVE flag being set.
 | |
|  	 */
 | |
| -	writel(BCM2835_DMA_INT | BCM2835_DMA_ACTIVE |
 | |
| -	       BCM2835_DMA_CS_FLAGS(c->dreq),
 | |
| +	writel(BCM2835_DMA_INT | BCM2835_DMA_ACTIVE,
 | |
|  	       c->chan_base + BCM2835_DMA_CS);
 | |
|  
 | |
|  	d = c->desc;
 | |
| @@ -588,9 +799,17 @@ static enum dma_status bcm2835_dma_tx_status(struct dma_chan *chan,
 | |
|  		struct bcm2835_desc *d = c->desc;
 | |
|  		dma_addr_t pos;
 | |
|  
 | |
| -		if (d->dir == DMA_MEM_TO_DEV)
 | |
| +		if (d->dir == DMA_MEM_TO_DEV && c->is_40bit_channel)
 | |
| +			pos = readl(c->chan_base + BCM2711_DMA40_SRC) +
 | |
| +				((readl(c->chan_base + BCM2711_DMA40_SRCI) &
 | |
| +				  0xff) << 8);
 | |
| +		else if (d->dir == DMA_MEM_TO_DEV && !c->is_40bit_channel)
 | |
|  			pos = readl(c->chan_base + BCM2835_DMA_SOURCE_AD);
 | |
| -		else if (d->dir == DMA_DEV_TO_MEM)
 | |
| +		else if (d->dir == DMA_DEV_TO_MEM && c->is_40bit_channel)
 | |
| +			pos = readl(c->chan_base + BCM2711_DMA40_DEST) +
 | |
| +				((readl(c->chan_base + BCM2711_DMA40_DESTI) &
 | |
| +				  0xff) << 8);
 | |
| +		else if (d->dir == DMA_DEV_TO_MEM && !c->is_40bit_channel)
 | |
|  			pos = readl(c->chan_base + BCM2835_DMA_DEST_AD);
 | |
|  		else
 | |
|  			pos = 0;
 | |
| @@ -636,7 +855,7 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_memcpy(
 | |
|  	frames = bcm2835_dma_frames_for_length(len, max_len);
 | |
|  
 | |
|  	/* allocate the CB chain - this also fills in the pointers */
 | |
| -	d = bcm2835_dma_create_cb_chain(chan, DMA_MEM_TO_MEM, false,
 | |
| +	d = bcm2835_dma_create_cb_chain(c, DMA_MEM_TO_MEM, false,
 | |
|  					info, extra, frames,
 | |
|  					src, dst, len, 0, GFP_KERNEL);
 | |
|  	if (!d)
 | |
| @@ -671,11 +890,21 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_slave_sg(
 | |
|  		if (c->cfg.src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
 | |
|  			return NULL;
 | |
|  		src = c->cfg.src_addr;
 | |
| +		/*
 | |
| +		 * One would think it ought to be possible to get the physical
 | |
| +		 * to dma address mapping information from the dma-ranges DT
 | |
| +		 * property, but I've not found a way yet that doesn't involve
 | |
| +		 * open-coding the whole thing.
 | |
| +		 */
 | |
| +		if (c->is_40bit_channel)
 | |
| +		    src |= 0x400000000ull;
 | |
|  		info |= BCM2835_DMA_S_DREQ | BCM2835_DMA_D_INC;
 | |
|  	} else {
 | |
|  		if (c->cfg.dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
 | |
|  			return NULL;
 | |
|  		dst = c->cfg.dst_addr;
 | |
| +		if (c->is_40bit_channel)
 | |
| +		    dst |= 0x400000000ull;
 | |
|  		info |= BCM2835_DMA_D_DREQ | BCM2835_DMA_S_INC;
 | |
|  	}
 | |
|  
 | |
| @@ -683,7 +912,7 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_slave_sg(
 | |
|  	frames = bcm2835_dma_count_frames_for_sg(c, sgl, sg_len);
 | |
|  
 | |
|  	/* allocate the CB chain */
 | |
| -	d = bcm2835_dma_create_cb_chain(chan, direction, false,
 | |
| +	d = bcm2835_dma_create_cb_chain(c, direction, false,
 | |
|  					info, extra,
 | |
|  					frames, src, dst, 0, 0,
 | |
|  					GFP_NOWAIT);
 | |
| @@ -691,7 +920,7 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_slave_sg(
 | |
|  		return NULL;
 | |
|  
 | |
|  	/* fill in frames with scatterlist pointers */
 | |
| -	bcm2835_dma_fill_cb_chain_with_sg(chan, direction, d->cb_list,
 | |
| +	bcm2835_dma_fill_cb_chain_with_sg(c, direction, d->cb_list,
 | |
|  					  sgl, sg_len);
 | |
|  
 | |
|  	return vchan_tx_prep(&c->vc, &d->vd, flags);
 | |
| @@ -745,12 +974,16 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic(
 | |
|  		if (c->cfg.src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
 | |
|  			return NULL;
 | |
|  		src = c->cfg.src_addr;
 | |
| +		if (c->is_40bit_channel)
 | |
| +		    src |= 0x400000000ull;
 | |
|  		dst = buf_addr;
 | |
|  		info |= BCM2835_DMA_S_DREQ | BCM2835_DMA_D_INC;
 | |
|  	} else {
 | |
|  		if (c->cfg.dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
 | |
|  			return NULL;
 | |
|  		dst = c->cfg.dst_addr;
 | |
| +		if (c->is_40bit_channel)
 | |
| +		    dst |= 0x400000000ull;
 | |
|  		src = buf_addr;
 | |
|  		info |= BCM2835_DMA_D_DREQ | BCM2835_DMA_S_INC;
 | |
|  
 | |
| @@ -770,7 +1003,7 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic(
 | |
|  	 * note that we need to use GFP_NOWAIT, as the ALSA i2s dmaengine
 | |
|  	 * implementation calls prep_dma_cyclic with interrupts disabled.
 | |
|  	 */
 | |
| -	d = bcm2835_dma_create_cb_chain(chan, direction, true,
 | |
| +	d = bcm2835_dma_create_cb_chain(c, direction, true,
 | |
|  					info, extra,
 | |
|  					frames, src, dst, buf_len,
 | |
|  					period_len, GFP_NOWAIT);
 | |
| @@ -778,7 +1011,12 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic(
 | |
|  		return NULL;
 | |
|  
 | |
|  	/* wrap around into a loop */
 | |
| -	d->cb_list[d->frames - 1].cb->next = d->cb_list[0].paddr;
 | |
| +	if (c->is_40bit_channel)
 | |
| +		((struct bcm2711_dma40_scb *)
 | |
| +		 d->cb_list[frames - 1].cb)->next_cb =
 | |
| +			to_bcm2711_cbaddr(d->cb_list[0].paddr);
 | |
| +	else
 | |
| +		d->cb_list[d->frames - 1].cb->next = d->cb_list[0].paddr;
 | |
|  
 | |
|  	return vchan_tx_prep(&c->vc, &d->vd, flags);
 | |
|  }
 | |
| @@ -839,9 +1077,11 @@ static int bcm2835_dma_chan_init(struct bcm2835_dmadev *d, int chan_id,
 | |
|  	c->irq_number = irq;
 | |
|  	c->irq_flags = irq_flags;
 | |
|  
 | |
| -	/* check in DEBUG register if this is a LITE channel */
 | |
| -	if (readl(c->chan_base + BCM2835_DMA_DEBUG) &
 | |
| -		BCM2835_DMA_DEBUG_LITE)
 | |
| +	/* check for 40bit and lite channels */
 | |
| +	if (d->cfg_data->chan_40bit_mask & BIT(chan_id))
 | |
| +		c->is_40bit_channel = true;
 | |
| +	else if (readl(c->chan_base + BCM2835_DMA_DEBUG) &
 | |
| +		 BCM2835_DMA_DEBUG_LITE)
 | |
|  		c->is_lite_channel = true;
 | |
|  
 | |
|  	return 0;
 | |
| @@ -861,8 +1101,58 @@ static void bcm2835_dma_free(struct bcm2835_dmadev *od)
 | |
|  			     DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
 | |
|  }
 | |
|  
 | |
| +int bcm2711_dma40_memcpy_init(void)
 | |
| +{
 | |
| +	if (!memcpy_parent)
 | |
| +		return -EPROBE_DEFER;
 | |
| +
 | |
| +	if (!memcpy_chan)
 | |
| +		return -EINVAL;
 | |
| +
 | |
| +	if (!memcpy_scb)
 | |
| +		return -ENOMEM;
 | |
| +
 | |
| +	return 0;
 | |
| +}
 | |
| +EXPORT_SYMBOL(bcm2711_dma40_memcpy_init);
 | |
| +
 | |
| +void bcm2711_dma40_memcpy(dma_addr_t dst, dma_addr_t src, size_t size)
 | |
| +{
 | |
| +	struct bcm2711_dma40_scb *scb = memcpy_scb;
 | |
| +	unsigned long flags;
 | |
| +
 | |
| +	if (!scb) {
 | |
| +		pr_err("bcm2711_dma40_memcpy not initialised!\n");
 | |
| +		return;
 | |
| +	}
 | |
| +
 | |
| +	spin_lock_irqsave(&memcpy_lock, flags);
 | |
| +
 | |
| +	scb->ti = 0;
 | |
| +	scb->src = lower_32_bits(src);
 | |
| +	scb->srci = upper_32_bits(src) | BCM2711_DMA40_MEMCPY_XFER_INFO;
 | |
| +	scb->dst = lower_32_bits(dst);
 | |
| +	scb->dsti = upper_32_bits(dst) | BCM2711_DMA40_MEMCPY_XFER_INFO;
 | |
| +	scb->len = size;
 | |
| +	scb->next_cb = 0;
 | |
| +
 | |
| +	writel((u32)(memcpy_scb_dma >> 5), memcpy_chan + BCM2711_DMA40_CB);
 | |
| +	writel(BCM2711_DMA40_MEMCPY_FLAGS + BCM2711_DMA40_ACTIVE,
 | |
| +	       memcpy_chan + BCM2711_DMA40_CS);
 | |
| +
 | |
| +	/* Poll for completion */
 | |
| +	while (!(readl(memcpy_chan + BCM2711_DMA40_CS) & BCM2711_DMA40_END))
 | |
| +		cpu_relax();
 | |
| +
 | |
| +	writel(BCM2711_DMA40_END, memcpy_chan + BCM2711_DMA40_CS);
 | |
| +
 | |
| +	spin_unlock_irqrestore(&memcpy_lock, flags);
 | |
| +}
 | |
| +EXPORT_SYMBOL(bcm2711_dma40_memcpy);
 | |
| +
 | |
|  static const struct of_device_id bcm2835_dma_of_match[] = {
 | |
| -	{ .compatible = "brcm,bcm2835-dma", },
 | |
| +	{ .compatible = "brcm,bcm2835-dma", .data = &bcm2835_dma_cfg },
 | |
| +	{ .compatible = "brcm,bcm2711-dma", .data = &bcm2711_dma_cfg },
 | |
|  	{},
 | |
|  };
 | |
|  MODULE_DEVICE_TABLE(of, bcm2835_dma_of_match);
 | |
| @@ -894,6 +1184,8 @@ static int bcm2835_dma_probe(struct platform_device *pdev)
 | |
|  	int irq_flags;
 | |
|  	uint32_t chans_available;
 | |
|  	char chan_name[BCM2835_DMA_CHAN_NAME_SIZE];
 | |
| +	const struct of_device_id *of_id;
 | |
| +	int chan_count, chan_start, chan_end;
 | |
|  
 | |
|  	if (!pdev->dev.dma_mask)
 | |
|  		pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask;
 | |
| @@ -914,9 +1206,13 @@ static int bcm2835_dma_probe(struct platform_device *pdev)
 | |
|  	base = devm_ioremap_resource(&pdev->dev, res);
 | |
|  	if (IS_ERR(base))
 | |
|  		return PTR_ERR(base);
 | |
| -	rc = bcm_dmaman_probe(pdev, base, BCM2835_DMA_BULK_MASK);
 | |
| -	if (rc)
 | |
| -		dev_err(&pdev->dev, "Failed to initialize the legacy API\n");
 | |
| +
 | |
| +	/* The set of channels can be split across multiple instances. */
 | |
| +	chan_start = ((u32)(uintptr_t)base / BCM2835_DMA_CHAN_SIZE) & 0xf;
 | |
| +	base -= BCM2835_DMA_CHAN(chan_start);
 | |
| +	chan_count = resource_size(res) / BCM2835_DMA_CHAN_SIZE;
 | |
| +	chan_end = min(chan_start + chan_count,
 | |
| +			 BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED + 1);
 | |
|  
 | |
|  	od->base = base;
 | |
|  
 | |
| @@ -953,6 +1249,14 @@ static int bcm2835_dma_probe(struct platform_device *pdev)
 | |
|  		return -ENOMEM;
 | |
|  	}
 | |
|  
 | |
| +	of_id = of_match_node(bcm2835_dma_of_match, pdev->dev.of_node);
 | |
| +	if (!of_id) {
 | |
| +		dev_err(&pdev->dev, "Failed to match compatible string\n");
 | |
| +		return -EINVAL;
 | |
| +	}
 | |
| +
 | |
| +	od->cfg_data = of_id->data;
 | |
| +
 | |
|  	/* Request DMA channel mask from device tree */
 | |
|  	if (of_property_read_u32(pdev->dev.of_node,
 | |
|  			"brcm,dma-channel-mask",
 | |
| @@ -962,11 +1266,34 @@ static int bcm2835_dma_probe(struct platform_device *pdev)
 | |
|  		goto err_no_dma;
 | |
|  	}
 | |
|  
 | |
| -	/* Channel 0 is used by the legacy API */
 | |
| -	chans_available &= ~BCM2835_DMA_BULK_MASK;
 | |
| +	/* One channel is reserved for the legacy API */
 | |
| +	if (chans_available & BCM2835_DMA_BULK_MASK) {
 | |
| +		rc = bcm_dmaman_probe(pdev, base,
 | |
| +				      chans_available & BCM2835_DMA_BULK_MASK);
 | |
| +		if (rc)
 | |
| +			dev_err(&pdev->dev,
 | |
| +				"Failed to initialize the legacy API\n");
 | |
| +
 | |
| +		chans_available &= ~BCM2835_DMA_BULK_MASK;
 | |
| +	}
 | |
| +
 | |
| +	/* And possibly one for the 40-bit DMA memcpy API */
 | |
| +	if (chans_available & od->cfg_data->chan_40bit_mask &
 | |
| +	    BIT(BCM2711_DMA_MEMCPY_CHAN)) {
 | |
| +		memcpy_parent = od;
 | |
| +		memcpy_chan = BCM2835_DMA_CHANIO(base, BCM2711_DMA_MEMCPY_CHAN);
 | |
| +		memcpy_scb = dma_alloc_coherent(memcpy_parent->ddev.dev,
 | |
| +						sizeof(*memcpy_scb),
 | |
| +						&memcpy_scb_dma, GFP_KERNEL);
 | |
| +		if (!memcpy_scb)
 | |
| +			dev_warn(&pdev->dev,
 | |
| +				 "Failed to allocated memcpy scb\n");
 | |
| +
 | |
| +		chans_available &= ~BIT(BCM2711_DMA_MEMCPY_CHAN);
 | |
| +	}
 | |
|  
 | |
|  	/* get irqs for each channel that we support */
 | |
| -	for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) {
 | |
| +	for (i = chan_start; i < chan_end; i++) {
 | |
|  		/* skip masked out channels */
 | |
|  		if (!(chans_available & (1 << i))) {
 | |
|  			irq[i] = -1;
 | |
| @@ -989,13 +1316,17 @@ static int bcm2835_dma_probe(struct platform_device *pdev)
 | |
|  		irq[i] = platform_get_irq(pdev, i < 11 ? i : 11);
 | |
|  	}
 | |
|  
 | |
| +	chan_count = 0;
 | |
| +
 | |
|  	/* get irqs for each channel */
 | |
| -	for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) {
 | |
| +	for (i = chan_start; i < chan_end; i++) {
 | |
|  		/* skip channels without irq */
 | |
|  		if (irq[i] < 0)
 | |
|  			continue;
 | |
|  
 | |
|  		/* check if there are other channels that also use this irq */
 | |
| +		/* FIXME: This will fail if interrupts are shared across
 | |
| +		   instances */
 | |
|  		irq_flags = 0;
 | |
|  		for (j = 0; j <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; j++)
 | |
|  			if ((i != j) && (irq[j] == irq[i])) {
 | |
| @@ -1007,9 +1338,10 @@ static int bcm2835_dma_probe(struct platform_device *pdev)
 | |
|  		rc = bcm2835_dma_chan_init(od, i, irq[i], irq_flags);
 | |
|  		if (rc)
 | |
|  			goto err_no_dma;
 | |
| +		chan_count++;
 | |
|  	}
 | |
|  
 | |
| -	dev_dbg(&pdev->dev, "Initialized %i DMA channels\n", i);
 | |
| +	dev_dbg(&pdev->dev, "Initialized %i DMA channels\n", chan_count);
 | |
|  
 | |
|  	/* Device-tree DMA controller registration */
 | |
|  	rc = of_dma_controller_register(pdev->dev.of_node,
 | |
| @@ -1041,6 +1373,13 @@ static int bcm2835_dma_remove(struct platform_device *pdev)
 | |
|  
 | |
|  	bcm_dmaman_remove(pdev);
 | |
|  	dma_async_device_unregister(&od->ddev);
 | |
| +	if (memcpy_parent == od) {
 | |
| +		dma_free_coherent(&pdev->dev, sizeof(*memcpy_scb), memcpy_scb,
 | |
| +				  memcpy_scb_dma);
 | |
| +		memcpy_parent = NULL;
 | |
| +		memcpy_scb = NULL;
 | |
| +		memcpy_chan = NULL;
 | |
| +	}
 | |
|  	bcm2835_dma_free(od);
 | |
|  
 | |
|  	return 0;
 | |
| -- 
 | |
| 2.33.1
 | |
| 
 |