四足机器人高算力、低成本主控第七步:树莓派STM32 SLAVE 从机SPI通讯优化基于DMA高速可靠交互

在构建单片机与树莓派的通讯中,我之前已经基于中断的方式实现了较可靠的通讯,由于SPI发送中采用轮询的方式,即树莓派作为主机一直给底层发送数据,其数据长度按帧格式定义不同每次都不一样,单片机端采用SPI接收中断的方式:

/*
 * Body of the SPI2 receive interrupt (fragment; the enclosing handler is not
 * shown). For every received byte it:
 *   1. optionally refills the TX buffer when the previous TX frame is done,
 *   2. feeds the received byte into a frame-parsing state machine,
 *   3. pushes the next TX byte so that it is clocked out by the master.
 *
 * Frame layout as parsed below:
 *   [0]=0xFB  [1]=0xFC  [2]=byte in 1..0xF0  [3]=payload length
 *   [4..4+len-1]=payload  [4+len]=one trailing byte
 * NOTE(review): byte [2] is presumably a frame/function id and the trailing
 * byte presumably a checksum - neither is validated here; confirm in slave_rx.
 */
if(SPI_I2S_GetFlagStatus(SPI2, SPI_I2S_FLAG_RXNE) != RESET)	
	{
		/* spi_flag_pi[1]==1 means the previous TX frame was fully sent, so the
		 * TX buffer may be rebuilt without tearing a frame in flight. */
		if(spi_flag_pi[1]==1){// TX data may be changed now
			spi_flag_pi[1]=0;
			/* NOTE(review): presumably the elapsed time since the previous call
			 * for timer slot 16 - confirm units against Get_Cycle_T. */
			spi_dt[0] = Get_Cycle_T(16); 
		
			timer_sys+=spi_dt[0];
			/* Once the accumulated time exceeds 0.1, send frame type 3 and
			 * restart the accumulator; otherwise alternate types 1 and 2. */
			if(timer_sys>0.1){timer_sys=0;
				slave_send(3);// system status
			}else{
				if(send_flag==1){
				send_flag=0;
				slave_send(1);// attitude
				}// TX data has been re-assigned
				else{
				send_flag=1;
				slave_send(2);
				}		
			}
			/* Restart transmission from the first byte of the new frame. */
			 spi_tx_cnt_send=0;
		}					
		
		data = SPI_I2S_ReceiveData(SPI2);// read the received SPI byte in the interrupt
	
		/* --- RX frame state machine; any unexpected byte resets to state 0 --- */
		if(state==0&&data==0xFB)
		{
			/* first header byte */
			state=1;
			spi_rx_buf[0]=data;
		}
		else if(state==1&&data==0xFC)
		{
			/* second header byte */
			state=2;
			spi_rx_buf[1]=data;
		}
		else if(state==2&&data>0&&data<0XF1)
		{
			/* third byte, accepted only in the range 1..0xF0 */
			state=3;
			spi_rx_buf[2]=data;
		}
		else if(state==3&&data<SPI_BUF_SIZE)
		{
			/* payload length.
			 * NOTE(review): the highest index written later is 4+length, but
			 * the guard only checks length < SPI_BUF_SIZE. If spi_rx_buf has
			 * SPI_BUF_SIZE elements, lengths >= SPI_BUF_SIZE-4 would overrun
			 * it - confirm the buffer size. */
			state = 4;
			spi_rx_buf[3]=data;
			_data_len2 = data;
			_data_cnt2 = 0;
		}
		else if(state==4&&_data_len2>0)
		{
			/* payload bytes; a length of 0 fails this test and falls through
			 * to the final else, so zero-length frames are dropped. */
			_data_len2--;
			spi_rx_buf[4+_data_cnt2++]=data;
			if(_data_len2==0)
				state= 5;
		}
		else if(state==5)
		{
			/* trailing byte: store it and hand the whole frame
			 * (4 header bytes + payload + 1 trailing byte) to slave_rx. */
			state = 0;
			spi_rx_buf[4+_data_cnt2]=data;
			spi_rx_cnt=4;
			slave_rx(spi_rx_buf,_data_cnt2+5);
			spi_rx_cnt_all++;
		}
		else
			state = 0;
		
		
		// synchronous transmit
		
		/* When every byte of the TX frame has been sent, raise the flag so the
		 * next interrupt rebuilds the buffer (no byte is sent on this pass);
		 * otherwise send the next byte of the current frame. */
		if(spi_tx_cnt_send>=spi_tx_cnt&&spi_flag_pi[1]==0)// TX complete, buffer may be re-assigned
		{ spi_flag_pi[1]=1;
		}else
			SPI2_ReadWriteByte_s(spi_tx_buf[spi_tx_cnt_send++]); 
	}

其原理即当进入中断后单片机马上回传一个SPI数据,按顺序发送Buf中的数据,当发送完毕后才刷新该buf,对于接收部分比较简单,就按帧协议构建一个状态机对数据进行缓存和解码,采用该方法能保证2ms左右的通讯,但是当我把直接运行在单片机中的代码移植到树莓派中时发现根本没法运行:

单片机版本

树莓派版本,参数和程序均一致

一开始我认为可能是树莓派实时补丁的问题,虽然打上了补丁但是没法验证其实时性到底好不好,对于这一块我也没法继续优化,后面考虑到在独立测试SPI通讯时数据没啥问题,而在实际机器人运行时除了SPI外还需要接收CAN,而SPI高速收发STM32一直在进入中断导致CAN发送的故障和不连续,另外采用轮询发送的方式STM32回传底层数据速率受主机制约,中间存在错位时会导致数据反馈不连续和中断,因此想通过DMA收发的方式来实现。

DMA在串口中我已经测试过,本来需要堵塞等待发送的程序采用DMA后可以非常方便的传输大量数据而不影响主程序,因此需要采用DMA的方式来控制SPI收发,但是在网络上搜索很长时间后均没有可以确认运行的程序,国内很多教程基本都是采用HAL库或者简单配置接收,没有配置如何双工通讯,在经过不断寻找后在Github上检索到项目STM32F401_DSP_StdPeriph_Lib_V1.6.0_SPI_Slave,在经过很长时间移植后终于测试通过实现双工DMA数据传输。

对SPI部分的修改首先将之前的轮询方案改成树莓派和单片机交换同样大小的缓冲区域,这样能保证每一帧的帧头对齐,也利于解码和校验,通过DMA我们能直接在树莓派中发送固定长度的数据后直接进入对应接收中断,这样类似UDP的处理机制直接在中断中可以将接收到的数据进行解码和拆包,最后在DMA接收完毕后使能发送,DMA就自动将当前发送缓冲中的数据回传到树莓派中,最终在发送中断中判断发送完毕即可刷新发送寄存器保证数据的一致,其项目中几个关键部分如下:

STM32F4 SPI2 DMA1配置

/**
 * Configure SPI_DEVICE as a full-duplex, 8-bit SPI slave and turn on its
 * RX and TX DMA requests.
 *
 * Framing: clock idle low, data captured on the first edge, MSB first,
 * NSS handled in software. GPIO pins are not touched here and the
 * peripheral itself is not enabled (there is no SPI_Cmd call).
 */
void Custom_SPI_DEVICE_Slave_Config(void)
{
	SPI_InitTypeDef spi_cfg;

	/* Role and wiring */
	spi_cfg.SPI_Mode = SPI_Mode_Slave;
	spi_cfg.SPI_Direction = SPI_Direction_2Lines_FullDuplex;
	spi_cfg.SPI_NSS = SPI_NSS_Soft;

	/* Frame format */
	spi_cfg.SPI_DataSize = SPI_DataSize_8b;
	spi_cfg.SPI_FirstBit = SPI_FirstBit_MSB;
	spi_cfg.SPI_CPOL = SPI_CPOL_Low;
	spi_cfg.SPI_CPHA = SPI_CPHA_1Edge;

	/* Remaining fields, filled so the structure is fully initialized */
	spi_cfg.SPI_BaudRatePrescaler = SLAVE_SPI_BAUDRATE;
	spi_cfg.SPI_CRCPolynomial = 7;

	/* Clock the peripheral, return it to reset state, then apply the setup */
	SPI_DEVICE_CLK_INIT(SPI_DEVICE_CLK, ENABLE);
	SPI_I2S_DeInit(SPI_DEVICE);
	SPI_Init(SPI_DEVICE, &spi_cfg);

	/* Let the SPI raise DMA requests in both directions (RX first, then TX) */
	SPI_I2S_DMACmd(SPI_DEVICE, SPI_I2S_DMAReq_Rx, ENABLE);
	SPI_I2S_DMACmd(SPI_DEVICE, SPI_I2S_DMAReq_Tx, ENABLE);
}

DMA接收完毕外部中断(NSS片选上升沿)配置:

void Custom_SPI_DEVICE_Slave_EXTI_Config(void)
{
	GPIO_InitTypeDef 	GPIO_InitStructure;
	SPI_InitTypeDef  	SPI_InitStructure;
	EXTI_InitTypeDef   	EXTI_InitStructure;
	NVIC_InitTypeDef   NVIC_InitStructure;

	/* Enable the SPI periph */
	SPI_DEVICE_CLK_INIT(SPI_DEVICE_CLK, ENABLE);

	/* Enable SCK, MOSI, MISO and NSS GPIO clocks */
	RCC_AHB1PeriphClockCmd(SPI_DEVICE_MOSI_GPIO_CLK|SPI_DEVICE_MISO_GPIO_CLK |SPI_DEVICE_SCK_GPIO_CLK , ENABLE);

	GPIO_PinAFConfig(SPI_DEVICE_SCK_GPIO_PORT, SPI_DEVICE_SCK_GPIO_SOURCE, SPI_DEVICE_SCK_GPIO_AF);			//CLK
	GPIO_PinAFConfig(SPI_DEVICE_MISO_GPIO_PORT, SPI_DEVICE_MISO_GPIO_SOURCE, SPI_DEVICE_MISO_GPIO_AF);   	//MISO
	GPIO_PinAFConfig(SPI_DEVICE_MOSI_GPIO_PORT, SPI_DEVICE_MOSI_GPIO_SOURCE, SPI_DEVICE_MOSI_GPIO_AF);		//MOSI

	GPIO_InitStructure.GPIO_Mode = GPIO_Mode_AF;
	GPIO_InitStructure.GPIO_Speed = GPIO_Speed_50MHz;
	GPIO_InitStructure.GPIO_OType = GPIO_OType_PP;
	GPIO_InitStructure.GPIO_PuPd  = GPIO_PuPd_UP;
	/*!< Configure SPI_DEVICE_SPI pins: SCK */
	GPIO_InitStructure.GPIO_Pin = SPI_DEVICE_SCK_PIN;
	GPIO_Init(SPI_DEVICE_SCK_GPIO_PORT, &GPIO_InitStructure);
	/*!< Configure SPI_DEVICE_SPI pins: MISO */
	GPIO_InitStructure.GPIO_Pin =  SPI_DEVICE_MISO_PIN;
	GPIO_Init(SPI_DEVICE_MISO_GPIO_PORT, &GPIO_InitStructure);  
	/*!< Configure SPI_DEVICE_SPI pins: MOSI */
	GPIO_InitStructure.GPIO_Pin =  SPI_DEVICE_MOSI_PIN;
	GPIO_Init(SPI_DEVICE_MOSI_GPIO_PORT, &GPIO_InitStructure);

	/* SPI configuration -------------------------------------------------------*/
	SPI_I2S_DeInit(SPI_DEVICE);
	SPI_InitStructure.SPI_Direction = SPI_Direction_2Lines_FullDuplex;
	SPI_InitStructure.SPI_Mode = SPI_Mode_Slave;	
	SPI_InitStructure.SPI_DataSize = SPI_DataSize_8b;
	SPI_InitStructure.SPI_CPOL = SPI_CPOL_Low;
	SPI_InitStructure.SPI_CPHA = SPI_CPHA_1Edge;
	SPI_InitStructure.SPI_NSS = SPI_NSS_Soft;
	SPI_InitStructure.SPI_BaudRatePrescaler = SLAVE_SPI_BAUDRATE;
	SPI_InitStructure.SPI_FirstBit = SPI_FirstBit_MSB;
	SPI_InitStructure.SPI_CRCPolynomial = 7;
	SPI_Init(SPI_DEVICE, &SPI_InitStructure);

	/* Enable SPI DMA RX Requsts */
	SPI_I2S_DMACmd(SPI_DEVICE, SPI_I2S_DMAReq_Rx, ENABLE);
	/* Enable SPI DMA TX Requsts */
	SPI_I2S_DMACmd(SPI_DEVICE, SPI_I2S_DMAReq_Tx, ENABLE);

	#if 1	//for slave CS
	GPIO_InitStructure.GPIO_Mode = GPIO_Mode_IN;
	GPIO_InitStructure.GPIO_Speed = GPIO_Speed_50MHz;
	GPIO_InitStructure.GPIO_OType = GPIO_OType_PP;
	GPIO_InitStructure.GPIO_PuPd  = GPIO_PuPd_UP;
	GPIO_InitStructure.GPIO_Pin = SPI_DEVICE_CS_PIN;
	GPIO_Init(SPI_DEVICE_CS_GPIO_PORT, &GPIO_InitStructure);

  	RCC_APB2PeriphClockCmd(RCC_APB2Periph_SYSCFG, ENABLE);
  	SYSCFG_EXTILineConfig(SPI_DEVICE_CS_EXTI_PortSource, SPI_DEVICE_CS_EXTI_PinSource);

	EXTI_InitStructure.EXTI_Line = SPI_DEVICE_CS_EXTI_Line;
	EXTI_InitStructure.EXTI_Mode = EXTI_Mode_Interrupt;
	EXTI_InitStructure.EXTI_Trigger = EXTI_Trigger_Rising;  
	EXTI_InitStructure.EXTI_LineCmd = ENABLE;
	EXTI_Init(&EXTI_InitStructure);

	NVIC_InitStructure.NVIC_IRQChannel = SPI_DEVICE_CS_EXTI_IRQn;
	NVIC_InitStructure.NVIC_IRQChannelPreemptionPriority = 0x00;
	NVIC_InitStructure.NVIC_IRQChannelSubPriority = 0x00;
	NVIC_InitStructure.NVIC_IRQChannelCmd = ENABLE;
	NVIC_Init(&NVIC_InitStructure);	
	#endif

}   

启动DMA发送

/**
 * Arm both DMA streams for one fixed-size frame and enable the SPI.
 *
 * DMA1 Stream3 receives into DataRxBuffer and DMA1 Stream4 transmits from
 * DataTxBuffer; each moves DataSize+CheckSumSize items. The fields of
 * DMA_InitStructure_Slave that are not assigned here must already have been
 * filled in (see Custom_SPI_DMABufferConfig). Transfer-complete interrupts
 * are enabled for both streams at preemption/sub priority 0.
 */
void Custom_SPI_DMABufferStart(void)
{
	NVIC_InitTypeDef irq_cfg;
	const uint32_t frame_len = (uint32_t)(DataSize+CheckSumSize);

	/* RX stream: peripheral -> DataRxBuffer, high priority */
	DMA_InitStructure_Slave.DMA_DIR = DMA_DIR_PeripheralToMemory ;
	DMA_InitStructure_Slave.DMA_Memory0BaseAddr = (uint32_t)&DataRxBuffer[0];
	DMA_InitStructure_Slave.DMA_BufferSize = frame_len;
	DMA_InitStructure_Slave.DMA_Priority = DMA_Priority_High;
	DMA_Init(DMA1_Stream3, &DMA_InitStructure_Slave);

	/* TX stream: DataTxBuffer -> peripheral, low priority */
	DMA_InitStructure_Slave.DMA_DIR = DMA_DIR_MemoryToPeripheral ;
	DMA_InitStructure_Slave.DMA_Memory0BaseAddr = (uint32_t)&DataTxBuffer[0];
	DMA_InitStructure_Slave.DMA_BufferSize = frame_len;
	DMA_InitStructure_Slave.DMA_Priority = DMA_Priority_Low;
	DMA_Init(DMA1_Stream4, &DMA_InitStructure_Slave);

	/* Transfer-complete interrupt for the RX stream */
	DMA_ITConfig(DMA1_Stream3, DMA_IT_TC, ENABLE);
	irq_cfg.NVIC_IRQChannelPreemptionPriority = 0;
	irq_cfg.NVIC_IRQChannelSubPriority = 0;
	irq_cfg.NVIC_IRQChannelCmd = ENABLE;
	irq_cfg.NVIC_IRQChannel = DMA1_Stream3_IRQn;
	NVIC_Init(&irq_cfg);

	/* Transfer-complete interrupt for the TX stream; priorities and the
	 * enable setting carry over, only the channel changes. */
	DMA_ITConfig(DMA1_Stream4, DMA_IT_TC, ENABLE);
	irq_cfg.NVIC_IRQChannel = DMA1_Stream4_IRQn;
	NVIC_Init(&irq_cfg);

	/* Start the streams: RX first, then TX */
	DMA_Cmd(DMA1_Stream3,ENABLE);
	DMA_Cmd(DMA1_Stream4,ENABLE);

	/* Turn the SPI-side DMA requests on, then enable the SPI itself so the
	 * transfer is carried entirely by DMA. */
	SPI_I2S_DMACmd(SPI_DEVICE, SPI_I2S_DMAReq_Rx, ENABLE);
	SPI_I2S_DMACmd(SPI_DEVICE, SPI_I2S_DMAReq_Tx, ENABLE);
	SPI_Cmd(SPI_DEVICE, ENABLE);

}

DMA等待数据

/**
 * Stop the current SPI/DMA frame exchange and leave both streams with their
 * transfer-complete flags cleared so they can be armed again.
 *
 * Despite its name this function does not block: it does not poll for the
 * end of the transfer. It simply shuts the exchange down in whatever state
 * it is in.
 *
 * Ordering matters. The streams are disabled BEFORE the transfer-complete
 * flags are cleared: if a stream is still running when it is disabled (for
 * example the master clocked fewer bytes than expected), the DMA controller
 * raises TCIF as part of stopping it. Clearing the flags first would leave
 * that late TCIF latched, and a stream with stale event flags may refuse to
 * start on the next enable. Clearing last covers both the normal and the
 * aborted case.
 *
 * NOTE(review): the flag clear is not synchronized with the hardware actually
 * dropping the stream enable bit; the intervening SPI calls give it time, but
 * poll the stream status before clearing if a hard guarantee is required.
 */
void Custom_SPI_DMABufferWait(void)
{
	/* Stop both DMA streams (RX, then TX) */
	DMA_Cmd(DMA1_Stream3,DISABLE);
	DMA_Cmd(DMA1_Stream4,DISABLE);

	/* Stop the SPI from raising further DMA requests */
	SPI_I2S_DMACmd(SPI_DEVICE, SPI_I2S_DMAReq_Rx, DISABLE);
	SPI_I2S_DMACmd(SPI_DEVICE, SPI_I2S_DMAReq_Tx, DISABLE);

	SPI_Cmd(SPI_DEVICE, DISABLE);

	/* Clear transfer-complete flags last, so a TCIF raised by the stream
	 * shutdown above is cleared as well. */
	DMA_ClearFlag(DMA1_Stream3,DMA_FLAG_TCIF3);
	DMA_ClearFlag(DMA1_Stream4,DMA_FLAG_TCIF4);
}


/**
 * One-time DMA preparation for the SPI slave link.
 *
 * Enables the DMA1 clock, resets Stream3 (RX) and Stream4 (TX), and fills
 * the settings of DMA_InitStructure_Slave that both directions share:
 * byte-wide single transfers in normal (non-circular) mode, FIFO disabled,
 * memory address incrementing, peripheral address fixed at the SPI data
 * register, channel 0.
 *
 * Direction, memory base address, buffer size and priority are deliberately
 * not set here; no DMA_Init call is made in this function.
 */
void Custom_SPI_DMABufferConfig(void)
{
	/* Clock the controller, then put both streams back in reset state */
	RCC_AHB1PeriphClockCmd(RCC_AHB1Periph_DMA1,ENABLE);
	DMA_DeInit(DMA1_Stream3);	/* RX */
	DMA_DeInit(DMA1_Stream4);	/* TX */

	/* Request routing and peripheral side: fixed address of the SPI data register */
	DMA_InitStructure_Slave.DMA_Channel = DMA_Channel_0 ;
	DMA_InitStructure_Slave.DMA_PeripheralBaseAddr =(uint32_t) (&(SPI_DEVICE->DR)) ;
	DMA_InitStructure_Slave.DMA_PeripheralInc = DMA_PeripheralInc_Disable;
	DMA_InitStructure_Slave.DMA_PeripheralDataSize = DMA_PeripheralDataSize_Byte;
	DMA_InitStructure_Slave.DMA_PeripheralBurst = DMA_PeripheralBurst_Single;

	/* Memory side: walk through the buffer one byte at a time */
	DMA_InitStructure_Slave.DMA_MemoryInc = DMA_MemoryInc_Enable;
	DMA_InitStructure_Slave.DMA_MemoryDataSize = DMA_MemoryDataSize_Byte;
	DMA_InitStructure_Slave.DMA_MemoryBurst = DMA_MemoryBurst_Single ;

	/* Transfer behaviour: one-shot, FIFO disabled (threshold still filled in) */
	DMA_InitStructure_Slave.DMA_Mode = DMA_Mode_Normal;
	DMA_InitStructure_Slave.DMA_FIFOMode = DMA_FIFOMode_Disable ;
	DMA_InitStructure_Slave.DMA_FIFOThreshold = DMA_FIFOThreshold_Full ;

}
树莓派优化spi后

最终就可以看到树莓派接收数据以帧头对齐,总共传输了60多个字节,通过watch可以看到其接收交互基本稳定在0.5ms内,另外在DMA模式下可以将SPI的通讯频率提高到4MHz,而之前中断形式只能在0.8MHz:

综上,之前在单片机中能运行的代码现在在树莓派RT系统下也能跑起来了,当然还需要进一步对比实时性和同步性,可见底层驱动的可靠性对四足机器人来说十分重要,当然这样我们也就能赶快脱离单片机算力限制,早日部署MPC等先进控制算法,而该驱动部分也能不局限于树莓派作为主控,后续可以进一步用于更高算力的NCU或AGX等主控模组。