STM32G431 – ioprog

Using the Espressif PSRAM64H IC with an STM32G431

November 7, 2025November 10, 2025FrankLeave a comment

The PSRAM64H is a 3.3V 8MByte RAM device that is accessed using SPI (datasheet here). It can operate at speeds of up to 133MHz (though 84MHz is a more realistic top-end value). I’m using it at 5MHz just so that my knock-off cheap logic analyser can keep up with it.

The picture above shows my test layout. The PSRAM64H is mounted on a breakout board so that it can be used in a breadboard with the STM32G431.

The test code is shown below. It uses SPI2 on the STM32 device. After initialization, the main loop reads the chip ID and writes 2048 bytes to PSRAM starting at address 0x123456. It then reads back data from the same address. If the received data does not match the transmitted data then the LED is turned on.

// Program to interact with the PSRAM64H IC from Espressif


/* IO LIST 
 * Will use simple SPI (not QSPI) in this example.
 * PSRAM64H         STM32G431
 * CE#             SPI2 NSS  Pin 2 (PF0)   = AF5
 * CLK             SPI2 SCK  Pin 3 (PF1)   = AF5
 * SI/SIO(0) MOSI  SPI2 MOSI Pin 21 (PA11) = AF5
 * SO/SIO(1) MISO  SPI2 MISO Pin 20 (PA10) = AF5
 * 
 * LED             Pin 5 (PA0) 
*/

#include <stdint.h>
#include "../include/STM32G431xx.h"
/*
 * Enable the internal pull-up resistor on one pin of a GPIO port.
 *   Port      : GPIO port whose PUPDR register is modified
 *   BitNumber : pin index within the port (PUPDR holds a 2-bit field per pin)
 */
void enablePullUp(GPIO_Type *Port, uint32_t BitNumber)
{
	uint32_t pupdr_value = Port->PUPDR;
	pupdr_value = pupdr_value & ~(3u << (BitNumber * 2)); // clear pull-up resistor bits
	pupdr_value = pupdr_value | (1u << (BitNumber * 2));  // set pull-up bit
	// One register write instead of clear-then-set, so the pin never
	// passes through an intermediate "no pull" configuration.
	Port->PUPDR = pupdr_value;
}
/*
 * Select the mode of one pin of a GPIO port.
 *   Port      : GPIO port whose MODER register is modified
 *   BitNumber : pin index within the port (MODER holds a 2-bit field per pin)
 *   Mode      : 00 = input
 *               01 = output
 *               10 = special function
 *               11 = analog mode
 *               Only the low two bits are used.
 */
void pinMode(GPIO_Type *Port, uint32_t BitNumber, uint32_t Mode)
{
	uint32_t mode_value = Port->MODER;
	// Mask to the 2-bit field width so an out-of-range Mode cannot
	// spill into the fields of neighbouring pins.
	Mode = (Mode & 3u) << (2 * BitNumber);
	mode_value = mode_value & ~(3u << (BitNumber * 2));
	mode_value = mode_value | Mode;
	Port->MODER = mode_value;
}
/*
 * Route one pin of a GPIO port to an alternate function.
 *   Port      : GPIO port whose AFR registers are modified
 *   BitNumber : pin index within the port, expected range 0..15
 *   AF        : alternate function number; only the low four bits are used
 */
void selectAlternateFunction (GPIO_Type *Port, uint32_t BitNumber, uint32_t AF)
{
    // The alternative function control is spread across two 32 bit registers AFR[0] and AFR[1]
    // There are 4 bits for each port bit: pins 0-7 live in AFR[0], pins 8-15 in AFR[1].
    uint32_t reg_index = (BitNumber < 8) ? 0u : 1u;
    uint32_t shift = 4u * (BitNumber & 7u);
    uint32_t afr_value = Port->AFR[reg_index];

    // The mask is unsigned: shifting the signed constant 0x0f left by 28
    // (pins 7 and 15) overflows int, which is undefined behaviour.
    afr_value = afr_value & ~(0x0fu << shift);
    afr_value = afr_value | ((AF & 0x0fu) << shift);

    // Single write so the pin is never momentarily switched to function 0.
    Port->AFR[reg_index] = afr_value;
}
/*
 * Begin an SPI2 transaction by switching the peripheral on.
 * The matching spi_stopTransaction() switches it off again.
 */
void spi_startTransaction(void)
{
	const uint32_t spe_mask = (1 << 6); // SPE lives in bit 6 of CR1

	SPI2->CR1 = SPI2->CR1 | spe_mask;   // Enable SPI (SPE = 1)
}
/*
 * End an SPI2 transaction: let outstanding traffic finish, switch the
 * peripheral off (SPE = 0) and then block until the NSS pin (PF0) reads high.
 *
 * Three of the status polls are bounded by a ~1000-iteration countdown; the
 * FIFO poll and the final NSS poll have no timeout and will spin for as long
 * as their condition holds.
 */
void spi_stopTransaction(void)
{	
	// Countdown for the bounded polls below; re-armed before each one.
	// Declared volatile -- presumably so the countdown is not optimised away.
	volatile unsigned Timeout = 1000;    
	while (SPI2->SR & ((1 << 12) + (1 << 11)) );     // spin (no timeout) while SR bits 12:11 are non-zero, i.e. until the fifo has emptied
	while (((SPI2->SR & (1 << 0))!=0)&&(Timeout--)); // spin while RXNE (SR bit 0) is still set, or until the countdown expires
	Timeout = 1000;    
	while (((SPI2->SR & (1 << 1))==0)&&(Timeout--)); // spin until TXE (SR bit 1) is set, or until the countdown expires
	Timeout = 1000;    
	while (((SPI2->SR & (1 << 7))!=0)&&(Timeout--)); // spin while Busy (SR bit 7) is set, or until the countdown expires
	SPI2->CR1 &= ~(1 << 6); // Disable SPI (SPE = 0)
		
	// Do not return until the chip-select line has actually been released;
	// NSS is read back through GPIOF bit 0 (PF0). No timeout on this poll.
	while((GPIOF->IDR & (1 << 0))==0); // wait for NSS to go high
	
}
/*
 * Exchange one byte on SPI2: send `data` and return the byte clocked in
 * at the same time.
 *
 * Completion is detected with the TXE/RXNE flags rather than BSY. The ST
 * reference manual advises against using BSY to pace individual transfers
 * (it may not be asserted yet immediately after the write to DR).
 * This relies on the receive threshold being configured for 8-bit events,
 * which main() does when it programs CR2.
 *
 * Both polls are unbounded, as the original BSY poll was.
 */
uint8_t spi_transfer(uint8_t data)
{
	// DR must be accessed as a byte to move exactly 8 bits; keep the
	// volatile qualifier so the compiler cannot drop or merge the accesses.
	volatile uint8_t *dr8 = (volatile uint8_t *)&SPI2->DR;

	while ((SPI2->SR & (1u << 1)) == 0); // Wait for TXE (room to transmit)
	*dr8 = data;
	while ((SPI2->SR & (1u << 0)) == 0); // Wait for RXNE (reply byte received)
	return *dr8;
}
void delay(uint32_t dly)
{
    while(dly--);
}
/*
 * Write a buffer to the PSRAM in a single SPI transaction.
 *   address : byte address in the PSRAM; only the low 24 bits are sent
 *   data    : source buffer (read only, not modified)
 *   nbytes  : number of bytes to send; 0 sends just the command and address
 *
 * NOTE(review): the whole buffer goes out in one chip-select-low burst.
 * Confirm against the PSRAM64H datasheet limits on maximum CE# low time and
 * page-boundary crossing before relying on long bursts.
 */
void writePSRAM(uint32_t address, void *data, uint32_t nbytes)
{
    // Walk the buffer through a byte pointer: arithmetic on a void pointer
    // is a compiler extension, not standard C.
    const uint8_t *src = (const uint8_t *)data;

    spi_startTransaction();
    spi_transfer(0x02);                      // write command
    spi_transfer((uint8_t)(address >> 16));  // address, most significant byte first
    spi_transfer((uint8_t)(address >> 8));
    spi_transfer((uint8_t)address);
    while (nbytes--)
    {
        spi_transfer(*src);
        src++;
    }
    spi_stopTransaction();
}
/*
 * Read a block from the PSRAM in a single SPI transaction.
 *   address : byte address in the PSRAM; only the low 24 bits are sent
 *   data    : destination buffer, must have room for nbytes bytes
 *   nbytes  : number of bytes to read; 0 sends just the command and address
 *
 * NOTE(review): the whole block is read in one chip-select-low burst.
 * Confirm against the PSRAM64H datasheet limits on maximum CE# low time and
 * page-boundary crossing before relying on long bursts.
 */
void readPSRAM(uint32_t address, void *data, uint32_t nbytes)
{
    // Walk the buffer through a byte pointer: arithmetic on a void pointer
    // is a compiler extension, not standard C.
    uint8_t *dst = (uint8_t *)data;

    spi_startTransaction();
    spi_transfer(0x03);                      // read command
    spi_transfer((uint8_t)(address >> 16));  // address, most significant byte first
    spi_transfer((uint8_t)(address >> 8));
    spi_transfer((uint8_t)address);
    while (nbytes--)
    {
        *dst = spi_transfer(0xff);           // 0xff is a dummy byte sent to clock the data in
        dst++;
    }
    spi_stopTransaction();
}
/*
 * Issue the 0x9f command to the PSRAM and return two of the reply bytes.
 * Three 0xff bytes follow the command and their replies are discarded; the
 * next two reply bytes are returned as a 16-bit value, first byte in bits
 * 15:8 and second byte in bits 7:0.
 */
uint32_t readIDPSRAM(void)
{
    uint32_t upper;
    uint32_t lower;
    int skipped;

    spi_startTransaction();
    spi_transfer(0x9f);
    for (skipped = 0; skipped < 3; skipped++)
    {
        spi_transfer(0xff);   // reply ignored
    }
    upper = spi_transfer(0xff);
    lower = spi_transfer(0xff);
    spi_stopTransaction();

    return (upper << 8) + lower;
}
// Test pattern that main() fills in and writes to the PSRAM.
uint8_t data_out[2048];    
// Buffer that main() reads the PSRAM contents back into for comparison with data_out.
uint8_t data_in[2048];
// Value returned by the most recent readIDPSRAM() call in main().
uint32_t chip_id;
/*
 * Test program: configure the pins and SPI2, then repeatedly read the chip
 * ID, write the data_out pattern to PSRAM address 0x123456, read it back
 * into data_in and compare. The LED on PA0 is driven high if any byte
 * differs and low if the whole buffer matches.
 */
int main()
{
    uint32_t count,drain;
    uint32_t error_count;

    RCC->AHB2ENR |= (1 << 0) | (1 << 5); // enable Port A and Port F
    pinMode(GPIOA,0,1);   // LED: output
    pinMode(GPIOA,10,2);  // SPI2 pins: special (alternate) function
    pinMode(GPIOA,11,2);
    pinMode(GPIOF,0,2);
    pinMode(GPIOF,1,2);
    selectAlternateFunction(GPIOA,10,5);
    selectAlternateFunction(GPIOA,11,5);
    selectAlternateFunction(GPIOF,0,5);
    selectAlternateFunction(GPIOF,1,5);
    RCC->APB1ENR1 |= (1 << 14); // enable SPI2
    // set port bits up as high speed outputs
    GPIOA->OSPEEDR |= (3u << 2*10) + (3u << 2*11);
    GPIOF->OSPEEDR |= (3u << 0) + (3u << 2*1);

    // Dummy read of SR to clear MODF. This must be the peripheral actually
    // in use (SPI2); the value itself is not needed.
    drain = SPI2->SR;
    (void)drain;

    // CR1: bit 5 = baud rate divider field BR set to 100 (PCLK/32),
    //      bit 2 = master mode, bit 1 = CPOL, bit 0 = CPHA (CPHA=CPOL=1).
    // Software slave management is not enabled; NSS is driven by hardware.
    SPI2->CR1 = (1 << 5) + (1 << 2)+ (1 << 1) + (1 << 0); // Master mode, about 5MHz.  CPHA=CPOL=1
    // CR2: bit 12 = 8-bit receive threshold, bits 10:8 = 8 bit data size,
    //      bit 2 = SS output enabled.
    SPI2->CR2 = (1 << 12) + (1 << 10) + (1 << 9) + (1 << 8) + (1 << 2); 	// SS output enabled, 8 bit mode

    // Test pattern: each byte holds the low 8 bits of its own index.
    for (count=0;count<sizeof(data_out);count++)
        data_out[count]=(uint8_t)count;
    while(1)
    {
        chip_id=readIDPSRAM();
        error_count=0;
        writePSRAM(0x123456,data_out,sizeof(data_out));
        readPSRAM(0x123456,data_in,sizeof(data_in));
        // Verify every byte that was transferred, not just a prefix.
        for (count=0;count<sizeof(data_out);count++)
        {
            if (data_in[count]!=data_out[count])
            {
                error_count++;
            }
        }
        if (error_count==0)
        {
            GPIOA->ODR &= ~1u;   // all bytes match: LED off
        }
        else
        {
            GPIOA->ODR |= 1;     // mismatch: LED on
        }
        delay(1000000);
    }
}

The SPI waveforms for the read ID transaction are shown below. Data reads and writes were successful at 5.33MHz. In a later post I will try bumping this speed up a bit.

Using the FMAC in the STM32G431

May 24, 2021May 24, 2021Frank2 Comments

The STM32G431 has a Filter Math Accelerator (FMAC) hardware unit inside of it. This unit can be used to implement an FIR or IIR filter without burdening the CPU. The FMAC unit has input and output circular buffers as well as a coefficient buffer. It is possible to connect the input buffer to an ADC using DMA and similarly it is possible to connect an output buffer directly to a DAC over DMA. In the case of this project I used an ADC interrupt handler to manage data input and output to the FMAC.

There are lots of tools to help you design a digital filter. I chose to use python and jupyter notebook in this case. The jupyter notebook code is as follows (it is also on the github site linked below)

import numpy as np
import scipy as sp
import scipy.signal as sg
import matplotlib.pyplot as plt
# Design a low-pass FIR filter, plot its response and export the scaled
# integer coefficients to the C header coffs.h.
Fs = 48000      # sample rate (Hz)
Fpass = 1000    # cut-off frequency (Hz)
Order = 16      # filter order; firwin is asked for Order+1 taps
Wp = Fpass / (Fs / 2)   # cut-off normalised to the Nyquist frequency
b = sg.firwin(Order + 1, Wp, window="hamming", pass_zero=True)

# Magnitude response in dB against frequency in Hz
w, h = sg.freqz(b)
mag = 20 * np.log10(abs(h))
plt.figure()
plt.semilogx(w * (Fs / (2 * np.pi)), mag)
plt.show()

bmax = np.max(np.abs(b))
# Working out the scale factor can be a bit tricky.  There is a
# 24 bit accumulator in the FMAC.  The ADC has a 12bit range.
# This leaves 12 bits for coefficients if overflows are to be prevented.
# Furthermore, the multiply and accumulate nature of the FIR will push
# results beyond 24 bits if we are not careful.  This is more pronounced with
# lower cut-off frequencies where there is a large central lobe to the filter
# coefficients which may lead to overflows, particularly at low input
# frequencies.  For now I'm just doing this by trial and error
ScaleFactor = 4095 / (bmax)   # the largest coefficient maps to 4095

# The context manager closes the header even if one of the writes fails.
# NOTE(review): FILTER_LENGTH is written as Order, but b[] holds Order+1
# coefficients -- confirm which of the two the C code consuming coffs.h expects.
with open('coffs.h', 'w') as f:
    f.write("#include <stdint.h>\n")
    f.write("#define SCALE_FACTOR ")
    f.write(str(int(np.round(ScaleFactor))))
    f.write("\n")
    f.write("#define FILTER_LENGTH ")
    f.write(str(Order))
    f.write("\n")
    f.write("const int16_t b[]={")
    for coeff in b:
        f.write(str(int(np.round(coeff * ScaleFactor))))
        f.write(",\n")
    f.write("};\n")

# Plot the raw (unscaled) coefficients.  The trailing semicolon keeps the
# notebook from echoing the return value of plot().
plt.figure()
plt.plot(b);

This code outputs a header file that includes the filter coefficients for a low pass FIR filter with a cutoff frequency of 1000Hz. The output at 2kHz is shown below

And at 4kHz this becomes:

It would appear that the filter is indeed working however there are a number of caveats. The FMAC uses fixed point arithmetic so coefficients and input signals must be shifted and scaled appropriately. The FMAC has a limited numeric range (24 bits of fractional data internally, 15 bits input and output) and overflows will happen. This is a particular problem at low frequencies with filters whose coefficients are mostly/all positive. I had to do some manual tweaking of the coefficients to get the output performance I wanted. When testing for such overflows it is useful to input a DC signal of maximum voltage to ensure that no overflows occur.

As usual, code is available over on github

Waiting for /CS

March 28, 2021Frank3 Comments

I have been working on an interface between an STM32G431 and a W25Q128FV SPI flash memory chip (128 Mbit/16MByte). The image above shows the memory chip on a breakout board with a surface mount capacitor next to it. Given the nature of breadboards and the wires used I’ve been running the memory interface at a reduced speed (1.3MHz). A PCB will be used at a later stage, which should allow for higher speeds.

Erasing the chip was presenting some problems. The code for erase was as follows:

// Erase the whole flash device: enable writes, send the 0xc7 command in its
// own SPI transaction, then block until bit 0 of status register 1 reads
// clear. The poll has no timeout.
void serial_flash::bulk_erase()
{
	write_enable();

	SPI->startTransaction();
	SPI->transfer(static_cast<uint8_t>(0xc7));
	SPI->stopTransaction();

	// wait until erase has completed
	for (;;)
	{
		if ((read_status1() & 1) == 0)
		{
			break;
		}
	}
}

The chip must be put into “write” mode before the erase command (0xC7) is sent. The function exits when the “busy” bit in status register 1 is clear. While everything seemed ok, the function did not erase the chip. I took a closer look at the SPI bus using a logic analyzer. I have a very cheap logic analyzer which doesn’t have a very good trigger mechanism. My normal workaround for this is to put the area under test into an everlasting loop and then view the pins of interest on the logic analyzer. This is a problem for erase operations like this as the SPI flash chip has only so many erase cycles. As a precaution I changed the command code to 0xd7 (not a supported command), which allowed me to look at the SPI bus without harming the chip. I also commented out the loop that polled the status register.

The write enable command (0x06) is plainly visible as is the “fake” chip erase command 0xd7. The CS line is driven low just before the 0x06 command and goes high some time after the 0xd7 command. This is not the correct way to erase this chip. The data sheet clearly states that the CS line must go high for a period after each command. It does not do this after the write enable command. The write_enable function is as follows:

// Send the single-byte 0x06 command to the flash chip as a complete SPI
// transaction of its own.
void serial_flash::write_enable(void)
{
	const uint8_t write_enable_opcode = 0x06;

	SPI->startTransaction();
	SPI->transfer(write_enable_opcode);
	SPI->stopTransaction();
}

The stopTransaction function should drive the CS line high but it didn’t seem to be working. The relevant SPI code is:

// End an SPI1 transaction: let outstanding traffic finish, then switch the
// peripheral off (SPE = 0).
// This version returns as soon as SPE has been cleared. It does not check
// that the chip-select line has actually gone high, so a transaction started
// immediately afterwards may begin before CS has been released.
void spi::stopTransaction(void)
{	
	// Countdown for the bounded polls below; re-armed before each one.
	volatile unsigned Timeout = 1000;    
	while (SPI1->SR & ((1 << 12) + (1 << 11)) );     // spin (no timeout) while SR bits 12:11 are non-zero, i.e. until the fifo has emptied
	while (((SPI1->SR & (1 << 0))!=0)&&(Timeout--)); // spin while RXNE (SR bit 0) is still set, or until the countdown expires
	Timeout = 1000;    
	while (((SPI1->SR & (1 << 1))==0)&&(Timeout--)); // spin until TXE (SR bit 1) is set, or until the countdown expires
	Timeout = 1000;    
	while (((SPI1->SR & (1 << 7))!=0)&&(Timeout--)); // spin while Busy (SR bit 7) is set, or until the countdown expires
	SPI1->CR1 &= ~(1 << 6); // Disable SPI (SPE = 0)
				
}

This should have worked but it clearly didn’t. Thinking about the sequence of events involved in the bulk_erase function it occurred to me that the call to startTransaction just after the write_enable command may actually be happening before the SPI peripheral had a chance to raise the CS line. The SPI peripheral is routed through GPIO port A in this setup. I noticed that I could monitor the status of the CS pin by reading GPIOA’s input data register and hence wait for it to go high. The stopTransaction code was modified as follows:

// End an SPI1 transaction: let outstanding traffic finish, switch the
// peripheral off (SPE = 0), then block until the CS pin reads high so that
// the next transaction cannot start while chip-select is still asserted.
// The FIFO poll and the final CS poll have no timeout.
void spi::stopTransaction(void)
{	
	// Countdown for the bounded polls below; re-armed before each one.
	volatile unsigned Timeout = 1000;    
	while (SPI1->SR & ((1 << 12) + (1 << 11)) );     // spin (no timeout) while SR bits 12:11 are non-zero, i.e. until the fifo has emptied
	while (((SPI1->SR & (1 << 0))!=0)&&(Timeout--)); // spin while RXNE (SR bit 0) is still set, or until the countdown expires
	Timeout = 1000;    
	while (((SPI1->SR & (1 << 1))==0)&&(Timeout--)); // spin until TXE (SR bit 1) is set, or until the countdown expires
	Timeout = 1000;    
	while (((SPI1->SR & (1 << 7))!=0)&&(Timeout--)); // spin while Busy (SR bit 7) is set, or until the countdown expires
	SPI1->CR1 &= ~(1 << 6); // Disable SPI (SPE = 0)
		
	// CS is read back through GPIOA bit 4; spin until the pin reads high.
	while((GPIOA->IDR & (1 << 4))==0); // wait for CS to go high
	
}

This produced the following output from the logic analyzer:

A high pulse can now be seen between the two SPI commands. As a final test, I replaced the fake “0xD7” command with “0xC7” and presto: erases now work.

Various examples for the STM32G431

March 7, 2020FrankLeave a comment

The STM32G431 was recently introduced by ST-Microelectronics. It contains a Cortex M4 core running at 170MHz along with ADC’s, DAC’s timers and some interesting DSP acceleration hardware. I’ve just got started on this chip and have uploaded a number of examples to github.
The version of openocd that came with my Ubuntu installation did not support this chip so I had to download a more up to date version from here.
Example code so far ranges from Blinky up to stereo analogue pass-through. I plan to work on some FIR and IIR examples soon. stm32g431_bbreadboard