STM32 Bit-Banged 32-bit Communication: Multi-Slave Protocol Issues
Hello,
I’m building a custom communication protocol using an STM32 (F1 series) as a relay-only device, sitting between an ESP32 and multiple slave STM32F103C6T6As.
The ESP handles all frame dispatching and reception. The STM32 "master" is just an intermediary to ensure matching internal clock timing between STM32s (since ESP32's timing is too loose). I can’t use external crystals or change pin assignments.
The system uses bit-banged GPIO for communication:
- Master TX (to Slaves): PA11 (HIGH idle)
- Master RX (from Slaves): PB10 (LOW idle)
- Slave RX: PA9 / Slave TX: PA10
- Timings: 20ms LOW front, 40ms HIGH sync, 7ms per bit (32 bits), 40ms end
The ESP sends 32-bit frames (e.g. 0xF1F1F1F1, 0xABABABAB) to the STM32 master via UART. The STM then dispatches them bit-banged to the slaves and listens for a bit-banged 32-bit response. That response is passed back to the ESP.
Each slave listens on PA9, and only responds if the 24 LSBs match its own ID. The ESP performs a manual DISCOVER loop, incrementing the ID one by one until no slave replies. There is no automatic discovery based on line state.
Issue: Slaves always receive the data correctly, but the master sometimes gets the wrong reply (previous frame) or times out. All delays (pre-bit sampling, post-reception delays) are implemented.
E.g:
ESP sent:
0xF1F1F1F1
Master recv/sent to slaves:
0xF1F1F1F1
Slaves recv/sent:
0xF1F1F1F1
RAW: 00001110000011100000111000001110
Master recv:
RAW: 00001110000011100000111000001110
0x70707070
Finally, ESP recv:
RAW: 00001110000011100000111000001110
Recv: 0x0E0E0E0E
Constraints:
- No external clock
- GPIO-only communication
- Cannot change pin mapping
Questions:
- How to ensure proper TX/RX timing between STM32s without a shared clock?
- Is there a better method than delay-based bit-bang for stable GPIO communication?
- Have you implemented similar "bit-forwarding relays" with STM32s or other MCUs?
Any insights from others who built custom GPIO protocols would be amazing. Thanks!
Master code:
#include "main.h"
#include <string.h>
#include <stdio.h>
/* UART used in main() to receive 4-byte frames and to send the 4-byte reply back. */
UART_HandleTypeDef huart1;
/* UART used for human-readable debug/log output. */
UART_HandleTypeDef huart2;
/* Timer polled by delay_us() to time the bit-banged line. */
TIM_HandleTypeDef htim2;
/* Busy-wait for `us` timer ticks (assumed 1 tick = 1 us; TIM2 setup is not shown here). */
void delay_us(uint16_t us);
/* Bit-banged output line towards the slaves: PA11. */
#define MASTER_TX_PIN GPIO_PIN_11
#define MASTER_TX_PORT GPIOA
/* Bit-banged input line from the slaves: PB10. */
#define MASTER_RX_PIN GPIO_PIN_10
#define MASTER_RX_PORT GPIOB
/* Bit-banged frame transmit / receive, 32 bits, MSB first. */
void send_32bit_frame(uint32_t frame);
uint32_t receive_32bit_frame(void);
/* Peripheral init routines; their definitions are not part of this listing. */
void MX_GPIO_Init(void);
void MX_USART1_UART_Init(void);
void MX_USART2_UART_Init(void);
void MX_TIM2_Init(void);
/* Raw bytes of the most recent frame read from huart1 (byte 0 is the most significant). */
uint8_t uart_rx[4];
/**
 * Master relay entry point.
 *
 * After initialising the peripherals it loops forever:
 *   1. blocks until 4 bytes arrive on huart1 and assembles them big-endian
 *      into a 32-bit frame,
 *   2. bit-bangs that frame out with send_32bit_frame(),
 *   3. blocks in receive_32bit_frame() for the bit-banged reply,
 *   4. logs both values on huart2 and returns the reply as 4 big-endian
 *      bytes on huart1.
 *
 * Never returns.
 */
int main(void)
{
    HAL_Init();
    MX_GPIO_Init();
    MX_USART1_UART_Init();
    MX_USART2_UART_Init();
    MX_TIM2_Init();

    char *msg = "MASTER READY\r\n";
    HAL_UART_Transmit(&huart2, (uint8_t*)msg, strlen(msg), HAL_MAX_DELAY);

    while (1)
    {
        HAL_UART_Receive(&huart1, uart_rx, 4, HAL_MAX_DELAY);
        __HAL_UART_FLUSH_DRREGISTER(&huart1);

        /* Widen each byte to 32 bits BEFORE shifting: a uint8_t promotes to
         * (signed) int, and shifting a value >= 0x80 left by 24 overflows it. */
        uint32_t tx = ((uint32_t)uart_rx[0] << 24) |
                      ((uint32_t)uart_rx[1] << 16) |
                      ((uint32_t)uart_rx[2] << 8)  |
                       (uint32_t)uart_rx[3];

        send_32bit_frame(tx);
        uint32_t rx = receive_32bit_frame();

        /* Bounded formatting; the casts make the arguments match %lX exactly
         * regardless of how uint32_t is typedef'd by the toolchain. */
        char dbg[64];
        snprintf(dbg, sizeof dbg, "Sent: 0x%08lX | Recv: 0x%08lX\r\n",
                 (unsigned long)tx, (unsigned long)rx);
        HAL_UART_Transmit(&huart2, (uint8_t*)dbg, strlen(dbg), HAL_MAX_DELAY);

        /* Reply goes back most-significant byte first, mirroring the input order. */
        uint8_t resp_buf[4];
        resp_buf[0] = (uint8_t)((rx >> 24) & 0xFF);
        resp_buf[1] = (uint8_t)((rx >> 16) & 0xFF);
        resp_buf[2] = (uint8_t)((rx >> 8) & 0xFF);
        resp_buf[3] = (uint8_t)(rx & 0xFF);
        HAL_UART_Transmit(&huart1, resp_buf, 4, HAL_MAX_DELAY);

        HAL_Delay(5);
    }
}
void send_32bit_frame(uint32_t frame)
{
HAL_GPIO_WritePin(MASTER_TX_PORT, MASTER_TX_PIN, GPIO_PIN_RESET);
delay_us(20000);
HAL_GPIO_WritePin(MASTER_TX_PORT, MASTER_TX_PIN, GPIO_PIN_SET);
delay_us(40000);
for (int i = 31; i >= 0; i--) {
HAL_GPIO_WritePin(MASTER_TX_PORT, MASTER_TX_PIN, (frame >> i) & 1 ? GPIO_PIN_SET : GPIO_PIN_RESET);
delay_us(7000);
}
HAL_GPIO_WritePin(MASTER_TX_PORT, MASTER_TX_PIN, GPIO_PIN_RESET);
delay_us(40000);
HAL_GPIO_WritePin(MASTER_TX_PORT, MASTER_TX_PIN, GPIO_PIN_SET);
}
/**
 * Reverse the bit order of a single byte (bit 0 <-> bit 7, bit 1 <-> bit 6, ...).
 *
 * @param b  input byte.
 * @return   the byte with its eight bits mirrored.
 */
uint8_t reverse_byte(uint8_t b) {
    uint8_t mirrored = 0;

    /* Peel bits off the low end of the input and push them onto the low end
     * of the result, so the first bit taken ends up as the highest. */
    for (int k = 0; k < 8; k++) {
        mirrored = (uint8_t)((mirrored << 1) | (b & 1u));
        b = (uint8_t)(b >> 1);
    }
    return mirrored;
}
/**
 * Reverse the bit order inside each of the four bytes of a 32-bit word,
 * leaving the byte positions themselves unchanged
 * (e.g. 0x0E0E0E0E -> 0x70707070).
 *
 * Done with three mask-and-swap steps on the whole word. Every swap moves
 * bits by at most 4 positions under masks that never cross a byte boundary,
 * so all arithmetic stays in unsigned 32-bit and no byte is shifted through
 * a signed int.
 *
 * @param val  input word.
 * @return     word whose bytes are each bit-mirrored.
 */
uint32_t reverse_bits_per_byte(uint32_t val) {
    /* Swap the two nibbles of every byte. */
    val = ((val & 0xF0F0F0F0u) >> 4) | ((val & 0x0F0F0F0Fu) << 4);
    /* Swap the bit pairs inside every nibble. */
    val = ((val & 0xCCCCCCCCu) >> 2) | ((val & 0x33333333u) << 2);
    /* Swap the neighbouring bits inside every pair. */
    val = ((val & 0xAAAAAAAAu) >> 1) | ((val & 0x55555555u) << 1);
    return val;
}
/**
 * Receive one bit-banged 32-bit frame on the master RX pin, MSB first.
 *
 * Sequence (delay_us() units): wait until the line reads LOW, wait 20000,
 * wait until the line reads HIGH, wait 40000 + 3500, then take 32 samples
 * spaced 7000 apart. The sampled bits and the assembled value are logged on
 * huart2; afterwards the function waits for the line to read LOW and then
 * waits a further 40000 before returning.
 *
 * NOTE(review): every line wait is an unbounded busy loop, so this call never
 * returns if the expected edge does not occur.
 *
 * @return the 32 sampled bits, first sample in bit 31.
 */
uint32_t receive_32bit_frame(void)
{
    char dbg[33];

    while (HAL_GPIO_ReadPin(MASTER_RX_PORT, MASTER_RX_PIN) == GPIO_PIN_SET);
    delay_us(20000);
    while (HAL_GPIO_ReadPin(MASTER_RX_PORT, MASTER_RX_PIN) == GPIO_PIN_RESET);
    delay_us(40000);
    /* Half a bit period, so each sample lands mid-bit rather than on an edge. */
    delay_us(3500);

    uint32_t result = 0;
    for (int i = 0; i < 32; i++) {
        result <<= 1;
        GPIO_PinState bit = HAL_GPIO_ReadPin(MASTER_RX_PORT, MASTER_RX_PIN);
        result |= (bit == GPIO_PIN_SET ? 1u : 0u);
        dbg[i] = (bit == GPIO_PIN_SET ? '1' : '0');
        delay_us(7000);
    }
    dbg[32] = '\0';

    /* Length excludes the terminating NUL so no stray 0x00 byte is emitted
     * on the debug UART. */
    HAL_UART_Transmit(&huart2, (uint8_t*)"MASTER RX BITS: ", sizeof("MASTER RX BITS: ") - 1, HAL_MAX_DELAY);
    HAL_UART_Transmit(&huart2, (uint8_t*)dbg, 32, HAL_MAX_DELAY);
    HAL_UART_Transmit(&huart2, (uint8_t*)"\r\n", 2, HAL_MAX_DELAY);

    /* Bounded formatting; the cast makes the argument match %lX exactly. */
    char hex[32];
    snprintf(hex, sizeof hex, "Master value: 0x%08lX\r\n", (unsigned long)result);
    HAL_UART_Transmit(&huart2, (uint8_t*)hex, strlen(hex), HAL_MAX_DELAY);

    while (HAL_GPIO_ReadPin(MASTER_RX_PORT, MASTER_RX_PIN) == GPIO_PIN_SET);
    delay_us(40000);
    return result;
}
/**
 * Busy-wait until TIM2 has counted `us` ticks.
 *
 * The counter is zeroed, the timer is started, the counter is polled until it
 * reaches `us`, and the timer is stopped again.
 * Assumes TIM2 is configured to tick once per microsecond; that setup
 * (MX_TIM2_Init) is not shown here - TODO confirm.
 *
 * @param us  number of timer ticks to wait.
 */
void delay_us(uint16_t us)
{
    __HAL_TIM_SET_COUNTER(&htim2, 0);
    HAL_TIM_Base_Start(&htim2);

    /* Spin until the counter is no longer below the target. */
    for (;;) {
        if (!(__HAL_TIM_GET_COUNTER(&htim2) < us)) {
            break;
        }
    }

    HAL_TIM_Base_Stop(&htim2);
}
Slave's code:
#include "main.h"
#include <string.h>
#include <stdio.h>
/* UART used for human-readable debug/log output. */
UART_HandleTypeDef huart2;
/* Timer polled by delay_us() to time the bit-banged line. */
TIM_HandleTypeDef htim2;
/* Busy-wait for `us` timer ticks (assumed 1 tick = 1 us; TIM2 setup is not shown here). */
void delay_us(uint16_t us);
/* Bit-banged input line from the master: PA9. */
#define SLAVE_RX_PIN GPIO_PIN_9
#define SLAVE_RX_PORT GPIOA
/* Bit-banged output line towards the master: PA10. */
#define SLAVE_TX_PIN GPIO_PIN_10
#define SLAVE_TX_PORT GPIOA
/* Bit-banged frame receive / transmit, 32 bits, MSB first. */
uint32_t wait_and_read_frame(void);
void send_32bit_frame(uint32_t frame);
/* Peripheral init routines; their definitions are not part of this listing. */
void MX_GPIO_Init(void);
void MX_USART2_UART_Init(void);
void MX_TIM2_Init(void);
/**
 * Slave entry point.
 *
 * After initialising the peripherals it loops forever: block until a frame
 * has been read from the RX line, log it on huart2, wait 10000 delay_us()
 * units, then answer with the bitwise complement of the received value.
 *
 * Never returns.
 */
int main(void)
{
    HAL_Init();
    MX_GPIO_Init();
    MX_USART2_UART_Init();
    MX_TIM2_Init();

    char *msg = "SLAVE READY\r\n";
    HAL_UART_Transmit(&huart2, (uint8_t*)msg, strlen(msg), HAL_MAX_DELAY);

    while (1)
    {
        uint32_t received = wait_and_read_frame();

        /* Bounded formatting; the cast makes the argument match %lX exactly
         * regardless of how uint32_t is typedef'd by the toolchain. */
        char dbg[64];
        snprintf(dbg, sizeof dbg, "Recv: 0x%08lX\r\n", (unsigned long)received);
        HAL_UART_Transmit(&huart2, (uint8_t*)dbg, strlen(dbg), HAL_MAX_DELAY);

        /* Pause before driving the TX line with the reply. */
        delay_us(10000);
        send_32bit_frame(~received);
    }
}
/**
 * Block until one bit-banged 32-bit frame has been read from the slave RX pin.
 *
 * Sequence (delay_us() units): wait until the line reads LOW, wait 20000,
 * wait until the line reads HIGH, wait 40000 + 3500, then take 32 samples
 * spaced 7000 apart (first sample becomes bit 31). Finally wait for the line
 * to read LOW and wait a further 40000.
 *
 * @return the 32 sampled bits.
 */
uint32_t wait_and_read_frame(void)
{
    enum { FRONT_LOW = 20000, SYNC_HIGH = 40000, HALF_BIT = 3500, BIT_TIME = 7000, END_LOW = 40000 };

    uint32_t bits = 0;
    int remaining = 32;

    /* Preamble: falling edge, LOW front, rising edge, HIGH sync. */
    while (HAL_GPIO_ReadPin(SLAVE_RX_PORT, SLAVE_RX_PIN) == GPIO_PIN_SET) {
    }
    delay_us(FRONT_LOW);
    while (HAL_GPIO_ReadPin(SLAVE_RX_PORT, SLAVE_RX_PIN) == GPIO_PIN_RESET) {
    }
    delay_us(SYNC_HIGH);
    /* Offset by half a bit so samples are taken mid-bit. */
    delay_us(HALF_BIT);

    /* Payload: shift each sampled level in from the right. */
    while (remaining-- > 0) {
        uint32_t level = 0u;
        if (HAL_GPIO_ReadPin(SLAVE_RX_PORT, SLAVE_RX_PIN) == GPIO_PIN_SET) {
            level = 1u;
        }
        bits = (bits << 1) | level;
        delay_us(BIT_TIME);
    }

    /* Trailer: wait for the LOW end marker and sit it out. */
    while (HAL_GPIO_ReadPin(SLAVE_RX_PORT, SLAVE_RX_PIN) == GPIO_PIN_SET) {
    }
    delay_us(END_LOW);
    return bits;
}
/**
 * Bit-bang one 32-bit frame on the slave TX pin, most significant bit first,
 * then dump the transmitted bit pattern as ASCII on huart2.
 *
 * Line sequence (delay_us() units): LOW for 20000, HIGH for 40000, 32 data
 * levels held 7000 each, then LOW for 40000. The line is left LOW on return.
 *
 * The debug dump is sent only after the end marker has been driven. A
 * blocking UART write placed between the last data bit and the end marker
 * would hold the final bit on the wire for longer than one bit period.
 *
 * @param frame  value to transmit.
 */
void send_32bit_frame(uint32_t frame)
{
    char dbg[33];

    HAL_GPIO_WritePin(SLAVE_TX_PORT, SLAVE_TX_PIN, GPIO_PIN_RESET);
    delay_us(20000);
    HAL_GPIO_WritePin(SLAVE_TX_PORT, SLAVE_TX_PIN, GPIO_PIN_SET);
    delay_us(40000);

    for (int i = 31; i >= 0; i--) // MSB first
    {
        GPIO_PinState bit = (frame >> i) & 1 ? GPIO_PIN_SET : GPIO_PIN_RESET;
        HAL_GPIO_WritePin(SLAVE_TX_PORT, SLAVE_TX_PIN, bit);
        dbg[31 - i] = (bit == GPIO_PIN_SET ? '1' : '0');
        delay_us(7000);
    }
    dbg[32] = '\0';

    /* End marker first, so the last data bit is exactly one bit period wide. */
    HAL_GPIO_WritePin(SLAVE_TX_PORT, SLAVE_TX_PIN, GPIO_PIN_RESET);
    delay_us(40000);

    /* Debug output, now outside the timing-critical part of the frame. */
    HAL_UART_Transmit(&huart2, (uint8_t*)dbg, 32, HAL_MAX_DELAY);
    HAL_UART_Transmit(&huart2, (uint8_t*)"\n", 1, HAL_MAX_DELAY);
}
/**
 * Busy-wait until TIM2 has counted `us` ticks.
 *
 * The counter is zeroed, the timer is started, the counter is polled until it
 * reaches `us`, and the timer is stopped again.
 * Assumes TIM2 is configured to tick once per microsecond; that setup
 * (MX_TIM2_Init) is not shown here - TODO confirm.
 *
 * @param us  number of timer ticks to wait.
 */
void delay_us(uint16_t us)
{
    __HAL_TIM_SET_COUNTER(&htim2, 0);
    HAL_TIM_Base_Start(&htim2);

    /* Spin until the counter is no longer below the target. */
    for (;;) {
        if (!(__HAL_TIM_GET_COUNTER(&htim2) < us)) {
            break;
        }
    }

    HAL_TIM_Base_Stop(&htim2);
}
