Fix leaking non-active (e.g., bound) LWIP TCP PCBs

tcp_close_tcpip() only closed active TCP PCBs, leaking other types of TCP PCBs
(notably bound only, not yet connected/used).  With time, this led to DoS
situations.

Fix by giving up on the hacky way of searching the LWIP innards for the TCP PCB
needing closing.  The searching was non-comprehensive, hence the problem in
principle, and inherently racy: could close a meanwhile freed & reused PCB.

Pass the pointer to the struct mg_lwip_conn_state of the struct mg_connection
being destroyed to tcp_close_tcpip() instead of the bare PCB pointer, so the PCB
pointer stored within can be used.  Delay unassigning that pointer from the
struct mg_connection until tcp_close_tcpip() finishes, so that
a mg_lwip_tcp_error_cb() meanwhile can clear the PCB pointer stored within.
That ensures no double-closing of the TCP PCB when a concurrent TCP error makes
LWIP close the TCP PCB from the LWIP side.

NB!  At the moment, PCBs can still leak if tcp_close() fails, which is unlikely
but possible as per the API.  Retrying tcp_close() should somehow be
implemented.  For now, at least add logging of tcp_close() failures.

Also: remove the extraneous though benign double-calling of tcp_arg(…, NULL).

h/t @QRPp
This commit is contained in:
Deomid Ryabkov 2022-09-27 09:13:25 +01:00
parent a1fcbb0718
commit 44b15b76f7
2 changed files with 30 additions and 32 deletions

View File

@ -15594,24 +15594,24 @@ int mg_lwip_if_create_conn(struct mg_connection *nc) {
return 1;
}
extern struct tcp_pcb *tcp_active_pcbs;
static void tcp_close_tcpip(void *arg) {
/*
* A race with tcp error is possible:
* - mg_lwip_if_destroy_conn is called, schedules this function.
* - RST arrives, mg_lwip_tcp_error_cb is invoked and pcb is destroyed.
* - this function runs.
* Since tcp_err doesn't get the pcb pointer, I don't see any other way than
* walking the list of active connections. It is ostensibly private but
* fortunately for us is not declared static.
* That will have NULLed pcb.tcp.
*/
struct tcp_pcb *pcb;
for (pcb = tcp_active_pcbs; pcb != NULL; pcb = pcb->next) {
if (pcb == (struct tcp_pcb *) arg) break;
}
if (pcb != NULL) {
tcp_close(pcb);
struct mg_lwip_conn_state *cs = arg;
struct tcp_pcb *pcb = cs->pcb.tcp;
if (pcb == NULL) return;
tcp_arg(pcb, NULL);
err_t err = tcp_close(pcb);
if (err == ERR_OK) {
cs->pcb.tcp = NULL;
} else {
LOG(LL_ERROR, ("tcp_close(%p) err %d, PCB leak!", pcb, (int) err));
}
}
@ -15622,18 +15622,16 @@ static void udp_remove_tcpip(void *arg) {
void mg_lwip_if_destroy_conn(struct mg_connection *nc) {
if (nc->sock == INVALID_SOCKET) return;
struct mg_lwip_conn_state *cs = (struct mg_lwip_conn_state *) nc->sock;
nc->sock = INVALID_SOCKET;
/* Do not close "ephemeral" UDP conns */
if ((nc->flags & MG_F_UDP) && nc->listener != NULL) {
nc->sock = INVALID_SOCKET;
return;
}
if (!(nc->flags & MG_F_UDP)) {
struct tcp_pcb *tpcb = cs->pcb.tcp;
if (tpcb != NULL) {
tcp_arg(tpcb, NULL);
DBG(("%p tcp_close %p", nc, tpcb));
tcp_arg(tpcb, NULL);
mg_lwip_netif_run_on_tcpip(tcp_close_tcpip, tpcb);
mg_lwip_netif_run_on_tcpip(tcp_close_tcpip, cs);
}
} else {
struct udp_pcb *upcb = cs->pcb.udp;
@ -15642,6 +15640,7 @@ void mg_lwip_if_destroy_conn(struct mg_connection *nc) {
mg_lwip_netif_run_on_tcpip(udp_remove_tcpip, upcb);
}
}
nc->sock = INVALID_SOCKET;
while (cs->rx_chain != NULL) {
struct pbuf *seg = cs->rx_chain;
cs->rx_chain = pbuf_dechain(cs->rx_chain);

View File

@ -668,24 +668,24 @@ int mg_lwip_if_create_conn(struct mg_connection *nc) {
return 1;
}
extern struct tcp_pcb *tcp_active_pcbs;
static void tcp_close_tcpip(void *arg) {
/*
* A race with tcp error is possible:
* - mg_lwip_if_destroy_conn is called, schedules this function.
* - RST arrives, mg_lwip_tcp_error_cb is invoked and pcb is destroyed.
* - this function runs.
* Since tcp_err doesn't get the pcb pointer, I don't see any other way than
* walking the list of active connections. It is ostensibly private but
* fortunately for us is not declared static.
* That will have NULLed pcb.tcp.
*/
struct tcp_pcb *pcb;
for (pcb = tcp_active_pcbs; pcb != NULL; pcb = pcb->next) {
if (pcb == (struct tcp_pcb *) arg) break;
}
if (pcb != NULL) {
tcp_close(pcb);
struct mg_lwip_conn_state *cs = arg;
struct tcp_pcb *pcb = cs->pcb.tcp;
if (pcb == NULL) return;
tcp_arg(pcb, NULL);
err_t err = tcp_close(pcb);
if (err == ERR_OK) {
cs->pcb.tcp = NULL;
} else {
LOG(LL_ERROR, ("tcp_close(%p) err %d, PCB leak!", pcb, (int) err));
}
}
@ -696,18 +696,16 @@ static void udp_remove_tcpip(void *arg) {
void mg_lwip_if_destroy_conn(struct mg_connection *nc) {
if (nc->sock == INVALID_SOCKET) return;
struct mg_lwip_conn_state *cs = (struct mg_lwip_conn_state *) nc->sock;
nc->sock = INVALID_SOCKET;
/* Do not close "ephemeral" UDP conns */
if ((nc->flags & MG_F_UDP) && nc->listener != NULL) {
nc->sock = INVALID_SOCKET;
return;
}
if (!(nc->flags & MG_F_UDP)) {
struct tcp_pcb *tpcb = cs->pcb.tcp;
if (tpcb != NULL) {
tcp_arg(tpcb, NULL);
DBG(("%p tcp_close %p", nc, tpcb));
tcp_arg(tpcb, NULL);
mg_lwip_netif_run_on_tcpip(tcp_close_tcpip, tpcb);
mg_lwip_netif_run_on_tcpip(tcp_close_tcpip, cs);
}
} else {
struct udp_pcb *upcb = cs->pcb.udp;
@ -716,6 +714,7 @@ void mg_lwip_if_destroy_conn(struct mg_connection *nc) {
mg_lwip_netif_run_on_tcpip(udp_remove_tcpip, upcb);
}
}
nc->sock = INVALID_SOCKET;
while (cs->rx_chain != NULL) {
struct pbuf *seg = cs->rx_chain;
cs->rx_chain = pbuf_dechain(cs->rx_chain);