Skip to content

Commit 478e2ed

Browse files
committed
zebra: kernel_init function optimizations
These changes improve code maintainability and readability. Signed-off-by: sri-mohan1 <sri.mohan@samsung.com>
1 parent ca36e37 commit 478e2ed

1 file changed

Lines changed: 166 additions & 146 deletions

File tree

zebra/kernel_netlink.c

Lines changed: 166 additions & 146 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,14 @@
7272
*/
7373
#define NL_DEFAULT_BATCH_SEND_THRESHOLD (15 * NL_PKT_BUF_SIZE)
7474

75+
/*
76+
* FRR_NLGRP_BIT - Convert an RTNLGRP_* group constant to a bit position
77+
* for the nl_groups bitmask. RTNLGRP constants are 1-based bit numbers,
78+
* so shift by (group - 1). Only valid for groups with bit positions < 32;
79+
* group numbers > 32 must use setsockopt(NETLINK_ADD_MEMBERSHIP) via ext_groups.
80+
*/
81+
#define FRR_NLGRP_BIT(g) ((uint32_t)1 << ((g)-1))
82+
7583
static const struct message nlmsg_str[] = {
7684
{ RTM_NEWROUTE, "RTM_NEWROUTE" },
7785
{ RTM_DELROUTE, "RTM_DELROUTE" },
@@ -1616,189 +1624,198 @@ static bool kernel_netlink_nlsock_hash_equal(const void *arg1, const void *arg2)
16161624
return false;
16171625
}
16181626

1619-
/* Exported interface function. This function simply calls
1620-
netlink_socket (). */
1627+
/*
1628+
* Set a netlink socket to non-blocking mode for integration with the
1629+
* event loop. Uses flog_err_sys since this is a kernel/OS-level failure.
1630+
*/
1631+
static void netlink_set_nonblock(struct nlsock *nl)
1632+
{
1633+
if (fcntl(nl->sock, F_SETFL, O_NONBLOCK) < 0)
1634+
flog_err_sys(EC_LIB_SOCKET, "Can't set %s socket non-blocking: %s", nl->name,
1635+
safe_strerror(errno));
1636+
}
1637+
1638+
/*
1639+
* Create, configure, and register a netlink socket. Consolidates the
1640+
* common 5-step init pattern: format name, mark uncreated, create socket,
1641+
* log on failure, and insert into the global nlsock hash.
1642+
*
1643+
* @name_prefix: Prefix for socket name (e.g., "netlink-listen")
1644+
* @groups: Bitmask of RTMGRP/RTNLGRP groups for nl_groups (bit positions < 32)
1645+
* @ext_groups: Array of RTNLGRP group IDs > 32 (bit position >= 32) for setsockopt subscription
1646+
* @ext_group_size: Number of entries in ext_groups[]
1647+
* @ns_id: Network namespace ID
1648+
* @nl_family: Netlink protocol family (NETLINK_ROUTE or NETLINK_GENERIC)
1649+
* @warn_only: true -> log warning on failure, return -1 (non-fatal)
1650+
* false -> log error on failure, caller should exit (fatal)
1651+
*
1652+
* Returns 0 on success, -1 on failure.
1653+
*/
1654+
static int kernel_init_nlsock(struct nlsock *nl, const char *name_prefix, unsigned long groups,
1655+
uint32_t ext_groups[], uint8_t ext_group_size, ns_id_t ns_id,
1656+
int nl_family, bool warn_only)
1657+
{
1658+
snprintf(nl->name, sizeof(nl->name), "%s (NS %u)", name_prefix, ns_id);
1659+
nl->sock = -1;
1660+
1661+
if (netlink_socket(nl, groups, ext_groups, ext_group_size, ns_id, nl_family) < 0) {
1662+
if (warn_only)
1663+
zlog_warn("Failure to create %s socket", nl->name);
1664+
else
1665+
flog_err(EC_LIB_SOCKET, "Failure to create %s socket", nl->name);
1666+
return -1;
1667+
}
1668+
1669+
kernel_netlink_nlsock_insert(nl);
1670+
return 0;
1671+
}
1672+
1673+
#if defined SOL_NETLINK
1674+
/*
1675+
* Enable extended ACK messages on a netlink socket. Extended ACKs
1676+
* (Linux 4.12+) provide richer error diagnostics including human-readable
1677+
* error strings and offset information. Non-fatal on failure.
1678+
*
1679+
* @sock: Netlink socket file descriptor
1680+
* @desc: Short socket description for log messages (e.g., "cmd", "dp")
1681+
*/
1682+
static void netlink_enable_ext_ack(int sock, const char *desc)
1683+
{
1684+
int one = 1;
1685+
1686+
if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK, &one, sizeof(one)) < 0)
1687+
zlog_notice("Registration for extended %s ACK failed: %d %s", desc, errno,
1688+
safe_strerror(errno));
1689+
}
1690+
#endif /* SOL_NETLINK */
1691+
1692+
/*
1693+
* Initialize all netlink sockets and subsystem for a given network namespace.
1694+
*
1695+
* Creates five netlink sockets:
1696+
* netlink - Inbound route/rule/nexthop events (main pthread)
1697+
* netlink_cmd - Outbound synchronous commands (main pthread)
1698+
* netlink_dplane_out - Outbound dataplane programming (dplane pthread)
1699+
* netlink_dplane_in - Inbound link/addr/neigh/netconf/tc events (dplane pthread)
1700+
* ge_netlink_cmd - Generic netlink commands (optional, non-fatal)
1701+
*
1702+
* Also configures: multicast group subscriptions, extended ACK, non-blocking
1703+
* mode, receive buffer sizes, BPF self-echo filters, and event loop registration.
1704+
*/
16211705
void kernel_init(struct zebra_ns *zns)
16221706
{
16231707
uint32_t groups, dplane_groups, ext_groups;
16241708
#if defined SOL_NETLINK
1625-
int one, ret, grp;
1709+
int ret, grp;
16261710
#endif
16271711

1628-
/*
1629-
* Initialize netlink sockets
1630-
*
1631-
* If RTMGRP_XXX exists use that, but at some point
1632-
* I think the kernel developers realized that
1633-
* keeping track of all the different values would
1634-
* lead to confusion, so we need to convert the
1635-
* RTNLGRP_XXX to a bit position for ourself
1636-
*
1637-
*
1638-
* NOTE: If the bit is >= 32, you must use setsockopt(). Those
1639-
* groups are added further below after SOL_NETLINK is verified to
1640-
* exist.
1712+
/* ----------------------------------------------------------------
1713+
* Compute multicast group membership bitmasks.
1714+
* Bit positions < 32 go into nl_groups; bit positions >= 32 use ext_groups
1715+
* and are subscribed via setsockopt in netlink_socket().
1716+
* ----------------------------------------------------------------
16411717
*/
1718+
1719+
/* Main listener: route, rule, and nexthop change notifications */
16421720
groups = RTMGRP_IPV4_ROUTE | RTMGRP_IPV6_ROUTE | RTMGRP_IPV4_MROUTE |
1643-
((uint32_t)1 << (RTNLGRP_IPV4_RULE - 1)) |
1644-
((uint32_t)1 << (RTNLGRP_IPV6_RULE - 1)) | ((uint32_t)1 << (RTNLGRP_NEXTHOP - 1));
1721+
FRR_NLGRP_BIT(RTNLGRP_IPV4_RULE) | FRR_NLGRP_BIT(RTNLGRP_IPV6_RULE) |
1722+
FRR_NLGRP_BIT(RTNLGRP_NEXTHOP);
16451723

1646-
dplane_groups = (RTMGRP_LINK | RTMGRP_NEIGH | RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
1647-
((uint32_t)1 << (RTNLGRP_IPV4_NETCONF - 1)) |
1648-
((uint32_t)1 << (RTNLGRP_IPV6_NETCONF - 1)) |
1649-
((uint32_t)1 << (RTNLGRP_MPLS_NETCONF - 1)) |
1650-
((uint32_t)1 << (RTNLGRP_TC - 1)));
1724+
/* Dataplane inbound: link, neighbor, address, netconf, TC events */
1725+
dplane_groups = RTMGRP_LINK | RTMGRP_NEIGH | RTMGRP_IPV4_IFADDR | RTMGRP_IPV6_IFADDR |
1726+
FRR_NLGRP_BIT(RTNLGRP_IPV4_NETCONF) | FRR_NLGRP_BIT(RTNLGRP_IPV6_NETCONF) |
1727+
FRR_NLGRP_BIT(RTNLGRP_MPLS_NETCONF) | FRR_NLGRP_BIT(RTNLGRP_TC);
16511728

1652-
/* Use setsockopt for > 31 group */
1729+
/* Extended group: bit position >= 32, requires setsockopt */
16531730
ext_groups = RTNLGRP_TUNNEL;
16541731

1655-
snprintf(zns->netlink.name, sizeof(zns->netlink.name),
1656-
"netlink-listen (NS %u)", zns->ns_id);
1657-
zns->netlink.sock = -1;
1658-
if (netlink_socket(&zns->netlink, groups, &ext_groups, 1, zns->ns_id,
1659-
NETLINK_ROUTE) < 0) {
1660-
flog_err(EC_LIB_SOCKET, "Failure to create %s socket", zns->netlink.name);
1661-
frr_exit_with_buffer_flush(-1);
1662-
}
1663-
1664-
kernel_netlink_nlsock_insert(&zns->netlink);
1732+
/* ----------------------------------------------------------------
1733+
* Create netlink sockets. The first four are critical (fatal on
1734+
* failure). The generic netlink socket is optional (warn-only).
1735+
* ----------------------------------------------------------------
1736+
*/
16651737

1666-
snprintf(zns->netlink_cmd.name, sizeof(zns->netlink_cmd.name),
1667-
"netlink-cmd (NS %u)", zns->ns_id);
1668-
zns->netlink_cmd.sock = -1;
1669-
if (netlink_socket(&zns->netlink_cmd, 0, 0, 0, zns->ns_id,
1670-
NETLINK_ROUTE) < 0) {
1671-
flog_err(EC_LIB_SOCKET, "Failure to create %s socket", zns->netlink_cmd.name);
1738+
if (kernel_init_nlsock(&zns->netlink, "netlink-listen", groups, &ext_groups, 1, zns->ns_id,
1739+
NETLINK_ROUTE, false) < 0)
16721740
frr_exit_with_buffer_flush(-1);
1673-
}
16741741

1675-
kernel_netlink_nlsock_insert(&zns->netlink_cmd);
1676-
1677-
/* Outbound socket for dplane programming of the host OS. */
1678-
snprintf(zns->netlink_dplane_out.name,
1679-
sizeof(zns->netlink_dplane_out.name), "netlink-dp (NS %u)",
1680-
zns->ns_id);
1681-
zns->netlink_dplane_out.sock = -1;
1682-
if (netlink_socket(&zns->netlink_dplane_out, 0, 0, 0, zns->ns_id,
1683-
NETLINK_ROUTE) < 0) {
1684-
flog_err(EC_LIB_SOCKET, "Failure to create %s socket",
1685-
zns->netlink_dplane_out.name);
1742+
if (kernel_init_nlsock(&zns->netlink_cmd, "netlink-cmd", 0, NULL, 0, zns->ns_id,
1743+
NETLINK_ROUTE, false) < 0)
16861744
frr_exit_with_buffer_flush(-1);
1687-
}
16881745

1689-
kernel_netlink_nlsock_insert(&zns->netlink_dplane_out);
1690-
1691-
/* Inbound socket for OS events coming to the dplane. */
1692-
snprintf(zns->netlink_dplane_in.name,
1693-
sizeof(zns->netlink_dplane_in.name), "netlink-dp-in (NS %u)",
1694-
zns->ns_id);
1695-
zns->netlink_dplane_in.sock = -1;
1696-
if (netlink_socket(&zns->netlink_dplane_in, dplane_groups, 0, 0,
1697-
zns->ns_id, NETLINK_ROUTE) < 0) {
1698-
flog_err(EC_LIB_SOCKET, "Failure to create %s socket", zns->netlink_dplane_in.name);
1746+
if (kernel_init_nlsock(&zns->netlink_dplane_out, "netlink-dp", 0, NULL, 0, zns->ns_id,
1747+
NETLINK_ROUTE, false) < 0)
16991748
frr_exit_with_buffer_flush(-1);
1700-
}
1701-
1702-
kernel_netlink_nlsock_insert(&zns->netlink_dplane_in);
17031749

1704-
/* Generic Netlink socket. */
1705-
snprintf(zns->ge_netlink_cmd.name, sizeof(zns->ge_netlink_cmd.name),
1706-
"generic-netlink-cmd (NS %u)", zns->ns_id);
1707-
zns->ge_netlink_cmd.sock = -1;
1708-
if (netlink_socket(&zns->ge_netlink_cmd, 0, 0, 0, zns->ns_id,
1709-
NETLINK_GENERIC) < 0) {
1710-
zlog_warn("Failure to create %s socket",
1711-
zns->ge_netlink_cmd.name);
1712-
}
1750+
if (kernel_init_nlsock(&zns->netlink_dplane_in, "netlink-dp-in", dplane_groups, NULL, 0,
1751+
zns->ns_id, NETLINK_ROUTE, false) < 0)
1752+
frr_exit_with_buffer_flush(-1);
17131753

1714-
if (zns->ge_netlink_cmd.sock >= 0)
1715-
kernel_netlink_nlsock_insert(&zns->ge_netlink_cmd);
1754+
/* Generic netlink — non-fatal on failure */
1755+
kernel_init_nlsock(&zns->ge_netlink_cmd, "generic-netlink-cmd", 0, NULL, 0, zns->ns_id,
1756+
NETLINK_GENERIC, true);
17161757

1717-
/*
1718-
* SOL_NETLINK is not available on all platforms yet
1719-
* apparently. It's in bits/socket.h which I am not
1720-
* sure that we want to pull into our build system.
1758+
/* ----------------------------------------------------------------
1759+
* Platform-specific socket options (SOL_NETLINK).
1760+
* ----------------------------------------------------------------
17211761
*/
17221762
#if defined SOL_NETLINK
17231763

1724-
/*
1725-
* setsockopt multicast group subscriptions that don't fit in nl_groups
1726-
*/
1764+
/* Subscribe dplane inbound to BRVLAN group (bit >= 32) */
17271765
grp = RTNLGRP_BRVLAN;
17281766
ret = setsockopt(zns->netlink_dplane_in.sock, SOL_NETLINK,
17291767
NETLINK_ADD_MEMBERSHIP, &grp, sizeof(grp));
1730-
17311768
if (ret < 0)
1732-
zlog_notice(
1733-
"Registration for RTNLGRP_BRVLAN Membership failed : %d %s",
1734-
errno, safe_strerror(errno));
1735-
/*
1736-
* Let's tell the kernel that we want to receive extended
1737-
* ACKS over our command socket(s)
1738-
*/
1739-
one = 1;
1740-
ret = setsockopt(zns->netlink_cmd.sock, SOL_NETLINK, NETLINK_EXT_ACK,
1741-
&one, sizeof(one));
1769+
zlog_notice("Registration for RTNLGRP_BRVLAN Membership failed: %d %s", errno,
1770+
safe_strerror(errno));
17421771

1743-
if (ret < 0)
1744-
zlog_notice("Registration for extended cmd ACK failed : %d %s",
1745-
errno, safe_strerror(errno));
1746-
1747-
one = 1;
1748-
ret = setsockopt(zns->netlink_dplane_out.sock, SOL_NETLINK,
1749-
NETLINK_EXT_ACK, &one, sizeof(one));
1750-
1751-
if (ret < 0)
1752-
zlog_notice("Registration for extended dp ACK failed : %d %s",
1753-
errno, safe_strerror(errno));
1772+
/* Enable extended ACK on command and dplane output sockets */
1773+
netlink_enable_ext_ack(zns->netlink_cmd.sock, "cmd");
1774+
netlink_enable_ext_ack(zns->netlink_dplane_out.sock, "dp");
17541775

1776+
/* Enable extended ACK on generic netlink socket (uses flog_err
1777+
* per original behavior — protocol-level failures are significant).
1778+
*/
17551779
if (zns->ge_netlink_cmd.sock >= 0) {
1756-
one = 1;
1780+
int one = 1;
1781+
17571782
ret = setsockopt(zns->ge_netlink_cmd.sock, SOL_NETLINK,
17581783
NETLINK_EXT_ACK, &one, sizeof(one));
17591784
if (ret < 0)
17601785
flog_err(EC_ZEBRA_NETLINK_EXT_ACK_FAILED,
1761-
"Registration for extended generic netlink cmd ACK failed : %d %s",
1786+
"Registration for extended generic netlink cmd ACK failed: %d %s",
17621787
errno, safe_strerror(errno));
17631788
}
17641789

1765-
/*
1766-
* Trim off the payload of the original netlink message in the
1767-
* acknowledgment. This option is available since Linux 4.2, so if
1768-
* setsockopt fails, ignore the error.
1769-
*/
1770-
one = 1;
1771-
ret = setsockopt(zns->netlink_dplane_out.sock, SOL_NETLINK,
1772-
NETLINK_CAP_ACK, &one, sizeof(one));
1773-
if (ret < 0)
1774-
zlog_notice(
1775-
"Registration for reduced ACK packet size failed, probably running an early kernel");
1776-
#endif
1777-
1778-
/* Register kernel socket. */
1779-
if (fcntl(zns->netlink.sock, F_SETFL, O_NONBLOCK) < 0)
1780-
flog_err_sys(EC_LIB_SOCKET, "Can't set %s socket flags: %s",
1781-
zns->netlink.name, safe_strerror(errno));
1790+
/* Enable capped ACK to reduce ACK payload size (Linux 4.3+) */
1791+
{
1792+
int one = 1;
17821793

1783-
if (fcntl(zns->netlink_cmd.sock, F_SETFL, O_NONBLOCK) < 0)
1784-
flog_err(EC_LIB_SOCKET, "Can't set %s socket error: %s(%d)", zns->netlink_cmd.name,
1785-
safe_strerror(errno), errno);
1786-
1787-
if (fcntl(zns->netlink_dplane_out.sock, F_SETFL, O_NONBLOCK) < 0)
1788-
flog_err(EC_LIB_SOCKET, "Can't set %s socket error: %s(%d)",
1789-
zns->netlink_dplane_out.name, safe_strerror(errno), errno);
1794+
ret = setsockopt(zns->netlink_dplane_out.sock, SOL_NETLINK, NETLINK_CAP_ACK, &one,
1795+
sizeof(one));
1796+
if (ret < 0)
1797+
zlog_notice(
1798+
"Registration for reduced ACK packet size failed, probably running an early kernel");
1799+
}
1800+
#endif /* SOL_NETLINK */
17901801

1791-
if (fcntl(zns->netlink_dplane_in.sock, F_SETFL, O_NONBLOCK) < 0)
1792-
flog_err(EC_LIB_SOCKET, "Can't set %s socket error: %s(%d)",
1793-
zns->netlink_dplane_in.name, safe_strerror(errno), errno);
1802+
/* ----------------------------------------------------------------
1803+
* Set all sockets to non-blocking mode for event loop integration.
1804+
* ----------------------------------------------------------------
1805+
*/
1806+
netlink_set_nonblock(&zns->netlink);
1807+
netlink_set_nonblock(&zns->netlink_cmd);
1808+
netlink_set_nonblock(&zns->netlink_dplane_out);
1809+
netlink_set_nonblock(&zns->netlink_dplane_in);
17941810

1795-
if (zns->ge_netlink_cmd.sock >= 0) {
1796-
if (fcntl(zns->ge_netlink_cmd.sock, F_SETFL, O_NONBLOCK) < 0)
1797-
flog_err(EC_LIB_SOCKET, "Can't set %s socket error: %s(%d)",
1798-
zns->ge_netlink_cmd.name, safe_strerror(errno), errno);
1799-
}
1811+
if (zns->ge_netlink_cmd.sock >= 0)
1812+
netlink_set_nonblock(&zns->ge_netlink_cmd);
18001813

1801-
/* Set receive buffer size if it's set from command line */
1814+
/* ----------------------------------------------------------------
1815+
* Configure receive buffer sizes if specified via CLI.
1816+
* Larger buffers prevent message loss during high-volume bursts.
1817+
* ----------------------------------------------------------------
1818+
*/
18021819
if (rcvbufsize) {
18031820
netlink_recvbuf(&zns->netlink, rcvbufsize);
18041821
netlink_recvbuf(&zns->netlink_cmd, rcvbufsize);
@@ -1809,23 +1826,26 @@ void kernel_init(struct zebra_ns *zns)
18091826
netlink_recvbuf(&zns->ge_netlink_cmd, rcvbufsize);
18101827
}
18111828

1812-
/* Set filter for inbound sockets, to exclude events we've generated
1813-
* ourselves.
1829+
/* ----------------------------------------------------------------
1830+
* Install BPF filters on inbound sockets to suppress self-generated
1831+
* echo messages. Allows through: RTM_NEWADDR, RTM_DELADDR,
1832+
* RTM_NEWNETCONF, RTM_DELNETCONF (these must be processed
1833+
* regardless of origin to keep state in sync).
1834+
* ----------------------------------------------------------------
18141835
*/
18151836
netlink_install_filter(zns->netlink.sock, zns->netlink_cmd.snl.nl_pid,
18161837
zns->netlink_dplane_out.snl.nl_pid);
1817-
18181838
netlink_install_filter(zns->netlink_dplane_in.sock,
18191839
zns->netlink_cmd.snl.nl_pid,
18201840
zns->netlink_dplane_out.snl.nl_pid);
18211841

1842+
/* Register main netlink socket with the event loop */
18221843
zns->t_netlink = NULL;
1823-
18241844
event_add_read(zrouter.master, kernel_read, zns, zns->netlink.sock,
18251845
&zns->t_netlink);
18261846

1847+
/* Initialize route and generic netlink subsystems */
18271848
rt_netlink_init();
1828-
18291849
ge_netlink_init(zns);
18301850
}
18311851

0 commit comments

Comments
 (0)