1
0
mirror of https://gitlab.labs.nic.cz/labs/bird.git synced 2024-05-11 16:54:54 +00:00

Many changes in I/O and OSPF sockets and packet handling.

I/O:
 - BSD: specify src addr on IP sockets by IP_HDRINCL
 - BSD: specify src addr on UDP sockets by IP_SENDSRCADDR
 - Linux: specify src addr on IP/UDP sockets by IP_PKTINFO
 - IPv6: specify src addr on IP/UDP sockets by IPV6_PKTINFO
 - Alternative SKF_BIND flag for binding to IP address
 - Allows IP/UDP sockets without tx_hook, on these
   sockets a packet is discarded when TX queue is full
 - Use consistently SOL_ for socket layer values.

OSPF:
 - Packet src addr is always explicitly set
 - Support for secondary addresses in BSD
 - Dynamic RX/TX buffers
 - Fixes some minor buffer overruns
 - Interface option 'tx length'
 - Names for vlink pseudoifaces (vlinkX)
 - Vlinks use separate socket for TX
 - Vlinks do not use fixed associated iface
 - Fixes TTL for direct unicast packets
 - Fixes DONTROUTE for OSPF sockets
 - Use ifa->ifname instead of ifa->iface->name
This commit is contained in:
Ondrej Zajicek
2014-02-06 17:46:01 +01:00
parent f48fa14214
commit 48e5f32db6
26 changed files with 701 additions and 449 deletions

View File

@@ -23,6 +23,8 @@
#include <fcntl.h>
#include <errno.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <netinet/icmp6.h>
#include "nest/bird.h"
@@ -489,6 +491,11 @@ tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t)
#define SOL_IPV6 IPPROTO_IPV6
#endif
#ifndef SOL_ICMPV6
#define SOL_ICMPV6 IPPROTO_ICMPV6
#endif
static list sock_list;
static struct birdsock *current_sock;
static struct birdsock *stored_sock;
@@ -552,6 +559,43 @@ sk_free(resource *r)
}
}
void
sk_set_rbsize(sock *s, uint val)
{
ASSERT(s->rbuf_alloc == s->rbuf);
if (s->rbsize == val)
return;
s->rbsize = val;
xfree(s->rbuf_alloc);
s->rbuf_alloc = xmalloc(val);
s->rpos = s->rbuf = s->rbuf_alloc;
}
void
sk_set_tbsize(sock *s, uint val)
{
ASSERT(s->tbuf_alloc == s->tbuf);
if (s->tbsize == val)
return;
byte *old_tbuf = s->tbuf;
s->tbsize = val;
s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val);
s->tpos = s->tbuf + (s->tpos - old_tbuf);
s->ttx = s->tbuf + (s->ttx - old_tbuf);
}
void
sk_set_tbuf(sock *s, void *tbuf)
{
s->tbuf = tbuf ?: s->tbuf_alloc;
s->ttx = s->tpos = s->tbuf;
}
void
sk_reallocate(sock *s)
{
@@ -703,11 +747,11 @@ sysio_register_cmsgs(sock *s)
int ok = 1;
if ((s->flags & SKF_LADDR_RX) &&
(setsockopt(s->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &ok, sizeof(ok)) < 0))
(setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &ok, sizeof(ok)) < 0))
return "IPV6_RECVPKTINFO";
if ((s->flags & SKF_TTL_RX) &&
(setsockopt(s->fd, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, &ok, sizeof(ok)) < 0))
(setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &ok, sizeof(ok)) < 0))
return "IPV6_RECVHOPLIMIT";
return NULL;
@@ -722,10 +766,10 @@ sysio_process_rx_cmsgs(sock *s, struct msghdr *msg)
for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
{
if (cm->cmsg_level == IPPROTO_IPV6 && cm->cmsg_type == IPV6_PKTINFO)
if (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_PKTINFO)
pi = (struct in6_pktinfo *) CMSG_DATA(cm);
if (cm->cmsg_level == IPPROTO_IPV6 && cm->cmsg_type == IPV6_HOPLIMIT)
if (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_HOPLIMIT)
hlim = (int *) CMSG_DATA(cm);
}
@@ -749,32 +793,27 @@ sysio_process_rx_cmsgs(sock *s, struct msghdr *msg)
return;
}
/*
static void
sysio_prepare_tx_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
{
struct cmsghdr *cm;
struct in6_pktinfo *pi;
if (!(s->flags & SKF_LADDR_TX))
return;
msg->msg_control = cbuf;
msg->msg_controllen = cbuflen;
cm = CMSG_FIRSTHDR(msg);
cm->cmsg_level = IPPROTO_IPV6;
cm->cmsg_level = SOL_IPV6;
cm->cmsg_type = IPV6_PKTINFO;
cm->cmsg_len = CMSG_LEN(sizeof(*pi));
pi = (struct in6_pktinfo *) CMSG_DATA(cm);
set_inaddr(&pi->ipi6_addr, s->saddr);
pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
set_inaddr(&pi->ipi6_addr, s->saddr);
msg->msg_controllen = cm->cmsg_len;
return;
}
*/
#endif
static char *
@@ -786,11 +825,6 @@ sk_set_ttl_int(sock *s)
#else
if (setsockopt(s->fd, SOL_IP, IP_TTL, &s->ttl, sizeof(s->ttl)) < 0)
return "IP_TTL";
#ifdef CONFIG_UNIX_DONTROUTE
int one = 1;
if (s->ttl == 1 && setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0)
return "SO_DONTROUTE";
#endif
#endif
return NULL;
}
@@ -801,6 +835,7 @@ sk_set_ttl_int(sock *s)
static char *
sk_setup(sock *s)
{
int one = 1;
int fd = s->fd;
char *err = NULL;
@@ -809,6 +844,41 @@ sk_setup(sock *s)
if (s->type == SK_UNIX)
return NULL;
if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
s->flags |= SKF_PKTINFO;
#ifdef CONFIG_USE_HDRINCL
if ((s->type == SK_IP) && (s->flags & SKF_PKTINFO))
{
s->flags &= ~SKF_PKTINFO;
s->flags |= SKF_HDRINCL;
if (setsockopt(fd, SOL_IP, IP_HDRINCL, &one, sizeof(one)) < 0)
ERR("IP_HDRINCL");
}
#endif
if (s->iface)
{
#ifdef SO_BINDTODEVICE
struct ifreq ifr;
strcpy(ifr.ifr_name, s->iface->name);
if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
ERR("SO_BINDTODEVICE");
#endif
#ifdef CONFIG_UNIX_DONTROUTE
if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0)
ERR("SO_DONTROUTE");
#endif
}
if ((s->ttl >= 0) && (err = sk_set_ttl_int(s)))
goto bad;
if (err = sysio_register_cmsgs(s))
goto bad;
#ifdef IPV6
if ((s->tos >= 0) && setsockopt(fd, SOL_IPV6, IPV6_TCLASS, &s->tos, sizeof(s->tos)) < 0)
WARN("IPV6_TCLASS");
@@ -821,15 +891,10 @@ sk_setup(sock *s)
sk_set_priority(s, s->priority);
#ifdef IPV6
int v = 1;
if ((s->flags & SKF_V6ONLY) && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)) < 0)
if ((s->flags & SKF_V6ONLY) && setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0)
WARN("IPV6_V6ONLY");
#endif
if ((s->ttl >= 0) && (err = sk_set_ttl_int(s)))
goto bad;
err = sysio_register_cmsgs(s);
bad:
return err;
}
@@ -926,7 +991,7 @@ sk_set_broadcast(sock *s, int enable)
int
sk_set_ipv6_checksum(sock *s, int offset)
{
if (setsockopt(s->fd, IPPROTO_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
{
log(L_ERR "sk_set_ipv6_checksum: IPV6_CHECKSUM: %m");
return -1;
@@ -945,7 +1010,7 @@ sk_set_icmp_filter(sock *s, int p1, int p2)
ICMP6_FILTER_SETPASS(p1, &f);
ICMP6_FILTER_SETPASS(p2, &f);
if (setsockopt(s->fd, IPPROTO_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
{
log(L_ERR "sk_setup_icmp_filter: ICMP6_FILTER: %m");
return -1;
@@ -961,7 +1026,7 @@ sk_setup_multicast(sock *s)
int zero = 0;
int index;
ASSERT(s->iface && s->iface->addr);
ASSERT(s->iface);
index = s->iface->index;
if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0)
@@ -971,9 +1036,6 @@ sk_setup_multicast(sock *s)
if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
ERR("IPV6_MULTICAST_IF");
if (err = sysio_bind_to_iface(s))
goto bad;
return 0;
bad:
@@ -981,18 +1043,17 @@ bad:
return -1;
}
#ifdef CONFIG_IPV6_GLIBC_20
#define ipv6mr_interface ipv6mr_ifindex
#endif
int
sk_join_group(sock *s, ip_addr maddr)
{
struct ipv6_mreq mreq;
set_inaddr(&mreq.ipv6mr_multiaddr, maddr);
#ifdef CONFIG_IPV6_GLIBC_20
mreq.ipv6mr_ifindex = s->iface->index;
#else
mreq.ipv6mr_interface = s->iface->index;
#endif
if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mreq, sizeof(mreq)) < 0)
{
@@ -1009,12 +1070,7 @@ sk_leave_group(sock *s, ip_addr maddr)
struct ipv6_mreq mreq;
set_inaddr(&mreq.ipv6mr_multiaddr, maddr);
#ifdef CONFIG_IPV6_GLIBC_20
mreq.ipv6mr_ifindex = s->iface->index;
#else
mreq.ipv6mr_interface = s->iface->index;
#endif
if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mreq, sizeof(mreq)) < 0)
{
@@ -1032,7 +1088,7 @@ sk_setup_multicast(sock *s)
{
char *err;
ASSERT(s->iface && s->iface->addr);
ASSERT(s->iface);
if (err = sysio_setup_multicast(s))
{
@@ -1142,31 +1198,45 @@ int
sk_open(sock *s)
{
int fd;
sockaddr sa;
int one = 1;
int type = s->type;
int has_src = ipa_nonzero(s->saddr) || s->sport;
int do_bind = 0;
int bind_port = 0;
ip_addr bind_addr = IPA_NONE;
sockaddr sa;
char *err;
switch (type)
switch (s->type)
{
case SK_TCP_ACTIVE:
s->ttx = ""; /* Force s->ttx != s->tpos */
/* Fall thru */
case SK_TCP_PASSIVE:
fd = socket(BIRD_PF, SOCK_STREAM, IPPROTO_TCP);
bind_port = s->sport;
bind_addr = s->saddr;
do_bind = bind_port || ipa_nonzero(bind_addr);
break;
case SK_UDP:
fd = socket(BIRD_PF, SOCK_DGRAM, IPPROTO_UDP);
bind_port = s->sport;
bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
do_bind = 1;
break;
case SK_IP:
fd = socket(BIRD_PF, SOCK_RAW, s->dport);
bind_port = 0;
bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
do_bind = ipa_nonzero(bind_addr);
break;
case SK_MAGIC:
fd = s->fd;
break;
default:
bug("sk_open() called for invalid sock type %d", type);
bug("sk_open() called for invalid sock type %d", s->type);
}
if (fd < 0)
die("sk_open: socket: %m");
@@ -1175,31 +1245,28 @@ sk_open(sock *s)
if (err = sk_setup(s))
goto bad;
if (has_src)
if (do_bind)
{
int port;
if (type == SK_IP)
port = 0;
else
if (bind_port)
{
port = s->sport;
if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0)
ERR("SO_REUSEADDR");
#ifdef CONFIG_NO_IFACE_BIND
/* Workaround missing ability to bind to an iface */
if ((type == SK_UDP) && s->iface && ipa_zero(s->saddr))
if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr))
{
if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
ERR("SO_REUSEPORT");
}
#endif
}
fill_in_sockaddr(&sa, s->saddr, s->iface, port);
fill_in_sockaddr(&sa, bind_addr, s->iface, bind_port);
if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
ERR("bind");
}
fill_in_sockaddr(&sa, s->daddr, s->iface, s->dport);
if (s->password)
@@ -1209,7 +1276,7 @@ sk_open(sock *s)
goto bad_no_log;
}
switch (type)
switch (s->type)
{
case SK_TCP_ACTIVE:
if (connect(fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0)
@@ -1287,6 +1354,79 @@ sk_open_unix(sock *s, char *name)
die("Unable to create control socket %s", name);
}
static inline int
sk_sendmsg(sock *s)
{
struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
byte cmsg_buf[CMSG_TX_SPACE];
sockaddr dst;
fill_in_sockaddr(&dst, s->daddr, s->iface, s->dport);
struct msghdr msg = {
.msg_name = &dst,
.msg_namelen = sizeof(dst),
.msg_iov = &iov,
.msg_iovlen = 1
};
#ifdef CONFIG_USE_HDRINCL
byte hdr[20];
struct iovec iov2[2] = { {hdr, 20}, iov };
if (s->flags & SKF_HDRINCL)
{
fill_ip_header(s, hdr, iov.iov_len);
msg.msg_iov = iov2;
msg.msg_iovlen = 2;
}
#endif
if (s->flags & SKF_PKTINFO)
sysio_prepare_tx_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
return sendmsg(s->fd, &msg, 0);
}
static inline int
sk_recvmsg(sock *s)
{
struct iovec iov = {s->rbuf, s->rbsize};
byte cmsg_buf[CMSG_RX_SPACE];
sockaddr src;
struct msghdr msg = {
.msg_name = &src,
.msg_namelen = sizeof(src),
.msg_iov = &iov,
.msg_iovlen = 1,
.msg_control = cmsg_buf,
.msg_controllen = sizeof(cmsg_buf),
.msg_flags = 0
};
int rv = recvmsg(s->fd, &msg, 0);
if (rv < 0)
return rv;
//ifdef IPV4
// if (cf_type == SK_IP)
// rv = ipv4_skip_header(pbuf, rv);
//endif
get_sockaddr(&src, &s->faddr, NULL, &s->fport, 1);
sysio_process_rx_cmsgs(s, &msg);
if (msg.msg_flags & MSG_TRUNC)
s->flags |= SKF_TRUNCATED;
else
s->flags &= ~SKF_TRUNCATED;
return rv;
}
static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; }
static int
@@ -1323,20 +1463,7 @@ sk_maybe_write(sock *s)
if (s->tbuf == s->tpos)
return 1;
sockaddr sa;
fill_in_sockaddr(&sa, s->daddr, s->iface, s->dport);
struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
// byte cmsg_buf[CMSG_TX_SPACE];
struct msghdr msg = {
.msg_name = &sa,
.msg_namelen = sizeof(sa),
.msg_iov = &iov,
.msg_iovlen = 1};
// sysio_prepare_tx_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
e = sendmsg(s->fd, &msg, 0);
e = sk_sendmsg(s);
if (e < 0)
{
@@ -1346,6 +1473,9 @@ sk_maybe_write(sock *s)
s->err_hook(s, errno);
return -1;
}
if (!s->tx_hook)
reset_tx_buffer(s);
return 0;
}
reset_tx_buffer(s);
@@ -1408,12 +1538,15 @@ sk_send(sock *s, unsigned len)
*
* This is a sk_send() replacement for connection-less packet sockets
* which allows destination of the packet to be chosen dynamically.
* Raw IP sockets should use 0 for @port.
*/
int
sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
{
s->daddr = addr;
s->dport = port;
if (port)
s->dport = port;
s->ttx = s->tbuf;
s->tpos = s->tbuf + len;
return sk_maybe_write(s);
@@ -1480,22 +1613,9 @@ sk_read(sock *s)
return s->rx_hook(s, 0);
default:
{
sockaddr sa;
int e;
struct iovec iov = {s->rbuf, s->rbsize};
byte cmsg_buf[CMSG_RX_SPACE];
struct msghdr msg = {
.msg_name = &sa,
.msg_namelen = sizeof(sa),
.msg_iov = &iov,
.msg_iovlen = 1,
.msg_control = cmsg_buf,
.msg_controllen = sizeof(cmsg_buf),
.msg_flags = 0};
e = recvmsg(s->fd, &msg, 0);
e = sk_recvmsg(s);
if (e < 0)
{
@@ -1503,10 +1623,8 @@ sk_read(sock *s)
s->err_hook(s, errno);
return 0;
}
s->rpos = s->rbuf + e;
get_sockaddr(&sa, &s->faddr, NULL, &s->fport, 1);
sysio_process_rx_cmsgs(s, &msg);
s->rpos = s->rbuf + e;
s->rx_hook(s, e);
return 1;
}