nat64: Handle ICMP rewriting

Add rewriting of ICMP headers to nat64. This is specified in RFC6145, and
the implementation here follows that. The support is only partial: in
particular, the payload of ICMP error messages is not rewritten, even
though the RFC specifies that it should be.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
This commit is contained in:
Toke Høiland-Jørgensen
2021-10-04 23:53:16 +02:00
parent ced3d8c4bc
commit a5313d2f1b

View File

@@ -4,6 +4,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/compiler.h>
#include <linux/pkt_sched.h>
#include <linux/pkt_cls.h>
#include <stdbool.h>
@@ -56,6 +57,173 @@ struct {
#define DBG
#endif
/*
 * ICMPv6 pseudo header, as fed to bpf_csum_diff() when the pseudo header has
 * to be added to (or removed from) an ICMP checksum. The struct is packed so
 * that it can be summed as one contiguous block of 32-bit words.
 */
struct icmpv6_pseudo {
/* IPv6 source address */
struct in6_addr saddr;
/* IPv6 destination address */
struct in6_addr daddr;
/* upper-layer length; filled from the IPv6 header's payload_len */
__u32 len;
/* never assigned explicitly; zeroed by the designated initializer */
__u8 padding[3];
/* next header value; set to IPPROTO_ICMPV6 by update_icmp_checksum() */
__u8 nh;
} __attribute__((packed));
static __always_inline void update_icmp_checksum(struct __sk_buff *skb,
struct ipv6hdr *ip6h,
void *icmp_before,
void *icmp_after,
bool add)
{
void *data = (void *)(unsigned long long)skb->data;
struct icmpv6_pseudo ph = {
.nh = IPPROTO_ICMPV6,
.saddr = ip6h->saddr,
.daddr = ip6h->daddr,
.len = ip6h->payload_len
};
__u16 h_before, h_after, offset;
__u32 csum, u_before, u_after;
/* Do checksum update in two passes: first compute the incremental
* checksum update of the ICMPv6 pseudo header, update the checksum
* using bpf_l4_csum_replace(), and then do a separate update for the
* ICMP type and code (which is two consecutive bytes, so cast them to
* u16). The bpf_csum_diff() helper can be used to compute the
* incremental update of the full block, whereas the
* bpf_l4_csum_replace() helper can do the two-byte diff and update by
* itself.
*/
csum = bpf_csum_diff((__be32 *)&ph, add ? 0 : sizeof(ph),
(__be32 *)&ph, add ? sizeof(ph) : 0,
0);
offset = ((void *)icmp_after - data) + 2;
/* first two bytes of ICMP header, type and code */
h_before = *(__u16 *)icmp_before;
h_after = *(__u16 *)icmp_after;
/* last four bytes of ICMP header, the data union */
u_before = *(__u32 *)(icmp_before + 4);
u_after = *(__u32 *)(icmp_after + 4);
bpf_l4_csum_replace(skb, offset, 0, csum, BPF_F_PSEUDO_HDR);
bpf_l4_csum_replace(skb, offset, h_before, h_after, 2);
if (u_before != u_after)
bpf_l4_csum_replace(skb, offset, u_before, u_after, 4);
}
/*
 * rewrite_icmp() - translate an ICMPv4 header into ICMPv6 in place
 * @iph:  IPv4 header of the packet; the ICMP header is taken to start
 *        directly behind it
 * @ip6h: the IPv6 header that will replace @iph; only used to build the
 *        pseudo header for the checksum update
 * @skb:  packet being translated
 *
 * Type, code and the four-byte data field are mapped according to RFC6145
 * section 4.2, then the checksum is adjusted for the changed fields and for
 * the ICMPv6 pseudo header.
 *
 * NOTE(review): `iph + 1` assumes there are no IPv4 options between the IP
 * and ICMP headers - confirm the caller guarantees this.
 * NOTE(review): ICMP_TIME_EXCEEDED is not translated in this direction even
 * though rewrite_icmpv6() handles the reverse mapping.
 *
 * Return: 0 if the header was rewritten, -1 if the packet is too short or
 * the message has no translation (the caller bails out in that case).
 */
static int rewrite_icmp(struct iphdr *iph, struct ipv6hdr *ip6h, struct __sk_buff *skb)
{
	void *data_end = (void *)(unsigned long long)skb->data_end;
	struct icmphdr old_icmp, *icmp = (void *)(iph + 1);
	struct icmp6hdr icmp6, *new_icmp6;
	__u32 mtu;

	if ((void *)(icmp + 1) > data_end)
		return -1;

	/* Keep a copy of the original header for the checksum diff; both
	 * headers are the same size, so the new one is built in a local copy
	 * and written back over the old one at the end.
	 */
	old_icmp = *icmp;
	new_icmp6 = (void *)icmp;
	icmp6 = *new_icmp6;

	/* These translations are defined in RFC6145 section 4.2 */
	switch (icmp->type) {
	case ICMP_ECHO:
		icmp6.icmp6_type = ICMPV6_ECHO_REQUEST;
		break;
	case ICMP_ECHOREPLY:
		icmp6.icmp6_type = ICMPV6_ECHO_REPLY;
		break;
	case ICMP_DEST_UNREACH:
		icmp6.icmp6_type = ICMPV6_DEST_UNREACH;
		switch (icmp->code) {
		case ICMP_NET_UNREACH:
		case ICMP_HOST_UNREACH:
		case ICMP_SR_FAILED:
		case ICMP_NET_UNKNOWN:
		case ICMP_HOST_UNKNOWN:
		case ICMP_HOST_ISOLATED:
		case ICMP_NET_UNR_TOS:
		case ICMP_HOST_UNR_TOS:
			icmp6.icmp6_code = ICMPV6_NOROUTE;
			break;
		case ICMP_PROT_UNREACH:
			/* Becomes a Parameter Problem pointing at the IPv6
			 * next header field.
			 */
			icmp6.icmp6_type = ICMPV6_PARAMPROB;
			icmp6.icmp6_code = ICMPV6_UNK_NEXTHDR;
			icmp6.icmp6_pointer = bpf_htonl(offsetof(struct ipv6hdr, nexthdr));
			break;
		case ICMP_PORT_UNREACH:
			icmp6.icmp6_code = ICMPV6_PORT_UNREACH;
			break;
		case ICMP_FRAG_NEEDED:
			icmp6.icmp6_type = ICMPV6_PKT_TOOBIG;
			icmp6.icmp6_code = 0;
			/* account for the larger IPv6 header */
			mtu = bpf_ntohs(icmp->un.frag.mtu) + 20;
			/* RFC6145 section 6, "second approach" - should not be
			 * necessary, but might as well do this
			 */
			if (mtu < 1280)
				mtu = 1280;
			icmp6.icmp6_mtu = bpf_htonl(mtu);
			break;
		case ICMP_NET_ANO:
		case ICMP_HOST_ANO:
		case ICMP_PKT_FILTERED:
		case ICMP_PREC_CUTOFF:
			icmp6.icmp6_code = ICMPV6_ADM_PROHIBITED;
			break;
		default:
			return -1;
		}
		break;
	case ICMP_PARAMETERPROB:
		/* Only code 0 (pointer indicates the error) and code 2 (bad
		 * length) have a translation; all other codes are dropped.
		 */
		if (icmp->code != 0 && icmp->code != 2)
			return -1;

		icmp6.icmp6_type = ICMPV6_PARAMPROB;
		icmp6.icmp6_code = ICMPV6_HDR_FIELD;
		/* The pointer field not defined in the Linux header. This
		 * translation is from Figure 3 of RFC6145. IPv4 fields with
		 * no IPv6 counterpart hit the default case and are dropped.
		 */
		switch (icmp->un.reserved[0]) {
		case 0: /* version/IHL */
			icmp6.icmp6_pointer = 0;
			break;
		case 1: /* Type of Service */
			icmp6.icmp6_pointer = bpf_htonl(1);
			break;
		case 2: /* Total length */
		case 3:
			icmp6.icmp6_pointer = bpf_htonl(4);
			break;
		case 8: /* Time to Live */
			icmp6.icmp6_pointer = bpf_htonl(7);
			break;
		case 9: /* Protocol */
			icmp6.icmp6_pointer = bpf_htonl(6);
			break;
		case 12: /* Source address */
		case 13:
		case 14:
		case 15:
			icmp6.icmp6_pointer = bpf_htonl(8);
			break;
		case 16: /* Destination address */
		case 17:
		case 18:
		case 19:
			icmp6.icmp6_pointer = bpf_htonl(24);
			break;
		default:
			return -1;
		}
		break;
	default:
		return -1;
	}

	*new_icmp6 = icmp6;
	update_icmp_checksum(skb, ip6h, &old_icmp, new_icmp6, true);

	/* FIXME: also need to rewrite IP header embedded in ICMP error */

	return 0;
}
static int nat64_handle_v4(struct __sk_buff *skb, struct hdr_cursor *nh)
{
@@ -125,6 +293,12 @@ static int nat64_handle_v4(struct __sk_buff *skb, struct hdr_cursor *nh)
dst_hdr.flow_lbl[0] = iph->tos << 4;
dst_hdr.payload_len = bpf_htons(bpf_ntohs(iph->tot_len) - iphdr_len);
if (dst_hdr.nexthdr == IPPROTO_ICMP) {
if (rewrite_icmp(iph, &dst_hdr, skb))
goto out;
dst_hdr.nexthdr = IPPROTO_ICMPV6;
}
if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0))
goto out;
@@ -245,6 +419,104 @@ static __always_inline __u16 csum_fold_helper(__u32 csum)
return ~sum;
}
/*
 * rewrite_icmpv6() - translate an ICMPv6 header into ICMPv4 in place
 * @ip6h: IPv6 header of the packet; the ICMPv6 header is taken to start
 *        directly behind it, and its addresses and payload length form the
 *        pseudo header that is removed from the checksum
 * @skb:  packet being translated
 *
 * Type, code and the four-byte data field are mapped according to RFC6145
 * section 5.2, then the checksum is adjusted for the changed fields and the
 * ICMPv6 pseudo header is taken out of it.
 *
 * NOTE(review): `ip6h + 1` assumes there are no IPv6 extension headers in
 * front of the ICMPv6 header - confirm the caller guarantees this.
 *
 * Return: 0 if the header was rewritten, -1 if the packet is too short or
 * the message has no translation (the caller bails out in that case).
 */
static int rewrite_icmpv6(struct ipv6hdr *ip6h, struct __sk_buff *skb)
{
	void *data_end = (void *)(unsigned long long)skb->data_end;
	struct icmp6hdr old_icmp6, *icmp6 = (void *)(ip6h + 1);
	struct icmphdr icmp, *new_icmp;
	__u32 mtu, ptr;

	if ((void *)(icmp6 + 1) > data_end)
		return -1;

	/* Keep a copy of the original header for the checksum diff; both
	 * headers are the same size, so the new one is built in a local copy
	 * and written back over the old one at the end.
	 */
	old_icmp6 = *icmp6;
	new_icmp = (void *)icmp6;
	icmp = *new_icmp;

	/* These translations are defined in RFC6145 section 5.2 */
	switch (icmp6->icmp6_type) {
	case ICMPV6_ECHO_REQUEST:
		icmp.type = ICMP_ECHO;
		break;
	case ICMPV6_ECHO_REPLY:
		icmp.type = ICMP_ECHOREPLY;
		break;
	case ICMPV6_DEST_UNREACH:
		icmp.type = ICMP_DEST_UNREACH;
		switch (icmp6->icmp6_code) {
		case ICMPV6_NOROUTE:
		case ICMPV6_NOT_NEIGHBOUR:
		case ICMPV6_ADDR_UNREACH:
			icmp.code = ICMP_HOST_UNREACH;
			break;
		case ICMPV6_ADM_PROHIBITED:
			icmp.code = ICMP_HOST_ANO;
			break;
		case ICMPV6_PORT_UNREACH:
			icmp.code = ICMP_PORT_UNREACH;
			break;
		default:
			return -1;
		}
		break;
	case ICMPV6_PKT_TOOBIG:
		icmp.type = ICMP_DEST_UNREACH;
		icmp.code = ICMP_FRAG_NEEDED;
		/* Account for the smaller IPv4 header. An MTU that would
		 * underflow, or that does not fit the 16-bit ICMPv4 field,
		 * cannot be represented, so the packet is dropped.
		 */
		mtu = bpf_ntohl(icmp6->icmp6_mtu);
		if (mtu < 20)
			return -1;
		mtu -= 20;
		if (mtu > 0xffff)
			return -1;
		/* Clear all four bytes first so the upper half of the 32-bit
		 * ICMPv6 MTU does not end up in the unused ICMPv4 field.
		 */
		icmp.un.gateway = 0;
		icmp.un.frag.mtu = bpf_htons(mtu);
		break;
	case ICMPV6_TIME_EXCEED:
		/* the code is carried over unchanged */
		icmp.type = ICMP_TIME_EXCEEDED;
		break;
	case ICMPV6_PARAMPROB:
		switch (icmp6->icmp6_code) {
		case 0: /* erroneous header field */
			icmp.type = ICMP_PARAMETERPROB;
			icmp.code = 0;
			ptr = bpf_ntohl(icmp6->icmp6_pointer);
			/* The ICMPv4 pointer is a single byte followed by
			 * three unused bytes; zero them all before setting it.
			 */
			icmp.un.gateway = 0;
			/* Figure 6 in RFC6145 - using if statements b/c of
			 * range at the bottom. Pointers into fields that have
			 * no IPv4 counterpart are dropped.
			 */
			if (ptr == 0 || ptr == 1)
				icmp.un.reserved[0] = ptr;
			else if (ptr == 4 || ptr == 5)
				icmp.un.reserved[0] = 2;
			else if (ptr == 6)
				icmp.un.reserved[0] = 9;
			else if (ptr == 7)
				icmp.un.reserved[0] = 8;
			else if (ptr >= 8 && ptr <= 23)
				icmp.un.reserved[0] = 12;
			else if (ptr >= 24 && ptr <= 39)
				icmp.un.reserved[0] = 16;
			else
				return -1;
			break;
		case 1: /* unrecognized next header */
			icmp.type = ICMP_DEST_UNREACH;
			icmp.code = ICMP_PROT_UNREACH;
			/* Destination Unreachable has no pointer; do not leak
			 * the ICMPv6 pointer into its unused field.
			 */
			icmp.un.gateway = 0;
			break;
		default:
			return -1;
		}
		break;
	default:
		return -1;
	}

	*new_icmp = icmp;
	update_icmp_checksum(skb, ip6h, &old_icmp6, new_icmp, false);

	/* FIXME: also need to rewrite IP header embedded in ICMP error */

	return 0;
}
static int nat64_handle_v6(struct __sk_buff *skb, struct hdr_cursor *nh)
{
void *data_end = (void *)(unsigned long long)skb->data_end;
@@ -339,6 +611,13 @@ static int nat64_handle_v6(struct __sk_buff *skb, struct hdr_cursor *nh)
dst_hdr.ttl = ip6h->hop_limit;
dst_hdr.tos = ip6h->priority << 4 | (ip6h->flow_lbl[0] >> 4);
dst_hdr.tot_len = bpf_htons(bpf_ntohs(ip6h->payload_len) + sizeof(dst_hdr));
if (dst_hdr.protocol == IPPROTO_ICMPV6) {
if (rewrite_icmpv6(ip6h, skb))
goto out;
dst_hdr.protocol = IPPROTO_ICMP;
}
dst_hdr.check = csum_fold_helper(bpf_csum_diff((__be32 *)&dst_hdr, 0,
(__be32 *)&dst_hdr, sizeof(dst_hdr),
0));