android_kernel_xiaomi_sm8350/drivers/net/gtp.c
Jason A. Donenfeld 9875cb3c09 net: icmp: pass zeroed opts from icmp{,v6}_ndo_send before sending
commit ee576c47db60432c37e54b1e2b43a8ca6d3a8dca upstream.

The icmp{,v6}_send functions make all sorts of use of skb->cb, casting
it with IPCB or IP6CB, assuming the skb to have come directly from the
inet layer. But when the packet comes from the ndo layer, especially
when forwarded, there's no telling what might be in skb->cb at that
point. As a result, the icmp sending code risks reading bogus memory
contents, which can result in nasty stack overflows such as this one
reported by a user:

    panic+0x108/0x2ea
    __stack_chk_fail+0x14/0x20
    __icmp_send+0x5bd/0x5c0
    icmp_ndo_send+0x148/0x160

In icmp_send, skb->cb is cast with IPCB and an ip_options struct is read
from it. The optlen parameter there is of particular note, as it can
induce writes beyond bounds. There are quite a few ways that can happen
in __ip_options_echo. For example:

    // sptr/skb are attacker-controlled skb bytes
    sptr = skb_network_header(skb);
    // dptr/dopt points to stack memory allocated by __icmp_send
    dptr = dopt->__data;
    // sopt is the corrupt skb->cb in question
    if (sopt->rr) {
        optlen  = sptr[sopt->rr+1]; // corrupt skb->cb + skb->data
        soffset = sptr[sopt->rr+2]; // corrupt skb->cb + skb->data
	// this now writes potentially attacker-controlled data, over
	// flowing the stack:
        memcpy(dptr, sptr+sopt->rr, optlen);
    }

In the icmpv6_send case, the story is similar, but not as dire, as only
IP6CB(skb)->iif and IP6CB(skb)->dsthao are used. The dsthao case is
worse than the iif case, but it is passed to ipv6_find_tlv, which does
a bit of bounds checking on the value.

This is easy to simulate by doing a `memset(skb->cb, 0x41,
sizeof(skb->cb));` before calling icmp{,v6}_ndo_send, and it's only by
good fortune and the rarity of icmp sending from that context that we've
avoided reports like this until now. For example, in KASAN:

    BUG: KASAN: stack-out-of-bounds in __ip_options_echo+0xa0e/0x12b0
    Write of size 38 at addr ffff888006f1f80e by task ping/89
    CPU: 2 PID: 89 Comm: ping Not tainted 5.10.0-rc7-debug+ #5
    Call Trace:
     dump_stack+0x9a/0xcc
     print_address_description.constprop.0+0x1a/0x160
     __kasan_report.cold+0x20/0x38
     kasan_report+0x32/0x40
     check_memory_region+0x145/0x1a0
     memcpy+0x39/0x60
     __ip_options_echo+0xa0e/0x12b0
     __icmp_send+0x744/0x1700

Actually, out of the 4 drivers that do this, only gtp zeroed the cb for
the v4 case, while the rest did not. So this commit actually removes the
gtp-specific zeroing, while putting the code where it belongs in the
shared infrastructure of icmp{,v6}_ndo_send.

This commit fixes the issue by passing an empty IPCB or IP6CB along to
the functions that actually do the work. For the icmp_send, this was
already trivial, thanks to __icmp_send providing the plumbing function.
For icmpv6_send, this required a tiny bit of refactoring to make it
behave like the v4 case, after which it was straight forward.

Fixes: a2b78e9b2c ("sunvnet: generate ICMP PTMUD messages for smaller port MTUs")
Reported-by: SinYu <liuxyon@gmail.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://lore.kernel.org/netdev/CAF=yD-LOF116aHub6RMe8vB8ZpnrrnoTdqhobEx+bvoA8AsP0w@mail.gmail.com/T/
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Link: https://lore.kernel.org/r/20210223131858.72082-1-Jason@zx2c4.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2021-03-04 10:26:53 +01:00

1418 lines
32 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/* GTP according to GSM TS 09.60 / 3GPP TS 29.060
*
* (C) 2012-2014 by sysmocom - s.f.m.c. GmbH
* (C) 2016 by Pablo Neira Ayuso <pablo@netfilter.org>
*
* Author: Harald Welte <hwelte@sysmocom.de>
* Pablo Neira Ayuso <pablo@netfilter.org>
* Andreas Schultz <aschultz@travelping.com>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/udp.h>
#include <linux/rculist.h>
#include <linux/jhash.h>
#include <linux/if_tunnel.h>
#include <linux/net.h>
#include <linux/file.h>
#include <linux/gtp.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/ip.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/genetlink.h>
#include <net/netns/generic.h>
#include <net/gtp.h>
/* An active session for the subscriber. */
struct pdp_ctx {
struct hlist_node hlist_tid;
struct hlist_node hlist_addr;
union {
struct {
u64 tid;
u16 flow;
} v0;
struct {
u32 i_tei;
u32 o_tei;
} v1;
} u;
u8 gtp_version;
u16 af;
struct in_addr ms_addr_ip4;
struct in_addr peer_addr_ip4;
struct sock *sk;
struct net_device *dev;
atomic_t tx_seq;
struct rcu_head rcu_head;
};
/* One instance of the GTP device. */
struct gtp_dev {
struct list_head list;
struct sock *sk0;
struct sock *sk1u;
struct net_device *dev;
unsigned int role;
unsigned int hash_size;
struct hlist_head *tid_hash;
struct hlist_head *addr_hash;
};
static unsigned int gtp_net_id __read_mostly;
struct gtp_net {
struct list_head gtp_dev_list;
};
static u32 gtp_h_initval;
static void pdp_context_delete(struct pdp_ctx *pctx);
static inline u32 gtp0_hashfn(u64 tid)
{
u32 *tid32 = (u32 *) &tid;
return jhash_2words(tid32[0], tid32[1], gtp_h_initval);
}
static inline u32 gtp1u_hashfn(u32 tid)
{
return jhash_1word(tid, gtp_h_initval);
}
static inline u32 ipv4_hashfn(__be32 ip)
{
return jhash_1word((__force u32)ip, gtp_h_initval);
}
/* Resolve a PDP context structure based on the 64bit TID. */
static struct pdp_ctx *gtp0_pdp_find(struct gtp_dev *gtp, u64 tid)
{
struct hlist_head *head;
struct pdp_ctx *pdp;
head = &gtp->tid_hash[gtp0_hashfn(tid) % gtp->hash_size];
hlist_for_each_entry_rcu(pdp, head, hlist_tid) {
if (pdp->gtp_version == GTP_V0 &&
pdp->u.v0.tid == tid)
return pdp;
}
return NULL;
}
/* Resolve a PDP context structure based on the 32bit TEI. */
static struct pdp_ctx *gtp1_pdp_find(struct gtp_dev *gtp, u32 tid)
{
struct hlist_head *head;
struct pdp_ctx *pdp;
head = &gtp->tid_hash[gtp1u_hashfn(tid) % gtp->hash_size];
hlist_for_each_entry_rcu(pdp, head, hlist_tid) {
if (pdp->gtp_version == GTP_V1 &&
pdp->u.v1.i_tei == tid)
return pdp;
}
return NULL;
}
/* Resolve a PDP context based on IPv4 address of MS. */
static struct pdp_ctx *ipv4_pdp_find(struct gtp_dev *gtp, __be32 ms_addr)
{
struct hlist_head *head;
struct pdp_ctx *pdp;
head = &gtp->addr_hash[ipv4_hashfn(ms_addr) % gtp->hash_size];
hlist_for_each_entry_rcu(pdp, head, hlist_addr) {
if (pdp->af == AF_INET &&
pdp->ms_addr_ip4.s_addr == ms_addr)
return pdp;
}
return NULL;
}
static bool gtp_check_ms_ipv4(struct sk_buff *skb, struct pdp_ctx *pctx,
unsigned int hdrlen, unsigned int role)
{
struct iphdr *iph;
if (!pskb_may_pull(skb, hdrlen + sizeof(struct iphdr)))
return false;
iph = (struct iphdr *)(skb->data + hdrlen);
if (role == GTP_ROLE_SGSN)
return iph->daddr == pctx->ms_addr_ip4.s_addr;
else
return iph->saddr == pctx->ms_addr_ip4.s_addr;
}
/* Check if the inner IP address in this packet is assigned to any
* existing mobile subscriber.
*/
static bool gtp_check_ms(struct sk_buff *skb, struct pdp_ctx *pctx,
unsigned int hdrlen, unsigned int role)
{
switch (ntohs(skb->protocol)) {
case ETH_P_IP:
return gtp_check_ms_ipv4(skb, pctx, hdrlen, role);
}
return false;
}
static int gtp_rx(struct pdp_ctx *pctx, struct sk_buff *skb,
unsigned int hdrlen, unsigned int role)
{
struct pcpu_sw_netstats *stats;
if (!gtp_check_ms(skb, pctx, hdrlen, role)) {
netdev_dbg(pctx->dev, "No PDP ctx for this MS\n");
return 1;
}
/* Get rid of the GTP + UDP headers. */
if (iptunnel_pull_header(skb, hdrlen, skb->protocol,
!net_eq(sock_net(pctx->sk), dev_net(pctx->dev))))
return -1;
netdev_dbg(pctx->dev, "forwarding packet from GGSN to uplink\n");
/* Now that the UDP and the GTP header have been removed, set up the
* new network header. This is required by the upper layer to
* calculate the transport header.
*/
skb_reset_network_header(skb);
skb->dev = pctx->dev;
stats = this_cpu_ptr(pctx->dev->tstats);
u64_stats_update_begin(&stats->syncp);
stats->rx_packets++;
stats->rx_bytes += skb->len;
u64_stats_update_end(&stats->syncp);
netif_rx(skb);
return 0;
}
/* 1 means pass up to the stack, -1 means drop and 0 means decapsulated. */
static int gtp0_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
{
unsigned int hdrlen = sizeof(struct udphdr) +
sizeof(struct gtp0_header);
struct gtp0_header *gtp0;
struct pdp_ctx *pctx;
if (!pskb_may_pull(skb, hdrlen))
return -1;
gtp0 = (struct gtp0_header *)(skb->data + sizeof(struct udphdr));
if ((gtp0->flags >> 5) != GTP_V0)
return 1;
if (gtp0->type != GTP_TPDU)
return 1;
pctx = gtp0_pdp_find(gtp, be64_to_cpu(gtp0->tid));
if (!pctx) {
netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb);
return 1;
}
return gtp_rx(pctx, skb, hdrlen, gtp->role);
}
static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb)
{
unsigned int hdrlen = sizeof(struct udphdr) +
sizeof(struct gtp1_header);
struct gtp1_header *gtp1;
struct pdp_ctx *pctx;
if (!pskb_may_pull(skb, hdrlen))
return -1;
gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr));
if ((gtp1->flags >> 5) != GTP_V1)
return 1;
if (gtp1->type != GTP_TPDU)
return 1;
/* From 29.060: "This field shall be present if and only if any one or
* more of the S, PN and E flags are set.".
*
* If any of the bit is set, then the remaining ones also have to be
* set.
*/
if (gtp1->flags & GTP1_F_MASK)
hdrlen += 4;
/* Make sure the header is larger enough, including extensions. */
if (!pskb_may_pull(skb, hdrlen))
return -1;
gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr));
pctx = gtp1_pdp_find(gtp, ntohl(gtp1->tid));
if (!pctx) {
netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb);
return 1;
}
return gtp_rx(pctx, skb, hdrlen, gtp->role);
}
static void __gtp_encap_destroy(struct sock *sk)
{
struct gtp_dev *gtp;
lock_sock(sk);
gtp = sk->sk_user_data;
if (gtp) {
if (gtp->sk0 == sk)
gtp->sk0 = NULL;
else
gtp->sk1u = NULL;
udp_sk(sk)->encap_type = 0;
rcu_assign_sk_user_data(sk, NULL);
sock_put(sk);
}
release_sock(sk);
}
static void gtp_encap_destroy(struct sock *sk)
{
rtnl_lock();
__gtp_encap_destroy(sk);
rtnl_unlock();
}
static void gtp_encap_disable_sock(struct sock *sk)
{
if (!sk)
return;
__gtp_encap_destroy(sk);
}
static void gtp_encap_disable(struct gtp_dev *gtp)
{
gtp_encap_disable_sock(gtp->sk0);
gtp_encap_disable_sock(gtp->sk1u);
}
/* UDP encapsulation receive handler. See net/ipv4/udp.c.
* Return codes: 0: success, <0: error, >0: pass up to userspace UDP socket.
*/
static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
struct gtp_dev *gtp;
int ret = 0;
gtp = rcu_dereference_sk_user_data(sk);
if (!gtp)
return 1;
netdev_dbg(gtp->dev, "encap_recv sk=%p\n", sk);
switch (udp_sk(sk)->encap_type) {
case UDP_ENCAP_GTP0:
netdev_dbg(gtp->dev, "received GTP0 packet\n");
ret = gtp0_udp_encap_recv(gtp, skb);
break;
case UDP_ENCAP_GTP1U:
netdev_dbg(gtp->dev, "received GTP1U packet\n");
ret = gtp1u_udp_encap_recv(gtp, skb);
break;
default:
ret = -1; /* Shouldn't happen. */
}
switch (ret) {
case 1:
netdev_dbg(gtp->dev, "pass up to the process\n");
break;
case 0:
break;
case -1:
netdev_dbg(gtp->dev, "GTP packet has been dropped\n");
kfree_skb(skb);
ret = 0;
break;
}
return ret;
}
static int gtp_dev_init(struct net_device *dev)
{
struct gtp_dev *gtp = netdev_priv(dev);
gtp->dev = dev;
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
return 0;
}
static void gtp_dev_uninit(struct net_device *dev)
{
struct gtp_dev *gtp = netdev_priv(dev);
gtp_encap_disable(gtp);
free_percpu(dev->tstats);
}
static struct rtable *ip4_route_output_gtp(struct flowi4 *fl4,
const struct sock *sk,
__be32 daddr)
{
memset(fl4, 0, sizeof(*fl4));
fl4->flowi4_oif = sk->sk_bound_dev_if;
fl4->daddr = daddr;
fl4->saddr = inet_sk(sk)->inet_saddr;
fl4->flowi4_tos = RT_CONN_FLAGS(sk);
fl4->flowi4_proto = sk->sk_protocol;
return ip_route_output_key(sock_net(sk), fl4);
}
static inline void gtp0_push_header(struct sk_buff *skb, struct pdp_ctx *pctx)
{
int payload_len = skb->len;
struct gtp0_header *gtp0;
gtp0 = skb_push(skb, sizeof(*gtp0));
gtp0->flags = 0x1e; /* v0, GTP-non-prime. */
gtp0->type = GTP_TPDU;
gtp0->length = htons(payload_len);
gtp0->seq = htons((atomic_inc_return(&pctx->tx_seq) - 1) % 0xffff);
gtp0->flow = htons(pctx->u.v0.flow);
gtp0->number = 0xff;
gtp0->spare[0] = gtp0->spare[1] = gtp0->spare[2] = 0xff;
gtp0->tid = cpu_to_be64(pctx->u.v0.tid);
}
static inline void gtp1_push_header(struct sk_buff *skb, struct pdp_ctx *pctx)
{
int payload_len = skb->len;
struct gtp1_header *gtp1;
gtp1 = skb_push(skb, sizeof(*gtp1));
/* Bits 8 7 6 5 4 3 2 1
* +--+--+--+--+--+--+--+--+
* |version |PT| 0| E| S|PN|
* +--+--+--+--+--+--+--+--+
* 0 0 1 1 1 0 0 0
*/
gtp1->flags = 0x30; /* v1, GTP-non-prime. */
gtp1->type = GTP_TPDU;
gtp1->length = htons(payload_len);
gtp1->tid = htonl(pctx->u.v1.o_tei);
/* TODO: Suppport for extension header, sequence number and N-PDU.
* Update the length field if any of them is available.
*/
}
struct gtp_pktinfo {
struct sock *sk;
struct iphdr *iph;
struct flowi4 fl4;
struct rtable *rt;
struct pdp_ctx *pctx;
struct net_device *dev;
__be16 gtph_port;
};
static void gtp_push_header(struct sk_buff *skb, struct gtp_pktinfo *pktinfo)
{
switch (pktinfo->pctx->gtp_version) {
case GTP_V0:
pktinfo->gtph_port = htons(GTP0_PORT);
gtp0_push_header(skb, pktinfo->pctx);
break;
case GTP_V1:
pktinfo->gtph_port = htons(GTP1U_PORT);
gtp1_push_header(skb, pktinfo->pctx);
break;
}
}
static inline void gtp_set_pktinfo_ipv4(struct gtp_pktinfo *pktinfo,
struct sock *sk, struct iphdr *iph,
struct pdp_ctx *pctx, struct rtable *rt,
struct flowi4 *fl4,
struct net_device *dev)
{
pktinfo->sk = sk;
pktinfo->iph = iph;
pktinfo->pctx = pctx;
pktinfo->rt = rt;
pktinfo->fl4 = *fl4;
pktinfo->dev = dev;
}
static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
struct gtp_pktinfo *pktinfo)
{
struct gtp_dev *gtp = netdev_priv(dev);
struct pdp_ctx *pctx;
struct rtable *rt;
struct flowi4 fl4;
struct iphdr *iph;
__be16 df;
int mtu;
/* Read the IP destination address and resolve the PDP context.
* Prepend PDP header with TEI/TID from PDP ctx.
*/
iph = ip_hdr(skb);
if (gtp->role == GTP_ROLE_SGSN)
pctx = ipv4_pdp_find(gtp, iph->saddr);
else
pctx = ipv4_pdp_find(gtp, iph->daddr);
if (!pctx) {
netdev_dbg(dev, "no PDP ctx found for %pI4, skip\n",
&iph->daddr);
return -ENOENT;
}
netdev_dbg(dev, "found PDP context %p\n", pctx);
rt = ip4_route_output_gtp(&fl4, pctx->sk, pctx->peer_addr_ip4.s_addr);
if (IS_ERR(rt)) {
netdev_dbg(dev, "no route to SSGN %pI4\n",
&pctx->peer_addr_ip4.s_addr);
dev->stats.tx_carrier_errors++;
goto err;
}
if (rt->dst.dev == dev) {
netdev_dbg(dev, "circular route to SSGN %pI4\n",
&pctx->peer_addr_ip4.s_addr);
dev->stats.collisions++;
goto err_rt;
}
skb_dst_drop(skb);
/* This is similar to tnl_update_pmtu(). */
df = iph->frag_off;
if (df) {
mtu = dst_mtu(&rt->dst) - dev->hard_header_len -
sizeof(struct iphdr) - sizeof(struct udphdr);
switch (pctx->gtp_version) {
case GTP_V0:
mtu -= sizeof(struct gtp0_header);
break;
case GTP_V1:
mtu -= sizeof(struct gtp1_header);
break;
}
} else {
mtu = dst_mtu(&rt->dst);
}
rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, false);
if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) &&
mtu < ntohs(iph->tot_len)) {
netdev_dbg(dev, "packet too big, fragmentation needed\n");
icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
goto err_rt;
}
gtp_set_pktinfo_ipv4(pktinfo, pctx->sk, iph, pctx, rt, &fl4, dev);
gtp_push_header(skb, pktinfo);
return 0;
err_rt:
ip_rt_put(rt);
err:
return -EBADMSG;
}
static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
unsigned int proto = ntohs(skb->protocol);
struct gtp_pktinfo pktinfo;
int err;
/* Ensure there is sufficient headroom. */
if (skb_cow_head(skb, dev->needed_headroom))
goto tx_err;
skb_reset_inner_headers(skb);
/* PDP context lookups in gtp_build_skb_*() need rcu read-side lock. */
rcu_read_lock();
switch (proto) {
case ETH_P_IP:
err = gtp_build_skb_ip4(skb, dev, &pktinfo);
break;
default:
err = -EOPNOTSUPP;
break;
}
rcu_read_unlock();
if (err < 0)
goto tx_err;
switch (proto) {
case ETH_P_IP:
netdev_dbg(pktinfo.dev, "gtp -> IP src: %pI4 dst: %pI4\n",
&pktinfo.iph->saddr, &pktinfo.iph->daddr);
udp_tunnel_xmit_skb(pktinfo.rt, pktinfo.sk, skb,
pktinfo.fl4.saddr, pktinfo.fl4.daddr,
pktinfo.iph->tos,
ip4_dst_hoplimit(&pktinfo.rt->dst),
0,
pktinfo.gtph_port, pktinfo.gtph_port,
true, false);
break;
}
return NETDEV_TX_OK;
tx_err:
dev->stats.tx_errors++;
dev_kfree_skb(skb);
return NETDEV_TX_OK;
}
static const struct net_device_ops gtp_netdev_ops = {
.ndo_init = gtp_dev_init,
.ndo_uninit = gtp_dev_uninit,
.ndo_start_xmit = gtp_dev_xmit,
.ndo_get_stats64 = ip_tunnel_get_stats64,
};
static void gtp_link_setup(struct net_device *dev)
{
dev->netdev_ops = &gtp_netdev_ops;
dev->needs_free_netdev = true;
dev->hard_header_len = 0;
dev->addr_len = 0;
/* Zero header length. */
dev->type = ARPHRD_NONE;
dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
dev->priv_flags |= IFF_NO_QUEUE;
dev->features |= NETIF_F_LLTX;
netif_keep_dst(dev);
/* Assume largest header, ie. GTPv0. */
dev->needed_headroom = LL_MAX_HEADER +
sizeof(struct iphdr) +
sizeof(struct udphdr) +
sizeof(struct gtp0_header);
}
static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize);
static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]);
static void gtp_destructor(struct net_device *dev)
{
struct gtp_dev *gtp = netdev_priv(dev);
kfree(gtp->addr_hash);
kfree(gtp->tid_hash);
}
static int gtp_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
struct gtp_dev *gtp;
struct gtp_net *gn;
int hashsize, err;
if (!data[IFLA_GTP_FD0] && !data[IFLA_GTP_FD1])
return -EINVAL;
gtp = netdev_priv(dev);
if (!data[IFLA_GTP_PDP_HASHSIZE]) {
hashsize = 1024;
} else {
hashsize = nla_get_u32(data[IFLA_GTP_PDP_HASHSIZE]);
if (!hashsize)
hashsize = 1024;
}
err = gtp_hashtable_new(gtp, hashsize);
if (err < 0)
return err;
err = gtp_encap_enable(gtp, data);
if (err < 0)
goto out_hashtable;
err = register_netdevice(dev);
if (err < 0) {
netdev_dbg(dev, "failed to register new netdev %d\n", err);
goto out_encap;
}
gn = net_generic(dev_net(dev), gtp_net_id);
list_add_rcu(&gtp->list, &gn->gtp_dev_list);
dev->priv_destructor = gtp_destructor;
netdev_dbg(dev, "registered new GTP interface\n");
return 0;
out_encap:
gtp_encap_disable(gtp);
out_hashtable:
kfree(gtp->addr_hash);
kfree(gtp->tid_hash);
return err;
}
static void gtp_dellink(struct net_device *dev, struct list_head *head)
{
struct gtp_dev *gtp = netdev_priv(dev);
struct pdp_ctx *pctx;
int i;
for (i = 0; i < gtp->hash_size; i++)
hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i], hlist_tid)
pdp_context_delete(pctx);
list_del_rcu(&gtp->list);
unregister_netdevice_queue(dev, head);
}
static const struct nla_policy gtp_policy[IFLA_GTP_MAX + 1] = {
[IFLA_GTP_FD0] = { .type = NLA_U32 },
[IFLA_GTP_FD1] = { .type = NLA_U32 },
[IFLA_GTP_PDP_HASHSIZE] = { .type = NLA_U32 },
[IFLA_GTP_ROLE] = { .type = NLA_U32 },
};
static int gtp_validate(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
if (!data)
return -EINVAL;
return 0;
}
static size_t gtp_get_size(const struct net_device *dev)
{
return nla_total_size(sizeof(__u32)); /* IFLA_GTP_PDP_HASHSIZE */
}
static int gtp_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct gtp_dev *gtp = netdev_priv(dev);
if (nla_put_u32(skb, IFLA_GTP_PDP_HASHSIZE, gtp->hash_size))
goto nla_put_failure;
return 0;
nla_put_failure:
return -EMSGSIZE;
}
static struct rtnl_link_ops gtp_link_ops __read_mostly = {
.kind = "gtp",
.maxtype = IFLA_GTP_MAX,
.policy = gtp_policy,
.priv_size = sizeof(struct gtp_dev),
.setup = gtp_link_setup,
.validate = gtp_validate,
.newlink = gtp_newlink,
.dellink = gtp_dellink,
.get_size = gtp_get_size,
.fill_info = gtp_fill_info,
};
static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize)
{
int i;
gtp->addr_hash = kmalloc_array(hsize, sizeof(struct hlist_head),
GFP_KERNEL | __GFP_NOWARN);
if (gtp->addr_hash == NULL)
return -ENOMEM;
gtp->tid_hash = kmalloc_array(hsize, sizeof(struct hlist_head),
GFP_KERNEL | __GFP_NOWARN);
if (gtp->tid_hash == NULL)
goto err1;
gtp->hash_size = hsize;
for (i = 0; i < hsize; i++) {
INIT_HLIST_HEAD(&gtp->addr_hash[i]);
INIT_HLIST_HEAD(&gtp->tid_hash[i]);
}
return 0;
err1:
kfree(gtp->addr_hash);
return -ENOMEM;
}
static struct sock *gtp_encap_enable_socket(int fd, int type,
struct gtp_dev *gtp)
{
struct udp_tunnel_sock_cfg tuncfg = {NULL};
struct socket *sock;
struct sock *sk;
int err;
pr_debug("enable gtp on %d, %d\n", fd, type);
sock = sockfd_lookup(fd, &err);
if (!sock) {
pr_debug("gtp socket fd=%d not found\n", fd);
return NULL;
}
sk = sock->sk;
if (sk->sk_protocol != IPPROTO_UDP ||
sk->sk_type != SOCK_DGRAM ||
(sk->sk_family != AF_INET && sk->sk_family != AF_INET6)) {
pr_debug("socket fd=%d not UDP\n", fd);
sk = ERR_PTR(-EINVAL);
goto out_sock;
}
lock_sock(sk);
if (sk->sk_user_data) {
sk = ERR_PTR(-EBUSY);
goto out_rel_sock;
}
sock_hold(sk);
tuncfg.sk_user_data = gtp;
tuncfg.encap_type = type;
tuncfg.encap_rcv = gtp_encap_recv;
tuncfg.encap_destroy = gtp_encap_destroy;
setup_udp_tunnel_sock(sock_net(sock->sk), sock, &tuncfg);
out_rel_sock:
release_sock(sock->sk);
out_sock:
sockfd_put(sock);
return sk;
}
static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[])
{
struct sock *sk1u = NULL;
struct sock *sk0 = NULL;
unsigned int role = GTP_ROLE_GGSN;
if (data[IFLA_GTP_FD0]) {
u32 fd0 = nla_get_u32(data[IFLA_GTP_FD0]);
sk0 = gtp_encap_enable_socket(fd0, UDP_ENCAP_GTP0, gtp);
if (IS_ERR(sk0))
return PTR_ERR(sk0);
}
if (data[IFLA_GTP_FD1]) {
u32 fd1 = nla_get_u32(data[IFLA_GTP_FD1]);
sk1u = gtp_encap_enable_socket(fd1, UDP_ENCAP_GTP1U, gtp);
if (IS_ERR(sk1u)) {
if (sk0)
gtp_encap_disable_sock(sk0);
return PTR_ERR(sk1u);
}
}
if (data[IFLA_GTP_ROLE]) {
role = nla_get_u32(data[IFLA_GTP_ROLE]);
if (role > GTP_ROLE_SGSN) {
if (sk0)
gtp_encap_disable_sock(sk0);
if (sk1u)
gtp_encap_disable_sock(sk1u);
return -EINVAL;
}
}
gtp->sk0 = sk0;
gtp->sk1u = sk1u;
gtp->role = role;
return 0;
}
static struct gtp_dev *gtp_find_dev(struct net *src_net, struct nlattr *nla[])
{
struct gtp_dev *gtp = NULL;
struct net_device *dev;
struct net *net;
/* Examine the link attributes and figure out which network namespace
* we are talking about.
*/
if (nla[GTPA_NET_NS_FD])
net = get_net_ns_by_fd(nla_get_u32(nla[GTPA_NET_NS_FD]));
else
net = get_net(src_net);
if (IS_ERR(net))
return NULL;
/* Check if there's an existing gtpX device to configure */
dev = dev_get_by_index_rcu(net, nla_get_u32(nla[GTPA_LINK]));
if (dev && dev->netdev_ops == &gtp_netdev_ops)
gtp = netdev_priv(dev);
put_net(net);
return gtp;
}
static void ipv4_pdp_fill(struct pdp_ctx *pctx, struct genl_info *info)
{
pctx->gtp_version = nla_get_u32(info->attrs[GTPA_VERSION]);
pctx->af = AF_INET;
pctx->peer_addr_ip4.s_addr =
nla_get_be32(info->attrs[GTPA_PEER_ADDRESS]);
pctx->ms_addr_ip4.s_addr =
nla_get_be32(info->attrs[GTPA_MS_ADDRESS]);
switch (pctx->gtp_version) {
case GTP_V0:
/* According to TS 09.60, sections 7.5.1 and 7.5.2, the flow
* label needs to be the same for uplink and downlink packets,
* so let's annotate this.
*/
pctx->u.v0.tid = nla_get_u64(info->attrs[GTPA_TID]);
pctx->u.v0.flow = nla_get_u16(info->attrs[GTPA_FLOW]);
break;
case GTP_V1:
pctx->u.v1.i_tei = nla_get_u32(info->attrs[GTPA_I_TEI]);
pctx->u.v1.o_tei = nla_get_u32(info->attrs[GTPA_O_TEI]);
break;
default:
break;
}
}
static int gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk,
struct genl_info *info)
{
struct pdp_ctx *pctx, *pctx_tid = NULL;
struct net_device *dev = gtp->dev;
u32 hash_ms, hash_tid = 0;
unsigned int version;
bool found = false;
__be32 ms_addr;
ms_addr = nla_get_be32(info->attrs[GTPA_MS_ADDRESS]);
hash_ms = ipv4_hashfn(ms_addr) % gtp->hash_size;
version = nla_get_u32(info->attrs[GTPA_VERSION]);
pctx = ipv4_pdp_find(gtp, ms_addr);
if (pctx)
found = true;
if (version == GTP_V0)
pctx_tid = gtp0_pdp_find(gtp,
nla_get_u64(info->attrs[GTPA_TID]));
else if (version == GTP_V1)
pctx_tid = gtp1_pdp_find(gtp,
nla_get_u32(info->attrs[GTPA_I_TEI]));
if (pctx_tid)
found = true;
if (found) {
if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
return -EEXIST;
if (info->nlhdr->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
if (pctx && pctx_tid)
return -EEXIST;
if (!pctx)
pctx = pctx_tid;
ipv4_pdp_fill(pctx, info);
if (pctx->gtp_version == GTP_V0)
netdev_dbg(dev, "GTPv0-U: update tunnel id = %llx (pdp %p)\n",
pctx->u.v0.tid, pctx);
else if (pctx->gtp_version == GTP_V1)
netdev_dbg(dev, "GTPv1-U: update tunnel id = %x/%x (pdp %p)\n",
pctx->u.v1.i_tei, pctx->u.v1.o_tei, pctx);
return 0;
}
pctx = kmalloc(sizeof(*pctx), GFP_ATOMIC);
if (pctx == NULL)
return -ENOMEM;
sock_hold(sk);
pctx->sk = sk;
pctx->dev = gtp->dev;
ipv4_pdp_fill(pctx, info);
atomic_set(&pctx->tx_seq, 0);
switch (pctx->gtp_version) {
case GTP_V0:
/* TS 09.60: "The flow label identifies unambiguously a GTP
* flow.". We use the tid for this instead, I cannot find a
* situation in which this doesn't unambiguosly identify the
* PDP context.
*/
hash_tid = gtp0_hashfn(pctx->u.v0.tid) % gtp->hash_size;
break;
case GTP_V1:
hash_tid = gtp1u_hashfn(pctx->u.v1.i_tei) % gtp->hash_size;
break;
}
hlist_add_head_rcu(&pctx->hlist_addr, &gtp->addr_hash[hash_ms]);
hlist_add_head_rcu(&pctx->hlist_tid, &gtp->tid_hash[hash_tid]);
switch (pctx->gtp_version) {
case GTP_V0:
netdev_dbg(dev, "GTPv0-U: new PDP ctx id=%llx ssgn=%pI4 ms=%pI4 (pdp=%p)\n",
pctx->u.v0.tid, &pctx->peer_addr_ip4,
&pctx->ms_addr_ip4, pctx);
break;
case GTP_V1:
netdev_dbg(dev, "GTPv1-U: new PDP ctx id=%x/%x ssgn=%pI4 ms=%pI4 (pdp=%p)\n",
pctx->u.v1.i_tei, pctx->u.v1.o_tei,
&pctx->peer_addr_ip4, &pctx->ms_addr_ip4, pctx);
break;
}
return 0;
}
static void pdp_context_free(struct rcu_head *head)
{
struct pdp_ctx *pctx = container_of(head, struct pdp_ctx, rcu_head);
sock_put(pctx->sk);
kfree(pctx);
}
static void pdp_context_delete(struct pdp_ctx *pctx)
{
hlist_del_rcu(&pctx->hlist_tid);
hlist_del_rcu(&pctx->hlist_addr);
call_rcu(&pctx->rcu_head, pdp_context_free);
}
static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info)
{
unsigned int version;
struct gtp_dev *gtp;
struct sock *sk;
int err;
if (!info->attrs[GTPA_VERSION] ||
!info->attrs[GTPA_LINK] ||
!info->attrs[GTPA_PEER_ADDRESS] ||
!info->attrs[GTPA_MS_ADDRESS])
return -EINVAL;
version = nla_get_u32(info->attrs[GTPA_VERSION]);
switch (version) {
case GTP_V0:
if (!info->attrs[GTPA_TID] ||
!info->attrs[GTPA_FLOW])
return -EINVAL;
break;
case GTP_V1:
if (!info->attrs[GTPA_I_TEI] ||
!info->attrs[GTPA_O_TEI])
return -EINVAL;
break;
default:
return -EINVAL;
}
rtnl_lock();
rcu_read_lock();
gtp = gtp_find_dev(sock_net(skb->sk), info->attrs);
if (!gtp) {
err = -ENODEV;
goto out_unlock;
}
if (version == GTP_V0)
sk = gtp->sk0;
else if (version == GTP_V1)
sk = gtp->sk1u;
else
sk = NULL;
if (!sk) {
err = -ENODEV;
goto out_unlock;
}
err = gtp_pdp_add(gtp, sk, info);
out_unlock:
rcu_read_unlock();
rtnl_unlock();
return err;
}
static struct pdp_ctx *gtp_find_pdp_by_link(struct net *net,
struct nlattr *nla[])
{
struct gtp_dev *gtp;
gtp = gtp_find_dev(net, nla);
if (!gtp)
return ERR_PTR(-ENODEV);
if (nla[GTPA_MS_ADDRESS]) {
__be32 ip = nla_get_be32(nla[GTPA_MS_ADDRESS]);
return ipv4_pdp_find(gtp, ip);
} else if (nla[GTPA_VERSION]) {
u32 gtp_version = nla_get_u32(nla[GTPA_VERSION]);
if (gtp_version == GTP_V0 && nla[GTPA_TID])
return gtp0_pdp_find(gtp, nla_get_u64(nla[GTPA_TID]));
else if (gtp_version == GTP_V1 && nla[GTPA_I_TEI])
return gtp1_pdp_find(gtp, nla_get_u32(nla[GTPA_I_TEI]));
}
return ERR_PTR(-EINVAL);
}
static struct pdp_ctx *gtp_find_pdp(struct net *net, struct nlattr *nla[])
{
struct pdp_ctx *pctx;
if (nla[GTPA_LINK])
pctx = gtp_find_pdp_by_link(net, nla);
else
pctx = ERR_PTR(-EINVAL);
if (!pctx)
pctx = ERR_PTR(-ENOENT);
return pctx;
}
static int gtp_genl_del_pdp(struct sk_buff *skb, struct genl_info *info)
{
struct pdp_ctx *pctx;
int err = 0;
if (!info->attrs[GTPA_VERSION])
return -EINVAL;
rcu_read_lock();
pctx = gtp_find_pdp(sock_net(skb->sk), info->attrs);
if (IS_ERR(pctx)) {
err = PTR_ERR(pctx);
goto out_unlock;
}
if (pctx->gtp_version == GTP_V0)
netdev_dbg(pctx->dev, "GTPv0-U: deleting tunnel id = %llx (pdp %p)\n",
pctx->u.v0.tid, pctx);
else if (pctx->gtp_version == GTP_V1)
netdev_dbg(pctx->dev, "GTPv1-U: deleting tunnel id = %x/%x (pdp %p)\n",
pctx->u.v1.i_tei, pctx->u.v1.o_tei, pctx);
pdp_context_delete(pctx);
out_unlock:
rcu_read_unlock();
return err;
}
static struct genl_family gtp_genl_family;
static int gtp_genl_fill_info(struct sk_buff *skb, u32 snd_portid, u32 snd_seq,
int flags, u32 type, struct pdp_ctx *pctx)
{
void *genlh;
genlh = genlmsg_put(skb, snd_portid, snd_seq, &gtp_genl_family, flags,
type);
if (genlh == NULL)
goto nlmsg_failure;
if (nla_put_u32(skb, GTPA_VERSION, pctx->gtp_version) ||
nla_put_u32(skb, GTPA_LINK, pctx->dev->ifindex) ||
nla_put_be32(skb, GTPA_PEER_ADDRESS, pctx->peer_addr_ip4.s_addr) ||
nla_put_be32(skb, GTPA_MS_ADDRESS, pctx->ms_addr_ip4.s_addr))
goto nla_put_failure;
switch (pctx->gtp_version) {
case GTP_V0:
if (nla_put_u64_64bit(skb, GTPA_TID, pctx->u.v0.tid, GTPA_PAD) ||
nla_put_u16(skb, GTPA_FLOW, pctx->u.v0.flow))
goto nla_put_failure;
break;
case GTP_V1:
if (nla_put_u32(skb, GTPA_I_TEI, pctx->u.v1.i_tei) ||
nla_put_u32(skb, GTPA_O_TEI, pctx->u.v1.o_tei))
goto nla_put_failure;
break;
}
genlmsg_end(skb, genlh);
return 0;
nlmsg_failure:
nla_put_failure:
genlmsg_cancel(skb, genlh);
return -EMSGSIZE;
}
static int gtp_genl_get_pdp(struct sk_buff *skb, struct genl_info *info)
{
struct pdp_ctx *pctx = NULL;
struct sk_buff *skb2;
int err;
if (!info->attrs[GTPA_VERSION])
return -EINVAL;
rcu_read_lock();
pctx = gtp_find_pdp(sock_net(skb->sk), info->attrs);
if (IS_ERR(pctx)) {
err = PTR_ERR(pctx);
goto err_unlock;
}
skb2 = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
if (skb2 == NULL) {
err = -ENOMEM;
goto err_unlock;
}
err = gtp_genl_fill_info(skb2, NETLINK_CB(skb).portid, info->snd_seq,
0, info->nlhdr->nlmsg_type, pctx);
if (err < 0)
goto err_unlock_free;
rcu_read_unlock();
return genlmsg_unicast(genl_info_net(info), skb2, info->snd_portid);
err_unlock_free:
kfree_skb(skb2);
err_unlock:
rcu_read_unlock();
return err;
}
static int gtp_genl_dump_pdp(struct sk_buff *skb,
struct netlink_callback *cb)
{
struct gtp_dev *last_gtp = (struct gtp_dev *)cb->args[2], *gtp;
int i, j, bucket = cb->args[0], skip = cb->args[1];
struct net *net = sock_net(skb->sk);
struct pdp_ctx *pctx;
struct gtp_net *gn;
gn = net_generic(net, gtp_net_id);
if (cb->args[4])
return 0;
rcu_read_lock();
list_for_each_entry_rcu(gtp, &gn->gtp_dev_list, list) {
if (last_gtp && last_gtp != gtp)
continue;
else
last_gtp = NULL;
for (i = bucket; i < gtp->hash_size; i++) {
j = 0;
hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i],
hlist_tid) {
if (j >= skip &&
gtp_genl_fill_info(skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI,
cb->nlh->nlmsg_type, pctx)) {
cb->args[0] = i;
cb->args[1] = j;
cb->args[2] = (unsigned long)gtp;
goto out;
}
j++;
}
skip = 0;
}
bucket = 0;
}
cb->args[4] = 1;
out:
rcu_read_unlock();
return skb->len;
}
static const struct nla_policy gtp_genl_policy[GTPA_MAX + 1] = {
[GTPA_LINK] = { .type = NLA_U32, },
[GTPA_VERSION] = { .type = NLA_U32, },
[GTPA_TID] = { .type = NLA_U64, },
[GTPA_PEER_ADDRESS] = { .type = NLA_U32, },
[GTPA_MS_ADDRESS] = { .type = NLA_U32, },
[GTPA_FLOW] = { .type = NLA_U16, },
[GTPA_NET_NS_FD] = { .type = NLA_U32, },
[GTPA_I_TEI] = { .type = NLA_U32, },
[GTPA_O_TEI] = { .type = NLA_U32, },
};
static const struct genl_ops gtp_genl_ops[] = {
{
.cmd = GTP_CMD_NEWPDP,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = gtp_genl_new_pdp,
.flags = GENL_ADMIN_PERM,
},
{
.cmd = GTP_CMD_DELPDP,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = gtp_genl_del_pdp,
.flags = GENL_ADMIN_PERM,
},
{
.cmd = GTP_CMD_GETPDP,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = gtp_genl_get_pdp,
.dumpit = gtp_genl_dump_pdp,
.flags = GENL_ADMIN_PERM,
},
};
static struct genl_family gtp_genl_family __ro_after_init = {
.name = "gtp",
.version = 0,
.hdrsize = 0,
.maxattr = GTPA_MAX,
.policy = gtp_genl_policy,
.netnsok = true,
.module = THIS_MODULE,
.ops = gtp_genl_ops,
.n_ops = ARRAY_SIZE(gtp_genl_ops),
};
static int __net_init gtp_net_init(struct net *net)
{
struct gtp_net *gn = net_generic(net, gtp_net_id);
INIT_LIST_HEAD(&gn->gtp_dev_list);
return 0;
}
static void __net_exit gtp_net_exit(struct net *net)
{
struct gtp_net *gn = net_generic(net, gtp_net_id);
struct gtp_dev *gtp;
LIST_HEAD(list);
rtnl_lock();
list_for_each_entry(gtp, &gn->gtp_dev_list, list)
gtp_dellink(gtp->dev, &list);
unregister_netdevice_many(&list);
rtnl_unlock();
}
static struct pernet_operations gtp_net_ops = {
.init = gtp_net_init,
.exit = gtp_net_exit,
.id = &gtp_net_id,
.size = sizeof(struct gtp_net),
};
static int __init gtp_init(void)
{
int err;
get_random_bytes(&gtp_h_initval, sizeof(gtp_h_initval));
err = rtnl_link_register(&gtp_link_ops);
if (err < 0)
goto error_out;
err = genl_register_family(&gtp_genl_family);
if (err < 0)
goto unreg_rtnl_link;
err = register_pernet_subsys(&gtp_net_ops);
if (err < 0)
goto unreg_genl_family;
pr_info("GTP module loaded (pdp ctx size %zd bytes)\n",
sizeof(struct pdp_ctx));
return 0;
unreg_genl_family:
genl_unregister_family(&gtp_genl_family);
unreg_rtnl_link:
rtnl_link_unregister(&gtp_link_ops);
error_out:
pr_err("error loading GTP module loaded\n");
return err;
}
late_initcall(gtp_init);
static void __exit gtp_fini(void)
{
genl_unregister_family(&gtp_genl_family);
rtnl_link_unregister(&gtp_link_ops);
unregister_pernet_subsys(&gtp_net_ops);
pr_info("GTP module unloaded\n");
}
module_exit(gtp_fini);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <hwelte@sysmocom.de>");
MODULE_DESCRIPTION("Interface driver for GTP encapsulated traffic");
MODULE_ALIAS_RTNL_LINK("gtp");
MODULE_ALIAS_GENL_FAMILY("gtp");