Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
 * This is a module which is used for logging packets to userspace via
 * nfnetlink.
0005  *
0006  * (C) 2005 by Harald Welte <laforge@netfilter.org>
0007  * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
0008  *
0009  * Based on the old ipv4-only ipt_ULOG.c:
0010  * (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
0011  */
0012 
0013 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0014 
0015 #include <linux/module.h>
0016 #include <linux/skbuff.h>
0017 #include <linux/if_arp.h>
0018 #include <linux/init.h>
0019 #include <linux/ip.h>
0020 #include <linux/ipv6.h>
0021 #include <linux/netdevice.h>
0022 #include <linux/netfilter.h>
0023 #include <linux/netfilter_bridge.h>
0024 #include <net/netlink.h>
0025 #include <linux/netfilter/nfnetlink.h>
0026 #include <linux/netfilter/nfnetlink_log.h>
0027 #include <linux/netfilter/nf_conntrack_common.h>
0028 #include <linux/spinlock.h>
0029 #include <linux/sysctl.h>
0030 #include <linux/proc_fs.h>
0031 #include <linux/security.h>
0032 #include <linux/list.h>
0033 #include <linux/slab.h>
0034 #include <net/sock.h>
0035 #include <net/netfilter/nf_log.h>
0036 #include <net/netns/generic.h>
0037 
0038 #include <linux/atomic.h>
0039 #include <linux/refcount.h>
0040 
0041 
0042 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
0043 #include "../bridge/br_private.h"
0044 #endif
0045 
0046 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
0047 #include <net/netfilter/nf_conntrack.h>
0048 #endif
0049 
0050 #define NFULNL_COPY_DISABLED    0xff
0051 #define NFULNL_NLBUFSIZ_DEFAULT NLMSG_GOODSIZE
0052 #define NFULNL_TIMEOUT_DEFAULT  100 /* every second */
0053 #define NFULNL_QTHRESH_DEFAULT  100 /* 100 packets */
0054 /* max packet size is limited by 16-bit struct nfattr nfa_len field */
0055 #define NFULNL_COPY_RANGE_MAX   (0xFFFF - NLA_HDRLEN)
0056 
/* Rate-limited printk() for error paths that can trigger per-packet.
 * Note: no trailing semicolon after "while (0)" -- the caller supplies
 * it, which keeps the macro safe inside if/else bodies (the stray
 * semicolon previously expanded PRINTR(...); into two statements).
 */
#define PRINTR(x, args...)  do { if (net_ratelimit()) \
                     printk(x, ## args); } while (0)
0059 
/* One logging instance, i.e. one netlink group a userspace listener has
 * bound to.  Log messages are batched into ->skb and flushed to the peer
 * either when ->qthreshold messages have accumulated or when ->timer
 * expires.  Lifetime is managed by ->use (refcount) with RCU-deferred
 * freeing; ->lock serializes batching against flush/config changes.
 */
struct nfulnl_instance {
    struct hlist_node hlist;    /* global list of instances */
    spinlock_t lock;
    refcount_t use;         /* use count */

    unsigned int qlen;      /* number of nlmsgs in skb */
    struct sk_buff *skb;        /* pre-allocated skb */
    struct timer_list timer;
    struct net *net;
    netns_tracker ns_tracker;
    struct user_namespace *peer_user_ns;    /* User namespace of the peer process */
    u32 peer_portid;        /* PORTID of the peer process */

    /* configurable parameters */
    unsigned int flushtimeout;  /* timeout until queue flush */
    unsigned int nlbufsiz;      /* netlink buffer allocation size */
    unsigned int qthreshold;    /* threshold of the queue */
    u_int32_t copy_range;
    u_int32_t seq;          /* instance-local sequential counter */
    u_int16_t group_num;        /* number of this queue */
    u_int16_t flags;
    u_int8_t copy_mode;
    struct rcu_head rcu;        /* deferred free via nfulnl_instance_free_rcu */
};
0084 
0085 #define INSTANCE_BUCKETS    16
0086 
0087 static unsigned int nfnl_log_net_id __read_mostly;
0088 
/* Per-network-namespace state: the instance hash table (keyed by group
 * number), its lock, and the namespace-wide sequence counter used for
 * NFULA_SEQ_GLOBAL.
 */
struct nfnl_log_net {
    spinlock_t instances_lock;
    struct hlist_head instance_table[INSTANCE_BUCKETS];
    atomic_t global_seq;
};
0094 
/* Return this netns' nfnetlink_log state (registered under nfnl_log_net_id). */
static struct nfnl_log_net *nfnl_log_pernet(struct net *net)
{
    return net_generic(net, nfnl_log_net_id);
}
0099 
0100 static inline u_int8_t instance_hashfn(u_int16_t group_num)
0101 {
0102     return ((group_num & 0xff) % INSTANCE_BUCKETS);
0103 }
0104 
/* Look up the instance bound to @group_num, or NULL if none.
 * Traverses the hash chain with RCU list primitives; callers must hold
 * either the RCU read lock or log->instances_lock.  Does not take a
 * reference -- see instance_lookup_get() for the refcounted variant.
 */
static struct nfulnl_instance *
__instance_lookup(struct nfnl_log_net *log, u_int16_t group_num)
{
    struct hlist_head *head;
    struct nfulnl_instance *inst;

    head = &log->instance_table[instance_hashfn(group_num)];
    hlist_for_each_entry_rcu(inst, head, hlist) {
        if (inst->group_num == group_num)
            return inst;
    }
    return NULL;
}
0118 
/* Take an additional reference on an instance the caller already holds. */
static inline void
instance_get(struct nfulnl_instance *inst)
{
    refcount_inc(&inst->use);
}
0124 
/* Look up @group_num and return it with a reference held, or NULL.
 * refcount_inc_not_zero() closes the race with a concurrent destroy:
 * an instance whose refcount already dropped to zero is treated as gone
 * even though RCU may still let us see it on the chain.
 */
static struct nfulnl_instance *
instance_lookup_get(struct nfnl_log_net *log, u_int16_t group_num)
{
    struct nfulnl_instance *inst;

    rcu_read_lock_bh();
    inst = __instance_lookup(log, group_num);
    if (inst && !refcount_inc_not_zero(&inst->use))
        inst = NULL;
    rcu_read_unlock_bh();

    return inst;
}
0138 
/* RCU callback: final teardown once no reader can still see the instance.
 * Releases the netns reference taken in instance_create() and the module
 * reference that pinned us while the instance existed.
 */
static void nfulnl_instance_free_rcu(struct rcu_head *head)
{
    struct nfulnl_instance *inst =
        container_of(head, struct nfulnl_instance, rcu);

    put_net_track(inst->net, &inst->ns_tracker);
    kfree(inst);
    module_put(THIS_MODULE);
}
0148 
/* Drop a reference; on the last put, schedule RCU-deferred freeing.
 * NULL-safe so error paths can call it unconditionally.
 */
static void
instance_put(struct nfulnl_instance *inst)
{
    if (inst && refcount_dec_and_test(&inst->use))
        call_rcu(&inst->rcu, nfulnl_instance_free_rcu);
}
0155 
0156 static void nfulnl_timer(struct timer_list *t);
0157 
/* Create a new instance for @group_num, owned by the peer identified by
 * @portid/@user_ns, insert it into the per-netns hash table and return
 * it with a reference held by the caller.  Returns ERR_PTR(-EEXIST) if
 * the group is already bound, -ENOMEM/-EAGAIN on allocation or module
 * refcount failure.  Takes log->instances_lock (BH-disabled) for the
 * lookup-and-insert to be atomic; allocations therefore use GFP_ATOMIC.
 */
static struct nfulnl_instance *
instance_create(struct net *net, u_int16_t group_num,
        u32 portid, struct user_namespace *user_ns)
{
    struct nfulnl_instance *inst;
    struct nfnl_log_net *log = nfnl_log_pernet(net);
    int err;

    spin_lock_bh(&log->instances_lock);
    if (__instance_lookup(log, group_num)) {
        err = -EEXIST;
        goto out_unlock;
    }

    inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
    if (!inst) {
        err = -ENOMEM;
        goto out_unlock;
    }

    /* pin the module for as long as the instance exists */
    if (!try_module_get(THIS_MODULE)) {
        kfree(inst);
        err = -EAGAIN;
        goto out_unlock;
    }

    INIT_HLIST_NODE(&inst->hlist);
    spin_lock_init(&inst->lock);
    /* needs to be two, since we _put() after creation */
    refcount_set(&inst->use, 2);

    timer_setup(&inst->timer, nfulnl_timer, 0);

    inst->net = get_net_track(net, &inst->ns_tracker, GFP_ATOMIC);
    inst->peer_user_ns = user_ns;
    inst->peer_portid = portid;
    inst->group_num = group_num;

    inst->qthreshold    = NFULNL_QTHRESH_DEFAULT;
    inst->flushtimeout  = NFULNL_TIMEOUT_DEFAULT;
    inst->nlbufsiz      = NFULNL_NLBUFSIZ_DEFAULT;
    inst->copy_mode     = NFULNL_COPY_PACKET;
    inst->copy_range    = NFULNL_COPY_RANGE_MAX;

    hlist_add_head_rcu(&inst->hlist,
               &log->instance_table[instance_hashfn(group_num)]);


    spin_unlock_bh(&log->instances_lock);

    return inst;

out_unlock:
    spin_unlock_bh(&log->instances_lock);
    return ERR_PTR(err);
}
0214 
0215 static void __nfulnl_flush(struct nfulnl_instance *inst);
0216 
/* Tear down an instance: unlink it, flush any batched messages, and drop
 * the hash table's reference.  Called with BH disabled and with
 * log->instances_lock held by the caller.
 */
static void
__instance_destroy(struct nfulnl_instance *inst)
{
    /* first pull it out of the global list */
    hlist_del_rcu(&inst->hlist);

    /* then flush all pending packets from skb */

    spin_lock(&inst->lock);

    /* lockless readers won't be able to use us */
    inst->copy_mode = NFULNL_COPY_DISABLED;

    if (inst->skb)
        __nfulnl_flush(inst);
    spin_unlock(&inst->lock);

    /* and finally put the refcount */
    instance_put(inst);
}
0238 
/* Locked wrapper around __instance_destroy() for callers that do not
 * already hold log->instances_lock.
 */
static inline void
instance_destroy(struct nfnl_log_net *log,
         struct nfulnl_instance *inst)
{
    spin_lock_bh(&log->instances_lock);
    __instance_destroy(inst);
    spin_unlock_bh(&log->instances_lock);
}
0247 
0248 static int
0249 nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode,
0250           unsigned int range)
0251 {
0252     int status = 0;
0253 
0254     spin_lock_bh(&inst->lock);
0255 
0256     switch (mode) {
0257     case NFULNL_COPY_NONE:
0258     case NFULNL_COPY_META:
0259         inst->copy_mode = mode;
0260         inst->copy_range = 0;
0261         break;
0262 
0263     case NFULNL_COPY_PACKET:
0264         inst->copy_mode = mode;
0265         if (range == 0)
0266             range = NFULNL_COPY_RANGE_MAX;
0267         inst->copy_range = min_t(unsigned int,
0268                      range, NFULNL_COPY_RANGE_MAX);
0269         break;
0270 
0271     default:
0272         status = -EINVAL;
0273         break;
0274     }
0275 
0276     spin_unlock_bh(&inst->lock);
0277 
0278     return status;
0279 }
0280 
0281 static int
0282 nfulnl_set_nlbufsiz(struct nfulnl_instance *inst, u_int32_t nlbufsiz)
0283 {
0284     int status;
0285 
0286     spin_lock_bh(&inst->lock);
0287     if (nlbufsiz < NFULNL_NLBUFSIZ_DEFAULT)
0288         status = -ERANGE;
0289     else if (nlbufsiz > 131072)
0290         status = -ERANGE;
0291     else {
0292         inst->nlbufsiz = nlbufsiz;
0293         status = 0;
0294     }
0295     spin_unlock_bh(&inst->lock);
0296 
0297     return status;
0298 }
0299 
/* Set the flush timeout (in 1/100ths of a second -- see nfulnl_log_packet,
 * which arms the timer with flushtimeout*HZ/100).
 */
static void
nfulnl_set_timeout(struct nfulnl_instance *inst, u_int32_t timeout)
{
    spin_lock_bh(&inst->lock);
    inst->flushtimeout = timeout;
    spin_unlock_bh(&inst->lock);
}
0307 
/* Set the per-instance queue threshold: flush once this many messages
 * have been batched into the skb.
 */
static void
nfulnl_set_qthresh(struct nfulnl_instance *inst, u_int32_t qthresh)
{
    spin_lock_bh(&inst->lock);
    inst->qthreshold = qthresh;
    spin_unlock_bh(&inst->lock);
}
0315 
/* Replace the instance's NFULNL_CFG_F_* flags wholesale.  Always
 * succeeds; flag validity is checked by the caller (nfulnl_recv_config).
 */
static int
nfulnl_set_flags(struct nfulnl_instance *inst, u_int16_t flags)
{
    spin_lock_bh(&inst->lock);
    inst->flags = flags;
    spin_unlock_bh(&inst->lock);

    return 0;
}
0325 
0326 static struct sk_buff *
0327 nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size,
0328          unsigned int pkt_size)
0329 {
0330     struct sk_buff *skb;
0331     unsigned int n;
0332 
0333     /* alloc skb which should be big enough for a whole multipart
0334      * message.  WARNING: has to be <= 128k due to slab restrictions */
0335 
0336     n = max(inst_size, pkt_size);
0337     skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN);
0338     if (!skb) {
0339         if (n > pkt_size) {
0340             /* try to allocate only as much as we need for current
0341              * packet */
0342 
0343             skb = alloc_skb(pkt_size, GFP_ATOMIC);
0344         }
0345     }
0346 
0347     return skb;
0348 }
0349 
/* Hand the batched skb to the peer.  A multi-message batch needs a
 * terminating NLMSG_DONE; if appending it does not fit, the skb was
 * accounted wrongly somewhere, so warn and drop rather than send a
 * malformed batch.  Caller holds inst->lock.  On return the instance
 * no longer owns an skb (nfnetlink_unicast consumes it).
 */
static void
__nfulnl_send(struct nfulnl_instance *inst)
{
    if (inst->qlen > 1) {
        struct nlmsghdr *nlh = nlmsg_put(inst->skb, 0, 0,
                         NLMSG_DONE,
                         sizeof(struct nfgenmsg),
                         0);
        if (WARN_ONCE(!nlh, "bad nlskb size: %u, tailroom %d\n",
                  inst->skb->len, skb_tailroom(inst->skb))) {
            kfree_skb(inst->skb);
            goto out;
        }
    }
    nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid);
out:
    inst->qlen = 0;
    inst->skb = NULL;
}
0369 
/* Flush pending messages immediately: cancel the flush timer (dropping
 * the reference the timer held, if it was armed) and send the batch.
 * Caller holds inst->lock.
 */
static void
__nfulnl_flush(struct nfulnl_instance *inst)
{
    /* timer holds a reference */
    if (del_timer(&inst->timer))
        instance_put(inst);
    if (inst->skb)
        __nfulnl_send(inst);
}
0379 
/* Flush-timer expiry: send whatever has been batched so far, then drop
 * the reference that was taken when the timer was armed.
 */
static void
nfulnl_timer(struct timer_list *t)
{
    struct nfulnl_instance *inst = from_timer(inst, t, timer);

    spin_lock_bh(&inst->lock);
    if (inst->skb)
        __nfulnl_send(inst);
    spin_unlock_bh(&inst->lock);
    instance_put(inst);
}
0391 
0392 static u32 nfulnl_get_bridge_size(const struct sk_buff *skb)
0393 {
0394     u32 size = 0;
0395 
0396     if (!skb_mac_header_was_set(skb))
0397         return 0;
0398 
0399     if (skb_vlan_tag_present(skb)) {
0400         size += nla_total_size(0); /* nested */
0401         size += nla_total_size(sizeof(u16)); /* id */
0402         size += nla_total_size(sizeof(u16)); /* tag */
0403     }
0404 
0405     if (skb->network_header > skb->mac_header)
0406         size += nla_total_size(skb->network_header - skb->mac_header);
0407 
0408     return size;
0409 }
0410 
/* Emit bridge/L2 attributes into the batch skb: a nested NFULA_VLAN
 * (TCI + protocol) when the packet carries a VLAN tag, and the raw link
 * layer header as NFULA_L2HDR.  Returns 0 on success, -1 if the skb ran
 * out of room (sizing is done by nfulnl_get_bridge_size()).
 */
static int nfulnl_put_bridge(struct nfulnl_instance *inst, const struct sk_buff *skb)
{
    if (!skb_mac_header_was_set(skb))
        return 0;

    if (skb_vlan_tag_present(skb)) {
        struct nlattr *nest;

        nest = nla_nest_start(inst->skb, NFULA_VLAN);
        if (!nest)
            goto nla_put_failure;

        if (nla_put_be16(inst->skb, NFULA_VLAN_TCI, htons(skb->vlan_tci)) ||
            nla_put_be16(inst->skb, NFULA_VLAN_PROTO, skb->vlan_proto))
            goto nla_put_failure;

        nla_nest_end(inst->skb, nest);
    }

    if (skb->mac_header < skb->network_header) {
        int len = (int)(skb->network_header - skb->mac_header);

        if (nla_put(inst->skb, NFULA_L2HDR, len, skb_mac_header(skb)))
            goto nla_put_failure;
    }

    return 0;

nla_put_failure:
    return -1;
}
0442 
/* Serialize one logged packet into inst->skb as an NFULNL_MSG_PACKET
 * netlink message: header, prefix, in/out interface indexes (with
 * bridge physdev handling), mark, hardware header, timestamp, socket
 * UID/GID, sequence numbers, conntrack info, bridge attributes and
 * finally up to @data_len bytes of payload.  Returns 0 on success, -1
 * if an attribute did not fit (the caller sized the skb, so this
 * normally cannot happen).  Called with inst->lock held.
 * This is an inline function, we don't really care about a long
 * list of arguments.
 */
static inline int
__build_packet_message(struct nfnl_log_net *log,
            struct nfulnl_instance *inst,
            const struct sk_buff *skb,
            unsigned int data_len,
            u_int8_t pf,
            unsigned int hooknum,
            const struct net_device *indev,
            const struct net_device *outdev,
            const char *prefix, unsigned int plen,
            const struct nfnl_ct_hook *nfnl_ct,
            struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
    struct nfulnl_msg_packet_hdr pmsg;
    struct nlmsghdr *nlh;
    /* remember where this message starts so nlmsg_len can be fixed up */
    sk_buff_data_t old_tail = inst->skb->tail;
    struct sock *sk;
    const unsigned char *hwhdrp;
    ktime_t tstamp;

    nlh = nfnl_msg_put(inst->skb, 0, 0,
               nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET),
               0, pf, NFNETLINK_V0, htons(inst->group_num));
    if (!nlh)
        return -1;

    memset(&pmsg, 0, sizeof(pmsg));
    pmsg.hw_protocol    = skb->protocol;
    pmsg.hook       = hooknum;

    if (nla_put(inst->skb, NFULA_PACKET_HDR, sizeof(pmsg), &pmsg))
        goto nla_put_failure;

    if (prefix &&
        nla_put(inst->skb, NFULA_PREFIX, plen, prefix))
        goto nla_put_failure;

    if (indev) {
#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
        if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
                 htonl(indev->ifindex)))
            goto nla_put_failure;
#else
        if (pf == PF_BRIDGE) {
            /* Case 1: outdev is physical input device, we need to
             * look for bridge group (when called from
             * netfilter_bridge) */
            if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
                     htonl(indev->ifindex)) ||
            /* this is the bridge group "brX" */
            /* rcu_read_lock()ed by nf_hook_thresh or
             * nf_log_packet.
             */
                nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
                     htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
                goto nla_put_failure;
        } else {
            struct net_device *physindev;

            /* Case 2: indev is bridge group, we need to look for
             * physical device (when called from ipv4) */
            if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
                     htonl(indev->ifindex)))
                goto nla_put_failure;

            physindev = nf_bridge_get_physindev(skb);
            if (physindev &&
                nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
                     htonl(physindev->ifindex)))
                goto nla_put_failure;
        }
#endif
    }

    if (outdev) {
#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
        if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
                 htonl(outdev->ifindex)))
            goto nla_put_failure;
#else
        if (pf == PF_BRIDGE) {
            /* Case 1: outdev is physical output device, we need to
             * look for bridge group (when called from
             * netfilter_bridge) */
            if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
                     htonl(outdev->ifindex)) ||
            /* this is the bridge group "brX" */
            /* rcu_read_lock()ed by nf_hook_thresh or
             * nf_log_packet.
             */
                nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
                     htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
                goto nla_put_failure;
        } else {
            struct net_device *physoutdev;

            /* Case 2: indev is a bridge group, we need to look
             * for physical device (when called from ipv4) */
            if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
                     htonl(outdev->ifindex)))
                goto nla_put_failure;

            physoutdev = nf_bridge_get_physoutdev(skb);
            if (physoutdev &&
                nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
                     htonl(physoutdev->ifindex)))
                goto nla_put_failure;
        }
#endif
    }

    if (skb->mark &&
        nla_put_be32(inst->skb, NFULA_MARK, htonl(skb->mark)))
        goto nla_put_failure;

    if (indev && skb->dev &&
        skb_mac_header_was_set(skb) &&
        skb_mac_header_len(skb) != 0) {
        struct nfulnl_msg_packet_hw phw;
        int len;

        memset(&phw, 0, sizeof(phw));
        len = dev_parse_header(skb, phw.hw_addr);
        if (len > 0) {
            phw.hw_addrlen = htons(len);
            if (nla_put(inst->skb, NFULA_HWADDR, sizeof(phw), &phw))
                goto nla_put_failure;
        }
    }

    if (indev && skb_mac_header_was_set(skb)) {
        if (nla_put_be16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) ||
            nla_put_be16(inst->skb, NFULA_HWLEN,
                 htons(skb->dev->hard_header_len)))
            goto nla_put_failure;

        hwhdrp = skb_mac_header(skb);

        /* SIT tunnels: step back over the inner ethernet header so the
         * full hardware header is reported */
        if (skb->dev->type == ARPHRD_SIT)
            hwhdrp -= ETH_HLEN;

        if (hwhdrp >= skb->head &&
            nla_put(inst->skb, NFULA_HWHEADER,
                skb->dev->hard_header_len, hwhdrp))
            goto nla_put_failure;
    }

    tstamp = skb_tstamp_cond(skb, false);
    /* only pre-routing/in/forward hooks carry a meaningful rx timestamp */
    if (hooknum <= NF_INET_FORWARD && tstamp) {
        struct nfulnl_msg_packet_timestamp ts;
        struct timespec64 kts = ktime_to_timespec64(tstamp);
        ts.sec = cpu_to_be64(kts.tv_sec);
        ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);

        if (nla_put(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts))
            goto nla_put_failure;
    }

    /* UID */
    sk = skb->sk;
    if (sk && sk_fullsock(sk)) {
        read_lock_bh(&sk->sk_callback_lock);
        if (sk->sk_socket && sk->sk_socket->file) {
            struct file *file = sk->sk_socket->file;
            const struct cred *cred = file->f_cred;
            struct user_namespace *user_ns = inst->peer_user_ns;
            /* translate into the peer's user namespace */
            __be32 uid = htonl(from_kuid_munged(user_ns, cred->fsuid));
            __be32 gid = htonl(from_kgid_munged(user_ns, cred->fsgid));
            read_unlock_bh(&sk->sk_callback_lock);
            if (nla_put_be32(inst->skb, NFULA_UID, uid) ||
                nla_put_be32(inst->skb, NFULA_GID, gid))
                goto nla_put_failure;
        } else
            read_unlock_bh(&sk->sk_callback_lock);
    }

    /* local sequence number */
    if ((inst->flags & NFULNL_CFG_F_SEQ) &&
        nla_put_be32(inst->skb, NFULA_SEQ, htonl(inst->seq++)))
        goto nla_put_failure;

    /* global sequence number */
    if ((inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) &&
        nla_put_be32(inst->skb, NFULA_SEQ_GLOBAL,
             htonl(atomic_inc_return(&log->global_seq))))
        goto nla_put_failure;

    if (ct && nfnl_ct->build(inst->skb, ct, ctinfo,
                 NFULA_CT, NFULA_CT_INFO) < 0)
        goto nla_put_failure;

    if ((pf == NFPROTO_NETDEV || pf == NFPROTO_BRIDGE) &&
        nfulnl_put_bridge(inst, skb) < 0)
        goto nla_put_failure;

    if (data_len) {
        struct nlattr *nla;
        int size = nla_attr_size(data_len);

        if (skb_tailroom(inst->skb) < nla_total_size(data_len))
            goto nla_put_failure;

        /* payload is copied straight into the skb tail; build the
         * attribute header by hand to avoid an intermediate buffer */
        nla = skb_put(inst->skb, nla_total_size(data_len));
        nla->nla_type = NFULA_PAYLOAD;
        nla->nla_len = size;

        if (skb_copy_bits(skb, 0, nla_data(nla), data_len))
            BUG();
    }

    nlh->nlmsg_len = inst->skb->tail - old_tail;
    return 0;

nla_put_failure:
    PRINTR(KERN_ERR "nfnetlink_log: error creating log nlmsg\n");
    return -1;
}
0662 
/* Fallback logging parameters used when the caller passes no (or a
 * non-ULOG) nf_loginfo: copy the whole packet, group 0, flush after
 * every message.
 */
static const struct nf_loginfo default_loginfo = {
    .type =     NF_LOG_TYPE_ULOG,
    .u = {
        .ulog = {
            .copy_len   = 0xffff,
            .group      = 0,
            .qthreshold = 1,
        },
    },
};
0673 
/* log handler for internal netfilter logging api */
/* Entry point invoked by the nf_log core for each logged packet.
 * Computes a worst-case message size (which must stay in sync with the
 * attributes __build_packet_message() emits), flushes the batch skb when
 * the new message would not fit, appends the message, and either flushes
 * immediately (queue threshold reached) or arms the flush timer.
 * Silently returns if no instance is bound to the requested group.
 */
static void
nfulnl_log_packet(struct net *net,
          u_int8_t pf,
          unsigned int hooknum,
          const struct sk_buff *skb,
          const struct net_device *in,
          const struct net_device *out,
          const struct nf_loginfo *li_user,
          const char *prefix)
{
    size_t size;
    unsigned int data_len;
    struct nfulnl_instance *inst;
    const struct nf_loginfo *li;
    unsigned int qthreshold;
    unsigned int plen = 0;
    struct nfnl_log_net *log = nfnl_log_pernet(net);
    const struct nfnl_ct_hook *nfnl_ct = NULL;
    struct nf_conn *ct = NULL;
    enum ip_conntrack_info ctinfo;

    if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
        li = li_user;
    else
        li = &default_loginfo;

    inst = instance_lookup_get(log, li->u.ulog.group);
    if (!inst)
        return;

    if (prefix)
        plen = strlen(prefix) + 1;  /* include the NUL terminator */

    /* FIXME: do we want to make the size calculation conditional based on
     * what is actually present?  way more branches and checks, but more
     * memory efficient... */
    size = nlmsg_total_size(sizeof(struct nfgenmsg))
        + nla_total_size(sizeof(struct nfulnl_msg_packet_hdr))
        + nla_total_size(sizeof(u_int32_t)) /* ifindex */
        + nla_total_size(sizeof(u_int32_t)) /* ifindex */
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
        + nla_total_size(sizeof(u_int32_t)) /* ifindex */
        + nla_total_size(sizeof(u_int32_t)) /* ifindex */
#endif
        + nla_total_size(sizeof(u_int32_t)) /* mark */
        + nla_total_size(sizeof(u_int32_t)) /* uid */
        + nla_total_size(sizeof(u_int32_t)) /* gid */
        + nla_total_size(plen)          /* prefix */
        + nla_total_size(sizeof(struct nfulnl_msg_packet_hw))
        + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp))
        + nla_total_size(sizeof(struct nfgenmsg));  /* NLMSG_DONE */

    if (in && skb_mac_header_was_set(skb)) {
        size += nla_total_size(skb->dev->hard_header_len)
            + nla_total_size(sizeof(u_int16_t)) /* hwtype */
            + nla_total_size(sizeof(u_int16_t));    /* hwlen */
    }

    spin_lock_bh(&inst->lock);

    if (inst->flags & NFULNL_CFG_F_SEQ)
        size += nla_total_size(sizeof(u_int32_t));
    if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL)
        size += nla_total_size(sizeof(u_int32_t));
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
    if (inst->flags & NFULNL_CFG_F_CONNTRACK) {
        nfnl_ct = rcu_dereference(nfnl_ct_hook);
        if (nfnl_ct != NULL) {
            ct = nf_ct_get(skb, &ctinfo);
            if (ct != NULL)
                size += nfnl_ct->build_size(ct);
        }
    }
#endif
    if (pf == NFPROTO_NETDEV || pf == NFPROTO_BRIDGE)
        size += nfulnl_get_bridge_size(skb);

    qthreshold = inst->qthreshold;
    /* per-rule qthreshold overrides per-instance */
    if (li->u.ulog.qthreshold)
        if (qthreshold > li->u.ulog.qthreshold)
            qthreshold = li->u.ulog.qthreshold;


    switch (inst->copy_mode) {
    case NFULNL_COPY_META:
    case NFULNL_COPY_NONE:
        data_len = 0;
        break;

    case NFULNL_COPY_PACKET:
        data_len = inst->copy_range;
        if ((li->u.ulog.flags & NF_LOG_F_COPY_LEN) &&
            (li->u.ulog.copy_len < data_len))
            data_len = li->u.ulog.copy_len;

        if (data_len > skb->len)
            data_len = skb->len;

        size += nla_total_size(data_len);
        break;

    case NFULNL_COPY_DISABLED:  /* instance is being torn down */
    default:
        goto unlock_and_release;
    }

    if (inst->skb && size > skb_tailroom(inst->skb)) {
        /* either the queue len is too high or we don't have
         * enough room in the skb left. flush to userspace. */
        __nfulnl_flush(inst);
    }

    if (!inst->skb) {
        inst->skb = nfulnl_alloc_skb(net, inst->peer_portid,
                         inst->nlbufsiz, size);
        if (!inst->skb)
            goto alloc_failure;
    }

    inst->qlen++;

    __build_packet_message(log, inst, skb, data_len, pf,
                hooknum, in, out, prefix, plen,
                nfnl_ct, ct, ctinfo);

    if (inst->qlen >= qthreshold)
        __nfulnl_flush(inst);
    /* timer_pending always called within inst->lock, so there
     * is no chance of a race here */
    else if (!timer_pending(&inst->timer)) {
        instance_get(inst);     /* timer holds a reference */
        inst->timer.expires = jiffies + (inst->flushtimeout*HZ/100);
        add_timer(&inst->timer);
    }

unlock_and_release:
    spin_unlock_bh(&inst->lock);
    instance_put(inst);
    return;

alloc_failure:
    /* FIXME: statistics */
    goto unlock_and_release;
}
0820 
/* Netlink notifier: when a NETLINK_NETFILTER socket is released, destroy
 * every instance owned by that socket's portid so orphaned groups do not
 * keep batching packets for a listener that is gone.
 */
static int
nfulnl_rcv_nl_event(struct notifier_block *this,
           unsigned long event, void *ptr)
{
    struct netlink_notify *n = ptr;
    struct nfnl_log_net *log = nfnl_log_pernet(n->net);

    if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
        int i;

        /* destroy all instances for this portid */
        spin_lock_bh(&log->instances_lock);
        for  (i = 0; i < INSTANCE_BUCKETS; i++) {
            struct hlist_node *t2;
            struct nfulnl_instance *inst;
            struct hlist_head *head = &log->instance_table[i];

            /* _safe variant: __instance_destroy unlinks entries */
            hlist_for_each_entry_safe(inst, t2, head, hlist) {
                if (n->portid == inst->peer_portid)
                    __instance_destroy(inst);
            }
        }
        spin_unlock_bh(&log->instances_lock);
    }
    return NOTIFY_DONE;
}
0847 
/* Registered on the netlink notifier chain to catch socket releases. */
static struct notifier_block nfulnl_rtnl_notifier = {
    .notifier_call  = nfulnl_rcv_nl_event,
};
0851 
/* NFULNL_MSG_PACKET flows kernel->userspace only; reject any attempt by
 * userspace to send one to us.
 */
static int nfulnl_recv_unsupp(struct sk_buff *skb, const struct nfnl_info *info,
                  const struct nlattr * const nfula[])
{
    return -ENOTSUPP;
}
0857 
/* nf_log backend descriptor; nfulnl_log_packet is invoked for every
 * packet logged through the ULOG type.
 */
static struct nf_logger nfulnl_logger __read_mostly = {
    .name   = "nfnetlink_log",
    .type   = NF_LOG_TYPE_ULOG,
    .logfn  = nfulnl_log_packet,
    .me = THIS_MODULE,
};
0864 
/* Attribute validation policy for NFULNL_MSG_CONFIG requests. */
static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = {
    [NFULA_CFG_CMD]     = { .len = sizeof(struct nfulnl_msg_config_cmd) },
    [NFULA_CFG_MODE]    = { .len = sizeof(struct nfulnl_msg_config_mode) },
    [NFULA_CFG_TIMEOUT] = { .type = NLA_U32 },
    [NFULA_CFG_QTHRESH] = { .type = NLA_U32 },
    [NFULA_CFG_NLBUFSIZ]    = { .type = NLA_U32 },
    [NFULA_CFG_FLAGS]   = { .type = NLA_U16 },
};
0873 
/* Handle an NFULNL_MSG_CONFIG request from userspace: protocol-family
 * (un)bind commands, group bind/unbind, and per-instance parameter
 * updates (mode, timeout, buffer size, queue threshold, flags).
 * Only the socket that bound a group may reconfigure it (-EPERM
 * otherwise).  Runs under the nfnetlink subsystem mutex.
 */
static int nfulnl_recv_config(struct sk_buff *skb, const struct nfnl_info *info,
                  const struct nlattr * const nfula[])
{
    struct nfnl_log_net *log = nfnl_log_pernet(info->net);
    u_int16_t group_num = ntohs(info->nfmsg->res_id);
    struct nfulnl_msg_config_cmd *cmd = NULL;
    struct nfulnl_instance *inst;
    u16 flags = 0;
    int ret = 0;

    if (nfula[NFULA_CFG_CMD]) {
        u_int8_t pf = info->nfmsg->nfgen_family;
        cmd = nla_data(nfula[NFULA_CFG_CMD]);

        /* Commands without queue context */
        switch (cmd->command) {
        case NFULNL_CFG_CMD_PF_BIND:
            return nf_log_bind_pf(info->net, pf, &nfulnl_logger);
        case NFULNL_CFG_CMD_PF_UNBIND:
            nf_log_unbind_pf(info->net, pf);
            return 0;
        }
    }

    inst = instance_lookup_get(log, group_num);
    /* only the owning socket may touch an existing instance */
    if (inst && inst->peer_portid != NETLINK_CB(skb).portid) {
        ret = -EPERM;
        goto out_put;
    }

    /* Check if we support these flags in first place, dependencies should
     * be there too not to break atomicity.
     */
    if (nfula[NFULA_CFG_FLAGS]) {
        flags = ntohs(nla_get_be16(nfula[NFULA_CFG_FLAGS]));

        if ((flags & NFULNL_CFG_F_CONNTRACK) &&
            !rcu_access_pointer(nfnl_ct_hook)) {
#ifdef CONFIG_MODULES
            /* drop the subsystem mutex around the (sleeping)
             * module load, then re-check the hook */
            nfnl_unlock(NFNL_SUBSYS_ULOG);
            request_module("ip_conntrack_netlink");
            nfnl_lock(NFNL_SUBSYS_ULOG);
            if (rcu_access_pointer(nfnl_ct_hook)) {
                /* hook is available now; ask userspace to retry */
                ret = -EAGAIN;
                goto out_put;
            }
#endif
            ret = -EOPNOTSUPP;
            goto out_put;
        }
    }

    if (cmd != NULL) {
        switch (cmd->command) {
        case NFULNL_CFG_CMD_BIND:
            if (inst) {
                ret = -EBUSY;
                goto out_put;
            }

            inst = instance_create(info->net, group_num,
                           NETLINK_CB(skb).portid,
                           sk_user_ns(NETLINK_CB(skb).sk));
            if (IS_ERR(inst)) {
                ret = PTR_ERR(inst);
                goto out;
            }
            break;
        case NFULNL_CFG_CMD_UNBIND:
            if (!inst) {
                ret = -ENODEV;
                goto out;
            }

            instance_destroy(log, inst);
            goto out_put;
        default:
            ret = -ENOTSUPP;
            goto out_put;
        }
    } else if (!inst) {
        ret = -ENODEV;
        goto out;
    }

    if (nfula[NFULA_CFG_MODE]) {
        struct nfulnl_msg_config_mode *params =
            nla_data(nfula[NFULA_CFG_MODE]);

        nfulnl_set_mode(inst, params->copy_mode,
                ntohl(params->copy_range));
    }

    if (nfula[NFULA_CFG_TIMEOUT]) {
        __be32 timeout = nla_get_be32(nfula[NFULA_CFG_TIMEOUT]);

        nfulnl_set_timeout(inst, ntohl(timeout));
    }

    if (nfula[NFULA_CFG_NLBUFSIZ]) {
        __be32 nlbufsiz = nla_get_be32(nfula[NFULA_CFG_NLBUFSIZ]);

        nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz));
    }

    if (nfula[NFULA_CFG_QTHRESH]) {
        __be32 qthresh = nla_get_be32(nfula[NFULA_CFG_QTHRESH]);

        nfulnl_set_qthresh(inst, ntohl(qthresh));
    }

    if (nfula[NFULA_CFG_FLAGS])
        nfulnl_set_flags(inst, flags);

out_put:
    instance_put(inst);
out:
    return ret;
}
0993 
/* Message-type dispatch table for the ULOG nfnetlink subsystem.
 * NFULNL_MSG_PACKET flows kernel->userspace only, so incoming requests of
 * that type go to nfulnl_recv_unsupp (presumably rejects them -- handler
 * body not visible in this chunk; confirm).  NFULNL_MSG_CONFIG messages
 * are validated against nfula_cfg_policy and dispatched to
 * nfulnl_recv_config.  Both handlers run under the nfnetlink subsystem
 * mutex (NFNL_CB_MUTEX). */
static const struct nfnl_callback nfulnl_cb[NFULNL_MSG_MAX] = {
    [NFULNL_MSG_PACKET] = {
        .call       = nfulnl_recv_unsupp,
        .type       = NFNL_CB_MUTEX,
        .attr_count = NFULA_MAX,
    },
    [NFULNL_MSG_CONFIG] = {
        .call       = nfulnl_recv_config,
        .type       = NFNL_CB_MUTEX,
        .attr_count = NFULA_CFG_MAX,
        .policy     = nfula_cfg_policy
    },
};
1007 
/* Subsystem descriptor registered with nfnetlink in nfnetlink_log_init();
 * routes NFNL_SUBSYS_ULOG messages through the nfulnl_cb table above. */
static const struct nfnetlink_subsystem nfulnl_subsys = {
    .name       = "log",
    .subsys_id  = NFNL_SUBSYS_ULOG,
    .cb_count   = NFULNL_MSG_MAX,
    .cb     = nfulnl_cb,
};
1014 
1015 #ifdef CONFIG_PROC_FS
/* Per-open iterator state for the /proc/net/netfilter/nfnetlink_log
 * seq_file.  The seq_net_private member must stay first so that
 * seq_file_net() can recover the struct net from s->private. */
struct iter_state {
    struct seq_net_private p;   /* must be first: used by seq_file_net() */
    unsigned int bucket;        /* current index into instance_table[] */
};
1020 
1021 static struct hlist_node *get_first(struct net *net, struct iter_state *st)
1022 {
1023     struct nfnl_log_net *log;
1024     if (!st)
1025         return NULL;
1026 
1027     log = nfnl_log_pernet(net);
1028 
1029     for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
1030         struct hlist_head *head = &log->instance_table[st->bucket];
1031 
1032         if (!hlist_empty(head))
1033             return rcu_dereference_bh(hlist_first_rcu(head));
1034     }
1035     return NULL;
1036 }
1037 
1038 static struct hlist_node *get_next(struct net *net, struct iter_state *st,
1039                    struct hlist_node *h)
1040 {
1041     h = rcu_dereference_bh(hlist_next_rcu(h));
1042     while (!h) {
1043         struct nfnl_log_net *log;
1044         struct hlist_head *head;
1045 
1046         if (++st->bucket >= INSTANCE_BUCKETS)
1047             return NULL;
1048 
1049         log = nfnl_log_pernet(net);
1050         head = &log->instance_table[st->bucket];
1051         h = rcu_dereference_bh(hlist_first_rcu(head));
1052     }
1053     return h;
1054 }
1055 
1056 static struct hlist_node *get_idx(struct net *net, struct iter_state *st,
1057                   loff_t pos)
1058 {
1059     struct hlist_node *head;
1060     head = get_first(net, st);
1061 
1062     if (head)
1063         while (pos && (head = get_next(net, st, head)))
1064             pos--;
1065     return pos ? NULL : head;
1066 }
1067 
1068 static void *seq_start(struct seq_file *s, loff_t *pos)
1069     __acquires(rcu_bh)
1070 {
1071     rcu_read_lock_bh();
1072     return get_idx(seq_file_net(s), s->private, *pos);
1073 }
1074 
1075 static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
1076 {
1077     (*pos)++;
1078     return get_next(seq_file_net(s), s->private, v);
1079 }
1080 
/* seq_file stop: drop the RCU-bh read lock taken in seq_start(). */
static void seq_stop(struct seq_file *s, void *v)
    __releases(rcu_bh)
{
    rcu_read_unlock_bh();
}
1086 
1087 static int seq_show(struct seq_file *s, void *v)
1088 {
1089     const struct nfulnl_instance *inst = v;
1090 
1091     seq_printf(s, "%5u %6u %5u %1u %5u %6u %2u\n",
1092            inst->group_num,
1093            inst->peer_portid, inst->qlen,
1094            inst->copy_mode, inst->copy_range,
1095            inst->flushtimeout, refcount_read(&inst->use));
1096 
1097     return 0;
1098 }
1099 
/* seq_file operations for /proc/net/netfilter/nfnetlink_log. */
static const struct seq_operations nful_seq_ops = {
    .start  = seq_start,
    .next   = seq_next,
    .stop   = seq_stop,
    .show   = seq_show,
};
1106 #endif /* PROC_FS */
1107 
1108 static int __net_init nfnl_log_net_init(struct net *net)
1109 {
1110     unsigned int i;
1111     struct nfnl_log_net *log = nfnl_log_pernet(net);
1112 #ifdef CONFIG_PROC_FS
1113     struct proc_dir_entry *proc;
1114     kuid_t root_uid;
1115     kgid_t root_gid;
1116 #endif
1117 
1118     for (i = 0; i < INSTANCE_BUCKETS; i++)
1119         INIT_HLIST_HEAD(&log->instance_table[i]);
1120     spin_lock_init(&log->instances_lock);
1121 
1122 #ifdef CONFIG_PROC_FS
1123     proc = proc_create_net("nfnetlink_log", 0440, net->nf.proc_netfilter,
1124             &nful_seq_ops, sizeof(struct iter_state));
1125     if (!proc)
1126         return -ENOMEM;
1127 
1128     root_uid = make_kuid(net->user_ns, 0);
1129     root_gid = make_kgid(net->user_ns, 0);
1130     if (uid_valid(root_uid) && gid_valid(root_gid))
1131         proc_set_user(proc, root_uid, root_gid);
1132 #endif
1133     return 0;
1134 }
1135 
1136 static void __net_exit nfnl_log_net_exit(struct net *net)
1137 {
1138     struct nfnl_log_net *log = nfnl_log_pernet(net);
1139     unsigned int i;
1140 
1141 #ifdef CONFIG_PROC_FS
1142     remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter);
1143 #endif
1144     nf_log_unset(net, &nfulnl_logger);
1145     for (i = 0; i < INSTANCE_BUCKETS; i++)
1146         WARN_ON_ONCE(!hlist_empty(&log->instance_table[i]));
1147 }
1148 
/* Per-network-namespace lifecycle hooks; .size makes the core allocate a
 * struct nfnl_log_net per netns, retrievable via nfnl_log_net_id. */
static struct pernet_operations nfnl_log_net_ops = {
    .init   = nfnl_log_net_init,
    .exit   = nfnl_log_net_exit,
    .id = &nfnl_log_net_id,
    .size   = sizeof(struct nfnl_log_net),
};
1155 
1156 static int __init nfnetlink_log_init(void)
1157 {
1158     int status;
1159 
1160     status = register_pernet_subsys(&nfnl_log_net_ops);
1161     if (status < 0) {
1162         pr_err("failed to register pernet ops\n");
1163         goto out;
1164     }
1165 
1166     netlink_register_notifier(&nfulnl_rtnl_notifier);
1167     status = nfnetlink_subsys_register(&nfulnl_subsys);
1168     if (status < 0) {
1169         pr_err("failed to create netlink socket\n");
1170         goto cleanup_netlink_notifier;
1171     }
1172 
1173     status = nf_log_register(NFPROTO_UNSPEC, &nfulnl_logger);
1174     if (status < 0) {
1175         pr_err("failed to register logger\n");
1176         goto cleanup_subsys;
1177     }
1178 
1179     return status;
1180 
1181 cleanup_subsys:
1182     nfnetlink_subsys_unregister(&nfulnl_subsys);
1183 cleanup_netlink_notifier:
1184     netlink_unregister_notifier(&nfulnl_rtnl_notifier);
1185     unregister_pernet_subsys(&nfnl_log_net_ops);
1186 out:
1187     return status;
1188 }
1189 
/* Module exit: tear down in the order below -- stop accepting new
 * nfnetlink requests first, then drop the notifier, then run the pernet
 * exits (which nf_log_unset each netns), and finally drop the logger
 * registration itself. */
static void __exit nfnetlink_log_fini(void)
{
    nfnetlink_subsys_unregister(&nfulnl_subsys);
    netlink_unregister_notifier(&nfulnl_rtnl_notifier);
    unregister_pernet_subsys(&nfnl_log_net_ops);
    nf_log_unregister(&nfulnl_logger);
}
1197 
MODULE_DESCRIPTION("netfilter userspace logging");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_LICENSE("GPL");
/* Autoload on nfnetlink NFNL_SUBSYS_ULOG messages. */
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ULOG);
/* Autoload as logger type 1 (nfnetlink_log) for each family; the raw
 * numbers below stand in for NFPROTO_* constants that predate AF_* names. */
MODULE_ALIAS_NF_LOGGER(AF_INET, 1);
MODULE_ALIAS_NF_LOGGER(AF_INET6, 1);
MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 1);
MODULE_ALIAS_NF_LOGGER(3, 1); /* NFPROTO_ARP */
MODULE_ALIAS_NF_LOGGER(5, 1); /* NFPROTO_NETDEV */

module_init(nfnetlink_log_init);
module_exit(nfnetlink_log_fini);