0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Monitoring code for network dropped packet alerts
0004  *
0005  * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com>
0006  */
0007 
0008 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0009 
0010 #include <linux/netdevice.h>
0011 #include <linux/etherdevice.h>
0012 #include <linux/string.h>
0013 #include <linux/if_arp.h>
0014 #include <linux/inetdevice.h>
0015 #include <linux/inet.h>
0016 #include <linux/interrupt.h>
0017 #include <linux/netpoll.h>
0018 #include <linux/sched.h>
0019 #include <linux/delay.h>
0020 #include <linux/types.h>
0021 #include <linux/workqueue.h>
0022 #include <linux/netlink.h>
0023 #include <linux/net_dropmon.h>
0024 #include <linux/percpu.h>
0025 #include <linux/timer.h>
0026 #include <linux/bitops.h>
0027 #include <linux/slab.h>
0028 #include <linux/module.h>
0029 #include <net/genetlink.h>
0030 #include <net/netevent.h>
0031 #include <net/flow_offload.h>
0032 #include <net/devlink.h>
0033 
0034 #include <trace/events/skb.h>
0035 #include <trace/events/napi.h>
0036 #include <trace/events/devlink.h>
0037 
0038 #include <asm/unaligned.h>
0039 
0040 #define TRACE_ON 1
0041 #define TRACE_OFF 0
0042 
0043 /*
0044  * Globals: the current software trace state and
0045  * a flag indicating whether hardware drop
0046  * monitoring is enabled
0047  */
0048 static int trace_state = TRACE_OFF;
0049 static bool monitor_hw;
0050 
0051 /* net_dm_mutex
0052  *
0053  * An overall lock guarding every operation coming from userspace.
0054  */
0055 static DEFINE_MUTEX(net_dm_mutex);
0056 
0057 struct net_dm_stats {
0058     u64_stats_t dropped;
0059     struct u64_stats_sync syncp;
0060 };
0061 
0062 #define NET_DM_MAX_HW_TRAP_NAME_LEN 40
0063 
0064 struct net_dm_hw_entry {
0065     char trap_name[NET_DM_MAX_HW_TRAP_NAME_LEN];
0066     u32 count;
0067 };
0068 
0069 struct net_dm_hw_entries {
0070     u32 num_entries;
0071     struct net_dm_hw_entry entries[];
0072 };
0073 
0074 struct per_cpu_dm_data {
0075     spinlock_t      lock;   /* Protects 'skb', 'hw_entries' and
0076                      * 'send_timer'
0077                      */
0078     union {
0079         struct sk_buff          *skb;
0080         struct net_dm_hw_entries    *hw_entries;
0081     };
0082     struct sk_buff_head drop_queue;
0083     struct work_struct  dm_alert_work;
0084     struct timer_list   send_timer;
0085     struct net_dm_stats stats;
0086 };
0087 
0088 struct dm_hw_stat_delta {
0089     unsigned long last_rx;
0090     unsigned long last_drop_val;
0091     struct rcu_head rcu;
0092 };
0093 
0094 static struct genl_family net_drop_monitor_family;
0095 
0096 static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
0097 static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_hw_cpu_data);
0098 
0099 static int dm_hit_limit = 64;
0100 static int dm_delay = 1;
0101 static unsigned long dm_hw_check_delta = 2*HZ;
0102 
0103 static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
0104 static u32 net_dm_trunc_len;
0105 static u32 net_dm_queue_len = 1000;
0106 
0107 struct net_dm_alert_ops {
0108     void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb,
0109                 void *location,
0110                 enum skb_drop_reason reason);
0111     void (*napi_poll_probe)(void *ignore, struct napi_struct *napi,
0112                 int work, int budget);
0113     void (*work_item_func)(struct work_struct *work);
0114     void (*hw_work_item_func)(struct work_struct *work);
0115     void (*hw_trap_probe)(void *ignore, const struct devlink *devlink,
0116                   struct sk_buff *skb,
0117                   const struct devlink_trap_metadata *metadata);
0118 };
0119 
0120 struct net_dm_skb_cb {
0121     union {
0122         struct devlink_trap_metadata *hw_metadata;
0123         void *pc;
0124     };
0125     enum skb_drop_reason reason;
0126 };
0127 
0128 #define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0]))
0129 
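
The NET_DM_SKB_CB() macro above overlays a private struct on the scratch area every sk_buff carries in skb->cb[] (48 bytes in the kernel); whichever layer currently owns the skb may reuse that space. A minimal standalone sketch of the pattern, with a hypothetical struct name and a hard-coded 48-byte area rather than the kernel's types:

#include <assert.h>
#include <stdio.h>

struct fake_skb {
    char cb[48];                /* scratch area, as in struct sk_buff */
};

struct overlay_cb {
    void *pc;                   /* mirrors net_dm_skb_cb's pointer member */
    int reason;
};

#define OVERLAY_CB(skb) ((struct overlay_cb *)&((skb)->cb[0]))

int main(void)
{
    struct fake_skb skb;

    /* The overlay must never outgrow the scratch area. */
    static_assert(sizeof(struct overlay_cb) <= sizeof(skb.cb),
                  "overlay too large for cb[]");

    OVERLAY_CB(&skb)->reason = 2;
    printf("reason=%d\n", OVERLAY_CB(&skb)->reason);
    return 0;
}
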
0130 static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
0131 {
0132     size_t al;
0133     struct net_dm_alert_msg *msg;
0134     struct nlattr *nla;
0135     struct sk_buff *skb;
0136     unsigned long flags;
0137     void *msg_header;
0138 
0139     al = sizeof(struct net_dm_alert_msg);
0140     al += dm_hit_limit * sizeof(struct net_dm_drop_point);
0141     al += sizeof(struct nlattr);
0142 
0143     skb = genlmsg_new(al, GFP_KERNEL);
0144 
0145     if (!skb)
0146         goto err;
0147 
0148     msg_header = genlmsg_put(skb, 0, 0, &net_drop_monitor_family,
0149                  0, NET_DM_CMD_ALERT);
0150     if (!msg_header) {
0151         nlmsg_free(skb);
0152         skb = NULL;
0153         goto err;
0154     }
0155     nla = nla_reserve(skb, NLA_UNSPEC,
0156               sizeof(struct net_dm_alert_msg));
0157     if (!nla) {
0158         nlmsg_free(skb);
0159         skb = NULL;
0160         goto err;
0161     }
0162     msg = nla_data(nla);
0163     memset(msg, 0, al);
0164     goto out;
0165 
0166 err:
0167     mod_timer(&data->send_timer, jiffies + HZ / 10);
0168 out:
0169     spin_lock_irqsave(&data->lock, flags);
0170     swap(data->skb, skb);
0171     spin_unlock_irqrestore(&data->lock, flags);
0172 
0173     if (skb) {
0174         struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
0175         struct genlmsghdr *gnlh = (struct genlmsghdr *)nlmsg_data(nlh);
0176 
0177         genlmsg_end(skb, genlmsg_data(gnlh));
0178     }
0179 
0180     return skb;
0181 }
0182 
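
reset_per_cpu_data() sizes the replacement skb for the worst case up front: the ancillary header, dm_hit_limit (64) drop points, and one attribute header, so later trace hits never need to allocate. A standalone re-derivation of that arithmetic, with the layouts mirrored from the include/uapi/linux/net_dropmon.h structs:

#include <stdint.h>
#include <stdio.h>

struct my_nlattr {                  /* struct nlattr: 4 bytes */
    uint16_t nla_len;
    uint16_t nla_type;
};

struct my_drop_point {              /* struct net_dm_drop_point */
    uint8_t pc[8];
    uint32_t count;
};

struct my_alert_msg {               /* struct net_dm_alert_msg */
    uint32_t entries;
    struct my_drop_point points[];
};

int main(void)
{
    const int dm_hit_limit = 64;    /* same default as above */
    size_t al = sizeof(struct my_alert_msg)
              + dm_hit_limit * sizeof(struct my_drop_point)
              + sizeof(struct my_nlattr);

    printf("preallocated payload: %zu bytes\n", al);    /* 776 */
    return 0;
}
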
0183 static const struct genl_multicast_group dropmon_mcgrps[] = {
0184     { .name = "events", },
0185 };
0186 
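
Userspace receives these alerts by joining the "events" multicast group of the "NET_DM" generic netlink family. A minimal listener sketch using libnl-genl-3 (the library choice and the bare-bones error handling are illustrative, not part of this file; typically built with `pkg-config --cflags --libs libnl-genl-3.0`):

#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <stdio.h>

/* Print one line per alert message multicast to the "events" group. */
static int on_msg(struct nl_msg *msg, void *arg)
{
    printf("drop monitor alert received\n");
    return NL_OK;
}

int main(void)
{
    struct nl_sock *sk = nl_socket_alloc();
    int grp;

    if (!sk || genl_connect(sk))
        return 1;

    grp = genl_ctrl_resolve_grp(sk, "NET_DM", "events");
    if (grp < 0 || nl_socket_add_membership(sk, grp))
        return 1;

    /* Multicast messages are unsolicited; sequence checks must be off. */
    nl_socket_disable_seq_check(sk);
    nl_socket_modify_cb(sk, NL_CB_VALID, NL_CB_CUSTOM, on_msg, NULL);

    while (1)
        nl_recvmsgs_default(sk);
}
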
0187 static void send_dm_alert(struct work_struct *work)
0188 {
0189     struct sk_buff *skb;
0190     struct per_cpu_dm_data *data;
0191 
0192     data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
0193 
0194     skb = reset_per_cpu_data(data);
0195 
0196     if (skb)
0197         genlmsg_multicast(&net_drop_monitor_family, skb, 0,
0198                   0, GFP_KERNEL);
0199 }
0200 
0201 /*
0202  * This is the timer function to delay the sending of an alert
0203  * in the event that more drops will arrive during the
0204  * hysteresis period.
0205  */
0206 static void sched_send_work(struct timer_list *t)
0207 {
0208     struct per_cpu_dm_data *data = from_timer(data, t, send_timer);
0209 
0210     schedule_work(&data->dm_alert_work);
0211 }
0212 
0213 static void trace_drop_common(struct sk_buff *skb, void *location)
0214 {
0215     struct net_dm_alert_msg *msg;
0216     struct net_dm_drop_point *point;
0217     struct nlmsghdr *nlh;
0218     struct nlattr *nla;
0219     int i;
0220     struct sk_buff *dskb;
0221     struct per_cpu_dm_data *data;
0222     unsigned long flags;
0223 
0224     local_irq_save(flags);
0225     data = this_cpu_ptr(&dm_cpu_data);
0226     spin_lock(&data->lock);
0227     dskb = data->skb;
0228 
0229     if (!dskb)
0230         goto out;
0231 
0232     nlh = (struct nlmsghdr *)dskb->data;
0233     nla = genlmsg_data(nlmsg_data(nlh));
0234     msg = nla_data(nla);
0235     point = msg->points;
0236     for (i = 0; i < msg->entries; i++) {
0237         if (!memcmp(&location, &point->pc, sizeof(void *))) {
0238             point->count++;
0239             goto out;
0240         }
0241         point++;
0242     }
0243     if (msg->entries == dm_hit_limit)
0244         goto out;
0245     /*
0246      * We need to create a new entry
0247      */
0248     __nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point));
0249     nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point));
0250     memcpy(point->pc, &location, sizeof(void *));
0251     point->count = 1;
0252     msg->entries++;
0253 
0254     if (!timer_pending(&data->send_timer)) {
0255         data->send_timer.expires = jiffies + dm_delay * HZ;
0256         add_timer(&data->send_timer);
0257     }
0258 
0259 out:
0260     spin_unlock_irqrestore(&data->lock, flags);
0261 }
0262 
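
trace_drop_common() aggregates rather than forwards: each distinct program counter gets one slot whose count is bumped on repeat hits, and the first hit in a quiet period arms the dm_delay (1 s) hysteresis timer that later flushes the whole table in a single alert. The slot logic reduces to a small insert-or-increment routine, sketched standalone below:

#include <stdio.h>

#define HIT_LIMIT 64                /* same cap as dm_hit_limit */

struct drop_point {
    void *pc;                       /* drop location */
    unsigned int count;
};

static struct drop_point points[HIT_LIMIT];
static unsigned int n_points;

/* Insert-or-increment, mirroring the loop in trace_drop_common(). */
static int record_drop(void *pc)
{
    unsigned int i;

    for (i = 0; i < n_points; i++) {
        if (points[i].pc == pc) {
            points[i].count++;
            return 0;
        }
    }
    if (n_points == HIT_LIMIT)
        return -1;                  /* table full until the next flush */
    points[n_points].pc = pc;
    points[n_points].count = 1;
    n_points++;
    return 0;
}

int main(void)
{
    int here, there;

    record_drop(&here);
    record_drop(&here);             /* duplicate: count bumped only */
    record_drop(&there);
    printf("%u distinct drop points\n", n_points);  /* prints 2 */
    return 0;
}
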
0263 static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb,
0264                 void *location,
0265                 enum skb_drop_reason reason)
0266 {
0267     trace_drop_common(skb, location);
0268 }
0269 
0270 static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
0271                 int work, int budget)
0272 {
0273     struct net_device *dev = napi->dev;
0274     struct dm_hw_stat_delta *stat;
0275     /*
0276      * Don't check napi structures with no associated device
0277      */
0278     if (!dev)
0279         return;
0280 
0281     rcu_read_lock();
0282     stat = rcu_dereference(dev->dm_private);
0283     if (stat) {
0284         /*
0285          * Only add a note to our monitor buffer if:
0286          * 1) it's after the last_rx delta
0287          * 2) our rx_dropped count has gone up
0288          */
0289         if (time_after(jiffies, stat->last_rx + dm_hw_check_delta) &&
0290             (dev->stats.rx_dropped != stat->last_drop_val)) {
0291             trace_drop_common(NULL, NULL);
0292             stat->last_drop_val = dev->stats.rx_dropped;
0293             stat->last_rx = jiffies;
0294         }
0295     }
0296     rcu_read_unlock();
0297 }
0298 
0299 static struct net_dm_hw_entries *
0300 net_dm_hw_reset_per_cpu_data(struct per_cpu_dm_data *hw_data)
0301 {
0302     struct net_dm_hw_entries *hw_entries;
0303     unsigned long flags;
0304 
0305     hw_entries = kzalloc(struct_size(hw_entries, entries, dm_hit_limit),
0306                  GFP_KERNEL);
0307     if (!hw_entries) {
0308         /* If the memory allocation failed, we try to perform another
0309          * allocation in 1/10 second. Otherwise, the probe function
0310          * will constantly bail out.
0311          */
0312         mod_timer(&hw_data->send_timer, jiffies + HZ / 10);
0313     }
0314 
0315     spin_lock_irqsave(&hw_data->lock, flags);
0316     swap(hw_data->hw_entries, hw_entries);
0317     spin_unlock_irqrestore(&hw_data->lock, flags);
0318 
0319     return hw_entries;
0320 }
0321 
0322 static int net_dm_hw_entry_put(struct sk_buff *msg,
0323                    const struct net_dm_hw_entry *hw_entry)
0324 {
0325     struct nlattr *attr;
0326 
0327     attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRY);
0328     if (!attr)
0329         return -EMSGSIZE;
0330 
0331     if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME, hw_entry->trap_name))
0332         goto nla_put_failure;
0333 
0334     if (nla_put_u32(msg, NET_DM_ATTR_HW_TRAP_COUNT, hw_entry->count))
0335         goto nla_put_failure;
0336 
0337     nla_nest_end(msg, attr);
0338 
0339     return 0;
0340 
0341 nla_put_failure:
0342     nla_nest_cancel(msg, attr);
0343     return -EMSGSIZE;
0344 }
0345 
0346 static int net_dm_hw_entries_put(struct sk_buff *msg,
0347                  const struct net_dm_hw_entries *hw_entries)
0348 {
0349     struct nlattr *attr;
0350     int i;
0351 
0352     attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRIES);
0353     if (!attr)
0354         return -EMSGSIZE;
0355 
0356     for (i = 0; i < hw_entries->num_entries; i++) {
0357         int rc;
0358 
0359         rc = net_dm_hw_entry_put(msg, &hw_entries->entries[i]);
0360         if (rc)
0361             goto nla_put_failure;
0362     }
0363 
0364     nla_nest_end(msg, attr);
0365 
0366     return 0;
0367 
0368 nla_put_failure:
0369     nla_nest_cancel(msg, attr);
0370     return -EMSGSIZE;
0371 }
0372 
0373 static int
0374 net_dm_hw_summary_report_fill(struct sk_buff *msg,
0375                   const struct net_dm_hw_entries *hw_entries)
0376 {
0377     struct net_dm_alert_msg anc_hdr = { 0 };
0378     void *hdr;
0379     int rc;
0380 
0381     hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
0382               NET_DM_CMD_ALERT);
0383     if (!hdr)
0384         return -EMSGSIZE;
0385 
0386     /* We need to put the ancillary header in order not to break user
0387      * space.
0388      */
0389     if (nla_put(msg, NLA_UNSPEC, sizeof(anc_hdr), &anc_hdr))
0390         goto nla_put_failure;
0391 
0392     rc = net_dm_hw_entries_put(msg, hw_entries);
0393     if (rc)
0394         goto nla_put_failure;
0395 
0396     genlmsg_end(msg, hdr);
0397 
0398     return 0;
0399 
0400 nla_put_failure:
0401     genlmsg_cancel(msg, hdr);
0402     return -EMSGSIZE;
0403 }
0404 
0405 static void net_dm_hw_summary_work(struct work_struct *work)
0406 {
0407     struct net_dm_hw_entries *hw_entries;
0408     struct per_cpu_dm_data *hw_data;
0409     struct sk_buff *msg;
0410     int rc;
0411 
0412     hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
0413 
0414     hw_entries = net_dm_hw_reset_per_cpu_data(hw_data);
0415     if (!hw_entries)
0416         return;
0417 
0418     msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
0419     if (!msg)
0420         goto out;
0421 
0422     rc = net_dm_hw_summary_report_fill(msg, hw_entries);
0423     if (rc) {
0424         nlmsg_free(msg);
0425         goto out;
0426     }
0427 
0428     genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
0429 
0430 out:
0431     kfree(hw_entries);
0432 }
0433 
0434 static void
0435 net_dm_hw_trap_summary_probe(void *ignore, const struct devlink *devlink,
0436                  struct sk_buff *skb,
0437                  const struct devlink_trap_metadata *metadata)
0438 {
0439     struct net_dm_hw_entries *hw_entries;
0440     struct net_dm_hw_entry *hw_entry;
0441     struct per_cpu_dm_data *hw_data;
0442     unsigned long flags;
0443     int i;
0444 
0445     if (metadata->trap_type == DEVLINK_TRAP_TYPE_CONTROL)
0446         return;
0447 
0448     hw_data = this_cpu_ptr(&dm_hw_cpu_data);
0449     spin_lock_irqsave(&hw_data->lock, flags);
0450     hw_entries = hw_data->hw_entries;
0451 
0452     if (!hw_entries)
0453         goto out;
0454 
0455     for (i = 0; i < hw_entries->num_entries; i++) {
0456         hw_entry = &hw_entries->entries[i];
0457         if (!strncmp(hw_entry->trap_name, metadata->trap_name,
0458                  NET_DM_MAX_HW_TRAP_NAME_LEN - 1)) {
0459             hw_entry->count++;
0460             goto out;
0461         }
0462     }
0463     if (WARN_ON_ONCE(hw_entries->num_entries == dm_hit_limit))
0464         goto out;
0465 
0466     hw_entry = &hw_entries->entries[hw_entries->num_entries];
0467     strlcpy(hw_entry->trap_name, metadata->trap_name,
0468         NET_DM_MAX_HW_TRAP_NAME_LEN - 1);
0469     hw_entry->count = 1;
0470     hw_entries->num_entries++;
0471 
0472     if (!timer_pending(&hw_data->send_timer)) {
0473         hw_data->send_timer.expires = jiffies + dm_delay * HZ;
0474         add_timer(&hw_data->send_timer);
0475     }
0476 
0477 out:
0478     spin_unlock_irqrestore(&hw_data->lock, flags);
0479 }
0480 
0481 static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
0482     .kfree_skb_probe    = trace_kfree_skb_hit,
0483     .napi_poll_probe    = trace_napi_poll_hit,
0484     .work_item_func     = send_dm_alert,
0485     .hw_work_item_func  = net_dm_hw_summary_work,
0486     .hw_trap_probe      = net_dm_hw_trap_summary_probe,
0487 };
0488 
0489 static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
0490                           struct sk_buff *skb,
0491                           void *location,
0492                           enum skb_drop_reason reason)
0493 {
0494     ktime_t tstamp = ktime_get_real();
0495     struct per_cpu_dm_data *data;
0496     struct net_dm_skb_cb *cb;
0497     struct sk_buff *nskb;
0498     unsigned long flags;
0499 
0500     if (!skb_mac_header_was_set(skb))
0501         return;
0502 
0503     nskb = skb_clone(skb, GFP_ATOMIC);
0504     if (!nskb)
0505         return;
0506 
0507     if (unlikely(reason >= SKB_DROP_REASON_MAX || reason <= 0))
0508         reason = SKB_DROP_REASON_NOT_SPECIFIED;
0509     cb = NET_DM_SKB_CB(nskb);
0510     cb->reason = reason;
0511     cb->pc = location;
0512     /* Override the timestamp because we care about the time when the
0513      * packet was dropped.
0514      */
0515     nskb->tstamp = tstamp;
0516 
0517     data = this_cpu_ptr(&dm_cpu_data);
0518 
0519     spin_lock_irqsave(&data->drop_queue.lock, flags);
0520     if (skb_queue_len(&data->drop_queue) < net_dm_queue_len)
0521         __skb_queue_tail(&data->drop_queue, nskb);
0522     else
0523         goto unlock_free;
0524     spin_unlock_irqrestore(&data->drop_queue.lock, flags);
0525 
0526     schedule_work(&data->dm_alert_work);
0527 
0528     return;
0529 
0530 unlock_free:
0531     spin_unlock_irqrestore(&data->drop_queue.lock, flags);
0532     u64_stats_update_begin(&data->stats.syncp);
0533     u64_stats_inc(&data->stats.dropped);
0534     u64_stats_update_end(&data->stats.syncp);
0535     consume_skb(nskb);
0536 }
0537 
0538 static void net_dm_packet_trace_napi_poll_hit(void *ignore,
0539                           struct napi_struct *napi,
0540                           int work, int budget)
0541 {
0542 }
0543 
0544 static size_t net_dm_in_port_size(void)
0545 {
0546            /* NET_DM_ATTR_IN_PORT nest */
0547     return nla_total_size(0) +
0548            /* NET_DM_ATTR_PORT_NETDEV_IFINDEX */
0549            nla_total_size(sizeof(u32)) +
0550            /* NET_DM_ATTR_PORT_NETDEV_NAME */
0551            nla_total_size(IFNAMSIZ + 1);
0552 }
0553 
0554 #define NET_DM_MAX_SYMBOL_LEN 40
0555 
0556 static size_t net_dm_packet_report_size(size_t payload_len,
0557                     enum skb_drop_reason reason)
0558 {
0559     size_t size;
0560 
0561     size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize);
0562 
0563     return NLMSG_ALIGN(size) +
0564            /* NET_DM_ATTR_ORIGIN */
0565            nla_total_size(sizeof(u16)) +
0566            /* NET_DM_ATTR_PC */
0567            nla_total_size(sizeof(u64)) +
0568            /* NET_DM_ATTR_SYMBOL */
0569            nla_total_size(NET_DM_MAX_SYMBOL_LEN + 1) +
0570            /* NET_DM_ATTR_IN_PORT */
0571            net_dm_in_port_size() +
0572            /* NET_DM_ATTR_TIMESTAMP */
0573            nla_total_size(sizeof(u64)) +
0574            /* NET_DM_ATTR_ORIG_LEN */
0575            nla_total_size(sizeof(u32)) +
0576            /* NET_DM_ATTR_PROTO */
0577            nla_total_size(sizeof(u16)) +
0578            /* NET_DM_ATTR_REASON */
0579            nla_total_size(strlen(drop_reasons[reason]) + 1) +
0580            /* NET_DM_ATTR_PAYLOAD */
0581            nla_total_size(payload_len);
0582 }
0583 
0584 static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex,
0585                         const char *name)
0586 {
0587     struct nlattr *attr;
0588 
0589     attr = nla_nest_start(msg, NET_DM_ATTR_IN_PORT);
0590     if (!attr)
0591         return -EMSGSIZE;
0592 
0593     if (ifindex &&
0594         nla_put_u32(msg, NET_DM_ATTR_PORT_NETDEV_IFINDEX, ifindex))
0595         goto nla_put_failure;
0596 
0597     if (name && nla_put_string(msg, NET_DM_ATTR_PORT_NETDEV_NAME, name))
0598         goto nla_put_failure;
0599 
0600     nla_nest_end(msg, attr);
0601 
0602     return 0;
0603 
0604 nla_put_failure:
0605     nla_nest_cancel(msg, attr);
0606     return -EMSGSIZE;
0607 }
0608 
0609 static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
0610                      size_t payload_len)
0611 {
0612     struct net_dm_skb_cb *cb = NET_DM_SKB_CB(skb);
0613     char buf[NET_DM_MAX_SYMBOL_LEN];
0614     struct nlattr *attr;
0615     void *hdr;
0616     int rc;
0617 
0618     hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
0619               NET_DM_CMD_PACKET_ALERT);
0620     if (!hdr)
0621         return -EMSGSIZE;
0622 
0623     if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_SW))
0624         goto nla_put_failure;
0625 
0626     if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, (u64)(uintptr_t)cb->pc,
0627                   NET_DM_ATTR_PAD))
0628         goto nla_put_failure;
0629 
0630     if (nla_put_string(msg, NET_DM_ATTR_REASON,
0631                drop_reasons[cb->reason]))
0632         goto nla_put_failure;
0633 
0634     snprintf(buf, sizeof(buf), "%pS", cb->pc);
0635     if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf))
0636         goto nla_put_failure;
0637 
0638     rc = net_dm_packet_report_in_port_put(msg, skb->skb_iif, NULL);
0639     if (rc)
0640         goto nla_put_failure;
0641 
0642     if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP,
0643                   ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD))
0644         goto nla_put_failure;
0645 
0646     if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len))
0647         goto nla_put_failure;
0648 
0649     if (!payload_len)
0650         goto out;
0651 
0652     if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol)))
0653         goto nla_put_failure;
0654 
0655     attr = skb_put(msg, nla_total_size(payload_len));
0656     attr->nla_type = NET_DM_ATTR_PAYLOAD;
0657     attr->nla_len = nla_attr_size(payload_len);
0658     if (skb_copy_bits(skb, 0, nla_data(attr), payload_len))
0659         goto nla_put_failure;
0660 
0661 out:
0662     genlmsg_end(msg, hdr);
0663 
0664     return 0;
0665 
0666 nla_put_failure:
0667     genlmsg_cancel(msg, hdr);
0668     return -EMSGSIZE;
0669 }
0670 
0671 #define NET_DM_MAX_PACKET_SIZE (0xffff - NLA_HDRLEN - NLA_ALIGNTO)
0672 
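
The 0xffff ceiling exists because nla_len, the length field of struct nlattr, is only 16 bits wide; subtracting NLA_HDRLEN (4) and NLA_ALIGNTO (4) leaves the largest payload one attribute can carry once its header and alignment padding are accounted for. A userspace re-derivation of the sizing helpers used throughout this file, with the netlink constants written out by hand:

#include <stdio.h>

#define MY_NLA_ALIGNTO      4
#define MY_NLA_ALIGN(len)   (((len) + MY_NLA_ALIGNTO - 1) & ~(MY_NLA_ALIGNTO - 1))
#define MY_NLA_HDRLEN       4       /* aligned sizeof(struct nlattr) */

static size_t my_nla_attr_size(size_t payload)
{
    return MY_NLA_HDRLEN + payload;             /* header + data */
}

static size_t my_nla_total_size(size_t payload)
{
    return MY_NLA_ALIGN(my_nla_attr_size(payload)); /* plus padding */
}

int main(void)
{
    size_t max_packet = 0xffff - MY_NLA_HDRLEN - MY_NLA_ALIGNTO;

    printf("1500-byte payload occupies %zu bytes on the wire\n",
           my_nla_total_size(1500));
    printf("largest payload a single attribute can carry: %zu\n",
           max_packet);
    return 0;
}
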
0673 static void net_dm_packet_report(struct sk_buff *skb)
0674 {
0675     struct sk_buff *msg;
0676     size_t payload_len;
0677     int rc;
0678 
0679     /* Make sure we start copying the packet from the MAC header */
0680     if (skb->data > skb_mac_header(skb))
0681         skb_push(skb, skb->data - skb_mac_header(skb));
0682     else
0683         skb_pull(skb, skb_mac_header(skb) - skb->data);
0684 
0685     /* Ensure packet fits inside a single netlink attribute */
0686     payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE);
0687     if (net_dm_trunc_len)
0688         payload_len = min_t(size_t, net_dm_trunc_len, payload_len);
0689 
0690     msg = nlmsg_new(net_dm_packet_report_size(payload_len,
0691                           NET_DM_SKB_CB(skb)->reason),
0692             GFP_KERNEL);
0693     if (!msg)
0694         goto out;
0695 
0696     rc = net_dm_packet_report_fill(msg, skb, payload_len);
0697     if (rc) {
0698         nlmsg_free(msg);
0699         goto out;
0700     }
0701 
0702     genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
0703 
0704 out:
0705     consume_skb(skb);
0706 }
0707 
0708 static void net_dm_packet_work(struct work_struct *work)
0709 {
0710     struct per_cpu_dm_data *data;
0711     struct sk_buff_head list;
0712     struct sk_buff *skb;
0713     unsigned long flags;
0714 
0715     data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
0716 
0717     __skb_queue_head_init(&list);
0718 
0719     spin_lock_irqsave(&data->drop_queue.lock, flags);
0720     skb_queue_splice_tail_init(&data->drop_queue, &list);
0721     spin_unlock_irqrestore(&data->drop_queue.lock, flags);
0722 
0723     while ((skb = __skb_dequeue(&list)))
0724         net_dm_packet_report(skb);
0725 }
0726 
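
net_dm_packet_work() shows the standard splice-and-drain pattern: detach the whole queue while holding the lock, then build and multicast the netlink messages with the lock dropped, since genlmsg_multicast() with GFP_KERNEL may sleep. A reduced pthread model of the same idea (toy list and names, not kernel API):

#include <pthread.h>
#include <stdio.h>

struct node { struct node *next; int v; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *queue_head;

/* Producer side: prepend under the lock, kept O(1) and short. */
static void enqueue(struct node *n)
{
    pthread_mutex_lock(&lock);
    n->next = queue_head;
    queue_head = n;
    pthread_mutex_unlock(&lock);
}

/* Consumer side: detach the whole list under the lock, then walk it
 * with the lock dropped, as net_dm_packet_work() does via
 * skb_queue_splice_tail_init().
 */
static void drain(void)
{
    struct node *list;

    pthread_mutex_lock(&lock);
    list = queue_head;
    queue_head = NULL;
    pthread_mutex_unlock(&lock);

    while (list) {
        printf("report %d\n", list->v);
        list = list->next;
    }
}

int main(void)
{
    struct node a = { .v = 1 }, b = { .v = 2 };

    enqueue(&a);
    enqueue(&b);
    drain();
    return 0;
}
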
0727 static size_t
0728 net_dm_flow_action_cookie_size(const struct devlink_trap_metadata *hw_metadata)
0729 {
0730     return hw_metadata->fa_cookie ?
0731            nla_total_size(hw_metadata->fa_cookie->cookie_len) : 0;
0732 }
0733 
0734 static size_t
0735 net_dm_hw_packet_report_size(size_t payload_len,
0736                  const struct devlink_trap_metadata *hw_metadata)
0737 {
0738     size_t size;
0739 
0740     size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize);
0741 
0742     return NLMSG_ALIGN(size) +
0743            /* NET_DM_ATTR_ORIGIN */
0744            nla_total_size(sizeof(u16)) +
0745            /* NET_DM_ATTR_HW_TRAP_GROUP_NAME */
0746            nla_total_size(strlen(hw_metadata->trap_group_name) + 1) +
0747            /* NET_DM_ATTR_HW_TRAP_NAME */
0748            nla_total_size(strlen(hw_metadata->trap_name) + 1) +
0749            /* NET_DM_ATTR_IN_PORT */
0750            net_dm_in_port_size() +
0751            /* NET_DM_ATTR_FLOW_ACTION_COOKIE */
0752            net_dm_flow_action_cookie_size(hw_metadata) +
0753            /* NET_DM_ATTR_TIMESTAMP */
0754            nla_total_size(sizeof(u64)) +
0755            /* NET_DM_ATTR_ORIG_LEN */
0756            nla_total_size(sizeof(u32)) +
0757            /* NET_DM_ATTR_PROTO */
0758            nla_total_size(sizeof(u16)) +
0759            /* NET_DM_ATTR_PAYLOAD */
0760            nla_total_size(payload_len);
0761 }
0762 
0763 static int net_dm_hw_packet_report_fill(struct sk_buff *msg,
0764                     struct sk_buff *skb, size_t payload_len)
0765 {
0766     struct devlink_trap_metadata *hw_metadata;
0767     struct nlattr *attr;
0768     void *hdr;
0769 
0770     hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
0771 
0772     hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
0773               NET_DM_CMD_PACKET_ALERT);
0774     if (!hdr)
0775         return -EMSGSIZE;
0776 
0777     if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_HW))
0778         goto nla_put_failure;
0779 
0780     if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_GROUP_NAME,
0781                hw_metadata->trap_group_name))
0782         goto nla_put_failure;
0783 
0784     if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME,
0785                hw_metadata->trap_name))
0786         goto nla_put_failure;
0787 
0788     if (hw_metadata->input_dev) {
0789         struct net_device *dev = hw_metadata->input_dev;
0790         int rc;
0791 
0792         rc = net_dm_packet_report_in_port_put(msg, dev->ifindex,
0793                               dev->name);
0794         if (rc)
0795             goto nla_put_failure;
0796     }
0797 
0798     if (hw_metadata->fa_cookie &&
0799         nla_put(msg, NET_DM_ATTR_FLOW_ACTION_COOKIE,
0800             hw_metadata->fa_cookie->cookie_len,
0801             hw_metadata->fa_cookie->cookie))
0802         goto nla_put_failure;
0803 
0804     if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP,
0805                   ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD))
0806         goto nla_put_failure;
0807 
0808     if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len))
0809         goto nla_put_failure;
0810 
0811     if (!payload_len)
0812         goto out;
0813 
0814     if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol)))
0815         goto nla_put_failure;
0816 
0817     attr = skb_put(msg, nla_total_size(payload_len));
0818     attr->nla_type = NET_DM_ATTR_PAYLOAD;
0819     attr->nla_len = nla_attr_size(payload_len);
0820     if (skb_copy_bits(skb, 0, nla_data(attr), payload_len))
0821         goto nla_put_failure;
0822 
0823 out:
0824     genlmsg_end(msg, hdr);
0825 
0826     return 0;
0827 
0828 nla_put_failure:
0829     genlmsg_cancel(msg, hdr);
0830     return -EMSGSIZE;
0831 }
0832 
0833 static struct devlink_trap_metadata *
0834 net_dm_hw_metadata_copy(const struct devlink_trap_metadata *metadata)
0835 {
0836     const struct flow_action_cookie *fa_cookie;
0837     struct devlink_trap_metadata *hw_metadata;
0838     const char *trap_group_name;
0839     const char *trap_name;
0840 
0841     hw_metadata = kzalloc(sizeof(*hw_metadata), GFP_ATOMIC);
0842     if (!hw_metadata)
0843         return NULL;
0844 
0845     trap_group_name = kstrdup(metadata->trap_group_name, GFP_ATOMIC);
0846     if (!trap_group_name)
0847         goto free_hw_metadata;
0848     hw_metadata->trap_group_name = trap_group_name;
0849 
0850     trap_name = kstrdup(metadata->trap_name, GFP_ATOMIC);
0851     if (!trap_name)
0852         goto free_trap_group;
0853     hw_metadata->trap_name = trap_name;
0854 
0855     if (metadata->fa_cookie) {
0856         size_t cookie_size = sizeof(*fa_cookie) +
0857                      metadata->fa_cookie->cookie_len;
0858 
0859         fa_cookie = kmemdup(metadata->fa_cookie, cookie_size,
0860                     GFP_ATOMIC);
0861         if (!fa_cookie)
0862             goto free_trap_name;
0863         hw_metadata->fa_cookie = fa_cookie;
0864     }
0865 
0866     hw_metadata->input_dev = metadata->input_dev;
0867     netdev_hold(hw_metadata->input_dev, &hw_metadata->dev_tracker,
0868             GFP_ATOMIC);
0869 
0870     return hw_metadata;
0871 
0872 free_trap_name:
0873     kfree(trap_name);
0874 free_trap_group:
0875     kfree(trap_group_name);
0876 free_hw_metadata:
0877     kfree(hw_metadata);
0878     return NULL;
0879 }
0880 
0881 static void
0882 net_dm_hw_metadata_free(struct devlink_trap_metadata *hw_metadata)
0883 {
0884     netdev_put(hw_metadata->input_dev, &hw_metadata->dev_tracker);
0885     kfree(hw_metadata->fa_cookie);
0886     kfree(hw_metadata->trap_name);
0887     kfree(hw_metadata->trap_group_name);
0888     kfree(hw_metadata);
0889 }
0890 
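
net_dm_hw_metadata_copy() uses the kernel's staged-unwind idiom: each allocation that succeeds gains a matching cleanup label, so a failure part-way through frees exactly what has been taken so far, in reverse order. A standalone POSIX sketch of the idiom, using a hypothetical two-field struct:

#include <stdlib.h>
#include <string.h>

struct meta {
    char *group;
    char *name;
};

static struct meta *meta_copy(const char *group, const char *name)
{
    struct meta *dst = calloc(1, sizeof(*dst));

    if (!dst)
        return NULL;

    dst->group = strdup(group);
    if (!dst->group)
        goto free_dst;

    dst->name = strdup(name);
    if (!dst->name)
        goto free_group;

    return dst;

free_group:                         /* unwind in reverse order */
    free(dst->group);
free_dst:
    free(dst);
    return NULL;
}

int main(void)
{
    struct meta *copy = meta_copy("l3_drops", "blackhole_route");

    if (!copy)
        return 1;
    free(copy->name);
    free(copy->group);
    free(copy);
    return 0;
}
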
0891 static void net_dm_hw_packet_report(struct sk_buff *skb)
0892 {
0893     struct devlink_trap_metadata *hw_metadata;
0894     struct sk_buff *msg;
0895     size_t payload_len;
0896     int rc;
0897 
0898     if (skb->data > skb_mac_header(skb))
0899         skb_push(skb, skb->data - skb_mac_header(skb));
0900     else
0901         skb_pull(skb, skb_mac_header(skb) - skb->data);
0902 
0903     payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE);
0904     if (net_dm_trunc_len)
0905         payload_len = min_t(size_t, net_dm_trunc_len, payload_len);
0906 
0907     hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
0908     msg = nlmsg_new(net_dm_hw_packet_report_size(payload_len, hw_metadata),
0909             GFP_KERNEL);
0910     if (!msg)
0911         goto out;
0912 
0913     rc = net_dm_hw_packet_report_fill(msg, skb, payload_len);
0914     if (rc) {
0915         nlmsg_free(msg);
0916         goto out;
0917     }
0918 
0919     genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
0920 
0921 out:
0922     net_dm_hw_metadata_free(NET_DM_SKB_CB(skb)->hw_metadata);
0923     consume_skb(skb);
0924 }
0925 
0926 static void net_dm_hw_packet_work(struct work_struct *work)
0927 {
0928     struct per_cpu_dm_data *hw_data;
0929     struct sk_buff_head list;
0930     struct sk_buff *skb;
0931     unsigned long flags;
0932 
0933     hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
0934 
0935     __skb_queue_head_init(&list);
0936 
0937     spin_lock_irqsave(&hw_data->drop_queue.lock, flags);
0938     skb_queue_splice_tail_init(&hw_data->drop_queue, &list);
0939     spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
0940 
0941     while ((skb = __skb_dequeue(&list)))
0942         net_dm_hw_packet_report(skb);
0943 }
0944 
0945 static void
0946 net_dm_hw_trap_packet_probe(void *ignore, const struct devlink *devlink,
0947                 struct sk_buff *skb,
0948                 const struct devlink_trap_metadata *metadata)
0949 {
0950     struct devlink_trap_metadata *n_hw_metadata;
0951     ktime_t tstamp = ktime_get_real();
0952     struct per_cpu_dm_data *hw_data;
0953     struct sk_buff *nskb;
0954     unsigned long flags;
0955 
0956     if (metadata->trap_type == DEVLINK_TRAP_TYPE_CONTROL)
0957         return;
0958 
0959     if (!skb_mac_header_was_set(skb))
0960         return;
0961 
0962     nskb = skb_clone(skb, GFP_ATOMIC);
0963     if (!nskb)
0964         return;
0965 
0966     n_hw_metadata = net_dm_hw_metadata_copy(metadata);
0967     if (!n_hw_metadata)
0968         goto free;
0969 
0970     NET_DM_SKB_CB(nskb)->hw_metadata = n_hw_metadata;
0971     nskb->tstamp = tstamp;
0972 
0973     hw_data = this_cpu_ptr(&dm_hw_cpu_data);
0974 
0975     spin_lock_irqsave(&hw_data->drop_queue.lock, flags);
0976     if (skb_queue_len(&hw_data->drop_queue) < net_dm_queue_len)
0977         __skb_queue_tail(&hw_data->drop_queue, nskb);
0978     else
0979         goto unlock_free;
0980     spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
0981 
0982     schedule_work(&hw_data->dm_alert_work);
0983 
0984     return;
0985 
0986 unlock_free:
0987     spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
0988     u64_stats_update_begin(&hw_data->stats.syncp);
0989     u64_stats_inc(&hw_data->stats.dropped);
0990     u64_stats_update_end(&hw_data->stats.syncp);
0991     net_dm_hw_metadata_free(n_hw_metadata);
0992 free:
0993     consume_skb(nskb);
0994 }
0995 
0996 static const struct net_dm_alert_ops net_dm_alert_packet_ops = {
0997     .kfree_skb_probe    = net_dm_packet_trace_kfree_skb_hit,
0998     .napi_poll_probe    = net_dm_packet_trace_napi_poll_hit,
0999     .work_item_func     = net_dm_packet_work,
1000     .hw_work_item_func  = net_dm_hw_packet_work,
1001     .hw_trap_probe      = net_dm_hw_trap_packet_probe,
1002 };
1003 
1004 static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
1005     [NET_DM_ALERT_MODE_SUMMARY] = &net_dm_alert_summary_ops,
1006     [NET_DM_ALERT_MODE_PACKET]  = &net_dm_alert_packet_ops,
1007 };
1008 
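
The two structures above make the alert mode a strategy table: net_dm_alert_ops_arr is indexed by the mode enum, and switching modes swaps every probe and work function at once instead of scattering branches through the hot paths. A reduced model of the dispatch:

#include <stdio.h>

enum alert_mode { MODE_SUMMARY, MODE_PACKET, MODE_MAX };

struct alert_ops {
    void (*report)(void);
};

static void report_summary(void) { puts("summary"); }
static void report_packet(void)  { puts("packet"); }

static const struct alert_ops summary_ops = { .report = report_summary };
static const struct alert_ops packet_ops  = { .report = report_packet };

static const struct alert_ops *ops_arr[MODE_MAX] = {
    [MODE_SUMMARY] = &summary_ops,
    [MODE_PACKET]  = &packet_ops,
};

int main(void)
{
    enum alert_mode mode = MODE_PACKET;

    ops_arr[mode]->report();        /* behavior swaps with the index */
    return 0;
}
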
1009 #if IS_ENABLED(CONFIG_NET_DEVLINK)
1010 static int net_dm_hw_probe_register(const struct net_dm_alert_ops *ops)
1011 {
1012     return register_trace_devlink_trap_report(ops->hw_trap_probe, NULL);
1013 }
1014 
1015 static void net_dm_hw_probe_unregister(const struct net_dm_alert_ops *ops)
1016 {
1017     unregister_trace_devlink_trap_report(ops->hw_trap_probe, NULL);
1018     tracepoint_synchronize_unregister();
1019 }
1020 #else
1021 static int net_dm_hw_probe_register(const struct net_dm_alert_ops *ops)
1022 {
1023     return -EOPNOTSUPP;
1024 }
1025 
1026 static void net_dm_hw_probe_unregister(const struct net_dm_alert_ops *ops)
1027 {
1028 }
1029 #endif
1030 
1031 static int net_dm_hw_monitor_start(struct netlink_ext_ack *extack)
1032 {
1033     const struct net_dm_alert_ops *ops;
1034     int cpu, rc;
1035 
1036     if (monitor_hw) {
1037         NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already enabled");
1038         return -EAGAIN;
1039     }
1040 
1041     ops = net_dm_alert_ops_arr[net_dm_alert_mode];
1042 
1043     if (!try_module_get(THIS_MODULE)) {
1044         NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
1045         return -ENODEV;
1046     }
1047 
1048     for_each_possible_cpu(cpu) {
1049         struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
1050         struct net_dm_hw_entries *hw_entries;
1051 
1052         INIT_WORK(&hw_data->dm_alert_work, ops->hw_work_item_func);
1053         timer_setup(&hw_data->send_timer, sched_send_work, 0);
1054         hw_entries = net_dm_hw_reset_per_cpu_data(hw_data);
1055         kfree(hw_entries);
1056     }
1057 
1058     rc = net_dm_hw_probe_register(ops);
1059     if (rc) {
1060         NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to devlink_trap_probe() tracepoint");
1061         goto err_module_put;
1062     }
1063 
1064     monitor_hw = true;
1065 
1066     return 0;
1067 
1068 err_module_put:
1069     for_each_possible_cpu(cpu) {
1070         struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
1071         struct sk_buff *skb;
1072 
1073         del_timer_sync(&hw_data->send_timer);
1074         cancel_work_sync(&hw_data->dm_alert_work);
1075         while ((skb = __skb_dequeue(&hw_data->drop_queue))) {
1076             struct devlink_trap_metadata *hw_metadata;
1077 
1078             hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
1079             net_dm_hw_metadata_free(hw_metadata);
1080             consume_skb(skb);
1081         }
1082     }
1083     module_put(THIS_MODULE);
1084     return rc;
1085 }
1086 
1087 static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack)
1088 {
1089     const struct net_dm_alert_ops *ops;
1090     int cpu;
1091 
1092     if (!monitor_hw) {
1093         NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already disabled");
1094         return;
1095     }
1096 
1097     ops = net_dm_alert_ops_arr[net_dm_alert_mode];
1098 
1099     monitor_hw = false;
1100 
1101     net_dm_hw_probe_unregister(ops);
1102 
1103     for_each_possible_cpu(cpu) {
1104         struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
1105         struct sk_buff *skb;
1106 
1107         del_timer_sync(&hw_data->send_timer);
1108         cancel_work_sync(&hw_data->dm_alert_work);
1109         while ((skb = __skb_dequeue(&hw_data->drop_queue))) {
1110             struct devlink_trap_metadata *hw_metadata;
1111 
1112             hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
1113             net_dm_hw_metadata_free(hw_metadata);
1114             consume_skb(skb);
1115         }
1116     }
1117 
1118     module_put(THIS_MODULE);
1119 }
1120 
1121 static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
1122 {
1123     const struct net_dm_alert_ops *ops;
1124     int cpu, rc;
1125 
1126     ops = net_dm_alert_ops_arr[net_dm_alert_mode];
1127 
1128     if (!try_module_get(THIS_MODULE)) {
1129         NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
1130         return -ENODEV;
1131     }
1132 
1133     for_each_possible_cpu(cpu) {
1134         struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
1135         struct sk_buff *skb;
1136 
1137         INIT_WORK(&data->dm_alert_work, ops->work_item_func);
1138         timer_setup(&data->send_timer, sched_send_work, 0);
1139         /* Allocate a new per-CPU skb for the summary alert message and
1140          * free the old one which might contain stale data from
1141          * previous tracing.
1142          */
1143         skb = reset_per_cpu_data(data);
1144         consume_skb(skb);
1145     }
1146 
1147     rc = register_trace_kfree_skb(ops->kfree_skb_probe, NULL);
1148     if (rc) {
1149         NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to kfree_skb() tracepoint");
1150         goto err_module_put;
1151     }
1152 
1153     rc = register_trace_napi_poll(ops->napi_poll_probe, NULL);
1154     if (rc) {
1155         NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to napi_poll() tracepoint");
1156         goto err_unregister_trace;
1157     }
1158 
1159     return 0;
1160 
1161 err_unregister_trace:
1162     unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL);
1163 err_module_put:
1164     for_each_possible_cpu(cpu) {
1165         struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
1166         struct sk_buff *skb;
1167 
1168         del_timer_sync(&data->send_timer);
1169         cancel_work_sync(&data->dm_alert_work);
1170         while ((skb = __skb_dequeue(&data->drop_queue)))
1171             consume_skb(skb);
1172     }
1173     module_put(THIS_MODULE);
1174     return rc;
1175 }
1176 
1177 static void net_dm_trace_off_set(void)
1178 {
1179     const struct net_dm_alert_ops *ops;
1180     int cpu;
1181 
1182     ops = net_dm_alert_ops_arr[net_dm_alert_mode];
1183 
1184     unregister_trace_napi_poll(ops->napi_poll_probe, NULL);
1185     unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL);
1186 
1187     tracepoint_synchronize_unregister();
1188 
1189     /* Make sure we do not send notifications to user space after request
1190      * to stop tracing returns.
1191      */
1192     for_each_possible_cpu(cpu) {
1193         struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
1194         struct sk_buff *skb;
1195 
1196         del_timer_sync(&data->send_timer);
1197         cancel_work_sync(&data->dm_alert_work);
1198         while ((skb = __skb_dequeue(&data->drop_queue)))
1199             consume_skb(skb);
1200     }
1201 
1202     module_put(THIS_MODULE);
1203 }
1204 
1205 static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack)
1206 {
1207     int rc = 0;
1208 
1209     if (state == trace_state) {
1210         NL_SET_ERR_MSG_MOD(extack, "Trace state already set to requested state");
1211         return -EAGAIN;
1212     }
1213 
1214     switch (state) {
1215     case TRACE_ON:
1216         rc = net_dm_trace_on_set(extack);
1217         break;
1218     case TRACE_OFF:
1219         net_dm_trace_off_set();
1220         break;
1221     default:
1222         rc = 1;
1223         break;
1224     }
1225 
1226     if (!rc)
1227         trace_state = state;
1228     else
1229         rc = -EINPROGRESS;
1230 
1231     return rc;
1232 }
1233 
1234 static bool net_dm_is_monitoring(void)
1235 {
1236     return trace_state == TRACE_ON || monitor_hw;
1237 }
1238 
1239 static int net_dm_alert_mode_get_from_info(struct genl_info *info,
1240                        enum net_dm_alert_mode *p_alert_mode)
1241 {
1242     u8 val;
1243 
1244     val = nla_get_u8(info->attrs[NET_DM_ATTR_ALERT_MODE]);
1245 
1246     switch (val) {
1247     case NET_DM_ALERT_MODE_SUMMARY:
1248     case NET_DM_ALERT_MODE_PACKET:
1249         *p_alert_mode = val;
1250         break;
1251     default:
1252         return -EINVAL;
1253     }
1254 
1255     return 0;
1256 }
1257 
1258 static int net_dm_alert_mode_set(struct genl_info *info)
1259 {
1260     struct netlink_ext_ack *extack = info->extack;
1261     enum net_dm_alert_mode alert_mode;
1262     int rc;
1263 
1264     if (!info->attrs[NET_DM_ATTR_ALERT_MODE])
1265         return 0;
1266 
1267     rc = net_dm_alert_mode_get_from_info(info, &alert_mode);
1268     if (rc) {
1269         NL_SET_ERR_MSG_MOD(extack, "Invalid alert mode");
1270         return -EINVAL;
1271     }
1272 
1273     net_dm_alert_mode = alert_mode;
1274 
1275     return 0;
1276 }
1277 
1278 static void net_dm_trunc_len_set(struct genl_info *info)
1279 {
1280     if (!info->attrs[NET_DM_ATTR_TRUNC_LEN])
1281         return;
1282 
1283     net_dm_trunc_len = nla_get_u32(info->attrs[NET_DM_ATTR_TRUNC_LEN]);
1284 }
1285 
1286 static void net_dm_queue_len_set(struct genl_info *info)
1287 {
1288     if (!info->attrs[NET_DM_ATTR_QUEUE_LEN])
1289         return;
1290 
1291     net_dm_queue_len = nla_get_u32(info->attrs[NET_DM_ATTR_QUEUE_LEN]);
1292 }
1293 
1294 static int net_dm_cmd_config(struct sk_buff *skb,
1295             struct genl_info *info)
1296 {
1297     struct netlink_ext_ack *extack = info->extack;
1298     int rc;
1299 
1300     if (net_dm_is_monitoring()) {
1301         NL_SET_ERR_MSG_MOD(extack, "Cannot configure drop monitor during monitoring");
1302         return -EBUSY;
1303     }
1304 
1305     rc = net_dm_alert_mode_set(info);
1306     if (rc)
1307         return rc;
1308 
1309     net_dm_trunc_len_set(info);
1310 
1311     net_dm_queue_len_set(info);
1312 
1313     return 0;
1314 }
1315 
1316 static int net_dm_monitor_start(bool set_sw, bool set_hw,
1317                 struct netlink_ext_ack *extack)
1318 {
1319     bool sw_set = false;
1320     int rc;
1321 
1322     if (set_sw) {
1323         rc = set_all_monitor_traces(TRACE_ON, extack);
1324         if (rc)
1325             return rc;
1326         sw_set = true;
1327     }
1328 
1329     if (set_hw) {
1330         rc = net_dm_hw_monitor_start(extack);
1331         if (rc)
1332             goto err_monitor_hw;
1333     }
1334 
1335     return 0;
1336 
1337 err_monitor_hw:
1338     if (sw_set)
1339         set_all_monitor_traces(TRACE_OFF, extack);
1340     return rc;
1341 }
1342 
1343 static void net_dm_monitor_stop(bool set_sw, bool set_hw,
1344                 struct netlink_ext_ack *extack)
1345 {
1346     if (set_hw)
1347         net_dm_hw_monitor_stop(extack);
1348     if (set_sw)
1349         set_all_monitor_traces(TRACE_OFF, extack);
1350 }
1351 
1352 static int net_dm_cmd_trace(struct sk_buff *skb,
1353             struct genl_info *info)
1354 {
1355     bool set_sw = !!info->attrs[NET_DM_ATTR_SW_DROPS];
1356     bool set_hw = !!info->attrs[NET_DM_ATTR_HW_DROPS];
1357     struct netlink_ext_ack *extack = info->extack;
1358 
1359     /* To maintain backward compatibility, we start / stop monitoring of
1360      * software drops if no flag is specified.
1361      */
1362     if (!set_sw && !set_hw)
1363         set_sw = true;
1364 
1365     switch (info->genlhdr->cmd) {
1366     case NET_DM_CMD_START:
1367         return net_dm_monitor_start(set_sw, set_hw, extack);
1368     case NET_DM_CMD_STOP:
1369         net_dm_monitor_stop(set_sw, set_hw, extack);
1370         return 0;
1371     }
1372 
1373     return -EOPNOTSUPP;
1374 }
1375 
1376 static int net_dm_config_fill(struct sk_buff *msg, struct genl_info *info)
1377 {
1378     void *hdr;
1379 
1380     hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
1381               &net_drop_monitor_family, 0, NET_DM_CMD_CONFIG_NEW);
1382     if (!hdr)
1383         return -EMSGSIZE;
1384 
1385     if (nla_put_u8(msg, NET_DM_ATTR_ALERT_MODE, net_dm_alert_mode))
1386         goto nla_put_failure;
1387 
1388     if (nla_put_u32(msg, NET_DM_ATTR_TRUNC_LEN, net_dm_trunc_len))
1389         goto nla_put_failure;
1390 
1391     if (nla_put_u32(msg, NET_DM_ATTR_QUEUE_LEN, net_dm_queue_len))
1392         goto nla_put_failure;
1393 
1394     genlmsg_end(msg, hdr);
1395 
1396     return 0;
1397 
1398 nla_put_failure:
1399     genlmsg_cancel(msg, hdr);
1400     return -EMSGSIZE;
1401 }
1402 
1403 static int net_dm_cmd_config_get(struct sk_buff *skb, struct genl_info *info)
1404 {
1405     struct sk_buff *msg;
1406     int rc;
1407 
1408     msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1409     if (!msg)
1410         return -ENOMEM;
1411 
1412     rc = net_dm_config_fill(msg, info);
1413     if (rc)
1414         goto free_msg;
1415 
1416     return genlmsg_reply(msg, info);
1417 
1418 free_msg:
1419     nlmsg_free(msg);
1420     return rc;
1421 }
1422 
1423 static void net_dm_stats_read(struct net_dm_stats *stats)
1424 {
1425     int cpu;
1426 
1427     memset(stats, 0, sizeof(*stats));
1428     for_each_possible_cpu(cpu) {
1429         struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
1430         struct net_dm_stats *cpu_stats = &data->stats;
1431         unsigned int start;
1432         u64 dropped;
1433 
1434         do {
1435             start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
1436             dropped = u64_stats_read(&cpu_stats->dropped);
1437         } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
1438 
1439         u64_stats_add(&stats->dropped, dropped);
1440     }
1441 }
1442 
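
The fetch_begin/retry loop gives a torn-read-free 64-bit snapshot even on 32-bit machines: the writer makes the sequence count odd while updating and even when done, and the reader retries until it sees the same even value on both sides of its read. A toy C11 model of the retry pattern (sequentially consistent atomics for simplicity; the kernel's u64_stats_sync uses finer-grained barriers and is free on 64-bit):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic unsigned int seq;    /* even: stable, odd: in flux */
static _Atomic uint64_t dropped;

static void stats_inc(void)
{
    atomic_fetch_add(&seq, 1);      /* odd: update in progress */
    atomic_fetch_add(&dropped, 1);
    atomic_fetch_add(&seq, 1);      /* even: update complete */
}

static uint64_t stats_snapshot(void)
{
    unsigned int start;
    uint64_t val;

    do {
        start = atomic_load(&seq);
        val = atomic_load(&dropped);
    } while ((start & 1) || atomic_load(&seq) != start);

    return val;
}

int main(void)
{
    stats_inc();
    printf("dropped=%llu\n", (unsigned long long)stats_snapshot());
    return 0;
}
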
1443 static int net_dm_stats_put(struct sk_buff *msg)
1444 {
1445     struct net_dm_stats stats;
1446     struct nlattr *attr;
1447 
1448     net_dm_stats_read(&stats);
1449 
1450     attr = nla_nest_start(msg, NET_DM_ATTR_STATS);
1451     if (!attr)
1452         return -EMSGSIZE;
1453 
1454     if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED,
1455                   u64_stats_read(&stats.dropped), NET_DM_ATTR_PAD))
1456         goto nla_put_failure;
1457 
1458     nla_nest_end(msg, attr);
1459 
1460     return 0;
1461 
1462 nla_put_failure:
1463     nla_nest_cancel(msg, attr);
1464     return -EMSGSIZE;
1465 }
1466 
1467 static void net_dm_hw_stats_read(struct net_dm_stats *stats)
1468 {
1469     int cpu;
1470 
1471     memset(stats, 0, sizeof(*stats));
1472     for_each_possible_cpu(cpu) {
1473         struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
1474         struct net_dm_stats *cpu_stats = &hw_data->stats;
1475         unsigned int start;
1476         u64 dropped;
1477 
1478         do {
1479             start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
1480             dropped = u64_stats_read(&cpu_stats->dropped);
1481         } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
1482 
1483         u64_stats_add(&stats->dropped, dropped);
1484     }
1485 }
1486 
1487 static int net_dm_hw_stats_put(struct sk_buff *msg)
1488 {
1489     struct net_dm_stats stats;
1490     struct nlattr *attr;
1491 
1492     net_dm_hw_stats_read(&stats);
1493 
1494     attr = nla_nest_start(msg, NET_DM_ATTR_HW_STATS);
1495     if (!attr)
1496         return -EMSGSIZE;
1497 
1498     if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED,
1499                   u64_stats_read(&stats.dropped), NET_DM_ATTR_PAD))
1500         goto nla_put_failure;
1501 
1502     nla_nest_end(msg, attr);
1503 
1504     return 0;
1505 
1506 nla_put_failure:
1507     nla_nest_cancel(msg, attr);
1508     return -EMSGSIZE;
1509 }
1510 
1511 static int net_dm_stats_fill(struct sk_buff *msg, struct genl_info *info)
1512 {
1513     void *hdr;
1514     int rc;
1515 
1516     hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
1517               &net_drop_monitor_family, 0, NET_DM_CMD_STATS_NEW);
1518     if (!hdr)
1519         return -EMSGSIZE;
1520 
1521     rc = net_dm_stats_put(msg);
1522     if (rc)
1523         goto nla_put_failure;
1524 
1525     rc = net_dm_hw_stats_put(msg);
1526     if (rc)
1527         goto nla_put_failure;
1528 
1529     genlmsg_end(msg, hdr);
1530 
1531     return 0;
1532 
1533 nla_put_failure:
1534     genlmsg_cancel(msg, hdr);
1535     return -EMSGSIZE;
1536 }
1537 
1538 static int net_dm_cmd_stats_get(struct sk_buff *skb, struct genl_info *info)
1539 {
1540     struct sk_buff *msg;
1541     int rc;
1542 
1543     msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1544     if (!msg)
1545         return -ENOMEM;
1546 
1547     rc = net_dm_stats_fill(msg, info);
1548     if (rc)
1549         goto free_msg;
1550 
1551     return genlmsg_reply(msg, info);
1552 
1553 free_msg:
1554     nlmsg_free(msg);
1555     return rc;
1556 }
1557 
1558 static int dropmon_net_event(struct notifier_block *ev_block,
1559                  unsigned long event, void *ptr)
1560 {
1561     struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1562     struct dm_hw_stat_delta *stat;
1563 
1564     switch (event) {
1565     case NETDEV_REGISTER:
1566         if (WARN_ON_ONCE(rtnl_dereference(dev->dm_private)))
1567             break;
1568         stat = kzalloc(sizeof(*stat), GFP_KERNEL);
1569         if (!stat)
1570             break;
1571 
1572         stat->last_rx = jiffies;
1573         rcu_assign_pointer(dev->dm_private, stat);
1574 
1575         break;
1576     case NETDEV_UNREGISTER:
1577         stat = rtnl_dereference(dev->dm_private);
1578         if (stat) {
1579             rcu_assign_pointer(dev->dm_private, NULL);
1580             kfree_rcu(stat, rcu);
1581         }
1582         break;
1583     }
1584     return NOTIFY_DONE;
1585 }
1586 
1587 static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = {
1588     [NET_DM_ATTR_UNSPEC] = { .strict_start_type = NET_DM_ATTR_UNSPEC + 1 },
1589     [NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 },
1590     [NET_DM_ATTR_TRUNC_LEN] = { .type = NLA_U32 },
1591     [NET_DM_ATTR_QUEUE_LEN] = { .type = NLA_U32 },
1592     [NET_DM_ATTR_SW_DROPS]  = { .type = NLA_FLAG },
1593     [NET_DM_ATTR_HW_DROPS]  = { .type = NLA_FLAG },
1594 };
1595 
1596 static const struct genl_small_ops dropmon_ops[] = {
1597     {
1598         .cmd = NET_DM_CMD_CONFIG,
1599         .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1600         .doit = net_dm_cmd_config,
1601         .flags = GENL_ADMIN_PERM,
1602     },
1603     {
1604         .cmd = NET_DM_CMD_START,
1605         .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1606         .doit = net_dm_cmd_trace,
1607     },
1608     {
1609         .cmd = NET_DM_CMD_STOP,
1610         .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
1611         .doit = net_dm_cmd_trace,
1612     },
1613     {
1614         .cmd = NET_DM_CMD_CONFIG_GET,
1615         .doit = net_dm_cmd_config_get,
1616     },
1617     {
1618         .cmd = NET_DM_CMD_STATS_GET,
1619         .doit = net_dm_cmd_stats_get,
1620     },
1621 };
1622 
1623 static int net_dm_nl_pre_doit(const struct genl_ops *ops,
1624                   struct sk_buff *skb, struct genl_info *info)
1625 {
1626     mutex_lock(&net_dm_mutex);
1627 
1628     return 0;
1629 }
1630 
1631 static void net_dm_nl_post_doit(const struct genl_ops *ops,
1632                 struct sk_buff *skb, struct genl_info *info)
1633 {
1634     mutex_unlock(&net_dm_mutex);
1635 }
1636 
1637 static struct genl_family net_drop_monitor_family __ro_after_init = {
1638     .hdrsize        = 0,
1639     .name           = "NET_DM",
1640     .version        = 2,
1641     .maxattr    = NET_DM_ATTR_MAX,
1642     .policy     = net_dm_nl_policy,
1643     .pre_doit   = net_dm_nl_pre_doit,
1644     .post_doit  = net_dm_nl_post_doit,
1645     .module     = THIS_MODULE,
1646     .small_ops  = dropmon_ops,
1647     .n_small_ops    = ARRAY_SIZE(dropmon_ops),
1648     .mcgrps     = dropmon_mcgrps,
1649     .n_mcgrps   = ARRAY_SIZE(dropmon_mcgrps),
1650 };
1651 
1652 static struct notifier_block dropmon_net_notifier = {
1653     .notifier_call = dropmon_net_event
1654 };
1655 
1656 static void __net_dm_cpu_data_init(struct per_cpu_dm_data *data)
1657 {
1658     spin_lock_init(&data->lock);
1659     skb_queue_head_init(&data->drop_queue);
1660     u64_stats_init(&data->stats.syncp);
1661 }
1662 
1663 static void __net_dm_cpu_data_fini(struct per_cpu_dm_data *data)
1664 {
1665     WARN_ON(!skb_queue_empty(&data->drop_queue));
1666 }
1667 
1668 static void net_dm_cpu_data_init(int cpu)
1669 {
1670     struct per_cpu_dm_data *data;
1671 
1672     data = &per_cpu(dm_cpu_data, cpu);
1673     __net_dm_cpu_data_init(data);
1674 }
1675 
1676 static void net_dm_cpu_data_fini(int cpu)
1677 {
1678     struct per_cpu_dm_data *data;
1679 
1680     data = &per_cpu(dm_cpu_data, cpu);
1681     /* At this point, we should have exclusive access
1682      * to this struct and can free the skb inside it.
1683      */
1684     consume_skb(data->skb);
1685     __net_dm_cpu_data_fini(data);
1686 }
1687 
1688 static void net_dm_hw_cpu_data_init(int cpu)
1689 {
1690     struct per_cpu_dm_data *hw_data;
1691 
1692     hw_data = &per_cpu(dm_hw_cpu_data, cpu);
1693     __net_dm_cpu_data_init(hw_data);
1694 }
1695 
1696 static void net_dm_hw_cpu_data_fini(int cpu)
1697 {
1698     struct per_cpu_dm_data *hw_data;
1699 
1700     hw_data = &per_cpu(dm_hw_cpu_data, cpu);
1701     kfree(hw_data->hw_entries);
1702     __net_dm_cpu_data_fini(hw_data);
1703 }
1704 
1705 static int __init init_net_drop_monitor(void)
1706 {
1707     int cpu, rc;
1708 
1709     pr_info("Initializing network drop monitor service\n");
1710 
1711     if (sizeof(void *) > 8) {
1712         pr_err("Unable to store program counters on this arch, Drop monitor failed\n");
1713         return -ENOSPC;
1714     }
1715 
1716     rc = genl_register_family(&net_drop_monitor_family);
1717     if (rc) {
1718         pr_err("Could not create drop monitor netlink family\n");
1719         return rc;
1720     }
1721     WARN_ON(net_drop_monitor_family.mcgrp_offset != NET_DM_GRP_ALERT);
1722 
1723     rc = register_netdevice_notifier(&dropmon_net_notifier);
1724     if (rc < 0) {
1725         pr_crit("Failed to register netdevice notifier\n");
1726         goto out_unreg;
1727     }
1728 
1729     rc = 0;
1730 
1731     for_each_possible_cpu(cpu) {
1732         net_dm_cpu_data_init(cpu);
1733         net_dm_hw_cpu_data_init(cpu);
1734     }
1735 
1736     goto out;
1737 
1738 out_unreg:
1739     genl_unregister_family(&net_drop_monitor_family);
1740 out:
1741     return rc;
1742 }
1743 
1744 static void exit_net_drop_monitor(void)
1745 {
1746     int cpu;
1747 
1748     BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier));
1749 
1750     /*
1751      * Because of the module_get/put we do in the trace state change path
1752      * we are guaranteed not to have any current users when we get here
1753      */
1754 
1755     for_each_possible_cpu(cpu) {
1756         net_dm_hw_cpu_data_fini(cpu);
1757         net_dm_cpu_data_fini(cpu);
1758     }
1759 
1760     BUG_ON(genl_unregister_family(&net_drop_monitor_family));
1761 }
1762 
1763 module_init(init_net_drop_monitor);
1764 module_exit(exit_net_drop_monitor);
1765 
1766 MODULE_LICENSE("GPL v2");
1767 MODULE_AUTHOR("Neil Horman <nhorman@tuxdriver.com>");
1768 MODULE_ALIAS_GENL_FAMILY("NET_DM");
1769 MODULE_DESCRIPTION("Monitoring code for network dropped packet alerts");
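
For reference, starting software drop monitoring from userspace is a single NET_DM_CMD_START request carrying the NET_DM_ATTR_SW_DROPS flag. A minimal client sketch using libnl-genl-3 (the library choice and minimal error handling are illustrative; tools such as dropwatch wrap the same protocol):

#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/net_dropmon.h>

int main(void)
{
    struct nl_sock *sk = nl_socket_alloc();
    struct nl_msg *msg;
    int family;

    if (!sk || genl_connect(sk))
        return 1;

    /* Look up the numeric id the kernel assigned to "NET_DM". */
    family = genl_ctrl_resolve(sk, "NET_DM");
    if (family < 0)
        return 1;

    msg = nlmsg_alloc();
    if (!msg)
        return 1;

    genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
                NET_DM_CMD_START, 2 /* family .version above */);
    nla_put_flag(msg, NET_DM_ATTR_SW_DROPS);

    if (nl_send_auto(sk, msg) < 0)
        return 1;

    nlmsg_free(msg);
    nl_socket_free(sk);
    return 0;
}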