Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /* getdelays.c
0003  *
0004  * Utility to get per-pid and per-tgid delay accounting statistics
0005  * Also illustrates usage of the taskstats interface
0006  *
0007  * Copyright (C) Shailabh Nagar, IBM Corp. 2005
0008  * Copyright (C) Balbir Singh, IBM Corp. 2006
0009  * Copyright (c) Jay Lan, SGI. 2006
0010  *
0011  * Compile with
0012  *  gcc -I/usr/src/linux/include getdelays.c -o getdelays
0013  */
0014 
0015 #include <stdio.h>
0016 #include <stdlib.h>
0017 #include <errno.h>
0018 #include <unistd.h>
0019 #include <poll.h>
0020 #include <string.h>
0021 #include <fcntl.h>
0022 #include <sys/types.h>
0023 #include <sys/stat.h>
0024 #include <sys/socket.h>
0025 #include <sys/wait.h>
0026 #include <signal.h>
0027 
0028 #include <linux/genetlink.h>
0029 #include <linux/taskstats.h>
0030 #include <linux/cgroupstats.h>
0031 
/*
 * Generic macros for dealing with netlink sockets. Might be duplicated
 * elsewhere. It is recommended that commercial grade applications use
 * libnl or libnetlink and use the interfaces provided by the library
 *
 * GENLMSG_DATA(glh):    address GENL_HDRLEN bytes past NLMSG_DATA(glh),
 *                       i.e. just after the generic netlink header
 * GENLMSG_PAYLOAD(glh): NLMSG_PAYLOAD(glh, 0) minus GENL_HDRLEN
 * NLA_DATA(na):         address NLA_HDRLEN bytes past attribute header na
 * NLA_PAYLOAD(len):     len (an attribute's nla_len) minus NLA_HDRLEN
 *
 * Pointer arithmetic is done on char * (arithmetic on void * is a GNU
 * extension) and every macro argument is parenthesized so that expression
 * arguments expand correctly.
 */
#define GENLMSG_DATA(glh)   ((void *)((char *)NLMSG_DATA(glh) + GENL_HDRLEN))
#define GENLMSG_PAYLOAD(glh)    (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
#define NLA_DATA(na)        ((void *)((char *)(na) + NLA_HDRLEN))
#define NLA_PAYLOAD(len)    ((len) - NLA_HDRLEN)
0041 
/*
 * err(code, fmt, ...): print a printf-style message to stderr, then
 * terminate the process with exit status `code`.
 *
 * The format string is taken as the first variadic argument, so this uses
 * only standard C99 __VA_ARGS__ (no GNU named-variadic or ## extension).
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define err(code, ...)                  \
    do {                                \
        fprintf(stderr, __VA_ARGS__);   \
        exit(code);                     \
    } while (0)
0047 
/* Run-time settings; main() fills these in from the command line. */
int rcvbufsz;       /* -r: SO_RCVBUF size to request; 0 leaves the socket default */
char name[100];     /* scratch copy of the genetlink family name being looked up */
int dbg;            /* -v: non-zero enables PRINTF() output */
int print_delays;   /* -d */
int print_io_accounting;    /* -i */
int print_task_context_switch_counts;   /* -q */

/*
 * Debug printf: forwards its arguments to printf() only when dbg is set.
 * Wrapped in do { } while (0) so that it is a single statement and can be
 * used safely as the body of an unbraced if/else.
 */
#define PRINTF(...)                 \
    do {                            \
        if (dbg)                    \
            printf(__VA_ARGS__);    \
    } while (0)
0060 
/* Maximum size of response requested or message sent */
#define MAX_MSG_SIZE    1024
/* Maximum number of cpus expected to be specified in a cpumask */
#define MAX_CPUS    32

/*
 * Buffer for one generic netlink message as sent and received by this
 * program: the netlink header, followed by the generic netlink header,
 * followed by up to MAX_MSG_SIZE bytes of attribute data.
 */
struct msgtemplate {
    struct nlmsghdr n;
    struct genlmsghdr g;
    char buf[MAX_MSG_SIZE];
};

/*
 * Cpumask string given with -m; it is sent verbatim (with its terminating
 * NUL) as the payload of the register/deregister cpumask requests.
 */
char cpumask[100+6*MAX_CPUS];
0073 
/*
 * Print a summary of every command-line option accepted by main() to
 * stderr. Takes no arguments and returns nothing; the caller decides
 * whether to exit afterwards.
 */
static void usage(void)
{
    fprintf(stderr, "getdelays [-dilqv] [-w logfile] [-r bufsize] "
            "[-m cpumask] [-t tgid] [-p pid] [-C container] "
            "[-c cmd [args...]]\n");
    fprintf(stderr, "  -d: print delayacct stats\n");
    fprintf(stderr, "  -i: print IO accounting (works only with -p)\n");
    fprintf(stderr, "  -l: listen forever\n");
    fprintf(stderr, "  -q: print task context switch counts\n");
    fprintf(stderr, "  -v: debug on\n");
    fprintf(stderr, "  -w: write received stats to logfile\n");
    fprintf(stderr, "  -r: set socket receive buffer size\n");
    fprintf(stderr, "  -m: register cpumask to listen for exiting tasks\n");
    fprintf(stderr, "  -t: tgid to get stats for\n");
    fprintf(stderr, "  -p: pid to get stats for\n");
    fprintf(stderr, "  -C: container path\n");
    fprintf(stderr, "  -c: run cmd and get its stats once it exits "
            "(must be the last option)\n");
}
0084 
0085 /*
0086  * Create a raw netlink socket and bind
0087  */
0088 static int create_nl_socket(int protocol)
0089 {
0090     int fd;
0091     struct sockaddr_nl local;
0092 
0093     fd = socket(AF_NETLINK, SOCK_RAW, protocol);
0094     if (fd < 0)
0095         return -1;
0096 
0097     if (rcvbufsz)
0098         if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
0099                 &rcvbufsz, sizeof(rcvbufsz)) < 0) {
0100             fprintf(stderr, "Unable to set socket rcv buf size to %d\n",
0101                 rcvbufsz);
0102             goto error;
0103         }
0104 
0105     memset(&local, 0, sizeof(local));
0106     local.nl_family = AF_NETLINK;
0107 
0108     if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0)
0109         goto error;
0110 
0111     return fd;
0112 error:
0113     close(fd);
0114     return -1;
0115 }
0116 
0117 
0118 static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
0119          __u8 genl_cmd, __u16 nla_type,
0120          void *nla_data, int nla_len)
0121 {
0122     struct nlattr *na;
0123     struct sockaddr_nl nladdr;
0124     int r, buflen;
0125     char *buf;
0126 
0127     struct msgtemplate msg;
0128 
0129     msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
0130     msg.n.nlmsg_type = nlmsg_type;
0131     msg.n.nlmsg_flags = NLM_F_REQUEST;
0132     msg.n.nlmsg_seq = 0;
0133     msg.n.nlmsg_pid = nlmsg_pid;
0134     msg.g.cmd = genl_cmd;
0135     msg.g.version = 0x1;
0136     na = (struct nlattr *) GENLMSG_DATA(&msg);
0137     na->nla_type = nla_type;
0138     na->nla_len = nla_len + NLA_HDRLEN;
0139     memcpy(NLA_DATA(na), nla_data, nla_len);
0140     msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
0141 
0142     buf = (char *) &msg;
0143     buflen = msg.n.nlmsg_len ;
0144     memset(&nladdr, 0, sizeof(nladdr));
0145     nladdr.nl_family = AF_NETLINK;
0146     while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr,
0147                sizeof(nladdr))) < buflen) {
0148         if (r > 0) {
0149             buf += r;
0150             buflen -= r;
0151         } else if (errno != EAGAIN)
0152             return -1;
0153     }
0154     return 0;
0155 }
0156 
0157 
0158 /*
0159  * Probe the controller in genetlink to find the family id
0160  * for the TASKSTATS family
0161  */
0162 static int get_family_id(int sd)
0163 {
0164     struct {
0165         struct nlmsghdr n;
0166         struct genlmsghdr g;
0167         char buf[256];
0168     } ans;
0169 
0170     int id = 0, rc;
0171     struct nlattr *na;
0172     int rep_len;
0173 
0174     strcpy(name, TASKSTATS_GENL_NAME);
0175     rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY,
0176             CTRL_ATTR_FAMILY_NAME, (void *)name,
0177             strlen(TASKSTATS_GENL_NAME)+1);
0178     if (rc < 0)
0179         return 0;   /* sendto() failure? */
0180 
0181     rep_len = recv(sd, &ans, sizeof(ans), 0);
0182     if (ans.n.nlmsg_type == NLMSG_ERROR ||
0183         (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len))
0184         return 0;
0185 
0186     na = (struct nlattr *) GENLMSG_DATA(&ans);
0187     na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
0188     if (na->nla_type == CTRL_ATTR_FAMILY_ID) {
0189         id = *(__u16 *) NLA_DATA(na);
0190     }
0191     return id;
0192 }
0193 
0194 #define average_ms(t, c) (t / 1000000ULL / (c ? c : 1))
0195 
0196 static void print_delayacct(struct taskstats *t)
0197 {
0198     printf("\n\nCPU   %15s%15s%15s%15s%15s\n"
0199            "      %15llu%15llu%15llu%15llu%15.3fms\n"
0200            "IO    %15s%15s%15s\n"
0201            "      %15llu%15llu%15llums\n"
0202            "SWAP  %15s%15s%15s\n"
0203            "      %15llu%15llu%15llums\n"
0204            "RECLAIM  %12s%15s%15s\n"
0205            "      %15llu%15llu%15llums\n"
0206            "THRASHING%12s%15s%15s\n"
0207            "      %15llu%15llu%15llums\n"
0208            "COMPACT  %12s%15s%15s\n"
0209            "      %15llu%15llu%15llums\n"
0210            "WPCOPY   %12s%15s%15s\n"
0211            "      %15llu%15llu%15llums\n",
0212            "count", "real total", "virtual total",
0213            "delay total", "delay average",
0214            (unsigned long long)t->cpu_count,
0215            (unsigned long long)t->cpu_run_real_total,
0216            (unsigned long long)t->cpu_run_virtual_total,
0217            (unsigned long long)t->cpu_delay_total,
0218            average_ms((double)t->cpu_delay_total, t->cpu_count),
0219            "count", "delay total", "delay average",
0220            (unsigned long long)t->blkio_count,
0221            (unsigned long long)t->blkio_delay_total,
0222            average_ms(t->blkio_delay_total, t->blkio_count),
0223            "count", "delay total", "delay average",
0224            (unsigned long long)t->swapin_count,
0225            (unsigned long long)t->swapin_delay_total,
0226            average_ms(t->swapin_delay_total, t->swapin_count),
0227            "count", "delay total", "delay average",
0228            (unsigned long long)t->freepages_count,
0229            (unsigned long long)t->freepages_delay_total,
0230            average_ms(t->freepages_delay_total, t->freepages_count),
0231            "count", "delay total", "delay average",
0232            (unsigned long long)t->thrashing_count,
0233            (unsigned long long)t->thrashing_delay_total,
0234            average_ms(t->thrashing_delay_total, t->thrashing_count),
0235            "count", "delay total", "delay average",
0236            (unsigned long long)t->compact_count,
0237            (unsigned long long)t->compact_delay_total,
0238            average_ms(t->compact_delay_total, t->compact_count),
0239            "count", "delay total", "delay average",
0240            (unsigned long long)t->wpcopy_count,
0241            (unsigned long long)t->wpcopy_delay_total,
0242            average_ms(t->wpcopy_delay_total, t->wpcopy_count));
0243 }
0244 
/*
 * Print the voluntary (nvcsw) and non-voluntary (nivcsw) context switch
 * counters of *t to stdout as a two-column table.
 */
static void task_context_switch_counts(struct taskstats *t)
{
    const unsigned long long voluntary = (unsigned long long)t->nvcsw;
    const unsigned long long involuntary = (unsigned long long)t->nivcsw;

    printf("\n\nTask   %15s%15s\n"
           "       %15llu%15llu\n",
           "voluntary", "nonvoluntary",
           voluntary, involuntary);
}
0252 
/*
 * Print the five task-state counters of *c (sleeping, io-wait reported as
 * "blocked", running, stopped, uninterruptible) on one line of stdout.
 */
static void print_cgroupstats(struct cgroupstats *c)
{
    const unsigned long long sleeping = (unsigned long long)c->nr_sleeping;
    const unsigned long long blocked = (unsigned long long)c->nr_io_wait;
    const unsigned long long running = (unsigned long long)c->nr_running;
    const unsigned long long stopped = (unsigned long long)c->nr_stopped;
    const unsigned long long uninterruptible =
        (unsigned long long)c->nr_uninterruptible;

    printf("sleeping %llu, blocked %llu, running %llu, stopped %llu, "
        "uninterruptible %llu\n",
        sleeping, blocked, running, stopped, uninterruptible);
}
0262 
0263 
/*
 * Print the command name of *t followed by its read, write and cancelled
 * write byte counters on one line of stdout.
 */
static void print_ioacct(struct taskstats *t)
{
    const unsigned long long bytes_read = (unsigned long long)t->read_bytes;
    const unsigned long long bytes_written = (unsigned long long)t->write_bytes;
    const unsigned long long bytes_cancelled =
        (unsigned long long)t->cancelled_write_bytes;

    printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
        t->ac_comm, bytes_read, bytes_written, bytes_cancelled);
}
0272 
0273 int main(int argc, char *argv[])
0274 {
0275     int c, rc, rep_len, aggr_len, len2;
0276     int cmd_type = TASKSTATS_CMD_ATTR_UNSPEC;
0277     __u16 id;
0278     __u32 mypid;
0279 
0280     struct nlattr *na;
0281     int nl_sd = -1;
0282     int len = 0;
0283     pid_t tid = 0;
0284     pid_t rtid = 0;
0285 
0286     int fd = 0;
0287     int write_file = 0;
0288     int maskset = 0;
0289     char *logfile = NULL;
0290     int loop = 0;
0291     int containerset = 0;
0292     char *containerpath = NULL;
0293     int cfd = 0;
0294     int forking = 0;
0295     sigset_t sigset;
0296 
0297     struct msgtemplate msg;
0298 
0299     while (!forking) {
0300         c = getopt(argc, argv, "qdiw:r:m:t:p:vlC:c:");
0301         if (c < 0)
0302             break;
0303 
0304         switch (c) {
0305         case 'd':
0306             printf("print delayacct stats ON\n");
0307             print_delays = 1;
0308             break;
0309         case 'i':
0310             printf("printing IO accounting\n");
0311             print_io_accounting = 1;
0312             break;
0313         case 'q':
0314             printf("printing task/process context switch rates\n");
0315             print_task_context_switch_counts = 1;
0316             break;
0317         case 'C':
0318             containerset = 1;
0319             containerpath = optarg;
0320             break;
0321         case 'w':
0322             logfile = strdup(optarg);
0323             printf("write to file %s\n", logfile);
0324             write_file = 1;
0325             break;
0326         case 'r':
0327             rcvbufsz = atoi(optarg);
0328             printf("receive buf size %d\n", rcvbufsz);
0329             if (rcvbufsz < 0)
0330                 err(1, "Invalid rcv buf size\n");
0331             break;
0332         case 'm':
0333             strncpy(cpumask, optarg, sizeof(cpumask));
0334             cpumask[sizeof(cpumask) - 1] = '\0';
0335             maskset = 1;
0336             printf("cpumask %s maskset %d\n", cpumask, maskset);
0337             break;
0338         case 't':
0339             tid = atoi(optarg);
0340             if (!tid)
0341                 err(1, "Invalid tgid\n");
0342             cmd_type = TASKSTATS_CMD_ATTR_TGID;
0343             break;
0344         case 'p':
0345             tid = atoi(optarg);
0346             if (!tid)
0347                 err(1, "Invalid pid\n");
0348             cmd_type = TASKSTATS_CMD_ATTR_PID;
0349             break;
0350         case 'c':
0351 
0352             /* Block SIGCHLD for sigwait() later */
0353             if (sigemptyset(&sigset) == -1)
0354                 err(1, "Failed to empty sigset");
0355             if (sigaddset(&sigset, SIGCHLD))
0356                 err(1, "Failed to set sigchld in sigset");
0357             sigprocmask(SIG_BLOCK, &sigset, NULL);
0358 
0359             /* fork/exec a child */
0360             tid = fork();
0361             if (tid < 0)
0362                 err(1, "Fork failed\n");
0363             if (tid == 0)
0364                 if (execvp(argv[optind - 1],
0365                     &argv[optind - 1]) < 0)
0366                     exit(-1);
0367 
0368             /* Set the command type and avoid further processing */
0369             cmd_type = TASKSTATS_CMD_ATTR_PID;
0370             forking = 1;
0371             break;
0372         case 'v':
0373             printf("debug on\n");
0374             dbg = 1;
0375             break;
0376         case 'l':
0377             printf("listen forever\n");
0378             loop = 1;
0379             break;
0380         default:
0381             usage();
0382             exit(-1);
0383         }
0384     }
0385 
0386     if (write_file) {
0387         fd = open(logfile, O_WRONLY | O_CREAT | O_TRUNC,
0388               S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
0389         if (fd == -1) {
0390             perror("Cannot open output file\n");
0391             exit(1);
0392         }
0393     }
0394 
0395     nl_sd = create_nl_socket(NETLINK_GENERIC);
0396     if (nl_sd < 0)
0397         err(1, "error creating Netlink socket\n");
0398 
0399 
0400     mypid = getpid();
0401     id = get_family_id(nl_sd);
0402     if (!id) {
0403         fprintf(stderr, "Error getting family id, errno %d\n", errno);
0404         goto err;
0405     }
0406     PRINTF("family id %d\n", id);
0407 
0408     if (maskset) {
0409         rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
0410                   TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
0411                   &cpumask, strlen(cpumask) + 1);
0412         PRINTF("Sent register cpumask, retval %d\n", rc);
0413         if (rc < 0) {
0414             fprintf(stderr, "error sending register cpumask\n");
0415             goto err;
0416         }
0417     }
0418 
0419     if (tid && containerset) {
0420         fprintf(stderr, "Select either -t or -C, not both\n");
0421         goto err;
0422     }
0423 
0424     /*
0425      * If we forked a child, wait for it to exit. Cannot use waitpid()
0426      * as all the delicious data would be reaped as part of the wait
0427      */
0428     if (tid && forking) {
0429         int sig_received;
0430         sigwait(&sigset, &sig_received);
0431     }
0432 
0433     if (tid) {
0434         rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
0435                   cmd_type, &tid, sizeof(__u32));
0436         PRINTF("Sent pid/tgid, retval %d\n", rc);
0437         if (rc < 0) {
0438             fprintf(stderr, "error sending tid/tgid cmd\n");
0439             goto done;
0440         }
0441     }
0442 
0443     if (containerset) {
0444         cfd = open(containerpath, O_RDONLY);
0445         if (cfd < 0) {
0446             perror("error opening container file");
0447             goto err;
0448         }
0449         rc = send_cmd(nl_sd, id, mypid, CGROUPSTATS_CMD_GET,
0450                   CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32));
0451         if (rc < 0) {
0452             perror("error sending cgroupstats command");
0453             goto err;
0454         }
0455     }
0456     if (!maskset && !tid && !containerset) {
0457         usage();
0458         goto err;
0459     }
0460 
0461     do {
0462         rep_len = recv(nl_sd, &msg, sizeof(msg), 0);
0463         PRINTF("received %d bytes\n", rep_len);
0464 
0465         if (rep_len < 0) {
0466             fprintf(stderr, "nonfatal reply error: errno %d\n",
0467                 errno);
0468             continue;
0469         }
0470         if (msg.n.nlmsg_type == NLMSG_ERROR ||
0471             !NLMSG_OK((&msg.n), rep_len)) {
0472             struct nlmsgerr *err = NLMSG_DATA(&msg);
0473             fprintf(stderr, "fatal reply error,  errno %d\n",
0474                 err->error);
0475             goto done;
0476         }
0477 
0478         PRINTF("nlmsghdr size=%zu, nlmsg_len=%d, rep_len=%d\n",
0479                sizeof(struct nlmsghdr), msg.n.nlmsg_len, rep_len);
0480 
0481 
0482         rep_len = GENLMSG_PAYLOAD(&msg.n);
0483 
0484         na = (struct nlattr *) GENLMSG_DATA(&msg);
0485         len = 0;
0486         while (len < rep_len) {
0487             len += NLA_ALIGN(na->nla_len);
0488             switch (na->nla_type) {
0489             case TASKSTATS_TYPE_AGGR_TGID:
0490                 /* Fall through */
0491             case TASKSTATS_TYPE_AGGR_PID:
0492                 aggr_len = NLA_PAYLOAD(na->nla_len);
0493                 len2 = 0;
0494                 /* For nested attributes, na follows */
0495                 na = (struct nlattr *) NLA_DATA(na);
0496                 while (len2 < aggr_len) {
0497                     switch (na->nla_type) {
0498                     case TASKSTATS_TYPE_PID:
0499                         rtid = *(int *) NLA_DATA(na);
0500                         if (print_delays)
0501                             printf("PID\t%d\n", rtid);
0502                         break;
0503                     case TASKSTATS_TYPE_TGID:
0504                         rtid = *(int *) NLA_DATA(na);
0505                         if (print_delays)
0506                             printf("TGID\t%d\n", rtid);
0507                         break;
0508                     case TASKSTATS_TYPE_STATS:
0509                         if (print_delays)
0510                             print_delayacct((struct taskstats *) NLA_DATA(na));
0511                         if (print_io_accounting)
0512                             print_ioacct((struct taskstats *) NLA_DATA(na));
0513                         if (print_task_context_switch_counts)
0514                             task_context_switch_counts((struct taskstats *) NLA_DATA(na));
0515                         if (fd) {
0516                             if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
0517                                 err(1,"write error\n");
0518                             }
0519                         }
0520                         if (!loop)
0521                             goto done;
0522                         break;
0523                     case TASKSTATS_TYPE_NULL:
0524                         break;
0525                     default:
0526                         fprintf(stderr, "Unknown nested"
0527                             " nla_type %d\n",
0528                             na->nla_type);
0529                         break;
0530                     }
0531                     len2 += NLA_ALIGN(na->nla_len);
0532                     na = (struct nlattr *)((char *)na +
0533                                    NLA_ALIGN(na->nla_len));
0534                 }
0535                 break;
0536 
0537             case CGROUPSTATS_TYPE_CGROUP_STATS:
0538                 print_cgroupstats(NLA_DATA(na));
0539                 break;
0540             default:
0541                 fprintf(stderr, "Unknown nla_type %d\n",
0542                     na->nla_type);
0543             case TASKSTATS_TYPE_NULL:
0544                 break;
0545             }
0546             na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
0547         }
0548     } while (loop);
0549 done:
0550     if (maskset) {
0551         rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
0552                   TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
0553                   &cpumask, strlen(cpumask) + 1);
0554         printf("Sent deregister mask, retval %d\n", rc);
0555         if (rc < 0)
0556             err(rc, "error sending deregister cpumask\n");
0557     }
0558 err:
0559     close(nl_sd);
0560     if (fd)
0561         close(fd);
0562     if (cfd)
0563         close(cfd);
0564     return 0;
0565 }