Back to home page

OSCL-LXR

 
 

    


0001 #!/usr/bin/env perl
0002 # This is a POC (proof of concept or piece of crap, take your pick) for reading the
0003 # text representation of trace output related to page allocation. It makes an attempt
0004 # to extract some high-level information on what is going on. The accuracy of the parser
0005 # may vary considerably
0006 #
0007 # Example usage: trace-pagealloc-postprocess.pl < /sys/kernel/debug/tracing/trace_pipe
0008 # other options
0009 #   --prepend-parent    Report on the parent proc and PID
0010 #   --read-procstat If the trace lacks process info, get it from /proc
0011 #   --ignore-pid    Aggregate processes of the same name together
0012 #
0013 # Copyright (c) IBM Corporation 2009
0014 # Author: Mel Gorman <mel@csn.ul.ie>
0015 use strict;
0016 use Getopt::Long;
0017 
0018 # Tracepoint events
0019 use constant MM_PAGE_ALLOC      => 1;
0020 use constant MM_PAGE_FREE       => 2;
0021 use constant MM_PAGE_FREE_BATCHED   => 3;
0022 use constant MM_PAGE_PCPU_DRAIN     => 4;
0023 use constant MM_PAGE_ALLOC_ZONE_LOCKED  => 5;
0024 use constant MM_PAGE_ALLOC_EXTFRAG  => 6;
0025 use constant EVENT_UNKNOWN      => 7;
0026 
0027 # Constants used to track state
0028 use constant STATE_PCPU_PAGES_DRAINED   => 8;
0029 use constant STATE_PCPU_PAGES_REFILLED  => 9;
0030 
0031 # High-level events extrapolated from tracepoints
0032 use constant HIGH_PCPU_DRAINS       => 10;
0033 use constant HIGH_PCPU_REFILLS      => 11;
0034 use constant HIGH_EXT_FRAGMENT      => 12;
0035 use constant HIGH_EXT_FRAGMENT_SEVERE   => 13;
0036 use constant HIGH_EXT_FRAGMENT_MODERATE => 14;
0037 use constant HIGH_EXT_FRAGMENT_CHANGED  => 15;
0038 
0039 my %perprocesspid;
0040 my %perprocess;
0041 my $opt_ignorepid;
0042 my $opt_read_procstat;
0043 my $opt_prepend_parent;
0044 
0045 # Catch sigint and exit on request
0046 my $sigint_report = 0;
0047 my $sigint_exit = 0;
0048 my $sigint_pending = 0;
0049 my $sigint_received = 0;
0050 sub sigint_handler {
0051     my $current_time = time;
0052     if ($current_time - 2 > $sigint_received) {
0053         print "SIGINT received, report pending. Hit ctrl-c again to exit\n";
0054         $sigint_report = 1;
0055     } else {
0056         if (!$sigint_exit) {
0057             print "Second SIGINT received quickly, exiting\n";
0058         }
0059         $sigint_exit++;
0060     }
0061 
0062     if ($sigint_exit > 3) {
0063         print "Many SIGINTs received, exiting now without report\n";
0064         exit;
0065     }
0066 
0067     $sigint_received = $current_time;
0068     $sigint_pending = 1;
0069 }
0070 $SIG{INT} = "sigint_handler";
0071 
0072 # Parse command line options
0073 GetOptions(
0074     'ignore-pid'     => \$opt_ignorepid,
0075     'read-procstat'  => \$opt_read_procstat,
0076     'prepend-parent' => \$opt_prepend_parent,
0077 );
0078 
0079 # Defaults for dynamically discovered regex's
0080 my $regex_fragdetails_default = 'page=([0-9a-f]*) pfn=([0-9]*) alloc_order=([-0-9]*) fallback_order=([-0-9]*) pageblock_order=([-0-9]*) alloc_migratetype=([-0-9]*) fallback_migratetype=([-0-9]*) fragmenting=([-0-9]) change_ownership=([-0-9])';
0081 
0082 # Dyanically discovered regex
0083 my $regex_fragdetails;
0084 
0085 # Static regex used. Specified like this for readability and for use with /o
0086 #                      (process_pid)     (cpus      )   ( time  )   (tpoint    ) (details)
0087 my $regex_traceevent = '\s*([a-zA-Z0-9-]*)\s*(\[[0-9]*\])\s*([0-9.]*):\s*([a-zA-Z_]*):\s*(.*)';
0088 my $regex_statname = '[-0-9]*\s\((.*)\).*';
0089 my $regex_statppid = '[-0-9]*\s\(.*\)\s[A-Za-z]\s([0-9]*).*';
0090 
0091 sub generate_traceevent_regex {
0092     my $event = shift;
0093     my $default = shift;
0094     my $regex;
0095 
0096     # Read the event format or use the default
0097     if (!open (FORMAT, "/sys/kernel/debug/tracing/events/$event/format")) {
0098         $regex = $default;
0099     } else {
0100         my $line;
0101         while (!eof(FORMAT)) {
0102             $line = <FORMAT>;
0103             if ($line =~ /^print fmt:\s"(.*)",.*/) {
0104                 $regex = $1;
0105                 $regex =~ s/%p/\([0-9a-f]*\)/g;
0106                 $regex =~ s/%d/\([-0-9]*\)/g;
0107                 $regex =~ s/%lu/\([0-9]*\)/g;
0108             }
0109         }
0110     }
0111 
0112     # Verify fields are in the right order
0113     my $tuple;
0114     foreach $tuple (split /\s/, $regex) {
0115         my ($key, $value) = split(/=/, $tuple);
0116         my $expected = shift;
0117         if ($key ne $expected) {
0118             print("WARNING: Format not as expected '$key' != '$expected'");
0119             $regex =~ s/$key=\((.*)\)/$key=$1/;
0120         }
0121     }
0122 
0123     if (defined shift) {
0124         die("Fewer fields than expected in format");
0125     }
0126 
0127     return $regex;
0128 }
0129 $regex_fragdetails = generate_traceevent_regex("kmem/mm_page_alloc_extfrag",
0130             $regex_fragdetails_default,
0131             "page", "pfn",
0132             "alloc_order", "fallback_order", "pageblock_order",
0133             "alloc_migratetype", "fallback_migratetype",
0134             "fragmenting", "change_ownership");
0135 
0136 sub read_statline($) {
0137     my $pid = $_[0];
0138     my $statline;
0139 
0140     if (open(STAT, "/proc/$pid/stat")) {
0141         $statline = <STAT>;
0142         close(STAT);
0143     }
0144 
0145     if ($statline eq '') {
0146         $statline = "-1 (UNKNOWN_PROCESS_NAME) R 0";
0147     }
0148 
0149     return $statline;
0150 }
0151 
0152 sub guess_process_pid($$) {
0153     my $pid = $_[0];
0154     my $statline = $_[1];
0155 
0156     if ($pid == 0) {
0157         return "swapper-0";
0158     }
0159 
0160     if ($statline !~ /$regex_statname/o) {
0161         die("Failed to math stat line for process name :: $statline");
0162     }
0163     return "$1-$pid";
0164 }
0165 
0166 sub parent_info($$) {
0167     my $pid = $_[0];
0168     my $statline = $_[1];
0169     my $ppid;
0170 
0171     if ($pid == 0) {
0172         return "NOPARENT-0";
0173     }
0174 
0175     if ($statline !~ /$regex_statppid/o) {
0176         die("Failed to match stat line process ppid:: $statline");
0177     }
0178 
0179     # Read the ppid stat line
0180     $ppid = $1;
0181     return guess_process_pid($ppid, read_statline($ppid));
0182 }
0183 
0184 sub process_events {
0185     my $traceevent;
0186     my $process_pid;
0187     my $cpus;
0188     my $timestamp;
0189     my $tracepoint;
0190     my $details;
0191     my $statline;
0192 
0193     # Read each line of the event log
0194 EVENT_PROCESS:
0195     while ($traceevent = <STDIN>) {
0196         if ($traceevent =~ /$regex_traceevent/o) {
0197             $process_pid = $1;
0198             $tracepoint = $4;
0199 
0200             if ($opt_read_procstat || $opt_prepend_parent) {
0201                 $process_pid =~ /(.*)-([0-9]*)$/;
0202                 my $process = $1;
0203                 my $pid = $2;
0204 
0205                 $statline = read_statline($pid);
0206 
0207                 if ($opt_read_procstat && $process eq '') {
0208                     $process_pid = guess_process_pid($pid, $statline);
0209                 }
0210 
0211                 if ($opt_prepend_parent) {
0212                     $process_pid = parent_info($pid, $statline) . " :: $process_pid";
0213                 }
0214             }
0215 
0216             # Unnecessary in this script. Uncomment if required
0217             # $cpus = $2;
0218             # $timestamp = $3;
0219         } else {
0220             next;
0221         }
0222 
0223         # Perl Switch() sucks majorly
0224         if ($tracepoint eq "mm_page_alloc") {
0225             $perprocesspid{$process_pid}->{MM_PAGE_ALLOC}++;
0226         } elsif ($tracepoint eq "mm_page_free") {
0227             $perprocesspid{$process_pid}->{MM_PAGE_FREE}++
0228         } elsif ($tracepoint eq "mm_page_free_batched") {
0229             $perprocesspid{$process_pid}->{MM_PAGE_FREE_BATCHED}++;
0230         } elsif ($tracepoint eq "mm_page_pcpu_drain") {
0231             $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}++;
0232             $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED}++;
0233         } elsif ($tracepoint eq "mm_page_alloc_zone_locked") {
0234             $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}++;
0235             $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED}++;
0236         } elsif ($tracepoint eq "mm_page_alloc_extfrag") {
0237 
0238             # Extract the details of the event now
0239             $details = $5;
0240 
0241             my ($page, $pfn);
0242             my ($alloc_order, $fallback_order, $pageblock_order);
0243             my ($alloc_migratetype, $fallback_migratetype);
0244             my ($fragmenting, $change_ownership);
0245 
0246             if ($details !~ /$regex_fragdetails/o) {
0247                 print "WARNING: Failed to parse mm_page_alloc_extfrag as expected\n";
0248                 next;
0249             }
0250 
0251             $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}++;
0252             $page = $1;
0253             $pfn = $2;
0254             $alloc_order = $3;
0255             $fallback_order = $4;
0256             $pageblock_order = $5;
0257             $alloc_migratetype = $6;
0258             $fallback_migratetype = $7;
0259             $fragmenting = $8;
0260             $change_ownership = $9;
0261 
0262             if ($fragmenting) {
0263                 $perprocesspid{$process_pid}->{HIGH_EXT_FRAG}++;
0264                 if ($fallback_order <= 3) {
0265                     $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}++;
0266                 } else {
0267                     $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}++;
0268                 }
0269             }
0270             if ($change_ownership) {
0271                 $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}++;
0272             }
0273         } else {
0274             $perprocesspid{$process_pid}->{EVENT_UNKNOWN}++;
0275         }
0276 
0277         # Catch a full pcpu drain event
0278         if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} &&
0279                 $tracepoint ne "mm_page_pcpu_drain") {
0280 
0281             $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS}++;
0282             $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0;
0283         }
0284 
0285         # Catch a full pcpu refill event
0286         if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} &&
0287                 $tracepoint ne "mm_page_alloc_zone_locked") {
0288             $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS}++;
0289             $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0;
0290         }
0291 
0292         if ($sigint_pending) {
0293             last EVENT_PROCESS;
0294         }
0295     }
0296 }
0297 
0298 sub dump_stats {
0299     my $hashref = shift;
0300     my %stats = %$hashref;
0301 
0302     # Dump per-process stats
0303     my $process_pid;
0304     my $max_strlen = 0;
0305 
0306     # Get the maximum process name
0307     foreach $process_pid (keys %perprocesspid) {
0308         my $len = length($process_pid);
0309         if ($len > $max_strlen) {
0310             $max_strlen = $len;
0311         }
0312     }
0313     $max_strlen += 2;
0314 
0315     printf("\n");
0316     printf("%-" . $max_strlen . "s %8s %10s   %8s %8s   %8s %8s %8s   %8s %8s %8s %8s %8s %8s\n",
0317         "Process", "Pages",  "Pages",      "Pages", "Pages", "PCPU",  "PCPU",   "PCPU",    "Fragment",  "Fragment", "MigType", "Fragment", "Fragment", "Unknown");
0318     printf("%-" . $max_strlen . "s %8s %10s   %8s %8s   %8s %8s %8s   %8s %8s %8s %8s %8s %8s\n",
0319         "details", "allocd", "allocd",     "freed", "freed", "pages", "drains", "refills", "Fallback", "Causing",   "Changed", "Severe", "Moderate", "");
0320 
0321     printf("%-" . $max_strlen . "s %8s %10s   %8s %8s   %8s %8s %8s   %8s %8s %8s %8s %8s %8s\n",
0322         "",        "",       "under lock", "direct", "pagevec", "drain", "", "", "", "", "", "", "", "");
0323 
0324     foreach $process_pid (keys %stats) {
0325         # Dump final aggregates
0326         if ($stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED}) {
0327             $stats{$process_pid}->{HIGH_PCPU_DRAINS}++;
0328             $stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0;
0329         }
0330         if ($stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED}) {
0331             $stats{$process_pid}->{HIGH_PCPU_REFILLS}++;
0332             $stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0;
0333         }
0334 
0335         printf("%-" . $max_strlen . "s %8d %10d   %8d %8d   %8d %8d %8d   %8d %8d %8d %8d %8d %8d\n",
0336             $process_pid,
0337             $stats{$process_pid}->{MM_PAGE_ALLOC},
0338             $stats{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED},
0339             $stats{$process_pid}->{MM_PAGE_FREE},
0340             $stats{$process_pid}->{MM_PAGE_FREE_BATCHED},
0341             $stats{$process_pid}->{MM_PAGE_PCPU_DRAIN},
0342             $stats{$process_pid}->{HIGH_PCPU_DRAINS},
0343             $stats{$process_pid}->{HIGH_PCPU_REFILLS},
0344             $stats{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG},
0345             $stats{$process_pid}->{HIGH_EXT_FRAG},
0346             $stats{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED},
0347             $stats{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE},
0348             $stats{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE},
0349             $stats{$process_pid}->{EVENT_UNKNOWN});
0350     }
0351 }
0352 
0353 sub aggregate_perprocesspid() {
0354     my $process_pid;
0355     my $process;
0356     undef %perprocess;
0357 
0358     foreach $process_pid (keys %perprocesspid) {
0359         $process = $process_pid;
0360         $process =~ s/-([0-9])*$//;
0361         if ($process eq '') {
0362             $process = "NO_PROCESS_NAME";
0363         }
0364 
0365         $perprocess{$process}->{MM_PAGE_ALLOC} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC};
0366         $perprocess{$process}->{MM_PAGE_ALLOC_ZONE_LOCKED} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED};
0367         $perprocess{$process}->{MM_PAGE_FREE} += $perprocesspid{$process_pid}->{MM_PAGE_FREE};
0368         $perprocess{$process}->{MM_PAGE_FREE_BATCHED} += $perprocesspid{$process_pid}->{MM_PAGE_FREE_BATCHED};
0369         $perprocess{$process}->{MM_PAGE_PCPU_DRAIN} += $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN};
0370         $perprocess{$process}->{HIGH_PCPU_DRAINS} += $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS};
0371         $perprocess{$process}->{HIGH_PCPU_REFILLS} += $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS};
0372         $perprocess{$process}->{MM_PAGE_ALLOC_EXTFRAG} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG};
0373         $perprocess{$process}->{HIGH_EXT_FRAG} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAG};
0374         $perprocess{$process}->{HIGH_EXT_FRAGMENT_CHANGED} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED};
0375         $perprocess{$process}->{HIGH_EXT_FRAGMENT_SEVERE} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE};
0376         $perprocess{$process}->{HIGH_EXT_FRAGMENT_MODERATE} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE};
0377         $perprocess{$process}->{EVENT_UNKNOWN} += $perprocesspid{$process_pid}->{EVENT_UNKNOWN};
0378     }
0379 }
0380 
0381 sub report() {
0382     if (!$opt_ignorepid) {
0383         dump_stats(\%perprocesspid);
0384     } else {
0385         aggregate_perprocesspid();
0386         dump_stats(\%perprocess);
0387     }
0388 }
0389 
0390 # Process events or signals until neither is available
0391 sub signal_loop() {
0392     my $sigint_processed;
0393     do {
0394         $sigint_processed = 0;
0395         process_events();
0396 
0397         # Handle pending signals if any
0398         if ($sigint_pending) {
0399             my $current_time = time;
0400 
0401             if ($sigint_exit) {
0402                 print "Received exit signal\n";
0403                 $sigint_pending = 0;
0404             }
0405             if ($sigint_report) {
0406                 if ($current_time >= $sigint_received + 2) {
0407                     report();
0408                     $sigint_report = 0;
0409                     $sigint_pending = 0;
0410                     $sigint_processed = 1;
0411                 }
0412             }
0413         }
0414     } while ($sigint_pending || $sigint_processed);
0415 }
0416 
0417 signal_loop();
0418 report();