Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Licensed to the Apache Software Foundation (ASF) under one or more
0003  * contributor license agreements.  See the NOTICE file distributed with
0004  * this work for additional information regarding copyright ownership.
0005  * The ASF licenses this file to You under the Apache License, Version 2.0
0006  * (the "License"); you may not use this file except in compliance with
0007  * the License.  You may obtain a copy of the License at
0008  *
0009  *    http://www.apache.org/licenses/LICENSE-2.0
0010  *
0011  * Unless required by applicable law or agreed to in writing, software
0012  * distributed under the License is distributed on an "AS IS" BASIS,
0013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0014  * See the License for the specific language governing permissions and
0015  * limitations under the License.
0016  */
0017 
0018 /*
0019  * This file contains the logic to render the RDD DAG visualization in the UI.
0020  *
0021  * This DAG describes the relationships between
0022  *   (1) an RDD and its dependencies,
0023  *   (2) an RDD and its operation scopes, and
0024  *   (3) an RDD's operation scopes and the stage / job hierarchy
0025  *
0026  * An operation scope is a general, named code block that instantiates RDDs
0027  * (e.g. filter, textFile, reduceByKey). An operation scope can be nested inside
0028  * of other scopes if the corresponding RDD operation invokes other such operations
0029  * (for more detail, see o.a.s.rdd.RDDOperationScope).
0030  *
0031  * A stage may include one or more operation scopes if the RDD operations are
0032  * streamlined into one stage (e.g. rdd.map(...).filter(...).flatMap(...)).
0033  * On the flip side, an operation scope may also include one or many stages,
0034  * or even jobs if the RDD operation is higher level than Spark's scheduling
0035  * primitives (e.g. take, any SQL query).
0036  *
0037  * In the visualization, an RDD is expressed as a node, and its dependencies
0038  * as directed edges (from parent to child). Operation scopes, stages, and
0039  * jobs are expressed as clusters that may contain one or many nodes. These
0040  * clusters may be nested inside of each other in the scenarios described
0041  * above.
0042  *
0043  * The visualization is rendered in an SVG contained in "div#dag-viz-graph",
0044  * and its input data is expected to be populated in "div#dag-viz-metadata"
0045  * by Spark's UI code. This is currently used only on the stage page and on
0046  * the job page.
0047  *
0048  * This requires jQuery, d3, and dagre-d3. Note that we use a custom release
0049  * of dagre-d3 (http://github.com/andrewor14/dagre-d3) for some specific
0050  * functionality. For more detail, please track the changes in that project
0051  * since it was forked (commit 101503833a8ce5fe369547f6addf3e71172ce10b).
0052  */
0053 
/* Constants shared by the job page and the stage page visualizations. */
var VizConstants = {
  // Margin added around the outermost clusters when the SVG is sized
  svgMarginX: 16,
  svgMarginY: 16,
  // Horizontal gap between two neighbouring stage graphs on the job page
  stageSep: 40,
  // Prefixes used to build the ids / class names of graph elements
  graphPrefix: "graph_",
  nodePrefix: "node_",
  clusterPrefix: "cluster_"
};

/*
 * Constants specific to the job page.
 * NOTE(review): the label sizes are not read anywhere in the visible part of
 * this file; only rankSep is (by preprocessGraphLayout). Confirm other users.
 */
var JobPageVizConstants = {
  clusterLabelSize: 12,
  stageClusterLabelSize: 14,
  // Applied as the dagre graph's rankSep on the job page
  rankSep: 40
};

/*
 * Constants specific to the stage page.
 * NOTE(review): the label sizes are not read anywhere in the visible part of
 * this file; only rankSep is (by preprocessGraphLayout). Confirm other users.
 */
var StagePageVizConstants = {
  clusterLabelSize: 14,
  stageClusterLabelSize: 14,
  // Applied as the dagre graph's rankSep on the stage page
  rankSep: 40
};
0074 
/*
 * Name of the localStorage entry that remembers whether the DAG visualization
 * is expanded. The job page ("expand-dag-viz-arrow-job") and the stage page
 * ("expand-dag-viz-arrow-stage") each have their own entry.
 */
function expandDagVizArrowKey(forJob) {
  if (forJob) {
    return "expand-dag-viz-arrow-job";
  }
  return "expand-dag-viz-arrow-stage";
}
0082 
/*
 * Expand or collapse the RDD DAG visualization.
 *
 * The graph is rendered lazily: only when it is shown while its container is
 * still empty. Collapsing merely hides the container, so the rendered graph is
 * reused the next time it is expanded. The flipped expanded/collapsed state is
 * written back to localStorage under the key for this page.
 *
 * This is the narrow interface called from the Scala UI code.
 */
function toggleDagViz(forJob) {
  var storageKey = expandDagVizArrowKey(forJob);
  var wasExpanded = window.localStorage.getItem(storageKey) === "true";

  // Flip the arrow; the class it ends up with decides whether the graph is shown
  var arrow = ".expand-dag-viz-arrow";
  $(arrow).toggleClass('arrow-closed');
  $(arrow).toggleClass('arrow-open');

  if (!$(arrow).hasClass("arrow-open")) {
    // Keep the rendered graph around so it does not have to be rendered again
    graphContainer().style("display", "none");
  } else {
    var notRenderedYet = graphContainer().select("*").empty();
    if (notRenderedYet) {
      renderDagViz(forJob);
    }
    graphContainer().style("display", "block");
  }

  window.localStorage.setItem(storageKey, String(!wasExpanded));
}
0110 
// On page load, re-expand the visualization if it was left expanded last time.
$(function () {
  /*
   * If the page contains `anchorSelector` and the stored state for this page
   * kind is "true", expand the visualization and report true.
   */
  function restoreIfExpanded(anchorSelector, forJob) {
    var key = expandDagVizArrowKey(forJob);
    if (!$(anchorSelector).length || window.localStorage.getItem(key) !== "true") {
      return false;
    }
    // toggleDagViz flips the stored flag, so reset it first to end up at "true"
    window.localStorage.setItem(key, "false");
    toggleDagViz(forJob);
    return true;
  }

  // The stage page takes precedence; the job page is only tried otherwise
  if (!restoreIfExpanded("#stage-dag-viz", false)) {
    restoreIfExpanded("#job-dag-viz", true);
  }
});
0124 
/*
 * Render the RDD DAG visualization for the job page or the stage page.
 *
 * Input DOM hierarchy:
 *   div#dag-viz-metadata >
 *   div.stage-metadata >
 *   div.[dot-file | incoming-edge | outgoing-edge]
 *
 * Output DOM hierarchy:
 *   div#dag-viz-graph >
 *   svg >
 *   g.cluster_stage_[stageId]
 *
 * If no metadata is present, an explanatory message is placed in the graph
 * container instead of an SVG.
 *
 * Note that the input metadata is populated by o.a.s.ui.UIUtils.showDagViz.
 * Any changes in the input format here must be reflected there.
 */
function renderDagViz(forJob) {
  var pageKind = forJob ? "job" : "stage";

  // Without any metadata there is nothing to draw: explain why and stop
  var metadataMissing =
    metadataContainer().empty() || metadataContainer().selectAll("div").empty();
  if (metadataMissing) {
    var retentionHint = forJob
      ? "<i>spark.ui.retainedJobs</i> and <i>spark.ui.retainedStages</i>."
      : "<i>spark.ui.retainedStages</i>";
    var html = [
      "<b>No visualization information available for this ", pageKind, "!</b><br/>",
      "If this is an old ", pageKind, ", its visualization metadata may have been ",
      "cleaned up over time.<br/> You may consider increasing the value of ",
      retentionHint
    ].join("");
    graphContainer().append("div").attr("id", "empty-dag-viz-message").html(html);
    return;
  }

  // Draw the graph into a fresh <svg> whose class names the page kind
  var svg = graphContainer().append("svg").attr("class", pageKind);
  if (forJob) {
    renderDagVizForJob(svg);
  } else {
    renderDagVizForStage(svg);
  }

  // Tag the nodes of every RDD listed as cached
  metadataContainer().selectAll(".cached-rdd").each(function() {
    var cachedNodeClass = VizConstants.nodePrefix + d3.select(this).text().trim();
    svg.selectAll("g." + cachedNodeClass).classed("cached", true);
  });

  // Tag barrier operation clusters, looked up inside the graph of their own stage
  metadataContainer().selectAll(".barrier-rdd").each(function() {
    var operationClass = VizConstants.clusterPrefix + d3.select(this).text().trim();
    var owningStageId = $(this).parents(".stage-metadata").attr("stage-id");
    var stageGraphId = VizConstants.graphPrefix + owningStageId;
    svg.selectAll("g[id=" + stageGraphId + "] g." + operationClass).classed("barrier", true);
  });

  resizeSvg(svg);
  interpretLineBreak(svg);
}
0186 
/*
 * Render the RDD DAG visualization on the stage page.
 *
 * The DOT description of the first div.stage-metadata is drawn into a new
 * <g id="graph_[stageId]"> appended to the given SVG container.
 */
function renderDagVizForStage(svgContainer) {
  var stageMetadata = metadataContainer().select(".stage-metadata");
  var dotSource = stageMetadata.select(".dot-file").text().trim();
  var stageGroupId = VizConstants.graphPrefix + stageMetadata.attr("stage-id");
  var stageGroup = svgContainer.append("g").attr("id", stageGroupId);
  renderDot(dotSource, stageGroup, false);

  // Give every rectangle rounded corners
  svgContainer.selectAll("rect").attr("rx", "5").attr("ry", "5");
}
0201 
/*
 * Render the RDD DAG visualization on the job page.
 *
 * Due to limitations in dagre-d3, every stage is rendered as an independent
 * graph so that the stages can be positioned side by side here. The price is
 * that dagre-d3 cannot draw the edges that cross stage boundaries; those are
 * collected while the stages are rendered and drawn manually at the end.
 */
function renderDagVizForJob(svgContainer) {
  var pendingCrossStageEdges = [];

  // Each div.stage-metadata holds what is needed to draw one stage: its DOT
  // file, its incoming and outgoing edges, and the cached RDDs it contains.
  metadataContainer().selectAll(".stage-metadata").each(function(unusedDatum, stageIndex) {
    var stageMetadata = d3.select(this);
    var dotSource = stageMetadata.select(".dot-file").text();
    var stageId = stageMetadata.attr("stage-id");
    var stageGroupId = VizConstants.graphPrefix + stageId;
    var skipped = stageMetadata.attr("skipped") === "true";

    var stageGroup;
    if (!skipped) {
      // Wrap the graph in a link to the stage page (TODO: handle stage attempts)
      var attemptId = 0;
      var stageLink = uiRoot + appBasePath + "/stages/stage/?id=" + stageId + "&attempt=" + attemptId;
      stageGroup = svgContainer
        .append("a")
        .attr("xlink:href", stageLink)
        .attr("onclick", "window.localStorage.setItem(expandDagVizArrowKey(false), true)")
        .append("g")
        .attr("id", stageGroupId);
    } else {
      stageGroup = svgContainer
        .append("g")
        .attr("id", stageGroupId)
        .attr("skipped", "true");
    }

    // Every stage but the first is shifted right so that it starts after the
    // most recently rendered stage cluster, separated by VizConstants.stageSep.
    if (stageIndex > 0) {
      var renderedStages = svgContainer.selectAll("g.cluster.stage");
      if (!renderedStages.empty()) {
        var previousStage = d3.select(renderedStages[0].pop());
        var previousWidth = toFloat(previousStage.select("rect").attr("width"));
        var previousPosition = getAbsolutePosition(previousStage);
        var shiftX = previousPosition.x + previousWidth + VizConstants.stageSep;
        stageGroup.attr("transform", "translate(" + shiftX + ", 0)");
      }
    }

    // Draw the stage itself
    renderDot(dotSource, stageGroup, true);

    // The "skipped" class has to go on every element rather than on the parent
    // container only, because of CSS override rules.
    if (skipped) {
      stageGroup.selectAll("g").classed("skipped", true);
    }

    // Give every rectangle rounded corners
    stageGroup.selectAll("rect").attr("rx", "4").attr("ry", "4");

    // Remember the incoming edges; they are drawn later, in a container that
    // sits on top of all stage graphs.
    stageMetadata.selectAll(".incoming-edge").each(function() {
      var endpoints = d3.select(this).text().trim().split(","); // e.g. 3,4 => [3, 4]
      pendingCrossStageEdges.push(endpoints);
    });
  });

  addTooltipsForRDDs(svgContainer);
  drawCrossStageEdges(pendingCrossStageEdges, svgContainer);
}
0281 
/*
 * Parse the given DOT source and draw it with dagre-d3 into `container`.
 * `forJob` selects the job-page or stage-page layout tweaks applied before
 * rendering.
 */
function renderDot(dot, container, forJob) {
  var graph = graphlibDot.read(dot);
  var drawGraph = new dagreD3.render();
  preprocessGraphLayout(graph, forJob);
  drawGraph(container, graph);

  // Tag the cluster whose name starts with "Stage " so that it can be styled
  // and post-processed as the stage cluster
  var stageClusters = container.selectAll('g.cluster[name^="Stage "]');
  stageClusters.classed("stage", true);
}
0292 
0293 /* -------------------- *
0294  * | Helper functions | *
0295  * -------------------- */
0296 
0297 // Helper d3 accessors
/* d3 selection of the element the graph is rendered into. */
function graphContainer() {
  var graphSelector = "#dag-viz-graph";
  return d3.select(graphSelector);
}
/* d3 selection of the element that holds the input metadata. */
function metadataContainer() {
  var metadataSelector = "#dag-viz-metadata";
  return d3.select(metadataSelector);
}
0300 
/*
 * Helper function to pre-process the graph layout before it is rendered.
 * This is needed for the styles that affect the position and size of graph
 * elements, e.g. padding, font style, shape.
 *
 * Mutates `g` in place:
 *   - every node without children (i.e. not a cluster) gets a label style and
 *     padding, and on the job page is drawn as a circle with a hidden label;
 *   - every edge is interpolated as a "basis" curve;
 *   - the graph's rankSep is set from the constants of the current page.
 */
function preprocessGraphLayout(g, forJob) {
  g.nodes().forEach(function(nodeId) {
    var isCluster = g.children(nodeId).length > 0;
    if (isCluster) {
      return;
    }
    var leaf = g.node(nodeId);
    if (forJob) {
      // Do not display the RDD name on the job page
      leaf.shape = "circle";
      leaf.labelStyle = "font-size: 0px";
    } else {
      leaf.labelStyle = "font-size: 12px";
    }
    leaf.padding = "5";
  });

  // Curve the edges
  g.edges().forEach(function(edgeId) {
    g.edge(edgeId).lineInterpolate = "basis";
  });

  // Adjust the vertical separation between nodes
  var pageConstants = forJob ? JobPageVizConstants : StagePageVizConstants;
  g.graph().rankSep = pageConstants.rankSep;
}
0335 
/*
 * Helper function to size the SVG appropriately such that all elements are displayed.
 * This assumes that all outermost elements are clusters (rectangles).
 *
 * The viewBox is set to the bounding box of all cluster rectangles, grown by
 * VizConstants.svgMarginX / svgMarginY on every side, and the SVG's width and
 * height are set to the dimensions of that box.
 *
 * If the SVG contains no cluster rectangle there is no bounding box to
 * compute, so the SVG is left untouched rather than being given NaN values.
 */
function resizeSvg(svg) {
  var allClusters = svg.selectAll("g.cluster rect")[0];
  if (!allClusters || allClusters.length === 0) {
    return;
  }
  var startX = -VizConstants.svgMarginX +
    toFloat(d3.min(allClusters, function(e) {
      return getAbsolutePosition(d3.select(e)).x;
    }));
  var startY = -VizConstants.svgMarginY +
    toFloat(d3.min(allClusters, function(e) {
      return getAbsolutePosition(d3.select(e)).y;
    }));
  var endX = VizConstants.svgMarginX +
    toFloat(d3.max(allClusters, function(e) {
      var t = d3.select(e);
      return getAbsolutePosition(t).x + toFloat(t.attr("width"));
    }));
  var endY = VizConstants.svgMarginY +
    toFloat(d3.max(allClusters, function(e) {
      var t = d3.select(e);
      return getAbsolutePosition(t).y + toFloat(t.attr("height"));
    }));
  var width = endX - startX;
  var height = endY - startY;
  svg.attr("viewBox", startX + " " + startY + " " + width + " " + height)
     .attr("width", width)
     .attr("height", height);
}
0366 
/*
 * Helper function to interpret line breaks inside 'tspan' elements.
 * A 'tspan' shows the two-character sequence '\n' (backslash followed by 'n')
 * as raw text on both the stage page and the job page. This function turns
 * every such sequence into a real line break: the tspan keeps the text before
 * the first '\n', and each following piece is put into a copy of the tspan
 * (same attributes) appended, in order, to the tspan's parent.
 */
function interpretLineBreak(svg) {
  svg.selectAll("tspan").each(function() {
    var original = this.innerHTML;
    if (original.indexOf("\\n") === -1) {
      return;
    }
    var pieces = original.split("\\n");
    this.innerHTML = pieces[0];
    // One extra tspan per remaining piece, so that no text is dropped when a
    // label contains more than one line break
    for (var i = 1; i < pieces.length; i++) {
      // A shallow clone is enough: only the attributes are kept, the content
      // is replaced right away
      var continuation = this.cloneNode(false);
      continuation.innerHTML = pieces[i];
      this.parentNode.appendChild(continuation);
    }
  });
}
0387 
/*
 * (Job page only) Helper function to draw edges that cross stage boundaries.
 * We need to do this manually because we render each stage separately in dagre-d3.
 *
 * `edges` is a list of [fromRDDId, toRDDId] pairs. Nothing is drawn when the
 * list is empty. Otherwise the paths are added to a new <g id="cross-stage-edges">
 * and, if dagre-d3 generated an arrow marker, one copy of it (id "marker-arrow")
 * is shared by every path that is a direct child of a <g>.
 */
function drawCrossStageEdges(edges, svgContainer) {
  if (edges.length == 0) {
    return;
  }
  // Draw the paths first
  var edgesContainer = svgContainer.append("g").attr("id", "cross-stage-edges");
  for (var i = 0; i < edges.length; i++) {
    var fromRDDId = edges[i][0];
    var toRDDId = edges[i][1];
    connectRDDs(fromRDDId, toRDDId, edgesContainer, svgContainer);
  }
  // Now draw the arrows by borrowing the arrow marker generated by dagre-d3
  var dagreD3Marker = svgContainer.select("g.edgePaths marker");
  if (!dagreD3Marker.empty()) {
    svgContainer
      .append(function() { return dagreD3Marker.node().cloneNode(true); })
      .attr("id", "marker-arrow");
    svgContainer.selectAll("g > path").attr("marker-end", "url(#marker-arrow)");
    // Every path now points at the shared marker, so the per-edge marker
    // definitions are no longer needed. The SVG element is named "defs".
    svgContainer.selectAll("g.edgePaths defs").remove();
  }
}
0413 
/*
 * Helper function to compute the absolute position of the specified element
 * in our graph. Used when laying out the job page and when sizing the SVG.
 *
 * The result starts from the element's own "x" / "y" attributes (0 when
 * absent) and adds the translation of every "transform" attribute found on
 * the element and on its ancestors, up to but excluding the graph container.
 *
 * Returns an object of the form { x: number, y: number }.
 * Throws an Error if the given d3 selection is empty.
 */
function getAbsolutePosition(d3selection) {
  if (d3selection.empty()) {
    throw new Error("Attempted to get absolute position of an empty selection.");
  }
  var obj = d3selection;
  var _x = toFloat(obj.attr("x")) || 0;
  var _y = toFloat(obj.attr("y")) || 0;
  while (!obj.empty()) {
    var transformText = obj.attr("transform");
    if (transformText) {
      var translate = d3.transform(transformText).translate;
      _x += toFloat(translate[0]);
      _y += toFloat(translate[1]);
    }
    // Climb upwards to find how our parents are translated
    obj = d3.select(obj.node().parentNode);
    // Stop when we've reached the graph container itself
    if (obj.node() === graphContainer().node()) {
      break;
    }
  }
  return { x: _x, y: _y };
}
0441 
/*
 * (Job page only) Helper function to connect two RDDs with a curved edge.
 *
 * The edge is appended as a <path> to `edgesContainer`; the two RDD nodes are
 * looked up by class name inside `svgContainer`.
 */
function connectRDDs(fromRDDId, toRDDId, edgesContainer, svgContainer) {
  var sourceClass = VizConstants.nodePrefix + fromRDDId;
  var targetClass = VizConstants.nodePrefix + toRDDId;
  var start = getAbsolutePosition(svgContainer.select("g." + sourceClass));
  var end = getAbsolutePosition(svgContainer.select("g." + targetClass));

  // On the job page RDDs are drawn as circles. Both end points are pulled in
  // by the radius of the target's circle; otherwise the arrow heads would
  // bleed into the circle itself.
  var targetCircle = svgContainer.select("g.node." + targetClass).select("circle");
  var radius = toFloat(targetCircle.attr("r"));
  if (start.x < end.x) {
    start.x += radius;
    end.x -= radius;
  } else if (start.x > end.x) {
    start.x -= radius;
    end.x += radius;
  }

  // x coordinate at the given fraction of the way from start to end
  function xAt(fraction) {
    return start.x + (end.x - start.x) * fraction;
  }

  var waypoints;
  if (start.y != end.y) {
    // Different ranks: a curve that flattens out on both ends
    // e.g.       _____
    //           /
    //          |
    //    _____/
    waypoints = [
      [start.x, start.y],
      [xAt(0.4), start.y],
      [xAt(0.6), end.y],
      [end.x, end.y]
    ];
  } else {
    // Same rank: lift the middle part of the edge a little to avoid
    // interfering with whatever sits in between
    // e.g.       _______
    //      _____/       \_____
    waypoints = [
      [start.x, start.y],
      [xAt(0.2), start.y],
      [xAt(0.3), start.y - 20],
      [xAt(0.7), start.y - 20],
      [xAt(0.8), end.y],
      [end.x, end.y]
    ];
  }

  var curve = d3.svg.line().interpolate("basis");
  edgesContainer.append("path").datum(waypoints).attr("d", curve);
}
0495 
/*
 * (Job page only) Helper function to add tooltips for RDDs.
 *
 * Every g.node with a "name" attribute gets that name as the tooltip title of
 * its circle. Hovering any node shows or hides the tooltips of all nodes that
 * represent the same RDD, which is why the tooltips use a manual trigger.
 */
function addTooltipsForRDDs(svgContainer) {
  svgContainer.selectAll("g.node").each(function() {
    var rddNode = d3.select(this);
    var title = rddNode.attr("name");
    if (title) {
      var circle = rddNode.select("circle");
      circle.attr("data-toggle", "tooltip");
      circle.attr("data-placement", "top");
      // data-html lets line breaks in the title be interpreted
      circle.attr("data-html", "true");
      circle.attr("title", title);
    }
    // Link the tooltips of all nodes that belong to the same RDD
    rddNode.on("mouseenter", function() { triggerTooltipForRDD(rddNode, true); });
    rddNode.on("mouseleave", function() { triggerTooltipForRDD(rddNode, false); });
  });

  var tooltipOptions = { container: "body", trigger: "manual" };
  $("[data-toggle=tooltip]").filter("g.node circle").tooltip(tooltipOptions);
}
0517 
/*
 * (Job page only) Helper function to show or hide the tooltips of all nodes
 * in the graph that refer to the same RDD as the specified node.
 *
 * The RDD is identified by those class names of the node that start with
 * VizConstants.nodePrefix.
 */
function triggerTooltipForRDD(d3node, show) {
  var action = show ? "show" : "hide";
  var classNames = d3node.node().classList;
  for (var k = 0; k < classNames.length; k++) {
    var className = classNames[k];
    if (className.indexOf(VizConstants.nodePrefix) !== 0) {
      continue; // not an RDD class
    }
    graphContainer().selectAll("g." + className).each(function() {
      var circle = d3.select(this).select("circle").node();
      $(circle).tooltip(action);
    });
  }
}
0536 
/*
 * Helper function to convert attributes to numeric values.
 *
 * A truthy value is converted to a string, stripped of a trailing "px" and
 * parsed as a float. A falsy value (null, undefined, "", 0, ...) is handed
 * back unchanged, so callers can write `toFloat(value) || 0`.
 */
function toFloat(f) {
  if (!f) {
    return f;
  }
  var text = f.toString();
  return parseFloat(text.replace(/px$/, ""));
}
0545