Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
0002 /* Copyright (c) 2019 Mellanox Technologies. */
0003 
0004 #ifndef DIM_H
0005 #define DIM_H
0006 
0007 #include <linux/bits.h>
0008 #include <linux/kernel.h>
0009 #include <linux/module.h>
0010 #include <linux/types.h>
0011 #include <linux/workqueue.h>
0012 
0013 /*
0014  * Number of events between DIM iterations.
0015  * Limits how often the algorithm itself is run.
0016  */
0017 #define DIM_NEVENTS 64
0018 
0019 /*
0020  * Does the difference between val and ref justify taking an action?
0021  * True when 100 * |val - ref| / ref (integer) exceeds 10; never for ref == 0.
0022  */
0023 #define IS_SIGNIFICANT_DIFF(val, ref) \
0024     ((ref) && (((100UL * abs((val) - (ref))) / (ref)) > 10))
0025 
0026 /*
0027  * Calculate the gap from start to end for counters that are 'bits' wide.
0028  * Adding BIT_ULL(bits) and masking with BIT_ULL(bits) - 1 handles wrap-around.
0029  */
0030 #define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) \
0031         & (BIT_ULL(bits) - 1))
0032 
0033 /**
0034  * struct dim_cq_moder - Structure for CQ moderation values.
0035  * Used for communication between DIM and its consumer.
0036  *
0037  * @usec: CQ timer suggestion (by DIM)
0038  * @pkts: CQ packet counter suggestion (by DIM)
0039  * @comps: Completion counter
0040  * @cq_period_mode: CQ period count mode (see enum dim_cq_period_mode)
0041  */
0042 struct dim_cq_moder {
0043     u16 usec;
0044     u16 pkts;
0045     u16 comps;
0046     u8 cq_period_mode;
0047 };
0048 
0049 /**
0050  * struct dim_sample - Structure for DIM sample data.
0051  * Used for communication between DIM and its consumer.
0052  *
0053  * @time: Sample timestamp (taken with ktime_get() in dim_update_sample())
0054  * @pkt_ctr: Number of packets (32-bit counter)
0055  * @byte_ctr: Number of bytes (32-bit counter)
0056  * @event_ctr: Number of events (16-bit counter)
0057  * @comp_ctr: Current completion counter (set by dim_update_sample_with_comps())
0058  */
0059 struct dim_sample {
0060     ktime_t time;
0061     u32 pkt_ctr;
0062     u32 byte_ctr;
0063     u16 event_ctr;
0064     u32 comp_ctr;
0065 };
0066 
0067 /**
0068  * struct dim_stats - Structure for DIM stats.
0069  * Holds the rates calculated from a pair of samples (see dim_calc_stats()).
0070  *
0071  * @ppms: Packets per msec
0072  * @bpms: Bytes per msec
0073  * @epms: Events per msec
0074  * @cpms: Completions per msec
0075  * @cpe_ratio: Ratio of completions to events
0076  */
0077 struct dim_stats {
0078     int ppms; /* packets per msec */
0079     int bpms; /* bytes per msec */
0080     int epms; /* events per msec */
0081     int cpms; /* completions per msec */
0082     int cpe_ratio; /* ratio of completions to events */
0083 };
0084 
0085 /**
0086  * struct dim - Main structure for dynamic interrupt moderation (DIM).
0087  * Used for holding all information about a specific DIM instance.
0088  *
0089  * @state: Algorithm state (see enum dim_state)
0090  * @prev_stats: Measured rates from previous iteration (for comparison)
0091  * @start_sample: Sampled data at start of current iteration
0092  * @measuring_sample: A &dim_sample that is used to update the current events
0093  * @work: Work item to run when an action is required
0094  * @priv: A pointer to the struct that points to dim
0095  * @profile_ix: Index of the current moderation profile
0096  * @mode: CQ period count mode (see enum dim_cq_period_mode)
0097  * @tune_state: Algorithm tuning state (see enum dim_tune_state)
0098  * @steps_right: Number of steps taken towards higher moderation
0099  * @steps_left: Number of steps taken towards lower moderation
0100  * @tired: Parking depth counter
0101  */
0102 struct dim {
0103     u8 state;
0104     struct dim_stats prev_stats;
0105     struct dim_sample start_sample;
0106     struct dim_sample measuring_sample;
0107     struct work_struct work;
0108     void *priv;
0109     u8 profile_ix;
0110     u8 mode;
0111     u8 tune_state;
0112     u8 steps_right;
0113     u8 steps_left;
0114     u8 tired;
0115 };
0116 
0117 /**
0118  * enum dim_cq_period_mode - Modes for CQ period count
0119  *
0120  * @DIM_CQ_PERIOD_MODE_START_FROM_EQE: Start counting from EQE
0121  * @DIM_CQ_PERIOD_MODE_START_FROM_CQE: Start counting from CQE (implies timer reset)
0122  * @DIM_CQ_PERIOD_NUM_MODES: Number of modes (last enumerator, not a mode itself)
0123  */
0124 enum dim_cq_period_mode {
0125     DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0,
0126     DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1,
0127     DIM_CQ_PERIOD_NUM_MODES
0128 };
0129 
0130 /**
0131  * enum dim_state - DIM algorithm states
0132  *
0133  * These determine whether the algorithm is in a valid state to start an iteration.
0134  *
0135  * @DIM_START_MEASURE: This is the first iteration (also after applying a new profile)
0136  * @DIM_MEASURE_IN_PROGRESS: Algorithm is already in progress - check whether
0137  * an action needs to be performed
0138  * @DIM_APPLY_NEW_PROFILE: DIM consumer is currently applying a profile - no need to measure
0139  */
0140 enum dim_state {
0141     DIM_START_MEASURE,
0142     DIM_MEASURE_IN_PROGRESS,
0143     DIM_APPLY_NEW_PROFILE,
0144 };
0145 
0146 /**
0147  * enum dim_tune_state - DIM algorithm tune states
0148  *
0149  * These determine which action the algorithm should perform.
0150  *
0151  * @DIM_PARKING_ON_TOP: Algorithm found a local top point - exit on significant difference
0152  * @DIM_PARKING_TIRED: Algorithm found a deep top point - don't exit while tired > 0
0153  * @DIM_GOING_RIGHT: Algorithm is currently trying higher moderation levels
0154  * @DIM_GOING_LEFT: Algorithm is currently trying lower moderation levels
0155  */
0156 enum dim_tune_state {
0157     DIM_PARKING_ON_TOP,
0158     DIM_PARKING_TIRED,
0159     DIM_GOING_RIGHT,
0160     DIM_GOING_LEFT,
0161 };
0162 
0163 /**
0164  * enum dim_stats_state - DIM algorithm statistics states
0165  *
0166  * These determine the verdict of the current iteration.
0167  *
0168  * @DIM_STATS_WORSE: Current iteration shows worse performance than before
0169  * @DIM_STATS_SAME: Current iteration shows the same performance as before
0170  * @DIM_STATS_BETTER: Current iteration shows better performance than before
0171  */
0172 enum dim_stats_state {
0173     DIM_STATS_WORSE,
0174     DIM_STATS_SAME,
0175     DIM_STATS_BETTER,
0176 };
0177 
0178 /**
0179  * enum dim_step_result - DIM algorithm step results
0180  *
0181  * These describe the result of a step.
0182  *
0183  * @DIM_STEPPED: Performed a regular step
0184  * @DIM_TOO_TIRED: The same kind of step was done multiple times - should go to
0185  * tired parking
0186  * @DIM_ON_EDGE: Stepped to the leftmost/rightmost profile
0187  */
0188 enum dim_step_result {
0189     DIM_STEPPED,
0190     DIM_TOO_TIRED,
0191     DIM_ON_EDGE,
0192 };
0193 
0194 /**
0195  *  dim_on_top - check if current state is a good place to stop (top location)
0196  *  @dim: DIM context
0197  *
0198  * Check if the current profile is a good place to park at.
0199  * This will result in reducing the DIM checks frequency, as we assume we
0200  * probably shouldn't change profiles unless the traffic pattern has changed.
0201  */
0202 bool dim_on_top(struct dim *dim);
0203 
0204 /**
0205  *  dim_turn - reverse the direction in which the profile is being altered
0206  *  @dim: DIM context
0207  *
0208  * Go left if we were going right and vice-versa.
0209  * Do nothing if currently parking.
0210  */
0211 void dim_turn(struct dim *dim);
0212 
0213 /**
0214  *  dim_park_on_top - enter a parking state on a top location
0215  *  @dim: DIM context
0216  *
0217  * Enter parking state.
0218  * Clear all movement history.
0219  */
0220 void dim_park_on_top(struct dim *dim);
0221 
0222 /**
0223  *  dim_park_tired - enter a tired parking state
0224  *  @dim: DIM context
0225  *
0226  * Enter parking state.
0227  * Clear all movement history and cause the DIM checks frequency to be reduced.
0228  */
0229 void dim_park_tired(struct dim *dim);
0230 
0231 /**
0232  *  dim_calc_stats - calculate the difference between two samples
0233  *  @start: start sample
0234  *  @end: end sample
0235  *  @curr_stats: output: delta between the samples
0236  *
0237  * Calculate the delta between two samples (in data rates).
0238  * Takes counter wrap-around into consideration.
0239  */
0240 void dim_calc_stats(struct dim_sample *start, struct dim_sample *end,
0241             struct dim_stats *curr_stats);
0242 
0243 /**
0244  *  dim_update_sample - timestamp a sample and set its counters to given values
0245  *  @event_ctr: number of events to set
0246  *  @packets: number of packets to set (stored in the u32 pkt_ctr field)
0247  *  @bytes: number of bytes to set (stored in the u32 byte_ctr field)
0248  *  @s: DIM sample; time is set from ktime_get(), comp_ctr is left untouched
0249  */
0250 static inline void
0251 dim_update_sample(u16 event_ctr, u64 packets, u64 bytes, struct dim_sample *s)
0252 {
0253     s->time      = ktime_get();
0254     s->pkt_ctr   = packets;
0255     s->byte_ctr  = bytes;
0256     s->event_ctr = event_ctr;
0257 }
0258 
0259 /**
0260  *  dim_update_sample_with_comps - set a sample's fields with given
0261  *  values including the completion parameter
0262  *  @event_ctr: number of events to set
0263  *  @packets: number of packets to set
0264  *  @bytes: number of bytes to set
0265  *  @comps: number of completions to set (stored in the u32 comp_ctr field)
0266  *  @s: DIM sample; filled in by dim_update_sample() before comp_ctr is set
0267  */
0268 static inline void
0269 dim_update_sample_with_comps(u16 event_ctr, u64 packets, u64 bytes, u64 comps,
0270                  struct dim_sample *s)
0271 {
0272     dim_update_sample(event_ctr, packets, bytes, s);
0273     s->comp_ctr = comps;
0274 }
0275 
0276 /* Net DIM */
0277 
0278 /**
0279  *  net_dim_get_rx_moderation - return by value the CQ moderation for the given RX profile
0280  *  @cq_period_mode: CQ period mode
0281  *  @ix: Profile index
0282  */
0283 struct dim_cq_moder net_dim_get_rx_moderation(u8 cq_period_mode, int ix);
0284 
0285 /**
0286  *  net_dim_get_def_rx_moderation - return by value the default RX moderation
0287  *  @cq_period_mode: CQ period mode
0288  */
0289 struct dim_cq_moder net_dim_get_def_rx_moderation(u8 cq_period_mode);
0290 
0291 /**
0292  *  net_dim_get_tx_moderation - return by value the CQ moderation for the given TX profile
0293  *  @cq_period_mode: CQ period mode
0294  *  @ix: Profile index
0295  */
0296 struct dim_cq_moder net_dim_get_tx_moderation(u8 cq_period_mode, int ix);
0297 
0298 /**
0299  *  net_dim_get_def_tx_moderation - return by value the default TX moderation
0300  *  @cq_period_mode: CQ period mode
0301  */
0302 struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode);
0303 
0304 /**
0305  *  net_dim - main DIM algorithm entry point
0306  *  @dim: DIM instance information
0307  *  @end_sample: Current data measurement (passed by value)
0308  *
0309  * Called by the consumer.
0310  * This is the main logic of the algorithm, where data is processed in order
0311  * to decide on the next required action.
0312  */
0313 void net_dim(struct dim *dim, struct dim_sample end_sample);
0314 
0315 /* RDMA DIM */
0316 
0317 /*
0318  * RDMA DIM profile:
0319  * the number of profiles must be RDMA_DIM_PARAMS_NUM_PROFILES.
0320  */
0321 #define RDMA_DIM_PARAMS_NUM_PROFILES 9
0322 #define RDMA_DIM_START_PROFILE 0
0323 
0324 /**
0325  * rdma_dim - Runs the adaptive moderation.
0326  * @dim: The moderation struct.
0327  * @completions: The number of completions collected in this round.
0328  *
0329  * Each call to rdma_dim takes the latest number of completions that
0330  * have been collected and counts them as a new event.
0331  * Once enough events have been collected, the algorithm decides on a new
0332  * moderation level.
0333  */
0334 void rdma_dim(struct dim *dim, u64 completions);
0335 
0336 #endif /* DIM_H */