0001 /* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
0002 /*
0003  * Copyright(c) 2015 - 2018 Intel Corporation.
0004  */
0005 
0006 #ifndef _HFI1_SDMA_H
0007 #define _HFI1_SDMA_H
0008 
0009 #include <linux/types.h>
0010 #include <linux/list.h>
0011 #include <asm/byteorder.h>
0012 #include <linux/workqueue.h>
0013 #include <linux/rculist.h>
0014 
0015 #include "hfi.h"
0016 #include "verbs.h"
0017 #include "sdma_txreq.h"
0018 
0019 /* Hardware limit */
0020 #define MAX_DESC 64
0021 /* Hardware limit for SDMA packet size */
0022 #define MAX_SDMA_PKT_SIZE ((16 * 1024) - 1)
0023 
0024 #define SDMA_MAP_NONE          0
0025 #define SDMA_MAP_SINGLE        1
0026 #define SDMA_MAP_PAGE          2
0027 
0028 #define SDMA_AHG_VALUE_MASK          0xffff
0029 #define SDMA_AHG_VALUE_SHIFT         0
0030 #define SDMA_AHG_INDEX_MASK          0xf
0031 #define SDMA_AHG_INDEX_SHIFT         16
0032 #define SDMA_AHG_FIELD_LEN_MASK      0xf
0033 #define SDMA_AHG_FIELD_LEN_SHIFT     20
0034 #define SDMA_AHG_FIELD_START_MASK    0x1f
0035 #define SDMA_AHG_FIELD_START_SHIFT   24
0036 #define SDMA_AHG_UPDATE_ENABLE_MASK  0x1
0037 #define SDMA_AHG_UPDATE_ENABLE_SHIFT 31
0038 
0039 /* AHG modes */
0040 
0041 /*
0042  * Be aware that the ordering and values
0043  * of SDMA_AHG_APPLY_UPDATE[123]
0044  * are assumed when generating a skip
0045  * count in submit_tx() in sdma.c.
0046  */
0047 #define SDMA_AHG_NO_AHG              0
0048 #define SDMA_AHG_COPY                1
0049 #define SDMA_AHG_APPLY_UPDATE1       2
0050 #define SDMA_AHG_APPLY_UPDATE2       3
0051 #define SDMA_AHG_APPLY_UPDATE3       4
0052 
0053 /*
0054  * Bits defined in the send DMA descriptor.
0055  */
0056 #define SDMA_DESC0_FIRST_DESC_FLAG      BIT_ULL(63)
0057 #define SDMA_DESC0_LAST_DESC_FLAG       BIT_ULL(62)
0058 #define SDMA_DESC0_BYTE_COUNT_SHIFT     48
0059 #define SDMA_DESC0_BYTE_COUNT_WIDTH     14
0060 #define SDMA_DESC0_BYTE_COUNT_MASK \
0061     ((1ULL << SDMA_DESC0_BYTE_COUNT_WIDTH) - 1)
0062 #define SDMA_DESC0_BYTE_COUNT_SMASK \
0063     (SDMA_DESC0_BYTE_COUNT_MASK << SDMA_DESC0_BYTE_COUNT_SHIFT)
0064 #define SDMA_DESC0_PHY_ADDR_SHIFT       0
0065 #define SDMA_DESC0_PHY_ADDR_WIDTH       48
0066 #define SDMA_DESC0_PHY_ADDR_MASK \
0067     ((1ULL << SDMA_DESC0_PHY_ADDR_WIDTH) - 1)
0068 #define SDMA_DESC0_PHY_ADDR_SMASK \
0069     (SDMA_DESC0_PHY_ADDR_MASK << SDMA_DESC0_PHY_ADDR_SHIFT)
0070 
0071 #define SDMA_DESC1_HEADER_UPDATE1_SHIFT 32
0072 #define SDMA_DESC1_HEADER_UPDATE1_WIDTH 32
0073 #define SDMA_DESC1_HEADER_UPDATE1_MASK \
0074     ((1ULL << SDMA_DESC1_HEADER_UPDATE1_WIDTH) - 1)
0075 #define SDMA_DESC1_HEADER_UPDATE1_SMASK \
0076     (SDMA_DESC1_HEADER_UPDATE1_MASK << SDMA_DESC1_HEADER_UPDATE1_SHIFT)
0077 #define SDMA_DESC1_HEADER_MODE_SHIFT    13
0078 #define SDMA_DESC1_HEADER_MODE_WIDTH    3
0079 #define SDMA_DESC1_HEADER_MODE_MASK \
0080     ((1ULL << SDMA_DESC1_HEADER_MODE_WIDTH) - 1)
0081 #define SDMA_DESC1_HEADER_MODE_SMASK \
0082     (SDMA_DESC1_HEADER_MODE_MASK << SDMA_DESC1_HEADER_MODE_SHIFT)
0083 #define SDMA_DESC1_HEADER_INDEX_SHIFT   8
0084 #define SDMA_DESC1_HEADER_INDEX_WIDTH   5
0085 #define SDMA_DESC1_HEADER_INDEX_MASK \
0086     ((1ULL << SDMA_DESC1_HEADER_INDEX_WIDTH) - 1)
0087 #define SDMA_DESC1_HEADER_INDEX_SMASK \
0088     (SDMA_DESC1_HEADER_INDEX_MASK << SDMA_DESC1_HEADER_INDEX_SHIFT)
0089 #define SDMA_DESC1_HEADER_DWS_SHIFT     4
0090 #define SDMA_DESC1_HEADER_DWS_WIDTH     4
0091 #define SDMA_DESC1_HEADER_DWS_MASK \
0092     ((1ULL << SDMA_DESC1_HEADER_DWS_WIDTH) - 1)
0093 #define SDMA_DESC1_HEADER_DWS_SMASK \
0094     (SDMA_DESC1_HEADER_DWS_MASK << SDMA_DESC1_HEADER_DWS_SHIFT)
0095 #define SDMA_DESC1_GENERATION_SHIFT     2
0096 #define SDMA_DESC1_GENERATION_WIDTH     2
0097 #define SDMA_DESC1_GENERATION_MASK \
0098     ((1ULL << SDMA_DESC1_GENERATION_WIDTH) - 1)
0099 #define SDMA_DESC1_GENERATION_SMASK \
0100     (SDMA_DESC1_GENERATION_MASK << SDMA_DESC1_GENERATION_SHIFT)
0101 #define SDMA_DESC1_INT_REQ_FLAG         BIT_ULL(1)
0102 #define SDMA_DESC1_HEAD_TO_HOST_FLAG    BIT_ULL(0)
0103 
0104 enum sdma_states {
0105     sdma_state_s00_hw_down,
0106     sdma_state_s10_hw_start_up_halt_wait,
0107     sdma_state_s15_hw_start_up_clean_wait,
0108     sdma_state_s20_idle,
0109     sdma_state_s30_sw_clean_up_wait,
0110     sdma_state_s40_hw_clean_up_wait,
0111     sdma_state_s50_hw_halt_wait,
0112     sdma_state_s60_idle_halt_wait,
0113     sdma_state_s80_hw_freeze,
0114     sdma_state_s82_freeze_sw_clean,
0115     sdma_state_s99_running,
0116 };
0117 
0118 enum sdma_events {
0119     sdma_event_e00_go_hw_down,
0120     sdma_event_e10_go_hw_start,
0121     sdma_event_e15_hw_halt_done,
0122     sdma_event_e25_hw_clean_up_done,
0123     sdma_event_e30_go_running,
0124     sdma_event_e40_sw_cleaned,
0125     sdma_event_e50_hw_cleaned,
0126     sdma_event_e60_hw_halted,
0127     sdma_event_e70_go_idle,
0128     sdma_event_e80_hw_freeze,
0129     sdma_event_e81_hw_frozen,
0130     sdma_event_e82_hw_unfreeze,
0131     sdma_event_e85_link_down,
0132     sdma_event_e90_sw_halted,
0133 };
0134 
0135 struct sdma_set_state_action {
0136     unsigned op_enable:1;
0137     unsigned op_intenable:1;
0138     unsigned op_halt:1;
0139     unsigned op_cleanup:1;
0140     unsigned go_s99_running_tofalse:1;
0141     unsigned go_s99_running_totrue:1;
0142 };
0143 
0144 struct sdma_state {
0145     struct kref          kref;
0146     struct completion    comp;
0147     enum sdma_states current_state;
0148     unsigned             current_op;
0149     unsigned             go_s99_running;
0150     /* debugging/development */
0151     enum sdma_states previous_state;
0152     unsigned             previous_op;
0153     enum sdma_events last_event;
0154 };
0155 
0156 /**
0157  * DOC: sdma exported routines
0158  *
0159  * These sdma routines fit into three categories:
0160  * - The SDMA API for building and submitting packets
0161  *   to the ring
0162  *
0163  * - Initialization and tear down routines to build up
0164  *   and tear down SDMA
0165  *
0166  * - ISR entrances to handle interrupts, state changes
0167  *   and errors
0168  */
0169 
0170 /**
0171  * DOC: sdma PSM/verbs API
0172  *
0173  * The sdma API is designed to be used by both PSM
0174  * and verbs to supply packets to the SDMA ring.
0175  *
0176  * The usage of the API is as follows:
0177  *
0178  * Embed a struct iowait in the QP or
0179  * PQ.  The iowait should be initialized with a
0180  * call to iowait_init().
0181  *
0182  * The user of the API should create an allocation method
0183  * for their version of the txreq.  Slabs, pre-allocated lists,
0184  * and dma pools can be used.  Once the user's overload of
0185  * the sdma_txreq has been allocated, the sdma_txreq member
0186  * must be initialized with sdma_txinit() or sdma_txinit_ahg().
0187  *
0188  * The user's txreq must declare the sdma_txreq as its first member.
0189  *
0190  * The tx request, once initialized, is manipulated with calls to
0191  * sdma_txadd_daddr(), sdma_txadd_page(), or sdma_txadd_kvaddr()
0192  * for each disjoint memory location.  It is the user's responsibility
0193  * to understand the packet boundaries and page boundaries in order to
0194  * make the appropriate number of sdma_txadd_* calls.  The user
0195  * must be prepared to deal with failures from these routines due to
0196  * either memory allocation or dma mapping failures.
0197  *
0198  * The mapping specifics for each memory location are recorded
0199  * in the tx.  Memory locations added with sdma_txadd_page()
0200  * and sdma_txadd_kvaddr() are automatically mapped when added
0201  * to the tx and unmapped as part of the progress processing in the
0202  * SDMA interrupt handling.
0203  *
0204  * sdma_txadd_daddr() is used to add a dma_addr_t memory location to the
0205  * tx.  An example of a use case would be a pre-allocated
0206  * set of headers allocated via dma_pool_alloc() or
0207  * dma_alloc_coherent().  For these memory locations, it
0208  * is the responsibility of the user to handle the unmapping.
0209  * (This would usually be at an unload or job termination.)
0210  *
0211  * The routine sdma_send_txreq() is used to submit
0212  * a tx to the ring after the appropriate number of
0213  * sdma_txadd_* calls have been done.
0214  *
0215  * If it is desired to send a burst of sdma_txreqs, sdma_send_txlist()
0216  * can be used to submit a list of packets.
0217  *
0218  * The user is free to use the link overhead in the struct sdma_txreq as
0219  * long as the tx isn't in flight.
0220  *
0221  * The extreme degenerate case of the number of descriptors
0222  * exceeding the ring size is automatically handled as
0223  * memory locations are added.  An overflow of the descriptor
0224  * array that is part of the sdma_txreq is also automatically
0225  * handled.
0226  *
0227  */
0228 
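/*
 * Illustrative sketch only, compiled out (not part of this header): one way a
 * client could follow the flow described above.  The wrapper struct and the
 * example_* names are hypothetical; the sdma_* calls and SDMA_TXREQ_* values
 * are the ones declared in this header and in sdma_txreq.h.  Assumes
 * <linux/slab.h> for kmalloc()/kfree().
 */
#if 0
struct example_tx {
	struct sdma_txreq txreq;	/* embedded sdma_txreq, first member */
	void *payload;			/* kvaddr backing this packet */
};

/* completion runs from ISR/tasklet/thread context - no sleeping here */
static void example_tx_complete(struct sdma_txreq *tx, int status)
{
	struct example_tx *etx = container_of(tx, struct example_tx, txreq);

	if (status != SDMA_TXREQ_S_OK)
		pr_debug("tx completed with status %d\n", status);
	kfree(etx);			/* assumption: etx was kmalloc'ed */
}

static int example_send_one(struct hfi1_devdata *dd, struct sdma_engine *sde,
			    struct iowait_work *wait, void *hdr, u16 hdrlen,
			    void *data, u16 datalen)
{
	struct example_tx *etx = kmalloc(sizeof(*etx), GFP_ATOMIC);
	int ret;

	if (!etx)
		return -ENOMEM;
	ret = sdma_txinit(&etx->txreq, 0, hdrlen + datalen,
			  example_tx_complete);
	if (ret)
		goto bail_free;
	/* one sdma_txadd_* call per disjoint memory location */
	ret = sdma_txadd_kvaddr(dd, &etx->txreq, hdr, hdrlen);
	if (ret)
		goto bail_clean;
	ret = sdma_txadd_kvaddr(dd, &etx->txreq, data, datalen);
	if (ret)
		goto bail_clean;
	/* error handling after submission is driver specific and elided */
	return sdma_send_txreq(sde, wait, &etx->txreq, true);
bail_clean:
	sdma_txclean(dd, &etx->txreq);
bail_free:
	kfree(etx);
	return ret;
}
#endif
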
0229 /**
0230  * DOC: Infrastructure calls
0231  *
0232  * sdma_init() is used to initialize data structures and
0233  * CSRs for the desired number of SDMA engines.
0234  *
0235  * sdma_start() is used to kick the SDMA engines initialized
0236  * with sdma_init().   Interrupts must be enabled at this
0237  * point since aspects of the state machine are interrupt
0238  * driven.
0239  *
0240  * sdma_engine_error() and sdma_engine_interrupt() are
0241  * entrances for interrupts.
0242  *
0243  * sdma_map_init() is for the management of the mapping
0244  * table when the number of vls is changed.
0245  *
0246  */
0247 
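/*
 * Illustrative sketch only, compiled out (not part of this header): a rough
 * bring-up ordering for the infrastructure calls described above.  Error
 * unwinding is elided and example_bringup is hypothetical; passing NULL for
 * vl_engines relies on the num_sdma/num_vls fallback described later in this
 * file.
 */
#if 0
static int example_bringup(struct hfi1_devdata *dd, u8 port, u8 num_vls)
{
	int ret;

	ret = sdma_init(dd, port);			/* rings, descriptors, CSRs */
	if (ret)
		return ret;
	ret = sdma_map_init(dd, port, num_vls, NULL);	/* engine <-> vl mapping */
	if (ret)
		return ret;
	/* interrupts must already be enabled; the state machine needs them */
	sdma_start(dd);
	return 0;
}
#endif
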
0248 /*
0249  * struct hw_sdma_desc - raw 128 bit SDMA descriptor
0250  *
0251  * This is the raw descriptor in the SDMA ring
0252  */
0253 struct hw_sdma_desc {
0254     /* private:  don't use directly */
0255     __le64 qw[2];
0256 };
0257 
0258 /**
0259  * struct sdma_engine - Data pertaining to each SDMA engine.
0260  * @dd: a back-pointer to the device data
0261  * @ppd: per port back-pointer
0262  * @imask: mask for irq manipulation
0263  * @idle_mask: mask for determining if an interrupt is due to sdma_idle
0264  *
0265  * This structure has the state for each sdma_engine.
0266  *
0267  * Accessing non-public fields is not supported
0268  * since the private members are subject to change.
0269  */
0270 struct sdma_engine {
0271     /* read mostly */
0272     struct hfi1_devdata *dd;
0273     struct hfi1_pportdata *ppd;
0274     /* private: */
0275     void __iomem *tail_csr;
0276     u64 imask;          /* clear interrupt mask */
0277     u64 idle_mask;
0278     u64 progress_mask;
0279     u64 int_mask;
0280     /* private: */
0281     volatile __le64      *head_dma; /* DMA'ed by chip */
0282     /* private: */
0283     dma_addr_t            head_phys;
0284     /* private: */
0285     struct hw_sdma_desc *descq;
0286     /* private: */
0287     unsigned descq_full_count;
0288     struct sdma_txreq **tx_ring;
0289     /* private: */
0290     dma_addr_t            descq_phys;
0291     /* private: */
0292     u32 sdma_mask;
0293     /* private: */
0294     struct sdma_state state;
0295     /* private: */
0296     int cpu;
0297     /* private: */
0298     u8 sdma_shift;
0299     /* private: */
0300     u8 this_idx; /* zero relative engine */
0301     /* protect changes to senddmactrl shadow */
0302     spinlock_t senddmactrl_lock;
0303     /* private: */
0304     u64 p_senddmactrl;      /* shadow per-engine SendDmaCtrl */
0305 
0306     /* read/write using tail_lock */
0307     spinlock_t            tail_lock ____cacheline_aligned_in_smp;
0308 #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
0309     /* private: */
0310     u64                   tail_sn;
0311 #endif
0312     /* private: */
0313     u32                   descq_tail;
0314     /* private: */
0315     unsigned long         ahg_bits;
0316     /* private: */
0317     u16                   desc_avail;
0318     /* private: */
0319     u16                   tx_tail;
0320     /* private: */
0321     u16 descq_cnt;
0322 
0323     /* read/write using head_lock */
0324     /* private: */
0325     seqlock_t            head_lock ____cacheline_aligned_in_smp;
0326 #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
0327     /* private: */
0328     u64                   head_sn;
0329 #endif
0330     /* private: */
0331     u32                   descq_head;
0332     /* private: */
0333     u16                   tx_head;
0334     /* private: */
0335     u64                   last_status;
0336     /* private: */
0337     u64                     err_cnt;
0338     /* private: */
0339     u64                     sdma_int_cnt;
0340     u64                     idle_int_cnt;
0341     u64                     progress_int_cnt;
0342 
0343     /* private: */
0344     seqlock_t            waitlock;
0345     struct list_head      dmawait;
0346 
0347     /* CONFIG SDMA for now, just blindly duplicate */
0348     /* private: */
0349     struct tasklet_struct sdma_hw_clean_up_task
0350         ____cacheline_aligned_in_smp;
0351 
0352     /* private: */
0353     struct tasklet_struct sdma_sw_clean_up_task
0354         ____cacheline_aligned_in_smp;
0355     /* private: */
0356     struct work_struct err_halt_worker;
0357     /* private: */
0358     struct timer_list     err_progress_check_timer;
0359     u32                   progress_check_head;
0360     /* private: */
0361     struct work_struct flush_worker;
0362     /* protect flush list */
0363     spinlock_t flushlist_lock;
0364     /* private: */
0365     struct list_head flushlist;
0366     struct cpumask cpu_mask;
0367     struct kobject kobj;
0368     u32 msix_intr;
0369 };
0370 
0371 int sdma_init(struct hfi1_devdata *dd, u8 port);
0372 void sdma_start(struct hfi1_devdata *dd);
0373 void sdma_exit(struct hfi1_devdata *dd);
0374 void sdma_clean(struct hfi1_devdata *dd, size_t num_engines);
0375 void sdma_all_running(struct hfi1_devdata *dd);
0376 void sdma_all_idle(struct hfi1_devdata *dd);
0377 void sdma_freeze_notify(struct hfi1_devdata *dd, int go_idle);
0378 void sdma_freeze(struct hfi1_devdata *dd);
0379 void sdma_unfreeze(struct hfi1_devdata *dd);
0380 void sdma_wait(struct hfi1_devdata *dd);
0381 
0382 /**
0383  * sdma_empty() - idle engine test
0384  * @sde: sdma engine
0385  *
0386  * Currently used by verbs as a latency optimization.
0387  *
0388  * Return:
0389  * 1 - empty, 0 - non-empty
0390  */
0391 static inline int sdma_empty(struct sdma_engine *sde)
0392 {
0393     return sde->descq_tail == sde->descq_head;
0394 }
0395 
0396 static inline u16 sdma_descq_freecnt(struct sdma_engine *sde)
0397 {
0398     return sde->descq_cnt -
0399         (sde->descq_tail -
0400          READ_ONCE(sde->descq_head)) - 1;
0401 }
0402 
0403 static inline u16 sdma_descq_inprocess(struct sdma_engine *sde)
0404 {
0405     return sde->descq_cnt - sdma_descq_freecnt(sde);
0406 }
0407 
0408 /*
0409  * Either the head_lock or the tail_lock is required to see
0410  * a steady state.
0411  */
0412 static inline int __sdma_running(struct sdma_engine *engine)
0413 {
0414     return engine->state.current_state == sdma_state_s99_running;
0415 }
0416 
0417 /**
0418  * sdma_running() - state suitability test
0419  * @engine: sdma engine
0420  *
0421  * sdma_running probes the internal state to determine if it is suitable
0422  * for submitting packets.
0423  *
0424  * Return:
0425  * 1 - ok to submit, 0 - not ok to submit
0426  *
0427  */
0428 static inline int sdma_running(struct sdma_engine *engine)
0429 {
0430     unsigned long flags;
0431     int ret;
0432 
0433     spin_lock_irqsave(&engine->tail_lock, flags);
0434     ret = __sdma_running(engine);
0435     spin_unlock_irqrestore(&engine->tail_lock, flags);
0436     return ret;
0437 }
0438 
0439 void _sdma_txreq_ahgadd(
0440     struct sdma_txreq *tx,
0441     u8 num_ahg,
0442     u8 ahg_entry,
0443     u32 *ahg,
0444     u8 ahg_hlen);
0445 
0446 /**
0447  * sdma_txinit_ahg() - initialize an sdma_txreq struct with AHG
0448  * @tx: tx request to initialize
0449  * @flags: flags to key last descriptor additions
0450  * @tlen: total packet length (pbc + headers + data)
0451  * @ahg_entry: ahg entry to use (0 - 31)
0452  * @num_ahg: number of ahg descriptors for the first descriptor (0 - 9)
0453  * @ahg: array of AHG descriptors (up to 9 entries)
0454  * @ahg_hlen: number of bytes from ASIC entry to use
0455  * @cb: callback
0456  *
0457  * The allocation of the sdma_txreq and its enclosing structure is user
0458  * dependent.  This routine must be called to initialize the user independent
0459  * fields.
0460  *
0461  * The currently supported flags are SDMA_TXREQ_F_URGENT,
0462  * SDMA_TXREQ_F_AHG_COPY, and SDMA_TXREQ_F_USE_AHG.
0463  *
0464  * SDMA_TXREQ_F_URGENT is used for latency sensitive situations where the
0465  * completion is desired as soon as possible.
0466  *
0467  * SDMA_TXREQ_F_AHG_COPY causes the header in the first descriptor to be
0468  * copied to the chip entry.  SDMA_TXREQ_F_USE_AHG causes the code to add
0469  * the AHG descriptors into the first 1 to 3 descriptors.
0470  *
0471  * Completions of submitted requests can be gotten on selected
0472  * txreqs by giving a completion routine callback to sdma_txinit() or
0473  * sdma_txinit_ahg().  The environment in which the callback runs
0474  * can be from an ISR, a tasklet, or a thread, so no sleeping
0475  * kernel routines can be used.   Aspects of the sdma ring may
0476  * be locked so care should be taken with locking.
0477  *
0478  * The callback pointer can be NULL to avoid any callback for the packet
0479  * being submitted. The callback will be provided this tx, a status, and a flag.
0480  *
0481  * The status will be one of SDMA_TXREQ_S_OK, SDMA_TXREQ_S_SENDERROR,
0482  * SDMA_TXREQ_S_ABORTED, or SDMA_TXREQ_S_SHUTDOWN.
0483  *
0484  * The flag, if the iowait had been used, indicates that the iowait
0485  * sdma_busy count has reached zero.
0486  *
0487  * The user data portion of tlen should be precise.  The sdma_txadd_*
0488  * entrances will pad with a descriptor that references 1 - 3 bytes when the
0489  * number of bytes specified in tlen has been supplied to the sdma_txreq.
0490  *
0491  * ahg_hlen is used to determine the number of on-chip entry bytes to
0492  * use as the header.  This is for cases where the stored header is
0493  * larger than the header to be used in a packet.  This is typical
0494  * for verbs where the header for an RDMA_WRITE_FIRST is larger than
0495  * the header for an RDMA_WRITE_MIDDLE.
0496  *
0497  */
0498 static inline int sdma_txinit_ahg(
0499     struct sdma_txreq *tx,
0500     u16 flags,
0501     u16 tlen,
0502     u8 ahg_entry,
0503     u8 num_ahg,
0504     u32 *ahg,
0505     u8 ahg_hlen,
0506     void (*cb)(struct sdma_txreq *, int))
0507 {
0508     if (tlen == 0)
0509         return -ENODATA;
0510     if (tlen > MAX_SDMA_PKT_SIZE)
0511         return -EMSGSIZE;
0512     tx->desc_limit = ARRAY_SIZE(tx->descs);
0513     tx->descp = &tx->descs[0];
0514     INIT_LIST_HEAD(&tx->list);
0515     tx->num_desc = 0;
0516     tx->flags = flags;
0517     tx->complete = cb;
0518     tx->coalesce_buf = NULL;
0519     tx->wait = NULL;
0520     tx->packet_len = tlen;
0521     tx->tlen = tx->packet_len;
0522     tx->descs[0].qw[0] = SDMA_DESC0_FIRST_DESC_FLAG;
0523     tx->descs[0].qw[1] = 0;
0524     if (flags & SDMA_TXREQ_F_AHG_COPY)
0525         tx->descs[0].qw[1] |=
0526             (((u64)ahg_entry & SDMA_DESC1_HEADER_INDEX_MASK)
0527                 << SDMA_DESC1_HEADER_INDEX_SHIFT) |
0528             (((u64)SDMA_AHG_COPY & SDMA_DESC1_HEADER_MODE_MASK)
0529                 << SDMA_DESC1_HEADER_MODE_SHIFT);
0530     else if (flags & SDMA_TXREQ_F_USE_AHG && num_ahg)
0531         _sdma_txreq_ahgadd(tx, num_ahg, ahg_entry, ahg, ahg_hlen);
0532     return 0;
0533 }
0534 
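/*
 * Illustrative sketch only, compiled out (not part of this header): priming a
 * hardware AHG entry by copying the first packet's header, per the
 * SDMA_TXREQ_F_AHG_COPY description above.  example_txinit_ahg_copy is
 * hypothetical, and treating a negative sdma_ahg_alloc() return as failure is
 * an assumption.
 */
#if 0
static int example_txinit_ahg_copy(struct sdma_engine *sde,
				   struct sdma_txreq *tx, u16 tlen,
				   void (*cb)(struct sdma_txreq *, int))
{
	int ahg_idx = sdma_ahg_alloc(sde);	/* claim one of the 0 - 31 entries */

	if (ahg_idx < 0)
		return ahg_idx;
	/* the header built in the first descriptor is copied to chip entry ahg_idx */
	return sdma_txinit_ahg(tx, SDMA_TXREQ_F_AHG_COPY, tlen,
			       ahg_idx, 0, NULL, 0, cb);
}
#endif
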
0535 /**
0536  * sdma_txinit() - initialize an sdma_txreq struct (no AHG)
0537  * @tx: tx request to initialize
0538  * @flags: flags to key last descriptor additions
0539  * @tlen: total packet length (pbc + headers + data)
0540  * @cb: callback pointer
0541  *
0542  * The allocation of the sdma_txreq and its enclosing structure is user
0543  * dependent.  This routine must be called to initialize the user
0544  * independent fields.
0545  *
0546  * The only currently supported flag is SDMA_TXREQ_F_URGENT.
0547  *
0548  * SDMA_TXREQ_F_URGENT is used for latency sensitive situations where the
0549  * completion is desired as soon as possible.
0550  *
0551  * Completions of submitted requests can be gotten on selected
0552  * txreqs by giving a completion routine callback to sdma_txinit() or
0553  * sdma_txinit_ahg().  The environment in which the callback runs
0554  * can be from an ISR, a tasklet, or a thread, so no sleeping
0555  * kernel routines can be used.  The head of the sdma ring may
0556  * be locked so care should be taken with locking.
0557  *
0558  * The callback pointer can be NULL to avoid any callback for the packet
0559  * being submitted.
0560  *
0561  * The callback, if non-NULL,  will be provided this tx and a status.  The
0562  * status will be one of SDMA_TXREQ_S_OK, SDMA_TXREQ_S_SENDERROR,
0563  * SDMA_TXREQ_S_ABORTED, or SDMA_TXREQ_S_SHUTDOWN.
0564  *
0565  */
0566 static inline int sdma_txinit(
0567     struct sdma_txreq *tx,
0568     u16 flags,
0569     u16 tlen,
0570     void (*cb)(struct sdma_txreq *, int))
0571 {
0572     return sdma_txinit_ahg(tx, flags, tlen, 0, 0, NULL, 0, cb);
0573 }
0574 
0575 /* helpers - don't use */
0576 static inline int sdma_mapping_type(struct sdma_desc *d)
0577 {
0578     return (d->qw[1] & SDMA_DESC1_GENERATION_SMASK)
0579         >> SDMA_DESC1_GENERATION_SHIFT;
0580 }
0581 
0582 static inline size_t sdma_mapping_len(struct sdma_desc *d)
0583 {
0584     return (d->qw[0] & SDMA_DESC0_BYTE_COUNT_SMASK)
0585         >> SDMA_DESC0_BYTE_COUNT_SHIFT;
0586 }
0587 
0588 static inline dma_addr_t sdma_mapping_addr(struct sdma_desc *d)
0589 {
0590     return (d->qw[0] & SDMA_DESC0_PHY_ADDR_SMASK)
0591         >> SDMA_DESC0_PHY_ADDR_SHIFT;
0592 }
0593 
0594 static inline void make_tx_sdma_desc(
0595     struct sdma_txreq *tx,
0596     int type,
0597     dma_addr_t addr,
0598     size_t len)
0599 {
0600     struct sdma_desc *desc = &tx->descp[tx->num_desc];
0601 
0602     if (!tx->num_desc) {
0603         /* qw[0] zero; qw[1] first, ahg mode already in from init */
0604         desc->qw[1] |= ((u64)type & SDMA_DESC1_GENERATION_MASK)
0605                 << SDMA_DESC1_GENERATION_SHIFT;
0606     } else {
0607         desc->qw[0] = 0;
0608         desc->qw[1] = ((u64)type & SDMA_DESC1_GENERATION_MASK)
0609                 << SDMA_DESC1_GENERATION_SHIFT;
0610     }
0611     desc->qw[0] |= (((u64)addr & SDMA_DESC0_PHY_ADDR_MASK)
0612                 << SDMA_DESC0_PHY_ADDR_SHIFT) |
0613             (((u64)len & SDMA_DESC0_BYTE_COUNT_MASK)
0614                 << SDMA_DESC0_BYTE_COUNT_SHIFT);
0615 }
0616 
0617 /* helper to extend txreq */
0618 int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
0619                int type, void *kvaddr, struct page *page,
0620                unsigned long offset, u16 len);
0621 int _pad_sdma_tx_descs(struct hfi1_devdata *, struct sdma_txreq *);
0622 void __sdma_txclean(struct hfi1_devdata *, struct sdma_txreq *);
0623 
0624 static inline void sdma_txclean(struct hfi1_devdata *dd, struct sdma_txreq *tx)
0625 {
0626     if (tx->num_desc)
0627         __sdma_txclean(dd, tx);
0628 }
0629 
0630 /* helpers used by public routines */
0631 static inline void _sdma_close_tx(struct hfi1_devdata *dd,
0632                   struct sdma_txreq *tx)
0633 {
0634     tx->descp[tx->num_desc].qw[0] |=
0635         SDMA_DESC0_LAST_DESC_FLAG;
0636     tx->descp[tx->num_desc].qw[1] |=
0637         dd->default_desc1;
0638     if (tx->flags & SDMA_TXREQ_F_URGENT)
0639         tx->descp[tx->num_desc].qw[1] |=
0640             (SDMA_DESC1_HEAD_TO_HOST_FLAG |
0641              SDMA_DESC1_INT_REQ_FLAG);
0642 }
0643 
0644 static inline int _sdma_txadd_daddr(
0645     struct hfi1_devdata *dd,
0646     int type,
0647     struct sdma_txreq *tx,
0648     dma_addr_t addr,
0649     u16 len)
0650 {
0651     int rval = 0;
0652 
0653     make_tx_sdma_desc(
0654         tx,
0655         type,
0656         addr, len);
0657     WARN_ON(len > tx->tlen);
0658     tx->tlen -= len;
0659     /* special cases for last */
0660     if (!tx->tlen) {
0661         if (tx->packet_len & (sizeof(u32) - 1)) {
0662             rval = _pad_sdma_tx_descs(dd, tx);
0663             if (rval)
0664                 return rval;
0665         } else {
0666             _sdma_close_tx(dd, tx);
0667         }
0668     }
0669     tx->num_desc++;
0670     return rval;
0671 }
0672 
0673 /**
0674  * sdma_txadd_page() - add a page to the sdma_txreq
0675  * @dd: the device to use for mapping
0676  * @tx: tx request to which the page is added
0677  * @page: page to map
0678  * @offset: offset within the page
0679  * @len: length in bytes
0680  *
0681  * This is used to add a page/offset/length descriptor.
0682  *
0683  * The mapping/unmapping of the page/offset/len is automatically handled.
0684  *
0685  * Return:
0686  * 0 - success, -ENOSPC - mapping fail, -ENOMEM - couldn't
0687  * extend/coalesce descriptor array
0688  */
0689 static inline int sdma_txadd_page(
0690     struct hfi1_devdata *dd,
0691     struct sdma_txreq *tx,
0692     struct page *page,
0693     unsigned long offset,
0694     u16 len)
0695 {
0696     dma_addr_t addr;
0697     int rval;
0698 
0699     if ((unlikely(tx->num_desc == tx->desc_limit))) {
0700         rval = ext_coal_sdma_tx_descs(dd, tx, SDMA_MAP_PAGE,
0701                           NULL, page, offset, len);
0702         if (rval <= 0)
0703             return rval;
0704     }
0705 
0706     addr = dma_map_page(
0707                &dd->pcidev->dev,
0708                page,
0709                offset,
0710                len,
0711                DMA_TO_DEVICE);
0712 
0713     if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
0714         __sdma_txclean(dd, tx);
0715         return -ENOSPC;
0716     }
0717 
0718     return _sdma_txadd_daddr(
0719             dd, SDMA_MAP_PAGE, tx, addr, len);
0720 }
0721 
0722 /**
0723  * sdma_txadd_daddr() - add a dma address to the sdma_txreq
0724  * @dd: the device to use for mapping
0725  * @tx: sdma_txreq to which the page is added
0726  * @addr: dma address mapped by caller
0727  * @len: length in bytes
0728  *
0729  * This is used to add a descriptor for memory that is already dma mapped.
0730  *
0731  * In this case, there is no unmapping as part of the progress processing for
0732  * this memory location.
0733  *
0734  * Return:
0735  * 0 - success, -ENOMEM - couldn't extend descriptor array
0736  */
0737 
0738 static inline int sdma_txadd_daddr(
0739     struct hfi1_devdata *dd,
0740     struct sdma_txreq *tx,
0741     dma_addr_t addr,
0742     u16 len)
0743 {
0744     int rval;
0745 
0746     if ((unlikely(tx->num_desc == tx->desc_limit))) {
0747         rval = ext_coal_sdma_tx_descs(dd, tx, SDMA_MAP_NONE,
0748                           NULL, NULL, 0, 0);
0749         if (rval <= 0)
0750             return rval;
0751     }
0752 
0753     return _sdma_txadd_daddr(dd, SDMA_MAP_NONE, tx, addr, len);
0754 }
0755 
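/*
 * Illustrative sketch only, compiled out (not part of this header): a
 * pre-mapped header added with sdma_txadd_daddr(), matching the
 * dma_alloc_coherent() use case described above.  example_add_coherent_hdr is
 * hypothetical; the caller keeps the coherent buffer and frees it itself
 * later (e.g. at unload or job termination), since no automatic unmap is done
 * for this descriptor.
 */
#if 0
static int example_add_coherent_hdr(struct hfi1_devdata *dd,
				    struct sdma_txreq *tx,
				    void **hdr, dma_addr_t *hdr_dma,
				    u16 hdrlen)
{
	/* setup-time allocation; the mapping lives beyond this tx */
	*hdr = dma_alloc_coherent(&dd->pcidev->dev, hdrlen, hdr_dma, GFP_KERNEL);
	if (!*hdr)
		return -ENOMEM;
	/* ... fill in the header at *hdr ... */
	return sdma_txadd_daddr(dd, tx, *hdr_dma, hdrlen);
}
#endif
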
0756 /**
0757  * sdma_txadd_kvaddr() - add a kernel virtual address to sdma_txreq
0758  * @dd: the device to use for mapping
0759  * @tx: sdma_txreq to which the page is added
0760  * @kvaddr: the kernel virtual address
0761  * @len: length in bytes
0762  *
0763  * This is used to add a descriptor referenced by the indicated kvaddr and
0764  * len.
0765  *
0766  * The mapping/unmapping of the kvaddr and len is automatically handled.
0767  *
0768  * Return:
0769  * 0 - success, -ENOSPC - mapping fail, -ENOMEM - couldn't extend/coalesce
0770  * descriptor array
0771  */
0772 static inline int sdma_txadd_kvaddr(
0773     struct hfi1_devdata *dd,
0774     struct sdma_txreq *tx,
0775     void *kvaddr,
0776     u16 len)
0777 {
0778     dma_addr_t addr;
0779     int rval;
0780 
0781     if ((unlikely(tx->num_desc == tx->desc_limit))) {
0782         rval = ext_coal_sdma_tx_descs(dd, tx, SDMA_MAP_SINGLE,
0783                           kvaddr, NULL, 0, len);
0784         if (rval <= 0)
0785             return rval;
0786     }
0787 
0788     addr = dma_map_single(
0789                &dd->pcidev->dev,
0790                kvaddr,
0791                len,
0792                DMA_TO_DEVICE);
0793 
0794     if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
0795         __sdma_txclean(dd, tx);
0796         return -ENOSPC;
0797     }
0798 
0799     return _sdma_txadd_daddr(
0800             dd, SDMA_MAP_SINGLE, tx, addr, len);
0801 }
0802 
0803 struct iowait_work;
0804 
0805 int sdma_send_txreq(struct sdma_engine *sde,
0806             struct iowait_work *wait,
0807             struct sdma_txreq *tx,
0808             bool pkts_sent);
0809 int sdma_send_txlist(struct sdma_engine *sde,
0810              struct iowait_work *wait,
0811              struct list_head *tx_list,
0812              u16 *count_out);
0813 
0814 int sdma_ahg_alloc(struct sdma_engine *sde);
0815 void sdma_ahg_free(struct sdma_engine *sde, int ahg_index);
0816 
0817 /**
0818  * sdma_build_ahg_descriptor - build ahg descriptor
0819  * @data: 16 bit value to place in the field
0820  * @dwindex: dword index into the header
0821  * @startbit: starting bit position of the field within the dword
0822  * @bits: field length in bits
0823  *
0824  * Build and return a 32 bit descriptor.
0825  */
0826 static inline u32 sdma_build_ahg_descriptor(
0827     u16 data,
0828     u8 dwindex,
0829     u8 startbit,
0830     u8 bits)
0831 {
0832     return (u32)(1UL << SDMA_AHG_UPDATE_ENABLE_SHIFT |
0833         ((startbit & SDMA_AHG_FIELD_START_MASK) <<
0834         SDMA_AHG_FIELD_START_SHIFT) |
0835         ((bits & SDMA_AHG_FIELD_LEN_MASK) <<
0836         SDMA_AHG_FIELD_LEN_SHIFT) |
0837         ((dwindex & SDMA_AHG_INDEX_MASK) <<
0838         SDMA_AHG_INDEX_SHIFT) |
0839         ((data & SDMA_AHG_VALUE_MASK) <<
0840         SDMA_AHG_VALUE_SHIFT));
0841 }
0842 
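/*
 * Illustrative sketch only, compiled out (not part of this header): two AHG
 * update descriptors built with sdma_build_ahg_descriptor().  The dword
 * indices, start bits, and field widths are made-up values for illustration,
 * not a statement about any particular header layout; example_build_ahg is
 * hypothetical.
 */
#if 0
static void example_build_ahg(u32 ahg[2], u16 new_field_a, u16 new_field_b)
{
	/* write 8 bits of new_field_a at bit 0 of header dword 6 */
	ahg[0] = sdma_build_ahg_descriptor(new_field_a, 6, 0, 8);
	/* write 12 bits of new_field_b at bit 4 of header dword 3 */
	ahg[1] = sdma_build_ahg_descriptor(new_field_b, 3, 4, 12);
}
#endif
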
0843 /**
0844  * sdma_progress - use seq number to detect head progress
0845  * @sde: sdma_engine to check
0846  * @seq: base seq count
0847  * @tx: txreq for which we need to check descriptor availability
0848  *
0849  * This is used in the appropriate spot in the sleep routine
0850  * to check for potential ring progress.  This routine gets the
0851  * seqcount before queuing the iowait structure for progress.
0852  *
0853  * If the seqcount indicates that progress needs to be checked,
0854  * re-submission is detected by checking whether the descriptor
0855  * queue has enough descriptors for the txreq.
0856  */
0857 static inline unsigned sdma_progress(struct sdma_engine *sde, unsigned seq,
0858                      struct sdma_txreq *tx)
0859 {
0860     if (read_seqretry(&sde->head_lock, seq)) {
0861         sde->desc_avail = sdma_descq_freecnt(sde);
0862         if (tx->num_desc > sde->desc_avail)
0863             return 0;
0864         return 1;
0865     }
0866     return 0;
0867 }
0868 
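/*
 * Illustrative sketch only, compiled out (not part of this header): the shape
 * of a sleep-path check built on sdma_progress(), per the description above.
 * Queueing the iowait on sde->dmawait (under sde->waitlock) is elided;
 * example_must_queue is hypothetical.
 */
#if 0
static bool example_must_queue(struct sdma_engine *sde, struct sdma_txreq *tx)
{
	unsigned seq = read_seqbegin(&sde->head_lock);

	/* ... queue the iowait for a later wakeup here ... */
	if (sdma_progress(sde, seq, tx))
		return false;	/* head moved and there is room: retry now */
	return true;		/* no progress: stay queued for wakeup */
}
#endif
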
0869 /* for use by interrupt handling */
0870 void sdma_engine_error(struct sdma_engine *sde, u64 status);
0871 void sdma_engine_interrupt(struct sdma_engine *sde, u64 status);
0872 
0873 /*
0874  *
0875  * The diagram below details the relationship of the mapping structures
0876  *
0877  * Since the mapping now allows for non-uniform engines per vl, the
0878  * number of engines for a vl is either the vl_engines[vl] or
0879  * a computation based on num_sdma/num_vls:
0880  *
0881  * For example:
0882  * nactual = vl_engines ? vl_engines[vl] : num_sdma/num_vls
0883  *
0884  * n = roundup to next highest power of 2 using nactual
0885  *
0886  * In the case where num_sdma/num_vls doesn't divide
0887  * evenly, the extras are added from the last vl downward.
0888  *
0889  * For the case where n > nactual, the engines are assigned
0890  * in a round robin fashion wrapping back to the first engine
0891  * for a particular vl.
0892  *
0893  *               dd->sdma_map
0894  *                    |                                   sdma_map_elem[0]
0895  *                    |                                +--------------------+
0896  *                    v                                |       mask         |
0897  *               sdma_vl_map                           |--------------------|
0898  *      +--------------------------+                   | sde[0] -> eng 1    |
0899  *      |    list (RCU)            |                   |--------------------|
0900  *      |--------------------------|                 ->| sde[1] -> eng 2    |
0901  *      |    mask                  |              --/  |--------------------|
0902  *      |--------------------------|            -/     |        *           |
0903  *      |    actual_vls (max 8)    |          -/       |--------------------|
0904  *      |--------------------------|       --/         | sde[n-1] -> eng n  |
0905  *      |    vls (max 8)           |     -/            +--------------------+
0906  *      |--------------------------|  --/
0907  *      |    map[0]                |-/
0908  *      |--------------------------|                   +---------------------+
0909  *      |    map[1]                |---                |       mask          |
0910  *      |--------------------------|   \----           |---------------------|
0911  *      |           *              |        \--        | sde[0] -> eng 1+n   |
0912  *      |           *              |           \----   |---------------------|
0913  *      |           *              |                \->| sde[1] -> eng 2+n   |
0914  *      |--------------------------|                   |---------------------|
0915  *      |   map[vls - 1]           |-                  |         *           |
0916  *      +--------------------------+ \-                |---------------------|
0917  *                                     \-              | sde[m-1] -> eng m+n |
0918  *                                       \             +---------------------+
0919  *                                        \-
0920  *                                          \
0921  *                                           \-        +----------------------+
0922  *                                             \-      |       mask           |
0923  *                                               \     |----------------------|
0924  *                                                \-   | sde[0] -> eng 1+m+n  |
0925  *                                                  \- |----------------------|
0926  *                                                    >| sde[1] -> eng 2+m+n  |
0927  *                                                     |----------------------|
0928  *                                                     |         *            |
0929  *                                                     |----------------------|
0930  *                                                     | sde[o-1] -> eng o+m+n|
0931  *                                                     +----------------------+
0932  *
0933  */
0934 
0935 /**
0936  * struct sdma_map_elem - mapping for a vl
0937  * @mask - selector mask
0938  * @sde - array of engines for this vl
0939  *
0940  * The mask is used to "mod" the selector
0941  * to produce an index into the trailing
0942  * array of sdes.
0943  */
0944 struct sdma_map_elem {
0945     u32 mask;
0946     struct sdma_engine *sde[];
0947 };
0948 
0949 /**
0950  * struct sdma_vl_map - mapping for a vl
0951  * @engine_to_vl - map of an engine to a vl
0952  * @list - rcu head for free callback
0953  * @mask - vl mask to "mod" the vl to produce an index into the map array
0954  * @actual_vls - number of vls
0955  * @vls - number of vls rounded to next power of 2
0956  * @map - array of sdma_map_elem entries
0957  *
0958  * This is the parent mapping structure.  The trailing
0959  * members of the struct point to sdma_map_elem entries, which
0960  * in turn point to an array of sde's for that vl.
0961  */
0962 struct sdma_vl_map {
0963     s8 engine_to_vl[TXE_NUM_SDMA_ENGINES];
0964     struct rcu_head list;
0965     u32 mask;
0966     u8 actual_vls;
0967     u8 vls;
0968     struct sdma_map_elem *map[];
0969 };
0970 
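/*
 * Illustrative sketch only, compiled out (not part of this header): the
 * two-level mask/"mod" lookup the structures above are designed for.  The
 * real selectors (sdma_select_engine_vl() and friends) also handle RCU
 * protection and fallback paths; example_pick_engine is hypothetical and
 * skips those details.
 */
#if 0
static struct sdma_engine *example_pick_engine(struct sdma_vl_map *m,
					       u8 vl, u32 selector)
{
	/* the vl mask picks the per-vl element, the selector mask picks the engine */
	struct sdma_map_elem *e = m->map[vl & m->mask];

	return e->sde[selector & e->mask];
}
#endif
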
0971 int sdma_map_init(
0972     struct hfi1_devdata *dd,
0973     u8 port,
0974     u8 num_vls,
0975     u8 *vl_engines);
0976 
0977 /* slow path */
0978 void _sdma_engine_progress_schedule(struct sdma_engine *sde);
0979 
0980 /**
0981  * sdma_engine_progress_schedule() - schedule progress on engine
0982  * @sde: sdma_engine to schedule progress
0983  *
0984  * This is the fast path.
0985  *
0986  */
0987 static inline void sdma_engine_progress_schedule(
0988     struct sdma_engine *sde)
0989 {
0990     if (!sde || sdma_descq_inprocess(sde) < (sde->descq_cnt / 8))
0991         return;
0992     _sdma_engine_progress_schedule(sde);
0993 }
0994 
0995 struct sdma_engine *sdma_select_engine_sc(
0996     struct hfi1_devdata *dd,
0997     u32 selector,
0998     u8 sc5);
0999 
1000 struct sdma_engine *sdma_select_engine_vl(
1001     struct hfi1_devdata *dd,
1002     u32 selector,
1003     u8 vl);
1004 
1005 struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
1006                         u32 selector, u8 vl);
1007 ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf);
1008 ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
1009                 size_t count);
1010 int sdma_engine_get_vl(struct sdma_engine *sde);
1011 void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *);
1012 void sdma_seqfile_dump_cpu_list(struct seq_file *s, struct hfi1_devdata *dd,
1013                 unsigned long cpuid);
1014 
1015 #ifdef CONFIG_SDMA_VERBOSITY
1016 void sdma_dumpstate(struct sdma_engine *);
1017 #endif
1018 static inline char *slashstrip(char *s)
1019 {
1020     char *r = s;
1021 
1022     while (*s)
1023         if (*s++ == '/')
1024             r = s;
1025     return r;
1026 }
1027 
1028 u16 sdma_get_descq_cnt(void);
1029 
1030 extern uint mod_num_sdma;
1031 
1032 void sdma_update_lmc(struct hfi1_devdata *dd, u64 mask, u32 lid);
1033 
1034 #endif