0001
0002 /*
0003  * hfi1 send DMA (SDMA) support: per-engine state machine, descriptor
0004  * ring management and txreq submission.
0005  */
0006 #include <linux/spinlock.h>
0007 #include <linux/seqlock.h>
0008 #include <linux/netdevice.h>
0009 #include <linux/moduleparam.h>
0010 #include <linux/bitops.h>
0011 #include <linux/timer.h>
0012 #include <linux/vmalloc.h>
0013 #include <linux/highmem.h>
0014
0015 #include "hfi.h"
0016 #include "common.h"
0017 #include "qp.h"
0018 #include "sdma.h"
0019 #include "iowait.h"
0020 #include "trace.h"
0021
0022 /* must be a power of 2, >= 64 and <= 32768; see sdma_get_descq_cnt() */
0023 #define SDMA_DESCQ_CNT 2048
0024 #define SDMA_DESC_INTR 64
0025 #define INVALID_TAIL 0xffff
0026 #define SDMA_PAD max_t(size_t, MAX_16B_PADDING, sizeof(u32))
0027
0028 static uint sdma_descq_cnt = SDMA_DESCQ_CNT;
0029 module_param(sdma_descq_cnt, uint, S_IRUGO);
0030 MODULE_PARM_DESC(sdma_descq_cnt, "Number of SDMA descq entries");
0031
0032 static uint sdma_idle_cnt = 250;
0033 module_param(sdma_idle_cnt, uint, S_IRUGO);
0034 MODULE_PARM_DESC(sdma_idle_cnt, "SDMA interrupt idle delay (ns, default 250)");
0035
0036 uint mod_num_sdma;
0037 module_param_named(num_sdma, mod_num_sdma, uint, S_IRUGO);
0038 MODULE_PARM_DESC(num_sdma, "Set max number of SDMA engines to use");
0039
0040 static uint sdma_desct_intr = SDMA_DESC_INTR;
0041 module_param_named(desct_intr, sdma_desct_intr, uint, S_IRUGO | S_IWUSR);
0042 MODULE_PARM_DESC(desct_intr, "Number of SDMA descriptors before interrupt");
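/*
 * Illustrative example only (the values shown are arbitrary): the module
 * parameters declared above can be set at load time, e.g.
 *
 *   modprobe hfi1 sdma_descq_cnt=4096 sdma_idle_cnt=500 num_sdma=8 desct_intr=64
 */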
0043 /* maximum number of waiters woken per batch in sdma_desc_avail() */
0044 #define SDMA_WAIT_BATCH_SIZE 20
0045 /* time (in ms) to wait for an errored SDMA engine to report it has halted */
0046 #define SDMA_ERR_HALT_TIMEOUT 10
0047
0048 /* shorthand for the SEND_DMA_* CSR and field names used in this file */
0049 #define SD(name) SEND_DMA_##name
0050 #define ALL_SDMA_ENG_HALT_ERRS \
0051 (SD(ENG_ERR_STATUS_SDMA_WRONG_DW_ERR_SMASK) \
0052 | SD(ENG_ERR_STATUS_SDMA_GEN_MISMATCH_ERR_SMASK) \
0053 | SD(ENG_ERR_STATUS_SDMA_TOO_LONG_ERR_SMASK) \
0054 | SD(ENG_ERR_STATUS_SDMA_TAIL_OUT_OF_BOUNDS_ERR_SMASK) \
0055 | SD(ENG_ERR_STATUS_SDMA_FIRST_DESC_ERR_SMASK) \
0056 | SD(ENG_ERR_STATUS_SDMA_MEM_READ_ERR_SMASK) \
0057 | SD(ENG_ERR_STATUS_SDMA_HALT_ERR_SMASK) \
0058 | SD(ENG_ERR_STATUS_SDMA_LENGTH_MISMATCH_ERR_SMASK) \
0059 | SD(ENG_ERR_STATUS_SDMA_PACKET_DESC_OVERFLOW_ERR_SMASK) \
0060 | SD(ENG_ERR_STATUS_SDMA_HEADER_SELECT_ERR_SMASK) \
0061 | SD(ENG_ERR_STATUS_SDMA_HEADER_ADDRESS_ERR_SMASK) \
0062 | SD(ENG_ERR_STATUS_SDMA_HEADER_LENGTH_ERR_SMASK) \
0063 | SD(ENG_ERR_STATUS_SDMA_TIMEOUT_ERR_SMASK) \
0064 | SD(ENG_ERR_STATUS_SDMA_DESC_TABLE_UNC_ERR_SMASK) \
0065 | SD(ENG_ERR_STATUS_SDMA_ASSEMBLY_UNC_ERR_SMASK) \
0066 | SD(ENG_ERR_STATUS_SDMA_PACKET_TRACKING_UNC_ERR_SMASK) \
0067 | SD(ENG_ERR_STATUS_SDMA_HEADER_STORAGE_UNC_ERR_SMASK) \
0068 | SD(ENG_ERR_STATUS_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_SMASK))
0069
0070 /* sdma_sendctrl operation flags */
0071 #define SDMA_SENDCTRL_OP_ENABLE BIT(0)
0072 #define SDMA_SENDCTRL_OP_INTENABLE BIT(1)
0073 #define SDMA_SENDCTRL_OP_HALT BIT(2)
0074 #define SDMA_SENDCTRL_OP_CLEANUP BIT(3)
0075
0076 /* handy shorthands for the SDMA egress packet occupancy field */
0077 #define SDMA_EGRESS_PACKET_OCCUPANCY_SMASK \
0078 SEND_EGRESS_SEND_DMA_STATUS_SDMA_EGRESS_PACKET_OCCUPANCY_SMASK
0079 #define SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT \
0080 SEND_EGRESS_SEND_DMA_STATUS_SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT
0081
0082 static const char * const sdma_state_names[] = {
0083 [sdma_state_s00_hw_down] = "s00_HwDown",
0084 [sdma_state_s10_hw_start_up_halt_wait] = "s10_HwStartUpHaltWait",
0085 [sdma_state_s15_hw_start_up_clean_wait] = "s15_HwStartUpCleanWait",
0086 [sdma_state_s20_idle] = "s20_Idle",
0087 [sdma_state_s30_sw_clean_up_wait] = "s30_SwCleanUpWait",
0088 [sdma_state_s40_hw_clean_up_wait] = "s40_HwCleanUpWait",
0089 [sdma_state_s50_hw_halt_wait] = "s50_HwHaltWait",
0090 [sdma_state_s60_idle_halt_wait] = "s60_IdleHaltWait",
0091 [sdma_state_s80_hw_freeze] = "s80_HwFreeze",
0092 [sdma_state_s82_freeze_sw_clean] = "s82_FreezeSwClean",
0093 [sdma_state_s99_running] = "s99_Running",
0094 };
0095
0096 #ifdef CONFIG_SDMA_VERBOSITY
0097 static const char * const sdma_event_names[] = {
0098 [sdma_event_e00_go_hw_down] = "e00_GoHwDown",
0099 [sdma_event_e10_go_hw_start] = "e10_GoHwStart",
0100 [sdma_event_e15_hw_halt_done] = "e15_HwHaltDone",
0101 [sdma_event_e25_hw_clean_up_done] = "e25_HwCleanUpDone",
0102 [sdma_event_e30_go_running] = "e30_GoRunning",
0103 [sdma_event_e40_sw_cleaned] = "e40_SwCleaned",
0104 [sdma_event_e50_hw_cleaned] = "e50_HwCleaned",
0105 [sdma_event_e60_hw_halted] = "e60_HwHalted",
0106 [sdma_event_e70_go_idle] = "e70_GoIdle",
0107 [sdma_event_e80_hw_freeze] = "e80_HwFreeze",
0108 [sdma_event_e81_hw_frozen] = "e81_HwFrozen",
0109 [sdma_event_e82_hw_unfreeze] = "e82_HwUnfreeze",
0110 [sdma_event_e85_link_down] = "e85_LinkDown",
0111 [sdma_event_e90_sw_halted] = "e90_SwHalted",
0112 };
0113 #endif
0114
0115 static const struct sdma_set_state_action sdma_action_table[] = {
0116 [sdma_state_s00_hw_down] = {
0117 .go_s99_running_tofalse = 1,
0118 .op_enable = 0,
0119 .op_intenable = 0,
0120 .op_halt = 0,
0121 .op_cleanup = 0,
0122 },
0123 [sdma_state_s10_hw_start_up_halt_wait] = {
0124 .op_enable = 0,
0125 .op_intenable = 0,
0126 .op_halt = 1,
0127 .op_cleanup = 0,
0128 },
0129 [sdma_state_s15_hw_start_up_clean_wait] = {
0130 .op_enable = 0,
0131 .op_intenable = 1,
0132 .op_halt = 0,
0133 .op_cleanup = 1,
0134 },
0135 [sdma_state_s20_idle] = {
0136 .op_enable = 0,
0137 .op_intenable = 1,
0138 .op_halt = 0,
0139 .op_cleanup = 0,
0140 },
0141 [sdma_state_s30_sw_clean_up_wait] = {
0142 .op_enable = 0,
0143 .op_intenable = 0,
0144 .op_halt = 0,
0145 .op_cleanup = 0,
0146 },
0147 [sdma_state_s40_hw_clean_up_wait] = {
0148 .op_enable = 0,
0149 .op_intenable = 0,
0150 .op_halt = 0,
0151 .op_cleanup = 1,
0152 },
0153 [sdma_state_s50_hw_halt_wait] = {
0154 .op_enable = 0,
0155 .op_intenable = 0,
0156 .op_halt = 0,
0157 .op_cleanup = 0,
0158 },
0159 [sdma_state_s60_idle_halt_wait] = {
0160 .go_s99_running_tofalse = 1,
0161 .op_enable = 0,
0162 .op_intenable = 0,
0163 .op_halt = 1,
0164 .op_cleanup = 0,
0165 },
0166 [sdma_state_s80_hw_freeze] = {
0167 .op_enable = 0,
0168 .op_intenable = 0,
0169 .op_halt = 0,
0170 .op_cleanup = 0,
0171 },
0172 [sdma_state_s82_freeze_sw_clean] = {
0173 .op_enable = 0,
0174 .op_intenable = 0,
0175 .op_halt = 0,
0176 .op_cleanup = 0,
0177 },
0178 [sdma_state_s99_running] = {
0179 .op_enable = 1,
0180 .op_intenable = 1,
0181 .op_halt = 0,
0182 .op_cleanup = 0,
0183 .go_s99_running_totrue = 1,
0184 },
0185 };
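/*
 * Worked example of reading the table above: entering
 * sdma_state_s99_running asserts op_enable and op_intenable and sets
 * go_s99_running, while entering sdma_state_s00_hw_down clears
 * go_s99_running and asserts none of the ops.  sdma_set_state() turns
 * these flags into a single sdma_sendctrl() call.
 */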
0186
0187 #define SDMA_TAIL_UPDATE_THRESH 0x1F
0188
0189 /* forward declarations of the static helpers used below */
0190 static void sdma_complete(struct kref *);
0191 static void sdma_finalput(struct sdma_state *);
0192 static void sdma_get(struct sdma_state *);
0193 static void sdma_hw_clean_up_task(struct tasklet_struct *);
0194 static void sdma_put(struct sdma_state *);
0195 static void sdma_set_state(struct sdma_engine *, enum sdma_states);
0196 static void sdma_start_hw_clean_up(struct sdma_engine *);
0197 static void sdma_sw_clean_up_task(struct tasklet_struct *);
0198 static void sdma_sendctrl(struct sdma_engine *, unsigned);
0199 static void init_sdma_regs(struct sdma_engine *, u32, uint);
0200 static void sdma_process_event(
0201 struct sdma_engine *sde,
0202 enum sdma_events event);
0203 static void __sdma_process_event(
0204 struct sdma_engine *sde,
0205 enum sdma_events event);
0206 static void dump_sdma_state(struct sdma_engine *sde);
0207 static void sdma_make_progress(struct sdma_engine *sde, u64 status);
0208 static void sdma_desc_avail(struct sdma_engine *sde, uint avail);
0209 static void sdma_flush_descq(struct sdma_engine *sde);
0210
0211
0212
0213
0214 /* return the human-readable name for an sdma state */
0215 static const char *sdma_state_name(enum sdma_states state)
0216 {
0217 return sdma_state_names[state];
0218 }
0219
0220 static void sdma_get(struct sdma_state *ss)
0221 {
0222 kref_get(&ss->kref);
0223 }
0224
0225 static void sdma_complete(struct kref *kref)
0226 {
0227 struct sdma_state *ss =
0228 container_of(kref, struct sdma_state, kref);
0229
0230 complete(&ss->comp);
0231 }
0232
0233 static void sdma_put(struct sdma_state *ss)
0234 {
0235 kref_put(&ss->kref, sdma_complete);
0236 }
0237
0238 static void sdma_finalput(struct sdma_state *ss)
0239 {
0240 sdma_put(ss);
0241 wait_for_completion(&ss->comp);
0242 }
0243
0244 static inline void write_sde_csr(
0245 struct sdma_engine *sde,
0246 u32 offset0,
0247 u64 value)
0248 {
0249 write_kctxt_csr(sde->dd, sde->this_idx, offset0, value);
0250 }
0251
0252 static inline u64 read_sde_csr(
0253 struct sdma_engine *sde,
0254 u32 offset0)
0255 {
0256 return read_kctxt_csr(sde->dd, sde->this_idx, offset0);
0257 }
0258
0259 /*
0260  * Wait for this engine's egress packet occupancy to drop to zero; if
0261  * packets fail to drain within the polling budget, bounce the link.
0262  */
0263 static void sdma_wait_for_packet_egress(struct sdma_engine *sde,
0264 int pause)
0265 {
0266 u64 off = 8 * sde->this_idx;
0267 struct hfi1_devdata *dd = sde->dd;
0268 int lcnt = 0;
0269 u64 reg_prev;
0270 u64 reg = 0;
0271
0272 while (1) {
0273 reg_prev = reg;
0274 reg = read_csr(dd, off + SEND_EGRESS_SEND_DMA_STATUS);
0275
0276 reg &= SDMA_EGRESS_PACKET_OCCUPANCY_SMASK;
0277 reg >>= SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT;
0278 if (reg == 0)
0279 break;
0280
0281 if (reg != reg_prev)
0282 lcnt = 0;
0283 if (lcnt++ > 500) {
0284
0285 dd_dev_err(dd, "%s: engine %u timeout waiting for packets to egress, remaining count %u, bouncing link\n",
0286 __func__, sde->this_idx, (u32)reg);
0287 queue_work(dd->pport->link_wq,
0288 &dd->pport->link_bounce_work);
0289 break;
0290 }
0291 udelay(1);
0292 }
0293 }
0294
0295 /**
0296  * sdma_wait() - wait for packet egress to finish on all SDMA engines
0297  * @dd: hfi1_devdata
0298  */
0299 void sdma_wait(struct hfi1_devdata *dd)
0300 {
0301 int i;
0302
0303 for (i = 0; i < dd->num_sdma; i++) {
0304 struct sdma_engine *sde = &dd->per_sdma[i];
0305
0306 sdma_wait_for_packet_egress(sde, 0);
0307 }
0308 }
0309
0310 static inline void sdma_set_desc_cnt(struct sdma_engine *sde, unsigned cnt)
0311 {
0312 u64 reg;
0313
0314 if (!(sde->dd->flags & HFI1_HAS_SDMA_TIMEOUT))
0315 return;
0316 reg = cnt;
0317 reg &= SD(DESC_CNT_CNT_MASK);
0318 reg <<= SD(DESC_CNT_CNT_SHIFT);
0319 write_sde_csr(sde, SD(DESC_CNT), reg);
0320 }
0321
0322 static inline void complete_tx(struct sdma_engine *sde,
0323 struct sdma_txreq *tx,
0324 int res)
0325 {
0326
0327 struct iowait *wait = tx->wait;
0328 callback_t complete = tx->complete;
0329
0330 #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
0331 trace_hfi1_sdma_out_sn(sde, tx->sn);
0332 if (WARN_ON_ONCE(sde->head_sn != tx->sn))
0333 dd_dev_err(sde->dd, "expected %llu got %llu\n",
0334 sde->head_sn, tx->sn);
0335 sde->head_sn++;
0336 #endif
0337 __sdma_txclean(sde->dd, tx);
0338 if (complete)
0339 (*complete)(tx, res);
0340 if (iowait_sdma_dec(wait))
0341 iowait_drain_wakeup(wait);
0342 }
0343
0344
0345
0346
0347
0348
0349
0350
0351 /*
0352  * Complete all outstanding sdma requests with an SDMA_TXREQ_S_ABORTED
0353  * status.
0354  *
0355  * Depending on timing a txreq can be in two places: in the descriptor
0356  * ring or on the flush list.  The descriptor ring is flushed first so
0357  * that completions are delivered in submission order.
0358  *
0359  * Called from the flush worker and from the software clean up and
0360  * state machine paths; must be called with head_lock held.
0361  */
0362 static void sdma_flush(struct sdma_engine *sde)
0363 {
0364 struct sdma_txreq *txp, *txp_next;
0365 LIST_HEAD(flushlist);
0366 unsigned long flags;
0367 uint seq;
0368
0369
0370 sdma_flush_descq(sde);
0371 spin_lock_irqsave(&sde->flushlist_lock, flags);
0372
0373 list_splice_init(&sde->flushlist, &flushlist);
0374 spin_unlock_irqrestore(&sde->flushlist_lock, flags);
0375
0376 list_for_each_entry_safe(txp, txp_next, &flushlist, list)
0377 complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
0378
0379 do {
0380 struct iowait *w, *nw;
0381
0382 seq = read_seqbegin(&sde->waitlock);
0383 if (!list_empty(&sde->dmawait)) {
0384 write_seqlock(&sde->waitlock);
0385 list_for_each_entry_safe(w, nw, &sde->dmawait, list) {
0386 if (w->wakeup) {
0387 w->wakeup(w, SDMA_AVAIL_REASON);
0388 list_del_init(&w->list);
0389 }
0390 }
0391 write_sequnlock(&sde->waitlock);
0392 }
0393 } while (read_seqretry(&sde->waitlock, seq));
0394 }
0395
0396
0397
0398 /*
0399  * Work queue handler that flushes the descriptor ring and flush list
0400  * of an engine that is not running.
0401  *
0402  * If the engine has made it back to the running state during the
0403  * scheduling delay, the flush is skipped and the remaining callbacks
0404  * are left for the hardware to retire.
0405  */
0406 static void sdma_field_flush(struct work_struct *work)
0407 {
0408 unsigned long flags;
0409 struct sdma_engine *sde =
0410 container_of(work, struct sdma_engine, flush_worker);
0411
0412 write_seqlock_irqsave(&sde->head_lock, flags);
0413 if (!__sdma_running(sde))
0414 sdma_flush(sde);
0415 write_sequnlock_irqrestore(&sde->head_lock, flags);
0416 }
0417
0418 static void sdma_err_halt_wait(struct work_struct *work)
0419 {
0420 struct sdma_engine *sde = container_of(work, struct sdma_engine,
0421 err_halt_worker);
0422 u64 statuscsr;
0423 unsigned long timeout;
0424
0425 timeout = jiffies + msecs_to_jiffies(SDMA_ERR_HALT_TIMEOUT);
0426 while (1) {
0427 statuscsr = read_sde_csr(sde, SD(STATUS));
0428 statuscsr &= SD(STATUS_ENG_HALTED_SMASK);
0429 if (statuscsr)
0430 break;
0431 if (time_after(jiffies, timeout)) {
0432 dd_dev_err(sde->dd,
0433 "SDMA engine %d - timeout waiting for engine to halt\n",
0434 sde->this_idx);
0435
0436
0437
0438
0439 break;
0440 }
0441 usleep_range(80, 120);
0442 }
0443
0444 sdma_process_event(sde, sdma_event_e15_hw_halt_done);
0445 }
0446
0447 static void sdma_err_progress_check_schedule(struct sdma_engine *sde)
0448 {
0449 if (!is_bx(sde->dd) && HFI1_CAP_IS_KSET(SDMA_AHG)) {
0450 unsigned index;
0451 struct hfi1_devdata *dd = sde->dd;
0452
0453 for (index = 0; index < dd->num_sdma; index++) {
0454 struct sdma_engine *curr_sdma = &dd->per_sdma[index];
0455
0456 if (curr_sdma != sde)
0457 curr_sdma->progress_check_head =
0458 curr_sdma->descq_head;
0459 }
0460 dd_dev_err(sde->dd,
0461 "SDMA engine %d - check scheduled\n",
0462 sde->this_idx);
0463 mod_timer(&sde->err_progress_check_timer, jiffies + 10);
0464 }
0465 }
0466
0467 static void sdma_err_progress_check(struct timer_list *t)
0468 {
0469 unsigned index;
0470 struct sdma_engine *sde = from_timer(sde, t, err_progress_check_timer);
0471
0472 dd_dev_err(sde->dd, "SDE progress check event\n");
0473 for (index = 0; index < sde->dd->num_sdma; index++) {
0474 struct sdma_engine *curr_sde = &sde->dd->per_sdma[index];
0475 unsigned long flags;
0476
0477
0478 if (curr_sde == sde)
0479 continue;
0480
0481
0482
0483
0484
0485 spin_lock_irqsave(&curr_sde->tail_lock, flags);
0486 write_seqlock(&curr_sde->head_lock);
0487
0488
0489 if (curr_sde->state.current_state != sdma_state_s99_running) {
0490 write_sequnlock(&curr_sde->head_lock);
0491 spin_unlock_irqrestore(&curr_sde->tail_lock, flags);
0492 continue;
0493 }
0494
0495 if ((curr_sde->descq_head != curr_sde->descq_tail) &&
0496 (curr_sde->descq_head ==
0497 curr_sde->progress_check_head))
0498 __sdma_process_event(curr_sde,
0499 sdma_event_e90_sw_halted);
0500 write_sequnlock(&curr_sde->head_lock);
0501 spin_unlock_irqrestore(&curr_sde->tail_lock, flags);
0502 }
0503 schedule_work(&sde->err_halt_worker);
0504 }
0505
0506 static void sdma_hw_clean_up_task(struct tasklet_struct *t)
0507 {
0508 struct sdma_engine *sde = from_tasklet(sde, t,
0509 sdma_hw_clean_up_task);
0510 u64 statuscsr;
0511
0512 while (1) {
0513 #ifdef CONFIG_SDMA_VERBOSITY
0514 dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
0515 sde->this_idx, slashstrip(__FILE__), __LINE__,
0516 __func__);
0517 #endif
0518 statuscsr = read_sde_csr(sde, SD(STATUS));
0519 statuscsr &= SD(STATUS_ENG_CLEANED_UP_SMASK);
0520 if (statuscsr)
0521 break;
0522 udelay(10);
0523 }
0524
0525 sdma_process_event(sde, sdma_event_e25_hw_clean_up_done);
0526 }
0527
0528 static inline struct sdma_txreq *get_txhead(struct sdma_engine *sde)
0529 {
0530 return sde->tx_ring[sde->tx_head & sde->sdma_mask];
0531 }
0532
0533
0534
0535 /* flush the descriptor ring, aborting any txreqs still outstanding on it */
0536 static void sdma_flush_descq(struct sdma_engine *sde)
0537 {
0538 u16 head, tail;
0539 int progress = 0;
0540 struct sdma_txreq *txp = get_txhead(sde);
0541
0542
0543
0544
0545
0546
0547 head = sde->descq_head & sde->sdma_mask;
0548 tail = sde->descq_tail & sde->sdma_mask;
0549 while (head != tail) {
0550
0551 head = ++sde->descq_head & sde->sdma_mask;
0552
0553 if (txp && txp->next_descq_idx == head) {
0554
0555 sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
0556 complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
0557 trace_hfi1_sdma_progress(sde, head, tail, txp);
0558 txp = get_txhead(sde);
0559 }
0560 progress++;
0561 }
0562 if (progress)
0563 sdma_desc_avail(sde, sdma_descq_freecnt(sde));
0564 }
0565
0566 static void sdma_sw_clean_up_task(struct tasklet_struct *t)
0567 {
0568 struct sdma_engine *sde = from_tasklet(sde, t, sdma_sw_clean_up_task);
0569 unsigned long flags;
0570
0571 spin_lock_irqsave(&sde->tail_lock, flags);
0572 write_seqlock(&sde->head_lock);
0573
0574
0575
0576
0577
0578 /*
0579  * At this point the following should always be true:
0580  * - the engine is halted, so no more descriptors are being retired
0581  * - the engine is not running, so no new work is being submitted
0582  * - only this task can post e40_sw_cleaned, so the engine cannot
0583  *   restart until it does; the tx ring and descq are ours to touch
0584  *
0585  * In the error clean up sequence the software clean up runs before
0586  * the hardware clean up, so the progress routine below can still use
0587  * the hardware head (a hardware clean up or SPC unfreeze resets it).
0588  *
0589  * Retire everything that has completed, then abort whatever is left
0590  * on the ring and on the flush list.
0591  */
0592
0593 sdma_make_progress(sde, 0);
0594
0595 sdma_flush(sde);
0596
0597
0598 /*
0599  * Reset the software notion of head and tail to match a freshly
0600  * cleaned up engine.
0601  */
0602 sde->descq_tail = 0;
0603 sde->descq_head = 0;
0604 sde->desc_avail = sdma_descq_freecnt(sde);
0605 *sde->head_dma = 0;
0606
0607 __sdma_process_event(sde, sdma_event_e40_sw_cleaned);
0608
0609 write_sequnlock(&sde->head_lock);
0610 spin_unlock_irqrestore(&sde->tail_lock, flags);
0611 }
0612
0613 static void sdma_sw_tear_down(struct sdma_engine *sde)
0614 {
0615 struct sdma_state *ss = &sde->state;
0616
0617
0618 sdma_put(ss);
0619
0620
0621 atomic_set(&sde->dd->sdma_unfreeze_count, -1);
0622 wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
0623 }
0624
0625 static void sdma_start_hw_clean_up(struct sdma_engine *sde)
0626 {
0627 tasklet_hi_schedule(&sde->sdma_hw_clean_up_task);
0628 }
0629
0630 static void sdma_set_state(struct sdma_engine *sde,
0631 enum sdma_states next_state)
0632 {
0633 struct sdma_state *ss = &sde->state;
0634 const struct sdma_set_state_action *action = sdma_action_table;
0635 unsigned op = 0;
0636
0637 trace_hfi1_sdma_state(
0638 sde,
0639 sdma_state_names[ss->current_state],
0640 sdma_state_names[next_state]);
0641
0642
0643 ss->previous_state = ss->current_state;
0644 ss->previous_op = ss->current_op;
0645 ss->current_state = next_state;
0646
0647 if (ss->previous_state != sdma_state_s99_running &&
0648 next_state == sdma_state_s99_running)
0649 sdma_flush(sde);
0650
0651 if (action[next_state].op_enable)
0652 op |= SDMA_SENDCTRL_OP_ENABLE;
0653
0654 if (action[next_state].op_intenable)
0655 op |= SDMA_SENDCTRL_OP_INTENABLE;
0656
0657 if (action[next_state].op_halt)
0658 op |= SDMA_SENDCTRL_OP_HALT;
0659
0660 if (action[next_state].op_cleanup)
0661 op |= SDMA_SENDCTRL_OP_CLEANUP;
0662
0663 if (action[next_state].go_s99_running_tofalse)
0664 ss->go_s99_running = 0;
0665
0666 if (action[next_state].go_s99_running_totrue)
0667 ss->go_s99_running = 1;
0668
0669 ss->current_op = op;
0670 sdma_sendctrl(sde, ss->current_op);
0671 }
0672
0673
0674
0675
0676
0677
0678 /**
0679  * sdma_get_descq_cnt() - return a validated descriptor queue count
0680  *
0681  * The sdma_descq_cnt module parameter is used if it is a power of 2
0682  * in the range [64, 32768]; otherwise the default SDMA_DESCQ_CNT
0683  * (2048) is returned.
0684  */
0685 u16 sdma_get_descq_cnt(void)
0686 {
0687 u16 count = sdma_descq_cnt;
0688
0689 if (!count)
0690 return SDMA_DESCQ_CNT;
0691
0692
0693 /* count must be a power of 2 in [64, 32768]; otherwise use the default */
0694 if (!is_power_of_2(count))
0695 return SDMA_DESCQ_CNT;
0696 if (count < 64 || count > 32768)
0697 return SDMA_DESCQ_CNT;
0698 return count;
0699 }
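/*
 * Worked example of the validation above: sdma_descq_cnt=4096 is used
 * as-is, while 2000 (not a power of 2) or 32 (below 64) silently fall
 * back to the default SDMA_DESCQ_CNT of 2048.
 */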
0700
0701 /**
0702  * sdma_engine_get_vl() - return the VL mapped to the given engine
0703  * @sde: sdma engine
0704  *
0705  * Return: the VL for the engine, or -EINVAL if the engine index is
0706  * out of range or no mapping is installed.
0707  */
0708 int sdma_engine_get_vl(struct sdma_engine *sde)
0709 {
0710 struct hfi1_devdata *dd = sde->dd;
0711 struct sdma_vl_map *m;
0712 u8 vl;
0713
0714 if (sde->this_idx >= TXE_NUM_SDMA_ENGINES)
0715 return -EINVAL;
0716
0717 rcu_read_lock();
0718 m = rcu_dereference(dd->sdma_map);
0719 if (unlikely(!m)) {
0720 rcu_read_unlock();
0721 return -EINVAL;
0722 }
0723 vl = m->engine_to_vl[sde->this_idx];
0724 rcu_read_unlock();
0725
0726 return vl;
0727 }
0728
0729 /**
0730  * sdma_select_engine_vl() - select an sdma engine
0731  * @dd: devdata
0732  * @selector: a spreading factor
0733  * @vl: this vl
0734  *
0735  * Returns an engine based on the selector and a vl.  The mapping
0736  * fields are protected by RCU; engine 0 is returned when no mapping
0737  * is available or the vl is out of range.
0738  */
0739 struct sdma_engine *sdma_select_engine_vl(
0740 struct hfi1_devdata *dd,
0741 u32 selector,
0742 u8 vl)
0743 {
0744 struct sdma_vl_map *m;
0745 struct sdma_map_elem *e;
0746 struct sdma_engine *rval;
0747
0748
0749
0750
0751
0752 if (vl >= num_vls) {
0753 rval = NULL;
0754 goto done;
0755 }
0756
0757 rcu_read_lock();
0758 m = rcu_dereference(dd->sdma_map);
0759 if (unlikely(!m)) {
0760 rcu_read_unlock();
0761 return &dd->per_sdma[0];
0762 }
0763 e = m->map[vl & m->mask];
0764 rval = e->sde[selector & e->mask];
0765 rcu_read_unlock();
0766
0767 done:
0768 rval = !rval ? &dd->per_sdma[0] : rval;
0769 trace_hfi1_sdma_engine_select(dd, selector, vl, rval->this_idx);
0770 return rval;
0771 }
0772
0773 /**
0774  * sdma_select_engine_sc() - select an sdma engine
0775  * @dd: devdata
0776  * @selector: a spreading factor
0777  * @sc5: the 5 bit sc
0778  *
0779  * Returns an engine based on the selector and an sc, by first
0780  * converting the sc to a vl.
0781  */
0782 struct sdma_engine *sdma_select_engine_sc(
0783 struct hfi1_devdata *dd,
0784 u32 selector,
0785 u8 sc5)
0786 {
0787 u8 vl = sc_to_vlt(dd, sc5);
0788
0789 return sdma_select_engine_vl(dd, selector, vl);
0790 }
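/*
 * Illustrative sketch of typical use by a sender (the selector shown is
 * an assumption, not the driver's actual policy): pick a value that is
 * constant per flow so all packets of the flow land on one engine, e.g.
 *
 *   sde = sdma_select_engine_sc(dd, flow_selector, sc5);
 *
 * Any stable selector works, since it only indexes the per-vl engine
 * array built by sdma_map_init().
 */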
0791
0792 struct sdma_rht_map_elem {
0793 u32 mask;
0794 u8 ctr;
0795 struct sdma_engine *sde[];
0796 };
0797
0798 struct sdma_rht_node {
0799 unsigned long cpu_id;
0800 struct sdma_rht_map_elem *map[HFI1_MAX_VLS_SUPPORTED];
0801 struct rhash_head node;
0802 };
0803
0804 #define NR_CPUS_HINT 192
0805
0806 static const struct rhashtable_params sdma_rht_params = {
0807 .nelem_hint = NR_CPUS_HINT,
0808 .head_offset = offsetof(struct sdma_rht_node, node),
0809 .key_offset = offsetof(struct sdma_rht_node, cpu_id),
0810 .key_len = sizeof_field(struct sdma_rht_node, cpu_id),
0811 .max_size = NR_CPUS,
0812 .min_size = 8,
0813 .automatic_shrinking = true,
0814 };
0815
0816
0817 /**
0818  * sdma_select_user_engine() - select an sdma engine based on user setup
0819  * @dd: devdata
0820  * @selector: a spreading factor
0821  * @vl: this vl
0822  *
0823  * Returns an sdma engine for a user sdma request.  A user supplied
0824  * cpu-to-engine affinity is honored when applicable; otherwise the
0825  * default vl based engine mapping is used.
0826  */
0827 struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
0828 u32 selector, u8 vl)
0829 {
0830 struct sdma_rht_node *rht_node;
0831 struct sdma_engine *sde = NULL;
0832 unsigned long cpu_id;
0833
0834 /*
0835  * To keep engine selection stable, only honor the per-cpu affinity
0836  * table when the process is pinned to a single CPU.
0837  */
0838 if (current->nr_cpus_allowed != 1)
0839 goto out;
0840
0841 rcu_read_lock();
0842 cpu_id = smp_processor_id();
0843 rht_node = rhashtable_lookup(dd->sdma_rht, &cpu_id,
0844 sdma_rht_params);
0845
0846 if (rht_node && rht_node->map[vl]) {
0847 struct sdma_rht_map_elem *map = rht_node->map[vl];
0848
0849 sde = map->sde[selector & map->mask];
0850 }
0851 rcu_read_unlock();
0852
0853 if (sde)
0854 return sde;
0855
0856 out:
0857 return sdma_select_engine_vl(dd, selector, vl);
0858 }
0859
0860 static void sdma_populate_sde_map(struct sdma_rht_map_elem *map)
0861 {
0862 int i;
0863
0864 for (i = 0; i < roundup_pow_of_two(map->ctr ? : 1) - map->ctr; i++)
0865 map->sde[map->ctr + i] = map->sde[i];
0866 }
0867
0868 static void sdma_cleanup_sde_map(struct sdma_rht_map_elem *map,
0869 struct sdma_engine *sde)
0870 {
0871 unsigned int i, pow;
0872
0873 /* scan the map, remove the requested engine and repack the table */
0874 for (i = 0; i < map->ctr; i++) {
0875 if (map->sde[i] == sde) {
0876 memmove(&map->sde[i], &map->sde[i + 1],
0877 (map->ctr - i - 1) * sizeof(map->sde[0]));
0878 map->ctr--;
0879 pow = roundup_pow_of_two(map->ctr ? : 1);
0880 map->mask = pow - 1;
0881 sdma_populate_sde_map(map);
0882 break;
0883 }
0884 }
0885 }
0886
0887 /*
0888  * Prevents concurrent reads and writes of the per engine cpu_mask.
0889  */
0890 static DEFINE_MUTEX(process_to_sde_mutex);
0891
0892 ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
0893 size_t count)
0894 {
0895 struct hfi1_devdata *dd = sde->dd;
0896 cpumask_var_t mask, new_mask;
0897 unsigned long cpu;
0898 int ret, vl, sz;
0899 struct sdma_rht_node *rht_node;
0900
0901 vl = sdma_engine_get_vl(sde);
0902 if (unlikely(vl < 0 || vl >= ARRAY_SIZE(rht_node->map)))
0903 return -EINVAL;
0904
0905 ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
0906 if (!ret)
0907 return -ENOMEM;
0908
0909 ret = zalloc_cpumask_var(&new_mask, GFP_KERNEL);
0910 if (!ret) {
0911 free_cpumask_var(mask);
0912 return -ENOMEM;
0913 }
0914 ret = cpulist_parse(buf, mask);
0915 if (ret)
0916 goto out_free;
0917
0918 if (!cpumask_subset(mask, cpu_online_mask)) {
0919 dd_dev_warn(sde->dd, "Invalid CPU mask\n");
0920 ret = -EINVAL;
0921 goto out_free;
0922 }
0923
0924 sz = sizeof(struct sdma_rht_map_elem) +
0925 (TXE_NUM_SDMA_ENGINES * sizeof(struct sdma_engine *));
0926
0927 mutex_lock(&process_to_sde_mutex);
0928
0929 for_each_cpu(cpu, mask) {
0930 /* cpu is already mapped to this engine; just carry it over */
0931 if (cpumask_test_cpu(cpu, &sde->cpu_mask)) {
0932 cpumask_set_cpu(cpu, new_mask);
0933 continue;
0934 }
0935
0936 rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
0937 sdma_rht_params);
0938 if (!rht_node) {
0939 rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL);
0940 if (!rht_node) {
0941 ret = -ENOMEM;
0942 goto out;
0943 }
0944
0945 rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
0946 if (!rht_node->map[vl]) {
0947 kfree(rht_node);
0948 ret = -ENOMEM;
0949 goto out;
0950 }
0951 rht_node->cpu_id = cpu;
0952 rht_node->map[vl]->mask = 0;
0953 rht_node->map[vl]->ctr = 1;
0954 rht_node->map[vl]->sde[0] = sde;
0955
0956 ret = rhashtable_insert_fast(dd->sdma_rht,
0957 &rht_node->node,
0958 sdma_rht_params);
0959 if (ret) {
0960 kfree(rht_node->map[vl]);
0961 kfree(rht_node);
0962 dd_dev_err(sde->dd, "Failed to set process to sde affinity for cpu %lu\n",
0963 cpu);
0964 goto out;
0965 }
0966
0967 } else {
0968 int ctr, pow;
0969
0970 /* add this engine to an existing per-cpu entry */
0971 if (!rht_node->map[vl])
0972 rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
0973
0974 if (!rht_node->map[vl]) {
0975 ret = -ENOMEM;
0976 goto out;
0977 }
0978
0979 rht_node->map[vl]->ctr++;
0980 ctr = rht_node->map[vl]->ctr;
0981 rht_node->map[vl]->sde[ctr - 1] = sde;
0982 pow = roundup_pow_of_two(ctr);
0983 rht_node->map[vl]->mask = pow - 1;
0984
0985
0986 sdma_populate_sde_map(rht_node->map[vl]);
0987 }
0988 cpumask_set_cpu(cpu, new_mask);
0989 }
0990
0991 /* clean up old mappings on cpus that are not in the new mask */
0992 for_each_cpu(cpu, cpu_online_mask) {
0993 struct sdma_rht_node *rht_node;
0994
0995 /* skip cpus that are kept by the new mask */
0996 if (cpumask_test_cpu(cpu, mask))
0997 continue;
0998
0999 rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
1000 sdma_rht_params);
1001 if (rht_node) {
1002 bool empty = true;
1003 int i;
1004
1005
1006 for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
1007 if (rht_node->map[i])
1008 sdma_cleanup_sde_map(rht_node->map[i],
1009 sde);
1010
1011 /* free the hash entry if no engines remain mapped on this cpu */
1012 for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
1013 if (!rht_node->map[i])
1014 continue;
1015
1016 if (rht_node->map[i]->ctr) {
1017 empty = false;
1018 break;
1019 }
1020 }
1021
1022 if (empty) {
1023 ret = rhashtable_remove_fast(dd->sdma_rht,
1024 &rht_node->node,
1025 sdma_rht_params);
1026 WARN_ON(ret);
1027
1028 for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
1029 kfree(rht_node->map[i]);
1030
1031 kfree(rht_node);
1032 }
1033 }
1034 }
1035
1036 cpumask_copy(&sde->cpu_mask, new_mask);
1037 out:
1038 mutex_unlock(&process_to_sde_mutex);
1039 out_free:
1040 free_cpumask_var(mask);
1041 free_cpumask_var(new_mask);
1042 return ret ? : strnlen(buf, PAGE_SIZE);
1043 }
1044
1045 ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf)
1046 {
1047 mutex_lock(&process_to_sde_mutex);
1048 if (cpumask_empty(&sde->cpu_mask))
1049 snprintf(buf, PAGE_SIZE, "%s\n", "empty");
1050 else
1051 cpumap_print_to_pagebuf(true, buf, &sde->cpu_mask);
1052 mutex_unlock(&process_to_sde_mutex);
1053 return strnlen(buf, PAGE_SIZE);
1054 }
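/*
 * The buffer parsed by sdma_set_cpu_to_sde_map() is a standard cpu list
 * string (see cpulist_parse()), e.g. "0-3,8".  It is normally written
 * through a per-engine sysfs attribute; the exact name and path depend
 * on the sysfs wiring and are assumed here to look roughly like:
 *
 *   echo 0-3 > /sys/class/infiniband/hfi1_0/sdma<N>/cpu_list
 */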
1055
1056 static void sdma_rht_free(void *ptr, void *arg)
1057 {
1058 struct sdma_rht_node *rht_node = ptr;
1059 int i;
1060
1061 for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
1062 kfree(rht_node->map[i]);
1063
1064 kfree(rht_node);
1065 }
1066
1067
1068 /*
1069  * sdma_seqfile_dump_cpu_list() - debugfs dump of the cpu to sdma mappings
1070  * @s: seq file
1071  * @dd: hfi1_devdata
1072  * @cpuid: cpu id
1073  * Dumps the per-vl engine list configured for the given cpu.
1074  */
1075 void sdma_seqfile_dump_cpu_list(struct seq_file *s,
1076 struct hfi1_devdata *dd,
1077 unsigned long cpuid)
1078 {
1079 struct sdma_rht_node *rht_node;
1080 int i, j;
1081
1082 rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpuid,
1083 sdma_rht_params);
1084 if (!rht_node)
1085 return;
1086
1087 seq_printf(s, "cpu%3lu: ", cpuid);
1088 for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
1089 if (!rht_node->map[i] || !rht_node->map[i]->ctr)
1090 continue;
1091
1092 seq_printf(s, " vl%d: [", i);
1093
1094 for (j = 0; j < rht_node->map[i]->ctr; j++) {
1095 if (!rht_node->map[i]->sde[j])
1096 continue;
1097
1098 if (j > 0)
1099 seq_puts(s, ",");
1100
1101 seq_printf(s, " sdma%2d",
1102 rht_node->map[i]->sde[j]->this_idx);
1103 }
1104 seq_puts(s, " ]");
1105 }
1106
1107 seq_puts(s, "\n");
1108 }
1109
1110
1111
1112 /* free the indicated vl map and its per-vl elements */
1113 static void sdma_map_free(struct sdma_vl_map *m)
1114 {
1115 int i;
1116
1117 for (i = 0; m && i < m->actual_vls; i++)
1118 kfree(m->map[i]);
1119 kfree(m);
1120 }
1121
1122
1123
1124 /* RCU callback that frees a retired vl map once readers are done */
1125 static void sdma_map_rcu_callback(struct rcu_head *list)
1126 {
1127 struct sdma_vl_map *m = container_of(list, struct sdma_vl_map, list);
1128
1129 sdma_map_free(m);
1130 }
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140 /**
1141  * sdma_map_init - called when the number of vls changes
1142  * @dd: hfi1_devdata
1143  * @port: port number
1144  * @num_vls: number of vls
1145  * @vl_engines: per vl engine mapping (optional)
1146  *
1147  * Rebuilds the vl to engine mapping.  A NULL vl_engines requests a
1148  * uniform distribution of engines per vl, with any leftover engines
1149  * added from the last vl on down.
1150  *
1151  * If num_vls or a per vl engine count is not a power of 2, the map
1152  * arrays are rounded up to the next power of 2 and entries are reused
1153  * round robin so any selector value lands on a valid engine.
1154  *
1155  * Readers are protected by RCU; the old map is freed via call_rcu().
1156  * On allocation failure the existing mapping is left unchanged and
1157  * -ENOMEM is returned.
1158  */
1159 int sdma_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_engines)
1160 {
1161 int i, j;
1162 int extra, sde_per_vl;
1163 int engine = 0;
1164 u8 lvl_engines[OPA_MAX_VLS];
1165 struct sdma_vl_map *oldmap, *newmap;
1166
1167 if (!(dd->flags & HFI1_HAS_SEND_DMA))
1168 return 0;
1169
1170 if (!vl_engines) {
1171
1172 sde_per_vl = dd->num_sdma / num_vls;
1173
1174 extra = dd->num_sdma % num_vls;
1175 vl_engines = lvl_engines;
1176
1177 for (i = num_vls - 1; i >= 0; i--, extra--)
1178 vl_engines[i] = sde_per_vl + (extra > 0 ? 1 : 0);
1179 }
1180
1181 newmap = kzalloc(
1182 sizeof(struct sdma_vl_map) +
1183 roundup_pow_of_two(num_vls) *
1184 sizeof(struct sdma_map_elem *),
1185 GFP_KERNEL);
1186 if (!newmap)
1187 goto bail;
1188 newmap->actual_vls = num_vls;
1189 newmap->vls = roundup_pow_of_two(num_vls);
1190 newmap->mask = (1 << ilog2(newmap->vls)) - 1;
1191
1192 for (i = 0; i < TXE_NUM_SDMA_ENGINES; i++)
1193 newmap->engine_to_vl[i] = -1;
1194 for (i = 0; i < newmap->vls; i++) {
1195
1196 int first_engine = engine;
1197
1198 if (i < newmap->actual_vls) {
1199 int sz = roundup_pow_of_two(vl_engines[i]);
1200
1201
1202 newmap->map[i] = kzalloc(
1203 sizeof(struct sdma_map_elem) +
1204 sz * sizeof(struct sdma_engine *),
1205 GFP_KERNEL);
1206 if (!newmap->map[i])
1207 goto bail;
1208 newmap->map[i]->mask = (1 << ilog2(sz)) - 1;
1209
1210 for (j = 0; j < sz; j++) {
1211 newmap->map[i]->sde[j] =
1212 &dd->per_sdma[engine];
1213 if (++engine >= first_engine + vl_engines[i])
1214
1215 engine = first_engine;
1216 }
1217
1218 for (j = 0; j < vl_engines[i]; j++)
1219 newmap->engine_to_vl[first_engine + j] = i;
1220 } else {
1221
1222 newmap->map[i] = newmap->map[i % num_vls];
1223 }
1224 engine = first_engine + vl_engines[i];
1225 }
1226
1227 spin_lock_irq(&dd->sde_map_lock);
1228 oldmap = rcu_dereference_protected(dd->sdma_map,
1229 lockdep_is_held(&dd->sde_map_lock));
1230
1231
1232 rcu_assign_pointer(dd->sdma_map, newmap);
1233
1234 spin_unlock_irq(&dd->sde_map_lock);
1235
1236 if (oldmap)
1237 call_rcu(&oldmap->list, sdma_map_rcu_callback);
1238 return 0;
1239 bail:
1240
1241 sdma_map_free(newmap);
1242 return -ENOMEM;
1243 }
1244
1245
1246 /**
1247  * sdma_clean() - free all per engine and global sdma memory
1248  * @dd: hfi1_devdata
1249  * @num_engines: number of engines to clean
1250  *
1251  * Safe to call regardless of how far sdma_init() progressed.
1252  */
1253 void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
1254 {
1255 size_t i;
1256 struct sdma_engine *sde;
1257
1258 if (dd->sdma_pad_dma) {
1259 dma_free_coherent(&dd->pcidev->dev, SDMA_PAD,
1260 (void *)dd->sdma_pad_dma,
1261 dd->sdma_pad_phys);
1262 dd->sdma_pad_dma = NULL;
1263 dd->sdma_pad_phys = 0;
1264 }
1265 if (dd->sdma_heads_dma) {
1266 dma_free_coherent(&dd->pcidev->dev, dd->sdma_heads_size,
1267 (void *)dd->sdma_heads_dma,
1268 dd->sdma_heads_phys);
1269 dd->sdma_heads_dma = NULL;
1270 dd->sdma_heads_phys = 0;
1271 }
1272 for (i = 0; dd->per_sdma && i < num_engines; ++i) {
1273 sde = &dd->per_sdma[i];
1274
1275 sde->head_dma = NULL;
1276 sde->head_phys = 0;
1277
1278 if (sde->descq) {
1279 dma_free_coherent(
1280 &dd->pcidev->dev,
1281 sde->descq_cnt * sizeof(u64[2]),
1282 sde->descq,
1283 sde->descq_phys
1284 );
1285 sde->descq = NULL;
1286 sde->descq_phys = 0;
1287 }
1288 kvfree(sde->tx_ring);
1289 sde->tx_ring = NULL;
1290 }
1291 if (rcu_access_pointer(dd->sdma_map)) {
1292 spin_lock_irq(&dd->sde_map_lock);
1293 sdma_map_free(rcu_access_pointer(dd->sdma_map));
1294 RCU_INIT_POINTER(dd->sdma_map, NULL);
1295 spin_unlock_irq(&dd->sde_map_lock);
1296 synchronize_rcu();
1297 }
1298 kfree(dd->per_sdma);
1299 dd->per_sdma = NULL;
1300
1301 if (dd->sdma_rht) {
1302 rhashtable_free_and_destroy(dd->sdma_rht, sdma_rht_free, NULL);
1303 kfree(dd->sdma_rht);
1304 dd->sdma_rht = NULL;
1305 }
1306 }
1307
1308
1309 /**
1310  * sdma_init() - called when the device is probed
1311  * @dd: hfi1_devdata
1312  * @port: port number
1313  *
1314  * Allocates and initializes each sdma engine and its CSRs.
1315  * Interrupts are not required to be enabled yet.
1316  *
1317  * Return: 0 on success, -errno on failure.
1318  */
1319 int sdma_init(struct hfi1_devdata *dd, u8 port)
1320 {
1321 unsigned this_idx;
1322 struct sdma_engine *sde;
1323 struct rhashtable *tmp_sdma_rht;
1324 u16 descq_cnt;
1325 void *curr_head;
1326 struct hfi1_pportdata *ppd = dd->pport + port;
1327 u32 per_sdma_credits;
1328 uint idle_cnt = sdma_idle_cnt;
1329 size_t num_engines = chip_sdma_engines(dd);
1330 int ret = -ENOMEM;
1331
1332 if (!HFI1_CAP_IS_KSET(SDMA)) {
1333 HFI1_CAP_CLEAR(SDMA_AHG);
1334 return 0;
1335 }
1336 if (mod_num_sdma &&
1337
1338 mod_num_sdma <= chip_sdma_engines(dd) &&
1339
1340 mod_num_sdma >= num_vls)
1341 num_engines = mod_num_sdma;
1342
1343 dd_dev_info(dd, "SDMA mod_num_sdma: %u\n", mod_num_sdma);
1344 dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", chip_sdma_engines(dd));
1345 dd_dev_info(dd, "SDMA chip_sdma_mem_size: %u\n",
1346 chip_sdma_mem_size(dd));
1347
1348 per_sdma_credits =
1349 chip_sdma_mem_size(dd) / (num_engines * SDMA_BLOCK_SIZE);
1350
1351
1352 init_waitqueue_head(&dd->sdma_unfreeze_wq);
1353 atomic_set(&dd->sdma_unfreeze_count, 0);
1354
1355 descq_cnt = sdma_get_descq_cnt();
1356 dd_dev_info(dd, "SDMA engines %zu descq_cnt %u\n",
1357 num_engines, descq_cnt);
1358
1359
1360 dd->per_sdma = kcalloc_node(num_engines, sizeof(*dd->per_sdma),
1361 GFP_KERNEL, dd->node);
1362 if (!dd->per_sdma)
1363 return ret;
1364
1365 idle_cnt = ns_to_cclock(dd, idle_cnt);
1366 if (idle_cnt)
1367 dd->default_desc1 =
1368 SDMA_DESC1_HEAD_TO_HOST_FLAG;
1369 else
1370 dd->default_desc1 =
1371 SDMA_DESC1_INT_REQ_FLAG;
1372
1373 if (!sdma_desct_intr)
1374 sdma_desct_intr = SDMA_DESC_INTR;
1375
1376
1377 for (this_idx = 0; this_idx < num_engines; ++this_idx) {
1378 sde = &dd->per_sdma[this_idx];
1379 sde->dd = dd;
1380 sde->ppd = ppd;
1381 sde->this_idx = this_idx;
1382 sde->descq_cnt = descq_cnt;
1383 sde->desc_avail = sdma_descq_freecnt(sde);
1384 sde->sdma_shift = ilog2(descq_cnt);
1385 sde->sdma_mask = (1 << sde->sdma_shift) - 1;
1386
1387
1388 sde->int_mask = (u64)1 << (0 * TXE_NUM_SDMA_ENGINES +
1389 this_idx);
1390 sde->progress_mask = (u64)1 << (1 * TXE_NUM_SDMA_ENGINES +
1391 this_idx);
1392 sde->idle_mask = (u64)1 << (2 * TXE_NUM_SDMA_ENGINES +
1393 this_idx);
1394
1395 sde->imask = sde->int_mask | sde->progress_mask |
1396 sde->idle_mask;
1397
1398 spin_lock_init(&sde->tail_lock);
1399 seqlock_init(&sde->head_lock);
1400 spin_lock_init(&sde->senddmactrl_lock);
1401 spin_lock_init(&sde->flushlist_lock);
1402 seqlock_init(&sde->waitlock);
1403
1404 sde->ahg_bits = 0xfffffffe00000000ULL;
1405
1406 sdma_set_state(sde, sdma_state_s00_hw_down);
1407
1408
1409 kref_init(&sde->state.kref);
1410 init_completion(&sde->state.comp);
1411
1412 INIT_LIST_HEAD(&sde->flushlist);
1413 INIT_LIST_HEAD(&sde->dmawait);
1414
1415 sde->tail_csr =
1416 get_kctxt_csr_addr(dd, this_idx, SD(TAIL));
1417
1418 tasklet_setup(&sde->sdma_hw_clean_up_task,
1419 sdma_hw_clean_up_task);
1420 tasklet_setup(&sde->sdma_sw_clean_up_task,
1421 sdma_sw_clean_up_task);
1422 INIT_WORK(&sde->err_halt_worker, sdma_err_halt_wait);
1423 INIT_WORK(&sde->flush_worker, sdma_field_flush);
1424
1425 sde->progress_check_head = 0;
1426
1427 timer_setup(&sde->err_progress_check_timer,
1428 sdma_err_progress_check, 0);
1429
1430 sde->descq = dma_alloc_coherent(&dd->pcidev->dev,
1431 descq_cnt * sizeof(u64[2]),
1432 &sde->descq_phys, GFP_KERNEL);
1433 if (!sde->descq)
1434 goto bail;
1435 sde->tx_ring =
1436 kvzalloc_node(array_size(descq_cnt,
1437 sizeof(struct sdma_txreq *)),
1438 GFP_KERNEL, dd->node);
1439 if (!sde->tx_ring)
1440 goto bail;
1441 }
1442
1443 dd->sdma_heads_size = L1_CACHE_BYTES * num_engines;
1444
1445 dd->sdma_heads_dma = dma_alloc_coherent(&dd->pcidev->dev,
1446 dd->sdma_heads_size,
1447 &dd->sdma_heads_phys,
1448 GFP_KERNEL);
1449 if (!dd->sdma_heads_dma) {
1450 dd_dev_err(dd, "failed to allocate SendDMA head memory\n");
1451 goto bail;
1452 }
1453
1454
1455 dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, SDMA_PAD,
1456 &dd->sdma_pad_phys, GFP_KERNEL);
1457 if (!dd->sdma_pad_dma) {
1458 dd_dev_err(dd, "failed to allocate SendDMA pad memory\n");
1459 goto bail;
1460 }
1461
1462
1463 curr_head = (void *)dd->sdma_heads_dma;
1464 for (this_idx = 0; this_idx < num_engines; ++this_idx) {
1465 unsigned long phys_offset;
1466
1467 sde = &dd->per_sdma[this_idx];
1468
1469 sde->head_dma = curr_head;
1470 curr_head += L1_CACHE_BYTES;
1471 phys_offset = (unsigned long)sde->head_dma -
1472 (unsigned long)dd->sdma_heads_dma;
1473 sde->head_phys = dd->sdma_heads_phys + phys_offset;
1474 init_sdma_regs(sde, per_sdma_credits, idle_cnt);
1475 }
1476 dd->flags |= HFI1_HAS_SEND_DMA;
1477 dd->flags |= idle_cnt ? HFI1_HAS_SDMA_TIMEOUT : 0;
1478 dd->num_sdma = num_engines;
1479 ret = sdma_map_init(dd, port, ppd->vls_operational, NULL);
1480 if (ret < 0)
1481 goto bail;
1482
1483 tmp_sdma_rht = kzalloc(sizeof(*tmp_sdma_rht), GFP_KERNEL);
1484 if (!tmp_sdma_rht) {
1485 ret = -ENOMEM;
1486 goto bail;
1487 }
1488
1489 ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params);
1490 if (ret < 0) {
1491 kfree(tmp_sdma_rht);
1492 goto bail;
1493 }
1494
1495 dd->sdma_rht = tmp_sdma_rht;
1496
1497 dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
1498 return 0;
1499
1500 bail:
1501 sdma_clean(dd, num_engines);
1502 return ret;
1503 }
1504
1505
1506 /**
1507  * sdma_all_running() - post the go running event to every engine
1508  * @dd: hfi1_devdata
1509  * Moves all engines to the running state.
1510  */
1511 void sdma_all_running(struct hfi1_devdata *dd)
1512 {
1513 struct sdma_engine *sde;
1514 unsigned int i;
1515
1516
1517 for (i = 0; i < dd->num_sdma; ++i) {
1518 sde = &dd->per_sdma[i];
1519 sdma_process_event(sde, sdma_event_e30_go_running);
1520 }
1521 }
1522
1523
1524 /**
1525  * sdma_all_idle() - post the go idle event to every engine
1526  * @dd: hfi1_devdata
1527  * Moves all engines to the idle state.
1528  */
1529 void sdma_all_idle(struct hfi1_devdata *dd)
1530 {
1531 struct sdma_engine *sde;
1532 unsigned int i;
1533
1534
1535 for (i = 0; i < dd->num_sdma; ++i) {
1536 sde = &dd->per_sdma[i];
1537 sdma_process_event(sde, sdma_event_e70_go_idle);
1538 }
1539 }
1540
1541
1542 /**
1543  * sdma_start() - kick off state processing for all engines
1544  * @dd: hfi1_devdata
1545  *
1546  * Posts the go hw start event to every engine.  Interrupts need to be
1547  * working by this point.
1548  */
1549 void sdma_start(struct hfi1_devdata *dd)
1550 {
1551 unsigned i;
1552 struct sdma_engine *sde;
1553
1554
1555 for (i = 0; i < dd->num_sdma; ++i) {
1556 sde = &dd->per_sdma[i];
1557 sdma_process_event(sde, sdma_event_e10_go_hw_start);
1558 }
1559 }
1560
1561
1562 /*
1563  * sdma_exit() - quiesce and tear down every engine when the device goes away
1564  */
1565 void sdma_exit(struct hfi1_devdata *dd)
1566 {
1567 unsigned this_idx;
1568 struct sdma_engine *sde;
1569
1570 for (this_idx = 0; dd->per_sdma && this_idx < dd->num_sdma;
1571 ++this_idx) {
1572 sde = &dd->per_sdma[this_idx];
1573 if (!list_empty(&sde->dmawait))
1574 dd_dev_err(dd, "sde %u: dmawait list not empty!\n",
1575 sde->this_idx);
1576 sdma_process_event(sde, sdma_event_e00_go_hw_down);
1577
1578 del_timer_sync(&sde->err_progress_check_timer);
1579
1580
1581
1582
1583
1584
1585 sdma_finalput(&sde->state);
1586 }
1587 }
1588
1589
1590
1591 /* unmap the DMA mapping described by the indicated descriptor */
1592 static inline void sdma_unmap_desc(
1593 struct hfi1_devdata *dd,
1594 struct sdma_desc *descp)
1595 {
1596 switch (sdma_mapping_type(descp)) {
1597 case SDMA_MAP_SINGLE:
1598 dma_unmap_single(
1599 &dd->pcidev->dev,
1600 sdma_mapping_addr(descp),
1601 sdma_mapping_len(descp),
1602 DMA_TO_DEVICE);
1603 break;
1604 case SDMA_MAP_PAGE:
1605 dma_unmap_page(
1606 &dd->pcidev->dev,
1607 sdma_mapping_addr(descp),
1608 sdma_mapping_len(descp),
1609 DMA_TO_DEVICE);
1610 break;
1611 }
1612 }
1613
1614
1615
1616
1617 /* return the AHG header mode encoded in the first descriptor of the tx */
1618 static inline u8 ahg_mode(struct sdma_txreq *tx)
1619 {
1620 return (tx->descp[0].qw[1] & SDMA_DESC1_HEADER_MODE_SMASK)
1621 >> SDMA_DESC1_HEADER_MODE_SHIFT;
1622 }
1623
1624
1625 /**
1626  * __sdma_txclean() - clean a tx of its DMA mappings and allocations
1627  * @dd: hfi1_devdata used for unmapping
1628  * @tx: tx request to clean
1629  *
1630  * Used by the progress routine when a tx completes and by a ULP to
1631  * toss an in-progress tx build.
1632  *
1633  * Safe to call multiple times on the same tx.
1634  */
1635 void __sdma_txclean(
1636 struct hfi1_devdata *dd,
1637 struct sdma_txreq *tx)
1638 {
1639 u16 i;
1640
1641 if (tx->num_desc) {
1642 u8 skip = 0, mode = ahg_mode(tx);
1643
1644
1645 sdma_unmap_desc(dd, &tx->descp[0]);
1646
1647 if (mode > SDMA_AHG_APPLY_UPDATE1)
1648 skip = mode >> 1;
1649 for (i = 1 + skip; i < tx->num_desc; i++)
1650 sdma_unmap_desc(dd, &tx->descp[i]);
1651 tx->num_desc = 0;
1652 }
1653 kfree(tx->coalesce_buf);
1654 tx->coalesce_buf = NULL;
1655
1656 if (unlikely(tx->desc_limit > ARRAY_SIZE(tx->descs))) {
1657 tx->desc_limit = ARRAY_SIZE(tx->descs);
1658 kfree(tx->descp);
1659 }
1660 }
1661
1662 static inline u16 sdma_gethead(struct sdma_engine *sde)
1663 {
1664 struct hfi1_devdata *dd = sde->dd;
1665 int use_dmahead;
1666 u16 hwhead;
1667
1668 #ifdef CONFIG_SDMA_VERBOSITY
1669 dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
1670 sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
1671 #endif
1672
1673 retry:
1674 use_dmahead = HFI1_CAP_IS_KSET(USE_SDMA_HEAD) && __sdma_running(sde) &&
1675 (dd->flags & HFI1_HAS_SDMA_TIMEOUT);
1676 hwhead = use_dmahead ?
1677 (u16)le64_to_cpu(*sde->head_dma) :
1678 (u16)read_sde_csr(sde, SD(HEAD));
1679
1680 if (unlikely(HFI1_CAP_IS_KSET(SDMA_HEAD_CHECK))) {
1681 u16 cnt;
1682 u16 swtail;
1683 u16 swhead;
1684 int sane;
1685
1686 swhead = sde->descq_head & sde->sdma_mask;
1687
1688 swtail = READ_ONCE(sde->descq_tail) & sde->sdma_mask;
1689 cnt = sde->descq_cnt;
1690
1691 if (swhead < swtail)
1692
1693 sane = (hwhead >= swhead) & (hwhead <= swtail);
1694 else if (swhead > swtail)
1695
1696 sane = ((hwhead >= swhead) && (hwhead < cnt)) ||
1697 (hwhead <= swtail);
1698 else
1699
1700 sane = (hwhead == swhead);
1701
1702 if (unlikely(!sane)) {
1703 dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%u swhd=%u swtl=%u cnt=%u\n",
1704 sde->this_idx,
1705 use_dmahead ? "dma" : "kreg",
1706 hwhead, swhead, swtail, cnt);
1707 if (use_dmahead) {
1708
1709 use_dmahead = 0;
1710 goto retry;
1711 }
1712
1713 hwhead = swhead;
1714 }
1715 }
1716 return hwhead;
1717 }
1718
1719
1720 /*
1721  * Called when descriptors may have become available: wakes up to
1722  * SDMA_WAIT_BATCH_SIZE waiters whose descriptor needs now fit,
1723  * highest priority first.  Called with head_lock held.
1724  */
1725 static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
1726 {
1727 struct iowait *wait, *nw, *twait;
1728 struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
1729 uint i, n = 0, seq, tidx = 0;
1730
1731 #ifdef CONFIG_SDMA_VERBOSITY
1732 dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
1733 slashstrip(__FILE__), __LINE__, __func__);
1734 dd_dev_err(sde->dd, "avail: %u\n", avail);
1735 #endif
1736
1737 do {
1738 seq = read_seqbegin(&sde->waitlock);
1739 if (!list_empty(&sde->dmawait)) {
1740
1741 write_seqlock(&sde->waitlock);
1742
1743 list_for_each_entry_safe(
1744 wait,
1745 nw,
1746 &sde->dmawait,
1747 list) {
1748 u32 num_desc;
1749
1750 if (!wait->wakeup)
1751 continue;
1752 if (n == ARRAY_SIZE(waits))
1753 break;
1754 iowait_init_priority(wait);
1755 num_desc = iowait_get_all_desc(wait);
1756 if (num_desc > avail)
1757 break;
1758 avail -= num_desc;
1759
1760 if (n) {
1761 twait = waits[tidx];
1762 tidx =
1763 iowait_priority_update_top(wait,
1764 twait,
1765 n,
1766 tidx);
1767 }
1768 list_del_init(&wait->list);
1769 waits[n++] = wait;
1770 }
1771 write_sequnlock(&sde->waitlock);
1772 break;
1773 }
1774 } while (read_seqretry(&sde->waitlock, seq));
1775
1776
1777 if (n)
1778 waits[tidx]->wakeup(waits[tidx], SDMA_AVAIL_REASON);
1779
1780 for (i = 0; i < n; i++)
1781 if (i != tidx)
1782 waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
1783 }
1784
1785 /* retire completed descriptors; caller must hold head_lock */
1786 static void sdma_make_progress(struct sdma_engine *sde, u64 status)
1787 {
1788 struct sdma_txreq *txp = NULL;
1789 int progress = 0;
1790 u16 hwhead, swhead;
1791 int idle_check_done = 0;
1792
1793 hwhead = sdma_gethead(sde);
1794
1795
1796 /*
1797  * Advance the software head until it catches up with the hardware
1798  * head, invoking the completion callback whenever the last
1799  * descriptor of a txreq is passed.
1800  */
1801 retry:
1802 txp = get_txhead(sde);
1803 swhead = sde->descq_head & sde->sdma_mask;
1804 trace_hfi1_sdma_progress(sde, hwhead, swhead, txp);
1805 while (swhead != hwhead) {
1806
1807 swhead = ++sde->descq_head & sde->sdma_mask;
1808
1809
1810 if (txp && txp->next_descq_idx == swhead) {
1811
1812 sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
1813 complete_tx(sde, txp, SDMA_TXREQ_S_OK);
1814
1815 txp = get_txhead(sde);
1816 }
1817 trace_hfi1_sdma_progress(sde, hwhead, swhead, txp);
1818 progress++;
1819 }
1820
1821
1822
1823 /*
1824  * The SDMA idle interrupt is not guaranteed to be ordered with the
1825  * DMA update of the head shadow, so the head value read above may be
1826  * stale.  If the idle interrupt fired while descriptors still appear
1827  * outstanding, re-read the head from the CSR (which is coherent) and
1828  * retry once.
1829  */
1830 if ((status & sde->idle_mask) && !idle_check_done) {
1831 u16 swtail;
1832
1833 swtail = READ_ONCE(sde->descq_tail) & sde->sdma_mask;
1834 if (swtail != hwhead) {
1835 hwhead = (u16)read_sde_csr(sde, SD(HEAD));
1836 idle_check_done = 1;
1837 goto retry;
1838 }
1839 }
1840
1841 sde->last_status = status;
1842 if (progress)
1843 sdma_desc_avail(sde, sdma_descq_freecnt(sde));
1844 }
1845
1846
1847 /**
1848  * sdma_engine_interrupt() - interrupt handler for an engine
1849  * @sde: sdma engine
1850  * @status: sdma interrupt reason
1851  *
1852  * Status is a mask of the three possible interrupt sources (idle,
1853  * progress and the general sdma interrupt).
1854  */
1855 void sdma_engine_interrupt(struct sdma_engine *sde, u64 status)
1856 {
1857 trace_hfi1_sdma_engine_interrupt(sde, status);
1858 write_seqlock(&sde->head_lock);
1859 sdma_set_desc_cnt(sde, sdma_desct_intr);
1860 if (status & sde->idle_mask)
1861 sde->idle_int_cnt++;
1862 else if (status & sde->progress_mask)
1863 sde->progress_int_cnt++;
1864 else if (status & sde->int_mask)
1865 sde->sdma_int_cnt++;
1866 sdma_make_progress(sde, status);
1867 write_sequnlock(&sde->head_lock);
1868 }
1869
1870 /**
1871  * sdma_engine_error() - error handler for an engine
1872  * @sde: sdma engine
1873  * @status: engine error status
1874  */
1875 void sdma_engine_error(struct sdma_engine *sde, u64 status)
1876 {
1877 unsigned long flags;
1878
1879 #ifdef CONFIG_SDMA_VERBOSITY
1880 dd_dev_err(sde->dd, "CONFIG SDMA(%u) error status 0x%llx state %s\n",
1881 sde->this_idx,
1882 (unsigned long long)status,
1883 sdma_state_names[sde->state.current_state]);
1884 #endif
1885 spin_lock_irqsave(&sde->tail_lock, flags);
1886 write_seqlock(&sde->head_lock);
1887 if (status & ALL_SDMA_ENG_HALT_ERRS)
1888 __sdma_process_event(sde, sdma_event_e60_hw_halted);
1889 if (status & ~SD(ENG_ERR_STATUS_SDMA_HALT_ERR_SMASK)) {
1890 dd_dev_err(sde->dd,
1891 "SDMA (%u) engine error: 0x%llx state %s\n",
1892 sde->this_idx,
1893 (unsigned long long)status,
1894 sdma_state_names[sde->state.current_state]);
1895 dump_sdma_state(sde);
1896 }
1897 write_sequnlock(&sde->head_lock);
1898 spin_unlock_irqrestore(&sde->tail_lock, flags);
1899 }
1900
1901 static void sdma_sendctrl(struct sdma_engine *sde, unsigned op)
1902 {
1903 u64 set_senddmactrl = 0;
1904 u64 clr_senddmactrl = 0;
1905 unsigned long flags;
1906
1907 #ifdef CONFIG_SDMA_VERBOSITY
1908 dd_dev_err(sde->dd, "CONFIG SDMA(%u) senddmactrl E=%d I=%d H=%d C=%d\n",
1909 sde->this_idx,
1910 (op & SDMA_SENDCTRL_OP_ENABLE) ? 1 : 0,
1911 (op & SDMA_SENDCTRL_OP_INTENABLE) ? 1 : 0,
1912 (op & SDMA_SENDCTRL_OP_HALT) ? 1 : 0,
1913 (op & SDMA_SENDCTRL_OP_CLEANUP) ? 1 : 0);
1914 #endif
1915
1916 if (op & SDMA_SENDCTRL_OP_ENABLE)
1917 set_senddmactrl |= SD(CTRL_SDMA_ENABLE_SMASK);
1918 else
1919 clr_senddmactrl |= SD(CTRL_SDMA_ENABLE_SMASK);
1920
1921 if (op & SDMA_SENDCTRL_OP_INTENABLE)
1922 set_senddmactrl |= SD(CTRL_SDMA_INT_ENABLE_SMASK);
1923 else
1924 clr_senddmactrl |= SD(CTRL_SDMA_INT_ENABLE_SMASK);
1925
1926 if (op & SDMA_SENDCTRL_OP_HALT)
1927 set_senddmactrl |= SD(CTRL_SDMA_HALT_SMASK);
1928 else
1929 clr_senddmactrl |= SD(CTRL_SDMA_HALT_SMASK);
1930
1931 spin_lock_irqsave(&sde->senddmactrl_lock, flags);
1932
1933 sde->p_senddmactrl |= set_senddmactrl;
1934 sde->p_senddmactrl &= ~clr_senddmactrl;
1935
1936 if (op & SDMA_SENDCTRL_OP_CLEANUP)
1937 write_sde_csr(sde, SD(CTRL),
1938 sde->p_senddmactrl |
1939 SD(CTRL_SDMA_CLEANUP_SMASK));
1940 else
1941 write_sde_csr(sde, SD(CTRL), sde->p_senddmactrl);
1942
1943 spin_unlock_irqrestore(&sde->senddmactrl_lock, flags);
1944
1945 #ifdef CONFIG_SDMA_VERBOSITY
1946 sdma_dumpstate(sde);
1947 #endif
1948 }
1949
1950 static void sdma_setlengen(struct sdma_engine *sde)
1951 {
1952 #ifdef CONFIG_SDMA_VERBOSITY
1953 dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
1954 sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
1955 #endif
1956
1957 /*
1958  * Set SendDmaLenGen and clear-then-set the MSB of the generation
1959  * count to enable generation checking and load the internal
1960  * generation counter.
1961  */
1962 write_sde_csr(sde, SD(LEN_GEN),
1963 (sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT));
1964 write_sde_csr(sde, SD(LEN_GEN),
1965 ((sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT)) |
1966 (4ULL << SD(LEN_GEN_GENERATION_SHIFT)));
1967 }
1968
1969 static inline void sdma_update_tail(struct sdma_engine *sde, u16 tail)
1970 {
1971 /* make sure descriptor writes are visible before advancing the tail */
1972 smp_wmb();
1973 writeq(tail, sde->tail_csr);
1974 }
1975
1976 /*
1977  * Restart the engine's hardware state: reset length/generation, the
1978  * tail and the shadow head, and clear a stale header request error.
1979  */
1980 static void sdma_hw_start_up(struct sdma_engine *sde)
1981 {
1982 u64 reg;
1983
1984 #ifdef CONFIG_SDMA_VERBOSITY
1985 dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n",
1986 sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
1987 #endif
1988
1989 sdma_setlengen(sde);
1990 sdma_update_tail(sde, 0);
1991 *sde->head_dma = 0;
1992
1993 reg = SD(ENG_ERR_CLEAR_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_MASK) <<
1994 SD(ENG_ERR_CLEAR_SDMA_HEADER_REQUEST_FIFO_UNC_ERR_SHIFT);
1995 write_sde_csr(sde, SD(ENG_ERR_CLEAR), reg);
1996 }
1997
1998
1999
2000
2001
2002 /* program the send packet integrity checks for this engine */
2003 static void set_sdma_integrity(struct sdma_engine *sde)
2004 {
2005 struct hfi1_devdata *dd = sde->dd;
2006
2007 write_sde_csr(sde, SD(CHECK_ENABLE),
2008 hfi1_pkt_base_sdma_integrity(dd));
2009 }
2010
2011 static void init_sdma_regs(
2012 struct sdma_engine *sde,
2013 u32 credits,
2014 uint idle_cnt)
2015 {
2016 u8 opval, opmask;
2017 #ifdef CONFIG_SDMA_VERBOSITY
2018 struct hfi1_devdata *dd = sde->dd;
2019
2020 dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n",
2021 sde->this_idx, slashstrip(__FILE__), __LINE__, __func__);
2022 #endif
2023
2024 write_sde_csr(sde, SD(BASE_ADDR), sde->descq_phys);
2025 sdma_setlengen(sde);
2026 sdma_update_tail(sde, 0);
2027 write_sde_csr(sde, SD(RELOAD_CNT), idle_cnt);
2028 write_sde_csr(sde, SD(DESC_CNT), 0);
2029 write_sde_csr(sde, SD(HEAD_ADDR), sde->head_phys);
2030 write_sde_csr(sde, SD(MEMORY),
2031 ((u64)credits << SD(MEMORY_SDMA_MEMORY_CNT_SHIFT)) |
2032 ((u64)(credits * sde->this_idx) <<
2033 SD(MEMORY_SDMA_MEMORY_INDEX_SHIFT)));
2034 write_sde_csr(sde, SD(ENG_ERR_MASK), ~0ull);
2035 set_sdma_integrity(sde);
2036 opmask = OPCODE_CHECK_MASK_DISABLED;
2037 opval = OPCODE_CHECK_VAL_DISABLED;
2038 write_sde_csr(sde, SD(CHECK_OPCODE),
2039 (opmask << SEND_CTXT_CHECK_OPCODE_MASK_SHIFT) |
2040 (opval << SEND_CTXT_CHECK_OPCODE_VALUE_SHIFT));
2041 }
2042
2043 #ifdef CONFIG_SDMA_VERBOSITY
2044
2045 #define sdma_dumpstate_helper0(reg) do { \
2046 csr = read_csr(sde->dd, reg); \
2047 dd_dev_err(sde->dd, "%36s 0x%016llx\n", #reg, csr); \
2048 } while (0)
2049
2050 #define sdma_dumpstate_helper(reg) do { \
2051 csr = read_sde_csr(sde, reg); \
2052 dd_dev_err(sde->dd, "%36s[%02u] 0x%016llx\n", \
2053 #reg, sde->this_idx, csr); \
2054 } while (0)
2055
2056 #define sdma_dumpstate_helper2(reg) do { \
2057 csr = read_csr(sde->dd, reg + (8 * i)); \
2058 dd_dev_err(sde->dd, "%33s_%02u 0x%016llx\n", \
2059 #reg, i, csr); \
2060 } while (0)
2061
2062 void sdma_dumpstate(struct sdma_engine *sde)
2063 {
2064 u64 csr;
2065 unsigned i;
2066
2067 sdma_dumpstate_helper(SD(CTRL));
2068 sdma_dumpstate_helper(SD(STATUS));
2069 sdma_dumpstate_helper0(SD(ERR_STATUS));
2070 sdma_dumpstate_helper0(SD(ERR_MASK));
2071 sdma_dumpstate_helper(SD(ENG_ERR_STATUS));
2072 sdma_dumpstate_helper(SD(ENG_ERR_MASK));
2073
2074 for (i = 0; i < CCE_NUM_INT_CSRS; ++i) {
2075 sdma_dumpstate_helper2(CCE_INT_STATUS);
2076 sdma_dumpstate_helper2(CCE_INT_MASK);
2077 sdma_dumpstate_helper2(CCE_INT_BLOCKED);
2078 }
2079
2080 sdma_dumpstate_helper(SD(TAIL));
2081 sdma_dumpstate_helper(SD(HEAD));
2082 sdma_dumpstate_helper(SD(PRIORITY_THLD));
2083 sdma_dumpstate_helper(SD(IDLE_CNT));
2084 sdma_dumpstate_helper(SD(RELOAD_CNT));
2085 sdma_dumpstate_helper(SD(DESC_CNT));
2086 sdma_dumpstate_helper(SD(DESC_FETCHED_CNT));
2087 sdma_dumpstate_helper(SD(MEMORY));
2088 sdma_dumpstate_helper0(SD(ENGINES));
2089 sdma_dumpstate_helper0(SD(MEM_SIZE));
2090
2091 sdma_dumpstate_helper(SD(BASE_ADDR));
2092 sdma_dumpstate_helper(SD(LEN_GEN));
2093 sdma_dumpstate_helper(SD(HEAD_ADDR));
2094 sdma_dumpstate_helper(SD(CHECK_ENABLE));
2095 sdma_dumpstate_helper(SD(CHECK_VL));
2096 sdma_dumpstate_helper(SD(CHECK_JOB_KEY));
2097 sdma_dumpstate_helper(SD(CHECK_PARTITION_KEY));
2098 sdma_dumpstate_helper(SD(CHECK_SLID));
2099 sdma_dumpstate_helper(SD(CHECK_OPCODE));
2100 }
2101 #endif
2102
2103 static void dump_sdma_state(struct sdma_engine *sde)
2104 {
2105 struct hw_sdma_desc *descqp;
2106 u64 desc[2];
2107 u64 addr;
2108 u8 gen;
2109 u16 len;
2110 u16 head, tail, cnt;
2111
2112 head = sde->descq_head & sde->sdma_mask;
2113 tail = sde->descq_tail & sde->sdma_mask;
2114 cnt = sdma_descq_freecnt(sde);
2115
2116 dd_dev_err(sde->dd,
2117 "SDMA (%u) descq_head: %u descq_tail: %u freecnt: %u FLE %d\n",
2118 sde->this_idx, head, tail, cnt,
2119 !list_empty(&sde->flushlist));
2120
2121 /* print info for each entry in the descriptor queue */
2122 while (head != tail) {
2123 char flags[6] = { 'x', 'x', 'x', 'x', 0 };
2124
2125 descqp = &sde->descq[head];
2126 desc[0] = le64_to_cpu(descqp->qw[0]);
2127 desc[1] = le64_to_cpu(descqp->qw[1]);
2128 flags[0] = (desc[1] & SDMA_DESC1_INT_REQ_FLAG) ? 'I' : '-';
2129 flags[1] = (desc[1] & SDMA_DESC1_HEAD_TO_HOST_FLAG) ?
2130 'H' : '-';
2131 flags[2] = (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG) ? 'F' : '-';
2132 flags[3] = (desc[0] & SDMA_DESC0_LAST_DESC_FLAG) ? 'L' : '-';
2133 addr = (desc[0] >> SDMA_DESC0_PHY_ADDR_SHIFT)
2134 & SDMA_DESC0_PHY_ADDR_MASK;
2135 gen = (desc[1] >> SDMA_DESC1_GENERATION_SHIFT)
2136 & SDMA_DESC1_GENERATION_MASK;
2137 len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT)
2138 & SDMA_DESC0_BYTE_COUNT_MASK;
2139 dd_dev_err(sde->dd,
2140 "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
2141 head, flags, addr, gen, len);
2142 dd_dev_err(sde->dd,
2143 "\tdesc0:0x%016llx desc1 0x%016llx\n",
2144 desc[0], desc[1]);
2145 if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG)
2146 dd_dev_err(sde->dd,
2147 "\taidx: %u amode: %u alen: %u\n",
2148 (u8)((desc[1] &
2149 SDMA_DESC1_HEADER_INDEX_SMASK) >>
2150 SDMA_DESC1_HEADER_INDEX_SHIFT),
2151 (u8)((desc[1] &
2152 SDMA_DESC1_HEADER_MODE_SMASK) >>
2153 SDMA_DESC1_HEADER_MODE_SHIFT),
2154 (u8)((desc[1] &
2155 SDMA_DESC1_HEADER_DWS_SMASK) >>
2156 SDMA_DESC1_HEADER_DWS_SHIFT));
2157 head++;
2158 head &= sde->sdma_mask;
2159 }
2160 }
2161
2162 #define SDE_FMT \
2163 "SDE %u CPU %d STE %s C 0x%llx S 0x%016llx E 0x%llx T(HW) 0x%llx T(SW) 0x%x H(HW) 0x%llx H(SW) 0x%x H(D) 0x%llx DM 0x%llx GL 0x%llx R 0x%llx LIS 0x%llx AHGI 0x%llx TXT %u TXH %u DT %u DH %u FLNE %d DQF %u SLC 0x%llx\n"
2164
2165
2166 /*
2167  * sdma_seqfile_dump_sde() - debugfs dump of an sde
2168  * @s: seq file
2169  * @sde: send dma engine to dump
2170  */
2171 void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *sde)
2172 {
2173 u16 head, tail;
2174 struct hw_sdma_desc *descqp;
2175 u64 desc[2];
2176 u64 addr;
2177 u8 gen;
2178 u16 len;
2179
2180 head = sde->descq_head & sde->sdma_mask;
2181 tail = READ_ONCE(sde->descq_tail) & sde->sdma_mask;
2182 seq_printf(s, SDE_FMT, sde->this_idx,
2183 sde->cpu,
2184 sdma_state_name(sde->state.current_state),
2185 (unsigned long long)read_sde_csr(sde, SD(CTRL)),
2186 (unsigned long long)read_sde_csr(sde, SD(STATUS)),
2187 (unsigned long long)read_sde_csr(sde, SD(ENG_ERR_STATUS)),
2188 (unsigned long long)read_sde_csr(sde, SD(TAIL)), tail,
2189 (unsigned long long)read_sde_csr(sde, SD(HEAD)), head,
2190 (unsigned long long)le64_to_cpu(*sde->head_dma),
2191 (unsigned long long)read_sde_csr(sde, SD(MEMORY)),
2192 (unsigned long long)read_sde_csr(sde, SD(LEN_GEN)),
2193 (unsigned long long)read_sde_csr(sde, SD(RELOAD_CNT)),
2194 (unsigned long long)sde->last_status,
2195 (unsigned long long)sde->ahg_bits,
2196 sde->tx_tail,
2197 sde->tx_head,
2198 sde->descq_tail,
2199 sde->descq_head,
2200 !list_empty(&sde->flushlist),
2201 sde->descq_full_count,
2202 (unsigned long long)read_sde_csr(sde, SEND_DMA_CHECK_SLID));
2203
2204 /* print info for each entry in the descriptor queue */
2205 while (head != tail) {
2206 char flags[6] = { 'x', 'x', 'x', 'x', 0 };
2207
2208 descqp = &sde->descq[head];
2209 desc[0] = le64_to_cpu(descqp->qw[0]);
2210 desc[1] = le64_to_cpu(descqp->qw[1]);
2211 flags[0] = (desc[1] & SDMA_DESC1_INT_REQ_FLAG) ? 'I' : '-';
2212 flags[1] = (desc[1] & SDMA_DESC1_HEAD_TO_HOST_FLAG) ?
2213 'H' : '-';
2214 flags[2] = (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG) ? 'F' : '-';
2215 flags[3] = (desc[0] & SDMA_DESC0_LAST_DESC_FLAG) ? 'L' : '-';
2216 addr = (desc[0] >> SDMA_DESC0_PHY_ADDR_SHIFT)
2217 & SDMA_DESC0_PHY_ADDR_MASK;
2218 gen = (desc[1] >> SDMA_DESC1_GENERATION_SHIFT)
2219 & SDMA_DESC1_GENERATION_MASK;
2220 len = (desc[0] >> SDMA_DESC0_BYTE_COUNT_SHIFT)
2221 & SDMA_DESC0_BYTE_COUNT_MASK;
2222 seq_printf(s,
2223 "\tdesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes\n",
2224 head, flags, addr, gen, len);
2225 if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG)
2226 seq_printf(s, "\t\tahgidx: %u ahgmode: %u\n",
2227 (u8)((desc[1] &
2228 SDMA_DESC1_HEADER_INDEX_SMASK) >>
2229 SDMA_DESC1_HEADER_INDEX_SHIFT),
2230 (u8)((desc[1] &
2231 SDMA_DESC1_HEADER_MODE_SMASK) >>
2232 SDMA_DESC1_HEADER_MODE_SHIFT));
2233 head = (head + 1) & sde->sdma_mask;
2234 }
2235 }
2236
2237
2238 /*
2239  * Insert the current generation number into qw1 of a descriptor.
2240  */
2241 static inline u64 add_gen(struct sdma_engine *sde, u64 qw1)
2242 {
2243 u8 generation = (sde->descq_tail >> sde->sdma_shift) & 3;
2244
2245 qw1 &= ~SDMA_DESC1_GENERATION_SMASK;
2246 qw1 |= ((u64)generation & SDMA_DESC1_GENERATION_MASK)
2247 << SDMA_DESC1_GENERATION_SHIFT;
2248 return qw1;
2249 }
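/*
 * Worked example of the generation arithmetic: with descq_cnt == 2048,
 * sdma_shift == 11, so the 2-bit generation (descq_tail >> 11) & 3
 * advances by one each time the tail wraps the ring, cycling
 * 0, 1, 2, 3, 0, ...
 */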
2250
2251
2252
2253
2254
2255
2256 /*
2257  * Submit the indicated tx to the descriptor ring.
2258  *
2259  * Space has already been guaranteed and the tail side of the ring is
2260  * locked.  The hardware tail update is left to the caller, which is
2261  * why the new tail index is returned.
2262  *
2263  * For AHG, the generation number is not rewritten for up to two
2264  * descriptors that follow the first one, so the header update
2265  * descriptors pass through unmodified.
2266  */
2267 static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx)
2268 {
2269 int i;
2270 u16 tail;
2271 struct sdma_desc *descp = tx->descp;
2272 u8 skip = 0, mode = ahg_mode(tx);
2273
2274 tail = sde->descq_tail & sde->sdma_mask;
2275 sde->descq[tail].qw[0] = cpu_to_le64(descp->qw[0]);
2276 sde->descq[tail].qw[1] = cpu_to_le64(add_gen(sde, descp->qw[1]));
2277 trace_hfi1_sdma_descriptor(sde, descp->qw[0], descp->qw[1],
2278 tail, &sde->descq[tail]);
2279 tail = ++sde->descq_tail & sde->sdma_mask;
2280 descp++;
2281 if (mode > SDMA_AHG_APPLY_UPDATE1)
2282 skip = mode >> 1;
2283 for (i = 1; i < tx->num_desc; i++, descp++) {
2284 u64 qw1;
2285
2286 sde->descq[tail].qw[0] = cpu_to_le64(descp->qw[0]);
2287 if (skip) {
2288
2289 qw1 = descp->qw[1];
2290 skip--;
2291 } else {
2292
2293 qw1 = add_gen(sde, descp->qw[1]);
2294 }
2295 sde->descq[tail].qw[1] = cpu_to_le64(qw1);
2296 trace_hfi1_sdma_descriptor(sde, descp->qw[0], qw1,
2297 tail, &sde->descq[tail]);
2298 tail = ++sde->descq_tail & sde->sdma_mask;
2299 }
2300 tx->next_descq_idx = tail;
2301 #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
2302 tx->sn = sde->tail_sn++;
2303 trace_hfi1_sdma_in_sn(sde, tx->sn);
2304 WARN_ON_ONCE(sde->tx_ring[sde->tx_tail & sde->sdma_mask]);
2305 #endif
2306 sde->tx_ring[sde->tx_tail++ & sde->sdma_mask] = tx;
2307 sde->desc_avail -= tx->num_desc;
2308 return tail;
2309 }
2310
2311
2312
2313
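/*
 * Re-check for ring space after a descriptor-full condition.  Returns
 * -EAGAIN if space has freed up (the caller should retry the submit),
 * otherwise the result of the iowait sleep callback, or -EBUSY when no
 * wait structure was supplied.
 */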
2314 static int sdma_check_progress(
2315 struct sdma_engine *sde,
2316 struct iowait_work *wait,
2317 struct sdma_txreq *tx,
2318 bool pkts_sent)
2319 {
2320 int ret;
2321
2322 sde->desc_avail = sdma_descq_freecnt(sde);
2323 if (tx->num_desc <= sde->desc_avail)
2324 return -EAGAIN;
2325
2326 if (wait && iowait_ioww_to_iow(wait)->sleep) {
2327 unsigned int seq;
2328
2329 seq = raw_seqcount_begin(
2330 (const seqcount_t *)&sde->head_lock.seqcount);
2331 ret = wait->iow->sleep(sde, wait, tx, seq, pkts_sent);
2332 if (ret == -EAGAIN)
2333 sde->desc_avail = sdma_descq_freecnt(sde);
2334 } else {
2335 ret = -EBUSY;
2336 }
2337 return ret;
2338 }
2339
2354
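/**
 * sdma_send_txreq() - submit one tx request to an SDMA engine ring
 * @sde: SDMA engine to use
 * @wait: iowait work structure to use when the ring is full (may be NULL)
 * @tx: fully built sdma_txreq to submit
 * @pkts_sent: whether any packet has been sent yet (passed to the sleep
 *             callback when the ring is full)
 *
 * Return: 0 on success, -EINVAL if the tx still has unmapped payload,
 * -ECOMM if the engine is not running (the tx is queued to the flush list
 * instead), or the status from sdma_check_progress() when the ring is full.
 */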
2355 int sdma_send_txreq(struct sdma_engine *sde,
2356 struct iowait_work *wait,
2357 struct sdma_txreq *tx,
2358 bool pkts_sent)
2359 {
2360 int ret = 0;
2361 u16 tail;
2362 unsigned long flags;
2363
2364
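	/* the tx must be fully built: no payload bytes may remain unmapped */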
2365 if (unlikely(tx->tlen))
2366 return -EINVAL;
2367 tx->wait = iowait_ioww_to_iow(wait);
2368 spin_lock_irqsave(&sde->tail_lock, flags);
2369 retry:
2370 if (unlikely(!__sdma_running(sde)))
2371 goto unlock_noconn;
2372 if (unlikely(tx->num_desc > sde->desc_avail))
2373 goto nodesc;
2374 tail = submit_tx(sde, tx);
2375 if (wait)
2376 iowait_sdma_inc(iowait_ioww_to_iow(wait));
2377 sdma_update_tail(sde, tail);
2378 unlock:
2379 spin_unlock_irqrestore(&sde->tail_lock, flags);
2380 return ret;
2381 unlock_noconn:
2382 if (wait)
2383 iowait_sdma_inc(iowait_ioww_to_iow(wait));
2384 tx->next_descq_idx = 0;
2385 #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
2386 tx->sn = sde->tail_sn++;
2387 trace_hfi1_sdma_in_sn(sde, tx->sn);
2388 #endif
2389 spin_lock(&sde->flushlist_lock);
2390 list_add_tail(&tx->list, &sde->flushlist);
2391 spin_unlock(&sde->flushlist_lock);
2392 iowait_inc_wait_count(wait, tx->num_desc);
2393 queue_work_on(sde->cpu, system_highpri_wq, &sde->flush_worker);
2394 ret = -ECOMM;
2395 goto unlock;
2396 nodesc:
2397 ret = sdma_check_progress(sde, wait, tx, pkts_sent);
2398 if (ret == -EAGAIN) {
2399 ret = 0;
2400 goto retry;
2401 }
2402 sde->descq_full_count++;
2403 goto unlock;
2404 }
2405
2433
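/**
 * sdma_send_txlist() - submit a list of tx requests to an SDMA engine
 * @sde: SDMA engine to use
 * @wait: iowait work structure to use when the ring is full (may be NULL)
 * @tx_list: list of sdma_txreqs to submit
 * @count_out: returns the number of txreqs submitted plus flushed
 *
 * Txreqs are removed from the list as they are consumed.  The hardware
 * tail is updated periodically during the walk and once more at the end.
 * If the engine is not running, the remaining txreqs are moved to the
 * flush list and -ECOMM is returned.
 *
 * Return: 0 on success, -EINVAL if a tx still has unmapped payload,
 * -ECOMM if the engine is not running, or the status from
 * sdma_check_progress() when the ring is full.
 */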
2434 int sdma_send_txlist(struct sdma_engine *sde, struct iowait_work *wait,
2435 struct list_head *tx_list, u16 *count_out)
2436 {
2437 struct sdma_txreq *tx, *tx_next;
2438 int ret = 0;
2439 unsigned long flags;
2440 u16 tail = INVALID_TAIL;
2441 u32 submit_count = 0, flush_count = 0, total_count;
2442
2443 spin_lock_irqsave(&sde->tail_lock, flags);
2444 retry:
2445 list_for_each_entry_safe(tx, tx_next, tx_list, list) {
2446 tx->wait = iowait_ioww_to_iow(wait);
2447 if (unlikely(!__sdma_running(sde)))
2448 goto unlock_noconn;
2449 if (unlikely(tx->num_desc > sde->desc_avail))
2450 goto nodesc;
2451 if (unlikely(tx->tlen)) {
2452 ret = -EINVAL;
2453 goto update_tail;
2454 }
2455 list_del_init(&tx->list);
2456 tail = submit_tx(sde, tx);
2457 submit_count++;
2458 if (tail != INVALID_TAIL &&
2459 (submit_count & SDMA_TAIL_UPDATE_THRESH) == 0) {
2460 sdma_update_tail(sde, tail);
2461 tail = INVALID_TAIL;
2462 }
2463 }
2464 update_tail:
2465 total_count = submit_count + flush_count;
2466 if (wait) {
2467 iowait_sdma_add(iowait_ioww_to_iow(wait), total_count);
2468 iowait_starve_clear(submit_count > 0,
2469 iowait_ioww_to_iow(wait));
2470 }
2471 if (tail != INVALID_TAIL)
2472 sdma_update_tail(sde, tail);
2473 spin_unlock_irqrestore(&sde->tail_lock, flags);
2474 *count_out = total_count;
2475 return ret;
2476 unlock_noconn:
2477 spin_lock(&sde->flushlist_lock);
2478 list_for_each_entry_safe(tx, tx_next, tx_list, list) {
2479 tx->wait = iowait_ioww_to_iow(wait);
2480 list_del_init(&tx->list);
2481 tx->next_descq_idx = 0;
2482 #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
2483 tx->sn = sde->tail_sn++;
2484 trace_hfi1_sdma_in_sn(sde, tx->sn);
2485 #endif
2486 list_add_tail(&tx->list, &sde->flushlist);
2487 flush_count++;
2488 iowait_inc_wait_count(wait, tx->num_desc);
2489 }
2490 spin_unlock(&sde->flushlist_lock);
2491 queue_work_on(sde->cpu, system_highpri_wq, &sde->flush_worker);
2492 ret = -ECOMM;
2493 goto update_tail;
2494 nodesc:
2495 ret = sdma_check_progress(sde, wait, tx, submit_count > 0);
2496 if (ret == -EAGAIN) {
2497 ret = 0;
2498 goto retry;
2499 }
2500 sde->descq_full_count++;
2501 goto update_tail;
2502 }
2503
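/*
 * Deliver a state machine event with both the tail lock and the head
 * seqlock held; if the engine ends up running, re-evaluate descriptor
 * availability so waiters blocked on ring space can be serviced.
 */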
2504 static void sdma_process_event(struct sdma_engine *sde, enum sdma_events event)
2505 {
2506 unsigned long flags;
2507
2508 spin_lock_irqsave(&sde->tail_lock, flags);
2509 write_seqlock(&sde->head_lock);
2510
2511 __sdma_process_event(sde, event);
2512
2513 if (sde->state.current_state == sdma_state_s99_running)
2514 sdma_desc_avail(sde, sdma_descq_freecnt(sde));
2515
2516 write_sequnlock(&sde->head_lock);
2517 spin_unlock_irqrestore(&sde->tail_lock, flags);
2518 }
2519
2520 static void __sdma_process_event(struct sdma_engine *sde,
2521 enum sdma_events event)
2522 {
2523 struct sdma_state *ss = &sde->state;
2524 int need_progress = 0;
2525
2526
2527 #ifdef CONFIG_SDMA_VERBOSITY
2528 dd_dev_err(sde->dd, "CONFIG SDMA(%u) [%s] %s\n", sde->this_idx,
2529 sdma_state_names[ss->current_state],
2530 sdma_event_names[event]);
2531 #endif
2532
2533 switch (ss->current_state) {
2534 case sdma_state_s00_hw_down:
2535 switch (event) {
2536 case sdma_event_e00_go_hw_down:
2537 break;
2538 case sdma_event_e30_go_running:
2545
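			/*
			 * A run request can arrive while the engine is still
			 * down; remember it so the engine transitions straight
			 * to running once hardware bring-up completes.
			 */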
2546 ss->go_s99_running = 1;
2547 fallthrough;
2548 case sdma_event_e10_go_hw_start:
2549
2550 sdma_get(&sde->state);
2551 sdma_set_state(sde,
2552 sdma_state_s10_hw_start_up_halt_wait);
2553 break;
2554 case sdma_event_e15_hw_halt_done:
2555 break;
2556 case sdma_event_e25_hw_clean_up_done:
2557 break;
2558 case sdma_event_e40_sw_cleaned:
2559 sdma_sw_tear_down(sde);
2560 break;
2561 case sdma_event_e50_hw_cleaned:
2562 break;
2563 case sdma_event_e60_hw_halted:
2564 break;
2565 case sdma_event_e70_go_idle:
2566 break;
2567 case sdma_event_e80_hw_freeze:
2568 break;
2569 case sdma_event_e81_hw_frozen:
2570 break;
2571 case sdma_event_e82_hw_unfreeze:
2572 break;
2573 case sdma_event_e85_link_down:
2574 break;
2575 case sdma_event_e90_sw_halted:
2576 break;
2577 }
2578 break;
2579
2580 case sdma_state_s10_hw_start_up_halt_wait:
2581 switch (event) {
2582 case sdma_event_e00_go_hw_down:
2583 sdma_set_state(sde, sdma_state_s00_hw_down);
2584 sdma_sw_tear_down(sde);
2585 break;
2586 case sdma_event_e10_go_hw_start:
2587 break;
2588 case sdma_event_e15_hw_halt_done:
2589 sdma_set_state(sde,
2590 sdma_state_s15_hw_start_up_clean_wait);
2591 sdma_start_hw_clean_up(sde);
2592 break;
2593 case sdma_event_e25_hw_clean_up_done:
2594 break;
2595 case sdma_event_e30_go_running:
2596 ss->go_s99_running = 1;
2597 break;
2598 case sdma_event_e40_sw_cleaned:
2599 break;
2600 case sdma_event_e50_hw_cleaned:
2601 break;
2602 case sdma_event_e60_hw_halted:
2603 schedule_work(&sde->err_halt_worker);
2604 break;
2605 case sdma_event_e70_go_idle:
2606 ss->go_s99_running = 0;
2607 break;
2608 case sdma_event_e80_hw_freeze:
2609 break;
2610 case sdma_event_e81_hw_frozen:
2611 break;
2612 case sdma_event_e82_hw_unfreeze:
2613 break;
2614 case sdma_event_e85_link_down:
2615 break;
2616 case sdma_event_e90_sw_halted:
2617 break;
2618 }
2619 break;
2620
2621 case sdma_state_s15_hw_start_up_clean_wait:
2622 switch (event) {
2623 case sdma_event_e00_go_hw_down:
2624 sdma_set_state(sde, sdma_state_s00_hw_down);
2625 sdma_sw_tear_down(sde);
2626 break;
2627 case sdma_event_e10_go_hw_start:
2628 break;
2629 case sdma_event_e15_hw_halt_done:
2630 break;
2631 case sdma_event_e25_hw_clean_up_done:
2632 sdma_hw_start_up(sde);
2633 sdma_set_state(sde, ss->go_s99_running ?
2634 sdma_state_s99_running :
2635 sdma_state_s20_idle);
2636 break;
2637 case sdma_event_e30_go_running:
2638 ss->go_s99_running = 1;
2639 break;
2640 case sdma_event_e40_sw_cleaned:
2641 break;
2642 case sdma_event_e50_hw_cleaned:
2643 break;
2644 case sdma_event_e60_hw_halted:
2645 break;
2646 case sdma_event_e70_go_idle:
2647 ss->go_s99_running = 0;
2648 break;
2649 case sdma_event_e80_hw_freeze:
2650 break;
2651 case sdma_event_e81_hw_frozen:
2652 break;
2653 case sdma_event_e82_hw_unfreeze:
2654 break;
2655 case sdma_event_e85_link_down:
2656 break;
2657 case sdma_event_e90_sw_halted:
2658 break;
2659 }
2660 break;
2661
2662 case sdma_state_s20_idle:
2663 switch (event) {
2664 case sdma_event_e00_go_hw_down:
2665 sdma_set_state(sde, sdma_state_s00_hw_down);
2666 sdma_sw_tear_down(sde);
2667 break;
2668 case sdma_event_e10_go_hw_start:
2669 break;
2670 case sdma_event_e15_hw_halt_done:
2671 break;
2672 case sdma_event_e25_hw_clean_up_done:
2673 break;
2674 case sdma_event_e30_go_running:
2675 sdma_set_state(sde, sdma_state_s99_running);
2676 ss->go_s99_running = 1;
2677 break;
2678 case sdma_event_e40_sw_cleaned:
2679 break;
2680 case sdma_event_e50_hw_cleaned:
2681 break;
2682 case sdma_event_e60_hw_halted:
2683 sdma_set_state(sde, sdma_state_s50_hw_halt_wait);
2684 schedule_work(&sde->err_halt_worker);
2685 break;
2686 case sdma_event_e70_go_idle:
2687 break;
2688 case sdma_event_e85_link_down:
2689 case sdma_event_e80_hw_freeze:
2690 sdma_set_state(sde, sdma_state_s80_hw_freeze);
2691 atomic_dec(&sde->dd->sdma_unfreeze_count);
2692 wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
2693 break;
2694 case sdma_event_e81_hw_frozen:
2695 break;
2696 case sdma_event_e82_hw_unfreeze:
2697 break;
2698 case sdma_event_e90_sw_halted:
2699 break;
2700 }
2701 break;
2702
2703 case sdma_state_s30_sw_clean_up_wait:
2704 switch (event) {
2705 case sdma_event_e00_go_hw_down:
2706 sdma_set_state(sde, sdma_state_s00_hw_down);
2707 break;
2708 case sdma_event_e10_go_hw_start:
2709 break;
2710 case sdma_event_e15_hw_halt_done:
2711 break;
2712 case sdma_event_e25_hw_clean_up_done:
2713 break;
2714 case sdma_event_e30_go_running:
2715 ss->go_s99_running = 1;
2716 break;
2717 case sdma_event_e40_sw_cleaned:
2718 sdma_set_state(sde, sdma_state_s40_hw_clean_up_wait);
2719 sdma_start_hw_clean_up(sde);
2720 break;
2721 case sdma_event_e50_hw_cleaned:
2722 break;
2723 case sdma_event_e60_hw_halted:
2724 break;
2725 case sdma_event_e70_go_idle:
2726 ss->go_s99_running = 0;
2727 break;
2728 case sdma_event_e80_hw_freeze:
2729 break;
2730 case sdma_event_e81_hw_frozen:
2731 break;
2732 case sdma_event_e82_hw_unfreeze:
2733 break;
2734 case sdma_event_e85_link_down:
2735 ss->go_s99_running = 0;
2736 break;
2737 case sdma_event_e90_sw_halted:
2738 break;
2739 }
2740 break;
2741
2742 case sdma_state_s40_hw_clean_up_wait:
2743 switch (event) {
2744 case sdma_event_e00_go_hw_down:
2745 sdma_set_state(sde, sdma_state_s00_hw_down);
2746 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2747 break;
2748 case sdma_event_e10_go_hw_start:
2749 break;
2750 case sdma_event_e15_hw_halt_done:
2751 break;
2752 case sdma_event_e25_hw_clean_up_done:
2753 sdma_hw_start_up(sde);
2754 sdma_set_state(sde, ss->go_s99_running ?
2755 sdma_state_s99_running :
2756 sdma_state_s20_idle);
2757 break;
2758 case sdma_event_e30_go_running:
2759 ss->go_s99_running = 1;
2760 break;
2761 case sdma_event_e40_sw_cleaned:
2762 break;
2763 case sdma_event_e50_hw_cleaned:
2764 break;
2765 case sdma_event_e60_hw_halted:
2766 break;
2767 case sdma_event_e70_go_idle:
2768 ss->go_s99_running = 0;
2769 break;
2770 case sdma_event_e80_hw_freeze:
2771 break;
2772 case sdma_event_e81_hw_frozen:
2773 break;
2774 case sdma_event_e82_hw_unfreeze:
2775 break;
2776 case sdma_event_e85_link_down:
2777 ss->go_s99_running = 0;
2778 break;
2779 case sdma_event_e90_sw_halted:
2780 break;
2781 }
2782 break;
2783
2784 case sdma_state_s50_hw_halt_wait:
2785 switch (event) {
2786 case sdma_event_e00_go_hw_down:
2787 sdma_set_state(sde, sdma_state_s00_hw_down);
2788 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2789 break;
2790 case sdma_event_e10_go_hw_start:
2791 break;
2792 case sdma_event_e15_hw_halt_done:
2793 sdma_set_state(sde, sdma_state_s30_sw_clean_up_wait);
2794 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2795 break;
2796 case sdma_event_e25_hw_clean_up_done:
2797 break;
2798 case sdma_event_e30_go_running:
2799 ss->go_s99_running = 1;
2800 break;
2801 case sdma_event_e40_sw_cleaned:
2802 break;
2803 case sdma_event_e50_hw_cleaned:
2804 break;
2805 case sdma_event_e60_hw_halted:
2806 schedule_work(&sde->err_halt_worker);
2807 break;
2808 case sdma_event_e70_go_idle:
2809 ss->go_s99_running = 0;
2810 break;
2811 case sdma_event_e80_hw_freeze:
2812 break;
2813 case sdma_event_e81_hw_frozen:
2814 break;
2815 case sdma_event_e82_hw_unfreeze:
2816 break;
2817 case sdma_event_e85_link_down:
2818 ss->go_s99_running = 0;
2819 break;
2820 case sdma_event_e90_sw_halted:
2821 break;
2822 }
2823 break;
2824
2825 case sdma_state_s60_idle_halt_wait:
2826 switch (event) {
2827 case sdma_event_e00_go_hw_down:
2828 sdma_set_state(sde, sdma_state_s00_hw_down);
2829 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2830 break;
2831 case sdma_event_e10_go_hw_start:
2832 break;
2833 case sdma_event_e15_hw_halt_done:
2834 sdma_set_state(sde, sdma_state_s30_sw_clean_up_wait);
2835 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2836 break;
2837 case sdma_event_e25_hw_clean_up_done:
2838 break;
2839 case sdma_event_e30_go_running:
2840 ss->go_s99_running = 1;
2841 break;
2842 case sdma_event_e40_sw_cleaned:
2843 break;
2844 case sdma_event_e50_hw_cleaned:
2845 break;
2846 case sdma_event_e60_hw_halted:
2847 schedule_work(&sde->err_halt_worker);
2848 break;
2849 case sdma_event_e70_go_idle:
2850 ss->go_s99_running = 0;
2851 break;
2852 case sdma_event_e80_hw_freeze:
2853 break;
2854 case sdma_event_e81_hw_frozen:
2855 break;
2856 case sdma_event_e82_hw_unfreeze:
2857 break;
2858 case sdma_event_e85_link_down:
2859 break;
2860 case sdma_event_e90_sw_halted:
2861 break;
2862 }
2863 break;
2864
2865 case sdma_state_s80_hw_freeze:
2866 switch (event) {
2867 case sdma_event_e00_go_hw_down:
2868 sdma_set_state(sde, sdma_state_s00_hw_down);
2869 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2870 break;
2871 case sdma_event_e10_go_hw_start:
2872 break;
2873 case sdma_event_e15_hw_halt_done:
2874 break;
2875 case sdma_event_e25_hw_clean_up_done:
2876 break;
2877 case sdma_event_e30_go_running:
2878 ss->go_s99_running = 1;
2879 break;
2880 case sdma_event_e40_sw_cleaned:
2881 break;
2882 case sdma_event_e50_hw_cleaned:
2883 break;
2884 case sdma_event_e60_hw_halted:
2885 break;
2886 case sdma_event_e70_go_idle:
2887 ss->go_s99_running = 0;
2888 break;
2889 case sdma_event_e80_hw_freeze:
2890 break;
2891 case sdma_event_e81_hw_frozen:
2892 sdma_set_state(sde, sdma_state_s82_freeze_sw_clean);
2893 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2894 break;
2895 case sdma_event_e82_hw_unfreeze:
2896 break;
2897 case sdma_event_e85_link_down:
2898 break;
2899 case sdma_event_e90_sw_halted:
2900 break;
2901 }
2902 break;
2903
2904 case sdma_state_s82_freeze_sw_clean:
2905 switch (event) {
2906 case sdma_event_e00_go_hw_down:
2907 sdma_set_state(sde, sdma_state_s00_hw_down);
2908 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2909 break;
2910 case sdma_event_e10_go_hw_start:
2911 break;
2912 case sdma_event_e15_hw_halt_done:
2913 break;
2914 case sdma_event_e25_hw_clean_up_done:
2915 break;
2916 case sdma_event_e30_go_running:
2917 ss->go_s99_running = 1;
2918 break;
2919 case sdma_event_e40_sw_cleaned:
2920
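			/* sw clean-up done: release this engine's hold on the freeze */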
2921 atomic_dec(&sde->dd->sdma_unfreeze_count);
2922 wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
2923 break;
2924 case sdma_event_e50_hw_cleaned:
2925 break;
2926 case sdma_event_e60_hw_halted:
2927 break;
2928 case sdma_event_e70_go_idle:
2929 ss->go_s99_running = 0;
2930 break;
2931 case sdma_event_e80_hw_freeze:
2932 break;
2933 case sdma_event_e81_hw_frozen:
2934 break;
2935 case sdma_event_e82_hw_unfreeze:
2936 sdma_hw_start_up(sde);
2937 sdma_set_state(sde, ss->go_s99_running ?
2938 sdma_state_s99_running :
2939 sdma_state_s20_idle);
2940 break;
2941 case sdma_event_e85_link_down:
2942 break;
2943 case sdma_event_e90_sw_halted:
2944 break;
2945 }
2946 break;
2947
2948 case sdma_state_s99_running:
2949 switch (event) {
2950 case sdma_event_e00_go_hw_down:
2951 sdma_set_state(sde, sdma_state_s00_hw_down);
2952 tasklet_hi_schedule(&sde->sdma_sw_clean_up_task);
2953 break;
2954 case sdma_event_e10_go_hw_start:
2955 break;
2956 case sdma_event_e15_hw_halt_done:
2957 break;
2958 case sdma_event_e25_hw_clean_up_done:
2959 break;
2960 case sdma_event_e30_go_running:
2961 break;
2962 case sdma_event_e40_sw_cleaned:
2963 break;
2964 case sdma_event_e50_hw_cleaned:
2965 break;
2966 case sdma_event_e60_hw_halted:
2967 need_progress = 1;
2968 sdma_err_progress_check_schedule(sde);
2969 fallthrough;
2970 case sdma_event_e90_sw_halted:
2971
2972
2973
2974
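			/*
			 * A halt (hardware or software initiated) stops the
			 * engine; wait for the hardware halt to complete
			 * before the clean-up sequence continues.
			 */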
2975 sdma_set_state(sde, sdma_state_s50_hw_halt_wait);
2976 schedule_work(&sde->err_halt_worker);
2977 break;
2978 case sdma_event_e70_go_idle:
2979 sdma_set_state(sde, sdma_state_s60_idle_halt_wait);
2980 break;
2981 case sdma_event_e85_link_down:
2982 ss->go_s99_running = 0;
2983 fallthrough;
2984 case sdma_event_e80_hw_freeze:
2985 sdma_set_state(sde, sdma_state_s80_hw_freeze);
2986 atomic_dec(&sde->dd->sdma_unfreeze_count);
2987 wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
2988 break;
2989 case sdma_event_e81_hw_frozen:
2990 break;
2991 case sdma_event_e82_hw_unfreeze:
2992 break;
2993 }
2994 break;
2995 }
2996
2997 ss->last_event = event;
2998 if (need_progress)
2999 sdma_make_progress(sde, 0);
3000 }
3001
3014
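/*
 * Grow a txreq's descriptor array.
 *
 * On the first call the built-in descriptor array is replaced by a
 * kmalloc'ed array sized for MAX_DESC entries (with the last entry held
 * in reserve) and the existing descriptors are copied over.  Once the
 * request is one short of MAX_DESC it is either closed out in place
 * (nothing left to map) or switched to a coalesce buffer so the remaining
 * payload can be sent as a single descriptor.  On allocation failure the
 * tx is cleaned up and -ENOMEM is returned.
 */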
3015 static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
3016 {
3017 int i;
3018 struct sdma_desc *descp;
3019
3020
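	/* one slot shy of the hardware maximum: finish in place or start coalescing */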
3021 if (unlikely(tx->num_desc == (MAX_DESC - 1))) {
3022
3023 if (!tx->tlen) {
3024 tx->desc_limit = MAX_DESC;
3025 } else if (!tx->coalesce_buf) {
3026
3027 tx->coalesce_buf = kmalloc(tx->tlen + sizeof(u32),
3028 GFP_ATOMIC);
3029 if (!tx->coalesce_buf)
3030 goto enomem;
3031 tx->coalesce_idx = 0;
3032 }
3033 return 0;
3034 }
3035
3036 if (unlikely(tx->num_desc == MAX_DESC))
3037 goto enomem;
3038
3039 descp = kmalloc_array(MAX_DESC, sizeof(struct sdma_desc), GFP_ATOMIC);
3040 if (!descp)
3041 goto enomem;
3042 tx->descp = descp;
3043
3044
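	/* keep the last slot in reserve for a coalesce/pad descriptor */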
3045 tx->desc_limit = MAX_DESC - 1;
3046
3047 for (i = 0; i < tx->num_desc; i++)
3048 tx->descp[i] = tx->descs[i];
3049 return 0;
3050 enomem:
3051 __sdma_txclean(dd, tx);
3052 return -ENOMEM;
3053 }
3054
3070
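/*
 * ext_coal_sdma_tx_descs() - extend or coalesce an sdma tx
 *
 * Extends the descriptor list and, once a coalesce buffer is in use,
 * copies each fragment into it instead of mapping it separately.  When
 * the final fragment arrives the buffer is padded to a dword boundary,
 * DMA-mapped as a single buffer, and added as one descriptor.
 *
 * Return: <0 on error, 0 if the fragment was consumed into the coalesce
 * buffer (or the completed buffer was submitted), 1 if the caller should
 * map and add the fragment as a normal descriptor.
 */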
3071 int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
3072 int type, void *kvaddr, struct page *page,
3073 unsigned long offset, u16 len)
3074 {
3075 int pad_len, rval;
3076 dma_addr_t addr;
3077
3078 rval = _extend_sdma_tx_descs(dd, tx);
3079 if (rval) {
3080 __sdma_txclean(dd, tx);
3081 return rval;
3082 }
3083
3084
3085 if (tx->coalesce_buf) {
3086 if (type == SDMA_MAP_NONE) {
3087 __sdma_txclean(dd, tx);
3088 return -EINVAL;
3089 }
3090
3091 if (type == SDMA_MAP_PAGE) {
3092 kvaddr = kmap_local_page(page);
3093 kvaddr += offset;
3094 } else if (WARN_ON(!kvaddr)) {
3095 __sdma_txclean(dd, tx);
3096 return -EINVAL;
3097 }
3098
3099 memcpy(tx->coalesce_buf + tx->coalesce_idx, kvaddr, len);
3100 tx->coalesce_idx += len;
3101 if (type == SDMA_MAP_PAGE)
3102 kunmap_local(kvaddr);
3103
3104
3105 if (tx->tlen - tx->coalesce_idx)
3106 return 0;
3107
3108
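		/* whole payload gathered: pad it out to a dword boundary */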
3109 pad_len = tx->packet_len & (sizeof(u32) - 1);
3110 if (pad_len) {
3111 pad_len = sizeof(u32) - pad_len;
3112 memset(tx->coalesce_buf + tx->coalesce_idx, 0, pad_len);
3113
3114 tx->packet_len += pad_len;
3115 tx->tlen += pad_len;
3116 }
3117
3118
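		/* map the entire coalesce buffer with a single DMA mapping */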
3119 addr = dma_map_single(&dd->pcidev->dev,
3120 tx->coalesce_buf,
3121 tx->tlen,
3122 DMA_TO_DEVICE);
3123
3124 if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
3125 __sdma_txclean(dd, tx);
3126 return -ENOSPC;
3127 }
3128
3129
3130 tx->desc_limit = MAX_DESC;
3131 return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx,
3132 addr, tx->tlen);
3133 }
3134
3135 return 1;
3136 }
3137
3138
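/*
 * Reprogram the per-engine source LID (SLID) check after an LMC/LID
 * change: the mask selects which LID bits the hardware compares and the
 * value supplies the expected bits.
 */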
3139 void sdma_update_lmc(struct hfi1_devdata *dd, u64 mask, u32 lid)
3140 {
3141 struct sdma_engine *sde;
3142 int i;
3143 u64 sreg;
3144
3145 sreg = ((mask & SD(CHECK_SLID_MASK_MASK)) <<
3146 SD(CHECK_SLID_MASK_SHIFT)) |
3147 (((lid & mask) & SD(CHECK_SLID_VALUE_MASK)) <<
3148 SD(CHECK_SLID_VALUE_SHIFT));
3149
3150 for (i = 0; i < dd->num_sdma; i++) {
3151 hfi1_cdbg(LINKVERB, "SendDmaEngine[%d].SLID_CHECK = 0x%x",
3152 i, (u32)sreg);
3153 sde = &dd->per_sdma[i];
3154 write_sde_csr(sde, SD(CHECK_SLID), sreg);
3155 }
3156 }
3157
3158
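/*
 * Append a padding descriptor that points at the shared pad buffer so the
 * packet length becomes a dword multiple, then close out the tx.  The
 * descriptor array is extended first if the extra descriptor does not fit.
 */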
3159 int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
3160 {
3161 int rval = 0;
3162
3163 tx->num_desc++;
3164 if (unlikely(tx->num_desc == tx->desc_limit)) {
3165 rval = _extend_sdma_tx_descs(dd, tx);
3166 if (rval) {
3167 __sdma_txclean(dd, tx);
3168 return rval;
3169 }
3170 }
3171
3172 make_tx_sdma_desc(
3173 tx,
3174 SDMA_MAP_NONE,
3175 dd->sdma_pad_phys,
3176 sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1)));
3177 _sdma_close_tx(dd, tx);
3178 return rval;
3179 }
3180
3181
3182
3183
3184
3185
3186
3187
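/*
 * Encode an AHG header-update request into the leading descriptors of a
 * tx: pick the update mode from the number of update words, reserve one
 * or two extra descriptors to carry them, and pack the header index,
 * header length (in dwords), mode and first update word into qw[1] of
 * descriptor 0.
 */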
3188 void _sdma_txreq_ahgadd(
3189 struct sdma_txreq *tx,
3190 u8 num_ahg,
3191 u8 ahg_entry,
3192 u32 *ahg,
3193 u8 ahg_hlen)
3194 {
3195 u32 i, shift = 0, desc = 0;
3196 u8 mode;
3197
3198 WARN_ON_ONCE(num_ahg > 9 || (ahg_hlen & 3) || ahg_hlen == 4);
3199
3200 if (num_ahg == 1)
3201 mode = SDMA_AHG_APPLY_UPDATE1;
3202 else if (num_ahg <= 5)
3203 mode = SDMA_AHG_APPLY_UPDATE2;
3204 else
3205 mode = SDMA_AHG_APPLY_UPDATE3;
3206 tx->num_desc++;
3207
3208 switch (mode) {
3209 case SDMA_AHG_APPLY_UPDATE3:
3210 tx->num_desc++;
3211 tx->descs[2].qw[0] = 0;
3212 tx->descs[2].qw[1] = 0;
3213 fallthrough;
3214 case SDMA_AHG_APPLY_UPDATE2:
3215 tx->num_desc++;
3216 tx->descs[1].qw[0] = 0;
3217 tx->descs[1].qw[1] = 0;
3218 break;
3219 }
3220 ahg_hlen >>= 2;
3221 tx->descs[0].qw[1] |=
3222 (((u64)ahg_entry & SDMA_DESC1_HEADER_INDEX_MASK)
3223 << SDMA_DESC1_HEADER_INDEX_SHIFT) |
3224 (((u64)ahg_hlen & SDMA_DESC1_HEADER_DWS_MASK)
3225 << SDMA_DESC1_HEADER_DWS_SHIFT) |
3226 (((u64)mode & SDMA_DESC1_HEADER_MODE_MASK)
3227 << SDMA_DESC1_HEADER_MODE_SHIFT) |
3228 (((u64)ahg[0] & SDMA_DESC1_HEADER_UPDATE1_MASK)
3229 << SDMA_DESC1_HEADER_UPDATE1_SHIFT);
3230 for (i = 0; i < (num_ahg - 1); i++) {
3231 if (!shift && !(i & 2))
3232 desc++;
3233 tx->descs[desc].qw[!!(i & 2)] |=
3234 (((u64)ahg[i + 1])
3235 << shift);
3236 shift = (shift + 32) & 63;
3237 }
3238 }
3239
3240
3241
3242
3243
3244
3245
3246
3247
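/*
 * sdma_ahg_alloc - allocate an AHG entry on an engine
 *
 * Return: the allocated index (0-31) on success, -EINVAL if @sde is NULL,
 * -ENOSPC if all 32 entries are in use.
 */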
3248 int sdma_ahg_alloc(struct sdma_engine *sde)
3249 {
3250 int nr;
3251 int oldbit;
3252
3253 if (!sde) {
3254 trace_hfi1_ahg_allocate(sde, -EINVAL);
3255 return -EINVAL;
3256 }
3257 while (1) {
3258 nr = ffz(READ_ONCE(sde->ahg_bits));
3259 if (nr > 31) {
3260 trace_hfi1_ahg_allocate(sde, -ENOSPC);
3261 return -ENOSPC;
3262 }
3263 oldbit = test_and_set_bit(nr, &sde->ahg_bits);
3264 if (!oldbit)
3265 break;
3266 cpu_relax();
3267 }
3268 trace_hfi1_ahg_allocate(sde, nr);
3269 return nr;
3270 }
3271
3272
3273
3274
3275
3276
3277
3278
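/*
 * sdma_ahg_free - release a previously allocated AHG index
 *
 * Out-of-range indexes are ignored.
 */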
3279 void sdma_ahg_free(struct sdma_engine *sde, int ahg_index)
3280 {
3281 if (!sde)
3282 return;
3283 trace_hfi1_ahg_deallocate(sde, ahg_index);
3284 if (ahg_index < 0 || ahg_index > 31)
3285 return;
3286 clear_bit(ahg_index, &sde->ahg_bits);
3287 }
3288
3289
3290
3291
3292
3293
3294
3295
3296
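/*
 * sdma_freeze_notify - tell every SDMA engine a freeze (or link down) has
 * started
 *
 * Primes the unfreeze count to the number of engines and delivers the
 * corresponding event to each engine's state machine.
 */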
3297 void sdma_freeze_notify(struct hfi1_devdata *dd, int link_down)
3298 {
3299 int i;
3300 enum sdma_events event = link_down ? sdma_event_e85_link_down :
3301 sdma_event_e80_hw_freeze;
3302
3303
3304 atomic_set(&dd->sdma_unfreeze_count, dd->num_sdma);
3305
3306
3307 for (i = 0; i < dd->num_sdma; i++)
3308 sdma_process_event(&dd->per_sdma[i], event);
3309
3310
3311 }
3312
3313
3314
3315
3316
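/*
 * sdma_freeze - complete the freeze sequence for all SDMA engines
 *
 * Waits for every engine to acknowledge the freeze, then delivers the
 * "hardware frozen" event and waits again for the per-engine software
 * clean-up to finish.  Both waits are interruptible; if the first wait is
 * interrupted the sequence is abandoned.
 */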
3317 void sdma_freeze(struct hfi1_devdata *dd)
3318 {
3319 int i;
3320 int ret;
3321
3322
3323
3324
3325
3326 ret = wait_event_interruptible(dd->sdma_unfreeze_wq,
3327 atomic_read(&dd->sdma_unfreeze_count) <=
3328 0);
3329
3330 if (ret || atomic_read(&dd->sdma_unfreeze_count) < 0)
3331 return;
3332
3333
3334 atomic_set(&dd->sdma_unfreeze_count, dd->num_sdma);
3335
3336
3337 for (i = 0; i < dd->num_sdma; i++)
3338 sdma_process_event(&dd->per_sdma[i], sdma_event_e81_hw_frozen);
3339
3340
3341
3342
3343
3344
3345 (void)wait_event_interruptible(dd->sdma_unfreeze_wq,
3346 atomic_read(&dd->sdma_unfreeze_count) <= 0);
3347
3348 }
3349
3350
3351
3352
3353
3354
3355
3356
3357
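/*
 * sdma_unfreeze - resume all SDMA engines after an unfreeze
 *
 * Delivers the hardware-unfreeze event to every engine, which restarts
 * the hardware and returns each engine to idle or running.
 */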
3358 void sdma_unfreeze(struct hfi1_devdata *dd)
3359 {
3360 int i;
3361
3362
3363 for (i = 0; i < dd->num_sdma; i++)
3364 sdma_process_event(&dd->per_sdma[i],
3365 sdma_event_e82_hw_unfreeze);
3366 }
3367
3368
3369
3370
3371
3372
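/*
 * Force this engine's interrupt by writing its progress mask into the
 * interrupt force CSR so the engine is serviced from interrupt context.
 */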
3373 void _sdma_engine_progress_schedule(
3374 struct sdma_engine *sde)
3375 {
3376 trace_hfi1_sdma_engine_progress(sde, sde->progress_mask);
3377
3378 write_csr(sde->dd,
3379 CCE_INT_FORCE + (8 * (IS_SDMA_START / 64)),
3380 sde->progress_mask);
3381 }