/*
 * PCIe NTB Performance Measurement Tool (ntb_perf)
 *
 * Measures the raw throughput of CPU-driven (memcpy_toio) or DMA-engine
 * driven transfers through an NTB memory window to a peer port.
 * Dual BSD/GPL licensed; see the MODULE_* declarations below.
 */
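/*
 * Usage sketch (assuming debugfs is mounted at /sys/kernel/debug and
 * <pci-dev> stands for the PCI device name the driver is bound to):
 *
 *	root@host# ls /sys/kernel/debug/ntb_perf/<pci-dev>
 *	chunk_order  info  run  threads_count  total_order  use_dma
 *	root@host# echo 4 > /sys/kernel/debug/ntb_perf/<pci-dev>/threads_count
 *	root@host# echo 0 > /sys/kernel/debug/ntb_perf/<pci-dev>/run
 *	root@host# cat /sys/kernel/debug/ntb_perf/<pci-dev>/run
 *
 * Writing a peer index to "run" starts the measurement against that peer,
 * reading "run" afterwards prints the per-thread statistics, and "info"
 * reports the buffers and link state (see the DebugFS nodes below).
 */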
0070 #include <linux/init.h>
0071 #include <linux/kernel.h>
0072 #include <linux/module.h>
0073 #include <linux/sched.h>
0074 #include <linux/wait.h>
0075 #include <linux/dma-mapping.h>
0076 #include <linux/dmaengine.h>
0077 #include <linux/pci.h>
0078 #include <linux/ktime.h>
0079 #include <linux/slab.h>
0080 #include <linux/delay.h>
0081 #include <linux/sizes.h>
0082 #include <linux/workqueue.h>
0083 #include <linux/debugfs.h>
0084 #include <linux/random.h>
0085 #include <linux/ntb.h>
0086
0087 #define DRIVER_NAME "ntb_perf"
0088 #define DRIVER_VERSION "2.0"
0089
0090 MODULE_LICENSE("Dual BSD/GPL");
0091 MODULE_VERSION(DRIVER_VERSION);
0092 MODULE_AUTHOR("Dave Jiang <dave.jiang@intel.com>");
0093 MODULE_DESCRIPTION("PCIe NTB Performance Measurement Tool");
0094
0095 #define MAX_THREADS_CNT 32
0096 #define DEF_THREADS_CNT 1
0097 #define MAX_CHUNK_SIZE SZ_1M
0098 #define MAX_CHUNK_ORDER 20
0099
0100 #define DMA_TRIES 100
0101 #define DMA_MDELAY 10
0102
0103 #define MSG_TRIES 1000
0104 #define MSG_UDELAY_LOW 1000000
0105 #define MSG_UDELAY_HIGH 2000000
0106
0107 #define PERF_BUF_LEN 1024
0108
0109 static unsigned long max_mw_size;
0110 module_param(max_mw_size, ulong, 0644);
0111 MODULE_PARM_DESC(max_mw_size, "Upper limit of memory window size");
0112
0113 static unsigned char chunk_order = 19;
0114 module_param(chunk_order, byte, 0644);
0115 MODULE_PARM_DESC(chunk_order, "Data chunk order [2^n] to transfer");
0116
0117 static unsigned char total_order = 30;
0118 module_param(total_order, byte, 0644);
0119 MODULE_PARM_DESC(total_order, "Total data order [2^n] to transfer");
0120
0121 static bool use_dma;
0122 module_param(use_dma, bool, 0644);
0123 MODULE_PARM_DESC(use_dma, "Use DMA engine to measure performance");
0124
/*
 * Performance measurement tool data definitions
 */
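/*
 * Command/status codes: the PERF_CMD_* values are exchanged between peers
 * over the scratchpad or message registers, and the same values double as
 * bit positions in the peer->sts bitmap (together with the PERF_STS_*
 * flags) to schedule the peer service work.
 */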
0130 enum perf_cmd {
0131 PERF_CMD_INVAL = -1,
0132 PERF_CMD_SSIZE = 0,
0133 PERF_CMD_RSIZE = 1,
0134 PERF_CMD_SXLAT = 2,
0135 PERF_CMD_RXLAT = 3,
0136 PERF_CMD_CLEAR = 4,
0137 PERF_STS_DONE = 5,
0138 PERF_STS_LNKUP = 6,
0139 };
0140
0141 struct perf_ctx;
0142
0143 struct perf_peer {
0144 struct perf_ctx *perf;
0145 int pidx;
0146 int gidx;

	/* Outbound memory window: ioremapped peer buffer parameters */
0149 u64 outbuf_xlat;
0150 resource_size_t outbuf_size;
0151 void __iomem *outbuf;
0152 phys_addr_t out_phys_addr;
0153 dma_addr_t dma_dst_addr;
0154
0155 dma_addr_t inbuf_xlat;
0156 resource_size_t inbuf_size;
0157 void *inbuf;

	/* Inbound memory window: locally allocated DMA-coherent buffer */
0160 struct work_struct service;
0161 unsigned long sts;
0162
0163 struct completion init_comp;
0164 };
0165 #define to_peer_service(__work) \
0166 container_of(__work, struct perf_peer, service)
0167
0168 struct perf_thread {
0169 struct perf_ctx *perf;
0170 int tidx;

	/* DMA-based transfer synchronization parameters */
0173 atomic_t dma_sync;
0174 wait_queue_head_t dma_wait;
0175 struct dma_chan *dma_chan;

	/* Test data source and measured statistics */
0178 void *src;
0179 u64 copied;
0180 ktime_t duration;
0181 int status;
0182 struct work_struct work;
0183 };
0184 #define to_thread_work(__work) \
0185 container_of(__work, struct perf_thread, work)
0186
0187 struct perf_ctx {
0188 struct ntb_dev *ntb;

	/* Global device index and peer descriptors */
0191 int gidx;
0192 int pcnt;
0193 struct perf_peer *peers;

	/* Performance measuring work-threads interface */
0196 unsigned long busy_flag;
0197 wait_queue_head_t twait;
0198 atomic_t tsync;
0199 u8 tcnt;
0200 struct perf_peer *test_peer;
0201 struct perf_thread threads[MAX_THREADS_CNT];

	/* Scratchpad/Message-based command send/receive operations */
0204 int (*cmd_send)(struct perf_peer *peer, enum perf_cmd cmd, u64 data);
0205 int (*cmd_recv)(struct perf_ctx *perf, int *pidx, enum perf_cmd *cmd,
0206 u64 *data);
0207
0208 struct dentry *dbgfs_dir;
0209 };
0210
/*
 * Scratchpad-based command interface: each global index owns a triplet
 * of scratchpads {CMD, LDATA, HDATA} plus a notification doorbell bit.
 */
0214 #define PERF_SPAD_CNT(_pcnt) \
0215 (3*((_pcnt) + 1))
0216 #define PERF_SPAD_CMD(_gidx) \
0217 (3*(_gidx))
0218 #define PERF_SPAD_LDATA(_gidx) \
0219 (3*(_gidx) + 1)
0220 #define PERF_SPAD_HDATA(_gidx) \
0221 (3*(_gidx) + 2)
0222 #define PERF_SPAD_NOTIFY(_gidx) \
0223 (BIT_ULL(_gidx))
0224
/*
 * Message-based command interface: three message registers carry the
 * command code and the low/high 32-bit halves of its data.
 */
0228 #define PERF_MSG_CNT 3
0229 #define PERF_MSG_CMD 0
0230 #define PERF_MSG_LDATA 1
0231 #define PERF_MSG_HDATA 2
0232
/*
 * Static data of the performance measurement tool
 */
0238 static struct dentry *perf_dbgfs_topdir;
0239
0240 static struct workqueue_struct *perf_wq __read_mostly;
0241
/*
 * NTB cross-link command execution service
 */
0247 static void perf_terminate_test(struct perf_ctx *perf);
0248
0249 static inline bool perf_link_is_up(struct perf_peer *peer)
0250 {
0251 u64 link;
0252
0253 link = ntb_link_is_up(peer->perf->ntb, NULL, NULL);
0254 return !!(link & BIT_ULL_MASK(peer->pidx));
0255 }
0256
0257 static int perf_spad_cmd_send(struct perf_peer *peer, enum perf_cmd cmd,
0258 u64 data)
0259 {
0260 struct perf_ctx *perf = peer->perf;
0261 int try;
0262 u32 sts;
0263
0264 dev_dbg(&perf->ntb->dev, "CMD send: %d 0x%llx\n", cmd, data);
0265
	/*
	 * Perform a limited number of attempts: a new command may only be
	 * placed once the peer has consumed the previous one and the CMD
	 * scratchpad reads back PERF_CMD_INVAL. Scratchpads are split per
	 * global index to avoid multi-port races, and no local locking is
	 * needed since only the thread-safe service work uses this method.
	 */
0273 for (try = 0; try < MSG_TRIES; try++) {
0274 if (!perf_link_is_up(peer))
0275 return -ENOLINK;
0276
0277 sts = ntb_peer_spad_read(perf->ntb, peer->pidx,
0278 PERF_SPAD_CMD(perf->gidx));
0279 if (sts != PERF_CMD_INVAL) {
0280 usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH);
0281 continue;
0282 }
0283
0284 ntb_peer_spad_write(perf->ntb, peer->pidx,
0285 PERF_SPAD_LDATA(perf->gidx),
0286 lower_32_bits(data));
0287 ntb_peer_spad_write(perf->ntb, peer->pidx,
0288 PERF_SPAD_HDATA(perf->gidx),
0289 upper_32_bits(data));
0290 ntb_peer_spad_write(perf->ntb, peer->pidx,
0291 PERF_SPAD_CMD(perf->gidx),
0292 cmd);
0293 ntb_peer_db_set(perf->ntb, PERF_SPAD_NOTIFY(peer->gidx));
0294
0295 dev_dbg(&perf->ntb->dev, "DB ring peer %#llx\n",
0296 PERF_SPAD_NOTIFY(peer->gidx));
0297
0298 break;
0299 }
0300
0301 return try < MSG_TRIES ? 0 : -EAGAIN;
0302 }
0303
0304 static int perf_spad_cmd_recv(struct perf_ctx *perf, int *pidx,
0305 enum perf_cmd *cmd, u64 *data)
0306 {
0307 struct perf_peer *peer;
0308 u32 val;
0309
0310 ntb_db_clear(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx));
0311
	/*
	 * The just-cleared doorbell may have been set by any peer, so scan
	 * every peer's command scratchpad. Peers with smaller indexes are
	 * serviced first, which is acceptable for this command protocol.
	 */
0318 for (*pidx = 0; *pidx < perf->pcnt; (*pidx)++) {
0319 peer = &perf->peers[*pidx];
0320
0321 if (!perf_link_is_up(peer))
0322 continue;
0323
0324 val = ntb_spad_read(perf->ntb, PERF_SPAD_CMD(peer->gidx));
0325 if (val == PERF_CMD_INVAL)
0326 continue;
0327
0328 *cmd = val;
0329
0330 val = ntb_spad_read(perf->ntb, PERF_SPAD_LDATA(peer->gidx));
0331 *data = val;
0332
0333 val = ntb_spad_read(perf->ntb, PERF_SPAD_HDATA(peer->gidx));
0334 *data |= (u64)val << 32;
0335
0336
0337 ntb_spad_write(perf->ntb, PERF_SPAD_CMD(peer->gidx),
0338 PERF_CMD_INVAL);
0339
0340 dev_dbg(&perf->ntb->dev, "CMD recv: %d 0x%llx\n", *cmd, *data);
0341
0342 return 0;
0343 }
0344
0345 return -ENODATA;
0346 }
0347
0348 static int perf_msg_cmd_send(struct perf_peer *peer, enum perf_cmd cmd,
0349 u64 data)
0350 {
0351 struct perf_ctx *perf = peer->perf;
0352 int try, ret;
0353 u64 outbits;
0354
0355 dev_dbg(&perf->ntb->dev, "CMD send: %d 0x%llx\n", cmd, data);
0356
	/*
	 * Perform a limited number of attempts. Message registers are free
	 * of multi-port race conditions, so they don't need to be split by
	 * global index, and no local locking is required since only the
	 * thread-safe service work sends commands.
	 */
0364 outbits = ntb_msg_outbits(perf->ntb);
0365 for (try = 0; try < MSG_TRIES; try++) {
0366 if (!perf_link_is_up(peer))
0367 return -ENOLINK;
0368
0369 ret = ntb_msg_clear_sts(perf->ntb, outbits);
0370 if (ret)
0371 return ret;
0372
0373 ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_LDATA,
0374 lower_32_bits(data));
0375
0376 if (ntb_msg_read_sts(perf->ntb) & outbits) {
0377 usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH);
0378 continue;
0379 }
0380
0381 ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_HDATA,
0382 upper_32_bits(data));
0383
0384
0385 ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_CMD, cmd);
0386
0387 break;
0388 }
0389
0390 return try < MSG_TRIES ? 0 : -EAGAIN;
0391 }
0392
0393 static int perf_msg_cmd_recv(struct perf_ctx *perf, int *pidx,
0394 enum perf_cmd *cmd, u64 *data)
0395 {
0396 u64 inbits;
0397 u32 val;
0398
0399 inbits = ntb_msg_inbits(perf->ntb);
0400
0401 if (hweight64(ntb_msg_read_sts(perf->ntb) & inbits) < 3)
0402 return -ENODATA;
0403
0404 val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_CMD);
0405 *cmd = val;
0406
0407 val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_LDATA);
0408 *data = val;
0409
0410 val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_HDATA);
0411 *data |= (u64)val << 32;
0412
0413
0414 ntb_msg_clear_sts(perf->ntb, inbits);
0415
0416 dev_dbg(&perf->ntb->dev, "CMD recv: %d 0x%llx\n", *cmd, *data);
0417
0418 return 0;
0419 }
0420
0421 static int perf_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, u64 data)
0422 {
0423 struct perf_ctx *perf = peer->perf;
0424
0425 if (cmd == PERF_CMD_SSIZE || cmd == PERF_CMD_SXLAT)
0426 return perf->cmd_send(peer, cmd, data);
0427
0428 dev_err(&perf->ntb->dev, "Send invalid command\n");
0429 return -EINVAL;
0430 }
0431
0432 static int perf_cmd_exec(struct perf_peer *peer, enum perf_cmd cmd)
0433 {
0434 switch (cmd) {
0435 case PERF_CMD_SSIZE:
0436 case PERF_CMD_RSIZE:
0437 case PERF_CMD_SXLAT:
0438 case PERF_CMD_RXLAT:
0439 case PERF_CMD_CLEAR:
0440 break;
0441 default:
0442 dev_err(&peer->perf->ntb->dev, "Exec invalid command\n");
0443 return -EINVAL;
0444 }
0445
0446
0447 set_bit(cmd, &peer->sts);
0448
0449 dev_dbg(&peer->perf->ntb->dev, "CMD exec: %d\n", cmd);
0450
0451 (void)queue_work(system_highpri_wq, &peer->service);
0452
0453 return 0;
0454 }
0455
0456 static int perf_cmd_recv(struct perf_ctx *perf)
0457 {
0458 struct perf_peer *peer;
0459 int ret, pidx, cmd;
0460 u64 data;
0461
0462 while (!(ret = perf->cmd_recv(perf, &pidx, &cmd, &data))) {
0463 peer = &perf->peers[pidx];
0464
0465 switch (cmd) {
0466 case PERF_CMD_SSIZE:
0467 peer->inbuf_size = data;
0468 return perf_cmd_exec(peer, PERF_CMD_RSIZE);
0469 case PERF_CMD_SXLAT:
0470 peer->outbuf_xlat = data;
0471 return perf_cmd_exec(peer, PERF_CMD_RXLAT);
0472 default:
0473 dev_err(&perf->ntb->dev, "Recv invalid command\n");
0474 return -EINVAL;
0475 }
0476 }
0477
0478
0479 return ret == -ENODATA ? 0 : ret;
0480 }
0481
0482 static void perf_link_event(void *ctx)
0483 {
0484 struct perf_ctx *perf = ctx;
0485 struct perf_peer *peer;
0486 bool lnk_up;
0487 int pidx;
0488
0489 for (pidx = 0; pidx < perf->pcnt; pidx++) {
0490 peer = &perf->peers[pidx];
0491
0492 lnk_up = perf_link_is_up(peer);
0493
0494 if (lnk_up &&
0495 !test_and_set_bit(PERF_STS_LNKUP, &peer->sts)) {
0496 perf_cmd_exec(peer, PERF_CMD_SSIZE);
0497 } else if (!lnk_up &&
0498 test_and_clear_bit(PERF_STS_LNKUP, &peer->sts)) {
0499 perf_cmd_exec(peer, PERF_CMD_CLEAR);
0500 }
0501 }
0502 }
0503
0504 static void perf_db_event(void *ctx, int vec)
0505 {
0506 struct perf_ctx *perf = ctx;
0507
0508 dev_dbg(&perf->ntb->dev, "DB vec %d mask %#llx bits %#llx\n", vec,
0509 ntb_db_vector_mask(perf->ntb, vec), ntb_db_read(perf->ntb));
0510
0511
0512 (void)perf_cmd_recv(perf);
0513 }
0514
0515 static void perf_msg_event(void *ctx)
0516 {
0517 struct perf_ctx *perf = ctx;
0518
0519 dev_dbg(&perf->ntb->dev, "Msg status bits %#llx\n",
0520 ntb_msg_read_sts(perf->ntb));
0521
0522
0523 (void)perf_cmd_recv(perf);
0524 }
0525
0526 static const struct ntb_ctx_ops perf_ops = {
0527 .link_event = perf_link_event,
0528 .db_event = perf_db_event,
0529 .msg_event = perf_msg_event
0530 };
0531
0532 static void perf_free_outbuf(struct perf_peer *peer)
0533 {
0534 (void)ntb_peer_mw_clear_trans(peer->perf->ntb, peer->pidx, peer->gidx);
0535 }
0536
0537 static int perf_setup_outbuf(struct perf_peer *peer)
0538 {
0539 struct perf_ctx *perf = peer->perf;
0540 int ret;
0541
	/* The outbuf size may be unaligned due to a custom max_mw_size */
0543 ret = ntb_peer_mw_set_trans(perf->ntb, peer->pidx, peer->gidx,
0544 peer->outbuf_xlat, peer->outbuf_size);
0545 if (ret) {
0546 dev_err(&perf->ntb->dev, "Failed to set outbuf translation\n");
0547 return ret;
0548 }
0549

	/* Initialization is finally done; release the waiters */
0551 set_bit(PERF_STS_DONE, &peer->sts);
0552 complete_all(&peer->init_comp);
0553
0554 return 0;
0555 }
0556
0557 static void perf_free_inbuf(struct perf_peer *peer)
0558 {
0559 if (!peer->inbuf)
0560 return;
0561
0562 (void)ntb_mw_clear_trans(peer->perf->ntb, peer->pidx, peer->gidx);
0563 dma_free_coherent(&peer->perf->ntb->pdev->dev, peer->inbuf_size,
0564 peer->inbuf, peer->inbuf_xlat);
0565 peer->inbuf = NULL;
0566 }
0567
0568 static int perf_setup_inbuf(struct perf_peer *peer)
0569 {
0570 resource_size_t xlat_align, size_align, size_max;
0571 struct perf_ctx *perf = peer->perf;
0572 int ret;
0573
	/* Get the inbound MW alignment and size restrictions */
0575 ret = ntb_mw_get_align(perf->ntb, peer->pidx, perf->gidx,
0576 &xlat_align, &size_align, &size_max);
0577 if (ret) {
0578 dev_err(&perf->ntb->dev, "Couldn't get inbuf restrictions\n");
0579 return ret;
0580 }
0581
0582 if (peer->inbuf_size > size_max) {
0583 dev_err(&perf->ntb->dev, "Too big inbuf size %pa > %pa\n",
0584 &peer->inbuf_size, &size_max);
0585 return -EINVAL;
0586 }
0587
0588 peer->inbuf_size = round_up(peer->inbuf_size, size_align);
0589
0590 perf_free_inbuf(peer);
0591
0592 peer->inbuf = dma_alloc_coherent(&perf->ntb->pdev->dev,
0593 peer->inbuf_size, &peer->inbuf_xlat,
0594 GFP_KERNEL);
0595 if (!peer->inbuf) {
0596 dev_err(&perf->ntb->dev, "Failed to alloc inbuf of %pa\n",
0597 &peer->inbuf_size);
0598 return -ENOMEM;
0599 }
0600 if (!IS_ALIGNED(peer->inbuf_xlat, xlat_align)) {
0601 ret = -EINVAL;
0602 dev_err(&perf->ntb->dev, "Unaligned inbuf allocated\n");
0603 goto err_free_inbuf;
0604 }
0605
0606 ret = ntb_mw_set_trans(perf->ntb, peer->pidx, peer->gidx,
0607 peer->inbuf_xlat, peer->inbuf_size);
0608 if (ret) {
0609 dev_err(&perf->ntb->dev, "Failed to set inbuf translation\n");
0610 goto err_free_inbuf;
0611 }
0612
	/*
	 * Submit the inbound buffer translation address to the peer. The
	 * command is queued for execution even though this method is itself
	 * called from the service work, so it will run right after return.
	 */
0618 (void)perf_cmd_exec(peer, PERF_CMD_SXLAT);
0619
0620 return 0;
0621
0622 err_free_inbuf:
0623 perf_free_inbuf(peer);
0624
0625 return ret;
0626 }
0627
0628 static void perf_service_work(struct work_struct *work)
0629 {
0630 struct perf_peer *peer = to_peer_service(work);
0631
0632 if (test_and_clear_bit(PERF_CMD_SSIZE, &peer->sts))
0633 perf_cmd_send(peer, PERF_CMD_SSIZE, peer->outbuf_size);
0634
0635 if (test_and_clear_bit(PERF_CMD_RSIZE, &peer->sts))
0636 perf_setup_inbuf(peer);
0637
0638 if (test_and_clear_bit(PERF_CMD_SXLAT, &peer->sts))
0639 perf_cmd_send(peer, PERF_CMD_SXLAT, peer->inbuf_xlat);
0640
0641 if (test_and_clear_bit(PERF_CMD_RXLAT, &peer->sts))
0642 perf_setup_outbuf(peer);
0643
0644 if (test_and_clear_bit(PERF_CMD_CLEAR, &peer->sts)) {
0645 init_completion(&peer->init_comp);
0646 clear_bit(PERF_STS_DONE, &peer->sts);
0647 if (test_bit(0, &peer->perf->busy_flag) &&
0648 peer == peer->perf->test_peer) {
0649 dev_warn(&peer->perf->ntb->dev,
0650 "Freeing while test on-fly\n");
0651 perf_terminate_test(peer->perf);
0652 }
0653 perf_free_outbuf(peer);
0654 perf_free_inbuf(peer);
0655 }
0656 }
0657
0658 static int perf_init_service(struct perf_ctx *perf)
0659 {
0660 u64 mask;
0661
0662 if (ntb_peer_mw_count(perf->ntb) < perf->pcnt) {
0663 dev_err(&perf->ntb->dev, "Not enough memory windows\n");
0664 return -EINVAL;
0665 }
0666
0667 if (ntb_msg_count(perf->ntb) >= PERF_MSG_CNT) {
0668 perf->cmd_send = perf_msg_cmd_send;
0669 perf->cmd_recv = perf_msg_cmd_recv;
0670
0671 dev_dbg(&perf->ntb->dev, "Message service initialized\n");
0672
0673 return 0;
0674 }
0675
0676 dev_dbg(&perf->ntb->dev, "Message service unsupported\n");
0677
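	/* One notification doorbell bit per global index (0 .. pcnt) */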
0678 mask = GENMASK_ULL(perf->pcnt, 0);
0679 if (ntb_spad_count(perf->ntb) >= PERF_SPAD_CNT(perf->pcnt) &&
0680 (ntb_db_valid_mask(perf->ntb) & mask) == mask) {
0681 perf->cmd_send = perf_spad_cmd_send;
0682 perf->cmd_recv = perf_spad_cmd_recv;
0683
0684 dev_dbg(&perf->ntb->dev, "Scratchpad service initialized\n");
0685
0686 return 0;
0687 }
0688
0689 dev_dbg(&perf->ntb->dev, "Scratchpad service unsupported\n");
0690
0691 dev_err(&perf->ntb->dev, "Command services unsupported\n");
0692
0693 return -EINVAL;
0694 }
0695
0696 static int perf_enable_service(struct perf_ctx *perf)
0697 {
0698 u64 mask, incmd_bit;
0699 int ret, sidx, scnt;
0700
0701 mask = ntb_db_valid_mask(perf->ntb);
0702 (void)ntb_db_set_mask(perf->ntb, mask);
0703
0704 ret = ntb_set_ctx(perf->ntb, perf, &perf_ops);
0705 if (ret)
0706 return ret;
0707
0708 if (perf->cmd_send == perf_msg_cmd_send) {
0709 u64 inbits, outbits;
0710
0711 inbits = ntb_msg_inbits(perf->ntb);
0712 outbits = ntb_msg_outbits(perf->ntb);
0713 (void)ntb_msg_set_mask(perf->ntb, inbits | outbits);
0714
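		/* Unmask only the lowest inbound message bit for command notification */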
0715 incmd_bit = BIT_ULL(__ffs64(inbits));
0716 ret = ntb_msg_clear_mask(perf->ntb, incmd_bit);
0717
0718 dev_dbg(&perf->ntb->dev, "MSG sts unmasked %#llx\n", incmd_bit);
0719 } else {
0720 scnt = ntb_spad_count(perf->ntb);
0721 for (sidx = 0; sidx < scnt; sidx++)
0722 ntb_spad_write(perf->ntb, sidx, PERF_CMD_INVAL);
0723 incmd_bit = PERF_SPAD_NOTIFY(perf->gidx);
0724 ret = ntb_db_clear_mask(perf->ntb, incmd_bit);
0725
0726 dev_dbg(&perf->ntb->dev, "DB bits unmasked %#llx\n", incmd_bit);
0727 }
0728 if (ret) {
0729 ntb_clear_ctx(perf->ntb);
0730 return ret;
0731 }
0732
0733 ntb_link_enable(perf->ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
0734
0735 ntb_link_event(perf->ntb);
0736
0737 return 0;
0738 }
0739
0740 static void perf_disable_service(struct perf_ctx *perf)
0741 {
0742 int pidx;
0743
0744 if (perf->cmd_send == perf_msg_cmd_send) {
0745 u64 inbits;
0746
0747 inbits = ntb_msg_inbits(perf->ntb);
0748 (void)ntb_msg_set_mask(perf->ntb, inbits);
0749 } else {
0750 (void)ntb_db_set_mask(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx));
0751 }
0752
0753 ntb_clear_ctx(perf->ntb);
0754
0755 for (pidx = 0; pidx < perf->pcnt; pidx++)
0756 perf_cmd_exec(&perf->peers[pidx], PERF_CMD_CLEAR);
0757
0758 for (pidx = 0; pidx < perf->pcnt; pidx++)
0759 flush_work(&perf->peers[pidx].service);
0760
0761 for (pidx = 0; pidx < perf->pcnt; pidx++) {
0762 struct perf_peer *peer = &perf->peers[pidx];
0763
0764 ntb_spad_write(perf->ntb, PERF_SPAD_CMD(peer->gidx), 0);
0765 }
0766
0767 ntb_db_clear(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx));
0768
0769 ntb_link_disable(perf->ntb);
0770 }
0771
/*
 * Performance measuring work-threads
 */
0777 static void perf_dma_copy_callback(void *data)
0778 {
0779 struct perf_thread *pthr = data;
0780
0781 atomic_dec(&pthr->dma_sync);
0782 wake_up(&pthr->dma_wait);
0783 }
0784
0785 static int perf_copy_chunk(struct perf_thread *pthr,
0786 void __iomem *dst, void *src, size_t len)
0787 {
0788 struct dma_async_tx_descriptor *tx;
0789 struct dmaengine_unmap_data *unmap;
0790 struct device *dma_dev;
0791 int try = 0, ret = 0;
0792 struct perf_peer *peer = pthr->perf->test_peer;
0793 void __iomem *vbase;
0794 void __iomem *dst_vaddr;
0795 dma_addr_t dst_dma_addr;
0796
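	/* CPU-driven transfer: copy straight through the MMIO mapping */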
0797 if (!use_dma) {
0798 memcpy_toio(dst, src, len);
0799 goto ret_check_tsync;
0800 }
0801
0802 dma_dev = pthr->dma_chan->device->dev;
0803
0804 if (!is_dma_copy_aligned(pthr->dma_chan->device, offset_in_page(src),
0805 offset_in_page(dst), len))
0806 return -EIO;
0807
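	/*
	 * Translate the destination virtual address within the ioremapped
	 * peer outbuf into the corresponding offset of the DMA-mapped MMIO
	 * resource set up at the test init stage.
	 */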
0808 vbase = peer->outbuf;
0809 dst_vaddr = dst;
0810 dst_dma_addr = peer->dma_dst_addr + (dst_vaddr - vbase);
0811
0812 unmap = dmaengine_get_unmap_data(dma_dev, 1, GFP_NOWAIT);
0813 if (!unmap)
0814 return -ENOMEM;
0815
0816 unmap->len = len;
0817 unmap->addr[0] = dma_map_page(dma_dev, virt_to_page(src),
0818 offset_in_page(src), len, DMA_TO_DEVICE);
0819 if (dma_mapping_error(dma_dev, unmap->addr[0])) {
0820 ret = -EIO;
0821 goto err_free_resource;
0822 }
0823 unmap->to_cnt = 1;
0824
0825 do {
0826 tx = dmaengine_prep_dma_memcpy(pthr->dma_chan, dst_dma_addr,
0827 unmap->addr[0], len, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
0828 if (!tx)
0829 msleep(DMA_MDELAY);
0830 } while (!tx && (try++ < DMA_TRIES));
0831
0832 if (!tx) {
0833 ret = -EIO;
0834 goto err_free_resource;
0835 }
0836
0837 tx->callback = perf_dma_copy_callback;
0838 tx->callback_param = pthr;
0839 dma_set_unmap(tx, unmap);
0840
0841 ret = dma_submit_error(dmaengine_submit(tx));
0842 if (ret) {
0843 dmaengine_unmap_put(unmap);
0844 goto err_free_resource;
0845 }
0846
0847 dmaengine_unmap_put(unmap);
0848
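	/* Account the in-flight descriptor; the DMA callback drops the count */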
0849 atomic_inc(&pthr->dma_sync);
0850 dma_async_issue_pending(pthr->dma_chan);
0851
0852 ret_check_tsync:
0853 return likely(atomic_read(&pthr->perf->tsync) > 0) ? 0 : -EINTR;
0854
0855 err_free_resource:
0856 dmaengine_unmap_put(unmap);
0857
0858 return ret;
0859 }
0860
0861 static bool perf_dma_filter(struct dma_chan *chan, void *data)
0862 {
0863 struct perf_ctx *perf = data;
0864 int node;
0865
0866 node = dev_to_node(&perf->ntb->dev);
0867
0868 return node == NUMA_NO_NODE || node == dev_to_node(chan->device->dev);
0869 }
0870
0871 static int perf_init_test(struct perf_thread *pthr)
0872 {
0873 struct perf_ctx *perf = pthr->perf;
0874 dma_cap_mask_t dma_mask;
0875 struct perf_peer *peer = pthr->perf->test_peer;
0876
0877 pthr->src = kmalloc_node(perf->test_peer->outbuf_size, GFP_KERNEL,
0878 dev_to_node(&perf->ntb->dev));
0879 if (!pthr->src)
0880 return -ENOMEM;
0881
0882 get_random_bytes(pthr->src, perf->test_peer->outbuf_size);
0883
0884 if (!use_dma)
0885 return 0;
0886
0887 dma_cap_zero(dma_mask);
0888 dma_cap_set(DMA_MEMCPY, dma_mask);
0889 pthr->dma_chan = dma_request_channel(dma_mask, perf_dma_filter, perf);
0890 if (!pthr->dma_chan) {
0891 dev_err(&perf->ntb->dev, "%d: Failed to get DMA channel\n",
0892 pthr->tidx);
0893 goto err_free;
0894 }
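	/* Map the peer MMIO buffer as a DMA destination for this channel */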
0895 peer->dma_dst_addr =
0896 dma_map_resource(pthr->dma_chan->device->dev,
0897 peer->out_phys_addr, peer->outbuf_size,
0898 DMA_FROM_DEVICE, 0);
0899 if (dma_mapping_error(pthr->dma_chan->device->dev,
0900 peer->dma_dst_addr)) {
0901 dev_err(pthr->dma_chan->device->dev, "%d: Failed to map DMA addr\n",
0902 pthr->tidx);
0903 peer->dma_dst_addr = 0;
0904 dma_release_channel(pthr->dma_chan);
0905 goto err_free;
0906 }
0907 dev_dbg(pthr->dma_chan->device->dev, "%d: Map MMIO %pa to DMA addr %pad\n",
0908 pthr->tidx,
0909 &peer->out_phys_addr,
0910 &peer->dma_dst_addr);
0911
0912 atomic_set(&pthr->dma_sync, 0);
0913 return 0;
0914
0915 err_free:
0916 atomic_dec(&perf->tsync);
0917 wake_up(&perf->twait);
0918 kfree(pthr->src);
0919 return -ENODEV;
0920 }
0921
0922 static int perf_run_test(struct perf_thread *pthr)
0923 {
0924 struct perf_peer *peer = pthr->perf->test_peer;
0925 struct perf_ctx *perf = pthr->perf;
0926 void __iomem *flt_dst, *bnd_dst;
0927 u64 total_size, chunk_size;
0928 void *flt_src;
0929 int ret = 0;
0930
0931 total_size = 1ULL << total_order;
0932 chunk_size = 1ULL << chunk_order;
0933 chunk_size = min_t(u64, peer->outbuf_size, chunk_size);
0934
0935 flt_src = pthr->src;
0936 bnd_dst = peer->outbuf + peer->outbuf_size;
0937 flt_dst = peer->outbuf;
0938
0939 pthr->duration = ktime_get();
0940
	/* The copied counter is cleared at the test submission stage */
0942 while (pthr->copied < total_size) {
0943 ret = perf_copy_chunk(pthr, flt_dst, flt_src, chunk_size);
0944 if (ret) {
0945 dev_err(&perf->ntb->dev, "%d: Got error %d on test\n",
0946 pthr->tidx, ret);
0947 return ret;
0948 }
0949
0950 pthr->copied += chunk_size;
0951
0952 flt_dst += chunk_size;
0953 flt_src += chunk_size;
0954 if (flt_dst >= bnd_dst || flt_dst < peer->outbuf) {
0955 flt_dst = peer->outbuf;
0956 flt_src = pthr->src;
0957 }
0958
		/* Let the scheduler run to avoid processor soft lock-ups */
0960 schedule();
0961 }
0962
0963 return 0;
0964 }
0965
0966 static int perf_sync_test(struct perf_thread *pthr)
0967 {
0968 struct perf_ctx *perf = pthr->perf;
0969
0970 if (!use_dma)
0971 goto no_dma_ret;
0972
0973 wait_event(pthr->dma_wait,
0974 (atomic_read(&pthr->dma_sync) == 0 ||
0975 atomic_read(&perf->tsync) < 0));
0976
0977 if (atomic_read(&perf->tsync) < 0)
0978 return -EINTR;
0979
0980 no_dma_ret:
0981 pthr->duration = ktime_sub(ktime_get(), pthr->duration);
0982
0983 dev_dbg(&perf->ntb->dev, "%d: copied %llu bytes\n",
0984 pthr->tidx, pthr->copied);
0985
0986 dev_dbg(&perf->ntb->dev, "%d: lasted %llu usecs\n",
0987 pthr->tidx, ktime_to_us(pthr->duration));
0988
0989 dev_dbg(&perf->ntb->dev, "%d: %llu MBytes/s\n", pthr->tidx,
0990 div64_u64(pthr->copied, ktime_to_us(pthr->duration)));
0991
0992 return 0;
0993 }
0994
0995 static void perf_clear_test(struct perf_thread *pthr)
0996 {
0997 struct perf_ctx *perf = pthr->perf;
0998
0999 if (!use_dma)
1000 goto no_dma_notify;
1001
	/*
	 * If the test finished without errors the termination isn't strictly
	 * needed, but call it anyway to be sure all transfers have completed.
	 */
1006 (void)dmaengine_terminate_sync(pthr->dma_chan);
1007 if (pthr->perf->test_peer->dma_dst_addr)
1008 dma_unmap_resource(pthr->dma_chan->device->dev,
1009 pthr->perf->test_peer->dma_dst_addr,
1010 pthr->perf->test_peer->outbuf_size,
1011 DMA_FROM_DEVICE, 0);
1012
1013 dma_release_channel(pthr->dma_chan);
1014
1015 no_dma_notify:
1016 atomic_dec(&perf->tsync);
1017 wake_up(&perf->twait);
1018 kfree(pthr->src);
1019 }
1020
1021 static void perf_thread_work(struct work_struct *work)
1022 {
1023 struct perf_thread *pthr = to_thread_work(work);
1024 int ret;
1025
	/*
	 * Run the test stages in accordance with the use_dma flag. The
	 * thread status is changed only if an error happens, otherwise it
	 * stays -ENODATA while the test is in progress, and the results are
	 * synchronized only if the test finished without an error or an
	 * interruption.
	 */
1033 ret = perf_init_test(pthr);
1034 if (ret) {
1035 pthr->status = ret;
1036 return;
1037 }
1038
1039 ret = perf_run_test(pthr);
1040 if (ret) {
1041 pthr->status = ret;
1042 goto err_clear_test;
1043 }
1044
1045 pthr->status = perf_sync_test(pthr);
1046
1047 err_clear_test:
1048 perf_clear_test(pthr);
1049 }
1050
1051 static int perf_set_tcnt(struct perf_ctx *perf, u8 tcnt)
1052 {
1053 if (tcnt == 0 || tcnt > MAX_THREADS_CNT)
1054 return -EINVAL;
1055
1056 if (test_and_set_bit_lock(0, &perf->busy_flag))
1057 return -EBUSY;
1058
1059 perf->tcnt = tcnt;
1060
1061 clear_bit_unlock(0, &perf->busy_flag);
1062
1063 return 0;
1064 }
1065
1066 static void perf_terminate_test(struct perf_ctx *perf)
1067 {
1068 int tidx;
1069
1070 atomic_set(&perf->tsync, -1);
1071 wake_up(&perf->twait);
1072
1073 for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
1074 wake_up(&perf->threads[tidx].dma_wait);
1075 cancel_work_sync(&perf->threads[tidx].work);
1076 }
1077 }
1078
1079 static int perf_submit_test(struct perf_peer *peer)
1080 {
1081 struct perf_ctx *perf = peer->perf;
1082 struct perf_thread *pthr;
1083 int tidx, ret;
1084
1085 ret = wait_for_completion_interruptible(&peer->init_comp);
1086 if (ret < 0)
1087 return ret;
1088
1089 if (test_and_set_bit_lock(0, &perf->busy_flag))
1090 return -EBUSY;
1091
1092 perf->test_peer = peer;
1093 atomic_set(&perf->tsync, perf->tcnt);
1094
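	/*
	 * Reset all thread descriptors, but only kick the first tcnt work
	 * items; the rest keep the -ENODATA status and are skipped when the
	 * statistics are read back.
	 */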
1095 for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
1096 pthr = &perf->threads[tidx];
1097
1098 pthr->status = -ENODATA;
1099 pthr->copied = 0;
1100 pthr->duration = ktime_set(0, 0);
1101 if (tidx < perf->tcnt)
1102 (void)queue_work(perf_wq, &pthr->work);
1103 }
1104
1105 ret = wait_event_interruptible(perf->twait,
1106 atomic_read(&perf->tsync) <= 0);
1107 if (ret == -ERESTARTSYS) {
1108 perf_terminate_test(perf);
1109 ret = -EINTR;
1110 }
1111
1112 clear_bit_unlock(0, &perf->busy_flag);
1113
1114 return ret;
1115 }
1116
1117 static int perf_read_stats(struct perf_ctx *perf, char *buf,
1118 size_t size, ssize_t *pos)
1119 {
1120 struct perf_thread *pthr;
1121 int tidx;
1122
1123 if (test_and_set_bit_lock(0, &perf->busy_flag))
1124 return -EBUSY;
1125
1126 (*pos) += scnprintf(buf + *pos, size - *pos,
1127 " Peer %d test statistics:\n", perf->test_peer->pidx);
1128
1129 for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
1130 pthr = &perf->threads[tidx];
1131
1132 if (pthr->status == -ENODATA)
1133 continue;
1134
1135 if (pthr->status) {
1136 (*pos) += scnprintf(buf + *pos, size - *pos,
1137 "%d: error status %d\n", tidx, pthr->status);
1138 continue;
1139 }
1140
1141 (*pos) += scnprintf(buf + *pos, size - *pos,
1142 "%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n",
1143 tidx, pthr->copied, ktime_to_us(pthr->duration),
1144 div64_u64(pthr->copied, ktime_to_us(pthr->duration)));
1145 }
1146
1147 clear_bit_unlock(0, &perf->busy_flag);
1148
1149 return 0;
1150 }
1151
1152 static void perf_init_threads(struct perf_ctx *perf)
1153 {
1154 struct perf_thread *pthr;
1155 int tidx;
1156
1157 perf->tcnt = DEF_THREADS_CNT;
1158 perf->test_peer = &perf->peers[0];
1159 init_waitqueue_head(&perf->twait);
1160
1161 for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
1162 pthr = &perf->threads[tidx];
1163
1164 pthr->perf = perf;
1165 pthr->tidx = tidx;
1166 pthr->status = -ENODATA;
1167 init_waitqueue_head(&pthr->dma_wait);
1168 INIT_WORK(&pthr->work, perf_thread_work);
1169 }
1170 }
1171
1172 static void perf_clear_threads(struct perf_ctx *perf)
1173 {
1174 perf_terminate_test(perf);
1175 }
1176
/*
 * DebugFS nodes
 */
1182 static ssize_t perf_dbgfs_read_info(struct file *filep, char __user *ubuf,
1183 size_t size, loff_t *offp)
1184 {
1185 struct perf_ctx *perf = filep->private_data;
1186 struct perf_peer *peer;
1187 size_t buf_size;
1188 ssize_t pos = 0;
1189 int ret, pidx;
1190 char *buf;
1191
1192 buf_size = min_t(size_t, size, 0x1000U);
1193
1194 buf = kmalloc(buf_size, GFP_KERNEL);
1195 if (!buf)
1196 return -ENOMEM;
1197
1198 pos += scnprintf(buf + pos, buf_size - pos,
1199 " Performance measuring tool info:\n\n");
1200
1201 pos += scnprintf(buf + pos, buf_size - pos,
1202 "Local port %d, Global index %d\n", ntb_port_number(perf->ntb),
1203 perf->gidx);
1204 pos += scnprintf(buf + pos, buf_size - pos, "Test status: ");
1205 if (test_bit(0, &perf->busy_flag)) {
1206 pos += scnprintf(buf + pos, buf_size - pos,
1207 "on-fly with port %d (%d)\n",
1208 ntb_peer_port_number(perf->ntb, perf->test_peer->pidx),
1209 perf->test_peer->pidx);
1210 } else {
1211 pos += scnprintf(buf + pos, buf_size - pos, "idle\n");
1212 }
1213
1214 for (pidx = 0; pidx < perf->pcnt; pidx++) {
1215 peer = &perf->peers[pidx];
1216
1217 pos += scnprintf(buf + pos, buf_size - pos,
1218 "Port %d (%d), Global index %d:\n",
1219 ntb_peer_port_number(perf->ntb, peer->pidx), peer->pidx,
1220 peer->gidx);
1221
1222 pos += scnprintf(buf + pos, buf_size - pos,
1223 "\tLink status: %s\n",
1224 test_bit(PERF_STS_LNKUP, &peer->sts) ? "up" : "down");
1225
1226 pos += scnprintf(buf + pos, buf_size - pos,
1227 "\tOut buffer addr 0x%pK\n", peer->outbuf);
1228
1229 pos += scnprintf(buf + pos, buf_size - pos,
1230 "\tOut buff phys addr %pa[p]\n", &peer->out_phys_addr);
1231
1232 pos += scnprintf(buf + pos, buf_size - pos,
1233 "\tOut buffer size %pa\n", &peer->outbuf_size);
1234
1235 pos += scnprintf(buf + pos, buf_size - pos,
1236 "\tOut buffer xlat 0x%016llx[p]\n", peer->outbuf_xlat);
1237
1238 if (!peer->inbuf) {
1239 pos += scnprintf(buf + pos, buf_size - pos,
1240 "\tIn buffer addr: unallocated\n");
1241 continue;
1242 }
1243
1244 pos += scnprintf(buf + pos, buf_size - pos,
1245 "\tIn buffer addr 0x%pK\n", peer->inbuf);
1246
1247 pos += scnprintf(buf + pos, buf_size - pos,
1248 "\tIn buffer size %pa\n", &peer->inbuf_size);
1249
1250 pos += scnprintf(buf + pos, buf_size - pos,
1251 "\tIn buffer xlat %pad[p]\n", &peer->inbuf_xlat);
1252 }
1253
1254 ret = simple_read_from_buffer(ubuf, size, offp, buf, pos);
1255 kfree(buf);
1256
1257 return ret;
1258 }
1259
1260 static const struct file_operations perf_dbgfs_info = {
1261 .open = simple_open,
1262 .read = perf_dbgfs_read_info
1263 };
1264
1265 static ssize_t perf_dbgfs_read_run(struct file *filep, char __user *ubuf,
1266 size_t size, loff_t *offp)
1267 {
1268 struct perf_ctx *perf = filep->private_data;
1269 ssize_t ret, pos = 0;
1270 char *buf;
1271
1272 buf = kmalloc(PERF_BUF_LEN, GFP_KERNEL);
1273 if (!buf)
1274 return -ENOMEM;
1275
1276 ret = perf_read_stats(perf, buf, PERF_BUF_LEN, &pos);
1277 if (ret)
1278 goto err_free;
1279
1280 ret = simple_read_from_buffer(ubuf, size, offp, buf, pos);
1281 err_free:
1282 kfree(buf);
1283
1284 return ret;
1285 }
1286
1287 static ssize_t perf_dbgfs_write_run(struct file *filep, const char __user *ubuf,
1288 size_t size, loff_t *offp)
1289 {
1290 struct perf_ctx *perf = filep->private_data;
1291 struct perf_peer *peer;
1292 int pidx, ret;
1293
1294 ret = kstrtoint_from_user(ubuf, size, 0, &pidx);
1295 if (ret)
1296 return ret;
1297
1298 if (pidx < 0 || pidx >= perf->pcnt)
1299 return -EINVAL;
1300
1301 peer = &perf->peers[pidx];
1302
1303 ret = perf_submit_test(peer);
1304 if (ret)
1305 return ret;
1306
1307 return size;
1308 }
1309
1310 static const struct file_operations perf_dbgfs_run = {
1311 .open = simple_open,
1312 .read = perf_dbgfs_read_run,
1313 .write = perf_dbgfs_write_run
1314 };
1315
1316 static ssize_t perf_dbgfs_read_tcnt(struct file *filep, char __user *ubuf,
1317 size_t size, loff_t *offp)
1318 {
1319 struct perf_ctx *perf = filep->private_data;
1320 char buf[8];
1321 ssize_t pos;
1322
1323 pos = scnprintf(buf, sizeof(buf), "%hhu\n", perf->tcnt);
1324
1325 return simple_read_from_buffer(ubuf, size, offp, buf, pos);
1326 }
1327
1328 static ssize_t perf_dbgfs_write_tcnt(struct file *filep,
1329 const char __user *ubuf,
1330 size_t size, loff_t *offp)
1331 {
1332 struct perf_ctx *perf = filep->private_data;
1333 int ret;
1334 u8 val;
1335
1336 ret = kstrtou8_from_user(ubuf, size, 0, &val);
1337 if (ret)
1338 return ret;
1339
1340 ret = perf_set_tcnt(perf, val);
1341 if (ret)
1342 return ret;
1343
1344 return size;
1345 }
1346
1347 static const struct file_operations perf_dbgfs_tcnt = {
1348 .open = simple_open,
1349 .read = perf_dbgfs_read_tcnt,
1350 .write = perf_dbgfs_write_tcnt
1351 };
1352
1353 static void perf_setup_dbgfs(struct perf_ctx *perf)
1354 {
1355 struct pci_dev *pdev = perf->ntb->pdev;
1356
1357 perf->dbgfs_dir = debugfs_create_dir(pci_name(pdev), perf_dbgfs_topdir);
1358 if (!perf->dbgfs_dir) {
1359 dev_warn(&perf->ntb->dev, "DebugFS unsupported\n");
1360 return;
1361 }
1362
1363 debugfs_create_file("info", 0600, perf->dbgfs_dir, perf,
1364 &perf_dbgfs_info);
1365
1366 debugfs_create_file("run", 0600, perf->dbgfs_dir, perf,
1367 &perf_dbgfs_run);
1368
1369 debugfs_create_file("threads_count", 0600, perf->dbgfs_dir, perf,
1370 &perf_dbgfs_tcnt);
1371
	/* Exposed read-only via debugfs for test safety and integrity */
1373 debugfs_create_u8("chunk_order", 0500, perf->dbgfs_dir, &chunk_order);
1374
1375 debugfs_create_u8("total_order", 0500, perf->dbgfs_dir, &total_order);
1376
1377 debugfs_create_bool("use_dma", 0500, perf->dbgfs_dir, &use_dma);
1378 }
1379
1380 static void perf_clear_dbgfs(struct perf_ctx *perf)
1381 {
1382 debugfs_remove_recursive(perf->dbgfs_dir);
1383 }
1384
/*
 * Basic driver initialization
 */
1390 static struct perf_ctx *perf_create_data(struct ntb_dev *ntb)
1391 {
1392 struct perf_ctx *perf;
1393
1394 perf = devm_kzalloc(&ntb->dev, sizeof(*perf), GFP_KERNEL);
1395 if (!perf)
1396 return ERR_PTR(-ENOMEM);
1397
1398 perf->pcnt = ntb_peer_port_count(ntb);
1399 perf->peers = devm_kcalloc(&ntb->dev, perf->pcnt, sizeof(*perf->peers),
1400 GFP_KERNEL);
1401 if (!perf->peers)
1402 return ERR_PTR(-ENOMEM);
1403
1404 perf->ntb = ntb;
1405
1406 return perf;
1407 }
1408
1409 static int perf_setup_peer_mw(struct perf_peer *peer)
1410 {
1411 struct perf_ctx *perf = peer->perf;
1412 phys_addr_t phys_addr;
1413 int ret;
1414
	/* Get the outbound MW parameters and map the peer buffer */
1416 ret = ntb_peer_mw_get_addr(perf->ntb, perf->gidx, &phys_addr,
1417 &peer->outbuf_size);
1418 if (ret)
1419 return ret;
1420
1421 peer->outbuf = devm_ioremap_wc(&perf->ntb->dev, phys_addr,
1422 peer->outbuf_size);
1423 if (!peer->outbuf)
1424 return -ENOMEM;
1425
1426 peer->out_phys_addr = phys_addr;
1427
1428 if (max_mw_size && peer->outbuf_size > max_mw_size) {
1429 peer->outbuf_size = max_mw_size;
1430 dev_warn(&peer->perf->ntb->dev,
1431 "Peer %d outbuf reduced to %pa\n", peer->pidx,
1432 &peer->outbuf_size);
1433 }
1434
1435 return 0;
1436 }
1437
1438 static int perf_init_peers(struct perf_ctx *perf)
1439 {
1440 struct perf_peer *peer;
1441 int pidx, lport, ret;
1442
1443 lport = ntb_port_number(perf->ntb);
1444 perf->gidx = -1;
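	/*
	 * Global indexes enumerate all ports (local and peers) in increasing
	 * port-number order: peers with a port number above the local one
	 * are shifted by one to leave a slot for the local port itself.
	 */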
1445 for (pidx = 0; pidx < perf->pcnt; pidx++) {
1446 peer = &perf->peers[pidx];
1447
1448 peer->perf = perf;
1449 peer->pidx = pidx;
1450 if (lport < ntb_peer_port_number(perf->ntb, pidx)) {
1451 if (perf->gidx == -1)
1452 perf->gidx = pidx;
1453 peer->gidx = pidx + 1;
1454 } else {
1455 peer->gidx = pidx;
1456 }
1457 INIT_WORK(&peer->service, perf_service_work);
1458 init_completion(&peer->init_comp);
1459 }
1460 if (perf->gidx == -1)
1461 perf->gidx = pidx;
1462
	/*
	 * Hardware with only two ports may not expose unique port numbers;
	 * if both the local and the single peer port report number zero,
	 * fall back to zero global indexes on both sides.
	 */
1467 if (perf->pcnt == 1 && ntb_port_number(perf->ntb) == 0 &&
1468 ntb_peer_port_number(perf->ntb, 0) == 0) {
1469 perf->gidx = 0;
1470 perf->peers[0].gidx = 0;
1471 }
1472
1473 for (pidx = 0; pidx < perf->pcnt; pidx++) {
1474 ret = perf_setup_peer_mw(&perf->peers[pidx]);
1475 if (ret)
1476 return ret;
1477 }
1478
1479 dev_dbg(&perf->ntb->dev, "Global port index %d\n", perf->gidx);
1480
1481 return 0;
1482 }
1483
1484 static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
1485 {
1486 struct perf_ctx *perf;
1487 int ret;
1488
1489 perf = perf_create_data(ntb);
1490 if (IS_ERR(perf))
1491 return PTR_ERR(perf);
1492
1493 ret = perf_init_peers(perf);
1494 if (ret)
1495 return ret;
1496
1497 perf_init_threads(perf);
1498
1499 ret = perf_init_service(perf);
1500 if (ret)
1501 return ret;
1502
1503 ret = perf_enable_service(perf);
1504 if (ret)
1505 return ret;
1506
1507 perf_setup_dbgfs(perf);
1508
1509 return 0;
1510 }
1511
1512 static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
1513 {
1514 struct perf_ctx *perf = ntb->ctx;
1515
1516 perf_clear_dbgfs(perf);
1517
1518 perf_disable_service(perf);
1519
1520 perf_clear_threads(perf);
1521 }
1522
1523 static struct ntb_client perf_client = {
1524 .ops = {
1525 .probe = perf_probe,
1526 .remove = perf_remove
1527 }
1528 };
1529
1530 static int __init perf_init(void)
1531 {
1532 int ret;
1533
1534 if (chunk_order > MAX_CHUNK_ORDER) {
1535 chunk_order = MAX_CHUNK_ORDER;
1536 pr_info("Chunk order reduced to %hhu\n", chunk_order);
1537 }
1538
1539 if (total_order < chunk_order) {
1540 total_order = chunk_order;
1541 pr_info("Total data order reduced to %hhu\n", total_order);
1542 }
1543
1544 perf_wq = alloc_workqueue("perf_wq", WQ_UNBOUND | WQ_SYSFS, 0);
1545 if (!perf_wq)
1546 return -ENOMEM;
1547
1548 if (debugfs_initialized())
1549 perf_dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL);
1550
1551 ret = ntb_register_client(&perf_client);
1552 if (ret) {
1553 debugfs_remove_recursive(perf_dbgfs_topdir);
1554 destroy_workqueue(perf_wq);
1555 }
1556
1557 return ret;
1558 }
1559 module_init(perf_init);
1560
1561 static void __exit perf_exit(void)
1562 {
1563 ntb_unregister_client(&perf_client);
1564 debugfs_remove_recursive(perf_dbgfs_topdir);
1565 destroy_workqueue(perf_wq);
1566 }
1567 module_exit(perf_exit);