0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019 #include <linux/types.h>
0020 #include <linux/pci.h>
0021 #include <linux/err.h>
0022 #include <linux/aer.h>
0023 #include <linux/string.h>
0024 #include <linux/sched.h>
0025 #include <linux/wait.h>
0026 #include <linux/delay.h>
0027 #include <linux/dma-mapping.h>
0028 #include <linux/module.h>
0029 #include <linux/notifier.h>
0030 #include <linux/device.h>
0031 #include <linux/log2.h>
0032
0033 #include "card_base.h"
0034 #include "card_ddcb.h"
0035
0036 MODULE_AUTHOR("Frank Haverkamp <haver@linux.vnet.ibm.com>");
0037 MODULE_AUTHOR("Michael Ruettger <michael@ibmra.de>");
0038 MODULE_AUTHOR("Joerg-Stephan Vogt <jsvogt@de.ibm.com>");
0039 MODULE_AUTHOR("Michael Jung <mijung@gmx.net>");
0040
0041 MODULE_DESCRIPTION("GenWQE Card");
0042 MODULE_VERSION(DRV_VERSION);
0043 MODULE_LICENSE("GPL");
0044
0045 static char genwqe_driver_name[] = GENWQE_DEVNAME;
0046 static struct class *class_genwqe;
0047 static struct dentry *debugfs_genwqe;
0048 static struct genwqe_dev *genwqe_devices[GENWQE_CARD_NO_MAX];
0049
0050
/*
 * PCI device IDs served by this driver. Entries match on vendor,
 * device, subsystem IDs and class code. Several combinations are
 * listed because physical functions (PF) and SR-IOV virtual
 * functions (VF) of the GenWQE5 card can report different subsystem
 * and class codes.
 */
static const struct pci_device_id genwqe_device_table[] = {
	/* GenWQE5 physical function */
	{ .vendor = PCI_VENDOR_ID_IBM,
	.device = PCI_DEVICE_GENWQE,
	.subvendor = PCI_SUBVENDOR_ID_IBM,
	.subdevice = PCI_SUBSYSTEM_ID_GENWQE5,
	.class = (PCI_CLASSCODE_GENWQE5 << 8),
	.class_mask = ~0,
	.driver_data = 0 },

	/* SR-IOV variant with SRIOV subsystem/class codes */
	{ .vendor = PCI_VENDOR_ID_IBM,
	.device = PCI_DEVICE_GENWQE,
	.subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV,
	.subdevice = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV,
	.class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
	.class_mask = ~0,
	.driver_data = 0 },

	/* device id 0x0000 — presumably VFs reporting no device id;
	   NOTE(review): inferred from the IDs, confirm with HW docs */
	{ .vendor = PCI_VENDOR_ID_IBM,
	.device = 0x0000,
	.subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV,
	.subdevice = PCI_SUBSYSTEM_ID_GENWQE5_SRIOV,
	.class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
	.class_mask = ~0,
	.driver_data = 0 },

	/* SRIOV subvendor/class with plain GENWQE5 subsystem id */
	{ .vendor = PCI_VENDOR_ID_IBM,
	.device = PCI_DEVICE_GENWQE,
	.subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV,
	.subdevice = PCI_SUBSYSTEM_ID_GENWQE5,
	.class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
	.class_mask = ~0,
	.driver_data = 0 },

	{ .vendor = PCI_VENDOR_ID_IBM,
	.device = 0x0000,
	.subvendor = PCI_SUBVENDOR_ID_IBM_SRIOV,
	.subdevice = PCI_SUBSYSTEM_ID_GENWQE5,
	.class = (PCI_CLASSCODE_GENWQE5_SRIOV << 8),
	.class_mask = ~0,
	.driver_data = 0 },

	/* Newer subsystem id for the GenWQE5 card */
	{ .vendor = PCI_VENDOR_ID_IBM,
	.device = PCI_DEVICE_GENWQE,
	.subvendor = PCI_SUBVENDOR_ID_IBM,
	.subdevice = PCI_SUBSYSTEM_ID_GENWQE5_NEW,
	.class = (PCI_CLASSCODE_GENWQE5 << 8),
	.class_mask = ~0,
	.driver_data = 0 },

	{ 0, }			/* terminator */
};

MODULE_DEVICE_TABLE(pci, genwqe_device_table);
0107
0108
0109
0110
0111
0112
0113 static struct genwqe_dev *genwqe_dev_alloc(void)
0114 {
0115 unsigned int i = 0, j;
0116 struct genwqe_dev *cd;
0117
0118 for (i = 0; i < GENWQE_CARD_NO_MAX; i++) {
0119 if (genwqe_devices[i] == NULL)
0120 break;
0121 }
0122 if (i >= GENWQE_CARD_NO_MAX)
0123 return ERR_PTR(-ENODEV);
0124
0125 cd = kzalloc(sizeof(struct genwqe_dev), GFP_KERNEL);
0126 if (!cd)
0127 return ERR_PTR(-ENOMEM);
0128
0129 cd->card_idx = i;
0130 cd->class_genwqe = class_genwqe;
0131 cd->debugfs_genwqe = debugfs_genwqe;
0132
0133
0134
0135
0136
0137 cd->use_platform_recovery = CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY;
0138
0139 init_waitqueue_head(&cd->queue_waitq);
0140
0141 spin_lock_init(&cd->file_lock);
0142 INIT_LIST_HEAD(&cd->file_list);
0143
0144 cd->card_state = GENWQE_CARD_UNUSED;
0145 spin_lock_init(&cd->print_lock);
0146
0147 cd->ddcb_software_timeout = GENWQE_DDCB_SOFTWARE_TIMEOUT;
0148 cd->kill_timeout = GENWQE_KILL_TIMEOUT;
0149
0150 for (j = 0; j < GENWQE_MAX_VFS; j++)
0151 cd->vf_jobtimeout_msec[j] = GENWQE_VF_JOBTIMEOUT_MSEC;
0152
0153 genwqe_devices[i] = cd;
0154 return cd;
0155 }
0156
0157 static void genwqe_dev_free(struct genwqe_dev *cd)
0158 {
0159 if (!cd)
0160 return;
0161
0162 genwqe_devices[cd->card_idx] = NULL;
0163 kfree(cd);
0164 }
0165
0166
0167
0168
0169
0170
0171
0172
0173
0174
/**
 * genwqe_bus_reset() - Card recovery via PCI function reset
 * @cd: card descriptor
 *
 * Unmaps BAR0 and releases the memory regions, performs a PCI
 * function reset and afterwards re-requests the regions and remaps
 * BAR0. Previously injected hardware-failure bits are cleared once
 * the reset succeeded.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int genwqe_bus_reset(struct genwqe_dev *cd)
{
	int rc = 0;
	struct pci_dev *pci_dev = cd->pci_dev;
	void __iomem *mmio;

	if (cd->err_inject & GENWQE_INJECT_BUS_RESET_FAILURE)
		return -EIO;	/* injected failure for testing */

	/* clear cd->mmio before unmapping, so no stale pointer remains */
	mmio = cd->mmio;
	cd->mmio = NULL;
	pci_iounmap(pci_dev, mmio);

	pci_release_mem_regions(pci_dev);

	/*
	 * Device configuration (bus mastering etc.) is saved and
	 * restored internally by pci_reset_function().
	 */
	dev_dbg(&pci_dev->dev, "[%s] pci_reset function ...\n", __func__);
	rc = pci_reset_function(pci_dev);
	if (rc) {
		dev_err(&pci_dev->dev,
			"[%s] err: failed reset func (rc %d)\n", __func__, rc);
		return rc;
	}
	dev_dbg(&pci_dev->dev, "[%s] done with rc=%d\n", __func__, rc);

	/*
	 * The reset is the right spot to drop the injected
	 * hardware-failure bits, so subsequent register reads are
	 * treated as valid again.
	 */
	cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE |
			    GENWQE_INJECT_GFIR_FATAL |
			    GENWQE_INJECT_GFIR_INFO);

	/* reclaim the BARs and remap BAR0 after the reset */
	rc = pci_request_mem_regions(pci_dev, genwqe_driver_name);
	if (rc) {
		dev_err(&pci_dev->dev,
			"[%s] err: request bars failed (%d)\n", __func__, rc);
		return -EIO;
	}

	cd->mmio = pci_iomap(pci_dev, 0, 0);
	if (cd->mmio == NULL) {
		dev_err(&pci_dev->dev,
			"[%s] err: mapping BAR0 failed\n", __func__);
		return -ENOMEM;
	}
	return 0;
}
0227
0228
0229
0230
0231
0232
0233
0234
0235
0236
0237
0238
0239
0240 bool genwqe_need_err_masking(struct genwqe_dev *cd)
0241 {
0242 return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull;
0243 }
0244
/**
 * genwqe_tweak_hardware() - Apply bitstream-specific workarounds
 * @cd: card descriptor
 *
 * For SLU bitstream levels in the range [0x32000, 0x33250] the
 * application error FIRs are masked by writing the override and
 * action-mask registers; a warning documents which bitstream caused
 * the masking.
 */
static void genwqe_tweak_hardware(struct genwqe_dev *cd)
{
	struct pci_dev *pci_dev = cd->pci_dev;

	/* Mask FIRs for development/debug bitstream levels */
	if (((cd->slu_unitcfg & 0xFFFF0ull) >= 0x32000ull) &&
	    ((cd->slu_unitcfg & 0xFFFF0ull) <= 0x33250ull)) {
		dev_warn(&pci_dev->dev,
			 "FIRs masked due to bitstream %016llx.%016llx\n",
			 cd->slu_unitcfg, cd->app_unitcfg);

		/* override all secondary LEM debug bits */
		__genwqe_writeq(cd, IO_APP_SEC_LEM_DEBUG_OVR,
				0xFFFFFFFFFFFFFFFFull);

		/* disable all error actions for the APP unit */
		__genwqe_writeq(cd, IO_APP_ERR_ACT_MASK,
				0x0000000000000000ull);
	}
}
0263
0264
0265
0266
0267
0268
0269
0270
0271
0272
0273 int genwqe_recovery_on_fatal_gfir_required(struct genwqe_dev *cd)
0274 {
0275 return (cd->slu_unitcfg & 0xFFFF0ull) >= 0x32170ull;
0276 }
0277
0278 int genwqe_flash_readback_fails(struct genwqe_dev *cd)
0279 {
0280 return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull;
0281 }
0282
0283
0284
0285
0286
0287
0288
0289
0290
0291
0292
0293
0294
0295 static int genwqe_T_psec(struct genwqe_dev *cd)
0296 {
0297 u16 speed;
0298 static const int T[] = { 4000, 5000, 6000, 5714 };
0299
0300 speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full);
0301 if (speed >= ARRAY_SIZE(T))
0302 return -1;
0303
0304 return T[speed];
0305 }
0306
0307
0308
0309
0310
0311
0312
0313
0314
0315
0316
0317 static bool genwqe_setup_pf_jtimer(struct genwqe_dev *cd)
0318 {
0319 u32 T = genwqe_T_psec(cd);
0320 u64 x;
0321
0322 if (GENWQE_PF_JOBTIMEOUT_MSEC == 0)
0323 return false;
0324
0325
0326 x = ilog2(GENWQE_PF_JOBTIMEOUT_MSEC *
0327 16000000000uL/(T * 15)) - 10;
0328
0329 genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT,
0330 0xff00 | (x & 0xff), 0);
0331 return true;
0332 }
0333
0334
0335
0336
0337
0338 static bool genwqe_setup_vf_jtimer(struct genwqe_dev *cd)
0339 {
0340 struct pci_dev *pci_dev = cd->pci_dev;
0341 unsigned int vf;
0342 u32 T = genwqe_T_psec(cd);
0343 u64 x;
0344 int totalvfs;
0345
0346 totalvfs = pci_sriov_get_totalvfs(pci_dev);
0347 if (totalvfs <= 0)
0348 return false;
0349
0350 for (vf = 0; vf < totalvfs; vf++) {
0351
0352 if (cd->vf_jobtimeout_msec[vf] == 0)
0353 continue;
0354
0355 x = ilog2(cd->vf_jobtimeout_msec[vf] *
0356 16000000000uL/(T * 15)) - 10;
0357
0358 genwqe_write_vreg(cd, IO_SLC_VF_APPJOB_TIMEOUT,
0359 0xff00 | (x & 0xff), vf + 1);
0360 }
0361 return true;
0362 }
0363
/**
 * genwqe_ffdc_buffs_alloc() - Allocate FFDC register dump buffers
 * @cd: card descriptor
 *
 * Determines per debug unit how many register entries a dump needs
 * and allocates an array of struct genwqe_reg for each.
 *
 * Return: always 0.
 */
static int genwqe_ffdc_buffs_alloc(struct genwqe_dev *cd)
{
	unsigned int type, e = 0;

	for (type = 0; type < GENWQE_DBG_UNITS; type++) {
		switch (type) {
		case GENWQE_DBG_UNIT0:
			e = genwqe_ffdc_buff_size(cd, 0);
			break;
		case GENWQE_DBG_UNIT1:
			e = genwqe_ffdc_buff_size(cd, 1);
			break;
		case GENWQE_DBG_UNIT2:
			e = genwqe_ffdc_buff_size(cd, 2);
			break;
		case GENWQE_DBG_REGS:
			e = GENWQE_FFDC_REGS;
			break;
		}

		/* currently support only the debug units mentioned here */
		cd->ffdc[type].entries = e;
		cd->ffdc[type].regs =
			kmalloc_array(e, sizeof(struct genwqe_reg),
				      GFP_KERNEL);
		/*
		 * Allocation failure is deliberately tolerated here:
		 * regs == NULL is presumably treated as "no dump
		 * available" by the consumers — NOTE(review): verify
		 * that all users of ffdc[].regs handle NULL.
		 */
	}
	return 0;
}
0396
0397 static void genwqe_ffdc_buffs_free(struct genwqe_dev *cd)
0398 {
0399 unsigned int type;
0400
0401 for (type = 0; type < GENWQE_DBG_UNITS; type++) {
0402 kfree(cd->ffdc[type].regs);
0403 cd->ffdc[type].regs = NULL;
0404 }
0405 }
0406
0407 static int genwqe_read_ids(struct genwqe_dev *cd)
0408 {
0409 int err = 0;
0410 int slu_id;
0411 struct pci_dev *pci_dev = cd->pci_dev;
0412
0413 cd->slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG);
0414 if (cd->slu_unitcfg == IO_ILLEGAL_VALUE) {
0415 dev_err(&pci_dev->dev,
0416 "err: SLUID=%016llx\n", cd->slu_unitcfg);
0417 err = -EIO;
0418 goto out_err;
0419 }
0420
0421 slu_id = genwqe_get_slu_id(cd);
0422 if (slu_id < GENWQE_SLU_ARCH_REQ || slu_id == 0xff) {
0423 dev_err(&pci_dev->dev,
0424 "err: incompatible SLU Architecture %u\n", slu_id);
0425 err = -ENOENT;
0426 goto out_err;
0427 }
0428
0429 cd->app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG);
0430 if (cd->app_unitcfg == IO_ILLEGAL_VALUE) {
0431 dev_err(&pci_dev->dev,
0432 "err: APPID=%016llx\n", cd->app_unitcfg);
0433 err = -EIO;
0434 goto out_err;
0435 }
0436 genwqe_read_app_id(cd, cd->app_name, sizeof(cd->app_name));
0437
0438
0439
0440
0441
0442
0443
0444
0445 if (pci_dev->is_virtfn)
0446 cd->is_privileged = 0;
0447 else
0448 cd->is_privileged = (__genwqe_readq(cd, IO_SLU_BITSTREAM)
0449 != IO_ILLEGAL_VALUE);
0450
0451 out_err:
0452 return err;
0453 }
0454
/**
 * genwqe_start() - Bring the card into operational state
 * @cd: card descriptor
 *
 * Reads the unit configuration, captures FFDC debug data on
 * privileged setups, recovers the chip via softreset + bus reset if
 * a previous fatal error was recorded, sets up the service layer and
 * finally creates the character device.
 *
 * Return: 0 on success; the error from genwqe_read_ids(), or -EIO /
 *         -ENODEV on later failures.
 */
static int genwqe_start(struct genwqe_dev *cd)
{
	int err;
	struct pci_dev *pci_dev = cd->pci_dev;

	err = genwqe_read_ids(cd);
	if (err)
		return err;

	if (genwqe_is_privileged(cd)) {
		/* allocation failure is tolerated, see buffs_alloc */
		genwqe_ffdc_buffs_alloc(cd);
		genwqe_stop_traps(cd);

		/* Collect registers e.g. FIRs, UNITIDs, traces ... */
		genwqe_read_ffdc_regs(cd, cd->ffdc[GENWQE_DBG_REGS].regs,
				      cd->ffdc[GENWQE_DBG_REGS].entries, 0);

		/* Collect the FFDC dumps of the internal units */
		genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT0,
				      cd->ffdc[GENWQE_DBG_UNIT0].regs,
				      cd->ffdc[GENWQE_DBG_UNIT0].entries);

		genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT1,
				      cd->ffdc[GENWQE_DBG_UNIT1].regs,
				      cd->ffdc[GENWQE_DBG_UNIT1].entries);

		genwqe_ffdc_buff_read(cd, GENWQE_DBG_UNIT2,
				      cd->ffdc[GENWQE_DBG_UNIT2].regs,
				      cd->ffdc[GENWQE_DBG_UNIT2].entries);

		genwqe_start_traps(cd);

		if (cd->card_state == GENWQE_CARD_FATAL_ERROR) {
			dev_warn(&pci_dev->dev,
				 "[%s] chip reload/recovery!\n", __func__);

			/*
			 * Write the softreset register before the bus
			 * reset — NOTE(review): 0x7C presumably arms
			 * the chip reload on reset; confirm with the
			 * register documentation.
			 */
			cd->softreset = 0x7Cull;
			__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET,
					cd->softreset);

			err = genwqe_bus_reset(cd);
			if (err != 0) {
				dev_err(&pci_dev->dev,
					"[%s] err: bus reset failed!\n",
					__func__);
				goto out;
			}

			/*
			 * Re-read the IDs: after the reset the chip
			 * may come back with a different bitstream.
			 */
			err = genwqe_read_ids(cd);
			if (err)
				goto out;
		}
	}

	err = genwqe_setup_service_layer(cd);
	if (err != 0) {
		dev_err(&pci_dev->dev,
			"[%s] err: could not setup servicelayer!\n", __func__);
		err = -ENODEV;
		goto out;
	}

	if (genwqe_is_privileged(cd)) {
		genwqe_tweak_hardware(cd);

		genwqe_setup_pf_jtimer(cd);
		genwqe_setup_vf_jtimer(cd);
	}

	err = genwqe_device_create(cd);
	if (err < 0) {
		dev_err(&pci_dev->dev,
			"err: chdev init failed! (err=%d)\n", err);
		goto out_release_service_layer;
	}
	return 0;

 out_release_service_layer:
	genwqe_release_service_layer(cd);
 out:
	if (genwqe_is_privileged(cd))
		genwqe_ffdc_buffs_free(cd);
	return -EIO;
}
0548
0549
0550
0551
0552
0553
0554
0555
0556
0557
0558
0559
0560
0561
/**
 * genwqe_stop() - Stop card operation
 * @cd: card descriptor
 *
 * Tears down the DDCB queue, removes the character device and
 * releases the service layer. For privileged (PF) setups SR-IOV is
 * disabled and the FFDC buffers are freed as well. Must be callable
 * twice in a row (see genwqe_remove()).
 *
 * Return: always 0.
 */
static int genwqe_stop(struct genwqe_dev *cd)
{
	genwqe_finish_queue(cd);
	genwqe_device_remove(cd);
	genwqe_release_service_layer(cd);

	if (genwqe_is_privileged(cd)) {
		pci_disable_sriov(cd->pci_dev);	/* also stops the VFs */
		genwqe_ffdc_buffs_free(cd);
	}

	return 0;
}
0575
0576
0577
0578
0579
0580
0581
0582
0583
0584
0585
0586
0587
/**
 * genwqe_recover_card() - Try to recover the card
 * @cd:        card descriptor
 * @fatal_err: nonzero if a fatal error was observed; in that case
 *             the softreset register is NOT written before the bus
 *             reset — NOTE(review): presumably register access is
 *             not trustworthy then; confirm.
 *
 * Stops all card services, resets the PCI function and starts the
 * services again.
 *
 * Return: 0 on success, or the error from reset/start.
 */
static int genwqe_recover_card(struct genwqe_dev *cd, int fatal_err)
{
	int rc;
	struct pci_dev *pci_dev = cd->pci_dev;

	genwqe_stop(cd);

	/*
	 * Make sure chip is not reloaded to maintain FFDC. Write SLU
	 * reset register only when the error was not fatal.
	 */
	if (!fatal_err) {
		cd->softreset = 0x70ull;
		__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset);
	}

	rc = genwqe_bus_reset(cd);
	if (rc != 0) {
		dev_err(&pci_dev->dev,
			"[%s] err: card recovery impossible!\n", __func__);
		return rc;
	}

	rc = genwqe_start(cd);
	if (rc < 0) {
		dev_err(&pci_dev->dev,
			"[%s] err: failed to launch device!\n", __func__);
		return rc;
	}
	return 0;
}
0619
0620 static int genwqe_health_check_cond(struct genwqe_dev *cd, u64 *gfir)
0621 {
0622 *gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
0623 return (*gfir & GFIR_ERR_TRIGGER) &&
0624 genwqe_recovery_on_fatal_gfir_required(cd);
0625 }
0626
0627
0628
0629
0630
0631
0632
0633
0634
0635
0636
/**
 * genwqe_fir_checking() - Check the fault isolation registers of the card
 * @cd: card descriptor
 *
 * Walks the primary FIR/FEC registers of all units, dumps every
 * secondary FIR/FEC with a bit set and — for non-fatal cases where
 * the GFIR error trigger was clear on entry — clears the secondary
 * and primary FIRs again. If new errors pop up during processing the
 * whole scan restarts, bounded to 16 iterations.
 *
 * Return: 0 when no error is pending, the masked GFIR error-trigger
 *         bits when a fatal error was seen, or IO_ILLEGAL_VALUE when
 *         the card cannot be read or the iteration limit is hit.
 */
static u64 genwqe_fir_checking(struct genwqe_dev *cd)
{
	int j, iterations = 0;
	u64 mask, fir, fec, uid, gfir, gfir_masked, sfir, sfec;
	u32 fir_addr, fir_clr_addr, fec_addr, sfir_addr, sfec_addr;
	struct pci_dev *pci_dev = cd->pci_dev;

 healthMonitor:
	iterations++;
	if (iterations > 16) {
		dev_err(&pci_dev->dev, "* exit looping after %d times\n",
			iterations);
		goto fatal_error;
	}

	gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
	if (gfir != 0x0)
		dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n",
			IO_SLC_CFGREG_GFIR, gfir);
	if (gfir == IO_ILLEGAL_VALUE)
		goto fatal_error;

	/*
	 * Quick exit when GFIR is clean; avoids continuous printing
	 * when e.g. a FIR is set that has no secondary FIR and can
	 * therefore not be cleared.
	 */
	if (gfir == 0)
		return 0;

	gfir_masked = gfir & GFIR_ERR_TRIGGER;	/* fatal error bits */

	for (uid = 0; uid < GENWQE_MAX_UNITS; uid++) {

		/* read the primary FIR of this unit */
		fir_addr = (uid << 24) + 0x08;
		fir = __genwqe_readq(cd, fir_addr);
		if (fir == 0x0)
			continue;	/* no error in this unit */

		dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fir_addr, fir);
		if (fir == IO_ILLEGAL_VALUE)
			goto fatal_error;

		/* read the primary FEC */
		fec_addr = (uid << 24) + 0x18;
		fec = __genwqe_readq(cd, fec_addr);

		dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fec_addr, fec);
		if (fec == IO_ILLEGAL_VALUE)
			goto fatal_error;

		for (j = 0, mask = 1ULL; j < 64; j++, mask <<= 1) {

			/* secondary FIR not flagged, skip it */
			if ((fir & mask) == 0x0)
				continue;

			sfir_addr = (uid << 24) + 0x100 + 0x08 * j;
			sfir = __genwqe_readq(cd, sfir_addr);

			if (sfir == IO_ILLEGAL_VALUE)
				goto fatal_error;
			dev_err(&pci_dev->dev,
				"* 0x%08x 0x%016llx\n", sfir_addr, sfir);

			sfec_addr = (uid << 24) + 0x300 + 0x08 * j;
			sfec = __genwqe_readq(cd, sfec_addr);

			if (sfec == IO_ILLEGAL_VALUE)
				goto fatal_error;
			dev_err(&pci_dev->dev,
				"* 0x%08x 0x%016llx\n", sfec_addr, sfec);

			gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
			if (gfir == IO_ILLEGAL_VALUE)
				goto fatal_error;

			/*
			 * A fatal GFIR turned up while we were
			 * scanning: start over.
			 */
			if ((gfir_masked == 0x0) &&
			    (gfir & GFIR_ERR_TRIGGER)) {
				goto healthMonitor;
			}

			/* do not clear if we entered with a fatal GFIR */
			if (gfir_masked == 0x0) {

				/* clear the logged secondary FIR bits */
				sfir_addr = (uid << 24) + 0x100 + 0x08 * j;
				__genwqe_writeq(cd, sfir_addr, sfir);

				dev_dbg(&pci_dev->dev,
					"[HM] Clearing 2ndary FIR 0x%08x with 0x%016llx\n",
					sfir_addr, sfir);

				/*
				 * These cannot be fatal FIRs, since
				 * gfir_masked was 0 after sfir was
				 * read. Writing sfir back is safe even
				 * for sfir == 0; the primary still
				 * needs clearing in that case.
				 */

				/* clear the logged primary FIR bit */
				fir_clr_addr = (uid << 24) + 0x10;
				__genwqe_writeq(cd, fir_clr_addr, mask);

				dev_dbg(&pci_dev->dev,
					"[HM] Clearing primary FIR 0x%08x with 0x%016llx\n",
					fir_clr_addr, mask);
			}
		}
	}
	gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
	if (gfir == IO_ILLEGAL_VALUE)
		goto fatal_error;

	if ((gfir_masked == 0x0) && (gfir & GFIR_ERR_TRIGGER)) {
		/*
		 * Check once more whether a new error turned up after
		 * all the FIRs were cleared.
		 */
		dev_dbg(&pci_dev->dev, "ACK! Another FIR! Recursing %d!\n",
			iterations);
		goto healthMonitor;
	}
	return gfir_masked;

 fatal_error:
	return IO_ILLEGAL_VALUE;
}
0771
0772
0773
0774
0775
0776
0777
0778
0779
0780
/**
 * genwqe_pci_fundamental_reset() - Trigger a PCIe fundamental reset
 * @pci_dev: PCI device to reset
 *
 * Saves the config space, asserts a warm reset on the link, waits,
 * deasserts it again and restores the config space. Config space
 * access is locked out for the whole duration.
 *
 * Return: 0 on success, otherwise the error from
 *         pci_set_pcie_reset_state().
 */
static int genwqe_pci_fundamental_reset(struct pci_dev *pci_dev)
{
	int rc;

	/*
	 * Lock out config space access from userspace while the
	 * device state is saved and the reset is in flight.
	 */
	pci_cfg_access_lock(pci_dev);
	pci_save_state(pci_dev);
	rc = pci_set_pcie_reset_state(pci_dev, pcie_warm_reset);
	if (!rc) {
		/* keep the PCIe reset asserted for 250ms */
		msleep(250);
		pci_set_pcie_reset_state(pci_dev, pcie_deassert_reset);
		/* wait 2s for the card to come back and train the link */
		msleep(2000);
	}
	pci_restore_state(pci_dev);
	pci_cfg_access_unlock(pci_dev);
	return rc;
}
0803
0804
0805 static int genwqe_platform_recovery(struct genwqe_dev *cd)
0806 {
0807 struct pci_dev *pci_dev = cd->pci_dev;
0808 int rc;
0809
0810 dev_info(&pci_dev->dev,
0811 "[%s] resetting card for error recovery\n", __func__);
0812
0813
0814 cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE |
0815 GENWQE_INJECT_GFIR_FATAL |
0816 GENWQE_INJECT_GFIR_INFO);
0817
0818 genwqe_stop(cd);
0819
0820
0821 rc = genwqe_pci_fundamental_reset(pci_dev);
0822 if (!rc) {
0823 rc = genwqe_start(cd);
0824 if (!rc)
0825 dev_info(&pci_dev->dev,
0826 "[%s] card recovered\n", __func__);
0827 else
0828 dev_err(&pci_dev->dev,
0829 "[%s] err: cannot start card services! (err=%d)\n",
0830 __func__, rc);
0831 } else {
0832 dev_err(&pci_dev->dev,
0833 "[%s] card reset failed\n", __func__);
0834 }
0835
0836 return rc;
0837 }
0838
0839
0840
0841
0842
0843
0844
0845
0846
0847
/**
 * genwqe_reload_bistream() - Reload the card bitstream
 * @cd: card descriptor
 *
 * Stops the card, arms the bitstream-reload bits in the softreset
 * register, performs a fundamental reset and restarts the card.
 *
 * Return: 0 on success, otherwise the error from genwqe_start().
 */
static int genwqe_reload_bistream(struct genwqe_dev *cd)
{
	struct pci_dev *pci_dev = cd->pci_dev;
	int rc;

	dev_info(&pci_dev->dev,
		 "[%s] resetting card for bitstream reload\n",
		 __func__);

	genwqe_stop(cd);

	/*
	 * Arm the reload: keep the 0xc bits of the previous softreset
	 * value and set 0x70 — NOTE(review): the exact bit meanings
	 * come from the register spec; confirm there.
	 */
	__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET,
			(cd->softreset & 0xcull) | 0x70ull);

	rc = genwqe_pci_fundamental_reset(pci_dev);
	if (rc) {
		/*
		 * A fundamental reset failure can be caused by lack
		 * of support on the platform; deliberately only log
		 * it and still try to start the card again below.
		 */
		dev_err(&pci_dev->dev,
			"[%s] err: failed to reset card for bitstream reload\n",
			__func__);
	}

	rc = genwqe_start(cd);
	if (rc) {
		dev_err(&pci_dev->dev,
			"[%s] err: cannot start card services! (err=%d)\n",
			__func__, rc);
		return rc;
	}
	dev_info(&pci_dev->dev,
		 "[%s] card reloaded\n", __func__);
	return 0;
}
0890
0891
0892
0893
0894
0895
0896
0897
0898
0899
0900
0901
0902
0903
0904
0905
0906
0907
0908
0909
0910
0911
/**
 * genwqe_health_thread() - Health monitoring thread
 * @data: card descriptor (struct genwqe_dev *)
 *
 * Wakes up every GENWQE_HEALTH_CHECK_INTERVAL seconds, or earlier
 * when genwqe_health_check_cond() fires, and samples GFIR,
 * SLU_UNITCFG and APP_UNITCFG. On fatal errors it either attempts
 * platform-assisted recovery (when enabled) or stops the card and
 * parks until the driver is unbound.
 *
 * Return: 0 when stopped normally, -EIO when the card became
 *         unusable.
 */
static int genwqe_health_thread(void *data)
{
	int rc, should_stop = 0;
	struct genwqe_dev *cd = data;
	struct pci_dev *pci_dev = cd->pci_dev;
	u64 gfir, gfir_masked, slu_unitcfg, app_unitcfg;

 health_thread_begin:
	while (!kthread_should_stop()) {
		rc = wait_event_interruptible_timeout(cd->health_waitq,
			 (genwqe_health_check_cond(cd, &gfir) ||
			  (should_stop = kthread_should_stop())),
				 GENWQE_HEALTH_CHECK_INTERVAL * HZ);

		if (should_stop)
			break;

		/* gfir was filled in by genwqe_health_check_cond() */
		if (gfir == IO_ILLEGAL_VALUE) {
			dev_err(&pci_dev->dev,
				"[%s] GFIR=%016llx\n", __func__, gfir);
			goto fatal_error;
		}

		slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG);
		if (slu_unitcfg == IO_ILLEGAL_VALUE) {
			dev_err(&pci_dev->dev,
				"[%s] SLU_UNITCFG=%016llx\n",
				__func__, slu_unitcfg);
			goto fatal_error;
		}

		app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG);
		if (app_unitcfg == IO_ILLEGAL_VALUE) {
			dev_err(&pci_dev->dev,
				"[%s] APP_UNITCFG=%016llx\n",
				__func__, app_unitcfg);
			goto fatal_error;
		}

		gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
		if (gfir == IO_ILLEGAL_VALUE) {
			dev_err(&pci_dev->dev,
				"[%s] %s: GFIR=%016llx\n", __func__,
				(gfir & GFIR_ERR_TRIGGER) ? "err" : "info",
				gfir);
			goto fatal_error;
		}

		gfir_masked = genwqe_fir_checking(cd);
		if (gfir_masked == IO_ILLEGAL_VALUE)
			goto fatal_error;

		/*
		 * Fatal GFIR bits set: reset the card, but only if
		 * recovery is not skipped and the bitstream supports
		 * recovery on fatal GFIRs.
		 */
		if ((gfir_masked) && !cd->skip_recovery &&
		    genwqe_recovery_on_fatal_gfir_required(cd)) {

			cd->card_state = GENWQE_CARD_FATAL_ERROR;

			rc = genwqe_recover_card(cd, 0);
			if (rc < 0) {
				/* card is unusable and needs unbind! */
				goto fatal_error;
			}
		}

		if (cd->card_state == GENWQE_CARD_RELOAD_BITSTREAM) {
			/* a bitstream reload was requested */
			rc = genwqe_reload_bistream(cd);
			if (rc)
				goto fatal_error;
		}

		cd->last_gfir = gfir;
		cond_resched();
	}

	return 0;

 fatal_error:
	if (cd->use_platform_recovery) {
		/*
		 * Use a non-raw MMIO read so the platform can detect
		 * the error condition (e.g. EEH) — the raw accessors
		 * used above bypass that detection.
		 */
		readq(cd->mmio + IO_SLC_CFGREG_GFIR);

		/* PCI error recovery is in progress; stay out of its way */
		if (pci_channel_offline(pci_dev))
			return -EIO;

		/* try a fundamental reset to recover from the fatal error */
		rc = genwqe_platform_recovery(cd);
		if (!rc)
			goto health_thread_begin;
	}

	dev_err(&pci_dev->dev,
		"[%s] card unusable. Please trigger unbind!\n", __func__);

	/* stop the logical devices to inform user space via udev remove */
	cd->card_state = GENWQE_CARD_FATAL_ERROR;
	genwqe_stop(cd);

	/* park here until genwqe_remove() stops this thread */
	while (!kthread_should_stop())
		cond_resched();

	return -EIO;
}
1029
1030 static int genwqe_health_check_start(struct genwqe_dev *cd)
1031 {
1032 int rc;
1033
1034 if (GENWQE_HEALTH_CHECK_INTERVAL <= 0)
1035 return 0;
1036
1037
1038
1039
1040 cd->health_thread = kthread_run(genwqe_health_thread, cd,
1041 GENWQE_DEVNAME "%d_health",
1042 cd->card_idx);
1043 if (IS_ERR(cd->health_thread)) {
1044 rc = PTR_ERR(cd->health_thread);
1045 cd->health_thread = NULL;
1046 return rc;
1047 }
1048 return 0;
1049 }
1050
1051 static int genwqe_health_thread_running(struct genwqe_dev *cd)
1052 {
1053 return cd->health_thread != NULL;
1054 }
1055
1056 static int genwqe_health_check_stop(struct genwqe_dev *cd)
1057 {
1058 if (!genwqe_health_thread_running(cd))
1059 return -EIO;
1060
1061 kthread_stop(cd->health_thread);
1062 cd->health_thread = NULL;
1063 return 0;
1064 }
1065
1066
1067
1068
1069
/**
 * genwqe_pci_setup() - Allocate PCI resources for the GenWQE card
 * @cd: card descriptor
 *
 * Enables the device, claims its memory regions, configures the DMA
 * mask (64-bit preferred, 32-bit fallback), maps BAR0, determines
 * the VF count and reads the unit IDs. Undone by
 * genwqe_pci_remove().
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int genwqe_pci_setup(struct genwqe_dev *cd)
{
	int err;
	struct pci_dev *pci_dev = cd->pci_dev;

	err = pci_enable_device_mem(pci_dev);
	if (err) {
		dev_err(&pci_dev->dev,
			"err: failed to enable pci memory (err=%d)\n", err);
		goto err_out;
	}

	/* Reserve PCI I/O and memory resources */
	err = pci_request_mem_regions(pci_dev, genwqe_driver_name);
	if (err) {
		dev_err(&pci_dev->dev,
			"[%s] err: request bars failed (%d)\n", __func__, err);
		err = -EIO;
		goto err_disable_device;
	}

	/* try 64-bit DMA first, fall back to 32-bit */
	if (dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64)) &&
	    dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32))) {
		dev_err(&pci_dev->dev,
			"err: neither DMA32 nor DMA64 supported\n");
		err = -EIO;
		goto out_release_resources;
	}

	pci_set_master(pci_dev);
	pci_enable_pcie_error_reporting(pci_dev);

	/* request a fundamental (not a hot) reset on recovery */
	pci_dev->needs_freset = 1;

	/* map the whole of BAR0 */
	cd->mmio_len = pci_resource_len(pci_dev, 0);
	cd->mmio = pci_iomap(pci_dev, 0, 0);
	if (cd->mmio == NULL) {
		dev_err(&pci_dev->dev,
			"[%s] err: mapping BAR0 failed\n", __func__);
		err = -ENOMEM;
		goto out_release_resources;
	}

	cd->num_vfs = pci_sriov_get_totalvfs(pci_dev);
	if (cd->num_vfs < 0)
		cd->num_vfs = 0;	/* treat "unknown" as no VFs */

	err = genwqe_read_ids(cd);
	if (err)
		goto out_iounmap;

	return 0;

 out_iounmap:
	pci_iounmap(pci_dev, cd->mmio);
 out_release_resources:
	pci_release_mem_regions(pci_dev);
 err_disable_device:
	pci_disable_device(pci_dev);
 err_out:
	return err;
}
1136
1137
1138
1139
1140
1141 static void genwqe_pci_remove(struct genwqe_dev *cd)
1142 {
1143 struct pci_dev *pci_dev = cd->pci_dev;
1144
1145 if (cd->mmio)
1146 pci_iounmap(pci_dev, cd->mmio);
1147
1148 pci_release_mem_regions(pci_dev);
1149 pci_disable_device(pci_dev);
1150 }
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
/**
 * genwqe_probe() - Device initialization
 * @pci_dev: PCI device functions
 * @id:      matched entry of genwqe_device_table
 *
 * Allocates a card descriptor, sets up the PCI resources, starts the
 * card services and — for privileged (PF) setups — the health
 * monitoring thread. Every step is rolled back on failure.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int genwqe_probe(struct pci_dev *pci_dev,
			const struct pci_device_id *id)
{
	int err;
	struct genwqe_dev *cd;

	genwqe_init_crc32();

	cd = genwqe_dev_alloc();
	if (IS_ERR(cd)) {
		dev_err(&pci_dev->dev, "err: could not alloc mem (err=%d)!\n",
			(int)PTR_ERR(cd));
		return PTR_ERR(cd);
	}

	dev_set_drvdata(&pci_dev->dev, cd);
	cd->pci_dev = pci_dev;

	err = genwqe_pci_setup(cd);
	if (err < 0) {
		dev_err(&pci_dev->dev,
			"err: problems with PCI setup (err=%d)\n", err);
		goto out_free_dev;
	}

	err = genwqe_start(cd);
	if (err < 0) {
		dev_err(&pci_dev->dev,
			"err: cannot start card services! (err=%d)\n", err);
		goto out_pci_remove;
	}

	/* only the PF runs the health thread */
	if (genwqe_is_privileged(cd)) {
		err = genwqe_health_check_start(cd);
		if (err < 0) {
			dev_err(&pci_dev->dev,
				"err: cannot start health checking! (err=%d)\n",
				err);
			goto out_stop_services;
		}
	}
	return 0;

 out_stop_services:
	genwqe_stop(cd);
 out_pci_remove:
	genwqe_pci_remove(cd);
 out_free_dev:
	genwqe_dev_free(cd);
	return err;
}
1212
1213
1214
1215
1216
1217
1218
/**
 * genwqe_remove() - Called when the device is removed or unbound
 * @pci_dev: PCI device
 *
 * Stops the health thread first so it cannot trigger recovery while
 * we tear the card down.
 */
static void genwqe_remove(struct pci_dev *pci_dev)
{
	struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev);

	genwqe_health_check_stop(cd);

	/*
	 * genwqe_stop() must survive being called twice in a row:
	 * the health thread may already have called it before it
	 * parked on a fatal error.
	 */
	genwqe_stop(cd);
	genwqe_pci_remove(cd);
	genwqe_dev_free(cd);
}
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243 static pci_ers_result_t genwqe_err_error_detected(struct pci_dev *pci_dev,
1244 pci_channel_state_t state)
1245 {
1246 struct genwqe_dev *cd;
1247
1248 dev_err(&pci_dev->dev, "[%s] state=%d\n", __func__, state);
1249
1250 cd = dev_get_drvdata(&pci_dev->dev);
1251 if (cd == NULL)
1252 return PCI_ERS_RESULT_DISCONNECT;
1253
1254
1255 genwqe_health_check_stop(cd);
1256 genwqe_stop(cd);
1257
1258
1259
1260
1261
1262
1263 if (state == pci_channel_io_perm_failure) {
1264 return PCI_ERS_RESULT_DISCONNECT;
1265 } else {
1266 genwqe_pci_remove(cd);
1267 return PCI_ERS_RESULT_NEED_RESET;
1268 }
1269 }
1270
1271 static pci_ers_result_t genwqe_err_slot_reset(struct pci_dev *pci_dev)
1272 {
1273 int rc;
1274 struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev);
1275
1276 rc = genwqe_pci_setup(cd);
1277 if (!rc) {
1278 return PCI_ERS_RESULT_RECOVERED;
1279 } else {
1280 dev_err(&pci_dev->dev,
1281 "err: problems with PCI setup (err=%d)\n", rc);
1282 return PCI_ERS_RESULT_DISCONNECT;
1283 }
1284 }
1285
1286 static pci_ers_result_t genwqe_err_result_none(struct pci_dev *dev)
1287 {
1288 return PCI_ERS_RESULT_NONE;
1289 }
1290
1291 static void genwqe_err_resume(struct pci_dev *pci_dev)
1292 {
1293 int rc;
1294 struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev);
1295
1296 rc = genwqe_start(cd);
1297 if (!rc) {
1298 rc = genwqe_health_check_start(cd);
1299 if (rc)
1300 dev_err(&pci_dev->dev,
1301 "err: cannot start health checking! (err=%d)\n",
1302 rc);
1303 } else {
1304 dev_err(&pci_dev->dev,
1305 "err: cannot start card services! (err=%d)\n", rc);
1306 }
1307 }
1308
1309 static int genwqe_sriov_configure(struct pci_dev *dev, int numvfs)
1310 {
1311 int rc;
1312 struct genwqe_dev *cd = dev_get_drvdata(&dev->dev);
1313
1314 if (numvfs > 0) {
1315 genwqe_setup_vf_jtimer(cd);
1316 rc = pci_enable_sriov(dev, numvfs);
1317 if (rc < 0)
1318 return rc;
1319 return numvfs;
1320 }
1321 if (numvfs == 0) {
1322 pci_disable_sriov(dev);
1323 return 0;
1324 }
1325 return 0;
1326 }
1327
/*
 * PCI error-recovery callbacks; mmio_enabled intentionally reports
 * PCI_ERS_RESULT_NONE.
 */
static const struct pci_error_handlers genwqe_err_handler = {
	.error_detected = genwqe_err_error_detected,
	.mmio_enabled = genwqe_err_result_none,
	.slot_reset = genwqe_err_slot_reset,
	.resume = genwqe_err_resume,
};
1334
/* PCI driver registration data */
static struct pci_driver genwqe_driver = {
	.name = genwqe_driver_name,
	.id_table = genwqe_device_table,
	.probe = genwqe_probe,
	.remove = genwqe_remove,
	.sriov_configure = genwqe_sriov_configure,
	.err_handler = &genwqe_err_handler,
};
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352 static char *genwqe_devnode(struct device *dev, umode_t *mode)
1353 {
1354 if (mode)
1355 *mode = 0666;
1356 return NULL;
1357 }
1358
1359
1360
1361
1362 static int __init genwqe_init_module(void)
1363 {
1364 int rc;
1365
1366 class_genwqe = class_create(THIS_MODULE, GENWQE_DEVNAME);
1367 if (IS_ERR(class_genwqe)) {
1368 pr_err("[%s] create class failed\n", __func__);
1369 return -ENOMEM;
1370 }
1371
1372 class_genwqe->devnode = genwqe_devnode;
1373
1374 debugfs_genwqe = debugfs_create_dir(GENWQE_DEVNAME, NULL);
1375
1376 rc = pci_register_driver(&genwqe_driver);
1377 if (rc != 0) {
1378 pr_err("[%s] pci_reg_driver (rc=%d)\n", __func__, rc);
1379 goto err_out0;
1380 }
1381
1382 return rc;
1383
1384 err_out0:
1385 debugfs_remove(debugfs_genwqe);
1386 class_destroy(class_genwqe);
1387 return rc;
1388 }
1389
1390
1391
1392
/**
 * genwqe_exit_module() - Driver exit; reverse of genwqe_init_module()
 *
 * Unregister the PCI driver first (this tears down bound cards),
 * then remove the debugfs root and the device class.
 */
static void __exit genwqe_exit_module(void)
{
	pci_unregister_driver(&genwqe_driver);
	debugfs_remove(debugfs_genwqe);
	class_destroy(class_genwqe);
}
1399
1400 module_init(genwqe_init_module);
1401 module_exit(genwqe_exit_module);