0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * IBM Accelerator Family 'GenWQE'
0004  *
0005  * (C) Copyright IBM Corp. 2013
0006  *
0007  * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
0008  * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
0009  * Author: Michael Jung <mijung@gmx.net>
0010  * Author: Michael Ruettger <michael@ibmra.de>
0011  */
0012 
0013 /*
0014  * Miscellaneous functionality used in the other GenWQE driver parts.
0015  */
0016 
0017 #include <linux/kernel.h>
0018 #include <linux/sched.h>
0019 #include <linux/vmalloc.h>
0020 #include <linux/page-flags.h>
0021 #include <linux/scatterlist.h>
0022 #include <linux/hugetlb.h>
0023 #include <linux/iommu.h>
0024 #include <linux/pci.h>
0025 #include <linux/dma-mapping.h>
0026 #include <linux/ctype.h>
0027 #include <linux/module.h>
0028 #include <linux/platform_device.h>
0029 #include <linux/delay.h>
0030 #include <linux/pgtable.h>
0031 
0032 #include "genwqe_driver.h"
0033 #include "card_base.h"
0034 #include "card_ddcb.h"
0035 
0036 /**
0037  * __genwqe_writeq() - Write 64-bit register
0038  * @cd:         genwqe device descriptor
0039  * @byte_offs:  byte offset within BAR
0040  * @val:        64-bit value
0041  *
0042  * Return: 0 if success; < 0 if error
0043  */
0044 int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val)
0045 {
0046     struct pci_dev *pci_dev = cd->pci_dev;
0047 
0048     if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
0049         return -EIO;
0050 
0051     if (cd->mmio == NULL)
0052         return -EIO;
0053 
0054     if (pci_channel_offline(pci_dev))
0055         return -EIO;
0056 
0057     __raw_writeq((__force u64)cpu_to_be64(val), cd->mmio + byte_offs);
0058     return 0;
0059 }
0060 
0061 /**
0062  * __genwqe_readq() - Read 64-bit register
0063  * @cd:         genwqe device descriptor
0064  * @byte_offs:  offset within BAR
0065  *
0066  * Return: value from register
0067  */
0068 u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs)
0069 {
0070     if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
0071         return 0xffffffffffffffffull;
0072 
0073     if ((cd->err_inject & GENWQE_INJECT_GFIR_FATAL) &&
0074         (byte_offs == IO_SLC_CFGREG_GFIR))
0075         return 0x000000000000ffffull;
0076 
0077     if ((cd->err_inject & GENWQE_INJECT_GFIR_INFO) &&
0078         (byte_offs == IO_SLC_CFGREG_GFIR))
0079         return 0x00000000ffff0000ull;
0080 
0081     if (cd->mmio == NULL)
0082         return 0xffffffffffffffffull;
0083 
0084     return be64_to_cpu((__force __be64)__raw_readq(cd->mmio + byte_offs));
0085 }
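
/*
 * Illustrative helper, not part of the original file: the error-injection
 * paths above mirror the GFIR layout -- the low 16 bits report fatal
 * conditions, bits 31:16 informational ones -- so a GFIR readout can be
 * classified roughly like this.
 */
static inline bool genwqe_gfir_is_fatal(struct genwqe_dev *cd)
{
    u64 gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);

    return (gfir & 0x000000000000ffffull) != 0;
}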
0086 
0087 /**
0088  * __genwqe_writel() - Write 32-bit register
0089  * @cd:         genwqe device descriptor
0090  * @byte_offs:  byte offset within BAR
0091  * @val:        32-bit value
0092  *
0093  * Return: 0 if success; < 0 if error
0094  */
0095 int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val)
0096 {
0097     struct pci_dev *pci_dev = cd->pci_dev;
0098 
0099     if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
0100         return -EIO;
0101 
0102     if (cd->mmio == NULL)
0103         return -EIO;
0104 
0105     if (pci_channel_offline(pci_dev))
0106         return -EIO;
0107 
0108     __raw_writel((__force u32)cpu_to_be32(val), cd->mmio + byte_offs);
0109     return 0;
0110 }
0111 
0112 /**
0113  * __genwqe_readl() - Read 32-bit register
0114  * @cd:         genwqe device descriptor
0115  * @byte_offs:  offset within BAR
0116  *
0117  * Return: Value from register
0118  */
0119 u32 __genwqe_readl(struct genwqe_dev *cd, u64 byte_offs)
0120 {
0121     if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
0122         return 0xffffffff;
0123 
0124     if (cd->mmio == NULL)
0125         return 0xffffffff;
0126 
0127     return be32_to_cpu((__force __be32)__raw_readl(cd->mmio + byte_offs));
0128 }
0129 
0130 /**
0131  * genwqe_read_app_id() - Extract app_id
0132  * @cd:         genwqe device descriptor
0133  * @app_name:   buffer used to pass back the name
0134  * @len:        length of data for name
0135  *
0136  * app_unitcfg needs to be filled with valid data first
0137  */
0138 int genwqe_read_app_id(struct genwqe_dev *cd, char *app_name, int len)
0139 {
0140     int i, j;
0141     u32 app_id = (u32)cd->app_unitcfg;
0142 
0143     memset(app_name, 0, len);
0144     for (i = 0, j = 0; j < min(len, 4); j++) {
0145         char ch = (char)((app_id >> (24 - j*8)) & 0xff);
0146 
0147         if (ch == ' ')
0148             continue;
0149         app_name[i++] = isprint(ch) ? ch : 'X';
0150     }
0151     return i;
0152 }
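
/*
 * Usage sketch, not part of the original file: with the low 32 bits of
 * app_unitcfg holding e.g. 0x475a4950 (an example value that spells
 * "GZIP"), the call below returns 4 and fills name[] with "GZIP":
 *
 *     char name[5] = { 0 };
 *
 *     genwqe_read_app_id(cd, name, sizeof(name));
 *     dev_dbg(&cd->pci_dev->dev, "app=%s\n", name);
 */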
0153 
0154 /**
0155  * genwqe_init_crc32() - Prepare a lookup table for fast crc32 calculations
0156  *
0157  * The existing kernel CRC functions use a different polynomial,
0158  * therefore we could not use them here.
0159  *
0160  * Genwqe's Polynomial = 0x20044009
0161  */
0162 #define CRC32_POLYNOMIAL    0x20044009
0163 static u32 crc32_tab[256];  /* crc32 lookup table */
0164 
0165 void genwqe_init_crc32(void)
0166 {
0167     int i, j;
0168     u32 crc;
0169 
0170     for (i = 0;  i < 256;  i++) {
0171         crc = i << 24;
0172         for (j = 0;  j < 8;  j++) {
0173             if (crc & 0x80000000)
0174                 crc = (crc << 1) ^ CRC32_POLYNOMIAL;
0175             else
0176                 crc = (crc << 1);
0177         }
0178         crc32_tab[i] = crc;
0179     }
0180 }
0181 
0182 /**
0183  * genwqe_crc32() - Generate 32-bit crc as required for DDCBs
0184  * @buff:       pointer to data buffer
0185  * @len:        length of data for calculation
0186  * @init:       initial crc (0xffffffff at start)
0187  *
0188  * polynomial = x^32 + x^29 + x^18 + x^14 + x^3 + 1 (0x20044009)
0189  *
0190  * Example: 4 bytes 0x01 0x02 0x03 0x04 with init=0xffffffff should
0191  * result in a crc32 of 0xf33cb7d3.
0192  *
0193  * The existing kernel crc functions did not cover this polynomial yet.
0194  *
0195  * Return: crc32 checksum.
0196  */
0197 u32 genwqe_crc32(u8 *buff, size_t len, u32 init)
0198 {
0199     int i;
0200     u32 crc;
0201 
0202     crc = init;
0203     while (len--) {
0204         i = ((crc >> 24) ^ *buff++) & 0xFF;
0205         crc = (crc << 8) ^ crc32_tab[i];
0206     }
0207     return crc;
0208 }
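
/*
 * Illustrative self-check, not part of the original file: it feeds the
 * example documented above (bytes 01 02 03 04, init 0xffffffff) through
 * the routines in this file; per that comment the result should be
 * 0xf33cb7d3.
 */
static inline bool genwqe_crc32_selftest(void)
{
    u8 data[] = { 0x01, 0x02, 0x03, 0x04 };

    genwqe_init_crc32();
    return genwqe_crc32(data, sizeof(data), 0xffffffff) == 0xf33cb7d3;
}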
0209 
0210 void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size,
0211                    dma_addr_t *dma_handle)
0212 {
0213     if (get_order(size) >= MAX_ORDER)
0214         return NULL;
0215 
0216     return dma_alloc_coherent(&cd->pci_dev->dev, size, dma_handle,
0217                   GFP_KERNEL);
0218 }
0219 
0220 void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size,
0221                  void *vaddr, dma_addr_t dma_handle)
0222 {
0223     if (vaddr == NULL)
0224         return;
0225 
0226     dma_free_coherent(&cd->pci_dev->dev, size, vaddr, dma_handle);
0227 }
0228 
0229 static void genwqe_unmap_pages(struct genwqe_dev *cd, dma_addr_t *dma_list,
0230                   int num_pages)
0231 {
0232     int i;
0233     struct pci_dev *pci_dev = cd->pci_dev;
0234 
0235     for (i = 0; (i < num_pages) && (dma_list[i] != 0x0); i++) {
0236         dma_unmap_page(&pci_dev->dev, dma_list[i], PAGE_SIZE,
0237                    DMA_BIDIRECTIONAL);
0238         dma_list[i] = 0x0;
0239     }
0240 }
0241 
0242 static int genwqe_map_pages(struct genwqe_dev *cd,
0243                struct page **page_list, int num_pages,
0244                dma_addr_t *dma_list)
0245 {
0246     int i;
0247     struct pci_dev *pci_dev = cd->pci_dev;
0248 
0249     /* establish DMA mapping for requested pages */
0250     for (i = 0; i < num_pages; i++) {
0251         dma_addr_t daddr;
0252 
0253         dma_list[i] = 0x0;
0254         daddr = dma_map_page(&pci_dev->dev, page_list[i],
0255                      0,  /* map_offs */
0256                      PAGE_SIZE,
0257                      DMA_BIDIRECTIONAL);  /* FIXME rd/rw */
0258 
0259         if (dma_mapping_error(&pci_dev->dev, daddr)) {
0260             dev_err(&pci_dev->dev,
0261                 "[%s] err: no dma addr daddr=%016llx!\n",
0262                 __func__, (long long)daddr);
0263             goto err;
0264         }
0265 
0266         dma_list[i] = daddr;
0267     }
0268     return 0;
0269 
0270  err:
0271     genwqe_unmap_pages(cd, dma_list, num_pages);
0272     return -EIO;
0273 }
0274 
0275 static int genwqe_sgl_size(int num_pages)
0276 {
0277     int len, num_tlb = num_pages / 7;
0278 
0279     len = sizeof(struct sg_entry) * (num_pages+num_tlb + 1);
0280     return roundup(len, PAGE_SIZE);
0281 }
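
/*
 * Worked example for the calculation above, assuming the 16-byte sg_entry
 * layout (64-bit target_addr plus 32-bit len and flags): one entry per
 * 128-byte block chains to the next block, so 7 data entries fit per
 * 8-entry block, and one extra entry terminates the list.
 * For num_pages = 20:
 *
 *     num_tlb = 20 / 7 = 2
 *     len     = 16 * (20 + 2 + 1) = 368 bytes
 *     size    = roundup(368, PAGE_SIZE) = 4096 (one page)
 */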
0282 
0283 /*
0284  * genwqe_alloc_sync_sgl() - Allocate memory for sgl and overlapping pages
0285  *
0286  * Allocates memory for sgl and overlapping pages. Pages which might
0287  * overlap other user-space memory blocks are being cached for DMAs,
0288  * so that we do not run into synchronization issues. Data is copied
0289  * from user-space into the cached pages.
0290  */
0291 int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
0292               void __user *user_addr, size_t user_size, int write)
0293 {
0294     int ret = -ENOMEM;
0295     struct pci_dev *pci_dev = cd->pci_dev;
0296 
0297     sgl->fpage_offs = offset_in_page((unsigned long)user_addr);
0298     sgl->fpage_size = min_t(size_t, PAGE_SIZE-sgl->fpage_offs, user_size);
0299     sgl->nr_pages = DIV_ROUND_UP(sgl->fpage_offs + user_size, PAGE_SIZE);
0300     sgl->lpage_size = (user_size - sgl->fpage_size) % PAGE_SIZE;
0301 
0302     dev_dbg(&pci_dev->dev, "[%s] uaddr=%p usize=%8ld nr_pages=%ld fpage_offs=%lx fpage_size=%ld lpage_size=%ld\n",
0303         __func__, user_addr, user_size, sgl->nr_pages,
0304         sgl->fpage_offs, sgl->fpage_size, sgl->lpage_size);
0305 
0306     sgl->user_addr = user_addr;
0307     sgl->user_size = user_size;
0308     sgl->write = write;
0309     sgl->sgl_size = genwqe_sgl_size(sgl->nr_pages);
0310 
0311     if (get_order(sgl->sgl_size) > MAX_ORDER) {
0312         dev_err(&pci_dev->dev,
0313             "[%s] err: too much memory requested!\n", __func__);
0314         return ret;
0315     }
0316 
0317     sgl->sgl = __genwqe_alloc_consistent(cd, sgl->sgl_size,
0318                          &sgl->sgl_dma_addr);
0319     if (sgl->sgl == NULL) {
0320         dev_err(&pci_dev->dev,
0321             "[%s] err: no memory available!\n", __func__);
0322         return ret;
0323     }
0324 
0325     /* Only use buffering on incomplete pages */
0326     if ((sgl->fpage_size != 0) && (sgl->fpage_size != PAGE_SIZE)) {
0327         sgl->fpage = __genwqe_alloc_consistent(cd, PAGE_SIZE,
0328                                &sgl->fpage_dma_addr);
0329         if (sgl->fpage == NULL)
0330             goto err_out;
0331 
0332         /* Sync with user memory */
0333         if (copy_from_user(sgl->fpage + sgl->fpage_offs,
0334                    user_addr, sgl->fpage_size)) {
0335             ret = -EFAULT;
0336             goto err_out;
0337         }
0338     }
0339     if (sgl->lpage_size != 0) {
0340         sgl->lpage = __genwqe_alloc_consistent(cd, PAGE_SIZE,
0341                                &sgl->lpage_dma_addr);
0342         if (sgl->lpage == NULL)
0343             goto err_out1;
0344 
0345         /* Sync with user memory */
0346         if (copy_from_user(sgl->lpage, user_addr + user_size -
0347                    sgl->lpage_size, sgl->lpage_size)) {
0348             ret = -EFAULT;
0349             goto err_out2;
0350         }
0351     }
0352     return 0;
0353 
0354  err_out2:
0355     __genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage,
0356                  sgl->lpage_dma_addr);
0357     sgl->lpage = NULL;
0358     sgl->lpage_dma_addr = 0;
0359  err_out1:
0360     __genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage,
0361                  sgl->fpage_dma_addr);
0362     sgl->fpage = NULL;
0363     sgl->fpage_dma_addr = 0;
0364  err_out:
0365     __genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl,
0366                  sgl->sgl_dma_addr);
0367     sgl->sgl = NULL;
0368     sgl->sgl_dma_addr = 0;
0369     sgl->sgl_size = 0;
0370 
0371     return ret;
0372 }
0373 
0374 int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
0375              dma_addr_t *dma_list)
0376 {
0377     int i = 0, j = 0, p;
0378     unsigned long dma_offs, map_offs;
0379     dma_addr_t prev_daddr = 0;
0380     struct sg_entry *s, *last_s = NULL;
0381     size_t size = sgl->user_size;
0382 
0383     dma_offs = 128;     /* next block if needed/dma_offset */
0384     map_offs = sgl->fpage_offs; /* offset in first page */
0385 
0386     s = &sgl->sgl[0];   /* first set of 8 entries */
0387     p = 0;          /* page */
0388     while (p < sgl->nr_pages) {
0389         dma_addr_t daddr;
0390         unsigned int size_to_map;
0391 
0392         /* always write the chaining entry, cleanup is done later */
0393         j = 0;
0394         s[j].target_addr = cpu_to_be64(sgl->sgl_dma_addr + dma_offs);
0395         s[j].len     = cpu_to_be32(128);
0396         s[j].flags   = cpu_to_be32(SG_CHAINED);
0397         j++;
0398 
0399         while (j < 8) {
0400             /* DMA mapping for requested page, offs, size */
0401             size_to_map = min(size, PAGE_SIZE - map_offs);
0402 
0403             if ((p == 0) && (sgl->fpage != NULL)) {
0404                 daddr = sgl->fpage_dma_addr + map_offs;
0405 
0406             } else if ((p == sgl->nr_pages - 1) &&
0407                    (sgl->lpage != NULL)) {
0408                 daddr = sgl->lpage_dma_addr;
0409             } else {
0410                 daddr = dma_list[p] + map_offs;
0411             }
0412 
0413             size -= size_to_map;
0414             map_offs = 0;
0415 
0416             if (prev_daddr == daddr) {
0417                 u32 prev_len = be32_to_cpu(last_s->len);
0418 
0419                 /* pr_info("daddr combining: "
0420                     "%016llx/%08x -> %016llx\n",
0421                     prev_daddr, prev_len, daddr); */
0422 
0423                 last_s->len = cpu_to_be32(prev_len +
0424                               size_to_map);
0425 
0426                 p++; /* process next page */
0427                 if (p == sgl->nr_pages)
0428                     goto fixup;  /* nothing to do */
0429 
0430                 prev_daddr = daddr + size_to_map;
0431                 continue;
0432             }
0433 
0434             /* start new entry */
0435             s[j].target_addr = cpu_to_be64(daddr);
0436             s[j].len     = cpu_to_be32(size_to_map);
0437             s[j].flags   = cpu_to_be32(SG_DATA);
0438             prev_daddr = daddr + size_to_map;
0439             last_s = &s[j];
0440             j++;
0441 
0442             p++;    /* process next page */
0443             if (p == sgl->nr_pages)
0444                 goto fixup;  /* nothing to do */
0445         }
0446         dma_offs += 128;
0447         s += 8;     /* continue 8 elements further */
0448     }
0449  fixup:
0450     if (j == 1) {       /* combining happened on last entry! */
0451         s -= 8;     /* full shift needed on previous sgl block */
0452         j =  7;     /* shift all elements */
0453     }
0454 
0455     for (i = 0; i < j; i++) /* move elements 1 up */
0456         s[i] = s[i + 1];
0457 
0458     s[i].target_addr = cpu_to_be64(0);
0459     s[i].len     = cpu_to_be32(0);
0460     s[i].flags   = cpu_to_be32(SG_END_LIST);
0461     return 0;
0462 }
0463 
0464 /**
0465  * genwqe_free_sync_sgl() - Free memory for sgl and overlapping pages
0466  * @cd:         genwqe device descriptor
0467  * @sgl:        scatter gather list describing user-space memory
0468  *
0469  * After the DMA transfer has been completed we free the memory for
0470  * the sgl and the cached pages. Data is being transferred from cached
0471  * pages into user-space buffers.
0472  */
0473 int genwqe_free_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl)
0474 {
0475     int rc = 0;
0476     size_t offset;
0477     unsigned long res;
0478     struct pci_dev *pci_dev = cd->pci_dev;
0479 
0480     if (sgl->fpage) {
0481         if (sgl->write) {
0482             res = copy_to_user(sgl->user_addr,
0483                 sgl->fpage + sgl->fpage_offs, sgl->fpage_size);
0484             if (res) {
0485                 dev_err(&pci_dev->dev,
0486                     "[%s] err: copying fpage! (res=%lu)\n",
0487                     __func__, res);
0488                 rc = -EFAULT;
0489             }
0490         }
0491         __genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage,
0492                      sgl->fpage_dma_addr);
0493         sgl->fpage = NULL;
0494         sgl->fpage_dma_addr = 0;
0495     }
0496     if (sgl->lpage) {
0497         if (sgl->write) {
0498             offset = sgl->user_size - sgl->lpage_size;
0499             res = copy_to_user(sgl->user_addr + offset, sgl->lpage,
0500                        sgl->lpage_size);
0501             if (res) {
0502                 dev_err(&pci_dev->dev,
0503                     "[%s] err: copying lpage! (res=%lu)\n",
0504                     __func__, res);
0505                 rc = -EFAULT;
0506             }
0507         }
0508         __genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage,
0509                      sgl->lpage_dma_addr);
0510         sgl->lpage = NULL;
0511         sgl->lpage_dma_addr = 0;
0512     }
0513     __genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl,
0514                  sgl->sgl_dma_addr);
0515 
0516     sgl->sgl = NULL;
0517     sgl->sgl_dma_addr = 0x0;
0518     sgl->sgl_size = 0;
0519     return rc;
0520 }
0521 
0522 /**
0523  * genwqe_user_vmap() - Map user-space memory to virtual kernel memory
0524  * @cd:         pointer to genwqe device
0525  * @m:          mapping params
0526  * @uaddr:      user virtual address
0527  * @size:       size of memory to be mapped
0528  *
0529  * We need to think about how we could speed this up. Of course it is
0530  * not a good idea to do this over and over again, like we are
0531  * currently doing it. Nevertheless, I am curious where on the path
0532  * the performance is spent. Most probably within the memory
0533  * allocation functions, but maybe also in the DMA mapping code.
0534  *
0535  * Restrictions: The maximum size of the possible mapping currently depends
0536  *               on the amount of memory we can get using kzalloc() for the
0537  *               page_list and dma_alloc_coherent() for the sg_list.
0538  *               The sg_list is currently itself not scattered, which could
0539  *               be fixed with some effort. The page_list must be split into
0540  *               PAGE_SIZE chunks too. All that will make the complicated
0541  *               code more complicated.
0542  *
0543  * Return: 0 if success
0544  */
0545 int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, void *uaddr,
0546              unsigned long size)
0547 {
0548     int rc = -EINVAL;
0549     unsigned long data, offs;
0550     struct pci_dev *pci_dev = cd->pci_dev;
0551 
0552     if ((uaddr == NULL) || (size == 0)) {
0553         m->size = 0;    /* mark unused and not added */
0554         return -EINVAL;
0555     }
0556     m->u_vaddr = uaddr;
0557     m->size    = size;
0558 
0559     /* determine space needed for page_list. */
0560     data = (unsigned long)uaddr;
0561     offs = offset_in_page(data);
0562     if (size > ULONG_MAX - PAGE_SIZE - offs) {
0563         m->size = 0;    /* mark unused and not added */
0564         return -EINVAL;
0565     }
0566     m->nr_pages = DIV_ROUND_UP(offs + size, PAGE_SIZE);
0567 
0568     m->page_list = kcalloc(m->nr_pages,
0569                    sizeof(struct page *) + sizeof(dma_addr_t),
0570                    GFP_KERNEL);
0571     if (!m->page_list) {
0572         dev_err(&pci_dev->dev, "err: alloc page_list failed\n");
0573         m->nr_pages = 0;
0574         m->u_vaddr = NULL;
0575         m->size = 0;    /* mark unused and not added */
0576         return -ENOMEM;
0577     }
0578     m->dma_list = (dma_addr_t *)(m->page_list + m->nr_pages);
0579 
0580     /* pin user pages in memory */
0581     rc = pin_user_pages_fast(data & PAGE_MASK, /* page aligned addr */
0582                  m->nr_pages,
0583                  m->write ? FOLL_WRITE : 0, /* readable/writable */
0584                  m->page_list); /* ptrs to pages */
0585     if (rc < 0)
0586         goto fail_pin_user_pages;
0587 
0588     /* assumption: pin_user_pages can be killed by signals. */
0589     if (rc < m->nr_pages) {
0590         unpin_user_pages_dirty_lock(m->page_list, rc, m->write);
0591         rc = -EFAULT;
0592         goto fail_pin_user_pages;
0593     }
0594 
0595     rc = genwqe_map_pages(cd, m->page_list, m->nr_pages, m->dma_list);
0596     if (rc != 0)
0597         goto fail_free_user_pages;
0598 
0599     return 0;
0600 
0601  fail_free_user_pages:
0602     unpin_user_pages_dirty_lock(m->page_list, m->nr_pages, m->write);
0603 
0604  fail_pin_user_pages:
0605     kfree(m->page_list);
0606     m->page_list = NULL;
0607     m->dma_list = NULL;
0608     m->nr_pages = 0;
0609     m->u_vaddr = NULL;
0610     m->size = 0;        /* mark unused and not added */
0611     return rc;
0612 }
0613 
0614 /**
0615  * genwqe_user_vunmap() - Undo mapping of user-space mem to virtual kernel
0616  *                        memory
0617  * @cd:         pointer to genwqe device
0618  * @m:          mapping params
0619  */
0620 int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m)
0621 {
0622     struct pci_dev *pci_dev = cd->pci_dev;
0623 
0624     if (!dma_mapping_used(m)) {
0625         dev_err(&pci_dev->dev, "[%s] err: mapping %p not used!\n",
0626             __func__, m);
0627         return -EINVAL;
0628     }
0629 
0630     if (m->dma_list)
0631         genwqe_unmap_pages(cd, m->dma_list, m->nr_pages);
0632 
0633     if (m->page_list) {
0634         unpin_user_pages_dirty_lock(m->page_list, m->nr_pages,
0635                         m->write);
0636         kfree(m->page_list);
0637         m->page_list = NULL;
0638         m->dma_list = NULL;
0639         m->nr_pages = 0;
0640     }
0641 
0642     m->u_vaddr = NULL;
0643     m->size = 0;        /* mark as unused and not added */
0644     return 0;
0645 }
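
/*
 * Sketch of how the helpers above are typically combined for a single
 * user buffer (error handling trimmed, DDCB submission out of scope; the
 * flow is illustrative and not a quote of the real caller):
 *
 *     struct dma_mapping m = { .write = write };
 *     struct genwqe_sgl sgl = { };
 *     int rc;
 *
 *     rc = genwqe_user_vmap(cd, &m, uaddr, usize);      // pin user pages
 *     if (rc)
 *             return rc;
 *
 *     rc = genwqe_alloc_sync_sgl(cd, &sgl, uaddr, usize, write);
 *     if (rc)
 *             goto out_vunmap;
 *
 *     rc = genwqe_setup_sgl(cd, &sgl, m.dma_list);      // build chained list
 *
 *     // ... hand sgl.sgl_dma_addr to the hardware via a DDCB ...
 *
 *     genwqe_free_sync_sgl(cd, &sgl);   // copies cached pages back on write
 * out_vunmap:
 *     genwqe_user_vunmap(cd, &m);
 *     return rc;
 */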
0646 
0647 /**
0648  * genwqe_card_type() - Get chip type SLU Configuration Register
0649  * @cd:         pointer to the genwqe device descriptor
0650  * Return: 0: Altera Stratix-IV 230
0651  *         1: Altera Stratix-IV 530
0652  *         2: Altera Stratix-V A4
0653  *         3: Altera Stratix-V A7
0654  */
0655 u8 genwqe_card_type(struct genwqe_dev *cd)
0656 {
0657     u64 card_type = cd->slu_unitcfg;
0658 
0659     return (u8)((card_type & IO_SLU_UNITCFG_TYPE_MASK) >> 20);
0660 }
0661 
0662 /**
0663  * genwqe_card_reset() - Reset the card
0664  * @cd:         pointer to the genwqe device descriptor
0665  */
0666 int genwqe_card_reset(struct genwqe_dev *cd)
0667 {
0668     u64 softrst;
0669     struct pci_dev *pci_dev = cd->pci_dev;
0670 
0671     if (!genwqe_is_privileged(cd))
0672         return -ENODEV;
0673 
0674     /* new SL */
0675     __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, 0x1ull);
0676     msleep(1000);
0677     __genwqe_readq(cd, IO_HSU_FIR_CLR);
0678     __genwqe_readq(cd, IO_APP_FIR_CLR);
0679     __genwqe_readq(cd, IO_SLU_FIR_CLR);
0680 
0681     /*
0682      * Read-modify-write to preserve the stealth bits
0683      *
0684      * For SL >= 039, the Stealth WE bit allows removing
0685      * the read-modify-write.
0686      * r-m-w may require a mask 0x3C to avoid hitting hard
0687      * reset again for error reset (should be 0, chicken).
0688      */
0689     softrst = __genwqe_readq(cd, IO_SLC_CFGREG_SOFTRESET) & 0x3cull;
0690     __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, softrst | 0x2ull);
0691 
0692     /* give ERRORRESET some time to finish */
0693     msleep(50);
0694 
0695     if (genwqe_need_err_masking(cd)) {
0696         dev_info(&pci_dev->dev,
0697              "[%s] masking errors for old bitstreams\n", __func__);
0698         __genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull);
0699     }
0700     return 0;
0701 }
0702 
0703 int genwqe_read_softreset(struct genwqe_dev *cd)
0704 {
0705     u64 bitstream;
0706 
0707     if (!genwqe_is_privileged(cd))
0708         return -ENODEV;
0709 
0710     bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM) & 0x1;
0711     cd->softreset = (bitstream == 0) ? 0x8ull : 0xcull;
0712     return 0;
0713 }
0714 
0715 /**
0716  * genwqe_set_interrupt_capability() - Configure MSI capability structure
0717  * @cd:         pointer to the device
0718  * @count:      number of vectors to allocate
0719  * Return: 0 if no error
0720  */
0721 int genwqe_set_interrupt_capability(struct genwqe_dev *cd, int count)
0722 {
0723     int rc;
0724 
0725     rc = pci_alloc_irq_vectors(cd->pci_dev, 1, count, PCI_IRQ_MSI);
0726     if (rc < 0)
0727         return rc;
0728     return 0;
0729 }
0730 
0731 /**
0732  * genwqe_reset_interrupt_capability() - Undo genwqe_set_interrupt_capability()
0733  * @cd:         pointer to the device
0734  */
0735 void genwqe_reset_interrupt_capability(struct genwqe_dev *cd)
0736 {
0737     pci_free_irq_vectors(cd->pci_dev);
0738 }
0739 
0740 /**
0741  * set_reg_idx() - Fill array with data. Ignore illegal offsets.
0742  * @cd:         card device
0743  * @r:          debug register array
0744  * @i:          index to desired entry
0745  * @m:          maximum possible entries
0746  * @addr:       addr which is read
0747  * @idx:        index in debug array
0748  * @val:        read value
0749  */
0750 static int set_reg_idx(struct genwqe_dev *cd, struct genwqe_reg *r,
0751                unsigned int *i, unsigned int m, u32 addr, u32 idx,
0752                u64 val)
0753 {
0754     if (WARN_ON_ONCE(*i >= m))
0755         return -EFAULT;
0756 
0757     r[*i].addr = addr;
0758     r[*i].idx = idx;
0759     r[*i].val = val;
0760     ++*i;
0761     return 0;
0762 }
0763 
0764 static int set_reg(struct genwqe_dev *cd, struct genwqe_reg *r,
0765            unsigned int *i, unsigned int m, u32 addr, u64 val)
0766 {
0767     return set_reg_idx(cd, r, i, m, addr, 0, val);
0768 }
0769 
0770 int genwqe_read_ffdc_regs(struct genwqe_dev *cd, struct genwqe_reg *regs,
0771              unsigned int max_regs, int all)
0772 {
0773     unsigned int i, j, idx = 0;
0774     u32 ufir_addr, ufec_addr, sfir_addr, sfec_addr;
0775     u64 gfir, sluid, appid, ufir, ufec, sfir, sfec;
0776 
0777     /* Global FIR */
0778     gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
0779     set_reg(cd, regs, &idx, max_regs, IO_SLC_CFGREG_GFIR, gfir);
0780 
0781     /* UnitCfg for SLU */
0782     sluid = __genwqe_readq(cd, IO_SLU_UNITCFG); /* 0x00000000 */
0783     set_reg(cd, regs, &idx, max_regs, IO_SLU_UNITCFG, sluid);
0784 
0785     /* UnitCfg for APP */
0786     appid = __genwqe_readq(cd, IO_APP_UNITCFG); /* 0x02000000 */
0787     set_reg(cd, regs, &idx, max_regs, IO_APP_UNITCFG, appid);
0788 
0789     /* Check all chip Units */
0790     for (i = 0; i < GENWQE_MAX_UNITS; i++) {
0791 
0792         /* Unit FIR */
0793         ufir_addr = (i << 24) | 0x008;
0794         ufir = __genwqe_readq(cd, ufir_addr);
0795         set_reg(cd, regs, &idx, max_regs, ufir_addr, ufir);
0796 
0797         /* Unit FEC */
0798         ufec_addr = (i << 24) | 0x018;
0799         ufec = __genwqe_readq(cd, ufec_addr);
0800         set_reg(cd, regs, &idx, max_regs, ufec_addr, ufec);
0801 
0802         for (j = 0; j < 64; j++) {
0803             /* wherever there is a primary 1, read the secondary */
0804             if (!all && (!(ufir & (1ull << j))))
0805                 continue;
0806 
0807             sfir_addr = (i << 24) | (0x100 + 8 * j);
0808             sfir = __genwqe_readq(cd, sfir_addr);
0809             set_reg(cd, regs, &idx, max_regs, sfir_addr, sfir);
0810 
0811             sfec_addr = (i << 24) | (0x300 + 8 * j);
0812             sfec = __genwqe_readq(cd, sfec_addr);
0813             set_reg(cd, regs, &idx, max_regs, sfec_addr, sfec);
0814         }
0815     }
0816 
0817     /* fill with invalid data until end */
0818     for (i = idx; i < max_regs; i++) {
0819         regs[i].addr = 0xffffffff;
0820         regs[i].val = 0xffffffffffffffffull;
0821     }
0822     return idx;
0823 }
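
/*
 * Address layout used above, with a worked example: each unit i occupies
 * a 16 MiB window at (i << 24).  Within that window the unit FIR sits at
 * offset 0x008, the unit FEC at 0x018, secondary FIR j at 0x100 + 8 * j
 * and secondary FEC j at 0x300 + 8 * j.  For unit 2, secondary index 5:
 *
 *     sfir_addr = (2 << 24) | (0x100 + 8 * 5) = 0x02000128
 *     sfec_addr = (2 << 24) | (0x300 + 8 * 5) = 0x02000328
 */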
0824 
0825 /**
0826  * genwqe_ffdc_buff_size() - Calculates the number of dump registers
0827  * @cd:         genwqe device descriptor
0828  * @uid:    unit ID
0829  */
0830 int genwqe_ffdc_buff_size(struct genwqe_dev *cd, int uid)
0831 {
0832     int entries = 0, ring, traps, traces, trace_entries;
0833     u32 eevptr_addr, l_addr, d_len, d_type;
0834     u64 eevptr, val, addr;
0835 
0836     eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER;
0837     eevptr = __genwqe_readq(cd, eevptr_addr);
0838 
0839     if ((eevptr != 0x0) && (eevptr != -1ull)) {
0840         l_addr = GENWQE_UID_OFFS(uid) | eevptr;
0841 
0842         while (1) {
0843             val = __genwqe_readq(cd, l_addr);
0844 
0845             if ((val == 0x0) || (val == -1ull))
0846                 break;
0847 
0848             /* 38:24 */
0849             d_len  = (val & 0x0000007fff000000ull) >> 24;
0850 
0851             /* 39 */
0852             d_type = (val & 0x0000008000000000ull) >> 36;
0853 
0854             if (d_type) {   /* repeat */
0855                 entries += d_len;
0856             } else {    /* size in bytes! */
0857                 entries += d_len >> 3;
0858             }
0859 
0860             l_addr += 8;
0861         }
0862     }
0863 
0864     for (ring = 0; ring < 8; ring++) {
0865         addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring);
0866         val = __genwqe_readq(cd, addr);
0867 
0868         if ((val == 0x0ull) || (val == -1ull))
0869             continue;
0870 
0871         traps = (val >> 24) & 0xff;
0872         traces = (val >> 16) & 0xff;
0873         trace_entries = val & 0xffff;
0874 
0875         entries += traps + (traces * trace_entries);
0876     }
0877     return entries;
0878 }
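
/*
 * Worked example for the ring accounting above (values are illustrative
 * only): a diag-map word with traps = 4 (bits 31:24), traces = 6
 * (bits 23:16) and trace_entries = 512 (bits 15:0) contributes
 *
 *     entries += 4 + 6 * 512 = 3076
 *
 * dump registers for that ring.
 */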
0879 
0880 /**
0881  * genwqe_ffdc_buff_read() - Implements LogoutExtendedErrorRegisters procedure
0882  * @cd:         genwqe device descriptor
0883  * @uid:    unit ID
0884  * @regs:       register information
0885  * @max_regs:   number of register entries
0886  */
0887 int genwqe_ffdc_buff_read(struct genwqe_dev *cd, int uid,
0888               struct genwqe_reg *regs, unsigned int max_regs)
0889 {
0890     int i, traps, traces, trace, trace_entries, trace_entry, ring;
0891     unsigned int idx = 0;
0892     u32 eevptr_addr, l_addr, d_addr, d_len, d_type;
0893     u64 eevptr, e, val, addr;
0894 
0895     eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER;
0896     eevptr = __genwqe_readq(cd, eevptr_addr);
0897 
0898     if ((eevptr != 0x0) && (eevptr != 0xffffffffffffffffull)) {
0899         l_addr = GENWQE_UID_OFFS(uid) | eevptr;
0900         while (1) {
0901             e = __genwqe_readq(cd, l_addr);
0902             if ((e == 0x0) || (e == 0xffffffffffffffffull))
0903                 break;
0904 
0905             d_addr = (e & 0x0000000000ffffffull);       /* 23:0 */
0906             d_len  = (e & 0x0000007fff000000ull) >> 24; /* 38:24 */
0907             d_type = (e & 0x0000008000000000ull) >> 36; /* 39 */
0908             d_addr |= GENWQE_UID_OFFS(uid);
0909 
0910             if (d_type) {
0911                 for (i = 0; i < (int)d_len; i++) {
0912                     val = __genwqe_readq(cd, d_addr);
0913                     set_reg_idx(cd, regs, &idx, max_regs,
0914                             d_addr, i, val);
0915                 }
0916             } else {
0917                 d_len >>= 3; /* Size in bytes! */
0918                 for (i = 0; i < (int)d_len; i++, d_addr += 8) {
0919                     val = __genwqe_readq(cd, d_addr);
0920                     set_reg_idx(cd, regs, &idx, max_regs,
0921                             d_addr, 0, val);
0922                 }
0923             }
0924             l_addr += 8;
0925         }
0926     }
0927 
0928     /*
0929      * To save time, there are only 6 traces populated on Uid=2,
0930      * Ring=1, each with iters=512.
0931      */
0932     for (ring = 0; ring < 8; ring++) { /* 0 is fls, 1 is fds,
0933                           2...7 are ASI rings */
0934         addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring);
0935         val = __genwqe_readq(cd, addr);
0936 
0937         if ((val == 0x0ull) || (val == -1ull))
0938             continue;
0939 
0940         traps = (val >> 24) & 0xff; /* Number of Traps  */
0941         traces = (val >> 16) & 0xff;    /* Number of Traces */
0942         trace_entries = val & 0xffff;   /* Entries per trace    */
0943 
0944         /* Note: This is a combined loop that dumps both the traps */
0945         /* (for the trace == 0 case) as well as the traces 1 to    */
0946         /* 'traces'.                           */
0947         for (trace = 0; trace <= traces; trace++) {
0948             u32 diag_sel =
0949                 GENWQE_EXTENDED_DIAG_SELECTOR(ring, trace);
0950 
0951             addr = (GENWQE_UID_OFFS(uid) |
0952                 IO_EXTENDED_DIAG_SELECTOR);
0953             __genwqe_writeq(cd, addr, diag_sel);
0954 
0955             for (trace_entry = 0;
0956                  trace_entry < (trace ? trace_entries : traps);
0957                  trace_entry++) {
0958                 addr = (GENWQE_UID_OFFS(uid) |
0959                     IO_EXTENDED_DIAG_READ_MBX);
0960                 val = __genwqe_readq(cd, addr);
0961                 set_reg_idx(cd, regs, &idx, max_regs, addr,
0962                         (diag_sel<<16) | trace_entry, val);
0963             }
0964         }
0965     }
0966     return 0;
0967 }
0968 
0969 /**
0970  * genwqe_write_vreg() - Write register in virtual window
0971  * @cd:         genwqe device descriptor
0972  * @reg:    register (byte) offset within BAR
0973  * @val:    value to write
0974  * @func:   PCI virtual function
0975  *
0976  * Note: these registers are only accessible to the PF through the
0977  * VF window; they are not intended to be accessed by the VF.
0978  */
0979 int genwqe_write_vreg(struct genwqe_dev *cd, u32 reg, u64 val, int func)
0980 {
0981     __genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf);
0982     __genwqe_writeq(cd, reg, val);
0983     return 0;
0984 }
0985 
0986 /**
0987  * genwqe_read_vreg() - Read register in virtual window
0988  * @cd:         genwqe device descriptor
0989  * @reg:    register (byte) offset within BAR
0990  * @func:   PCI virtual function
0991  *
0992  * Note: these registers are only accessible to the PF through the
0993  * VF window; they are not intended to be accessed by the VF.
0994  */
0995 u64 genwqe_read_vreg(struct genwqe_dev *cd, u32 reg, int func)
0996 {
0997     __genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf);
0998     return __genwqe_readq(cd, reg);
0999 }
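
/*
 * Usage sketch (illustrative only): from the PF the same per-VF register
 * can be programmed for every function by going through the virtual
 * window.  VF_EXAMPLE_REG and the function numbering (0 = PF, 1..n = VFs)
 * are assumptions made for this sketch, not definitions from this driver:
 *
 *     for (vf = 0; vf < num_vfs; vf++)
 *             genwqe_write_vreg(cd, VF_EXAMPLE_REG, timeout_val, vf + 1);
 */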
1000 
1001 /**
1002  * genwqe_base_clock_frequency() - Determine base clock frequency of the card
1003  * @cd:         genwqe device descriptor
1004  *
1005  * Note: From a design perspective it turned out to be a bad idea to
1006  * use codes here to specify the frequency/speed values. An old
1007  * driver cannot understand new codes and is therefore always a
1008  * problem. It is better to measure the value or to put the
1009  * speed/frequency directly into a register, which is always a valid
1010  * value for old as well as for new software.
1011  *
1012  * Return: Card clock in MHz
1013  */
1014 int genwqe_base_clock_frequency(struct genwqe_dev *cd)
1015 {
1016     u16 speed;      /*         MHz  MHz  MHz  MHz */
1017     static const int speed_grade[] = { 250, 200, 166, 175 };
1018 
1019     speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full);
1020     if (speed >= ARRAY_SIZE(speed_grade))
1021         return 0;   /* illegal value */
1022 
1023     return speed_grade[speed];
1024 }
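
/*
 * Example: slu_unitcfg bits 31:28 index the table above, so a field value
 * of 0 reads back as 250 MHz and 3 as 175 MHz; any code outside the table
 * is treated as illegal and reported as 0.
 */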
1025 
1026 /**
1027  * genwqe_stop_traps() - Stop traps
1028  * @cd:         genwqe device descriptor
1029  *
1030  * Before reading out the analysis data, we need to stop the traps.
1031  */
1032 void genwqe_stop_traps(struct genwqe_dev *cd)
1033 {
1034     __genwqe_writeq(cd, IO_SLC_MISC_DEBUG_SET, 0xcull);
1035 }
1036 
1037 /**
1038  * genwqe_start_traps() - Start traps
1039  * @cd:         genwqe device descriptor
1040  *
1041  * After having read the data, we can/must enable the traps again.
1042  */
1043 void genwqe_start_traps(struct genwqe_dev *cd)
1044 {
1045     __genwqe_writeq(cd, IO_SLC_MISC_DEBUG_CLR, 0xcull);
1046 
1047     if (genwqe_need_err_masking(cd))
1048         __genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull);
1049 }