/*
 * libcxgb_ppm.c: Chelsio common library for T3/T4/T5 iSCSI PagePod Manager
 *
 * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Written by: Karen Xie (kxie@chelsio.com)
 */
0036 
/* pr_fmt is defined before the kernel headers so every pr_* message
 * from this file is prefixed with the driver name.
 */
#define DRV_NAME "libcxgb"
#define pr_fmt(fmt) DRV_NAME ": " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/list.h>
#include <linux/skbuff.h>
#include <linux/pci.h>
#include <linux/scatterlist.h>

#include "libcxgb_ppm.h"

/* Direct Data Placement -
 * Directly place the iSCSI Data-In or Data-Out PDU's payload into
 * pre-posted final destination host-memory buffers based on the
 * Initiator Task Tag (ITT) in Data-In or Target Task Tag (TTT)
 * in Data-Out PDUs. The host memory address is programmed into
 * h/w in the format of pagepod entries. The location of the
 * pagepod entry is encoded into ddp tag which is used as the base
 * for ITT/TTT.
 */

0063 /* Direct-Data Placement page size adjustment
0064  */
0065 int cxgbi_ppm_find_page_index(struct cxgbi_ppm *ppm, unsigned long pgsz)
0066 {
0067     struct cxgbi_tag_format *tformat = &ppm->tformat;
0068     int i;
0069 
0070     for (i = 0; i < DDP_PGIDX_MAX; i++) {
0071         if (pgsz == 1UL << (DDP_PGSZ_BASE_SHIFT +
0072                      tformat->pgsz_order[i])) {
0073             pr_debug("%s: %s ppm, pgsz %lu -> idx %d.\n",
0074                  __func__, ppm->ndev->name, pgsz, i);
0075             return i;
0076         }
0077     }
0078     pr_info("ippm: ddp page size %lu not supported.\n", pgsz);
0079     return DDP_PGIDX_MAX;
0080 }
0081 
0082 /* DDP setup & teardown
0083  */
0084 static int ppm_find_unused_entries(unsigned long *bmap,
0085                    unsigned int max_ppods,
0086                    unsigned int start,
0087                    unsigned int nr,
0088                    unsigned int align_mask)
0089 {
0090     unsigned long i;
0091 
0092     i = bitmap_find_next_zero_area(bmap, max_ppods, start, nr, align_mask);
0093 
0094     if (unlikely(i >= max_ppods) && (start > nr))
0095         i = bitmap_find_next_zero_area(bmap, max_ppods, 0, start - 1,
0096                            align_mask);
0097     if (unlikely(i >= max_ppods))
0098         return -ENOSPC;
0099 
0100     bitmap_set(bmap, i, nr);
0101     return (int)i;
0102 }
0103 
0104 static void ppm_mark_entries(struct cxgbi_ppm *ppm, int i, int count,
0105                  unsigned long caller_data)
0106 {
0107     struct cxgbi_ppod_data *pdata = ppm->ppod_data + i;
0108 
0109     pdata->caller_data = caller_data;
0110     pdata->npods = count;
0111 
0112     if (pdata->color == ((1 << PPOD_IDX_SHIFT) - 1))
0113         pdata->color = 0;
0114     else
0115         pdata->color++;
0116 }
0117 
/* Reserve @count contiguous pods from the current CPU's reserved pool.
 *
 * Returns the global pod index (cpu offset folded in) on success,
 * -EINVAL if no per-cpu pools were set up, or -ENOSPC when this CPU's
 * pool cannot satisfy the request (the caller then falls back to the
 * global map via ppm_get_entries()).
 */
static int ppm_get_cpu_entries(struct cxgbi_ppm *ppm, unsigned int count,
			       unsigned long caller_data)
{
	struct cxgbi_ppm_pool *pool;
	unsigned int cpu;
	int i;

	/* no per-cpu reservation exists for this ppm */
	if (!ppm->pool)
		return -EINVAL;

	/* pin the cpu only long enough to pick its pool; the pool's
	 * bh-safe spinlock protects the bitmap from here on
	 */
	cpu = get_cpu();
	pool = per_cpu_ptr(ppm->pool, cpu);
	spin_lock_bh(&pool->lock);
	put_cpu();

	i = ppm_find_unused_entries(pool->bmap, ppm->pool_index_max,
				    pool->next, count, 0);
	if (i < 0) {
		/* reset the rotor so the next caller searches from 0 */
		pool->next = 0;
		spin_unlock_bh(&pool->lock);
		return -ENOSPC;
	}

	/* advance the rotor past this allocation, wrapping at the end */
	pool->next = i + count;
	if (pool->next >= ppm->pool_index_max)
		pool->next = 0;

	spin_unlock_bh(&pool->lock);

	pr_debug("%s: cpu %u, idx %d + %d (%d), next %u.\n",
		 __func__, cpu, i, count, i + cpu * ppm->pool_index_max,
		pool->next);

	/* convert the pool-local index into the global pod index */
	i += cpu * ppm->pool_index_max;
	ppm_mark_entries(ppm, i, count, caller_data);

	return i;
}
0156 
/* Reserve @count contiguous pods from the global (non per-cpu) map.
 *
 * Returns the global pod index (pool_rsvd offset folded in) on
 * success, or -ENOSPC if no suitable run of free slots exists.
 */
static int ppm_get_entries(struct cxgbi_ppm *ppm, unsigned int count,
			   unsigned long caller_data)
{
	int i;

	spin_lock_bh(&ppm->map_lock);
	i = ppm_find_unused_entries(ppm->ppod_bmap, ppm->bmap_index_max,
				    ppm->next, count, 0);
	if (i < 0) {
		/* reset the rotor so the next caller searches from 0 */
		ppm->next = 0;
		spin_unlock_bh(&ppm->map_lock);
		pr_debug("ippm: NO suitable entries %u available.\n",
			 count);
		return -ENOSPC;
	}

	/* advance the rotor; when an EDRAM region exists, wrap as soon
	 * as the rotor crosses its upper bound - presumably so
	 * EDRAM-backed pods are reused first (TODO: confirm intent)
	 */
	ppm->next = i + count;
	if (ppm->max_index_in_edram && (ppm->next >= ppm->max_index_in_edram))
		ppm->next = 0;
	else if (ppm->next >= ppm->bmap_index_max)
		ppm->next = 0;

	spin_unlock_bh(&ppm->map_lock);

	pr_debug("%s: idx %d + %d (%d), next %u, caller_data 0x%lx.\n",
		 __func__, i, count, i + ppm->pool_rsvd, ppm->next,
		 caller_data);

	/* indices below pool_rsvd belong to the per-cpu pools */
	i += ppm->pool_rsvd;
	ppm_mark_entries(ppm, i, count, caller_data);

	return i;
}
0190 
/* Free @count pods anchored at global index @i, returning them to the
 * per-cpu pool they came from (i < pool_rsvd) or to the global map.
 */
static void ppm_unmark_entries(struct cxgbi_ppm *ppm, int i, int count)
{
	pr_debug("%s: idx %d + %d.\n", __func__, i, count);

	if (i < ppm->pool_rsvd) {
		unsigned int cpu;
		struct cxgbi_ppm_pool *pool;

		/* split the global index into owning cpu + local index */
		cpu = i / ppm->pool_index_max;
		i %= ppm->pool_index_max;

		pool = per_cpu_ptr(ppm->pool, cpu);
		spin_lock_bh(&pool->lock);
		bitmap_clear(pool->bmap, i, count);

		/* pull the rotor back so freed slots are found again */
		if (i < pool->next)
			pool->next = i;
		spin_unlock_bh(&pool->lock);

		pr_debug("%s: cpu %u, idx %d, next %u.\n",
			 __func__, cpu, i, pool->next);
	} else {
		spin_lock_bh(&ppm->map_lock);

		/* global-map indices are offset by the pool reserve */
		i -= ppm->pool_rsvd;
		bitmap_clear(ppm->ppod_bmap, i, count);

		/* pull the rotor back so freed slots are found again */
		if (i < ppm->next)
			ppm->next = i;
		spin_unlock_bh(&ppm->map_lock);

		pr_debug("%s: idx %d, next %u.\n", __func__, i, ppm->next);
	}
}
0225 
0226 void cxgbi_ppm_ppod_release(struct cxgbi_ppm *ppm, u32 idx)
0227 {
0228     struct cxgbi_ppod_data *pdata;
0229 
0230     if (idx >= ppm->ppmax) {
0231         pr_warn("ippm: idx too big %u > %u.\n", idx, ppm->ppmax);
0232         return;
0233     }
0234 
0235     pdata = ppm->ppod_data + idx;
0236     if (!pdata->npods) {
0237         pr_warn("ippm: idx %u, npods 0.\n", idx);
0238         return;
0239     }
0240 
0241     pr_debug("release idx %u, npods %u.\n", idx, pdata->npods);
0242     ppm_unmark_entries(ppm, idx, pdata->npods);
0243 }
0244 EXPORT_SYMBOL(cxgbi_ppm_ppod_release);
0245 
0246 int cxgbi_ppm_ppods_reserve(struct cxgbi_ppm *ppm, unsigned short nr_pages,
0247                 u32 per_tag_pg_idx, u32 *ppod_idx,
0248                 u32 *ddp_tag, unsigned long caller_data)
0249 {
0250     struct cxgbi_ppod_data *pdata;
0251     unsigned int npods;
0252     int idx = -1;
0253     unsigned int hwidx;
0254     u32 tag;
0255 
0256     npods = (nr_pages + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
0257     if (!npods) {
0258         pr_warn("%s: pages %u -> npods %u, full.\n",
0259             __func__, nr_pages, npods);
0260         return -EINVAL;
0261     }
0262 
0263     /* grab from cpu pool first */
0264     idx = ppm_get_cpu_entries(ppm, npods, caller_data);
0265     /* try the general pool */
0266     if (idx < 0)
0267         idx = ppm_get_entries(ppm, npods, caller_data);
0268     if (idx < 0) {
0269         pr_debug("ippm: pages %u, nospc %u, nxt %u, 0x%lx.\n",
0270              nr_pages, npods, ppm->next, caller_data);
0271         return idx;
0272     }
0273 
0274     pdata = ppm->ppod_data + idx;
0275     hwidx = ppm->base_idx + idx;
0276 
0277     tag = cxgbi_ppm_make_ddp_tag(hwidx, pdata->color);
0278 
0279     if (per_tag_pg_idx)
0280         tag |= (per_tag_pg_idx << 30) & 0xC0000000;
0281 
0282     *ppod_idx = idx;
0283     *ddp_tag = tag;
0284 
0285     pr_debug("ippm: sg %u, tag 0x%x(%u,%u), data 0x%lx.\n",
0286          nr_pages, tag, idx, npods, caller_data);
0287 
0288     return npods;
0289 }
0290 EXPORT_SYMBOL(cxgbi_ppm_ppods_reserve);
0291 
0292 void cxgbi_ppm_make_ppod_hdr(struct cxgbi_ppm *ppm, u32 tag,
0293                  unsigned int tid, unsigned int offset,
0294                  unsigned int length,
0295                  struct cxgbi_pagepod_hdr *hdr)
0296 {
0297     /* The ddp tag in pagepod should be with bit 31:30 set to 0.
0298      * The ddp Tag on the wire should be with non-zero 31:30 to the peer
0299      */
0300     tag &= 0x3FFFFFFF;
0301 
0302     hdr->vld_tid = htonl(PPOD_VALID_FLAG | PPOD_TID(tid));
0303 
0304     hdr->rsvd = 0;
0305     hdr->pgsz_tag_clr = htonl(tag & ppm->tformat.idx_clr_mask);
0306     hdr->max_offset = htonl(length);
0307     hdr->page_offset = htonl(offset);
0308 
0309     pr_debug("ippm: tag 0x%x, tid 0x%x, xfer %u, off %u.\n",
0310          tag, tid, length, offset);
0311 }
0312 EXPORT_SYMBOL(cxgbi_ppm_make_ppod_hdr);
0313 
/* Counterpart of the vzalloc() in cxgbi_ppm_init(). */
static void ppm_free(struct cxgbi_ppm *ppm)
{
	vfree(ppm);
}
0318 
/* kref release callback: tear down the ppm once the last reference is
 * dropped.  Clears the owner's back-pointer, frees the per-cpu pools
 * and then the ppm allocation itself.
 */
static void ppm_destroy(struct kref *kref)
{
	struct cxgbi_ppm *ppm = container_of(kref,
					     struct cxgbi_ppm,
					     refcnt);
	pr_info("ippm: kref 0, destroy %s ppm 0x%p.\n",
		ppm->ndev->name, ppm);

	/* owner's slot (set in cxgbi_ppm_init()) no longer valid */
	*ppm->ppm_pp = NULL;

	free_percpu(ppm->pool);
	ppm_free(ppm);
}
0332 
0333 int cxgbi_ppm_release(struct cxgbi_ppm *ppm)
0334 {
0335     if (ppm) {
0336         int rv;
0337 
0338         rv = kref_put(&ppm->refcnt, ppm_destroy);
0339         return rv;
0340     }
0341     return 1;
0342 }
0343 EXPORT_SYMBOL(cxgbi_ppm_release);
0344 
/* Allocate the per-cpu pod pools covering (at most) *total pods.
 *
 * Each cpu's share is capped so pool header + bitmap fit within
 * PCPU_MIN_UNIT_SIZE, and the bitmap is rounded down to a whole
 * number of unsigned longs.  On success *total is updated to the pod
 * count actually covered across all possible cpus and *pcpu_ppmax to
 * the per-cpu count.  Returns NULL when the per-cpu share is too
 * small to be useful or the percpu allocation fails.
 */
static struct cxgbi_ppm_pool *ppm_alloc_cpu_pool(unsigned int *total,
						 unsigned int *pcpu_ppmax)
{
	struct cxgbi_ppm_pool *pools;
	unsigned int ppmax = (*total) / num_possible_cpus();
	/* max pods per cpu: bits left in the unit after the header */
	unsigned int max = (PCPU_MIN_UNIT_SIZE - sizeof(*pools)) << 3;
	unsigned int bmap;
	unsigned int alloc_sz;
	unsigned int count = 0;
	unsigned int cpu;

	/* make sure per cpu pool fits into PCPU_MIN_UNIT_SIZE */
	if (ppmax > max)
		ppmax = max;

	/* pool size must be multiple of unsigned long */
	bmap = ppmax / BITS_PER_TYPE(unsigned long);
	if (!bmap)
		return NULL;

	/* round back up to the bit capacity of the whole longs */
	ppmax = (bmap * sizeof(unsigned long)) << 3;

	alloc_sz = sizeof(*pools) + sizeof(unsigned long) * bmap;
	pools = __alloc_percpu(alloc_sz, __alignof__(struct cxgbi_ppm_pool));

	if (!pools)
		return NULL;

	for_each_possible_cpu(cpu) {
		struct cxgbi_ppm_pool *ppool = per_cpu_ptr(pools, cpu);

		/* zero bitmap (all free) and init the pool lock */
		memset(ppool, 0, alloc_sz);
		spin_lock_init(&ppool->lock);
		count += ppmax;
	}

	*total = count;
	*pcpu_ppmax = ppmax;

	return pools;
}
0386 
/* Create (or take a reference on) the pagepod manager for @ndev.
 *
 * @ppm_pp: owner's slot for the ppm pointer; set on first init and
 *	cleared again by ppm_destroy() when the last reference drops.
 * @iscsi_size: byte size of the DDR ppod region starting at @start.
 * @llimit: lower limit of the ppod region in adapter memory.
 * @reserve_factor: 1/N of the pods are set aside for per-cpu pools
 *	(forced to 0 when an EDRAM region is used).
 * @iscsi_edram_start/@iscsi_edram_size: optional EDRAM ppod region
 *	that must end exactly where the DDR region begins.
 *
 * Returns 0 on first initialization, 1 when the ppm already existed
 * (reference count bumped), or a negative errno on failure.
 */
int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev,
		   struct pci_dev *pdev, void *lldev,
		   struct cxgbi_tag_format *tformat, unsigned int iscsi_size,
		   unsigned int llimit, unsigned int start,
		   unsigned int reserve_factor, unsigned int iscsi_edram_start,
		   unsigned int iscsi_edram_size)
{
	struct cxgbi_ppm *ppm = (struct cxgbi_ppm *)(*ppm_pp);
	struct cxgbi_ppm_pool *pool = NULL;
	unsigned int pool_index_max = 0;
	unsigned int ppmax_pool = 0;
	unsigned int ppod_bmap_size;
	unsigned int alloc_sz;
	unsigned int ppmax;

	if (!iscsi_edram_start)
		iscsi_edram_size = 0;

	/* EDRAM and DDR ppod regions must form one contiguous range */
	if (iscsi_edram_size &&
	    ((iscsi_edram_start + iscsi_edram_size) != start)) {
		pr_err("iscsi ppod region not contiguous: EDRAM start 0x%x "
		       "size 0x%x DDR start 0x%x\n",
		       iscsi_edram_start, iscsi_edram_size, start);
		return -EINVAL;
	}

	/* with EDRAM the combined region starts at the EDRAM base and
	 * no per-cpu reservation is made
	 */
	if (iscsi_edram_size) {
		reserve_factor = 0;
		start = iscsi_edram_start;
	}

	ppmax = (iscsi_edram_size + iscsi_size) >> PPOD_SIZE_SHIFT;

	/* already initialized by another user: just take a reference */
	if (ppm) {
		pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n",
			ndev->name, ppm_pp, ppm, ppm->ppmax, ppmax);
		kref_get(&ppm->refcnt);
		return 1;
	}

	if (reserve_factor) {
		ppmax_pool = ppmax / reserve_factor;
		pool = ppm_alloc_cpu_pool(&ppmax_pool, &pool_index_max);
		if (!pool) {
			/* fall back to using only the global map */
			ppmax_pool = 0;
			reserve_factor = 0;
		}

		pr_debug("%s: ppmax %u, cpu total %u, per cpu %u.\n",
			 ndev->name, ppmax, ppmax_pool, pool_index_max);
	}

	/* one allocation holds the ppm struct, the per-pod data array
	 * and the bitmap for the global (non per-cpu) part of the map
	 */
	ppod_bmap_size = BITS_TO_LONGS(ppmax - ppmax_pool);
	alloc_sz = sizeof(struct cxgbi_ppm) +
			ppmax * (sizeof(struct cxgbi_ppod_data)) +
			ppod_bmap_size * sizeof(unsigned long);

	ppm = vzalloc(alloc_sz);
	if (!ppm)
		goto release_ppm_pool;

	/* bitmap lives directly after the ppod_data[] array */
	ppm->ppod_bmap = (unsigned long *)(&ppm->ppod_data[ppmax]);

	/* mark the unused tail bits of the bitmap as busy so they are
	 * never handed out.
	 * NOTE(review): ppod_bmap_size is in longs (BITS_TO_LONGS
	 * above), so the ">> 3" here and the bitmap_set() start index
	 * of ppmax look inconsistent with that sizing - confirm the
	 * intended layout before changing anything.
	 */
	if ((ppod_bmap_size >> 3) > (ppmax - ppmax_pool)) {
		unsigned int start = ppmax - ppmax_pool;
		unsigned int end = ppod_bmap_size >> 3;

		bitmap_set(ppm->ppod_bmap, ppmax, end - start);
		pr_info("%s: %u - %u < %u * 8, mask extra bits %u, %u.\n",
			__func__, ppmax, ppmax_pool, ppod_bmap_size, start,
			end);
	}
	if (iscsi_edram_size) {
		/* reserve the first DDR pod - presumably so that no
		 * allocation straddles the EDRAM/DDR boundary; confirm
		 */
		unsigned int first_ddr_idx =
				iscsi_edram_size >> PPOD_SIZE_SHIFT;

		ppm->max_index_in_edram = first_ddr_idx - 1;
		bitmap_set(ppm->ppod_bmap, first_ddr_idx, 1);
		pr_debug("reserved %u ppod in bitmap\n", first_ddr_idx);
	}

	spin_lock_init(&ppm->map_lock);
	kref_init(&ppm->refcnt);

	memcpy(&ppm->tformat, tformat, sizeof(struct cxgbi_tag_format));

	ppm->ppm_pp = ppm_pp;
	ppm->ndev = ndev;
	ppm->pdev = pdev;
	ppm->lldev = lldev;
	ppm->ppmax = ppmax;
	ppm->next = 0;
	ppm->llimit = llimit;
	/* hardware pod index of the first entry of this region */
	ppm->base_idx = start > llimit ?
			(start - llimit + 1) >> PPOD_SIZE_SHIFT : 0;
	ppm->bmap_index_max = ppmax - ppmax_pool;

	ppm->pool = pool;
	ppm->pool_rsvd = ppmax_pool;
	ppm->pool_index_max = pool_index_max;

	/* check one more time */
	if (*ppm_pp) {
		/* lost a race with a concurrent init: discard ours and
		 * reference the winner's ppm.
		 * NOTE(review): the per-cpu pool allocated above is not
		 * freed on this path - looks like a leak; confirm.
		 */
		ppm_free(ppm);
		ppm = (struct cxgbi_ppm *)(*ppm_pp);

		pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n",
			ndev->name, ppm_pp, *ppm_pp, ppm->ppmax, ppmax);

		kref_get(&ppm->refcnt);
		return 1;
	}
	*ppm_pp = ppm;

	ppm->tformat.pgsz_idx_dflt = cxgbi_ppm_find_page_index(ppm, PAGE_SIZE);

	pr_info("ippm %s: ppm 0x%p, 0x%p, base %u/%u, pg %lu,%u, rsvd %u,%u.\n",
		ndev->name, ppm_pp, ppm, ppm->base_idx, ppm->ppmax, PAGE_SIZE,
		ppm->tformat.pgsz_idx_dflt, ppm->pool_rsvd,
		ppm->pool_index_max);

	return 0;

release_ppm_pool:
	free_percpu(pool);
	return -ENOMEM;
}
EXPORT_SYMBOL(cxgbi_ppm_init);
0515 
0516 unsigned int cxgbi_tagmask_set(unsigned int ppmax)
0517 {
0518     unsigned int bits = fls(ppmax);
0519 
0520     if (bits > PPOD_IDX_MAX_SIZE)
0521         bits = PPOD_IDX_MAX_SIZE;
0522 
0523     pr_info("ippm: ppmax %u/0x%x -> bits %u, tagmask 0x%x.\n",
0524         ppmax, ppmax, bits, 1 << (bits + PPOD_IDX_SHIFT));
0525 
0526     return 1 << (bits + PPOD_IDX_SHIFT);
0527 }
0528 EXPORT_SYMBOL(cxgbi_tagmask_set);
0529 
MODULE_AUTHOR("Chelsio Communications");
MODULE_DESCRIPTION("Chelsio common library");
MODULE_LICENSE("Dual BSD/GPL");