Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Common NFS I/O  operations for the pnfs file based
0004  * layout drivers.
0005  *
0006  * Copyright (c) 2014, Primary Data, Inc. All rights reserved.
0007  *
0008  * Tom Haynes <loghyr@primarydata.com>
0009  */
0010 
0011 #include <linux/nfs_fs.h>
0012 #include <linux/nfs_page.h>
0013 #include <linux/sunrpc/addr.h>
0014 #include <linux/module.h>
0015 
0016 #include "nfs4session.h"
0017 #include "internal.h"
0018 #include "pnfs.h"
0019 
0020 #define NFSDBG_FACILITY     NFSDBG_PNFS
0021 
0022 void pnfs_generic_rw_release(void *data)
0023 {
0024     struct nfs_pgio_header *hdr = data;
0025 
0026     nfs_put_client(hdr->ds_clp);
0027     hdr->mds_ops->rpc_release(data);
0028 }
0029 EXPORT_SYMBOL_GPL(pnfs_generic_rw_release);
0030 
0031 /* Fake up some data that will cause nfs_commit_release to retry the writes. */
0032 void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data)
0033 {
0034     struct nfs_writeverf *verf = data->res.verf;
0035 
0036     data->task.tk_status = 0;
0037     memset(&verf->verifier, 0, sizeof(verf->verifier));
0038     verf->committed = NFS_UNSTABLE;
0039 }
0040 EXPORT_SYMBOL_GPL(pnfs_generic_prepare_to_resend_writes);
0041 
0042 void pnfs_generic_write_commit_done(struct rpc_task *task, void *data)
0043 {
0044     struct nfs_commit_data *wdata = data;
0045 
0046     /* Note this may cause RPC to be resent */
0047     wdata->mds_ops->rpc_call_done(task, data);
0048 }
0049 EXPORT_SYMBOL_GPL(pnfs_generic_write_commit_done);
0050 
0051 void pnfs_generic_commit_release(void *calldata)
0052 {
0053     struct nfs_commit_data *data = calldata;
0054 
0055     data->completion_ops->completion(data);
0056     pnfs_put_lseg(data->lseg);
0057     nfs_put_client(data->ds_clp);
0058     nfs_commitdata_release(data);
0059 }
0060 EXPORT_SYMBOL_GPL(pnfs_generic_commit_release);
0061 
0062 static struct pnfs_layout_segment *
0063 pnfs_free_bucket_lseg(struct pnfs_commit_bucket *bucket)
0064 {
0065     if (list_empty(&bucket->committing) && list_empty(&bucket->written)) {
0066         struct pnfs_layout_segment *freeme = bucket->lseg;
0067         bucket->lseg = NULL;
0068         return freeme;
0069     }
0070     return NULL;
0071 }
0072 
/* The generic layer is about to remove the req from the commit list.
 * If this will make the bucket empty, it will need to put the lseg reference.
 * Note this must be called holding nfsi->commit_mutex
 *
 * @req: request being removed from its commit list.
 * @cinfo: commit info; cinfo->ds->nwritten is decremented when the request
 *         had PG_COMMIT_TO_DS set.
 */
void
pnfs_generic_clear_request_commit(struct nfs_page *req,
                  struct nfs_commit_info *cinfo)
{
    struct pnfs_commit_bucket *bucket = NULL;

    /* Requests without PG_COMMIT_TO_DS only need the generic removal. */
    if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
        goto out;
    cinfo->ds->nwritten--;
    /*
     * If req->wb_list has exactly one neighbour, that neighbour is taken
     * to be the list head embedded in the bucket as 'written', so the
     * bucket is recovered from it before the request is unlinked.
     * NOTE(review): assumes a PG_COMMIT_TO_DS request sits on a bucket's
     * 'written' list here - confirm against the code that queues it.
     */
    if (list_is_singular(&req->wb_list))
        bucket = list_first_entry(&req->wb_list,
                      struct pnfs_commit_bucket, written);
out:
    nfs_request_remove_commit_list(req, cinfo);
    /* pnfs_free_bucket_lseg() returns NULL unless the bucket is now empty. */
    if (bucket)
        pnfs_put_lseg(pnfs_free_bucket_lseg(bucket));
}
EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit);
0095 
0096 struct pnfs_commit_array *
0097 pnfs_alloc_commit_array(size_t n, gfp_t gfp_flags)
0098 {
0099     struct pnfs_commit_array *p;
0100     struct pnfs_commit_bucket *b;
0101 
0102     p = kmalloc(struct_size(p, buckets, n), gfp_flags);
0103     if (!p)
0104         return NULL;
0105     p->nbuckets = n;
0106     INIT_LIST_HEAD(&p->cinfo_list);
0107     INIT_LIST_HEAD(&p->lseg_list);
0108     p->lseg = NULL;
0109     for (b = &p->buckets[0]; n != 0; b++, n--) {
0110         INIT_LIST_HEAD(&b->written);
0111         INIT_LIST_HEAD(&b->committing);
0112         b->lseg = NULL;
0113         b->direct_verf.committed = NFS_INVALID_STABLE_HOW;
0114     }
0115     return p;
0116 }
0117 EXPORT_SYMBOL_GPL(pnfs_alloc_commit_array);
0118 
/*
 * Free a commit array via kfree_rcu(), using the array's 'rcu' member, so
 * the memory is not released immediately.
 */
void
pnfs_free_commit_array(struct pnfs_commit_array *p)
{
    kfree_rcu(p, rcu);
}
EXPORT_SYMBOL_GPL(pnfs_free_commit_array);
0125 
0126 static struct pnfs_commit_array *
0127 pnfs_find_commit_array_by_lseg(struct pnfs_ds_commit_info *fl_cinfo,
0128         struct pnfs_layout_segment *lseg)
0129 {
0130     struct pnfs_commit_array *array;
0131 
0132     list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
0133         if (array->lseg == lseg)
0134             return array;
0135     }
0136     return NULL;
0137 }
0138 
0139 struct pnfs_commit_array *
0140 pnfs_add_commit_array(struct pnfs_ds_commit_info *fl_cinfo,
0141         struct pnfs_commit_array *new,
0142         struct pnfs_layout_segment *lseg)
0143 {
0144     struct pnfs_commit_array *array;
0145 
0146     array = pnfs_find_commit_array_by_lseg(fl_cinfo, lseg);
0147     if (array)
0148         return array;
0149     new->lseg = lseg;
0150     refcount_set(&new->refcount, 1);
0151     list_add_rcu(&new->cinfo_list, &fl_cinfo->commits);
0152     list_add(&new->lseg_list, &lseg->pls_commits);
0153     return new;
0154 }
0155 EXPORT_SYMBOL_GPL(pnfs_add_commit_array);
0156 
0157 static struct pnfs_commit_array *
0158 pnfs_lookup_commit_array(struct pnfs_ds_commit_info *fl_cinfo,
0159         struct pnfs_layout_segment *lseg)
0160 {
0161     struct pnfs_commit_array *array;
0162 
0163     rcu_read_lock();
0164     array = pnfs_find_commit_array_by_lseg(fl_cinfo, lseg);
0165     if (!array) {
0166         rcu_read_unlock();
0167         fl_cinfo->ops->setup_ds_info(fl_cinfo, lseg);
0168         rcu_read_lock();
0169         array = pnfs_find_commit_array_by_lseg(fl_cinfo, lseg);
0170     }
0171     rcu_read_unlock();
0172     return array;
0173 }
0174 
/*
 * Unlink @array from both lists it sits on and free it.
 * NOTE(review): the "_locked" suffix suggests the caller holds the lock
 * protecting these lists (pnfs_put_commit_array() takes inode->i_lock
 * before calling this) - confirm for the other callers.
 */
static void
pnfs_release_commit_array_locked(struct pnfs_commit_array *array)
{
    /* RCU-aware removal from the per-inode list of commit arrays. */
    list_del_rcu(&array->cinfo_list);
    list_del(&array->lseg_list);
    /* The free itself is deferred via kfree_rcu(). */
    pnfs_free_commit_array(array);
}
0182 
0183 static void
0184 pnfs_put_commit_array_locked(struct pnfs_commit_array *array)
0185 {
0186     if (refcount_dec_and_test(&array->refcount))
0187         pnfs_release_commit_array_locked(array);
0188 }
0189 
0190 static void
0191 pnfs_put_commit_array(struct pnfs_commit_array *array, struct inode *inode)
0192 {
0193     if (refcount_dec_and_lock(&array->refcount, &inode->i_lock)) {
0194         pnfs_release_commit_array_locked(array);
0195         spin_unlock(&inode->i_lock);
0196     }
0197 }
0198 
0199 static struct pnfs_commit_array *
0200 pnfs_get_commit_array(struct pnfs_commit_array *array)
0201 {
0202     if (refcount_inc_not_zero(&array->refcount))
0203         return array;
0204     return NULL;
0205 }
0206 
/*
 * Detach @array from its layout segment and drop one reference on it.
 * The array is only unlinked from the cinfo list and freed if that was the
 * last reference.
 */
static void
pnfs_remove_and_free_commit_array(struct pnfs_commit_array *array)
{
    /* List walkers in this file skip arrays whose lseg is NULL. */
    array->lseg = NULL;
    /* Re-initialise the node so a later list_del() on it stays valid. */
    list_del_init(&array->lseg_list);
    pnfs_put_commit_array_locked(array);
}
0214 
0215 void
0216 pnfs_generic_ds_cinfo_release_lseg(struct pnfs_ds_commit_info *fl_cinfo,
0217         struct pnfs_layout_segment *lseg)
0218 {
0219     struct pnfs_commit_array *array, *tmp;
0220 
0221     list_for_each_entry_safe(array, tmp, &lseg->pls_commits, lseg_list)
0222         pnfs_remove_and_free_commit_array(array);
0223 }
0224 EXPORT_SYMBOL_GPL(pnfs_generic_ds_cinfo_release_lseg);
0225 
0226 void
0227 pnfs_generic_ds_cinfo_destroy(struct pnfs_ds_commit_info *fl_cinfo)
0228 {
0229     struct pnfs_commit_array *array, *tmp;
0230 
0231     list_for_each_entry_safe(array, tmp, &fl_cinfo->commits, cinfo_list)
0232         pnfs_remove_and_free_commit_array(array);
0233 }
0234 EXPORT_SYMBOL_GPL(pnfs_generic_ds_cinfo_destroy);
0235 
0236 /*
0237  * Locks the nfs_page requests for commit and moves them to
0238  * @bucket->committing.
0239  */
0240 static int
0241 pnfs_bucket_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
0242                 struct nfs_commit_info *cinfo,
0243                 int max)
0244 {
0245     struct list_head *src = &bucket->written;
0246     struct list_head *dst = &bucket->committing;
0247     int ret;
0248 
0249     lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);
0250     ret = nfs_scan_commit_list(src, dst, cinfo, max);
0251     if (ret) {
0252         cinfo->ds->nwritten -= ret;
0253         cinfo->ds->ncommitting += ret;
0254     }
0255     return ret;
0256 }
0257 
0258 static int pnfs_bucket_scan_array(struct nfs_commit_info *cinfo,
0259                   struct pnfs_commit_bucket *buckets,
0260                   unsigned int nbuckets,
0261                   int max)
0262 {
0263     unsigned int i;
0264     int rv = 0, cnt;
0265 
0266     for (i = 0; i < nbuckets && max != 0; i++) {
0267         cnt = pnfs_bucket_scan_ds_commit_list(&buckets[i], cinfo, max);
0268         rv += cnt;
0269         max -= cnt;
0270     }
0271     return rv;
0272 }
0273 
/* Move reqs from written to committing lists, returning count
 * of number moved.
 *
 * @cinfo: commit info; cinfo->ds supplies the list of commit arrays.
 * @max: budget handed to the per-bucket scan; the walk stops once it has
 *       been reduced to zero.
 */
int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, int max)
{
    struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
    struct pnfs_commit_array *array;
    int rv = 0, cnt;

    rcu_read_lock();
    list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
        /* Skip arrays already detached from their lseg or being freed. */
        if (!array->lseg || !pnfs_get_commit_array(array))
            continue;
        /* The reference taken above pins the array while RCU is dropped. */
        rcu_read_unlock();
        cnt = pnfs_bucket_scan_array(cinfo, array->buckets,
                array->nbuckets, max);
        /*
         * Re-enter the read-side section before the put: the array is
         * freed with kfree_rcu(), and the iterator still needs it to
         * advance.
         */
        rcu_read_lock();
        pnfs_put_commit_array(array, cinfo->inode);
        rv += cnt;
        max -= cnt;
        if (!max)
            break;
    }
    rcu_read_unlock();
    return rv;
}
EXPORT_SYMBOL_GPL(pnfs_generic_scan_commit_lists);
0301 
/*
 * Move the requests on the 'written' list of each bucket onto @dst.
 *
 * Returns the total number of requests moved. When draining a bucket
 * leaves it completely empty, its layout segment reference is put and the
 * scan starts over from the first bucket.
 */
static unsigned int
pnfs_bucket_recover_commit_reqs(struct list_head *dst,
                    struct pnfs_commit_bucket *buckets,
                unsigned int nbuckets,
                struct nfs_commit_info *cinfo)
{
    struct pnfs_commit_bucket *b;
    struct pnfs_layout_segment *freeme;
    unsigned int nwritten, ret = 0;
    unsigned int i;

restart:
    for (i = 0, b = buckets; i < nbuckets; i++, b++) {
        /* NOTE(review): max == 0 presumably means "no limit" - confirm. */
        nwritten = nfs_scan_commit_list(&b->written, dst, cinfo, 0);
        if (!nwritten)
            continue;
        ret += nwritten;
        /* Non-NULL only if both lists of the bucket are now empty. */
        freeme = pnfs_free_bucket_lseg(b);
        if (freeme) {
            pnfs_put_lseg(freeme);
            goto restart;
        }
    }
    return ret;
}
0327 
/* Pull everything off the written lists of every bucket and dump into @dst.
 *
 * Must be called with the inode's commit_mutex held. The number of
 * requests recovered from each commit array is subtracted from
 * cinfo->ds->nwritten.
 */
void pnfs_generic_recover_commit_reqs(struct list_head *dst,
                      struct nfs_commit_info *cinfo)
{
    struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
    struct pnfs_commit_array *array;
    unsigned int nwritten;

    lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);
    rcu_read_lock();
    list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
        /* Skip arrays already detached from their lseg or being freed. */
        if (!array->lseg || !pnfs_get_commit_array(array))
            continue;
        /* The reference taken above pins the array while RCU is dropped. */
        rcu_read_unlock();
        nwritten = pnfs_bucket_recover_commit_reqs(dst,
                               array->buckets,
                               array->nbuckets,
                               cinfo);
        /* Back under RCU before the put so the iterator can advance. */
        rcu_read_lock();
        pnfs_put_commit_array(array, cinfo->inode);
        fl_cinfo->nwritten -= nwritten;
    }
    rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs);
0353 
0354 static struct nfs_page *
0355 pnfs_bucket_search_commit_reqs(struct pnfs_commit_bucket *buckets,
0356         unsigned int nbuckets, struct page *page)
0357 {
0358     struct nfs_page *req;
0359     struct pnfs_commit_bucket *b;
0360     unsigned int i;
0361 
0362     /* Linearly search the commit lists for each bucket until a matching
0363      * request is found */
0364     for (i = 0, b = buckets; i < nbuckets; i++, b++) {
0365         list_for_each_entry(req, &b->written, wb_list) {
0366             if (req->wb_page == page)
0367                 return req->wb_head;
0368         }
0369         list_for_each_entry(req, &b->committing, wb_list) {
0370             if (req->wb_page == page)
0371                 return req->wb_head;
0372         }
0373     }
0374     return NULL;
0375 }
0376 
0377 /* pnfs_generic_search_commit_reqs - Search lists in @cinfo for the head reqest
0378  *                 for @page
0379  * @cinfo - commit info for current inode
0380  * @page - page to search for matching head request
0381  *
0382  * Returns a the head request if one is found, otherwise returns NULL.
0383  */
0384 struct nfs_page *
0385 pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page)
0386 {
0387     struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
0388     struct pnfs_commit_array *array;
0389     struct nfs_page *req;
0390 
0391     list_for_each_entry(array, &fl_cinfo->commits, cinfo_list) {
0392         req = pnfs_bucket_search_commit_reqs(array->buckets,
0393                 array->nbuckets, page);
0394         if (req)
0395             return req;
0396     }
0397     return NULL;
0398 }
0399 EXPORT_SYMBOL_GPL(pnfs_generic_search_commit_reqs);
0400 
0401 static struct pnfs_layout_segment *
0402 pnfs_bucket_get_committing(struct list_head *head,
0403                struct pnfs_commit_bucket *bucket,
0404                struct nfs_commit_info *cinfo)
0405 {
0406     struct pnfs_layout_segment *lseg;
0407     struct list_head *pos;
0408 
0409     list_for_each(pos, &bucket->committing)
0410         cinfo->ds->ncommitting--;
0411     list_splice_init(&bucket->committing, head);
0412     lseg = pnfs_free_bucket_lseg(bucket);
0413     if (!lseg)
0414         lseg = pnfs_get_lseg(bucket->lseg);
0415     return lseg;
0416 }
0417 
0418 static struct nfs_commit_data *
0419 pnfs_bucket_fetch_commitdata(struct pnfs_commit_bucket *bucket,
0420                  struct nfs_commit_info *cinfo)
0421 {
0422     struct nfs_commit_data *data = nfs_commitdata_alloc();
0423 
0424     if (!data)
0425         return NULL;
0426     data->lseg = pnfs_bucket_get_committing(&data->pages, bucket, cinfo);
0427     return data;
0428 }
0429 
0430 static void pnfs_generic_retry_commit(struct pnfs_commit_bucket *buckets,
0431                       unsigned int nbuckets,
0432                       struct nfs_commit_info *cinfo,
0433                       unsigned int idx)
0434 {
0435     struct pnfs_commit_bucket *bucket;
0436     struct pnfs_layout_segment *freeme;
0437     LIST_HEAD(pages);
0438 
0439     for (bucket = buckets; idx < nbuckets; bucket++, idx++) {
0440         if (list_empty(&bucket->committing))
0441             continue;
0442         mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
0443         freeme = pnfs_bucket_get_committing(&pages, bucket, cinfo);
0444         mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
0445         nfs_retry_commit(&pages, freeme, cinfo, idx);
0446         pnfs_put_lseg(freeme);
0447     }
0448 }
0449 
/*
 * Build one nfs_commit_data per bucket that has requests on its committing
 * list and append it to @list.
 *
 * Returns the number of commit data structures queued. If an allocation
 * fails at bucket i, the buckets from index i onward are passed to
 * pnfs_generic_retry_commit() and the count queued so far is returned.
 */
static unsigned int
pnfs_bucket_alloc_ds_commits(struct list_head *list,
                 struct pnfs_commit_bucket *buckets,
                 unsigned int nbuckets,
                 struct nfs_commit_info *cinfo)
{
    struct pnfs_commit_bucket *bucket;
    struct nfs_commit_data *data;
    unsigned int i;
    unsigned int nreq = 0;

    for (i = 0, bucket = buckets; i < nbuckets; i++, bucket++) {
        /* Unlocked check to avoid taking the mutex for empty buckets. */
        if (list_empty(&bucket->committing))
            continue;
        mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
        /* Re-check under the mutex; the list may have changed. */
        if (!list_empty(&bucket->committing)) {
            data = pnfs_bucket_fetch_commitdata(bucket, cinfo);
            if (!data)
                goto out_error;
            /* Remember which bucket this commit belongs to. */
            data->ds_commit_index = i;
            list_add_tail(&data->list, list);
            nreq++;
        }
        mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
    }
    return nreq;
out_error:
    mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
    /* Clean up on error */
    pnfs_generic_retry_commit(buckets, nbuckets, cinfo, i);
    return nreq;
}
0482 
/*
 * Walk every commit array on @fl_cinfo and queue commit data for its
 * buckets onto @list. Returns the total number of commit data structures
 * queued across all arrays.
 */
static unsigned int
pnfs_alloc_ds_commits_list(struct list_head *list,
               struct pnfs_ds_commit_info *fl_cinfo,
               struct nfs_commit_info *cinfo)
{
    struct pnfs_commit_array *array;
    unsigned int ret = 0;

    rcu_read_lock();
    list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
        /* Skip arrays already detached from their lseg or being freed. */
        if (!array->lseg || !pnfs_get_commit_array(array))
            continue;
        /* The helper takes a mutex, so leave the RCU read-side section. */
        rcu_read_unlock();
        ret += pnfs_bucket_alloc_ds_commits(list, array->buckets,
                array->nbuckets, cinfo);
        /* Back under RCU before the put so the iterator can advance. */
        rcu_read_lock();
        pnfs_put_commit_array(array, cinfo->inode);
    }
    rcu_read_unlock();
    return ret;
}
0504 
/* This follows nfs_commit_list pretty closely
 *
 * @inode: inode being committed.
 * @mds_pages: requests to commit through the MDS; spliced into a commit
 *             data structure when non-empty.
 * @how: flags forwarded to the commit initiation calls.
 * @cinfo: commit info; cinfo->ds supplies the DS commit arrays.
 * @initiate_commit: callback used to start each DS commit.
 *
 * Returns -ENOMEM if the commit data for @mds_pages cannot be allocated
 * (after handing those pages to nfs_retry_commit()), otherwise
 * PNFS_ATTEMPTED.
 */
int
pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
                 int how, struct nfs_commit_info *cinfo,
                 int (*initiate_commit)(struct nfs_commit_data *data,
                            int how))
{
    struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
    struct nfs_commit_data *data, *tmp;
    LIST_HEAD(list);
    unsigned int nreq = 0;

    if (!list_empty(mds_pages)) {
        data = nfs_commitdata_alloc();
        if (!data) {
            nfs_retry_commit(mds_pages, NULL, cinfo, -1);
            return -ENOMEM;
        }
        /* A negative index marks the commit that goes to the MDS. */
        data->ds_commit_index = -1;
        list_splice_init(mds_pages, &data->pages);
        list_add_tail(&data->list, &list);
        nreq++;
    }

    nreq += pnfs_alloc_ds_commits_list(&list, fl_cinfo, cinfo);
    if (nreq == 0)
        goto out;

    list_for_each_entry_safe(data, tmp, &list, list) {
        list_del(&data->list);
        if (data->ds_commit_index < 0) {
            /* MDS commit: no layout segment, sent with the MDS ops. */
            nfs_init_commit(data, NULL, NULL, cinfo);
            nfs_initiate_commit(NFS_CLIENT(inode), data,
                        NFS_PROTO(data->inode),
                        data->mds_ops, how,
                        RPC_TASK_CRED_NOREF);
        } else {
            /* DS commit: the return value of the callback is ignored. */
            nfs_init_commit(data, NULL, data->lseg, cinfo);
            initiate_commit(data, how);
        }
    }
out:
    return PNFS_ATTEMPTED;
}
EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist);
0550 
/*
 * Data server cache
 *
 * Data servers can be mapped to different device ids.
 * nfs4_pnfs_ds reference counting
 *   - set to 1 on allocation
 *   - incremented when a device id maps a data server already in the cache.
 *   - decremented when deviceid is removed from the cache.
 *
 * nfs4_ds_cache_lock is held around lookups in, insertions into and
 * removals from nfs4_data_server_cache.
 */
static DEFINE_SPINLOCK(nfs4_ds_cache_lock);
static LIST_HEAD(nfs4_data_server_cache);
0562 
0563 /* Debug routines */
0564 static void
0565 print_ds(struct nfs4_pnfs_ds *ds)
0566 {
0567     if (ds == NULL) {
0568         printk(KERN_WARNING "%s NULL device\n", __func__);
0569         return;
0570     }
0571     printk(KERN_WARNING "        ds %s\n"
0572         "        ref count %d\n"
0573         "        client %p\n"
0574         "        cl_exchange_flags %x\n",
0575         ds->ds_remotestr,
0576         refcount_read(&ds->ds_count), ds->ds_clp,
0577         ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
0578 }
0579 
0580 static bool
0581 same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
0582 {
0583     struct sockaddr_in *a, *b;
0584     struct sockaddr_in6 *a6, *b6;
0585 
0586     if (addr1->sa_family != addr2->sa_family)
0587         return false;
0588 
0589     switch (addr1->sa_family) {
0590     case AF_INET:
0591         a = (struct sockaddr_in *)addr1;
0592         b = (struct sockaddr_in *)addr2;
0593 
0594         if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
0595             a->sin_port == b->sin_port)
0596             return true;
0597         break;
0598 
0599     case AF_INET6:
0600         a6 = (struct sockaddr_in6 *)addr1;
0601         b6 = (struct sockaddr_in6 *)addr2;
0602 
0603         /* LINKLOCAL addresses must have matching scope_id */
0604         if (ipv6_addr_src_scope(&a6->sin6_addr) ==
0605             IPV6_ADDR_SCOPE_LINKLOCAL &&
0606             a6->sin6_scope_id != b6->sin6_scope_id)
0607             return false;
0608 
0609         if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
0610             a6->sin6_port == b6->sin6_port)
0611             return true;
0612         break;
0613 
0614     default:
0615         dprintk("%s: unhandled address family: %u\n",
0616             __func__, addr1->sa_family);
0617         return false;
0618     }
0619 
0620     return false;
0621 }
0622 
0623 /*
0624  * Checks if 'dsaddrs1' contains a subset of 'dsaddrs2'. If it does,
0625  * declare a match.
0626  */
0627 static bool
0628 _same_data_server_addrs_locked(const struct list_head *dsaddrs1,
0629                    const struct list_head *dsaddrs2)
0630 {
0631     struct nfs4_pnfs_ds_addr *da1, *da2;
0632     struct sockaddr *sa1, *sa2;
0633     bool match = false;
0634 
0635     list_for_each_entry(da1, dsaddrs1, da_node) {
0636         sa1 = (struct sockaddr *)&da1->da_addr;
0637         match = false;
0638         list_for_each_entry(da2, dsaddrs2, da_node) {
0639             sa2 = (struct sockaddr *)&da2->da_addr;
0640             match = same_sockaddr(sa1, sa2);
0641             if (match)
0642                 break;
0643         }
0644         if (!match)
0645             break;
0646     }
0647     return match;
0648 }
0649 
0650 /*
0651  * Lookup DS by addresses.  nfs4_ds_cache_lock is held
0652  */
0653 static struct nfs4_pnfs_ds *
0654 _data_server_lookup_locked(const struct list_head *dsaddrs)
0655 {
0656     struct nfs4_pnfs_ds *ds;
0657 
0658     list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
0659         if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs))
0660             return ds;
0661     return NULL;
0662 }
0663 
0664 static struct nfs4_pnfs_ds_addr *nfs4_pnfs_ds_addr_alloc(gfp_t gfp_flags)
0665 {
0666     struct nfs4_pnfs_ds_addr *da = kzalloc(sizeof(*da), gfp_flags);
0667     if (da)
0668         INIT_LIST_HEAD(&da->da_node);
0669     return da;
0670 }
0671 
/*
 * Free @da together with the da_remotestr and da_netid strings it points
 * to. The caller must already have unlinked @da from any list.
 */
static void nfs4_pnfs_ds_addr_free(struct nfs4_pnfs_ds_addr *da)
{
    kfree(da->da_remotestr);
    kfree(da->da_netid);
    kfree(da);
}
0678 
0679 static void destroy_ds(struct nfs4_pnfs_ds *ds)
0680 {
0681     struct nfs4_pnfs_ds_addr *da;
0682 
0683     dprintk("--> %s\n", __func__);
0684     ifdebug(FACILITY)
0685         print_ds(ds);
0686 
0687     nfs_put_client(ds->ds_clp);
0688 
0689     while (!list_empty(&ds->ds_addrs)) {
0690         da = list_first_entry(&ds->ds_addrs,
0691                       struct nfs4_pnfs_ds_addr,
0692                       da_node);
0693         list_del_init(&da->da_node);
0694         nfs4_pnfs_ds_addr_free(da);
0695     }
0696 
0697     kfree(ds->ds_remotestr);
0698     kfree(ds);
0699 }
0700 
0701 void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds)
0702 {
0703     if (refcount_dec_and_lock(&ds->ds_count,
0704                 &nfs4_ds_cache_lock)) {
0705         list_del_init(&ds->ds_node);
0706         spin_unlock(&nfs4_ds_cache_lock);
0707         destroy_ds(ds);
0708     }
0709 }
0710 EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_put);
0711 
/*
 * Create a string with a human readable address and port to avoid
 * complicated setup around many dprintk calls.
 *
 * The result has the form "{addr1,addr2,}": every address string on
 * @dsaddrs is followed by a comma. Returns a newly allocated string that
 * the caller must kfree(), or NULL on allocation failure or if the strings
 * no longer fit the computed length.
 */
static char *
nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
{
    struct nfs4_pnfs_ds_addr *da;
    char *remotestr;
    size_t len;
    char *p;

    /* First pass: work out how many bytes the result needs. */
    len = 3;        /* '{', '}' and eol */
    list_for_each_entry(da, dsaddrs, da_node) {
        len += strlen(da->da_remotestr) + 1;    /* string plus comma */
    }

    remotestr = kzalloc(len, gfp_flags);
    if (!remotestr)
        return NULL;

    /* Second pass: fill the buffer; len tracks the bytes still free. */
    p = remotestr;
    *(p++) = '{';
    len--;
    list_for_each_entry(da, dsaddrs, da_node) {
        size_t ll = strlen(da->da_remotestr);

        if (ll > len)
            goto out_err;

        memcpy(p, da->da_remotestr, ll);
        p += ll;
        len -= ll;

        if (len < 1)
            goto out_err;
        (*p++) = ',';
        len--;
    }
    /* Room is still needed for the closing brace and the terminator. */
    if (len < 2)
        goto out_err;
    *(p++) = '}';
    *p = '\0';
    return remotestr;
out_err:
    kfree(remotestr);
    return NULL;
}
0760 
0761 /*
0762  * Given a list of multipath struct nfs4_pnfs_ds_addr, add it to ds cache if
0763  * uncached and return cached struct nfs4_pnfs_ds.
0764  */
0765 struct nfs4_pnfs_ds *
0766 nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
0767 {
0768     struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
0769     char *remotestr;
0770 
0771     if (list_empty(dsaddrs)) {
0772         dprintk("%s: no addresses defined\n", __func__);
0773         goto out;
0774     }
0775 
0776     ds = kzalloc(sizeof(*ds), gfp_flags);
0777     if (!ds)
0778         goto out;
0779 
0780     /* this is only used for debugging, so it's ok if its NULL */
0781     remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
0782 
0783     spin_lock(&nfs4_ds_cache_lock);
0784     tmp_ds = _data_server_lookup_locked(dsaddrs);
0785     if (tmp_ds == NULL) {
0786         INIT_LIST_HEAD(&ds->ds_addrs);
0787         list_splice_init(dsaddrs, &ds->ds_addrs);
0788         ds->ds_remotestr = remotestr;
0789         refcount_set(&ds->ds_count, 1);
0790         INIT_LIST_HEAD(&ds->ds_node);
0791         ds->ds_clp = NULL;
0792         list_add(&ds->ds_node, &nfs4_data_server_cache);
0793         dprintk("%s add new data server %s\n", __func__,
0794             ds->ds_remotestr);
0795     } else {
0796         kfree(remotestr);
0797         kfree(ds);
0798         refcount_inc(&tmp_ds->ds_count);
0799         dprintk("%s data server %s found, inc'ed ds_count to %d\n",
0800             __func__, tmp_ds->ds_remotestr,
0801             refcount_read(&tmp_ds->ds_count));
0802         ds = tmp_ds;
0803     }
0804     spin_unlock(&nfs4_ds_cache_lock);
0805 out:
0806     return ds;
0807 }
0808 EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_add);
0809 
/*
 * Wait (killably) on the NFS4DS_CONNECTING bit in ds->ds_state. Returns
 * the result of wait_on_bit(). May sleep.
 */
static int nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
{
    might_sleep();
    return wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, TASK_KILLABLE);
}
0815 
/*
 * Clear NFS4DS_CONNECTING in ds->ds_state and wake up anyone waiting on
 * that bit, with a memory barrier issued before the clear.
 */
static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
{
    smp_mb__before_atomic();
    clear_and_wake_up_bit(NFS4DS_CONNECTING, &ds->ds_state);
}
0821 
/*
 * Pointer to nfs3_set_ds_client, resolved on demand by
 * load_v3_ds_connect() and released by nfs4_pnfs_v3_ds_connect_unload().
 * NULL while the symbol is not held.
 */
static struct nfs_client *(*get_v3_ds_connect)(
            struct nfs_server *mds_srv,
            const struct sockaddr *ds_addr,
            int ds_addrlen,
            int ds_proto,
            unsigned int ds_timeo,
            unsigned int ds_retrans);
0829 
0830 static bool load_v3_ds_connect(void)
0831 {
0832     if (!get_v3_ds_connect) {
0833         get_v3_ds_connect = symbol_request(nfs3_set_ds_client);
0834         WARN_ON_ONCE(!get_v3_ds_connect);
0835     }
0836 
0837     return(get_v3_ds_connect != NULL);
0838 }
0839 
0840 void nfs4_pnfs_v3_ds_connect_unload(void)
0841 {
0842     if (get_v3_ds_connect) {
0843         symbol_put(nfs3_set_ds_client);
0844         get_v3_ds_connect = NULL;
0845     }
0846 }
0847 
/*
 * Connect to data server @ds using the NFSv3 client setup routine.
 *
 * The addresses on ds->ds_addrs are tried in order until one yields a
 * client; every later address with the same transport and address family
 * is then added to that client's RPC client as an extra transport.
 *
 * Returns 0 and publishes the client in ds->ds_clp on success,
 * -EPROTONOSUPPORT if the v3 connect routine cannot be loaded, or the
 * error carried by the last failed attempt (-EIO if there were none).
 */
static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
                 struct nfs4_pnfs_ds *ds,
                 unsigned int timeo,
                 unsigned int retrans)
{
    struct nfs_client *clp = ERR_PTR(-EIO);
    struct nfs4_pnfs_ds_addr *da;
    int status = 0;

    dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr);

    if (!load_v3_ds_connect())
        return -EPROTONOSUPPORT;

    list_for_each_entry(da, &ds->ds_addrs, da_node) {
        dprintk("%s: DS %s: trying address %s\n",
            __func__, ds->ds_remotestr, da->da_remotestr);

        /* A client already exists: treat this address as an alias. */
        if (!IS_ERR(clp)) {
            struct xprt_create xprt_args = {
                .ident = da->da_transport,
                .net = clp->cl_net,
                .dstaddr = (struct sockaddr *)&da->da_addr,
                .addrlen = da->da_addrlen,
                .servername = clp->cl_hostname,
            };

            if (da->da_transport != clp->cl_proto)
                continue;
            if (da->da_addr.ss_family != clp->cl_addr.ss_family)
                continue;
            /* Add this address as an alias */
            rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
                    rpc_clnt_test_and_add_xprt, NULL);
            continue;
        }
        clp = get_v3_ds_connect(mds_srv,
                (struct sockaddr *)&da->da_addr,
                da->da_addrlen, da->da_transport,
                timeo, retrans);
        if (IS_ERR(clp))
            continue;
        /* Clear the soft-error and soft-retry flags on the new client. */
        clp->cl_rpcclient->cl_softerr = 0;
        clp->cl_rpcclient->cl_softrtry = 0;
    }

    if (IS_ERR(clp)) {
        status = PTR_ERR(clp);
        goto out;
    }

    /* Order the client setup before the pointer becomes visible. */
    smp_wmb();
    WRITE_ONCE(ds->ds_clp, clp);
    dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
out:
    return status;
}
0905 
0906 static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
0907                  struct nfs4_pnfs_ds *ds,
0908                  unsigned int timeo,
0909                  unsigned int retrans,
0910                  u32 minor_version)
0911 {
0912     struct nfs_client *clp = ERR_PTR(-EIO);
0913     struct nfs4_pnfs_ds_addr *da;
0914     int status = 0;
0915 
0916     dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr);
0917 
0918     list_for_each_entry(da, &ds->ds_addrs, da_node) {
0919         dprintk("%s: DS %s: trying address %s\n",
0920             __func__, ds->ds_remotestr, da->da_remotestr);
0921 
0922         if (!IS_ERR(clp) && clp->cl_mvops->session_trunk) {
0923             struct xprt_create xprt_args = {
0924                 .ident = da->da_transport,
0925                 .net = clp->cl_net,
0926                 .dstaddr = (struct sockaddr *)&da->da_addr,
0927                 .addrlen = da->da_addrlen,
0928                 .servername = clp->cl_hostname,
0929             };
0930             struct nfs4_add_xprt_data xprtdata = {
0931                 .clp = clp,
0932             };
0933             struct rpc_add_xprt_test rpcdata = {
0934                 .add_xprt_test = clp->cl_mvops->session_trunk,
0935                 .data = &xprtdata,
0936             };
0937 
0938             if (da->da_transport != clp->cl_proto)
0939                 continue;
0940             if (da->da_addr.ss_family != clp->cl_addr.ss_family)
0941                 continue;
0942             /**
0943             * Test this address for session trunking and
0944             * add as an alias
0945             */
0946             xprtdata.cred = nfs4_get_clid_cred(clp),
0947             rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
0948                       rpc_clnt_setup_test_and_add_xprt,
0949                       &rpcdata);
0950             if (xprtdata.cred)
0951                 put_cred(xprtdata.cred);
0952         } else {
0953             clp = nfs4_set_ds_client(mds_srv,
0954                         (struct sockaddr *)&da->da_addr,
0955                         da->da_addrlen,
0956                         da->da_transport, timeo,
0957                         retrans, minor_version);
0958             if (IS_ERR(clp))
0959                 continue;
0960 
0961             status = nfs4_init_ds_session(clp,
0962                     mds_srv->nfs_client->cl_lease_time);
0963             if (status) {
0964                 nfs_put_client(clp);
0965                 clp = ERR_PTR(-EIO);
0966                 continue;
0967             }
0968 
0969         }
0970     }
0971 
0972     if (IS_ERR(clp)) {
0973         status = PTR_ERR(clp);
0974         goto out;
0975     }
0976 
0977     smp_wmb();
0978     WRITE_ONCE(ds->ds_clp, clp);
0979     dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
0980 out:
0981     return status;
0982 }
0983 
0984 /*
0985  * Create an rpc connection to the nfs4_pnfs_ds data server.
0986  * Currently only supports IPv4 and IPv6 addresses.
0987  * If connection fails, make devid unavailable and return a -errno.
0988  */
0989 int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
0990               struct nfs4_deviceid_node *devid, unsigned int timeo,
0991               unsigned int retrans, u32 version, u32 minor_version)
0992 {
0993     int err;
0994 
0995     do {
0996         err = nfs4_wait_ds_connect(ds);
0997         if (err || ds->ds_clp)
0998             goto out;
0999         if (nfs4_test_deviceid_unavailable(devid))
1000             return -ENODEV;
1001     } while (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) != 0);
1002 
1003     if (ds->ds_clp)
1004         goto connect_done;
1005 
1006     switch (version) {
1007     case 3:
1008         err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo, retrans);
1009         break;
1010     case 4:
1011         err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo, retrans,
1012                            minor_version);
1013         break;
1014     default:
1015         dprintk("%s: unsupported DS version %d\n", __func__, version);
1016         err = -EPROTONOSUPPORT;
1017     }
1018 
1019 connect_done:
1020     nfs4_clear_ds_conn_bit(ds);
1021 out:
1022     /*
1023      * At this point the ds->ds_clp should be ready, but it might have
1024      * hit an error.
1025      */
1026     if (!err) {
1027         if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) {
1028             WARN_ON_ONCE(ds->ds_clp ||
1029                 !nfs4_test_deviceid_unavailable(devid));
1030             return -EINVAL;
1031         }
1032         err = nfs_client_init_status(ds->ds_clp);
1033     }
1034 
1035     return err;
1036 }
1037 EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect);
1038 
1039 /*
1040  * Currently only supports ipv4, ipv6 and one multi-path address.
1041  */
1042 struct nfs4_pnfs_ds_addr *
1043 nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
1044 {
1045     struct nfs4_pnfs_ds_addr *da = NULL;
1046     char *buf, *portstr;
1047     __be16 port;
1048     ssize_t nlen, rlen;
1049     int tmp[2];
1050     char *netid;
1051     size_t len;
1052     char *startsep = "";
1053     char *endsep = "";
1054 
1055 
1056     /* r_netid */
1057     nlen = xdr_stream_decode_string_dup(xdr, &netid, XDR_MAX_NETOBJ,
1058                         gfp_flags);
1059     if (unlikely(nlen < 0))
1060         goto out_err;
1061 
1062     /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
1063     /* port is ".ABC.DEF", 8 chars max */
1064     rlen = xdr_stream_decode_string_dup(xdr, &buf, INET6_ADDRSTRLEN +
1065                         IPV6_SCOPE_ID_LEN + 8, gfp_flags);
1066     if (unlikely(rlen < 0))
1067         goto out_free_netid;
1068 
1069     /* replace port '.' with '-' */
1070     portstr = strrchr(buf, '.');
1071     if (!portstr) {
1072         dprintk("%s: Failed finding expected dot in port\n",
1073             __func__);
1074         goto out_free_buf;
1075     }
1076     *portstr = '-';
1077 
1078     /* find '.' between address and port */
1079     portstr = strrchr(buf, '.');
1080     if (!portstr) {
1081         dprintk("%s: Failed finding expected dot between address and "
1082             "port\n", __func__);
1083         goto out_free_buf;
1084     }
1085     *portstr = '\0';
1086 
1087     da = nfs4_pnfs_ds_addr_alloc(gfp_flags);
1088     if (unlikely(!da))
1089         goto out_free_buf;
1090 
1091     if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr,
1092               sizeof(da->da_addr))) {
1093         dprintk("%s: error parsing address %s\n", __func__, buf);
1094         goto out_free_da;
1095     }
1096 
1097     portstr++;
1098     sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
1099     port = htons((tmp[0] << 8) | (tmp[1]));
1100 
1101     switch (da->da_addr.ss_family) {
1102     case AF_INET:
1103         ((struct sockaddr_in *)&da->da_addr)->sin_port = port;
1104         da->da_addrlen = sizeof(struct sockaddr_in);
1105         break;
1106 
1107     case AF_INET6:
1108         ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
1109         da->da_addrlen = sizeof(struct sockaddr_in6);
1110         startsep = "[";
1111         endsep = "]";
1112         break;
1113 
1114     default:
1115         dprintk("%s: unsupported address family: %u\n",
1116             __func__, da->da_addr.ss_family);
1117         goto out_free_da;
1118     }
1119 
1120     da->da_transport = xprt_find_transport_ident(netid);
1121     if (da->da_transport < 0) {
1122         dprintk("%s: ERROR: unknown r_netid \"%s\"\n",
1123             __func__, netid);
1124         goto out_free_da;
1125     }
1126 
1127     da->da_netid = netid;
1128 
1129     /* save human readable address */
1130     len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
1131     da->da_remotestr = kzalloc(len, gfp_flags);
1132 
1133     /* NULL is ok, only used for dprintk */
1134     if (da->da_remotestr)
1135         snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep,
1136              buf, endsep, ntohs(port));
1137 
1138     dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
1139     kfree(buf);
1140     return da;
1141 
1142 out_free_da:
1143     kfree(da);
1144 out_free_buf:
1145     dprintk("%s: Error parsing DS addr: %s\n", __func__, buf);
1146     kfree(buf);
1147 out_free_netid:
1148     kfree(netid);
1149 out_err:
1150     return NULL;
1151 }
1152 EXPORT_SYMBOL_GPL(nfs4_decode_mp_ds_addr);
1153 
1154 void
1155 pnfs_layout_mark_request_commit(struct nfs_page *req,
1156                 struct pnfs_layout_segment *lseg,
1157                 struct nfs_commit_info *cinfo,
1158                 u32 ds_commit_idx)
1159 {
1160     struct list_head *list;
1161     struct pnfs_commit_array *array;
1162     struct pnfs_commit_bucket *bucket;
1163 
1164     mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
1165     array = pnfs_lookup_commit_array(cinfo->ds, lseg);
1166     if (!array || !pnfs_is_valid_lseg(lseg))
1167         goto out_resched;
1168     bucket = &array->buckets[ds_commit_idx];
1169     list = &bucket->written;
1170     /* Non-empty buckets hold a reference on the lseg.  That ref
1171      * is normally transferred to the COMMIT call and released
1172      * there.  It could also be released if the last req is pulled
1173      * off due to a rewrite, in which case it will be done in
1174      * pnfs_common_clear_request_commit
1175      */
1176     if (!bucket->lseg)
1177         bucket->lseg = pnfs_get_lseg(lseg);
1178     set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
1179     cinfo->ds->nwritten++;
1180 
1181     nfs_request_add_commit_list_locked(req, list, cinfo);
1182     mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
1183     nfs_mark_page_unstable(req->wb_page, cinfo);
1184     return;
1185 out_resched:
1186     mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
1187     cinfo->completion_ops->resched_write(cinfo, req);
1188 }
1189 EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit);
1190 
1191 int
1192 pnfs_nfs_generic_sync(struct inode *inode, bool datasync)
1193 {
1194     int ret;
1195 
1196     if (!pnfs_layoutcommit_outstanding(inode))
1197         return 0;
1198     ret = nfs_commit_inode(inode, FLUSH_SYNC);
1199     if (ret < 0)
1200         return ret;
1201     if (datasync)
1202         return 0;
1203     return pnfs_layoutcommit_inode(inode, true);
1204 }
1205 EXPORT_SYMBOL_GPL(pnfs_nfs_generic_sync);
1206