Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /* Handle fileserver selection and rotation.
0003  *
0004  * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
0005  * Written by David Howells (dhowells@redhat.com)
0006  */
0007 
0008 #include <linux/kernel.h>
0009 #include <linux/slab.h>
0010 #include <linux/fs.h>
0011 #include <linux/sched.h>
0012 #include <linux/delay.h>
0013 #include <linux/sched/signal.h>
0014 #include "internal.h"
0015 #include "afs_fs.h"
0016 
0017 /*
0018  * Begin iteration through a server list, starting with the vnode's last used
0019  * server if possible, or the last recorded good server if not.
0020  */
0021 static bool afs_start_fs_iteration(struct afs_operation *op,
0022                    struct afs_vnode *vnode)
0023 {
0024     struct afs_server *server;
0025     void *cb_server;
0026     int i;
0027 
0028     read_lock(&op->volume->servers_lock);
0029     op->server_list = afs_get_serverlist(
0030         rcu_dereference_protected(op->volume->servers,
0031                       lockdep_is_held(&op->volume->servers_lock)));
0032     read_unlock(&op->volume->servers_lock);
0033 
0034     op->untried = (1UL << op->server_list->nr_servers) - 1;
0035     op->index = READ_ONCE(op->server_list->preferred);
0036 
0037     cb_server = vnode->cb_server;
0038     if (cb_server) {
0039         /* See if the vnode's preferred record is still available */
0040         for (i = 0; i < op->server_list->nr_servers; i++) {
0041             server = op->server_list->servers[i].server;
0042             if (server == cb_server) {
0043                 op->index = i;
0044                 goto found_interest;
0045             }
0046         }
0047 
0048         /* If we have a lock outstanding on a server that's no longer
0049          * serving this vnode, then we can't switch to another server
0050          * and have to return an error.
0051          */
0052         if (op->flags & AFS_OPERATION_CUR_ONLY) {
0053             op->error = -ESTALE;
0054             return false;
0055         }
0056 
0057         /* Note that the callback promise is effectively broken */
0058         write_seqlock(&vnode->cb_lock);
0059         ASSERTCMP(cb_server, ==, vnode->cb_server);
0060         vnode->cb_server = NULL;
0061         if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
0062             vnode->cb_break++;
0063         write_sequnlock(&vnode->cb_lock);
0064     }
0065 
0066 found_interest:
0067     return true;
0068 }
0069 
0070 /*
0071  * Post volume busy note.
0072  */
0073 static void afs_busy(struct afs_volume *volume, u32 abort_code)
0074 {
0075     const char *m;
0076 
0077     switch (abort_code) {
0078     case VOFFLINE:      m = "offline";      break;
0079     case VRESTARTING:   m = "restarting";   break;
0080     case VSALVAGING:    m = "being salvaged";   break;
0081     default:        m = "busy";     break;
0082     }
0083 
0084     pr_notice("kAFS: Volume %llu '%s' is %s\n", volume->vid, volume->name, m);
0085 }
0086 
0087 /*
0088  * Sleep and retry the operation to the same fileserver.
0089  */
0090 static bool afs_sleep_and_retry(struct afs_operation *op)
0091 {
0092     if (!(op->flags & AFS_OPERATION_UNINTR)) {
0093         msleep_interruptible(1000);
0094         if (signal_pending(current)) {
0095             op->error = -ERESTARTSYS;
0096             return false;
0097         }
0098     } else {
0099         msleep(1000);
0100     }
0101 
0102     return true;
0103 }
0104 
0105 /*
0106  * Select the fileserver to use.  May be called multiple times to rotate
0107  * through the fileservers.
0108  */
0109 bool afs_select_fileserver(struct afs_operation *op)
0110 {
0111     struct afs_addr_list *alist;
0112     struct afs_server *server;
0113     struct afs_vnode *vnode = op->file[0].vnode;
0114     struct afs_error e;
0115     u32 rtt;
0116     int error = op->ac.error, i;
0117 
0118     _enter("%lx[%d],%lx[%d],%d,%d",
0119            op->untried, op->index,
0120            op->ac.tried, op->ac.index,
0121            error, op->ac.abort_code);
0122 
0123     if (op->flags & AFS_OPERATION_STOP) {
0124         _leave(" = f [stopped]");
0125         return false;
0126     }
0127 
0128     op->nr_iterations++;
0129 
0130     /* Evaluate the result of the previous operation, if there was one. */
0131     switch (error) {
0132     case SHRT_MAX:
0133         goto start;
0134 
0135     case 0:
0136     default:
0137         /* Success or local failure.  Stop. */
0138         op->error = error;
0139         op->flags |= AFS_OPERATION_STOP;
0140         _leave(" = f [okay/local %d]", error);
0141         return false;
0142 
0143     case -ECONNABORTED:
0144         /* The far side rejected the operation on some grounds.  This
0145          * might involve the server being busy or the volume having been moved.
0146          */
0147         switch (op->ac.abort_code) {
0148         case VNOVOL:
0149             /* This fileserver doesn't know about the volume.
0150              * - May indicate that the VL is wrong - retry once and compare
0151              *   the results.
0152              * - May indicate that the fileserver couldn't attach to the vol.
0153              */
0154             if (op->flags & AFS_OPERATION_VNOVOL) {
0155                 op->error = -EREMOTEIO;
0156                 goto next_server;
0157             }
0158 
0159             write_lock(&op->volume->servers_lock);
0160             op->server_list->vnovol_mask |= 1 << op->index;
0161             write_unlock(&op->volume->servers_lock);
0162 
0163             set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
0164             error = afs_check_volume_status(op->volume, op);
0165             if (error < 0)
0166                 goto failed_set_error;
0167 
0168             if (test_bit(AFS_VOLUME_DELETED, &op->volume->flags)) {
0169                 op->error = -ENOMEDIUM;
0170                 goto failed;
0171             }
0172 
0173             /* If the server list didn't change, then assume that
0174              * it's the fileserver having trouble.
0175              */
0176             if (rcu_access_pointer(op->volume->servers) == op->server_list) {
0177                 op->error = -EREMOTEIO;
0178                 goto next_server;
0179             }
0180 
0181             /* Try again */
0182             op->flags |= AFS_OPERATION_VNOVOL;
0183             _leave(" = t [vnovol]");
0184             return true;
0185 
0186         case VSALVAGE: /* TODO: Should this return an error or iterate? */
0187         case VVOLEXISTS:
0188         case VNOSERVICE:
0189         case VONLINE:
0190         case VDISKFULL:
0191         case VOVERQUOTA:
0192             op->error = afs_abort_to_error(op->ac.abort_code);
0193             goto next_server;
0194 
0195         case VOFFLINE:
0196             if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &op->volume->flags)) {
0197                 afs_busy(op->volume, op->ac.abort_code);
0198                 clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
0199             }
0200             if (op->flags & AFS_OPERATION_NO_VSLEEP) {
0201                 op->error = -EADV;
0202                 goto failed;
0203             }
0204             if (op->flags & AFS_OPERATION_CUR_ONLY) {
0205                 op->error = -ESTALE;
0206                 goto failed;
0207             }
0208             goto busy;
0209 
0210         case VSALVAGING:
0211         case VRESTARTING:
0212         case VBUSY:
0213             /* Retry after going round all the servers unless we
0214              * have a file lock we need to maintain.
0215              */
0216             if (op->flags & AFS_OPERATION_NO_VSLEEP) {
0217                 op->error = -EBUSY;
0218                 goto failed;
0219             }
0220             if (!test_and_set_bit(AFS_VOLUME_BUSY, &op->volume->flags)) {
0221                 afs_busy(op->volume, op->ac.abort_code);
0222                 clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
0223             }
0224         busy:
0225             if (op->flags & AFS_OPERATION_CUR_ONLY) {
0226                 if (!afs_sleep_and_retry(op))
0227                     goto failed;
0228 
0229                  /* Retry with same server & address */
0230                 _leave(" = t [vbusy]");
0231                 return true;
0232             }
0233 
0234             op->flags |= AFS_OPERATION_VBUSY;
0235             goto next_server;
0236 
0237         case VMOVED:
0238             /* The volume migrated to another server.  We consider
0239              * consider all locks and callbacks broken and request
0240              * an update from the VLDB.
0241              *
0242              * We also limit the number of VMOVED hops we will
0243              * honour, just in case someone sets up a loop.
0244              */
0245             if (op->flags & AFS_OPERATION_VMOVED) {
0246                 op->error = -EREMOTEIO;
0247                 goto failed;
0248             }
0249             op->flags |= AFS_OPERATION_VMOVED;
0250 
0251             set_bit(AFS_VOLUME_WAIT, &op->volume->flags);
0252             set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
0253             error = afs_check_volume_status(op->volume, op);
0254             if (error < 0)
0255                 goto failed_set_error;
0256 
0257             /* If the server list didn't change, then the VLDB is
0258              * out of sync with the fileservers.  This is hopefully
0259              * a temporary condition, however, so we don't want to
0260              * permanently block access to the file.
0261              *
0262              * TODO: Try other fileservers if we can.
0263              *
0264              * TODO: Retry a few times with sleeps.
0265              */
0266             if (rcu_access_pointer(op->volume->servers) == op->server_list) {
0267                 op->error = -ENOMEDIUM;
0268                 goto failed;
0269             }
0270 
0271             goto restart_from_beginning;
0272 
0273         default:
0274             clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
0275             clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
0276             op->error = afs_abort_to_error(op->ac.abort_code);
0277             goto failed;
0278         }
0279 
0280     case -ETIMEDOUT:
0281     case -ETIME:
0282         if (op->error != -EDESTADDRREQ)
0283             goto iterate_address;
0284         fallthrough;
0285     case -ERFKILL:
0286     case -EADDRNOTAVAIL:
0287     case -ENETUNREACH:
0288     case -EHOSTUNREACH:
0289     case -EHOSTDOWN:
0290     case -ECONNREFUSED:
0291         _debug("no conn");
0292         op->error = error;
0293         goto iterate_address;
0294 
0295     case -ENETRESET:
0296         pr_warn("kAFS: Peer reset %s (op=%x)\n",
0297             op->type ? op->type->name : "???", op->debug_id);
0298         fallthrough;
0299     case -ECONNRESET:
0300         _debug("call reset");
0301         op->error = error;
0302         goto failed;
0303     }
0304 
0305 restart_from_beginning:
0306     _debug("restart");
0307     afs_end_cursor(&op->ac);
0308     op->server = NULL;
0309     afs_put_serverlist(op->net, op->server_list);
0310     op->server_list = NULL;
0311 start:
0312     _debug("start");
0313     /* See if we need to do an update of the volume record.  Note that the
0314      * volume may have moved or even have been deleted.
0315      */
0316     error = afs_check_volume_status(op->volume, op);
0317     if (error < 0)
0318         goto failed_set_error;
0319 
0320     if (!afs_start_fs_iteration(op, vnode))
0321         goto failed;
0322 
0323     _debug("__ VOL %llx __", op->volume->vid);
0324 
0325 pick_server:
0326     _debug("pick [%lx]", op->untried);
0327 
0328     error = afs_wait_for_fs_probes(op->server_list, op->untried);
0329     if (error < 0)
0330         goto failed_set_error;
0331 
0332     /* Pick the untried server with the lowest RTT.  If we have outstanding
0333      * callbacks, we stick with the server we're already using if we can.
0334      */
0335     if (op->server) {
0336         _debug("server %u", op->index);
0337         if (test_bit(op->index, &op->untried))
0338             goto selected_server;
0339         op->server = NULL;
0340         _debug("no server");
0341     }
0342 
0343     op->index = -1;
0344     rtt = U32_MAX;
0345     for (i = 0; i < op->server_list->nr_servers; i++) {
0346         struct afs_server *s = op->server_list->servers[i].server;
0347 
0348         if (!test_bit(i, &op->untried) ||
0349             !test_bit(AFS_SERVER_FL_RESPONDING, &s->flags))
0350             continue;
0351         if (s->probe.rtt < rtt) {
0352             op->index = i;
0353             rtt = s->probe.rtt;
0354         }
0355     }
0356 
0357     if (op->index == -1)
0358         goto no_more_servers;
0359 
0360 selected_server:
0361     _debug("use %d", op->index);
0362     __clear_bit(op->index, &op->untried);
0363 
0364     /* We're starting on a different fileserver from the list.  We need to
0365      * check it, create a callback intercept, find its address list and
0366      * probe its capabilities before we use it.
0367      */
0368     ASSERTCMP(op->ac.alist, ==, NULL);
0369     server = op->server_list->servers[op->index].server;
0370 
0371     if (!afs_check_server_record(op, server))
0372         goto failed;
0373 
0374     _debug("USING SERVER: %pU", &server->uuid);
0375 
0376     op->flags |= AFS_OPERATION_RETRY_SERVER;
0377     op->server = server;
0378     if (vnode->cb_server != server) {
0379         vnode->cb_server = server;
0380         vnode->cb_s_break = server->cb_s_break;
0381         vnode->cb_fs_s_break = atomic_read(&server->cell->fs_s_break);
0382         vnode->cb_v_break = vnode->volume->cb_v_break;
0383         clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
0384     }
0385 
0386     read_lock(&server->fs_lock);
0387     alist = rcu_dereference_protected(server->addresses,
0388                       lockdep_is_held(&server->fs_lock));
0389     afs_get_addrlist(alist);
0390     read_unlock(&server->fs_lock);
0391 
0392 retry_server:
0393     memset(&op->ac, 0, sizeof(op->ac));
0394 
0395     if (!op->ac.alist)
0396         op->ac.alist = alist;
0397     else
0398         afs_put_addrlist(alist);
0399 
0400     op->ac.index = -1;
0401 
0402 iterate_address:
0403     ASSERT(op->ac.alist);
0404     /* Iterate over the current server's address list to try and find an
0405      * address on which it will respond to us.
0406      */
0407     if (!afs_iterate_addresses(&op->ac))
0408         goto out_of_addresses;
0409 
0410     _debug("address [%u] %u/%u %pISp",
0411            op->index, op->ac.index, op->ac.alist->nr_addrs,
0412            &op->ac.alist->addrs[op->ac.index].transport);
0413 
0414     _leave(" = t");
0415     return true;
0416 
0417 out_of_addresses:
0418     /* We've now had a failure to respond on all of a server's addresses -
0419      * immediately probe them again and consider retrying the server.
0420      */
0421     afs_probe_fileserver(op->net, op->server);
0422     if (op->flags & AFS_OPERATION_RETRY_SERVER) {
0423         alist = op->ac.alist;
0424         error = afs_wait_for_one_fs_probe(
0425             op->server, !(op->flags & AFS_OPERATION_UNINTR));
0426         switch (error) {
0427         case 0:
0428             op->flags &= ~AFS_OPERATION_RETRY_SERVER;
0429             goto retry_server;
0430         case -ERESTARTSYS:
0431             goto failed_set_error;
0432         case -ETIME:
0433         case -EDESTADDRREQ:
0434             goto next_server;
0435         }
0436     }
0437 
0438 next_server:
0439     _debug("next");
0440     afs_end_cursor(&op->ac);
0441     goto pick_server;
0442 
0443 no_more_servers:
0444     /* That's all the servers poked to no good effect.  Try again if some
0445      * of them were busy.
0446      */
0447     if (op->flags & AFS_OPERATION_VBUSY)
0448         goto restart_from_beginning;
0449 
0450     e.error = -EDESTADDRREQ;
0451     e.responded = false;
0452     for (i = 0; i < op->server_list->nr_servers; i++) {
0453         struct afs_server *s = op->server_list->servers[i].server;
0454 
0455         afs_prioritise_error(&e, READ_ONCE(s->probe.error),
0456                      s->probe.abort_code);
0457     }
0458 
0459     error = e.error;
0460 
0461 failed_set_error:
0462     op->error = error;
0463 failed:
0464     op->flags |= AFS_OPERATION_STOP;
0465     afs_end_cursor(&op->ac);
0466     _leave(" = f [failed %d]", op->error);
0467     return false;
0468 }
0469 
0470 /*
0471  * Dump cursor state in the case of the error being EDESTADDRREQ.
0472  */
0473 void afs_dump_edestaddrreq(const struct afs_operation *op)
0474 {
0475     static int count;
0476     int i;
0477 
0478     if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
0479         return;
0480     count++;
0481 
0482     rcu_read_lock();
0483 
0484     pr_notice("EDESTADDR occurred\n");
0485     pr_notice("FC: cbb=%x cbb2=%x fl=%x err=%hd\n",
0486           op->file[0].cb_break_before,
0487           op->file[1].cb_break_before, op->flags, op->error);
0488     pr_notice("FC: ut=%lx ix=%d ni=%u\n",
0489           op->untried, op->index, op->nr_iterations);
0490 
0491     if (op->server_list) {
0492         const struct afs_server_list *sl = op->server_list;
0493         pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n",
0494               sl->nr_servers, sl->preferred, sl->vnovol_mask);
0495         for (i = 0; i < sl->nr_servers; i++) {
0496             const struct afs_server *s = sl->servers[i].server;
0497             pr_notice("FC: server fl=%lx av=%u %pU\n",
0498                   s->flags, s->addr_version, &s->uuid);
0499             if (s->addresses) {
0500                 const struct afs_addr_list *a =
0501                     rcu_dereference(s->addresses);
0502                 pr_notice("FC:  - av=%u nr=%u/%u/%u pr=%u\n",
0503                       a->version,
0504                       a->nr_ipv4, a->nr_addrs, a->max_addrs,
0505                       a->preferred);
0506                 pr_notice("FC:  - R=%lx F=%lx\n",
0507                       a->responded, a->failed);
0508                 if (a == op->ac.alist)
0509                     pr_notice("FC:  - current\n");
0510             }
0511         }
0512     }
0513 
0514     pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
0515           op->ac.tried, op->ac.index, op->ac.abort_code, op->ac.error,
0516           op->ac.responded, op->ac.nr_iterations);
0517     rcu_read_unlock();
0518 }