Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /* AFS fileserver probing
0003  *
0004  * Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved.
0005  * Written by David Howells (dhowells@redhat.com)
0006  */
0007 
0008 #include <linux/sched.h>
0009 #include <linux/slab.h>
0010 #include "afs_fs.h"
0011 #include "internal.h"
0012 #include "protocol_afs.h"
0013 #include "protocol_yfs.h"
0014 
/*
 * Re-probe intervals, in jiffies.  The fast interval is applied to servers on
 * net->fs_probe_fast and the slow interval to servers on net->fs_probe_slow.
 */
static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ;
static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ;
0017 
0018 /*
0019  * Start the probe polling timer.  We have to supply it with an inc on the
0020  * outstanding server count.
0021  */
0022 static void afs_schedule_fs_probe(struct afs_net *net,
0023                   struct afs_server *server, bool fast)
0024 {
0025     unsigned long atj;
0026 
0027     if (!net->live)
0028         return;
0029 
0030     atj = server->probed_at;
0031     atj += fast ? afs_fs_probe_fast_poll_interval : afs_fs_probe_slow_poll_interval;
0032 
0033     afs_inc_servers_outstanding(net);
0034     if (timer_reduce(&net->fs_probe_timer, atj))
0035         afs_dec_servers_outstanding(net);
0036 }
0037 
0038 /*
0039  * Handle the completion of a set of probes.
0040  */
0041 static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server)
0042 {
0043     bool responded = server->probe.responded;
0044 
0045     write_seqlock(&net->fs_lock);
0046     if (responded) {
0047         list_add_tail(&server->probe_link, &net->fs_probe_slow);
0048     } else {
0049         server->rtt = UINT_MAX;
0050         clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
0051         list_add_tail(&server->probe_link, &net->fs_probe_fast);
0052     }
0053     write_sequnlock(&net->fs_lock);
0054 
0055     afs_schedule_fs_probe(net, server, !responded);
0056 }
0057 
0058 /*
0059  * Handle the completion of a probe.
0060  */
0061 static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server)
0062 {
0063     _enter("");
0064 
0065     if (atomic_dec_and_test(&server->probe_outstanding))
0066         afs_finished_fs_probe(net, server);
0067 
0068     wake_up_all(&server->probe_wq);
0069 }
0070 
0071 /*
0072  * Handle inability to send a probe due to ENOMEM when trying to allocate a
0073  * call struct.
0074  */
0075 static void afs_fs_probe_not_done(struct afs_net *net,
0076                   struct afs_server *server,
0077                   struct afs_addr_cursor *ac)
0078 {
0079     struct afs_addr_list *alist = ac->alist;
0080     unsigned int index = ac->index;
0081 
0082     _enter("");
0083 
0084     trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail);
0085     spin_lock(&server->probe_lock);
0086 
0087     server->probe.local_failure = true;
0088     if (server->probe.error == 0)
0089         server->probe.error = -ENOMEM;
0090 
0091     set_bit(index, &alist->failed);
0092 
0093     spin_unlock(&server->probe_lock);
0094     return afs_done_one_fs_probe(net, server);
0095 }
0096 
0097 /*
0098  * Process the result of probing a fileserver.  This is called after successful
0099  * or failed delivery of an FS.GetCapabilities operation.
0100  */
0101 void afs_fileserver_probe_result(struct afs_call *call)
0102 {
0103     struct afs_addr_list *alist = call->alist;
0104     struct afs_server *server = call->server;
0105     unsigned int index = call->addr_ix;
0106     unsigned int rtt_us = 0, cap0;
0107     int ret = call->error;
0108 
0109     _enter("%pU,%u", &server->uuid, index);
0110 
0111     spin_lock(&server->probe_lock);
0112 
0113     switch (ret) {
0114     case 0:
0115         server->probe.error = 0;
0116         goto responded;
0117     case -ECONNABORTED:
0118         if (!server->probe.responded) {
0119             server->probe.abort_code = call->abort_code;
0120             server->probe.error = ret;
0121         }
0122         goto responded;
0123     case -ENOMEM:
0124     case -ENONET:
0125         clear_bit(index, &alist->responded);
0126         server->probe.local_failure = true;
0127         trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
0128         goto out;
0129     case -ECONNRESET: /* Responded, but call expired. */
0130     case -ERFKILL:
0131     case -EADDRNOTAVAIL:
0132     case -ENETUNREACH:
0133     case -EHOSTUNREACH:
0134     case -EHOSTDOWN:
0135     case -ECONNREFUSED:
0136     case -ETIMEDOUT:
0137     case -ETIME:
0138     default:
0139         clear_bit(index, &alist->responded);
0140         set_bit(index, &alist->failed);
0141         if (!server->probe.responded &&
0142             (server->probe.error == 0 ||
0143              server->probe.error == -ETIMEDOUT ||
0144              server->probe.error == -ETIME))
0145             server->probe.error = ret;
0146         trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
0147         goto out;
0148     }
0149 
0150 responded:
0151     clear_bit(index, &alist->failed);
0152 
0153     if (call->service_id == YFS_FS_SERVICE) {
0154         server->probe.is_yfs = true;
0155         set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
0156         alist->addrs[index].srx_service = call->service_id;
0157     } else {
0158         server->probe.not_yfs = true;
0159         if (!server->probe.is_yfs) {
0160             clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
0161             alist->addrs[index].srx_service = call->service_id;
0162         }
0163         cap0 = ntohl(call->tmp);
0164         if (cap0 & AFS3_VICED_CAPABILITY_64BITFILES)
0165             set_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
0166         else
0167             clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
0168     }
0169 
0170     if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) &&
0171         rtt_us < server->probe.rtt) {
0172         server->probe.rtt = rtt_us;
0173         server->rtt = rtt_us;
0174         alist->preferred = index;
0175     }
0176 
0177     smp_wmb(); /* Set rtt before responded. */
0178     server->probe.responded = true;
0179     set_bit(index, &alist->responded);
0180     set_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
0181 out:
0182     spin_unlock(&server->probe_lock);
0183 
0184     _debug("probe %pU [%u] %pISpc rtt=%u ret=%d",
0185            &server->uuid, index, &alist->addrs[index].transport,
0186            rtt_us, ret);
0187 
0188     return afs_done_one_fs_probe(call->net, server);
0189 }
0190 
0191 /*
0192  * Probe one or all of a fileserver's addresses to find out the best route and
0193  * to query its capabilities.
0194  */
0195 void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
0196                  struct key *key, bool all)
0197 {
0198     struct afs_addr_cursor ac = {
0199         .index = 0,
0200     };
0201 
0202     _enter("%pU", &server->uuid);
0203 
0204     read_lock(&server->fs_lock);
0205     ac.alist = rcu_dereference_protected(server->addresses,
0206                          lockdep_is_held(&server->fs_lock));
0207     afs_get_addrlist(ac.alist);
0208     read_unlock(&server->fs_lock);
0209 
0210     server->probed_at = jiffies;
0211     atomic_set(&server->probe_outstanding, all ? ac.alist->nr_addrs : 1);
0212     memset(&server->probe, 0, sizeof(server->probe));
0213     server->probe.rtt = UINT_MAX;
0214 
0215     ac.index = ac.alist->preferred;
0216     if (ac.index < 0 || ac.index >= ac.alist->nr_addrs)
0217         all = true;
0218 
0219     if (all) {
0220         for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++)
0221             if (!afs_fs_get_capabilities(net, server, &ac, key))
0222                 afs_fs_probe_not_done(net, server, &ac);
0223     } else {
0224         if (!afs_fs_get_capabilities(net, server, &ac, key))
0225             afs_fs_probe_not_done(net, server, &ac);
0226     }
0227 
0228     afs_put_addrlist(ac.alist);
0229 }
0230 
0231 /*
0232  * Wait for the first as-yet untried fileserver to respond.
0233  */
0234 int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
0235 {
0236     struct wait_queue_entry *waits;
0237     struct afs_server *server;
0238     unsigned int rtt = UINT_MAX, rtt_s;
0239     bool have_responders = false;
0240     int pref = -1, i;
0241 
0242     _enter("%u,%lx", slist->nr_servers, untried);
0243 
0244     /* Only wait for servers that have a probe outstanding. */
0245     for (i = 0; i < slist->nr_servers; i++) {
0246         if (test_bit(i, &untried)) {
0247             server = slist->servers[i].server;
0248             if (!atomic_read(&server->probe_outstanding))
0249                 __clear_bit(i, &untried);
0250             if (server->probe.responded)
0251                 have_responders = true;
0252         }
0253     }
0254     if (have_responders || !untried)
0255         return 0;
0256 
0257     waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL);
0258     if (!waits)
0259         return -ENOMEM;
0260 
0261     for (i = 0; i < slist->nr_servers; i++) {
0262         if (test_bit(i, &untried)) {
0263             server = slist->servers[i].server;
0264             init_waitqueue_entry(&waits[i], current);
0265             add_wait_queue(&server->probe_wq, &waits[i]);
0266         }
0267     }
0268 
0269     for (;;) {
0270         bool still_probing = false;
0271 
0272         set_current_state(TASK_INTERRUPTIBLE);
0273         for (i = 0; i < slist->nr_servers; i++) {
0274             if (test_bit(i, &untried)) {
0275                 server = slist->servers[i].server;
0276                 if (server->probe.responded)
0277                     goto stop;
0278                 if (atomic_read(&server->probe_outstanding))
0279                     still_probing = true;
0280             }
0281         }
0282 
0283         if (!still_probing || signal_pending(current))
0284             goto stop;
0285         schedule();
0286     }
0287 
0288 stop:
0289     set_current_state(TASK_RUNNING);
0290 
0291     for (i = 0; i < slist->nr_servers; i++) {
0292         if (test_bit(i, &untried)) {
0293             server = slist->servers[i].server;
0294             rtt_s = READ_ONCE(server->rtt);
0295             if (test_bit(AFS_SERVER_FL_RESPONDING, &server->flags) &&
0296                 rtt_s < rtt) {
0297                 pref = i;
0298                 rtt = rtt_s;
0299             }
0300 
0301             remove_wait_queue(&server->probe_wq, &waits[i]);
0302         }
0303     }
0304 
0305     kfree(waits);
0306 
0307     if (pref == -1 && signal_pending(current))
0308         return -ERESTARTSYS;
0309 
0310     if (pref >= 0)
0311         slist->preferred = pref;
0312     return 0;
0313 }
0314 
0315 /*
0316  * Probe timer.  We have an increment on fs_outstanding that we need to pass
0317  * along to the work item.
0318  */
0319 void afs_fs_probe_timer(struct timer_list *timer)
0320 {
0321     struct afs_net *net = container_of(timer, struct afs_net, fs_probe_timer);
0322 
0323     if (!net->live || !queue_work(afs_wq, &net->fs_prober))
0324         afs_dec_servers_outstanding(net);
0325 }
0326 
0327 /*
0328  * Dispatch a probe to a server.
0329  */
0330 static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server, bool all)
0331     __releases(&net->fs_lock)
0332 {
0333     struct key *key = NULL;
0334 
0335     /* We remove it from the queues here - it will be added back to
0336      * one of the queues on the completion of the probe.
0337      */
0338     list_del_init(&server->probe_link);
0339 
0340     afs_get_server(server, afs_server_trace_get_probe);
0341     write_sequnlock(&net->fs_lock);
0342 
0343     afs_fs_probe_fileserver(net, server, key, all);
0344     afs_put_server(net, server, afs_server_trace_put_probe);
0345 }
0346 
0347 /*
0348  * Probe a server immediately without waiting for its due time to come
0349  * round.  This is used when all of the addresses have been tried.
0350  */
0351 void afs_probe_fileserver(struct afs_net *net, struct afs_server *server)
0352 {
0353     write_seqlock(&net->fs_lock);
0354     if (!list_empty(&server->probe_link))
0355         return afs_dispatch_fs_probe(net, server, true);
0356     write_sequnlock(&net->fs_lock);
0357 }
0358 
0359 /*
0360  * Probe dispatcher to regularly dispatch probes to keep NAT alive.
0361  */
0362 void afs_fs_probe_dispatcher(struct work_struct *work)
0363 {
0364     struct afs_net *net = container_of(work, struct afs_net, fs_prober);
0365     struct afs_server *fast, *slow, *server;
0366     unsigned long nowj, timer_at, poll_at;
0367     bool first_pass = true, set_timer = false;
0368 
0369     if (!net->live)
0370         return;
0371 
0372     _enter("");
0373 
0374     if (list_empty(&net->fs_probe_fast) && list_empty(&net->fs_probe_slow)) {
0375         _leave(" [none]");
0376         return;
0377     }
0378 
0379 again:
0380     write_seqlock(&net->fs_lock);
0381 
0382     fast = slow = server = NULL;
0383     nowj = jiffies;
0384     timer_at = nowj + MAX_JIFFY_OFFSET;
0385 
0386     if (!list_empty(&net->fs_probe_fast)) {
0387         fast = list_first_entry(&net->fs_probe_fast, struct afs_server, probe_link);
0388         poll_at = fast->probed_at + afs_fs_probe_fast_poll_interval;
0389         if (time_before(nowj, poll_at)) {
0390             timer_at = poll_at;
0391             set_timer = true;
0392             fast = NULL;
0393         }
0394     }
0395 
0396     if (!list_empty(&net->fs_probe_slow)) {
0397         slow = list_first_entry(&net->fs_probe_slow, struct afs_server, probe_link);
0398         poll_at = slow->probed_at + afs_fs_probe_slow_poll_interval;
0399         if (time_before(nowj, poll_at)) {
0400             if (time_before(poll_at, timer_at))
0401                 timer_at = poll_at;
0402             set_timer = true;
0403             slow = NULL;
0404         }
0405     }
0406 
0407     server = fast ?: slow;
0408     if (server)
0409         _debug("probe %pU", &server->uuid);
0410 
0411     if (server && (first_pass || !need_resched())) {
0412         afs_dispatch_fs_probe(net, server, server == fast);
0413         first_pass = false;
0414         goto again;
0415     }
0416 
0417     write_sequnlock(&net->fs_lock);
0418 
0419     if (server) {
0420         if (!queue_work(afs_wq, &net->fs_prober))
0421             afs_dec_servers_outstanding(net);
0422         _leave(" [requeue]");
0423     } else if (set_timer) {
0424         if (timer_reduce(&net->fs_probe_timer, timer_at))
0425             afs_dec_servers_outstanding(net);
0426         _leave(" [timer]");
0427     } else {
0428         afs_dec_servers_outstanding(net);
0429         _leave(" [quiesce]");
0430     }
0431 }
0432 
0433 /*
0434  * Wait for a probe on a particular fileserver to complete for 2s.
0435  */
0436 int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr)
0437 {
0438     struct wait_queue_entry wait;
0439     unsigned long timo = 2 * HZ;
0440 
0441     if (atomic_read(&server->probe_outstanding) == 0)
0442         goto dont_wait;
0443 
0444     init_wait_entry(&wait, 0);
0445     for (;;) {
0446         prepare_to_wait_event(&server->probe_wq, &wait,
0447                       is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
0448         if (timo == 0 ||
0449             server->probe.responded ||
0450             atomic_read(&server->probe_outstanding) == 0 ||
0451             (is_intr && signal_pending(current)))
0452             break;
0453         timo = schedule_timeout(timo);
0454     }
0455 
0456     finish_wait(&server->probe_wq, &wait);
0457 
0458 dont_wait:
0459     if (server->probe.responded)
0460         return 0;
0461     if (is_intr && signal_pending(current))
0462         return -ERESTARTSYS;
0463     if (timo == 0)
0464         return -ETIME;
0465     return -EDESTADDRREQ;
0466 }
0467 
0468 /*
0469  * Clean up the probing when the namespace is killed off.
0470  */
0471 void afs_fs_probe_cleanup(struct afs_net *net)
0472 {
0473     if (del_timer_sync(&net->fs_probe_timer))
0474         afs_dec_servers_outstanding(net);
0475 }