0001
0002
0003
0004
0005
0006
0007
0008 #include <linux/slab.h>
0009 #include <linux/delay.h>
0010 #include <linux/module.h>
0011 #include <asm/unaligned.h>
0012 #include <scsi/scsi.h>
0013 #include <scsi/scsi_proto.h>
0014 #include <scsi/scsi_dbg.h>
0015 #include <scsi/scsi_eh.h>
0016 #include <scsi/scsi_dh.h>
0017
0018 #define ALUA_DH_NAME "alua"
0019 #define ALUA_DH_VER "2.0"
0020
0021 #define TPGS_SUPPORT_NONE 0x00
0022 #define TPGS_SUPPORT_OPTIMIZED 0x01
0023 #define TPGS_SUPPORT_NONOPTIMIZED 0x02
0024 #define TPGS_SUPPORT_STANDBY 0x04
0025 #define TPGS_SUPPORT_UNAVAILABLE 0x08
0026 #define TPGS_SUPPORT_LBA_DEPENDENT 0x10
0027 #define TPGS_SUPPORT_OFFLINE 0x40
0028 #define TPGS_SUPPORT_TRANSITION 0x80
0029 #define TPGS_SUPPORT_ALL 0xdf
0030
0031 #define RTPG_FMT_MASK 0x70
0032 #define RTPG_FMT_EXT_HDR 0x10
0033
0034 #define TPGS_MODE_UNINITIALIZED -1
0035 #define TPGS_MODE_NONE 0x0
0036 #define TPGS_MODE_IMPLICIT 0x1
0037 #define TPGS_MODE_EXPLICIT 0x2
0038
0039 #define ALUA_RTPG_SIZE 128
0040 #define ALUA_FAILOVER_TIMEOUT 60
0041 #define ALUA_FAILOVER_RETRIES 5
0042 #define ALUA_RTPG_DELAY_MSECS 5
0043 #define ALUA_RTPG_RETRY_DELAY 2
0044
0045
0046 #define ALUA_OPTIMIZE_STPG 0x01
0047 #define ALUA_RTPG_EXT_HDR_UNSUPP 0x02
0048
0049 #define ALUA_PG_RUN_RTPG 0x10
0050 #define ALUA_PG_RUN_STPG 0x20
0051 #define ALUA_PG_RUNNING 0x40
0052
0053 static uint optimize_stpg;
0054 module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR);
0055 MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than sending a STPG, when implicit TPGS is supported (0=No,1=Yes). Default is 0.");
0056
0057 static LIST_HEAD(port_group_list);
0058 static DEFINE_SPINLOCK(port_group_lock);
0059 static struct workqueue_struct *kaluad_wq;
0060
/*
 * alua_port_group - per target-port-group state, shared by all devices
 * that report the same device identifier and group id.
 *
 * Lifetime is managed via @kref (released through release_port_group());
 * instances are linked on the global port_group_list.
 */
struct alua_port_group {
	struct kref kref;			/* refcount; freed via release_port_group() */
	struct rcu_head rcu;			/* for kfree_rcu() on final put */
	struct list_head node;			/* entry in global port_group_list */
	struct list_head dh_list;		/* alua_dh_data instances attached to this group */
	unsigned char device_id_str[256];	/* device identifier (from scsi_vpd_lun_id()) */
	int device_id_len;			/* length of device_id_str; 0 if none found */
	int group_id;				/* ALUA target port group id */
	int tpgs;				/* TPGS_MODE_* bits (see alua_check_tpgs()) */
	int state;				/* current SCSI_ACCESS_STATE_* value */
	int pref;				/* preferred-path bit from the RTPG descriptor */
	int valid_states;			/* TPGS_SUPPORT_* bitmap from the RTPG descriptor */
	unsigned flags;				/* ALUA_* flags; updated under @lock */
	unsigned char transition_tmo;		/* transition timeout in seconds (from RTPG) */
	unsigned long expiry;			/* jiffies deadline for an ongoing transition */
	unsigned long interval;			/* delay in seconds before rtpg_work re-runs */
	struct delayed_work rtpg_work;		/* deferred RTPG/STPG processing (alua_rtpg_work) */
	spinlock_t lock;			/* protects flags, state, rtpg_list, rtpg_sdev */
	struct list_head rtpg_list;		/* queued alua_queue_data awaiting completion */
	struct scsi_device *rtpg_sdev;		/* device currently used to issue RTPG/STPG */
};
0082
/*
 * alua_dh_data - per-scsi_device handler state, stored in
 * sdev->handler_data.
 */
struct alua_dh_data {
	struct list_head node;		/* entry in pg->dh_list */
	struct alua_port_group __rcu *pg;	/* current port group; updated under pg_lock */
	int group_id;			/* ALUA group id of this device -- NOTE(review): not set in this view; confirm against full file */
	spinlock_t pg_lock;		/* serializes updates to @pg */
	struct scsi_device *sdev;	/* back-pointer to the owning SCSI device */
	int init_error;			/* result of the last alua_initialize() */
	struct mutex init_mutex;	/* serializes (re-)initialization */
	bool disabled;			/* skip this device for RTPG retries (see alua_rtpg_select_sdev()) */
};
0093
/*
 * alua_queue_data - activation completion callback, queued on
 * pg->rtpg_list and invoked by alua_rtpg_work() once the RTPG/STPG
 * sequence finishes.
 */
struct alua_queue_data {
	struct list_head entry;		/* entry in pg->rtpg_list */
	activate_complete callback_fn;	/* may be NULL */
	void *callback_data;		/* opaque argument for callback_fn */
};
0099
0100 #define ALUA_POLICY_SWITCH_CURRENT 0
0101 #define ALUA_POLICY_SWITCH_ALL 1
0102
0103 static void alua_rtpg_work(struct work_struct *work);
0104 static bool alua_rtpg_queue(struct alua_port_group *pg,
0105 struct scsi_device *sdev,
0106 struct alua_queue_data *qdata, bool force);
0107 static void alua_check(struct scsi_device *sdev, bool force);
0108
/*
 * release_port_group - final kref release callback for a port group.
 *
 * Flushes any pending RTPG work first (only when a device is still
 * attached as rtpg_sdev), unlinks the group from the global list and
 * frees it after an RCU grace period, since readers may still be
 * traversing it.
 */
static void release_port_group(struct kref *kref)
{
	struct alua_port_group *pg;

	pg = container_of(kref, struct alua_port_group, kref);
	if (pg->rtpg_sdev)
		flush_delayed_work(&pg->rtpg_work);
	spin_lock(&port_group_lock);
	list_del(&pg->node);
	spin_unlock(&port_group_lock);
	kfree_rcu(pg, rcu);
}
0121
0122
0123
0124
0125
0126 static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff,
0127 int bufflen, struct scsi_sense_hdr *sshdr, int flags)
0128 {
0129 u8 cdb[MAX_COMMAND_SIZE];
0130 blk_opf_t req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
0131 REQ_FAILFAST_DRIVER;
0132
0133
0134 memset(cdb, 0x0, MAX_COMMAND_SIZE);
0135 cdb[0] = MAINTENANCE_IN;
0136 if (!(flags & ALUA_RTPG_EXT_HDR_UNSUPP))
0137 cdb[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT;
0138 else
0139 cdb[1] = MI_REPORT_TARGET_PGS;
0140 put_unaligned_be32(bufflen, &cdb[6]);
0141
0142 return scsi_execute(sdev, cdb, DMA_FROM_DEVICE, buff, bufflen, NULL,
0143 sshdr, ALUA_FAILOVER_TIMEOUT * HZ,
0144 ALUA_FAILOVER_RETRIES, req_flags, 0, NULL);
0145 }
0146
0147
0148
0149
0150
0151
0152
0153
0154 static int submit_stpg(struct scsi_device *sdev, int group_id,
0155 struct scsi_sense_hdr *sshdr)
0156 {
0157 u8 cdb[MAX_COMMAND_SIZE];
0158 unsigned char stpg_data[8];
0159 int stpg_len = 8;
0160 blk_opf_t req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
0161 REQ_FAILFAST_DRIVER;
0162
0163
0164 memset(stpg_data, 0, stpg_len);
0165 stpg_data[4] = SCSI_ACCESS_STATE_OPTIMAL;
0166 put_unaligned_be16(group_id, &stpg_data[6]);
0167
0168
0169 memset(cdb, 0x0, MAX_COMMAND_SIZE);
0170 cdb[0] = MAINTENANCE_OUT;
0171 cdb[1] = MO_SET_TARGET_PGS;
0172 put_unaligned_be32(stpg_len, &cdb[6]);
0173
0174 return scsi_execute(sdev, cdb, DMA_TO_DEVICE, stpg_data, stpg_len, NULL,
0175 sshdr, ALUA_FAILOVER_TIMEOUT * HZ,
0176 ALUA_FAILOVER_RETRIES, req_flags, 0, NULL);
0177 }
0178
/*
 * alua_find_get_pg - look up a cached port group.
 * @id_str: device identifier to match
 * @id_size: length of @id_str
 * @group_id: target port group id to match
 *
 * Caller must hold port_group_lock; both call sites take it before
 * walking port_group_list.
 *
 * Returns the matching group with an elevated reference count, or
 * NULL if no live match exists.
 */
static struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size,
						int group_id)
{
	struct alua_port_group *pg;

	/* A group without a device identifier can never be matched. */
	if (!id_str || !id_size || !strlen(id_str))
		return NULL;

	list_for_each_entry(pg, &port_group_list, node) {
		if (pg->group_id != group_id)
			continue;
		if (!pg->device_id_len || pg->device_id_len != id_size)
			continue;
		if (strncmp(pg->device_id_str, id_str, id_size))
			continue;
		/* Skip groups already on their way to destruction. */
		if (!kref_get_unless_zero(&pg->kref))
			continue;
		return pg;
	}

	return NULL;
}
0201
0202
0203
0204
0205
0206
0207
0208
0209
0210
/*
 * alua_alloc_pg - Allocate a new port_group structure
 * @sdev: scsi device
 * @group_id: port group id
 * @tpgs: TPGS mode bits from alua_check_tpgs()
 *
 * Allocates a new port group and inserts it into the global list,
 * unless a matching group was registered concurrently, in which case
 * the existing (referenced) group is returned and the new one freed.
 * Returns ERR_PTR(-ENOMEM) on allocation failure.
 */
static struct alua_port_group *alua_alloc_pg(struct scsi_device *sdev,
					     int group_id, int tpgs)
{
	struct alua_port_group *pg, *tmp_pg;

	pg = kzalloc(sizeof(struct alua_port_group), GFP_KERNEL);
	if (!pg)
		return ERR_PTR(-ENOMEM);

	pg->device_id_len = scsi_vpd_lun_id(sdev, pg->device_id_str,
					    sizeof(pg->device_id_str));
	if (pg->device_id_len <= 0) {
		/*
		 * TPGS is supported but no device identification was
		 * found. Leave the id empty; such a group can never be
		 * shared with other devices (cf. alua_find_get_pg()).
		 */
		sdev_printk(KERN_INFO, sdev,
			    "%s: No device descriptors found\n",
			    ALUA_DH_NAME);
		pg->device_id_str[0] = '\0';
		pg->device_id_len = 0;
	}
	pg->group_id = group_id;
	pg->tpgs = tpgs;
	/* Assume optimal until the first RTPG says otherwise. */
	pg->state = SCSI_ACCESS_STATE_OPTIMAL;
	pg->valid_states = TPGS_SUPPORT_ALL;
	if (optimize_stpg)
		pg->flags |= ALUA_OPTIMIZE_STPG;
	kref_init(&pg->kref);
	INIT_DELAYED_WORK(&pg->rtpg_work, alua_rtpg_work);
	INIT_LIST_HEAD(&pg->rtpg_list);
	INIT_LIST_HEAD(&pg->node);
	INIT_LIST_HEAD(&pg->dh_list);
	spin_lock_init(&pg->lock);

	/* Check for an existing group; drop ours if somebody beat us to it. */
	spin_lock(&port_group_lock);
	tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len,
				  group_id);
	if (tmp_pg) {
		spin_unlock(&port_group_lock);
		kfree(pg);
		return tmp_pg;
	}

	list_add(&pg->node, &port_group_list);
	spin_unlock(&port_group_lock);

	return pg;
}
0260
0261
0262
0263
0264
0265
0266
0267
0268 static int alua_check_tpgs(struct scsi_device *sdev)
0269 {
0270 int tpgs = TPGS_MODE_NONE;
0271
0272
0273
0274
0275
0276 if (sdev->type != TYPE_DISK) {
0277 sdev_printk(KERN_INFO, sdev,
0278 "%s: disable for non-disk devices\n",
0279 ALUA_DH_NAME);
0280 return tpgs;
0281 }
0282
0283 tpgs = scsi_device_tpgs(sdev);
0284 switch (tpgs) {
0285 case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT:
0286 sdev_printk(KERN_INFO, sdev,
0287 "%s: supports implicit and explicit TPGS\n",
0288 ALUA_DH_NAME);
0289 break;
0290 case TPGS_MODE_EXPLICIT:
0291 sdev_printk(KERN_INFO, sdev, "%s: supports explicit TPGS\n",
0292 ALUA_DH_NAME);
0293 break;
0294 case TPGS_MODE_IMPLICIT:
0295 sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n",
0296 ALUA_DH_NAME);
0297 break;
0298 case TPGS_MODE_NONE:
0299 sdev_printk(KERN_INFO, sdev, "%s: not supported\n",
0300 ALUA_DH_NAME);
0301 break;
0302 default:
0303 sdev_printk(KERN_INFO, sdev,
0304 "%s: unsupported TPGS setting %d\n",
0305 ALUA_DH_NAME, tpgs);
0306 tpgs = TPGS_MODE_NONE;
0307 break;
0308 }
0309
0310 return tpgs;
0311 }
0312
0313
0314
0315
0316
0317
0318
0319
/*
 * alua_check_vpd - Evaluate the target port group identification
 * @sdev: device to be checked
 * @h: handler data of @sdev
 * @tpgs: TPGS mode bits from alua_check_tpgs()
 *
 * Extracts the relative target port and the target port group id,
 * attaches the device to the matching (possibly newly allocated) port
 * group and kicks off an initial RTPG.
 */
static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h,
			  int tpgs)
{
	int rel_port = -1, group_id;
	struct alua_port_group *pg, *old_pg = NULL;
	bool pg_updated = false;
	unsigned long flags;

	group_id = scsi_vpd_tpg_id(sdev, &rel_port);
	if (group_id < 0) {
		/*
		 * Internal error: TPGS supported but required target
		 * port descriptors are not present.  Disable ALUA
		 * support for this device.
		 */
		sdev_printk(KERN_INFO, sdev,
			    "%s: No target port descriptors found\n",
			    ALUA_DH_NAME);
		return SCSI_DH_DEV_UNSUPP;
	}

	pg = alua_alloc_pg(sdev, group_id, tpgs);
	if (IS_ERR(pg)) {
		if (PTR_ERR(pg) == -ENOMEM)
			return SCSI_DH_NOMEM;
		return SCSI_DH_DEV_UNSUPP;
	}
	if (pg->device_id_len)
		sdev_printk(KERN_INFO, sdev,
			    "%s: device %s port group %x rel port %x\n",
			    ALUA_DH_NAME, pg->device_id_str,
			    group_id, rel_port);
	else
		sdev_printk(KERN_INFO, sdev,
			    "%s: port group %x rel port %x\n",
			    ALUA_DH_NAME, group_id, rel_port);

	/* Update the port group reference; move the device if it changed. */
	spin_lock(&h->pg_lock);
	old_pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock));
	if (old_pg != pg) {
		/* Port group has changed: unlink from the old group. */
		if (h->pg) {
			spin_lock_irqsave(&old_pg->lock, flags);
			list_del_rcu(&h->node);
			spin_unlock_irqrestore(&old_pg->lock, flags);
		}
		rcu_assign_pointer(h->pg, pg);
		pg_updated = true;
	}

	spin_lock_irqsave(&pg->lock, flags);
	if (pg_updated)
		list_add_rcu(&h->node, &pg->dh_list);
	spin_unlock_irqrestore(&pg->lock, flags);

	/* Trigger an initial RTPG to learn the actual access state. */
	alua_rtpg_queue(rcu_dereference_protected(h->pg,
						  lockdep_is_held(&h->pg_lock)),
			sdev, NULL, true);
	spin_unlock(&h->pg_lock);

	/*
	 * old_pg is either the replaced group (drop the reference held
	 * through h->pg) or the same group returned again by
	 * alua_alloc_pg() (drop the extra reference it took).
	 */
	if (old_pg)
		kref_put(&old_pg->kref, release_port_group);

	return SCSI_DH_OK;
}
0386
0387 static char print_alua_state(unsigned char state)
0388 {
0389 switch (state) {
0390 case SCSI_ACCESS_STATE_OPTIMAL:
0391 return 'A';
0392 case SCSI_ACCESS_STATE_ACTIVE:
0393 return 'N';
0394 case SCSI_ACCESS_STATE_STANDBY:
0395 return 'S';
0396 case SCSI_ACCESS_STATE_UNAVAILABLE:
0397 return 'U';
0398 case SCSI_ACCESS_STATE_LBA:
0399 return 'L';
0400 case SCSI_ACCESS_STATE_OFFLINE:
0401 return 'O';
0402 case SCSI_ACCESS_STATE_TRANSITIONING:
0403 return 'T';
0404 default:
0405 return 'X';
0406 }
0407 }
0408
/*
 * alua_check_sense - Interpret sense data relevant to ALUA.
 * @sdev: device that reported the sense
 * @sense_hdr: decoded sense data
 *
 * Translates ALUA-related sense codes into retry dispositions and
 * schedules a state re-check where the sense indicates the access
 * state may have changed.  ASC/ASCQ meanings below are per the SPC
 * additional sense code tables.
 */
static enum scsi_disposition alua_check_sense(struct scsi_device *sdev,
					      struct scsi_sense_hdr *sense_hdr)
{
	struct alua_dh_data *h = sdev->handler_data;
	struct alua_port_group *pg;

	switch (sense_hdr->sense_key) {
	case NOT_READY:
		if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
			/*
			 * LUN Not Accessible - ALUA state transition.
			 * Mark the group transitioning and retry.
			 */
			rcu_read_lock();
			pg = rcu_dereference(h->pg);
			if (pg)
				pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
			rcu_read_unlock();
			alua_check(sdev, false);
			return NEEDS_RETRY;
		}
		break;
	case UNIT_ATTENTION:
		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) {
			/*
			 * Power On, Reset, or Bus Device Reset.
			 * Might have obscured a state transition,
			 * so force a state re-check.
			 */
			alua_check(sdev, true);
			return ADD_TO_MLQUEUE;
		}
		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04)
			/*
			 * Device internal reset
			 */
			return ADD_TO_MLQUEUE;
		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01)
			/*
			 * Mode parameters changed
			 */
			return ADD_TO_MLQUEUE;
		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) {
			/*
			 * ALUA state changed
			 */
			alua_check(sdev, true);
			return ADD_TO_MLQUEUE;
		}
		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) {
			/*
			 * Implicit ALUA state transition failed
			 */
			alua_check(sdev, true);
			return ADD_TO_MLQUEUE;
		}
		if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03)
			/*
			 * Inquiry data has changed
			 */
			return ADD_TO_MLQUEUE;
		if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x0e)
			/*
			 * Reported LUNs data has changed.
			 * Just retry; a rescan is handled elsewhere.
			 */
			return ADD_TO_MLQUEUE;
		break;
	}

	return SCSI_RETURN_NOT_HANDLED;
}
0481
0482
0483
0484
0485
0486
0487
0488
0489
0490 static int alua_tur(struct scsi_device *sdev)
0491 {
0492 struct scsi_sense_hdr sense_hdr;
0493 int retval;
0494
0495 retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ,
0496 ALUA_FAILOVER_RETRIES, &sense_hdr);
0497 if (sense_hdr.sense_key == NOT_READY &&
0498 sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
0499 return SCSI_DH_RETRY;
0500 else if (retval)
0501 return SCSI_DH_IO;
0502 else
0503 return SCSI_DH_OK;
0504 }
0505
0506
0507
0508
0509
0510
0511
0512
0513
/*
 * alua_rtpg - Evaluate REPORT TARGET GROUP STATES
 * @sdev: the device to be evaluated
 * @pg: the port group associated with @sdev
 *
 * Issues an RTPG, parses the returned target port group descriptors
 * and updates the cached state of every port group sharing this
 * device identifier.  Returns a SCSI_DH_* status for the caller
 * (alua_rtpg_work()) to act on.
 */
static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
{
	struct scsi_sense_hdr sense_hdr;
	struct alua_port_group *tmp_pg;
	int len, k, off, bufflen = ALUA_RTPG_SIZE;
	int group_id_old, state_old, pref_old, valid_states_old;
	unsigned char *desc, *buff;
	unsigned err;
	int retval;
	unsigned int tpg_desc_tbl_off;
	unsigned char orig_transition_tmo;
	unsigned long flags;
	bool transitioning_sense = false;

	/* Remember the old values so only actual changes are logged below. */
	group_id_old = pg->group_id;
	state_old = pg->state;
	pref_old = pg->pref;
	valid_states_old = pg->valid_states;

	if (!pg->expiry) {
		unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ;

		if (pg->transition_tmo)
			transition_tmo = pg->transition_tmo * HZ;

		pg->expiry = round_jiffies_up(jiffies + transition_tmo);
	}

	buff = kzalloc(bufflen, GFP_KERNEL);
	if (!buff)
		return SCSI_DH_DEV_TEMP_BUSY;

 retry:
	err = 0;
	retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, pg->flags);

	if (retval) {
		/*
		 * If the target only supports active/optimized there is
		 * nothing to switch to anyway, so ignore RTPG failures
		 * (some implementations error out during e.g. firmware
		 * updates) to avoid spurious path failures.
		 */
		if ((pg->valid_states & ~TPGS_SUPPORT_OPTIMIZED) == 0) {
			sdev_printk(KERN_INFO, sdev,
				    "%s: ignoring rtpg result %d\n",
				    ALUA_DH_NAME, retval);
			kfree(buff);
			return SCSI_DH_OK;
		}
		/* Transport error or no sense data to interpret. */
		if (retval < 0 || !scsi_sense_valid(&sense_hdr)) {
			sdev_printk(KERN_INFO, sdev,
				    "%s: rtpg failed, result %d\n",
				    ALUA_DH_NAME, retval);
			kfree(buff);
			if (retval < 0)
				return SCSI_DH_DEV_TEMP_BUSY;
			if (host_byte(retval) == DID_NO_CONNECT)
				return SCSI_DH_RES_TEMP_UNAVAIL;
			return SCSI_DH_IO;
		}

		/*
		 * Some arrays reject the extended-header format with
		 * ILLEGAL_REQUEST.  Remember that and retry the RTPG
		 * without requesting the extended header.
		 */
		if (!(pg->flags & ALUA_RTPG_EXT_HDR_UNSUPP) &&
		    sense_hdr.sense_key == ILLEGAL_REQUEST) {
			pg->flags |= ALUA_RTPG_EXT_HDR_UNSUPP;
			goto retry;
		}

		/*
		 * An 'ALUA state transition' sense means the array
		 * cannot return RTPG data during the transition; set
		 * the state to transitioning directly.
		 */
		if (sense_hdr.sense_key == NOT_READY &&
		    sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) {
			transitioning_sense = true;
			goto skip_rtpg;
		}

		/*
		 * Retry on any UNIT ATTENTION as long as the transition
		 * deadline has not passed.
		 */
		if (sense_hdr.sense_key == UNIT_ATTENTION)
			err = SCSI_DH_RETRY;
		if (err == SCSI_DH_RETRY &&
		    pg->expiry != 0 && time_before(jiffies, pg->expiry)) {
			sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n",
				    ALUA_DH_NAME);
			scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
			kfree(buff);
			return err;
		}
		sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n",
			    ALUA_DH_NAME);
		scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
		kfree(buff);
		pg->expiry = 0;
		return SCSI_DH_IO;
	}

	/* Total length: the 4-byte length field does not include itself. */
	len = get_unaligned_be32(&buff[0]) + 4;

	if (len > bufflen) {
		/* Resubmit with the correct length */
		kfree(buff);
		bufflen = len;
		buff = kmalloc(bufflen, GFP_KERNEL);
		if (!buff) {
			sdev_printk(KERN_WARNING, sdev,
				    "%s: kmalloc buffer failed\n",__func__);
			/* Temporary failure, bypass */
			pg->expiry = 0;
			return SCSI_DH_DEV_TEMP_BUSY;
		}
		goto retry;
	}

	orig_transition_tmo = pg->transition_tmo;
	/* The extended header carries the implicit transition timeout. */
	if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0)
		pg->transition_tmo = buff[5];
	else
		pg->transition_tmo = ALUA_FAILOVER_TIMEOUT;

	if (orig_transition_tmo != pg->transition_tmo) {
		sdev_printk(KERN_INFO, sdev,
			    "%s: transition timeout set to %d seconds\n",
			    ALUA_DH_NAME, pg->transition_tmo);
		pg->expiry = jiffies + pg->transition_tmo * HZ;
	}

	/* Descriptors start after the (possibly extended) header. */
	if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR)
		tpg_desc_tbl_off = 8;
	else
		tpg_desc_tbl_off = 4;

	/* Walk every returned target port group descriptor. */
	for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off;
	     k < len;
	     k += off, desc += off) {
		u16 group_id = get_unaligned_be16(&desc[2]);

		spin_lock_irqsave(&port_group_lock, flags);
		tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len,
					  group_id);
		spin_unlock_irqrestore(&port_group_lock, flags);
		if (tmp_pg) {
			/*
			 * Trylock only: if another worker is busy with
			 * that group the update is simply skipped here.
			 */
			if (spin_trylock_irqsave(&tmp_pg->lock, flags)) {
				if ((tmp_pg == pg) ||
				    !(tmp_pg->flags & ALUA_PG_RUNNING)) {
					struct alua_dh_data *h;

					tmp_pg->state = desc[0] & 0x0f;
					tmp_pg->pref = desc[0] >> 7;
					rcu_read_lock();
					list_for_each_entry_rcu(h,
						&tmp_pg->dh_list, node) {
						if (!h->sdev)
							continue;
						h->sdev->access_state = desc[0];
					}
					rcu_read_unlock();
				}
				if (tmp_pg == pg)
					tmp_pg->valid_states = desc[1];
				spin_unlock_irqrestore(&tmp_pg->lock, flags);
			}
			kref_put(&tmp_pg->kref, release_port_group);
		}
		/* 8-byte descriptor header plus 4 bytes per target port. */
		off = 8 + (desc[7] * 4);
	}

 skip_rtpg:
	spin_lock_irqsave(&pg->lock, flags);
	if (transitioning_sense)
		pg->state = SCSI_ACCESS_STATE_TRANSITIONING;

	/* Log the group summary only when something actually changed. */
	if (group_id_old != pg->group_id || state_old != pg->state ||
	    pref_old != pg->pref || valid_states_old != pg->valid_states)
		sdev_printk(KERN_INFO, sdev,
			    "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n",
			    ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state),
			    pg->pref ? "preferred" : "non-preferred",
			    pg->valid_states&TPGS_SUPPORT_TRANSITION?'T':'t',
			    pg->valid_states&TPGS_SUPPORT_OFFLINE?'O':'o',
			    pg->valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l',
			    pg->valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u',
			    pg->valid_states&TPGS_SUPPORT_STANDBY?'S':'s',
			    pg->valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n',
			    pg->valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a');

	switch (pg->state) {
	case SCSI_ACCESS_STATE_TRANSITIONING:
		if (time_before(jiffies, pg->expiry)) {
			/* State transition, retry */
			pg->interval = ALUA_RTPG_RETRY_DELAY;
			err = SCSI_DH_RETRY;
		} else {
			struct alua_dh_data *h;

			/* Transitioning time exceeded, set port to standby */
			err = SCSI_DH_IO;
			pg->state = SCSI_ACCESS_STATE_STANDBY;
			pg->expiry = 0;
			rcu_read_lock();
			list_for_each_entry_rcu(h, &pg->dh_list, node) {
				if (!h->sdev)
					continue;
				h->sdev->access_state =
					(pg->state & SCSI_ACCESS_STATE_MASK);
				if (pg->pref)
					h->sdev->access_state |=
						SCSI_ACCESS_STATE_PREFERRED;
			}
			rcu_read_unlock();
		}
		break;
	case SCSI_ACCESS_STATE_OFFLINE:
		/* Path unusable */
		err = SCSI_DH_DEV_OFFLINED;
		pg->expiry = 0;
		break;
	default:
		/* Useable path if active */
		err = SCSI_DH_OK;
		pg->expiry = 0;
		break;
	}
	spin_unlock_irqrestore(&pg->lock, flags);
	kfree(buff);
	return err;
}
0755
0756
0757
0758
0759
0760
0761
0762
0763
/*
 * alua_stpg - Issue a SET TARGET PORT GROUP command
 * @sdev: the device to be evaluated
 * @pg: the port group @sdev belongs to
 *
 * Issue a SET TARGET PORT GROUP command to request a transition to
 * active/optimized, provided the current state warrants it and the
 * target supports explicit ALUA.  Returns SCSI_DH_OK when no STPG is
 * needed, SCSI_DH_RETRY to have the caller re-evaluate via RTPG, or
 * another SCSI_DH_* error.
 */
static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg)
{
	int retval;
	struct scsi_sense_hdr sense_hdr;

	if (!(pg->tpgs & TPGS_MODE_EXPLICIT)) {
		/* Only implicit ALUA supported; retry with an RTPG. */
		return SCSI_DH_RETRY;
	}
	switch (pg->state) {
	case SCSI_ACCESS_STATE_OPTIMAL:
		/* Already in the desired state. */
		return SCSI_DH_OK;
	case SCSI_ACCESS_STATE_ACTIVE:
		/*
		 * With 'optimize_stpg' set, a non-preferred
		 * active/non-optimized path is acceptable when the
		 * target also performs implicit transitions.
		 */
		if ((pg->flags & ALUA_OPTIMIZE_STPG) &&
		    !pg->pref &&
		    (pg->tpgs & TPGS_MODE_IMPLICIT))
			return SCSI_DH_OK;
		break;
	case SCSI_ACCESS_STATE_STANDBY:
	case SCSI_ACCESS_STATE_UNAVAILABLE:
		break;
	case SCSI_ACCESS_STATE_OFFLINE:
		/* An offline port cannot be activated. */
		return SCSI_DH_IO;
	case SCSI_ACCESS_STATE_TRANSITIONING:
		break;
	default:
		sdev_printk(KERN_INFO, sdev,
			    "%s: stpg failed, unhandled TPGS state %d",
			    ALUA_DH_NAME, pg->state);
		return SCSI_DH_NOSYS;
	}
	retval = submit_stpg(sdev, pg->group_id, &sense_hdr);

	if (retval) {
		if (retval < 0 || !scsi_sense_valid(&sense_hdr)) {
			sdev_printk(KERN_INFO, sdev,
				    "%s: stpg failed, result %d",
				    ALUA_DH_NAME, retval);
			if (retval < 0)
				return SCSI_DH_DEV_TEMP_BUSY;
		} else {
			sdev_printk(KERN_INFO, sdev, "%s: stpg failed\n",
				    ALUA_DH_NAME);
			scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
		}
	}

	/* Have the caller verify the resulting state via RTPG. */
	return SCSI_DH_RETRY;
}
0813
/*
 * alua_rtpg_select_sdev - switch the RTPG to another device.
 * @pg: the port group being updated; pg->lock must be held.
 *
 * The current pg->rtpg_sdev failed; mark it disabled and pick another
 * not-yet-disabled device attached to this group so the RTPG can be
 * retried.  Returns true when pg->rtpg_sdev was replaced (the old
 * device reference is dropped, a reference on the new one is held).
 */
static bool alua_rtpg_select_sdev(struct alua_port_group *pg)
{
	struct alua_dh_data *h;
	struct scsi_device *sdev = NULL;

	lockdep_assert_held(&pg->lock);
	if (WARN_ON(!pg->rtpg_sdev))
		return false;

	/*
	 * RTPG on pg->rtpg_sdev failed: try to find another SCSI device
	 * in the same port group to send the RTPG on.
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(h, &pg->dh_list, node) {
		if (!h->sdev)
			continue;
		if (h->sdev == pg->rtpg_sdev) {
			/* Don't pick the failing device again. */
			h->disabled = true;
			continue;
		}
		if (rcu_dereference(h->pg) == pg &&
		    !h->disabled &&
		    !scsi_device_get(h->sdev)) {
			sdev = h->sdev;
			break;
		}
	}
	rcu_read_unlock();

	if (!sdev) {
		pr_warn("%s: no device found for rtpg\n",
			(pg->device_id_len ?
			 (char *)pg->device_id_str : "(nameless PG)"));
		return false;
	}

	sdev_printk(KERN_INFO, sdev, "rtpg retry on different device\n");

	scsi_device_put(pg->rtpg_sdev);
	pg->rtpg_sdev = sdev;

	return true;
}
0858
/*
 * alua_rtpg_work - deferred RTPG/STPG processing for a port group.
 *
 * Runs from kaluad_wq.  Issues an RTPG (and, when queued, an STPG) on
 * pg->rtpg_sdev, rescheduling itself for retryable outcomes, and
 * finally completes all queued activation callbacks.  pg->lock is
 * dropped around the actual SCSI commands and re-taken afterwards.
 */
static void alua_rtpg_work(struct work_struct *work)
{
	struct alua_port_group *pg =
		container_of(work, struct alua_port_group, rtpg_work.work);
	struct scsi_device *sdev;
	LIST_HEAD(qdata_list);
	int err = SCSI_DH_OK;
	struct alua_queue_data *qdata, *tmp;
	struct alua_dh_data *h;
	unsigned long flags;

	spin_lock_irqsave(&pg->lock, flags);
	sdev = pg->rtpg_sdev;
	if (!sdev) {
		/* No device left; just drop the worker's reference. */
		WARN_ON(pg->flags & ALUA_PG_RUN_RTPG);
		WARN_ON(pg->flags & ALUA_PG_RUN_STPG);
		spin_unlock_irqrestore(&pg->lock, flags);
		kref_put(&pg->kref, release_port_group);
		return;
	}
	pg->flags |= ALUA_PG_RUNNING;
	if (pg->flags & ALUA_PG_RUN_RTPG) {
		int state = pg->state;

		pg->flags &= ~ALUA_PG_RUN_RTPG;
		spin_unlock_irqrestore(&pg->lock, flags);
		if (state == SCSI_ACCESS_STATE_TRANSITIONING) {
			/*
			 * During a transition probe with TEST UNIT READY
			 * first; if still transitioning, reschedule
			 * instead of issuing a full RTPG.
			 */
			if (alua_tur(sdev) == SCSI_DH_RETRY) {
				spin_lock_irqsave(&pg->lock, flags);
				pg->flags &= ~ALUA_PG_RUNNING;
				pg->flags |= ALUA_PG_RUN_RTPG;
				if (!pg->interval)
					pg->interval = ALUA_RTPG_RETRY_DELAY;
				spin_unlock_irqrestore(&pg->lock, flags);
				queue_delayed_work(kaluad_wq, &pg->rtpg_work,
						   pg->interval * HZ);
				return;
			}
			/* Send RTPG directly after completing the transition */
		}
		err = alua_rtpg(sdev, pg);
		spin_lock_irqsave(&pg->lock, flags);

		/* If the RTPG path became unreachable, try another device. */
		if (err == SCSI_DH_RES_TEMP_UNAVAIL &&
		    alua_rtpg_select_sdev(pg))
			err = SCSI_DH_IMM_RETRY;

		if (err == SCSI_DH_RETRY || err == SCSI_DH_IMM_RETRY ||
		    pg->flags & ALUA_PG_RUN_RTPG) {
			pg->flags &= ~ALUA_PG_RUNNING;
			if (err == SCSI_DH_IMM_RETRY)
				pg->interval = 0;
			else if (!pg->interval && !(pg->flags & ALUA_PG_RUN_RTPG))
				pg->interval = ALUA_RTPG_RETRY_DELAY;
			pg->flags |= ALUA_PG_RUN_RTPG;
			spin_unlock_irqrestore(&pg->lock, flags);
			queue_delayed_work(kaluad_wq, &pg->rtpg_work,
					   pg->interval * HZ);
			return;
		}
		if (err != SCSI_DH_OK)
			pg->flags &= ~ALUA_PG_RUN_STPG;
	}
	if (pg->flags & ALUA_PG_RUN_STPG) {
		pg->flags &= ~ALUA_PG_RUN_STPG;
		spin_unlock_irqrestore(&pg->lock, flags);
		err = alua_stpg(sdev, pg);
		spin_lock_irqsave(&pg->lock, flags);
		/* An STPG retry is performed via a follow-up RTPG. */
		if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) {
			pg->flags |= ALUA_PG_RUN_RTPG;
			pg->interval = 0;
			pg->flags &= ~ALUA_PG_RUNNING;
			spin_unlock_irqrestore(&pg->lock, flags);
			queue_delayed_work(kaluad_wq, &pg->rtpg_work,
					   pg->interval * HZ);
			return;
		}
	}

	list_splice_init(&pg->rtpg_list, &qdata_list);
	/*
	 * We went through an RTPG, for good or bad.
	 * Re-enable all devices for the next attempt.
	 */
	list_for_each_entry(h, &pg->dh_list, node)
		h->disabled = false;
	pg->rtpg_sdev = NULL;
	spin_unlock_irqrestore(&pg->lock, flags);

	/* Complete all queued activation requests with the final status. */
	list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) {
		list_del(&qdata->entry);
		if (qdata->callback_fn)
			qdata->callback_fn(qdata->callback_data, err);
		kfree(qdata);
	}
	spin_lock_irqsave(&pg->lock, flags);
	pg->flags &= ~ALUA_PG_RUNNING;
	spin_unlock_irqrestore(&pg->lock, flags);
	scsi_device_put(sdev);
	kref_put(&pg->kref, release_port_group);
}
0961
0962
0963
0964
0965
0966
0967
0968
0969
0970
0971
0972
/**
 * alua_rtpg_queue() - cause RTPG to be submitted asynchronously
 * @pg: ALUA port group associated with @sdev.
 * @sdev: SCSI device for which to submit an RTPG.
 * @qdata: Information about the callback to invoke after the RTPG.
 * @force: Whether or not to submit an RTPG if a worker is already running.
 *
 * Returns true if and only if alua_rtpg_work() will be called
 * asynchronously.  That happens when a worker was not yet scheduled
 * for @pg, or when @force is set (queuing @qdata implies @force).
 */
static bool alua_rtpg_queue(struct alua_port_group *pg,
			    struct scsi_device *sdev,
			    struct alua_queue_data *qdata, bool force)
{
	int start_queue = 0;
	unsigned long flags;

	if (WARN_ON_ONCE(!pg) || scsi_device_get(sdev))
		return false;

	spin_lock_irqsave(&pg->lock, flags);
	if (qdata) {
		list_add_tail(&qdata->entry, &pg->rtpg_list);
		pg->flags |= ALUA_PG_RUN_STPG;
		force = true;
	}
	if (pg->rtpg_sdev == NULL) {
		/* No worker owns this group yet; take ownership. */
		pg->interval = 0;
		pg->flags |= ALUA_PG_RUN_RTPG;
		kref_get(&pg->kref);
		pg->rtpg_sdev = sdev;
		start_queue = 1;
	} else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) {
		pg->flags |= ALUA_PG_RUN_RTPG;
		/* Do not queue if the worker is already running */
		if (!(pg->flags & ALUA_PG_RUNNING)) {
			kref_get(&pg->kref);
			start_queue = 1;
		}
	}

	spin_unlock_irqrestore(&pg->lock, flags);

	if (start_queue) {
		if (queue_delayed_work(kaluad_wq, &pg->rtpg_work,
				msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS)))
			/* The pg reference is now owned by the worker. */
			sdev = NULL;
		else
			kref_put(&pg->kref, release_port_group);
	}
	if (sdev)
		scsi_device_put(sdev);

	return true;
}
1017
1018
1019
1020
1021
1022
1023
1024
1025 static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h)
1026 {
1027 int err = SCSI_DH_DEV_UNSUPP, tpgs;
1028
1029 mutex_lock(&h->init_mutex);
1030 h->disabled = false;
1031 tpgs = alua_check_tpgs(sdev);
1032 if (tpgs != TPGS_MODE_NONE)
1033 err = alua_check_vpd(sdev, h, tpgs);
1034 h->init_error = err;
1035 mutex_unlock(&h->init_mutex);
1036 return err;
1037 }
1038
1039
1040
1041
1042
1043
1044
1045
1046
/*
 * alua_set_params - set handler parameters
 * @sdev: device on the path to be activated
 * @params: parameter string: two NUL-separated fields, the argument
 *	count (must be 1) followed by the 'optimize_stpg' value (0 or
 *	1) - presumably built by the multipath hardware-handler param
 *	parser; verify against the caller.
 *
 * Sets or clears ALUA_OPTIMIZE_STPG on the device's port group.
 * Returns SCSI_DH_OK on success, -EINVAL on malformed input, or
 * -ENXIO when no port group is attached yet.
 */
static int alua_set_params(struct scsi_device *sdev, const char *params)
{
	struct alua_dh_data *h = sdev->handler_data;
	struct alua_port_group *pg = NULL;
	unsigned int optimize = 0, argc;
	const char *p = params;
	int result = SCSI_DH_OK;
	unsigned long flags;

	if ((sscanf(params, "%u", &argc) != 1) || (argc != 1))
		return -EINVAL;

	/* Advance past the first NUL-terminated field. */
	while (*p++)
		;
	if ((sscanf(p, "%u", &optimize) != 1) || (optimize > 1))
		return -EINVAL;

	rcu_read_lock();
	pg = rcu_dereference(h->pg);
	if (!pg) {
		rcu_read_unlock();
		return -ENXIO;
	}
	spin_lock_irqsave(&pg->lock, flags);
	if (optimize)
		pg->flags |= ALUA_OPTIMIZE_STPG;
	else
		pg->flags &= ~ALUA_OPTIMIZE_STPG;
	spin_unlock_irqrestore(&pg->lock, flags);
	rcu_read_unlock();

	return result;
}
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
/*
 * alua_activate - activate a path
 * @sdev: device on the path to be activated
 * @fn: completion callback (may be deferred to the RTPG worker)
 * @data: opaque argument for @fn
 *
 * Queues an RTPG/STPG for the device's port group; on success the
 * worker invokes @fn once the transition completes.  If no group is
 * attached or queuing fails, @fn is invoked synchronously with an
 * error status.  Always returns 0; the result travels through @fn.
 */
static int alua_activate(struct scsi_device *sdev,
			activate_complete fn, void *data)
{
	struct alua_dh_data *h = sdev->handler_data;
	int err = SCSI_DH_OK;
	struct alua_queue_data *qdata;
	struct alua_port_group *pg;

	qdata = kzalloc(sizeof(*qdata), GFP_KERNEL);
	if (!qdata) {
		err = SCSI_DH_RES_TEMP_UNAVAIL;
		goto out;
	}
	qdata->callback_fn = fn;
	qdata->callback_data = data;

	mutex_lock(&h->init_mutex);
	rcu_read_lock();
	pg = rcu_dereference(h->pg);
	if (!pg || !kref_get_unless_zero(&pg->kref)) {
		/* No (live) port group: report the last init status. */
		rcu_read_unlock();
		kfree(qdata);
		err = h->init_error;
		mutex_unlock(&h->init_mutex);
		goto out;
	}
	rcu_read_unlock();
	mutex_unlock(&h->init_mutex);

	if (alua_rtpg_queue(pg, sdev, qdata, true))
		/* The worker now owns qdata and will invoke the callback. */
		fn = NULL;
	else
		err = SCSI_DH_DEV_OFFLINED;
	kref_put(&pg->kref, release_port_group);
out:
	if (fn)
		fn(data, err);
	return 0;
}
1130
1131
1132
1133
1134
1135
1136
/*
 * alua_check - check path status
 * @sdev: device on the path to be checked
 * @force: re-run the RTPG even if a worker is already scheduled
 *
 * Queue an RTPG update on the device's port group, if one is
 * attached and still alive.
 */
static void alua_check(struct scsi_device *sdev, bool force)
{
	struct alua_dh_data *h = sdev->handler_data;
	struct alua_port_group *pg;

	rcu_read_lock();
	pg = rcu_dereference(h->pg);
	if (!pg || !kref_get_unless_zero(&pg->kref)) {
		rcu_read_unlock();
		return;
	}
	rcu_read_unlock();
	alua_rtpg_queue(pg, sdev, NULL, force);
	kref_put(&pg->kref, release_port_group);
}
1152
1153
1154
1155
1156
1157
1158
1159 static blk_status_t alua_prep_fn(struct scsi_device *sdev, struct request *req)
1160 {
1161 struct alua_dh_data *h = sdev->handler_data;
1162 struct alua_port_group *pg;
1163 unsigned char state = SCSI_ACCESS_STATE_OPTIMAL;
1164
1165 rcu_read_lock();
1166 pg = rcu_dereference(h->pg);
1167 if (pg)
1168 state = pg->state;
1169 rcu_read_unlock();
1170
1171 switch (state) {
1172 case SCSI_ACCESS_STATE_OPTIMAL:
1173 case SCSI_ACCESS_STATE_ACTIVE:
1174 case SCSI_ACCESS_STATE_LBA:
1175 case SCSI_ACCESS_STATE_TRANSITIONING:
1176 return BLK_STS_OK;
1177 default:
1178 req->rq_flags |= RQF_QUIET;
1179 return BLK_STS_IOERR;
1180 }
1181 }
1182
/*
 * alua_rescan - Re-evaluate the ALUA state of a device.
 * @sdev: device to be re-initialized.
 */
static void alua_rescan(struct scsi_device *sdev)
{
	struct alua_dh_data *h = sdev->handler_data;

	alua_initialize(sdev, h);
}
1189
1190
1191
1192
1193
1194 static int alua_bus_attach(struct scsi_device *sdev)
1195 {
1196 struct alua_dh_data *h;
1197 int err;
1198
1199 h = kzalloc(sizeof(*h) , GFP_KERNEL);
1200 if (!h)
1201 return SCSI_DH_NOMEM;
1202 spin_lock_init(&h->pg_lock);
1203 rcu_assign_pointer(h->pg, NULL);
1204 h->init_error = SCSI_DH_OK;
1205 h->sdev = sdev;
1206 INIT_LIST_HEAD(&h->node);
1207
1208 mutex_init(&h->init_mutex);
1209 err = alua_initialize(sdev, h);
1210 if (err != SCSI_DH_OK && err != SCSI_DH_DEV_OFFLINED)
1211 goto failed;
1212
1213 sdev->handler_data = h;
1214 return SCSI_DH_OK;
1215 failed:
1216 kfree(h);
1217 return err;
1218 }
1219
1220
1221
1222
1223
/*
 * alua_bus_detach - Detach device handler
 * @sdev: device to be detached from
 *
 * Unlinks the handler data from its port group (via RCU, under
 * pg->lock, so concurrent list walkers stay safe), drops the group
 * reference and frees the handler data after a grace period.
 */
static void alua_bus_detach(struct scsi_device *sdev)
{
	struct alua_dh_data *h = sdev->handler_data;
	struct alua_port_group *pg;

	spin_lock(&h->pg_lock);
	pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock));
	rcu_assign_pointer(h->pg, NULL);
	spin_unlock(&h->pg_lock);
	if (pg) {
		spin_lock_irq(&pg->lock);
		list_del_rcu(&h->node);
		spin_unlock_irq(&pg->lock);
		kref_put(&pg->kref, release_port_group);
	}
	sdev->handler_data = NULL;
	/* Wait for in-flight RCU readers of h before freeing it. */
	synchronize_rcu();
	kfree(h);
}
1243
/* Device handler operations registered with the SCSI midlayer. */
static struct scsi_device_handler alua_dh = {
	.name = ALUA_DH_NAME,
	.module = THIS_MODULE,
	.attach = alua_bus_attach,
	.detach = alua_bus_detach,
	.prep_fn = alua_prep_fn,
	.check_sense = alua_check_sense,
	.activate = alua_activate,
	.rescan = alua_rescan,
	.set_params = alua_set_params,
};
1255
1256 static int __init alua_init(void)
1257 {
1258 int r;
1259
1260 kaluad_wq = alloc_workqueue("kaluad", WQ_MEM_RECLAIM, 0);
1261 if (!kaluad_wq)
1262 return -ENOMEM;
1263
1264 r = scsi_register_device_handler(&alua_dh);
1265 if (r != 0) {
1266 printk(KERN_ERR "%s: Failed to register scsi device handler",
1267 ALUA_DH_NAME);
1268 destroy_workqueue(kaluad_wq);
1269 }
1270 return r;
1271 }
1272
/*
 * Module teardown: unregister the handler first so no new work can
 * be queued, then destroy the workqueue.
 */
static void __exit alua_exit(void)
{
	scsi_unregister_device_handler(&alua_dh);
	destroy_workqueue(kaluad_wq);
}
1278
/* Standard module entry/exit hooks and metadata. */
module_init(alua_init);
module_exit(alua_exit);

MODULE_DESCRIPTION("DM Multipath ALUA support");
MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>");
MODULE_LICENSE("GPL");
MODULE_VERSION(ALUA_DH_VER);