Back to home page

OSCL-LXR

 
 

    


0001 
0002 /*
0003  * edac_device.c
0004  * (C) 2007 www.douglaskthompson.com
0005  *
0006  * This file may be distributed under the terms of the
0007  * GNU General Public License.
0008  *
0009  * Written by Doug Thompson <norsk5@xmission.com>
0010  *
0011  * edac_device API implementation
0012  * 19 Jan 2007
0013  */
0014 
0015 #include <asm/page.h>
0016 #include <linux/uaccess.h>
0017 #include <linux/ctype.h>
0018 #include <linux/highmem.h>
0019 #include <linux/init.h>
0020 #include <linux/jiffies.h>
0021 #include <linux/module.h>
0022 #include <linux/slab.h>
0023 #include <linux/smp.h>
0024 #include <linux/spinlock.h>
0025 #include <linux/sysctl.h>
0026 #include <linux/timer.h>
0027 
0028 #include "edac_device.h"
0029 #include "edac_module.h"
0030 
/*
 * 'device_ctls_mutex' protects manipulation of 'edac_device_list', the
 * global list of edac_device control structures (chained through their
 * 'link' member). The polling work function also holds it while it
 * invokes a device's check routine.
 */
static DEFINE_MUTEX(device_ctls_mutex);
static LIST_HEAD(edac_device_list);
0036 
0037 #ifdef CONFIG_EDAC_DEBUG
0038 static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev)
0039 {
0040     edac_dbg(3, "\tedac_dev = %p dev_idx=%d\n",
0041          edac_dev, edac_dev->dev_idx);
0042     edac_dbg(4, "\tedac_dev->edac_check = %p\n", edac_dev->edac_check);
0043     edac_dbg(3, "\tdev = %p\n", edac_dev->dev);
0044     edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
0045          edac_dev->mod_name, edac_dev->ctl_name);
0046     edac_dbg(3, "\tpvt_info = %p\n\n", edac_dev->pvt_info);
0047 }
0048 #endif              /* CONFIG_EDAC_DEBUG */
0049 
0050 /*
0051  * @off_val: zero, 1, or other based offset
0052  */
0053 struct edac_device_ctl_info *
0054 edac_device_alloc_ctl_info(unsigned pvt_sz, char *dev_name, unsigned nr_instances,
0055                char *blk_name, unsigned nr_blocks, unsigned off_val,
0056                struct edac_dev_sysfs_block_attribute *attrib_spec,
0057                unsigned nr_attrib, int device_index)
0058 {
0059     struct edac_dev_sysfs_block_attribute *dev_attrib, *attrib_p, *attrib;
0060     struct edac_device_block *dev_blk, *blk_p, *blk;
0061     struct edac_device_instance *dev_inst, *inst;
0062     struct edac_device_ctl_info *dev_ctl;
0063     unsigned instance, block, attr;
0064     void *pvt;
0065     int err;
0066 
0067     edac_dbg(4, "instances=%d blocks=%d\n", nr_instances, nr_blocks);
0068 
0069     dev_ctl = kzalloc(sizeof(struct edac_device_ctl_info), GFP_KERNEL);
0070     if (!dev_ctl)
0071         return NULL;
0072 
0073     dev_inst = kcalloc(nr_instances, sizeof(struct edac_device_instance), GFP_KERNEL);
0074     if (!dev_inst)
0075         goto free;
0076 
0077     dev_ctl->instances = dev_inst;
0078 
0079     dev_blk = kcalloc(nr_instances * nr_blocks, sizeof(struct edac_device_block), GFP_KERNEL);
0080     if (!dev_blk)
0081         goto free;
0082 
0083     dev_ctl->blocks = dev_blk;
0084 
0085     if (nr_attrib) {
0086         dev_attrib = kcalloc(nr_attrib, sizeof(struct edac_dev_sysfs_block_attribute),
0087                      GFP_KERNEL);
0088         if (!dev_attrib)
0089             goto free;
0090 
0091         dev_ctl->attribs = dev_attrib;
0092     }
0093 
0094     if (pvt_sz) {
0095         pvt = kzalloc(pvt_sz, GFP_KERNEL);
0096         if (!pvt)
0097             goto free;
0098 
0099         dev_ctl->pvt_info = pvt;
0100     }
0101 
0102     dev_ctl->dev_idx    = device_index;
0103     dev_ctl->nr_instances   = nr_instances;
0104 
0105     /* Default logging of CEs and UEs */
0106     dev_ctl->log_ce = 1;
0107     dev_ctl->log_ue = 1;
0108 
0109     /* Name of this edac device */
0110     snprintf(dev_ctl->name, sizeof(dev_ctl->name),"%s", dev_name);
0111 
0112     /* Initialize every Instance */
0113     for (instance = 0; instance < nr_instances; instance++) {
0114         inst = &dev_inst[instance];
0115         inst->ctl = dev_ctl;
0116         inst->nr_blocks = nr_blocks;
0117         blk_p = &dev_blk[instance * nr_blocks];
0118         inst->blocks = blk_p;
0119 
0120         /* name of this instance */
0121         snprintf(inst->name, sizeof(inst->name), "%s%u", dev_name, instance);
0122 
0123         /* Initialize every block in each instance */
0124         for (block = 0; block < nr_blocks; block++) {
0125             blk = &blk_p[block];
0126             blk->instance = inst;
0127             snprintf(blk->name, sizeof(blk->name),
0128                  "%s%d", blk_name, block + off_val);
0129 
0130             edac_dbg(4, "instance=%d inst_p=%p block=#%d block_p=%p name='%s'\n",
0131                  instance, inst, block, blk, blk->name);
0132 
0133             /* if there are NO attributes OR no attribute pointer
0134              * then continue on to next block iteration
0135              */
0136             if ((nr_attrib == 0) || (attrib_spec == NULL))
0137                 continue;
0138 
0139             /* setup the attribute array for this block */
0140             blk->nr_attribs = nr_attrib;
0141             attrib_p = &dev_attrib[block*nr_instances*nr_attrib];
0142             blk->block_attributes = attrib_p;
0143 
0144             edac_dbg(4, "THIS BLOCK_ATTRIB=%p\n",
0145                  blk->block_attributes);
0146 
0147             /* Initialize every user specified attribute in this
0148              * block with the data the caller passed in
0149              * Each block gets its own copy of pointers,
0150              * and its unique 'value'
0151              */
0152             for (attr = 0; attr < nr_attrib; attr++) {
0153                 attrib = &attrib_p[attr];
0154 
0155                 /* populate the unique per attrib
0156                  * with the code pointers and info
0157                  */
0158                 attrib->attr = attrib_spec[attr].attr;
0159                 attrib->show = attrib_spec[attr].show;
0160                 attrib->store = attrib_spec[attr].store;
0161 
0162                 attrib->block = blk;    /* up link */
0163 
0164                 edac_dbg(4, "alloc-attrib=%p attrib_name='%s' attrib-spec=%p spec-name=%s\n",
0165                      attrib, attrib->attr.name,
0166                      &attrib_spec[attr],
0167                      attrib_spec[attr].attr.name
0168                     );
0169             }
0170         }
0171     }
0172 
0173     /* Mark this instance as merely ALLOCATED */
0174     dev_ctl->op_state = OP_ALLOC;
0175 
0176     /*
0177      * Initialize the 'root' kobj for the edac_device controller
0178      */
0179     err = edac_device_register_sysfs_main_kobj(dev_ctl);
0180     if (err)
0181         goto free;
0182 
0183     /* at this point, the root kobj is valid, and in order to
0184      * 'free' the object, then the function:
0185      *  edac_device_unregister_sysfs_main_kobj() must be called
0186      * which will perform kobj unregistration and the actual free
0187      * will occur during the kobject callback operation
0188      */
0189 
0190     return dev_ctl;
0191 
0192 free:
0193     __edac_device_free_ctl_info(dev_ctl);
0194 
0195     return NULL;
0196 }
0197 EXPORT_SYMBOL_GPL(edac_device_alloc_ctl_info);
0198 
/*
 * edac_device_free_ctl_info
 *  release a control structure by unregistering its main sysfs kobject
 *  via edac_device_unregister_sysfs_main_kobj(); nothing is freed
 *  directly here.
 */
void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info)
{
    edac_device_unregister_sysfs_main_kobj(ctl_info);
}
EXPORT_SYMBOL_GPL(edac_device_free_ctl_info);
0204 
0205 /*
0206  * find_edac_device_by_dev
0207  *  scans the edac_device list for a specific 'struct device *'
0208  *
0209  *  lock to be held prior to call:  device_ctls_mutex
0210  *
0211  *  Return:
0212  *      pointer to control structure managing 'dev'
0213  *      NULL if not found on list
0214  */
0215 static struct edac_device_ctl_info *find_edac_device_by_dev(struct device *dev)
0216 {
0217     struct edac_device_ctl_info *edac_dev;
0218     struct list_head *item;
0219 
0220     edac_dbg(0, "\n");
0221 
0222     list_for_each(item, &edac_device_list) {
0223         edac_dev = list_entry(item, struct edac_device_ctl_info, link);
0224 
0225         if (edac_dev->dev == dev)
0226             return edac_dev;
0227     }
0228 
0229     return NULL;
0230 }
0231 
0232 /*
0233  * add_edac_dev_to_global_list
0234  *  Before calling this function, caller must
0235  *  assign a unique value to edac_dev->dev_idx.
0236  *
0237  *  lock to be held prior to call:  device_ctls_mutex
0238  *
0239  *  Return:
0240  *      0 on success
0241  *      1 on failure.
0242  */
0243 static int add_edac_dev_to_global_list(struct edac_device_ctl_info *edac_dev)
0244 {
0245     struct list_head *item, *insert_before;
0246     struct edac_device_ctl_info *rover;
0247 
0248     insert_before = &edac_device_list;
0249 
0250     /* Determine if already on the list */
0251     rover = find_edac_device_by_dev(edac_dev->dev);
0252     if (unlikely(rover != NULL))
0253         goto fail0;
0254 
0255     /* Insert in ascending order by 'dev_idx', so find position */
0256     list_for_each(item, &edac_device_list) {
0257         rover = list_entry(item, struct edac_device_ctl_info, link);
0258 
0259         if (rover->dev_idx >= edac_dev->dev_idx) {
0260             if (unlikely(rover->dev_idx == edac_dev->dev_idx))
0261                 goto fail1;
0262 
0263             insert_before = item;
0264             break;
0265         }
0266     }
0267 
0268     list_add_tail_rcu(&edac_dev->link, insert_before);
0269     return 0;
0270 
0271 fail0:
0272     edac_printk(KERN_WARNING, EDAC_MC,
0273             "%s (%s) %s %s already assigned %d\n",
0274             dev_name(rover->dev), edac_dev_name(rover),
0275             rover->mod_name, rover->ctl_name, rover->dev_idx);
0276     return 1;
0277 
0278 fail1:
0279     edac_printk(KERN_WARNING, EDAC_MC,
0280             "bug in low-level driver: attempt to assign\n"
0281             "    duplicate dev_idx %d in %s()\n", rover->dev_idx,
0282             __func__);
0283     return 1;
0284 }
0285 
/*
 * del_edac_device_from_global_list
 *  unlinks edac_device from the list it is on with list_del_rcu(),
 *  waits in synchronize_rcu(), then re-initializes the entry's 'link'
 *  as an empty list head.
 */
static void del_edac_device_from_global_list(struct edac_device_ctl_info
                        *edac_device)
{
    list_del_rcu(&edac_device->link);

    /* these are for safe removal of devices from global list while
     * NMI handlers may be traversing list
     */
    synchronize_rcu();
    INIT_LIST_HEAD(&edac_device->link);
}
0300 
0301 /*
0302  * edac_device_workq_function
0303  *  performs the operation scheduled by a workq request
0304  *
0305  *  this workq is embedded within an edac_device_ctl_info
0306  *  structure, that needs to be polled for possible error events.
0307  *
0308  *  This operation is to acquire the list mutex lock
0309  *  (thus preventing insertation or deletion)
0310  *  and then call the device's poll function IFF this device is
0311  *  running polled and there is a poll function defined.
0312  */
0313 static void edac_device_workq_function(struct work_struct *work_req)
0314 {
0315     struct delayed_work *d_work = to_delayed_work(work_req);
0316     struct edac_device_ctl_info *edac_dev = to_edac_device_ctl_work(d_work);
0317 
0318     mutex_lock(&device_ctls_mutex);
0319 
0320     /* If we are being removed, bail out immediately */
0321     if (edac_dev->op_state == OP_OFFLINE) {
0322         mutex_unlock(&device_ctls_mutex);
0323         return;
0324     }
0325 
0326     /* Only poll controllers that are running polled and have a check */
0327     if ((edac_dev->op_state == OP_RUNNING_POLL) &&
0328         (edac_dev->edac_check != NULL)) {
0329             edac_dev->edac_check(edac_dev);
0330     }
0331 
0332     mutex_unlock(&device_ctls_mutex);
0333 
0334     /* Reschedule the workq for the next time period to start again
0335      * if the number of msec is for 1 sec, then adjust to the next
0336      * whole one second to save timers firing all over the period
0337      * between integral seconds
0338      */
0339     if (edac_dev->poll_msec == 1000)
0340         edac_queue_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay));
0341     else
0342         edac_queue_work(&edac_dev->work, edac_dev->delay);
0343 }
0344 
0345 /*
0346  * edac_device_workq_setup
0347  *  initialize a workq item for this edac_device instance
0348  *  passing in the new delay period in msec
0349  */
0350 static void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev,
0351                     unsigned msec)
0352 {
0353     edac_dbg(0, "\n");
0354 
0355     /* take the arg 'msec' and set it into the control structure
0356      * to used in the time period calculation
0357      * then calc the number of jiffies that represents
0358      */
0359     edac_dev->poll_msec = msec;
0360     edac_dev->delay = msecs_to_jiffies(msec);
0361 
0362     INIT_DELAYED_WORK(&edac_dev->work, edac_device_workq_function);
0363 
0364     /* optimize here for the 1 second case, which will be normal value, to
0365      * fire ON the 1 second time event. This helps reduce all sorts of
0366      * timers firing on sub-second basis, while they are happy
0367      * to fire together on the 1 second exactly
0368      */
0369     if (edac_dev->poll_msec == 1000)
0370         edac_queue_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay));
0371     else
0372         edac_queue_work(&edac_dev->work, edac_dev->delay);
0373 }
0374 
0375 /*
0376  * edac_device_workq_teardown
0377  *  stop the workq processing on this edac_dev
0378  */
0379 static void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev)
0380 {
0381     if (!edac_dev->edac_check)
0382         return;
0383 
0384     edac_dev->op_state = OP_OFFLINE;
0385 
0386     edac_stop_work(&edac_dev->work);
0387 }
0388 
0389 /*
0390  * edac_device_reset_delay_period
0391  *
0392  *  need to stop any outstanding workq queued up at this time
0393  *  because we will be resetting the sleep time.
0394  *  Then restart the workq on the new delay
0395  */
0396 void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev,
0397                     unsigned long value)
0398 {
0399     unsigned long jiffs = msecs_to_jiffies(value);
0400 
0401     if (value == 1000)
0402         jiffs = round_jiffies_relative(value);
0403 
0404     edac_dev->poll_msec = value;
0405     edac_dev->delay     = jiffs;
0406 
0407     edac_mod_work(&edac_dev->work, jiffs);
0408 }
0409 
0410 int edac_device_alloc_index(void)
0411 {
0412     static atomic_t device_indexes = ATOMIC_INIT(0);
0413 
0414     return atomic_inc_return(&device_indexes) - 1;
0415 }
0416 EXPORT_SYMBOL_GPL(edac_device_alloc_index);
0417 
0418 int edac_device_add_device(struct edac_device_ctl_info *edac_dev)
0419 {
0420     edac_dbg(0, "\n");
0421 
0422 #ifdef CONFIG_EDAC_DEBUG
0423     if (edac_debug_level >= 3)
0424         edac_device_dump_device(edac_dev);
0425 #endif
0426     mutex_lock(&device_ctls_mutex);
0427 
0428     if (add_edac_dev_to_global_list(edac_dev))
0429         goto fail0;
0430 
0431     /* set load time so that error rate can be tracked */
0432     edac_dev->start_time = jiffies;
0433 
0434     /* create this instance's sysfs entries */
0435     if (edac_device_create_sysfs(edac_dev)) {
0436         edac_device_printk(edac_dev, KERN_WARNING,
0437                     "failed to create sysfs device\n");
0438         goto fail1;
0439     }
0440 
0441     /* If there IS a check routine, then we are running POLLED */
0442     if (edac_dev->edac_check != NULL) {
0443         /* This instance is NOW RUNNING */
0444         edac_dev->op_state = OP_RUNNING_POLL;
0445 
0446         /*
0447          * enable workq processing on this instance,
0448          * default = 1000 msec
0449          */
0450         edac_device_workq_setup(edac_dev, 1000);
0451     } else {
0452         edac_dev->op_state = OP_RUNNING_INTERRUPT;
0453     }
0454 
0455     /* Report action taken */
0456     edac_device_printk(edac_dev, KERN_INFO,
0457         "Giving out device to module %s controller %s: DEV %s (%s)\n",
0458         edac_dev->mod_name, edac_dev->ctl_name, edac_dev->dev_name,
0459         edac_op_state_to_string(edac_dev->op_state));
0460 
0461     mutex_unlock(&device_ctls_mutex);
0462     return 0;
0463 
0464 fail1:
0465     /* Some error, so remove the entry from the lsit */
0466     del_edac_device_from_global_list(edac_dev);
0467 
0468 fail0:
0469     mutex_unlock(&device_ctls_mutex);
0470     return 1;
0471 }
0472 EXPORT_SYMBOL_GPL(edac_device_add_device);
0473 
0474 struct edac_device_ctl_info *edac_device_del_device(struct device *dev)
0475 {
0476     struct edac_device_ctl_info *edac_dev;
0477 
0478     edac_dbg(0, "\n");
0479 
0480     mutex_lock(&device_ctls_mutex);
0481 
0482     /* Find the structure on the list, if not there, then leave */
0483     edac_dev = find_edac_device_by_dev(dev);
0484     if (edac_dev == NULL) {
0485         mutex_unlock(&device_ctls_mutex);
0486         return NULL;
0487     }
0488 
0489     /* mark this instance as OFFLINE */
0490     edac_dev->op_state = OP_OFFLINE;
0491 
0492     /* deregister from global list */
0493     del_edac_device_from_global_list(edac_dev);
0494 
0495     mutex_unlock(&device_ctls_mutex);
0496 
0497     /* clear workq processing on this instance */
0498     edac_device_workq_teardown(edac_dev);
0499 
0500     /* Tear down the sysfs entries for this instance */
0501     edac_device_remove_sysfs(edac_dev);
0502 
0503     edac_printk(KERN_INFO, EDAC_MC,
0504         "Removed device %d for %s %s: DEV %s\n",
0505         edac_dev->dev_idx,
0506         edac_dev->mod_name, edac_dev->ctl_name, edac_dev_name(edac_dev));
0507 
0508     return edac_dev;
0509 }
0510 EXPORT_SYMBOL_GPL(edac_device_del_device);
0511 
/* Return the 'log_ce' setting of edac_dev; non-zero makes the CE handler log. */
static inline int edac_device_get_log_ce(struct edac_device_ctl_info *edac_dev)
{
    return edac_dev->log_ce;
}
0516 
/* Return the 'log_ue' setting of edac_dev; non-zero makes the UE handler log. */
static inline int edac_device_get_log_ue(struct edac_device_ctl_info *edac_dev)
{
    return edac_dev->log_ue;
}
0521 
/* Return the 'panic_on_ue' setting of edac_dev; non-zero makes the UE handler panic. */
static inline int edac_device_get_panic_on_ue(struct edac_device_ctl_info
                    *edac_dev)
{
    return edac_dev->panic_on_ue;
}
0527 
0528 void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev,
0529                  unsigned int count, int inst_nr, int block_nr,
0530                  const char *msg)
0531 {
0532     struct edac_device_instance *instance;
0533     struct edac_device_block *block = NULL;
0534 
0535     if (!count)
0536         return;
0537 
0538     if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) {
0539         edac_device_printk(edac_dev, KERN_ERR,
0540                 "INTERNAL ERROR: 'instance' out of range "
0541                 "(%d >= %d)\n", inst_nr,
0542                 edac_dev->nr_instances);
0543         return;
0544     }
0545 
0546     instance = edac_dev->instances + inst_nr;
0547 
0548     if ((block_nr >= instance->nr_blocks) || (block_nr < 0)) {
0549         edac_device_printk(edac_dev, KERN_ERR,
0550                 "INTERNAL ERROR: instance %d 'block' "
0551                 "out of range (%d >= %d)\n",
0552                 inst_nr, block_nr,
0553                 instance->nr_blocks);
0554         return;
0555     }
0556 
0557     if (instance->nr_blocks > 0) {
0558         block = instance->blocks + block_nr;
0559         block->counters.ce_count += count;
0560     }
0561 
0562     /* Propagate the count up the 'totals' tree */
0563     instance->counters.ce_count += count;
0564     edac_dev->counters.ce_count += count;
0565 
0566     if (edac_device_get_log_ce(edac_dev))
0567         edac_device_printk(edac_dev, KERN_WARNING,
0568                    "CE: %s instance: %s block: %s count: %d '%s'\n",
0569                    edac_dev->ctl_name, instance->name,
0570                    block ? block->name : "N/A", count, msg);
0571 }
0572 EXPORT_SYMBOL_GPL(edac_device_handle_ce_count);
0573 
0574 void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
0575                  unsigned int count, int inst_nr, int block_nr,
0576                  const char *msg)
0577 {
0578     struct edac_device_instance *instance;
0579     struct edac_device_block *block = NULL;
0580 
0581     if (!count)
0582         return;
0583 
0584     if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) {
0585         edac_device_printk(edac_dev, KERN_ERR,
0586                 "INTERNAL ERROR: 'instance' out of range "
0587                 "(%d >= %d)\n", inst_nr,
0588                 edac_dev->nr_instances);
0589         return;
0590     }
0591 
0592     instance = edac_dev->instances + inst_nr;
0593 
0594     if ((block_nr >= instance->nr_blocks) || (block_nr < 0)) {
0595         edac_device_printk(edac_dev, KERN_ERR,
0596                 "INTERNAL ERROR: instance %d 'block' "
0597                 "out of range (%d >= %d)\n",
0598                 inst_nr, block_nr,
0599                 instance->nr_blocks);
0600         return;
0601     }
0602 
0603     if (instance->nr_blocks > 0) {
0604         block = instance->blocks + block_nr;
0605         block->counters.ue_count += count;
0606     }
0607 
0608     /* Propagate the count up the 'totals' tree */
0609     instance->counters.ue_count += count;
0610     edac_dev->counters.ue_count += count;
0611 
0612     if (edac_device_get_log_ue(edac_dev))
0613         edac_device_printk(edac_dev, KERN_EMERG,
0614                    "UE: %s instance: %s block: %s count: %d '%s'\n",
0615                    edac_dev->ctl_name, instance->name,
0616                    block ? block->name : "N/A", count, msg);
0617 
0618     if (edac_device_get_panic_on_ue(edac_dev))
0619         panic("EDAC %s: UE instance: %s block %s count: %d '%s'\n",
0620               edac_dev->ctl_name, instance->name,
0621               block ? block->name : "N/A", count, msg);
0622 }
0623 EXPORT_SYMBOL_GPL(edac_device_handle_ue_count);