Back to home page

LXR

 
 

    


0001 /*
0002  * xor.c : Multiple Devices driver for Linux
0003  *
0004  * Copyright (C) 1996, 1997, 1998, 1999, 2000,
0005  * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson.
0006  *
0007  * Dispatch optimized RAID-5 checksumming functions.
0008  *
0009  * This program is free software; you can redistribute it and/or modify
0010  * it under the terms of the GNU General Public License as published by
0011  * the Free Software Foundation; either version 2, or (at your option)
0012  * any later version.
0013  *
0014  * You should have received a copy of the GNU General Public License
0015  * (for example /usr/src/linux/COPYING); if not, write to the Free
0016  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
0017  */
0018 
0019 #define BH_TRACE 0
0020 #include <linux/module.h>
0021 #include <linux/gfp.h>
0022 #include <linux/raid/xor.h>
0023 #include <linux/jiffies.h>
0024 #include <linux/preempt.h>
0025 #include <asm/xor.h>
0026 
0027 #ifndef XOR_SELECT_TEMPLATE
0028 #define XOR_SELECT_TEMPLATE(x) (x)
0029 #endif
0030 
0031 /* The xor routines to use.  */
0032 static struct xor_block_template *active_template;
0033 
0034 void
0035 xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs)
0036 {
0037     unsigned long *p1, *p2, *p3, *p4;
0038 
0039     p1 = (unsigned long *) srcs[0];
0040     if (src_count == 1) {
0041         active_template->do_2(bytes, dest, p1);
0042         return;
0043     }
0044 
0045     p2 = (unsigned long *) srcs[1];
0046     if (src_count == 2) {
0047         active_template->do_3(bytes, dest, p1, p2);
0048         return;
0049     }
0050 
0051     p3 = (unsigned long *) srcs[2];
0052     if (src_count == 3) {
0053         active_template->do_4(bytes, dest, p1, p2, p3);
0054         return;
0055     }
0056 
0057     p4 = (unsigned long *) srcs[3];
0058     active_template->do_5(bytes, dest, p1, p2, p3, p4);
0059 }
0060 EXPORT_SYMBOL(xor_blocks);
0061 
0062 /* Set of all registered templates.  */
0063 static struct xor_block_template *__initdata template_list;
0064 
0065 #define BENCH_SIZE (PAGE_SIZE)
0066 
0067 static void __init
0068 do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
0069 {
0070     int speed;
0071     unsigned long now, j;
0072     int i, count, max;
0073 
0074     tmpl->next = template_list;
0075     template_list = tmpl;
0076 
0077     preempt_disable();
0078 
0079     /*
0080      * Count the number of XORs done during a whole jiffy, and use
0081      * this to calculate the speed of checksumming.  We use a 2-page
0082      * allocation to have guaranteed color L1-cache layout.
0083      */
0084     max = 0;
0085     for (i = 0; i < 5; i++) {
0086         j = jiffies;
0087         count = 0;
0088         while ((now = jiffies) == j)
0089             cpu_relax();
0090         while (time_before(jiffies, now + 1)) {
0091             mb(); /* prevent loop optimzation */
0092             tmpl->do_2(BENCH_SIZE, b1, b2);
0093             mb();
0094             count++;
0095             mb();
0096         }
0097         if (count > max)
0098             max = count;
0099     }
0100 
0101     preempt_enable();
0102 
0103     speed = max * (HZ * BENCH_SIZE / 1024);
0104     tmpl->speed = speed;
0105 
0106     printk(KERN_INFO "   %-10s: %5d.%03d MB/sec\n", tmpl->name,
0107            speed / 1000, speed % 1000);
0108 }
0109 
0110 static int __init
0111 calibrate_xor_blocks(void)
0112 {
0113     void *b1, *b2;
0114     struct xor_block_template *f, *fastest;
0115 
0116     fastest = XOR_SELECT_TEMPLATE(NULL);
0117 
0118     if (fastest) {
0119         printk(KERN_INFO "xor: automatically using best "
0120                  "checksumming function   %-10s\n",
0121                fastest->name);
0122         goto out;
0123     }
0124 
0125     /*
0126      * Note: Since the memory is not actually used for _anything_ but to
0127      * test the XOR speed, we don't really want kmemcheck to warn about
0128      * reading uninitialized bytes here.
0129      */
0130     b1 = (void *) __get_free_pages(GFP_KERNEL | __GFP_NOTRACK, 2);
0131     if (!b1) {
0132         printk(KERN_WARNING "xor: Yikes!  No memory available.\n");
0133         return -ENOMEM;
0134     }
0135     b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE;
0136 
0137     /*
0138      * If this arch/cpu has a short-circuited selection, don't loop through
0139      * all the possible functions, just test the best one
0140      */
0141 
0142 #define xor_speed(templ)    do_xor_speed((templ), b1, b2)
0143 
0144     printk(KERN_INFO "xor: measuring software checksum speed\n");
0145     XOR_TRY_TEMPLATES;
0146     fastest = template_list;
0147     for (f = fastest; f; f = f->next)
0148         if (f->speed > fastest->speed)
0149             fastest = f;
0150 
0151     printk(KERN_INFO "xor: using function: %s (%d.%03d MB/sec)\n",
0152            fastest->name, fastest->speed / 1000, fastest->speed % 1000);
0153 
0154 #undef xor_speed
0155 
0156     free_pages((unsigned long)b1, 2);
0157 out:
0158     active_template = fastest;
0159     return 0;
0160 }
0161 
0162 static __exit void xor_exit(void) { }
0163 
0164 MODULE_LICENSE("GPL");
0165 
0166 /* when built-in xor.o must initialize before drivers/md/md.o */
0167 core_initcall(calibrate_xor_blocks);
0168 module_exit(xor_exit);