Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * This file is subject to the terms and conditions of the GNU General Public
0003  * License.  See the file "COPYING" in the main directory of this archive
0004  * for more details.
0005  *
0006  * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
0007  * Copyright (C) 2007  Maciej W. Rozycki
0008  * Copyright (C) 2008  Thiemo Seufer
0009  * Copyright (C) 2012  MIPS Technologies, Inc.
0010  */
0011 #include <linux/kernel.h>
0012 #include <linux/sched.h>
0013 #include <linux/smp.h>
0014 #include <linux/mm.h>
0015 #include <linux/proc_fs.h>
0016 
0017 #include <asm/bugs.h>
0018 #include <asm/cacheops.h>
0019 #include <asm/cpu-type.h>
0020 #include <asm/inst.h>
0021 #include <asm/io.h>
0022 #include <asm/page.h>
0023 #include <asm/prefetch.h>
0024 #include <asm/bootinfo.h>
0025 #include <asm/mipsregs.h>
0026 #include <asm/mmu_context.h>
0027 #include <asm/cpu.h>
0028 
0029 #ifdef CONFIG_SIBYTE_DMA_PAGEOPS
0030 #include <asm/sibyte/sb1250.h>
0031 #include <asm/sibyte/sb1250_regs.h>
0032 #include <asm/sibyte/sb1250_dma.h>
0033 #endif
0034 
0035 #include <asm/uasm.h>
0036 
/*
 * Registers used in the assembled routines, by MIPS register number.
 * A0 is the destination page, A1 the source page (copy only), A2 the
 * loop end address the bne's compare against, T9 a scratch for
 * pg_addiu(), T0-T3 copy data registers.
 */
#define ZERO 0	/* $zero: hardwired zero */
#define AT 2	/* NOTE(review): named AT but is reg 2 ($v0), not $1 — presumably to keep the real $at free for the assembler; confirm */
#define A0 4	/* destination page address */
#define A1 5	/* source page address (copy only) */
#define A2 6	/* loop termination address */
#define T0 8
#define T1 9
#define T2 10
#define T3 11
#define T9 25	/* scratch used by pg_addiu() */
#define RA 31	/* return address */
0049 
/* Handle labels (which must be positive integers). */
enum label_id {
	label_clear_nopref = 1,
	label_clear_pref,
	label_copy_nopref,
	label_copy_pref_both,
	label_copy_pref_store,
};

/* Instantiate the uasm_l_*() helpers, one per label above. */
UASM_L_LA(_clear_nopref)
UASM_L_LA(_clear_pref)
UASM_L_LA(_copy_nopref)
UASM_L_LA(_copy_pref_both)
UASM_L_LA(_copy_pref_store)

/* We need one branch and therefore one relocation per target label. */
static struct uasm_label labels[5];
static struct uasm_reloc relocs[5];
0068 
/* Match any minor revision of R4600 V1.x / V2.x by masking the low PRId nibble. */
#define cpu_is_r4600_v1_x() ((read_c0_prid() & 0xfffffff0) == 0x00002010)
#define cpu_is_r4600_v2_x() ((read_c0_prid() & 0xfffffff0) == 0x00002020)
0071 
/*
 * R6 has a limited offset of the pref instruction.
 * Skip it if the offset is more than 9 bits.
 *
 * The offset argument is parenthesized so that a compound expression
 * (e.g. "bias + off") cannot misparse in the range comparisons.
 */
#define _uasm_i_pref(a, b, c, d)		\
do {						\
	if (cpu_has_mips_r6) {			\
		if ((c) <= 0xff && (c) >= -0x100)	\
			uasm_i_pref(a, b, c, d);\
	} else {				\
		uasm_i_pref(a, b, c, d);	\
	}					\
} while (0)
0085 
/* Prefetch distance in bytes ahead of the current store/load; 0 = no prefetch. */
static int pref_bias_clear_store;
static int pref_bias_copy_load;
static int pref_bias_copy_store;

/* Prefetch hints for the source (load) and destination (store) streams. */
static u32 pref_src_mode;
static u32 pref_dst_mode;

/* Bytes moved per store/load instruction: 8 with 64-bit support, else 4. */
static int clear_word_size;
static int copy_word_size;

/* Half the unrolled inner-loop size in bytes; each loop is emitted as two halves. */
static int half_clear_loop_size;
static int half_copy_loop_size;

/* D-cache (or S-cache) line size chosen by set_prefetch_parameters(). */
static int cache_line_size;
#define cache_line_mask() (cache_line_size - 1)
0101 
/*
 * Emit code computing "reg1 = reg2 + off".
 *
 * When the R4000 daddiu erratum workaround is active (64-bit GPRs,
 * CONFIG_CPU_DADDI_WORKAROUNDS and r4k_daddiu_bug()), the offset is
 * materialized in T9 and added with daddu instead of using a daddiu
 * immediate.  In either case offsets that do not fit a signed 16-bit
 * immediate are built with lui/addiu first.
 */
static inline void
pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off)
{
	if (cpu_has_64bit_gp_regs &&
	    IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS) &&
	    r4k_daddiu_bug()) {
		if (off > 0x7fff) {
			/* Offset too big for one immediate: build it in T9. */
			uasm_i_lui(buf, T9, uasm_rel_hi(off));
			uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
		} else
			uasm_i_addiu(buf, T9, ZERO, off);
		uasm_i_daddu(buf, reg1, reg2, T9);
	} else {
		if (off > 0x7fff) {
			uasm_i_lui(buf, T9, uasm_rel_hi(off));
			uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
			UASM_i_ADDU(buf, reg1, reg2, T9);
		} else
			UASM_i_ADDIU(buf, reg1, reg2, off);
	}
}
0123 
/*
 * Choose word sizes, prefetch hints, prefetch distances and the loop
 * unrolling limits for the current CPU.  Called by both builders
 * before any code is synthesized.
 */
static void set_prefetch_parameters(void)
{
	/*
	 * Clearing can use 8-byte stores of $zero even without full
	 * 64-bit GPRs, provided the zero register is 64 bits wide.
	 */
	if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg)
		clear_word_size = 8;
	else
		clear_word_size = 4;

	/* Copying needs full-width loads too, so require 64-bit GPRs. */
	if (cpu_has_64bit_gp_regs)
		copy_word_size = 8;
	else
		copy_word_size = 4;

	/*
	 * The pref's used here are using "streaming" hints, which cause the
	 * copied data to be kicked out of the cache sooner.  A page copy often
	 * ends up copying a lot more data than is commonly used, so this seems
	 * to make sense in terms of reducing cache pollution, but I've no real
	 * performance data to back this up.
	 */
	if (cpu_has_prefetch) {
		/*
		 * XXX: Most prefetch bias values in here are based on
		 * guesswork.
		 */
		cache_line_size = cpu_dcache_line_size();
		switch (current_cpu_type()) {
		case CPU_R5500:
		case CPU_TX49XX:
			/* These processors only support the Pref_Load. */
			pref_bias_copy_load = 256;
			break;

		case CPU_R10000:
		case CPU_R12000:
		case CPU_R14000:
		case CPU_R16000:
			/*
			 * Those values have been experimentally tuned for an
			 * Origin 200.
			 */
			pref_bias_clear_store = 512;
			pref_bias_copy_load = 256;
			pref_bias_copy_store = 256;
			pref_src_mode = Pref_LoadStreamed;
			pref_dst_mode = Pref_StoreStreamed;
			break;

		case CPU_SB1:
		case CPU_SB1A:
			pref_bias_clear_store = 128;
			pref_bias_copy_load = 128;
			pref_bias_copy_store = 128;
			/*
			 * SB1 pass1 Pref_LoadStreamed/Pref_StoreStreamed
			 * hints are broken.
			 */
			if (current_cpu_type() == CPU_SB1 &&
			    (current_cpu_data.processor_id & 0xff) < 0x02) {
				pref_src_mode = Pref_Load;
				pref_dst_mode = Pref_Store;
			} else {
				pref_src_mode = Pref_LoadStreamed;
				pref_dst_mode = Pref_StoreStreamed;
			}
			break;

		case CPU_LOONGSON64:
			/* Loongson-3 only support the Pref_Load/Pref_Store. */
			pref_bias_clear_store = 128;
			pref_bias_copy_load = 128;
			pref_bias_copy_store = 128;
			pref_src_mode = Pref_Load;
			pref_dst_mode = Pref_Store;
			break;

		default:
			pref_bias_clear_store = 128;
			pref_bias_copy_load = 256;
			pref_bias_copy_store = 128;
			pref_src_mode = Pref_LoadStreamed;
			if (cpu_has_mips_r6)
				/*
				 * Bit 30 (Pref_PrepareForStore) has been
				 * removed from MIPS R6. Use bit 5
				 * (Pref_StoreStreamed).
				 */
				pref_dst_mode = Pref_StoreStreamed;
			else
				pref_dst_mode = Pref_PrepareForStore;
			break;
		}
	} else {
		/*
		 * No prefetch: the pref builders fall back to "create
		 * dirty exclusive" cacheops, so pick the line size of
		 * whichever cache supports them.
		 */
		if (cpu_has_cache_cdex_s)
			cache_line_size = cpu_scache_line_size();
		else if (cpu_has_cache_cdex_p)
			cache_line_size = cpu_dcache_line_size();
	}
	/*
	 * Too much unrolling will overflow the available space in
	 * clear_space_array / copy_page_array.
	 */
	half_clear_loop_size = min(16 * clear_word_size,
				   max(cache_line_size >> 1,
				       4 * clear_word_size));
	half_copy_loop_size = min(16 * copy_word_size,
				  max(cache_line_size >> 1,
				      4 * copy_word_size));
}
0232 
0233 static void build_clear_store(u32 **buf, int off)
0234 {
0235     if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) {
0236         uasm_i_sd(buf, ZERO, off, A0);
0237     } else {
0238         uasm_i_sw(buf, ZERO, off, A0);
0239     }
0240 }
0241 
/*
 * Emit a destination prefetch (or "create dirty exclusive" cacheop)
 * for offset off, at most once per cache line.
 */
static inline void build_clear_pref(u32 **buf, int off)
{
	if (off & cache_line_mask())
		return;

	if (pref_bias_clear_store) {
		_uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off,
			    A0);
	} else if (cache_line_size == (half_clear_loop_size << 1)) {
		/*
		 * No prefetch: allocate dirty lines instead, but only
		 * if one loop iteration covers exactly one cache line.
		 */
		if (cpu_has_cache_cdex_s) {
			uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
		} else if (cpu_has_cache_cdex_p) {
			if (IS_ENABLED(CONFIG_WAR_R4600_V1_HIT_CACHEOP) &&
			    cpu_is_r4600_v1_x()) {
				/* R4600 V1.x workaround: pad before the cacheop. */
				uasm_i_nop(buf);
				uasm_i_nop(buf);
				uasm_i_nop(buf);
				uasm_i_nop(buf);
			}

			if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) &&
			    cpu_is_r4600_v2_x())
				/*
				 * R4600 V2.x workaround: dummy load through
				 * AT (the builder preloads 0xa0000000 there).
				 */
				uasm_i_lw(buf, ZERO, ZERO, AT);

			uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
		}
	}
}
0270 
/* Start/end markers of the buffers the synthesized routines are written
 * into; defined elsewhere (assembler/linker). */
extern u32 __clear_page_start;
extern u32 __clear_page_end;
extern u32 __copy_page_start;
extern u32 __copy_page_end;
0275 
/*
 * Synthesize the clear_page handler into __clear_page_start.
 * On entry to the generated code, A0 holds the page address.
 * Safe to call repeatedly: only the first call generates code.
 */
void build_clear_page(void)
{
	int off;
	u32 *buf = &__clear_page_start;
	struct uasm_label *l = labels;
	struct uasm_reloc *r = relocs;
	int i;
	static atomic_t run_once = ATOMIC_INIT(0);

	if (atomic_xchg(&run_once, 1)) {
		return;
	}

	memset(labels, 0, sizeof(labels));
	memset(relocs, 0, sizeof(relocs));

	set_prefetch_parameters();

	/*
	 * This algorithm makes the following assumptions:
	 *   - The prefetch bias is a multiple of 2 words.
	 *   - The prefetch bias is less than one page.
	 */
	BUG_ON(pref_bias_clear_store % (2 * clear_word_size));
	BUG_ON(PAGE_SIZE < pref_bias_clear_store);

	/* A2 = address where the prefetching main loop must stop. */
	off = PAGE_SIZE - pref_bias_clear_store;
	if (off > 0xffff || !pref_bias_clear_store)
		pg_addiu(&buf, A2, A0, off);
	else
		uasm_i_ori(&buf, A2, A0, off);

	if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && cpu_is_r4600_v2_x())
		uasm_i_lui(&buf, AT, uasm_rel_hi(0xa0000000));

	/* Prime the pipeline: prefetch up to 8 lines before looping. */
	off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size)
				* cache_line_size : 0;
	while (off) {
		build_clear_pref(&buf, -off);
		off -= cache_line_size;
	}
	uasm_l_clear_pref(&l, buf);
	/* Main loop, first unrolled half: positive offsets from A0. */
	do {
		build_clear_pref(&buf, off);
		build_clear_store(&buf, off);
		off += clear_word_size;
	} while (off < half_clear_loop_size);
	/*
	 * Advance A0 past both halves, then emit the second half with
	 * negative offsets so the loop branch can be placed with the
	 * final store in its delay slot.
	 */
	pg_addiu(&buf, A0, A0, 2 * off);
	off = -off;
	do {
		build_clear_pref(&buf, off);
		if (off == -clear_word_size)
			uasm_il_bne(&buf, &r, A0, A2, label_clear_pref);
		build_clear_store(&buf, off);
		off += clear_word_size;
	} while (off < 0);

	if (pref_bias_clear_store) {
		/* Tail loop: the last pref_bias bytes, no prefetching. */
		pg_addiu(&buf, A2, A0, pref_bias_clear_store);
		uasm_l_clear_nopref(&l, buf);
		off = 0;
		do {
			build_clear_store(&buf, off);
			off += clear_word_size;
		} while (off < half_clear_loop_size);
		pg_addiu(&buf, A0, A0, 2 * off);
		off = -off;
		do {
			if (off == -clear_word_size)
				uasm_il_bne(&buf, &r, A0, A2,
					    label_clear_nopref);
			build_clear_store(&buf, off);
			off += clear_word_size;
		} while (off < 0);
	}

	uasm_i_jr(&buf, RA);
	uasm_i_nop(&buf);

	BUG_ON(buf > &__clear_page_end);

	uasm_resolve_relocs(relocs, labels);

	pr_debug("Synthesized clear page handler (%u instructions).\n",
		 (u32)(buf - &__clear_page_start));

	/* Dump the generated code as re-assemblable debug output. */
	pr_debug("\t.set push\n");
	pr_debug("\t.set noreorder\n");
	for (i = 0; i < (buf - &__clear_page_start); i++)
		pr_debug("\t.word 0x%08x\n", (&__clear_page_start)[i]);
	pr_debug("\t.set pop\n");
}
0368 
0369 static void build_copy_load(u32 **buf, int reg, int off)
0370 {
0371     if (cpu_has_64bit_gp_regs) {
0372         uasm_i_ld(buf, reg, off, A1);
0373     } else {
0374         uasm_i_lw(buf, reg, off, A1);
0375     }
0376 }
0377 
0378 static void build_copy_store(u32 **buf, int reg, int off)
0379 {
0380     if (cpu_has_64bit_gp_regs) {
0381         uasm_i_sd(buf, reg, off, A0);
0382     } else {
0383         uasm_i_sw(buf, reg, off, A0);
0384     }
0385 }
0386 
0387 static inline void build_copy_load_pref(u32 **buf, int off)
0388 {
0389     if (off & cache_line_mask())
0390         return;
0391 
0392     if (pref_bias_copy_load)
0393         _uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, A1);
0394 }
0395 
/*
 * Emit a destination prefetch (or "create dirty exclusive" cacheop)
 * for offset off, at most once per cache line.
 */
static inline void build_copy_store_pref(u32 **buf, int off)
{
	if (off & cache_line_mask())
		return;

	if (pref_bias_copy_store) {
		_uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off,
			    A0);
	} else if (cache_line_size == (half_copy_loop_size << 1)) {
		/*
		 * No prefetch: allocate dirty lines instead, but only
		 * if one loop iteration covers exactly one cache line.
		 */
		if (cpu_has_cache_cdex_s) {
			uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
		} else if (cpu_has_cache_cdex_p) {
			if (IS_ENABLED(CONFIG_WAR_R4600_V1_HIT_CACHEOP) &&
			    cpu_is_r4600_v1_x()) {
				/* R4600 V1.x workaround: pad before the cacheop. */
				uasm_i_nop(buf);
				uasm_i_nop(buf);
				uasm_i_nop(buf);
				uasm_i_nop(buf);
			}

			if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) &&
			    cpu_is_r4600_v2_x())
				/*
				 * R4600 V2.x workaround: dummy load through
				 * AT (the builder preloads 0xa0000000 there).
				 */
				uasm_i_lw(buf, ZERO, ZERO, AT);

			uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
		}
	}
}
0424 
/*
 * Synthesize the copy_page handler into __copy_page_start.
 * On entry to the generated code, A0 is the destination page and A1
 * the source page.  Safe to call repeatedly: only the first call
 * generates code.
 */
void build_copy_page(void)
{
	int off;
	u32 *buf = &__copy_page_start;
	struct uasm_label *l = labels;
	struct uasm_reloc *r = relocs;
	int i;
	static atomic_t run_once = ATOMIC_INIT(0);

	if (atomic_xchg(&run_once, 1)) {
		return;
	}

	memset(labels, 0, sizeof(labels));
	memset(relocs, 0, sizeof(relocs));

	set_prefetch_parameters();

	/*
	 * This algorithm makes the following assumptions:
	 *   - All prefetch biases are multiples of 8 words.
	 *   - The prefetch biases are less than one page.
	 *   - The store prefetch bias isn't greater than the load
	 *     prefetch bias.
	 */
	BUG_ON(pref_bias_copy_load % (8 * copy_word_size));
	BUG_ON(pref_bias_copy_store % (8 * copy_word_size));
	BUG_ON(PAGE_SIZE < pref_bias_copy_load);
	BUG_ON(pref_bias_copy_store > pref_bias_copy_load);

	/* A2 = address where the both-prefetching loop must stop. */
	off = PAGE_SIZE - pref_bias_copy_load;
	if (off > 0xffff || !pref_bias_copy_load)
		pg_addiu(&buf, A2, A0, off);
	else
		uasm_i_ori(&buf, A2, A0, off);

	if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && cpu_is_r4600_v2_x())
		uasm_i_lui(&buf, AT, uasm_rel_hi(0xa0000000));

	/* Prime the load prefetches: up to 8 lines before looping. */
	off = cache_line_size ? min(8, pref_bias_copy_load / cache_line_size) *
				cache_line_size : 0;
	while (off) {
		build_copy_load_pref(&buf, -off);
		off -= cache_line_size;
	}
	/* Likewise for the store prefetches. */
	off = cache_line_size ? min(8, pref_bias_copy_store / cache_line_size) *
				cache_line_size : 0;
	while (off) {
		build_copy_store_pref(&buf, -off);
		off -= cache_line_size;
	}
	/* Main loop: prefetches both source and destination streams. */
	uasm_l_copy_pref_both(&l, buf);
	do {
		build_copy_load_pref(&buf, off);
		build_copy_load(&buf, T0, off);
		build_copy_load_pref(&buf, off + copy_word_size);
		build_copy_load(&buf, T1, off + copy_word_size);
		build_copy_load_pref(&buf, off + 2 * copy_word_size);
		build_copy_load(&buf, T2, off + 2 * copy_word_size);
		build_copy_load_pref(&buf, off + 3 * copy_word_size);
		build_copy_load(&buf, T3, off + 3 * copy_word_size);
		build_copy_store_pref(&buf, off);
		build_copy_store(&buf, T0, off);
		build_copy_store_pref(&buf, off + copy_word_size);
		build_copy_store(&buf, T1, off + copy_word_size);
		build_copy_store_pref(&buf, off + 2 * copy_word_size);
		build_copy_store(&buf, T2, off + 2 * copy_word_size);
		build_copy_store_pref(&buf, off + 3 * copy_word_size);
		build_copy_store(&buf, T3, off + 3 * copy_word_size);
		off += 4 * copy_word_size;
	} while (off < half_copy_loop_size);
	/*
	 * Advance both pointers past the two halves, then emit the
	 * second half at negative offsets so the loop branch gets the
	 * final store in its delay slot.
	 */
	pg_addiu(&buf, A1, A1, 2 * off);
	pg_addiu(&buf, A0, A0, 2 * off);
	off = -off;
	do {
		build_copy_load_pref(&buf, off);
		build_copy_load(&buf, T0, off);
		build_copy_load_pref(&buf, off + copy_word_size);
		build_copy_load(&buf, T1, off + copy_word_size);
		build_copy_load_pref(&buf, off + 2 * copy_word_size);
		build_copy_load(&buf, T2, off + 2 * copy_word_size);
		build_copy_load_pref(&buf, off + 3 * copy_word_size);
		build_copy_load(&buf, T3, off + 3 * copy_word_size);
		build_copy_store_pref(&buf, off);
		build_copy_store(&buf, T0, off);
		build_copy_store_pref(&buf, off + copy_word_size);
		build_copy_store(&buf, T1, off + copy_word_size);
		build_copy_store_pref(&buf, off + 2 * copy_word_size);
		build_copy_store(&buf, T2, off + 2 * copy_word_size);
		build_copy_store_pref(&buf, off + 3 * copy_word_size);
		if (off == -(4 * copy_word_size))
			uasm_il_bne(&buf, &r, A2, A0, label_copy_pref_both);
		build_copy_store(&buf, T3, off + 3 * copy_word_size);
		off += 4 * copy_word_size;
	} while (off < 0);

	if (pref_bias_copy_load - pref_bias_copy_store) {
		/*
		 * Middle section: source prefetches have run past the
		 * page end, keep prefetching the destination only.
		 */
		pg_addiu(&buf, A2, A0,
			 pref_bias_copy_load - pref_bias_copy_store);
		uasm_l_copy_pref_store(&l, buf);
		off = 0;
		do {
			build_copy_load(&buf, T0, off);
			build_copy_load(&buf, T1, off + copy_word_size);
			build_copy_load(&buf, T2, off + 2 * copy_word_size);
			build_copy_load(&buf, T3, off + 3 * copy_word_size);
			build_copy_store_pref(&buf, off);
			build_copy_store(&buf, T0, off);
			build_copy_store_pref(&buf, off + copy_word_size);
			build_copy_store(&buf, T1, off + copy_word_size);
			build_copy_store_pref(&buf, off + 2 * copy_word_size);
			build_copy_store(&buf, T2, off + 2 * copy_word_size);
			build_copy_store_pref(&buf, off + 3 * copy_word_size);
			build_copy_store(&buf, T3, off + 3 * copy_word_size);
			off += 4 * copy_word_size;
		} while (off < half_copy_loop_size);
		pg_addiu(&buf, A1, A1, 2 * off);
		pg_addiu(&buf, A0, A0, 2 * off);
		off = -off;
		do {
			build_copy_load(&buf, T0, off);
			build_copy_load(&buf, T1, off + copy_word_size);
			build_copy_load(&buf, T2, off + 2 * copy_word_size);
			build_copy_load(&buf, T3, off + 3 * copy_word_size);
			build_copy_store_pref(&buf, off);
			build_copy_store(&buf, T0, off);
			build_copy_store_pref(&buf, off + copy_word_size);
			build_copy_store(&buf, T1, off + copy_word_size);
			build_copy_store_pref(&buf, off + 2 * copy_word_size);
			build_copy_store(&buf, T2, off + 2 * copy_word_size);
			build_copy_store_pref(&buf, off + 3 * copy_word_size);
			if (off == -(4 * copy_word_size))
				uasm_il_bne(&buf, &r, A2, A0,
					    label_copy_pref_store);
			build_copy_store(&buf, T3, off + 3 * copy_word_size);
			off += 4 * copy_word_size;
		} while (off < 0);
	}

	if (pref_bias_copy_store) {
		/* Tail: the last pref_bias_copy_store bytes, no prefetching. */
		pg_addiu(&buf, A2, A0, pref_bias_copy_store);
		uasm_l_copy_nopref(&l, buf);
		off = 0;
		do {
			build_copy_load(&buf, T0, off);
			build_copy_load(&buf, T1, off + copy_word_size);
			build_copy_load(&buf, T2, off + 2 * copy_word_size);
			build_copy_load(&buf, T3, off + 3 * copy_word_size);
			build_copy_store(&buf, T0, off);
			build_copy_store(&buf, T1, off + copy_word_size);
			build_copy_store(&buf, T2, off + 2 * copy_word_size);
			build_copy_store(&buf, T3, off + 3 * copy_word_size);
			off += 4 * copy_word_size;
		} while (off < half_copy_loop_size);
		pg_addiu(&buf, A1, A1, 2 * off);
		pg_addiu(&buf, A0, A0, 2 * off);
		off = -off;
		do {
			build_copy_load(&buf, T0, off);
			build_copy_load(&buf, T1, off + copy_word_size);
			build_copy_load(&buf, T2, off + 2 * copy_word_size);
			build_copy_load(&buf, T3, off + 3 * copy_word_size);
			build_copy_store(&buf, T0, off);
			build_copy_store(&buf, T1, off + copy_word_size);
			build_copy_store(&buf, T2, off + 2 * copy_word_size);
			if (off == -(4 * copy_word_size))
				uasm_il_bne(&buf, &r, A2, A0,
					    label_copy_nopref);
			build_copy_store(&buf, T3, off + 3 * copy_word_size);
			off += 4 * copy_word_size;
		} while (off < 0);
	}

	uasm_i_jr(&buf, RA);
	uasm_i_nop(&buf);

	BUG_ON(buf > &__copy_page_end);

	uasm_resolve_relocs(relocs, labels);

	pr_debug("Synthesized copy page handler (%u instructions).\n",
		 (u32)(buf - &__copy_page_start));

	/* Dump the generated code as re-assemblable debug output. */
	pr_debug("\t.set push\n");
	pr_debug("\t.set noreorder\n");
	for (i = 0; i < (buf - &__copy_page_start); i++)
		pr_debug("\t.word 0x%08x\n", (&__copy_page_start)[i]);
	pr_debug("\t.set pop\n");
}
0614 
#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
/* CPU-driven fallbacks (the uasm-synthesized routines above). */
extern void clear_page_cpu(void *page);
extern void copy_page_cpu(void *to, void *from);

/*
 * Pad descriptors to cacheline, since each is exclusively owned by a
 * particular CPU.
 */
struct dmadscr {
	u64 dscr_a;
	u64 dscr_b;
	u64 pad_a;
	u64 pad_b;
} ____cacheline_aligned_in_smp page_descr[DM_NUM_CHANNELS];
0629 
/*
 * Zero a page using the SiByte data mover instead of the CPU.
 * Pages outside KSEG0 fall back to the CPU routine.  Spins until the
 * data mover signals completion.
 */
void clear_page(void *page)
{
	u64 to_phys = CPHYSADDR((unsigned long)page);
	unsigned int cpu = smp_processor_id();

	/* if the page is not in KSEG0, use old way */
	if ((long)KSEGX((unsigned long)page) != (long)CKSEG0)
		return clear_page_cpu(page);

	/* Program one zero-memory descriptor and kick this CPU's channel. */
	page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM |
				 M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
	page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
	__raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

	/*
	 * Don't really want to do it this way, but there's no
	 * reliable way to delay completion detection.
	 */
	while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
		 & M_DM_DSCR_BASE_INTERRUPT))
		;
	/* NOTE(review): final read presumably acknowledges/clears the
	 * channel status — confirm against the SB1250 data mover docs. */
	__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}
EXPORT_SYMBOL(clear_page);
0654 
/*
 * Copy a page using the SiByte data mover instead of the CPU.
 * If either page is outside KSEG0, fall back to the CPU routine.
 * Spins until the data mover signals completion.
 */
void copy_page(void *to, void *from)
{
	u64 from_phys = CPHYSADDR((unsigned long)from);
	u64 to_phys = CPHYSADDR((unsigned long)to);
	unsigned int cpu = smp_processor_id();

	/* if any page is not in KSEG0, use old way */
	if ((long)KSEGX((unsigned long)to) != (long)CKSEG0
	    || (long)KSEGX((unsigned long)from) != (long)CKSEG0)
		return copy_page_cpu(to, from);

	/* Program one copy descriptor and kick this CPU's channel. */
	page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST |
				 M_DM_DSCRA_INTERRUPT;
	page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
	__raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

	/*
	 * Don't really want to do it this way, but there's no
	 * reliable way to delay completion detection.
	 */
	while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
		 & M_DM_DSCR_BASE_INTERRUPT))
		;
	/* NOTE(review): final read presumably acknowledges/clears the
	 * channel status — confirm against the SB1250 data mover docs. */
	__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}
EXPORT_SYMBOL(copy_page);
0681 
0682 #endif /* CONFIG_SIBYTE_DMA_PAGEOPS */