Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /* visemul.c: Emulation of VIS instructions.
0003  *
0004  * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
0005  */
0006 #include <linux/kernel.h>
0007 #include <linux/errno.h>
0008 #include <linux/thread_info.h>
0009 #include <linux/perf_event.h>
0010 
0011 #include <asm/ptrace.h>
0012 #include <asm/pstate.h>
0013 #include <asm/fpumacro.h>
0014 #include <linux/uaccess.h>
0015 #include <asm/cacheflush.h>
0016 
/*
 * OPF field values of the VIS instructions emulated in this file.
 * Each comment gives the 9-bit OPF encoding in binary followed by a
 * short description of the instruction.
 */

/* --- Pixel formatting --- */

/* 000111011 - four 16-bit to 8-bit packs  */
#define FPACK16_OPF 0x03b

/* 000111010 - two 32-bit to 8-bit packs  */
#define FPACK32_OPF 0x03a

/* 000111101 - two 32-bit to 16-bit fixed-point packs  */
#define FPACKFIX_OPF    0x03d

/* 001001101 - four 8-bit to 16-bit expands  */
#define FEXPAND_OPF 0x04d

/* 001001011 - byte-interleaving merge of two 32-bit values  */
#define FPMERGE_OPF 0x04b

/* --- Partitioned multiply --- */

/* 000110001 - 8-by-16-bit partitioned product  */
#define FMUL8x16_OPF    0x031

/* 000110011 - 8-by-16-bit upper alpha partitioned product  */
#define FMUL8x16AU_OPF  0x033

/* 000110101 - 8-by-16-bit lower alpha partitioned product  */
#define FMUL8x16AL_OPF  0x035

/* 000110110 - upper 8-by-16-bit partitioned product  */
#define FMUL8SUx16_OPF  0x036

/* 000110111 - lower 8-by-16-bit partitioned product  */
#define FMUL8ULx16_OPF  0x037

/* 000111000 - upper 8-by-16-bit partitioned product, 32-bit results  */
#define FMULD8SUx16_OPF 0x038

/* 000111001 - lower unsigned 8-by-16-bit partitioned product, 32-bit results  */
#define FMULD8ULx16_OPF 0x039

/* --- Pixel compare --- */

/* 000101000 - four 16-bit compare; set rd if src1 > src2  */
#define FCMPGT16_OPF    0x028

/* 000101100 - two 32-bit compare; set rd if src1 > src2  */
#define FCMPGT32_OPF    0x02c

/* 000100000 - four 16-bit compare; set rd if src1 <= src2  */
#define FCMPLE16_OPF    0x020

/* 000100100 - two 32-bit compare; set rd if src1 <= src2  */
#define FCMPLE32_OPF    0x024

/* 000100010 - four 16-bit compare; set rd if src1 != src2  */
#define FCMPNE16_OPF    0x022

/* 000100110 - two 32-bit compare; set rd if src1 != src2  */
#define FCMPNE32_OPF    0x026

/* 000101010 - four 16-bit compare; set rd if src1 == src2  */
#define FCMPEQ16_OPF    0x02a

/* 000101110 - two 32-bit compare; set rd if src1 == src2  */
#define FCMPEQ32_OPF    0x02e

/* --- Edge handling --- */

/* 000000000 - Eight 8-bit edge boundary processing  */
#define EDGE8_OPF   0x000

/* 000000001 - Eight 8-bit edge boundary processing, no CC */
#define EDGE8N_OPF  0x001

/* 000000010 - Eight 8-bit edge boundary processing, little-endian  */
#define EDGE8L_OPF  0x002

/* 000000011 - Eight 8-bit edge boundary processing, little-endian, no CC  */
#define EDGE8LN_OPF 0x003

/* 000000100 - Four 16-bit edge boundary processing  */
#define EDGE16_OPF  0x004

/* 000000101 - Four 16-bit edge boundary processing, no CC  */
#define EDGE16N_OPF 0x005

/* 000000110 - Four 16-bit edge boundary processing, little-endian  */
#define EDGE16L_OPF 0x006

/* 000000111 - Four 16-bit edge boundary processing, little-endian, no CC  */
#define EDGE16LN_OPF    0x007

/* 000001000 - Two 32-bit edge boundary processing  */
#define EDGE32_OPF  0x008

/* 000001001 - Two 32-bit edge boundary processing, no CC  */
#define EDGE32N_OPF 0x009

/* 000001010 - Two 32-bit edge boundary processing, little-endian  */
#define EDGE32L_OPF 0x00a

/* 000001011 - Two 32-bit edge boundary processing, little-endian, no CC  */
#define EDGE32LN_OPF    0x00b

/* --- Pixel distance, array addressing, byte shuffle --- */

/* 000111110 - distance between 8 8-bit components  */
#define PDIST_OPF   0x03e

/* 000010000 - convert 8-bit 3-D address to blocked byte address  */
#define ARRAY8_OPF  0x010

/* 000010010 - convert 16-bit 3-D address to blocked byte address  */
#define ARRAY16_OPF 0x012

/* 000010100 - convert 32-bit 3-D address to blocked byte address  */
#define ARRAY32_OPF 0x014

/* 000011001 - Set the GSR.MASK field in preparation for a BSHUFFLE  */
#define BMASK_OPF   0x019

/* 001001100 - Permute bytes as specified by GSR.MASK  */
#define BSHUFFLE_OPF    0x04c

/* The 9-bit OPF field occupies bits 13:5 of the instruction word.  */
#define VIS_OPF_SHIFT   5
#define VIS_OPF_MASK    (0x1ff << VIS_OPF_SHIFT)

/*
 * 5-bit register-number fields of the instruction word:
 * rs1 in bits 18:14, rs2 in bits 4:0, rd in bits 29:25.
 */
#define RS1(INSN)   (((INSN) >> 14) & 0x1f)
#define RS2(INSN)   (((INSN) >>  0) & 0x1f)
#define RD(INSN)    (((INSN) >> 25) & 0x1f)
0139 
/* Flush the register windows if any operand register number is 16 or
 * above.  fetch_reg() and store_reg() access those registers through the
 * window save area on the stack, so the flush must happen first.
 *
 * @from_kernel selects the in-kernel "flushw" instruction when non-zero
 * and flushw_user() otherwise.
 */
static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
                                       unsigned int rd, int from_kernel)
{
    /* Nothing to do when every operand is below register 16.  */
    if (rs2 < 16 && rs1 < 16 && rd < 16)
        return;

    if (from_kernel)
        __asm__ __volatile__("flushw");
    else
        flushw_user();
}
0150 
0151 static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
0152 {
0153     unsigned long value, fp;
0154     
0155     if (reg < 16)
0156         return (!reg ? 0 : regs->u_regs[reg]);
0157 
0158     fp = regs->u_regs[UREG_FP];
0159 
0160     if (regs->tstate & TSTATE_PRIV) {
0161         struct reg_window *win;
0162         win = (struct reg_window *)(fp + STACK_BIAS);
0163         value = win->locals[reg - 16];
0164     } else if (!test_thread_64bit_stack(fp)) {
0165         struct reg_window32 __user *win32;
0166         win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp));
0167         get_user(value, &win32->locals[reg - 16]);
0168     } else {
0169         struct reg_window __user *win;
0170         win = (struct reg_window __user *)(fp + STACK_BIAS);
0171         get_user(value, &win->locals[reg - 16]);
0172     }
0173     return value;
0174 }
0175 
0176 static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg,
0177                               struct pt_regs *regs)
0178 {
0179     unsigned long fp = regs->u_regs[UREG_FP];
0180 
0181     BUG_ON(reg < 16);
0182     BUG_ON(regs->tstate & TSTATE_PRIV);
0183 
0184     if (!test_thread_64bit_stack(fp)) {
0185         struct reg_window32 __user *win32;
0186         win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp));
0187         return (unsigned long __user *)&win32->locals[reg - 16];
0188     } else {
0189         struct reg_window __user *win;
0190         win = (struct reg_window __user *)(fp + STACK_BIAS);
0191         return &win->locals[reg - 16];
0192     }
0193 }
0194 
0195 static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg,
0196                            struct pt_regs *regs)
0197 {
0198     BUG_ON(reg >= 16);
0199     BUG_ON(regs->tstate & TSTATE_PRIV);
0200 
0201     return &regs->u_regs[reg];
0202 }
0203 
0204 static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd)
0205 {
0206     if (rd < 16) {
0207         unsigned long *rd_kern = __fetch_reg_addr_kern(rd, regs);
0208 
0209         *rd_kern = val;
0210     } else {
0211         unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs);
0212 
0213         if (!test_thread_64bit_stack(regs->u_regs[UREG_FP]))
0214             __put_user((u32)val, (u32 __user *)rd_user);
0215         else
0216             __put_user(val, rd_user);
0217     }
0218 }
0219 
0220 static inline unsigned long fpd_regval(struct fpustate *f,
0221                        unsigned int insn_regnum)
0222 {
0223     insn_regnum = (((insn_regnum & 1) << 5) |
0224                (insn_regnum & 0x1e));
0225 
0226     return *(unsigned long *) &f->regs[insn_regnum];
0227 }
0228 
0229 static inline unsigned long *fpd_regaddr(struct fpustate *f,
0230                      unsigned int insn_regnum)
0231 {
0232     insn_regnum = (((insn_regnum & 1) << 5) |
0233                (insn_regnum & 0x1e));
0234 
0235     return (unsigned long *) &f->regs[insn_regnum];
0236 }
0237 
0238 static inline unsigned int fps_regval(struct fpustate *f,
0239                       unsigned int insn_regnum)
0240 {
0241     return f->regs[insn_regnum];
0242 }
0243 
0244 static inline unsigned int *fps_regaddr(struct fpustate *f,
0245                     unsigned int insn_regnum)
0246 {
0247     return &f->regs[insn_regnum];
0248 }
0249 
0250 struct edge_tab {
0251     u16 left, right;
0252 };
0253 static struct edge_tab edge8_tab[8] = {
0254     { 0xff, 0x80 },
0255     { 0x7f, 0xc0 },
0256     { 0x3f, 0xe0 },
0257     { 0x1f, 0xf0 },
0258     { 0x0f, 0xf8 },
0259     { 0x07, 0xfc },
0260     { 0x03, 0xfe },
0261     { 0x01, 0xff },
0262 };
0263 static struct edge_tab edge8_tab_l[8] = {
0264     { 0xff, 0x01 },
0265     { 0xfe, 0x03 },
0266     { 0xfc, 0x07 },
0267     { 0xf8, 0x0f },
0268     { 0xf0, 0x1f },
0269     { 0xe0, 0x3f },
0270     { 0xc0, 0x7f },
0271     { 0x80, 0xff },
0272 };
0273 static struct edge_tab edge16_tab[4] = {
0274     { 0xf, 0x8 },
0275     { 0x7, 0xc },
0276     { 0x3, 0xe },
0277     { 0x1, 0xf },
0278 };
0279 static struct edge_tab edge16_tab_l[4] = {
0280     { 0xf, 0x1 },
0281     { 0xe, 0x3 },
0282     { 0xc, 0x7 },
0283     { 0x8, 0xf },
0284 };
0285 static struct edge_tab edge32_tab[2] = {
0286     { 0x3, 0x2 },
0287     { 0x1, 0x3 },
0288 };
0289 static struct edge_tab edge32_tab_l[2] = {
0290     { 0x3, 0x1 },
0291     { 0x2, 0x3 },
0292 };
0293 
0294 static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf)
0295 {
0296     unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val;
0297     u16 left, right;
0298 
0299     maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
0300     orig_rs1 = rs1 = fetch_reg(RS1(insn), regs);
0301     orig_rs2 = rs2 = fetch_reg(RS2(insn), regs);
0302 
0303     if (test_thread_flag(TIF_32BIT)) {
0304         rs1 = rs1 & 0xffffffff;
0305         rs2 = rs2 & 0xffffffff;
0306     }
0307     switch (opf) {
0308     default:
0309     case EDGE8_OPF:
0310     case EDGE8N_OPF:
0311         left = edge8_tab[rs1 & 0x7].left;
0312         right = edge8_tab[rs2 & 0x7].right;
0313         break;
0314     case EDGE8L_OPF:
0315     case EDGE8LN_OPF:
0316         left = edge8_tab_l[rs1 & 0x7].left;
0317         right = edge8_tab_l[rs2 & 0x7].right;
0318         break;
0319 
0320     case EDGE16_OPF:
0321     case EDGE16N_OPF:
0322         left = edge16_tab[(rs1 >> 1) & 0x3].left;
0323         right = edge16_tab[(rs2 >> 1) & 0x3].right;
0324         break;
0325 
0326     case EDGE16L_OPF:
0327     case EDGE16LN_OPF:
0328         left = edge16_tab_l[(rs1 >> 1) & 0x3].left;
0329         right = edge16_tab_l[(rs2 >> 1) & 0x3].right;
0330         break;
0331 
0332     case EDGE32_OPF:
0333     case EDGE32N_OPF:
0334         left = edge32_tab[(rs1 >> 2) & 0x1].left;
0335         right = edge32_tab[(rs2 >> 2) & 0x1].right;
0336         break;
0337 
0338     case EDGE32L_OPF:
0339     case EDGE32LN_OPF:
0340         left = edge32_tab_l[(rs1 >> 2) & 0x1].left;
0341         right = edge32_tab_l[(rs2 >> 2) & 0x1].right;
0342         break;
0343     }
0344 
0345     if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL))
0346         rd_val = right & left;
0347     else
0348         rd_val = left;
0349 
0350     store_reg(regs, rd_val, RD(insn));
0351 
0352     switch (opf) {
0353     case EDGE8_OPF:
0354     case EDGE8L_OPF:
0355     case EDGE16_OPF:
0356     case EDGE16L_OPF:
0357     case EDGE32_OPF:
0358     case EDGE32L_OPF: {
0359         unsigned long ccr, tstate;
0360 
0361         __asm__ __volatile__("subcc %1, %2, %%g0\n\t"
0362                      "rd    %%ccr, %0"
0363                      : "=r" (ccr)
0364                      : "r" (orig_rs1), "r" (orig_rs2)
0365                      : "cc");
0366         tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC);
0367         regs->tstate = tstate | (ccr << 32UL);
0368     }
0369     }
0370 }
0371 
0372 static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf)
0373 {
0374     unsigned long rs1, rs2, rd_val;
0375     unsigned int bits, bits_mask;
0376 
0377     maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
0378     rs1 = fetch_reg(RS1(insn), regs);
0379     rs2 = fetch_reg(RS2(insn), regs);
0380 
0381     bits = (rs2 > 5 ? 5 : rs2);
0382     bits_mask = (1UL << bits) - 1UL;
0383 
0384     rd_val = ((((rs1 >> 11) & 0x3) <<  0) |
0385           (((rs1 >> 33) & 0x3) <<  2) |
0386           (((rs1 >> 55) & 0x1) <<  4) |
0387           (((rs1 >> 13) & 0xf) <<  5) |
0388           (((rs1 >> 35) & 0xf) <<  9) |
0389           (((rs1 >> 56) & 0xf) << 13) |
0390           (((rs1 >> 17) & bits_mask) << 17) |
0391           (((rs1 >> 39) & bits_mask) << (17 + bits)) |
0392           (((rs1 >> 60) & 0xf)       << (17 + (2*bits))));
0393 
0394     switch (opf) {
0395     case ARRAY16_OPF:
0396         rd_val <<= 1;
0397         break;
0398 
0399     case ARRAY32_OPF:
0400         rd_val <<= 2;
0401     }
0402 
0403     store_reg(regs, rd_val, RD(insn));
0404 }
0405 
0406 static void bmask(struct pt_regs *regs, unsigned int insn)
0407 {
0408     unsigned long rs1, rs2, rd_val, gsr;
0409 
0410     maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
0411     rs1 = fetch_reg(RS1(insn), regs);
0412     rs2 = fetch_reg(RS2(insn), regs);
0413     rd_val = rs1 + rs2;
0414 
0415     store_reg(regs, rd_val, RD(insn));
0416 
0417     gsr = current_thread_info()->gsr[0] & 0xffffffff;
0418     gsr |= rd_val << 32UL;
0419     current_thread_info()->gsr[0] = gsr;
0420 }
0421 
0422 static void bshuffle(struct pt_regs *regs, unsigned int insn)
0423 {
0424     struct fpustate *f = FPUSTATE;
0425     unsigned long rs1, rs2, rd_val;
0426     unsigned long bmask, i;
0427 
0428     bmask = current_thread_info()->gsr[0] >> 32UL;
0429 
0430     rs1 = fpd_regval(f, RS1(insn));
0431     rs2 = fpd_regval(f, RS2(insn));
0432 
0433     rd_val = 0UL;
0434     for (i = 0; i < 8; i++) {
0435         unsigned long which = (bmask >> (i * 4)) & 0xf;
0436         unsigned long byte;
0437 
0438         if (which < 8)
0439             byte = (rs1 >> (which * 8)) & 0xff;
0440         else
0441             byte = (rs2 >> ((which-8)*8)) & 0xff;
0442         rd_val |= (byte << (i * 8));
0443     }
0444 
0445     *fpd_regaddr(f, RD(insn)) = rd_val;
0446 }
0447 
0448 static void pdist(struct pt_regs *regs, unsigned int insn)
0449 {
0450     struct fpustate *f = FPUSTATE;
0451     unsigned long rs1, rs2, *rd, rd_val;
0452     unsigned long i;
0453 
0454     rs1 = fpd_regval(f, RS1(insn));
0455     rs2 = fpd_regval(f, RS2(insn));
0456     rd = fpd_regaddr(f, RD(insn));
0457 
0458     rd_val = *rd;
0459 
0460     for (i = 0; i < 8; i++) {
0461         s16 s1, s2;
0462 
0463         s1 = (rs1 >> (56 - (i * 8))) & 0xff;
0464         s2 = (rs2 >> (56 - (i * 8))) & 0xff;
0465 
0466         /* Absolute value of difference. */
0467         s1 -= s2;
0468         if (s1 < 0)
0469             s1 = ~s1 + 1;
0470 
0471         rd_val += s1;
0472     }
0473 
0474     *rd = rd_val;
0475 }
0476 
0477 static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf)
0478 {
0479     struct fpustate *f = FPUSTATE;
0480     unsigned long rs1, rs2, gsr, scale, rd_val;
0481 
0482     gsr = current_thread_info()->gsr[0];
0483     scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f);
0484     switch (opf) {
0485     case FPACK16_OPF: {
0486         unsigned long byte;
0487 
0488         rs2 = fpd_regval(f, RS2(insn));
0489         rd_val = 0;
0490         for (byte = 0; byte < 4; byte++) {
0491             unsigned int val;
0492             s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL;
0493             int scaled = src << scale;
0494             int from_fixed = scaled >> 7;
0495 
0496             val = ((from_fixed < 0) ?
0497                    0 :
0498                    (from_fixed > 255) ?
0499                    255 : from_fixed);
0500 
0501             rd_val |= (val << (8 * byte));
0502         }
0503         *fps_regaddr(f, RD(insn)) = rd_val;
0504         break;
0505     }
0506 
0507     case FPACK32_OPF: {
0508         unsigned long word;
0509 
0510         rs1 = fpd_regval(f, RS1(insn));
0511         rs2 = fpd_regval(f, RS2(insn));
0512         rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL);
0513         for (word = 0; word < 2; word++) {
0514             unsigned long val;
0515             s32 src = (rs2 >> (word * 32UL));
0516             s64 scaled = src << scale;
0517             s64 from_fixed = scaled >> 23;
0518 
0519             val = ((from_fixed < 0) ?
0520                    0 :
0521                    (from_fixed > 255) ?
0522                    255 : from_fixed);
0523 
0524             rd_val |= (val << (32 * word));
0525         }
0526         *fpd_regaddr(f, RD(insn)) = rd_val;
0527         break;
0528     }
0529 
0530     case FPACKFIX_OPF: {
0531         unsigned long word;
0532 
0533         rs2 = fpd_regval(f, RS2(insn));
0534 
0535         rd_val = 0;
0536         for (word = 0; word < 2; word++) {
0537             long val;
0538             s32 src = (rs2 >> (word * 32UL));
0539             s64 scaled = src << scale;
0540             s64 from_fixed = scaled >> 16;
0541 
0542             val = ((from_fixed < -32768) ?
0543                    -32768 :
0544                    (from_fixed > 32767) ?
0545                    32767 : from_fixed);
0546 
0547             rd_val |= ((val & 0xffff) << (word * 16));
0548         }
0549         *fps_regaddr(f, RD(insn)) = rd_val;
0550         break;
0551     }
0552 
0553     case FEXPAND_OPF: {
0554         unsigned long byte;
0555 
0556         rs2 = fps_regval(f, RS2(insn));
0557 
0558         rd_val = 0;
0559         for (byte = 0; byte < 4; byte++) {
0560             unsigned long val;
0561             u8 src = (rs2 >> (byte * 8)) & 0xff;
0562 
0563             val = src << 4;
0564 
0565             rd_val |= (val << (byte * 16));
0566         }
0567         *fpd_regaddr(f, RD(insn)) = rd_val;
0568         break;
0569     }
0570 
0571     case FPMERGE_OPF: {
0572         rs1 = fps_regval(f, RS1(insn));
0573         rs2 = fps_regval(f, RS2(insn));
0574 
0575         rd_val = (((rs2 & 0x000000ff) <<  0) |
0576               ((rs1 & 0x000000ff) <<  8) |
0577               ((rs2 & 0x0000ff00) <<  8) |
0578               ((rs1 & 0x0000ff00) << 16) |
0579               ((rs2 & 0x00ff0000) << 16) |
0580               ((rs1 & 0x00ff0000) << 24) |
0581               ((rs2 & 0xff000000) << 24) |
0582               ((rs1 & 0xff000000) << 32));
0583         *fpd_regaddr(f, RD(insn)) = rd_val;
0584         break;
0585     }
0586     }
0587 }
0588 
0589 static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf)
0590 {
0591     struct fpustate *f = FPUSTATE;
0592     unsigned long rs1, rs2, rd_val;
0593 
0594     switch (opf) {
0595     case FMUL8x16_OPF: {
0596         unsigned long byte;
0597 
0598         rs1 = fps_regval(f, RS1(insn));
0599         rs2 = fpd_regval(f, RS2(insn));
0600 
0601         rd_val = 0;
0602         for (byte = 0; byte < 4; byte++) {
0603             u16 src1 = (rs1 >> (byte *  8)) & 0x00ff;
0604             s16 src2 = (rs2 >> (byte * 16)) & 0xffff;
0605             u32 prod = src1 * src2;
0606             u16 scaled = ((prod & 0x00ffff00) >> 8);
0607 
0608             /* Round up.  */
0609             if (prod & 0x80)
0610                 scaled++;
0611             rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
0612         }
0613 
0614         *fpd_regaddr(f, RD(insn)) = rd_val;
0615         break;
0616     }
0617 
0618     case FMUL8x16AU_OPF:
0619     case FMUL8x16AL_OPF: {
0620         unsigned long byte;
0621         s16 src2;
0622 
0623         rs1 = fps_regval(f, RS1(insn));
0624         rs2 = fps_regval(f, RS2(insn));
0625 
0626         rd_val = 0;
0627         src2 = rs2 >> (opf == FMUL8x16AU_OPF ? 16 : 0);
0628         for (byte = 0; byte < 4; byte++) {
0629             u16 src1 = (rs1 >> (byte * 8)) & 0x00ff;
0630             u32 prod = src1 * src2;
0631             u16 scaled = ((prod & 0x00ffff00) >> 8);
0632 
0633             /* Round up.  */
0634             if (prod & 0x80)
0635                 scaled++;
0636             rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
0637         }
0638 
0639         *fpd_regaddr(f, RD(insn)) = rd_val;
0640         break;
0641     }
0642 
0643     case FMUL8SUx16_OPF:
0644     case FMUL8ULx16_OPF: {
0645         unsigned long byte, ushift;
0646 
0647         rs1 = fpd_regval(f, RS1(insn));
0648         rs2 = fpd_regval(f, RS2(insn));
0649 
0650         rd_val = 0;
0651         ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0;
0652         for (byte = 0; byte < 4; byte++) {
0653             u16 src1;
0654             s16 src2;
0655             u32 prod;
0656             u16 scaled;
0657 
0658             src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
0659             src2 = ((rs2 >> (16 * byte)) & 0xffff);
0660             prod = src1 * src2;
0661             scaled = ((prod & 0x00ffff00) >> 8);
0662 
0663             /* Round up.  */
0664             if (prod & 0x80)
0665                 scaled++;
0666             rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
0667         }
0668 
0669         *fpd_regaddr(f, RD(insn)) = rd_val;
0670         break;
0671     }
0672 
0673     case FMULD8SUx16_OPF:
0674     case FMULD8ULx16_OPF: {
0675         unsigned long byte, ushift;
0676 
0677         rs1 = fps_regval(f, RS1(insn));
0678         rs2 = fps_regval(f, RS2(insn));
0679 
0680         rd_val = 0;
0681         ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0;
0682         for (byte = 0; byte < 2; byte++) {
0683             u16 src1;
0684             s16 src2;
0685             u32 prod;
0686             u16 scaled;
0687 
0688             src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
0689             src2 = ((rs2 >> (16 * byte)) & 0xffff);
0690             prod = src1 * src2;
0691             scaled = ((prod & 0x00ffff00) >> 8);
0692 
0693             /* Round up.  */
0694             if (prod & 0x80)
0695                 scaled++;
0696             rd_val |= ((scaled & 0xffffUL) <<
0697                    ((byte * 32UL) + 7UL));
0698         }
0699         *fpd_regaddr(f, RD(insn)) = rd_val;
0700         break;
0701     }
0702     }
0703 }
0704 
0705 static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf)
0706 {
0707     struct fpustate *f = FPUSTATE;
0708     unsigned long rs1, rs2, rd_val, i;
0709 
0710     rs1 = fpd_regval(f, RS1(insn));
0711     rs2 = fpd_regval(f, RS2(insn));
0712 
0713     rd_val = 0;
0714 
0715     switch (opf) {
0716     case FCMPGT16_OPF:
0717         for (i = 0; i < 4; i++) {
0718             s16 a = (rs1 >> (i * 16)) & 0xffff;
0719             s16 b = (rs2 >> (i * 16)) & 0xffff;
0720 
0721             if (a > b)
0722                 rd_val |= 8 >> i;
0723         }
0724         break;
0725 
0726     case FCMPGT32_OPF:
0727         for (i = 0; i < 2; i++) {
0728             s32 a = (rs1 >> (i * 32)) & 0xffffffff;
0729             s32 b = (rs2 >> (i * 32)) & 0xffffffff;
0730 
0731             if (a > b)
0732                 rd_val |= 2 >> i;
0733         }
0734         break;
0735 
0736     case FCMPLE16_OPF:
0737         for (i = 0; i < 4; i++) {
0738             s16 a = (rs1 >> (i * 16)) & 0xffff;
0739             s16 b = (rs2 >> (i * 16)) & 0xffff;
0740 
0741             if (a <= b)
0742                 rd_val |= 8 >> i;
0743         }
0744         break;
0745 
0746     case FCMPLE32_OPF:
0747         for (i = 0; i < 2; i++) {
0748             s32 a = (rs1 >> (i * 32)) & 0xffffffff;
0749             s32 b = (rs2 >> (i * 32)) & 0xffffffff;
0750 
0751             if (a <= b)
0752                 rd_val |= 2 >> i;
0753         }
0754         break;
0755 
0756     case FCMPNE16_OPF:
0757         for (i = 0; i < 4; i++) {
0758             s16 a = (rs1 >> (i * 16)) & 0xffff;
0759             s16 b = (rs2 >> (i * 16)) & 0xffff;
0760 
0761             if (a != b)
0762                 rd_val |= 8 >> i;
0763         }
0764         break;
0765 
0766     case FCMPNE32_OPF:
0767         for (i = 0; i < 2; i++) {
0768             s32 a = (rs1 >> (i * 32)) & 0xffffffff;
0769             s32 b = (rs2 >> (i * 32)) & 0xffffffff;
0770 
0771             if (a != b)
0772                 rd_val |= 2 >> i;
0773         }
0774         break;
0775 
0776     case FCMPEQ16_OPF:
0777         for (i = 0; i < 4; i++) {
0778             s16 a = (rs1 >> (i * 16)) & 0xffff;
0779             s16 b = (rs2 >> (i * 16)) & 0xffff;
0780 
0781             if (a == b)
0782                 rd_val |= 8 >> i;
0783         }
0784         break;
0785 
0786     case FCMPEQ32_OPF:
0787         for (i = 0; i < 2; i++) {
0788             s32 a = (rs1 >> (i * 32)) & 0xffffffff;
0789             s32 b = (rs2 >> (i * 32)) & 0xffffffff;
0790 
0791             if (a == b)
0792                 rd_val |= 2 >> i;
0793         }
0794         break;
0795     }
0796 
0797     maybe_flush_windows(0, 0, RD(insn), 0);
0798     store_reg(regs, rd_val, RD(insn));
0799 }
0800 
0801 /* Emulate the VIS instructions which are not implemented in
0802  * hardware on Niagara.
0803  */
0804 int vis_emul(struct pt_regs *regs, unsigned int insn)
0805 {
0806     unsigned long pc = regs->tpc;
0807     unsigned int opf;
0808 
0809     BUG_ON(regs->tstate & TSTATE_PRIV);
0810 
0811     perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
0812 
0813     if (test_thread_flag(TIF_32BIT))
0814         pc = (u32)pc;
0815 
0816     if (get_user(insn, (u32 __user *) pc))
0817         return -EFAULT;
0818 
0819     save_and_clear_fpu();
0820 
0821     opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT;
0822     switch (opf) {
0823     default:
0824         return -EINVAL;
0825 
0826     /* Pixel Formatting Instructions.  */
0827     case FPACK16_OPF:
0828     case FPACK32_OPF:
0829     case FPACKFIX_OPF:
0830     case FEXPAND_OPF:
0831     case FPMERGE_OPF:
0832         pformat(regs, insn, opf);
0833         break;
0834 
0835     /* Partitioned Multiply Instructions  */
0836     case FMUL8x16_OPF:
0837     case FMUL8x16AU_OPF:
0838     case FMUL8x16AL_OPF:
0839     case FMUL8SUx16_OPF:
0840     case FMUL8ULx16_OPF:
0841     case FMULD8SUx16_OPF:
0842     case FMULD8ULx16_OPF:
0843         pmul(regs, insn, opf);
0844         break;
0845 
0846     /* Pixel Compare Instructions  */
0847     case FCMPGT16_OPF:
0848     case FCMPGT32_OPF:
0849     case FCMPLE16_OPF:
0850     case FCMPLE32_OPF:
0851     case FCMPNE16_OPF:
0852     case FCMPNE32_OPF:
0853     case FCMPEQ16_OPF:
0854     case FCMPEQ32_OPF:
0855         pcmp(regs, insn, opf);
0856         break;
0857 
0858     /* Edge Handling Instructions  */
0859     case EDGE8_OPF:
0860     case EDGE8N_OPF:
0861     case EDGE8L_OPF:
0862     case EDGE8LN_OPF:
0863     case EDGE16_OPF:
0864     case EDGE16N_OPF:
0865     case EDGE16L_OPF:
0866     case EDGE16LN_OPF:
0867     case EDGE32_OPF:
0868     case EDGE32N_OPF:
0869     case EDGE32L_OPF:
0870     case EDGE32LN_OPF:
0871         edge(regs, insn, opf);
0872         break;
0873 
0874     /* Pixel Component Distance  */
0875     case PDIST_OPF:
0876         pdist(regs, insn);
0877         break;
0878 
0879     /* Three-Dimensional Array Addressing Instructions  */
0880     case ARRAY8_OPF:
0881     case ARRAY16_OPF:
0882     case ARRAY32_OPF:
0883         array(regs, insn, opf);
0884         break;
0885 
0886     /* Byte Mask and Shuffle Instructions  */
0887     case BMASK_OPF:
0888         bmask(regs, insn);
0889         break;
0890 
0891     case BSHUFFLE_OPF:
0892         bshuffle(regs, insn);
0893         break;
0894     }
0895 
0896     regs->tpc = regs->tnpc;
0897     regs->tnpc += 4;
0898     return 0;
0899 }