Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0 */
0002 /*
0003  * linux/arch/alpha/lib/memset.S
0004  *
0005  * This is an efficient (and small) implementation of the C library "memset()"
0006  * function for the alpha.
0007  *
0008  *  (C) Copyright 1996 Linus Torvalds
0009  *
0010  * This routine is "moral-ware": you are free to use it any way you wish, and
0011  * the only obligation I put on you is a moral one: if you make any improvements
0012  * to the routine, please send me your improvements for me to use similarly.
0013  *
0014  * The scheduling comments are according to the EV5 documentation (and done by 
0015  * hand, so they might well be incorrect, please do tell me about it..)
0016  */
0017 #include <asm/export.h>
0018     .set noat          /* assembler may not use the $at temporary in macro expansions */
0019     .set noreorder     /* keep instructions in the written order; the code below is scheduled by hand */
0020 .text
/* Entry points made visible to the linker.  memset and __memset are not
 * labels of their own: they are assigned as aliases of ___memset at the
 * end of this file. */
0021     .globl memset
0022     .globl __memset
0023     .globl ___memset
0024     .globl __memset16
0025     .globl __constant_c_memset
0026 
0027     .ent ___memset
/*
 * ___memset: fill a memory region with a single byte value.
 *
 * Register usage, as read from the code below:
 *   $16  in:  destination address (advanced to the next aligned quadword
 *             when an unaligned head has to be written)
 *   $17  in:  fill value; ___memset uses only its low byte and replicates
 *             it into all eight bytes of $17
 *   $18  in:  byte count; tested as a signed value, <= 0 stores nothing
 *   $0   out: copy of the original destination address
 *   $26       return address (target of "ret")
 *   $1-$7     scratch
 *
 * __constant_c_memset is a second entry point that skips the byte
 * replication: it expects $17 to already hold the full 64-bit fill
 * pattern.  ___memset falls through into it, and __memset16 branches
 * to it.
 *
 * Strategy: a region that lies inside one aligned quadword is handled by
 * a single read-modify-write (within_one_quad).  Otherwise an unaligned
 * head is merged into its quadword, whole quadwords are stored in a
 * loop, and a trailing partial quadword is merged at the end (no_quad).
 *
 * The E0/E1 tags are the hand-written EV5 scheduling notes mentioned in
 * the file header; a leading ".." presumably marks the second
 * instruction of a dual-issued pair.
 */
0028 .align 5
0029 ___memset:
0030     .frame $30,0,$26,0     /* no stack frame: base $30, size 0, return address in $26 */
0031     .prologue 0
0032 
     /* Replicate the low byte of $17 into all eight bytes of $17. */
0033     and $17,255,$1      /* E1: $1 = fill byte in byte 0 */
0034     insbl $17,1,$17     /* .. E0: $17 = fill byte moved to byte 1 */
0035     bis $17,$1,$17      /* E0 (p-c latency, next cycle): bytes 0-1 filled */
0036     sll $17,16,$1       /* E1 (p-c latency, next cycle): copy of bytes 0-1 in bytes 2-3 */
0037 
0038     bis $17,$1,$17      /* E0 (p-c latency, next cycle): bytes 0-3 filled */
0039     sll $17,32,$1       /* E1 (p-c latency, next cycle): copy of bytes 0-3 in bytes 4-7 */
0040     bis $17,$1,$17      /* E0 (p-c latency, next cycle): all eight bytes filled */
0041     ldq_u $31,0($30)    /* .. E1: filler, the load targets $31 so its result is discarded */
0042 
     /* Execution falls through from ___memset into this entry point. */
0043 .align 5
0044 __constant_c_memset:
0045     addq $18,$16,$6     /* E0: $6 = end address (dest + count, one past the last byte) */
0046     bis $16,$16,$0      /* .. E1: $0 = original dest */
0047     xor $16,$6,$1       /* E0: $1 = dest ^ end */
0048     ble $18,end     /* .. E1: count <= 0 (signed): nothing to store */
0049 
0050     bic $1,7,$1     /* E0: drop the low 3 bits; zero iff dest and end share one aligned quadword */
0051     beq $1,within_one_quad  /* .. E1 (note EV5 zero-latency forwarding) */
0052     and $16,7,$3        /* E0: $3 = byte offset of dest inside its quadword */
0053     beq $3,aligned      /* .. E1 (note EV5 zero-latency forwarding) */
0054 
     /* Unaligned head: merge the pattern into bytes (dest & 7)..7 of the
      * quadword that contains dest, then continue from the next quadword. */
0055     ldq_u $4,0($16)     /* E0: $4 = current contents of the quadword containing dest */
0056     bis $16,$16,$5      /* .. E1: $5 = unaligned dest, kept for the stq_u below */
0057     insql $17,$16,$2    /* E0: $2 = pattern in bytes (dest & 7)..7, zero below */
0058     subq $3,8,$3        /* .. E1: $3 = (dest & 7) - 8 = -(number of head bytes) */
0059 
0060     addq $18,$3,$18     /* E0   $18 is new count ($3 is negative) */
0061     mskql $4,$16,$4     /* .. E1 (and possible load stall): keep old bytes below dest & 7 */
0062     subq $16,$3,$16     /* E0   $16 is new aligned destination */
0063     bis $2,$4,$1        /* .. E1: $1 = old low bytes merged with the pattern */
0064 
0065     bis $31,$31,$31     /* E0: nop */
0066     ldq_u $31,0($30)    /* .. E1: filler, result discarded */
0067     stq_u $1,0($5)      /* E0: write the merged head quadword back */
0068     bis $31,$31,$31     /* .. E1: nop */
0069 
     /* $16 is quadword aligned here; $18 = bytes still to be written. */
0070 .align 4
0071 aligned:
0072     sra $18,3,$3        /* E0: $3 = number of whole quadwords */
0073     and $18,7,$18       /* .. E1: $18 = trailing bytes (0-7) */
0074     bis $16,$16,$5      /* E0: $5 = write pointer */
0075     beq $3,no_quad      /* .. E1: no whole quadword to store */
0076 
     /* Store one full quadword of pattern per iteration. */
0077 .align 3
0078 loop:
0079     stq $17,0($5)       /* E0: store 8 pattern bytes */
0080     subq $3,1,$3        /* .. E1: one quadword fewer to go */
0081     addq $5,8,$5        /* E0: advance the write pointer */
0082     bne $3,loop     /* .. E1 */
0083 
     /* Tail: $5 points at the last, partially covered quadword (if any);
      * $6 still holds the end address, so end & 7 == $18 here. */
0084 no_quad:
0085     bis $31,$31,$31     /* E0: nop */
0086     beq $18,end     /* .. E1: no trailing bytes, done */
0087     ldq $7,0($5)        /* E0: $7 = current contents of the last quadword */
0088     mskqh $7,$6,$2      /* .. E1 (and load stall): keep old bytes (end & 7)..7 */
0089 
0090     insqh $17,$6,$4     /* E0: $4 = pattern in bytes 0..(end & 7)-1 */
0091     bis $2,$4,$1        /* .. E1: merge pattern with the preserved high bytes */
0092     stq $1,0($5)        /* E0: write the merged tail quadword back */
0093     ret $31,($26),1     /* .. E1 */
0094 
     /* The whole region lies inside one aligned quadword.  With
      * s = dest & 7 and e = end & 7 (s < e on this path), bytes s..e-1 get
      * the pattern and every other byte of the quadword is preserved. */
0095 .align 3
0096 within_one_quad:
0097     ldq_u $1,0($16)     /* E0: $1 = current contents of the quadword */
0098     insql $17,$16,$2    /* E1: $2 = pattern in bytes s..7 */
0099     mskql $1,$16,$4     /* E0 (after load stall): $4 = old bytes 0..s-1 */
0100     bis $2,$4,$2        /* E0: $2 = old bytes below s, pattern from s upward */
0101 
0102     mskql $2,$6,$4      /* E0: $4 = that value cut off at e (bytes 0..e-1 kept) */
0103     mskqh $1,$6,$2      /* .. E1: $2 = old bytes e..7 */
0104     bis $2,$4,$1        /* E0: $1 = final merged quadword */
0105     stq_u $1,0($16)     /* E0: write it back */
0106 
0107 end:
0108     ret $31,($26),1     /* E1: return through $26; $0 holds the original dest */
0109     .end ___memset
/* EXPORT_SYMBOL comes from <asm/export.h>, included at the top of this
 * file; presumably it makes both entry points available to modules. */
0110 EXPORT_SYMBOL(___memset)
0111 EXPORT_SYMBOL(__constant_c_memset)
0112 
0113     .align 5
/*
 * __memset16: fill a memory region with a 16-bit value.
 *
 * Replicates the low 16 bits of $17 into all four 16-bit words of $17
 * and then branches to __constant_c_memset, which does the stores and
 * returns to the caller.  $16 (destination) and $18 (count) are passed
 * through untouched, so the count is the byte count that
 * __constant_c_memset expects.  $1-$4 are used as scratch here.
 *
 * NOTE(review): __constant_c_memset shifts and masks the pattern at byte
 * granularity, so the 16-bit words stay in phase only if the destination
 * is 2-byte aligned -- presumably guaranteed by callers; confirm.
 */
0114     .ent __memset16
0115 __memset16:
0116     .prologue 0
0117 
0118     inswl $17,0,$1      /* E0: $1 = value in bits 0-15 */
0119     inswl $17,2,$2      /* E0: $2 = value in bits 16-31 */
0120     inswl $17,4,$3      /* E0: $3 = value in bits 32-47 */
0121     or $1,$2,$1     /* .. E1: combine words 0 and 1 */
0122     inswl $17,6,$4      /* E0: $4 = value in bits 48-63 */
0123     or $1,$3,$1     /* .. E1: add word 2 */
0124     or $1,$4,$17        /* E0: $17 = value replicated into all four words */
0125     br __constant_c_memset  /* .. E1: continue in the common fill code (no return here) */
0126 
0127     .end __memset16
0128 EXPORT_SYMBOL(__memset16)
0129 
/* memset and __memset are plain symbol aliases of ___memset: all three
 * names refer to the same code.  Both aliases are passed to
 * EXPORT_SYMBOL in the same way as ___memset itself. */
0130 memset = ___memset
0131 __memset = ___memset
0132     EXPORT_SYMBOL(memset)
0133     EXPORT_SYMBOL(__memset)