Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0 */
0002 /*
0003  * arch/alpha/lib/stxcpy.S
0004  * Contributed by Richard Henderson (rth@tamu.edu)
0005  *
0006  * Copy a null-terminated string from SRC to DST.
0007  *
0008  * This is an internal routine used by strcpy, stpcpy, and strcat.
0009  * As such, it uses special linkage conventions to make implementation
0010  * of these public functions more efficient.
0011  *
0012  * On input:
0013  *  t9 = return address
0014  *  a0 = DST
0015  *  a1 = SRC
0016  *
0017  * On output:
0018  *  t12 = bitmask (with one bit set) indicating the last byte written
0019  *  a0  = unaligned address of the last *word* written
0020  *
0021  * Furthermore, v0, a3-a5, and t11 are untouched.
0022  */
0023 
0024 #include <asm/regdef.h>
0025 
0026     .set noat
0027     .set noreorder
0028 
0029     .text
0030 
0031 /* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that
0032    doesn't like putting the entry point for a procedure somewhere in the
0033    middle of the procedure descriptor.  Work around this by putting the
0034    aligned copy in its own procedure descriptor */
0035 
0036     .ent stxcpy_aligned
0037     .align 3
0038 stxcpy_aligned:
0039     .frame sp, 0, t9
0040     .prologue 0
0041 
0042     /* Co-aligned copy path, entered by branch from __stxcpy.  On entry:
0043        t0 == the first destination word for masking back in
0044        t1 == the first source word; a1 has already been advanced by 8.  */
0045 
0046     /* Create the 1st output word and detect 0's in the 1st input word.  */
0047     lda t2, -1      # e1    : build a mask against false zero
0048     mskqh   t2, a1, t2  # e0    :   detection in the src word
0049     mskqh   t1, a1, t3  # e0    : t3 = src word with bytes below the start offset cleared
0050     ornot   t1, t2, t2  # .. e1 : t2 = src word with bytes below the start offset forced to 0xff
0051     mskql   t0, a1, t0  # e0    : assemble the first output word
0052     cmpbge  zero, t2, t8    # .. e1 : bits set iff null found
0053     or  t0, t3, t1  # e0    : t1 = kept dst bytes | valid src bytes
0054     bne t8, $a_eos  # .. e1 : null in the first word, go finish
0055 
0056     /* On entry to this basic block:
0057        t0 == the first destination word for masking back in
0058        t1 == a source word not containing a null.  */
0059 
0060 $a_loop:
0061     stq_u   t1, 0(a0)   # e0    : store the completed word
0062     addq    a0, 8, a0   # .. e1 : step dst to the next word
0063     ldq_u   t1, 0(a1)   # e0    : fetch the next src word
0064     addq    a1, 8, a1   # .. e1 : step src to the next word
0065     cmpbge  zero, t1, t8    # e0 (stall) : t8 bit i set iff byte i of t1 is zero
0066     beq t8, $a_loop # .. e1 (zdb) : no null yet, keep copying whole words
0067 
0068     /* Take care of the final (partial) word store.
0069        On entry to this basic block we have:
0070        t1 == the source word containing the null
0071        t8 == the cmpbge mask that found it.  */
0072 $a_eos:
0073     negq    t8, t6      # e0    : find low bit set
0074     and t8, t6, t12 # e1 (stall) : t12 = lowest set bit of t8, marking the first null
0075 
0076     /* For the sake of the cache, don't read a destination word
0077        if we're not going to need it.  */
0078     and t12, 0x80, t6   # e0    : is the null in byte 7 (whole word gets written)?
0079     bne t6, 1f      # .. e1 (zdb) : yes, skip the dst load and merge
0080 
0081     /* We're doing a partial word store and so need to combine
0082        our source and original destination words.  */
0083     ldq_u   t0, 0(a0)   # e0    : fetch the current dst word
0084     subq    t12, 1, t6  # .. e1 : t6 = byte mask of the bytes below the null
0085     zapnot  t1, t6, t1  # e0    : clear src bytes >= null
0086     or  t12, t6, t8 # .. e1 : t8 = byte mask of the bytes up to and including the null
0087     zap t0, t8, t0  # e0    : clear dst bytes <= null
0088     or  t0, t1, t1  # e1    : merge the src bytes into the dst word
0089 
0090 1:  stq_u   t1, 0(a0)   # e0    : store the final word
0091     ret (t9)        # .. e1 : return through t9 with t12 = last-byte mask
0092 
0093     .end stxcpy_aligned
0094 
0095     .align 3
0096     .ent __stxcpy
0097     .globl __stxcpy
0098 __stxcpy:
0099     .frame sp, 0, t9
0100     .prologue 0
0101 
0102     /* Are source and destination co-aligned?  */
0103     xor a0, a1, t0  # e0    : differing address bits
0104     unop            #       :
0105     and t0, 7, t0   # e0    : nonzero iff the low three address bits differ
0106     bne t0, $unaligned  # .. e1 : different offsets within a word
0107 
0108     /* We are co-aligned; take care of a partial first word.  */
0109     ldq_u   t1, 0(a1)   # e0    : load first src word
0110     and a0, 7, t0   # .. e1 : take care not to load a word ...
0111     addq    a1, 8, a1       # e0    : step src to the following word
0112     beq t0, stxcpy_aligned  # .. e1 : ... if we wont need it
0113     ldq_u   t0, 0(a0)   # e0    : dst starts mid-word; load it for merging
0114     br  stxcpy_aligned  # .. e1 :
0115 
0116 
0117 /* The source and destination are not co-aligned.  Align the destination
0118    and cope.  We have to be very careful about not reading too much and
0119    causing a SEGV.  */
0120 
0121     .align 3
0122 $u_head:
0123     /* We know just enough now to be able to assemble the first
0124        full source word.  We can still find a zero at the end of it
0125        that prevents us from outputting the whole thing.
0126 
0127        On entry to this basic block:
0128        t0 == the first dest word, for masking back in, if needed else 0
0129        t1 == the low bits of the first source word
0130        t6 == bytemask that is -1 in dest word bytes */
0131 
0132     ldq_u   t2, 8(a1)   # e0    : load the src word following a1
0133     addq    a1, 8, a1   # .. e1 : a1 now addresses the word held in t2
0134 
0135     extql   t1, a1, t1  # e0    : shift t1 right by (a1 & 7) bytes
0136     extqh   t2, a1, t4  # e0    : shift t2 left by 8 - (a1 & 7) bytes
0137     mskql   t0, a0, t0  # e0    : keep only dst bytes below the dst offset
0138     or  t1, t4, t1  # .. e1 : assembled src word
0139     mskqh   t1, a0, t1  # e0    : drop src bytes below the dst offset
0140     or  t0, t1, t1  # e1    : first output word
0141 
0142     or  t1, t6, t6  # e0    : force the preserved dst bytes nonzero
0143     cmpbge  zero, t6, t8    # .. e1 : so that only src bytes can report a null
0144     lda t6, -1      # e0    : for masking just below
0145     bne t8, $u_final    # .. e1 : null already inside the first output word
0146 
0147     mskql   t6, a1, t6      # e0    : mask out the bits we have
0148     or  t6, t2, t2      # e1    :   already extracted before
0149     cmpbge  zero, t2, t8        # e0    :   testing eos
0150     bne t8, $u_late_head_exit   # .. e1 (zdb)
0151 
0152     /* Finally, we've got all the stupid leading edge cases taken care
0153        of and we can set up to enter the main loop.  */
0154 
0155     stq_u   t1, 0(a0)   # e0    : store first output word
0156     addq    a0, 8, a0   # .. e1 : step dst to the next word
0157     extql   t2, a1, t0  # e0    : position ho-bits of lo word
0158     ldq_u   t2, 8(a1)   # .. e1 : read next high-order source word
0159     addq    a1, 8, a1   # e0    : a1 now addresses the word held in t2
0160     cmpbge  zero, t2, t8    # .. e1 : test the new word for eos
0161     nop         # e0    :
0162     bne t8, $u_eos  # .. e1 : null found before the main loop starts
0163 
0164     /* Unaligned copy main loop.  In order to avoid reading too much,
0165        the loop is structured to detect zeros in aligned source words.
0166        This has, unfortunately, effectively pulled half of a loop
0167        iteration out into the head and half into the tail, but it does
0168        prevent nastiness from accumulating in the very thing we want
0169        to run as fast as possible.
0170 
0171        On entry to this basic block:
0172        t0 == the shifted high-order bits from the previous source word
0173        t2 == the unshifted current source word
0174 
0175        We further know that t2 does not contain a null terminator.  */
0176 
0177     .align 3
0178 $u_loop:
0179     extqh   t2, a1, t1  # e0    : extract high bits for current word
0180     addq    a1, 8, a1   # .. e1 : step src to the next word
0181     extql   t2, a1, t3  # e0    : extract low bits for next time
0182     addq    a0, 8, a0   # .. e1 : step dst; the store below uses -8(a0)
0183     or  t0, t1, t1  # e0    : current dst word now complete
0184     ldq_u   t2, 0(a1)   # .. e1 : load high word for next time
0185     stq_u   t1, -8(a0)  # e0    : save the current word
0186     mov t3, t0      # .. e1 : carry the low bits into the next iteration
0187     cmpbge  zero, t2, t8    # e0    : test new word for eos
0188     beq t8, $u_loop # .. e1 : no null in the new word, keep going
0189 
0190     /* We've found a zero somewhere in the source word we just read.
0191        If it resides in the lower half, we have one (probably partial)
0192        word to write out, and if it resides in the upper half, we
0193        have one full and one partial word left to write out.
0194 
0195        On entry to this basic block:
0196        t0 == the shifted high-order bits from the previous source word
0197        t2 == the unshifted current source word.  */
0198 $u_eos:
0199     extqh   t2, a1, t1  # e0    : part contributed by the current src word
0200     or  t0, t1, t1  # e1    : first (partial) source word complete
0201 
0202     cmpbge  zero, t1, t8    # e0    : is the null in this first bit?
0203     bne t8, $u_final    # .. e1 (zdb)
0204 
0205 $u_late_head_exit:
0206     stq_u   t1, 0(a0)   # e0    : the null was in the high-order bits
0207     addq    a0, 8, a0   # .. e1 : step dst to the last word
0208     extql   t2, a1, t1  # e0    : remaining bytes of the current src word
0209     cmpbge  zero, t1, t8    # .. e1 : locate the null for the final store
0210 
0211     /* Take care of a final (probably partial) result word.
0212        On entry to this basic block:
0213        t1 == assembled source word
0214        t8 == cmpbge mask that found the null.  */
0215 $u_final:
0216     negq    t8, t6      # e0    : isolate low bit set
0217     and t6, t8, t12 # e1    : t12 = lowest set bit of t8, marking the first null
0218 
0219     and t12, 0x80, t6   # e0    : avoid dest word load if we can
0220     bne t6, 1f      # .. e1 (zdb) : null in byte 7, the whole word is written
0221 
0222     ldq_u   t0, 0(a0)   # e0    : fetch the current dst word
0223     subq    t12, 1, t6  # .. e1 : t6 = byte mask of the bytes below the null
0224     or  t6, t12, t8 # e0    : t8 = byte mask of the bytes up to and including the null
0225     zapnot  t1, t6, t1  # .. e1 : kill source bytes >= null
0226     zap t0, t8, t0  # e0    : kill dest bytes <= null
0227     or  t0, t1, t1  # e1    : merge the src bytes into the dst word
0228 
0229 1:  stq_u   t1, 0(a0)   # e0    : store the final word
0230     ret (t9)        # .. e1 : return through t9 with t12 = last-byte mask
0231 
0232     /* Unaligned copy entry point.  */
0233     .align 3
0234 $unaligned:
0235 
0236     ldq_u   t1, 0(a1)   # e0    : load first source word
0237 
0238     and a0, 7, t4   # .. e1 : find dest misalignment
0239     and a1, 7, t5   # e0    : find src misalignment
0240 
0241     /* Conditionally load the first destination word and a bytemask
0242        with 0xff indicating that the destination byte is sacrosanct.  */
0243 
0244     mov zero, t0    # .. e1 : defaults for an aligned dst:
0245     mov zero, t6    # e0    :   no dst word and no protected bytes
0246     beq t4, 1f      # .. e1 : dst is aligned, keep the defaults
0247     ldq_u   t0, 0(a0)   # e0    : first dst word
0248     lda t6, -1      # .. e1 :
0249     mskql   t6, a0, t6  # e0    : 0xff in the bytes below the dst offset
0250 1:
0251     subq    a1, t4, a1  # .. e1 : sub dest misalignment from src addr
0252 
0253     /* If source misalignment is larger than dest misalignment, we need
0254        extra startup checks to avoid SEGV.  */
0255 
0256     cmplt   t4, t5, t12 # e0    : t12 = 1 iff dst misalignment is below src misalignment
0257     beq t12, $u_head    # .. e1 (zdb)
0258 
0259     lda t2, -1      # e1    : mask out leading garbage in source
0260     mskqh   t2, t5, t2  # e0    : t2 = 0xff in the bytes at or above the src offset
0261     nop         # e0    :
0262     ornot   t1, t2, t3  # .. e1 : force the bytes below the src offset to 0xff
0263     cmpbge  zero, t3, t8    # e0    : is there a zero?
0264     beq t8, $u_head # .. e1 (zdb)
0265 
0266     /* At this point we've found a zero in the first partial word of
0267        the source.  We need to isolate the valid source data and mask
0268        it into the original destination data.  (Incidentally, we know
0269        that we'll need at least one byte of that original dest word.) */
0270 
0271     ldq_u   t0, 0(a0)   # e0    : original dst word
0272 
0273     negq    t8, t6      # .. e1 : build bitmask of bytes <= zero
0274     and t6, t8, t12 # e0    : t12 = lowest set bit of t8, marking the first null
0275     and a1, 7, t5   # .. e1 : byte distance from src position to dst position
0276     subq    t12, 1, t6  # e0    : t6 = byte mask of the bytes below the null
0277     or  t6, t12, t8 # e1    : t8 = byte mask of the bytes up to and including the null
0278     srl t12, t5, t12    # e0    : adjust final null return value
0279 
0280     zapnot  t2, t8, t2  # .. e1 : prepare source word; mirror changes
0281     and t1, t2, t1  # e1    : to source validity mask
0282     extql   t2, a1, t2  # .. e0 : shift the validity mask down to the dst position
0283     extql   t1, a1, t1  # e0    : shift the src data down to the dst position
0284 
0285     andnot  t0, t2, t0  # .. e1 : zero place for source to reside
0286     or  t0, t1, t1  # e1    : and put it there
0287     stq_u   t1, 0(a0)   # .. e0 : single store covers the whole string
0288     ret (t9)        # e1    : return through t9 with t12 = last-byte mask
0289 
0290     .end __stxcpy