Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0 */
0002 /*
0003  * strlen() for PPC32
0004  *
0005  * Copyright (C) 2018 Christophe Leroy CS Systemes d'Information.
0006  *
0007  * Inspired from glibc implementation
0008  */
0009 #include <asm/ppc_asm.h>
0010 #include <asm/export.h>
0011 #include <asm/cache.h>
0012 
0013     .text
0014 
0015 /*
0016  * Algorithm:
0017  *
0018  * 1) Given a word 'x', we can test to see if it contains any 0 bytes
0019  *    by subtracting 0x01010101, and seeing if any of the high bits of each
0020  *    byte changed from 0 to 1. This works because the least significant
0021  *    0 byte must have had no incoming carry (otherwise it's not the least
0022  *    significant), so it is 0x00 - 0x01 == 0xff. For all other
0023  *    byte values, either they have the high bit set initially, or when
0024  *    1 is subtracted you get a value in the range 0x00-0x7f, none of which
0025  *    have their high bit set. The expression here is
0026  *    ((x - 0x01010101) & ~x & 0x80808080), which gives 0x00000000 when
0027  *    there were no 0x00 bytes in the word.  You get 0x80 in bytes that
0028  *    match, but possibly false 0x80 matches in the next more significant
0029  *    byte to a true match due to carries.  For little-endian this is
0030  *    of no consequence since the least significant match is the one
0031  *    we're interested in, but big-endian needs method 2 to find which
0032  *    byte matches.
0033  * 2) Given a word 'x', we can test to see _which_ byte was zero by
0034  *    calculating ~(((x & ~0x80808080) - 0x80808080 - 1) | x | ~0x80808080).
0035  *    This produces 0x80 in each byte that was zero, and 0x00 in all
0036  *    the other bytes. The '| ~0x80808080' clears the low 7 bits in each
0037  *    byte, and the '| x' part ensures that bytes with the high bit set
0038  *    produce 0x00. The '- 0x80808080 - 1' is an addition of 0x7f7f7f7f; it
0039  *    carries into the high bit of each byte iff that byte had one of its
0040  *    low 7 bits set. We can then count the leading zero bits and divide by
0041  *    8 to find how many to add to the index.
0042  *    This is from the book 'The PowerPC Compiler Writer's Guide',
0043  *    by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
0044  */
0045 
/*
 * strlen: length of a 0-terminated byte string, scanned one aligned
 * 32-bit word at a time.
 *
 * In:    r3 = address of the string
 * Out:   r3 = number of bytes before the first 0x00 byte
 * Uses:  r0, r6, r7, r8, r9, r10, CR0, XER[CA]
 *
 * Only whole aligned words are loaded, so bytes that precede the string
 * or follow its terminator inside the same aligned word are read too.
 * The byte index is derived with cntlzw, i.e. counted from the most
 * significant end of the word, so the lowest-addressed byte is taken to
 * be the most significant one (big-endian word layout).
 */
0046 _GLOBAL(strlen)
0047     andi.   r0, r3, 3  /* r0 = r3 & 3 (offset in aligned word); sets CR0 for bne */
0048     lis r7, 0x0101  /* r7 = 0x01010000 */
0049     addi    r10, r3, -4  /* r10 = r3 - 4: pre-bias for the update-form load */
0050     addic   r7, r7, 0x0101  /* r7 = 0x01010101 (lomagic) & clear XER[CA] */
0051     rotlwi  r6, r7, 31  /* r6 = 0x80808080 (himagic) */
0052     bne-    3f  /* r3 not word aligned: mask the first word at 3: */
0053     .balign IFETCH_ALIGN_BYTES  /* align the loop head */
0054 1:  lwzu    r9, 4(r10)  /* r10 += 4; r9 = x = word at r10 */
0055 2:  subf    r8, r7, r9  /* r8 = x - lomagic */
0056     and.    r8, r8, r6  /* keep only the high bit of each byte */
0057     beq+    1b  /* no high bit set: no candidate byte, next word */
0058     andc.   r8, r8, r9  /* & ~x: drop bytes whose high bit was set in x */
0059     beq+    1b  /* no 0 byte in this word, next word */
0060     andc    r8, r9, r6  /* r8 = x & ~himagic */
0061     orc r9, r9, r6  /* r9 = x | ~himagic */
0062     subfe   r8, r6, r8  /* r8 = r8 + ~himagic + CA = r8 + 0x7f7f7f7f (CA is still 0) */
0063     nor r8, r8, r9  /* r8 = 0x80 in each byte of x that is 0, 0x00 elsewhere */
0064     cntlzw  r8, r8  /* bit position of the first marked byte */
0065     subf    r3, r3, r10  /* r3 = address of current word - start of string */
0066     srwi    r8, r8, 3  /* bit position / 8 = byte index within the word */
0067     add r3, r3, r8  /* length = word offset + byte index */
0068     blr
0069 

0070     /* Misaligned string: make sure bytes before string are seen not 0 */
0071 3:  xor r10, r10, r0  /* clear the low 2 bits: r10 = aligned address - 4 */
0072     orc r8, r8, r8  /* r8 = r8 | ~r8 = 0xffffffff */
0073     lwzu    r9, 4(r10)  /* r10 += 4; r9 = aligned word holding the first byte */
0074     slwi    r0, r0, 3  /* r0 = number of bits that precede the string */
0075     srw r8, r8, r0  /* r8 = ones over the bits that belong to the string */
0076     orc r9, r9, r8  /* r9 |= ~r8: force the preceding bytes to 0xff */
0077     b   2b  /* join the loop at the zero-byte test */
0078 EXPORT_SYMBOL(strlen)