| /* |
| * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved. |
| * |
| * SPDX-License-Identifier: GPL-2.0+ |
| */ |
| |
| /* |
| * This is optimized primarily for the ARC700. |
| * It would be possible to speed up the loops by one cycle / word |
| * respective one cycle / byte by forcing double source 1 alignment, unrolling |
| * by a factor of two, and speculatively loading the second word / byte of |
| * source 1; however, that would increase the overhead for loop setup / finish, |
| * and strcmp might often terminate early. |
| */ |
| |
| .global strcmp |
| .align 4 |
| strcmp: |
or %r2, %r0, %r1 /* check whether both pointers are word-aligned */
bmsk_s %r2, %r2, 1
brne %r2, 0, .Lcharloop /* no: fall back to the byte loop */
mov_s %r12, 0x01010101 /* magic constants for the zero-byte test */
ror %r5, %r12 /* %r5 := 0x80808080 */
| .Lwordloop: |
ld.ab %r2, [%r0, 4] /* fetch a word from each string, post-increment */
ld.ab %r3, [%r1, 4]
nop_s
sub %r4, %r2, %r12 /* %r4 := (%r2 - 0x01010101) & ~%r2 & 0x80808080, */
bic %r4, %r4, %r2 /* non-zero iff %r2 contains a zero byte */
and %r4, %r4, %r5
brne %r4, 0, .Lfound0
breq %r2, %r3, .Lwordloop /* keep going while the words match */
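/*
 * Fall through: the words differ and neither contains a zero byte.  The
 * result must be decided by the first (lowest-addressed) differing byte.
 * On big endian an unsigned compare of the whole words already does that;
 * on little endian we first isolate the least significant differing byte.
 */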
| #ifdef __LITTLE_ENDIAN__ |
| xor %r0, %r2, %r3 /* mask for difference */ |
| sub_s %r1, %r0, 1 |
| bic_s %r0, %r0, %r1 /* mask for least significant difference bit */ |
| sub %r1, %r5, %r0 |
| xor %r0, %r5, %r1 /* mask for least significant difference byte */ |
| and_s %r2, %r2, %r0 |
| and_s %r3, %r3, %r0 |
#endif /* __LITTLE_ENDIAN__ */
cmp_s %r2, %r3 /* the strings differ here; compute the sign */
mov_s %r0, 1 /* return positive if %r2 > %r3 */
j_s.d [%blink]
bset.lo %r0, %r0, 31 /* ... negative (bit 31 set) if %r2 < %r3 */
| |
| .balign 4 |
| #ifdef __LITTLE_ENDIAN__ |
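/*
 * A zero byte was seen in the word loaded from source 1.  OR the zero
 * indicator into the difference mask so that the terminating NUL acts
 * like a mismatch, then isolate the least significant (i.e. first)
 * differing byte exactly as above.
 */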
| .Lfound0: |
| xor %r0, %r2, %r3 /* mask for difference */ |
| or %r0, %r0, %r4 /* or in zero indicator */ |
| sub_s %r1, %r0, 1 |
| bic_s %r0, %r0, %r1 /* mask for least significant difference bit */ |
| sub %r1, %r5, %r0 |
| xor %r0, %r5, %r1 /* mask for least significant difference byte */ |
| and_s %r2, %r2, %r0 |
| and_s %r3, %r3, %r0 |
sub.f %r0, %r2, %r3 /* 0 if the strings are equal */
mov.hi %r0, 1 /* positive if %r2 > %r3 (unsigned) */
j_s.d [%blink]
bset.lo %r0, %r0, 31 /* negative (bit 31 set) if %r2 < %r3 */
| #else /* __BIG_ENDIAN__ */ |
| /* |
| * The zero-detection above can mis-detect 0x01 bytes as zeroes |
| * because of carry-propagateion from a lower significant zero byte. |
| * We can compensate for this by checking that bit0 is zero. |
| * This compensation is not necessary in the step where we |
| * get a low estimate for r2, because in any affected bytes |
| * we already have 0x00 or 0x01, which will remain unchanged |
| * when bit 7 is cleared. |
| */ |
| .balign 4 |
| .Lfound0: |
| lsr %r0, %r4, 8 |
| lsr_s %r1, %r2 |
| bic_s %r2, %r2, %r0 /* get low estimate for r2 and get ... */ |
| bic_s %r0, %r0, %r1 /* <this is the adjusted mask for zeros> */ |
| or_s %r3, %r3, %r0 /* ... high estimate r3 so that r2 > r3 will */ |
| cmp_s %r3, %r2 /* ... be independent of trailing garbage */ |
| or_s %r2, %r2, %r0 /* likewise for r3 > r2 */ |
| bic_s %r3, %r3, %r0 |
| rlc %r0, 0 /* r0 := r2 > r3 ? 1 : 0 */ |
| cmp_s %r2, %r3 |
| j_s.d [%blink] |
| bset.lo %r0, %r0, 31 |
#endif /* __LITTLE_ENDIAN__ */
| |
| .balign 4 |
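/*
 * Byte-at-a-time fallback, used when the two sources are not both
 * word-aligned.
 */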
| .Lcharloop: |
ldb.ab %r2, [%r0, 1] /* fetch one byte from each string, post-increment */
ldb.ab %r3, [%r1, 1]
nop_s
breq %r2, 0, .Lcmpend /* end of string 1 */
breq %r2, %r3, .Lcharloop /* keep going while the bytes match */
.Lcmpend:
j_s.d [%blink]
sub %r0, %r2, %r3 /* return the difference of the mismatching bytes */