/*
 * Copyright 2010, Google Inc.
 *
 * Brought in from coreboot uldivmod.S
 *
 * SPDX-License-Identifier:	GPL-2.0
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * A, Q = r0 + (r1 << 32)
 * B, R = r2 + (r3 << 32)
 * A / B = Q ... R
 */
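
/*
 * Rough C model of this helper (an illustrative sketch only; note that
 * the EABI requires the 64-bit quotient to come back in r0:r1 and the
 * 64-bit remainder in r2:r3, rather than through a struct in memory):
 *
 *	struct qr { unsigned long long q, r; };
 *
 *	struct qr __aeabi_uldivmod(unsigned long long a, unsigned long long b)
 *	{
 *		struct qr res = { a / b, a % b };
 *		return res;
 *	}
 */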

A_0	.req	r0
A_1	.req	r1
B_0	.req	r2
B_1	.req	r3
C_0	.req	r4
C_1	.req	r5
D_0	.req	r6
D_1	.req	r7

Q_0	.req	r0
Q_1	.req	r1
R_0	.req	r2
R_1	.req	r3

THUMB(
TMP	.req	r8
)
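
/*
 * Thumb-2 data-processing instructions cannot take a register-specified
 * shift as their second operand, so the ARM() forms below that fold in
 * "lsr ip" or "lsl D_1" are split for THUMB() into a separate shift into
 * TMP followed by an orr.
 */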

.pushsection .text.__aeabi_uldivmod, "ax"
ENTRY(__aeabi_uldivmod)

	stmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) lr}
	@ Test if B == 0
	orrs	ip, B_0, B_1		@ Z set -> B == 0
	beq	L_div_by_0
	@ Test if B is power of 2: (B & (B - 1)) == 0
	subs	C_0, B_0, #1
	sbc	C_1, B_1, #0
	tst	C_0, B_0
	tsteq	B_1, C_1
	beq	L_pow2
	@ Test if A_1 == B_1 == 0
	orrs	ip, A_1, B_1
	beq	L_div_32_32

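/*
 * General 64-by-64 path: shift B left, tracking the matching quotient
 * bit in C, until its top set bit is (roughly) aligned with that of A,
 * then fall into the shift-and-subtract loop at L_subtract, which
 * accumulates the quotient in D and leaves the remainder in A.
 */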
L_div_64_64:
/* CLZ only exists in ARM architecture version 5 and above. */
#ifdef HAVE_CLZ
	mov	C_0, #1
	mov	C_1, #0
	@ D_0 = clz A
	teq	A_1, #0
	clz	D_0, A_1
	clzeq	ip, A_0
	addeq	D_0, D_0, ip
	@ D_1 = clz B
	teq	B_1, #0
	clz	D_1, B_1
	clzeq	ip, B_0
	addeq	D_1, D_1, ip
	@ if clz B - clz A > 0
	subs	D_0, D_1, D_0
	bls	L_done_shift
	@ B <<= (clz B - clz A)
	subs	D_1, D_0, #32
	rsb	ip, D_0, #32
	movmi	B_1, B_1, lsl D_0
ARM(	orrmi	B_1, B_1, B_0, lsr ip	)
THUMB(	lsrmi	TMP, B_0, ip		)
THUMB(	orrmi	B_1, B_1, TMP		)
	movpl	B_1, B_0, lsl D_1
	mov	B_0, B_0, lsl D_0
	@ C = 1 << (clz B - clz A)
	movmi	C_1, C_1, lsl D_0
ARM(	orrmi	C_1, C_1, C_0, lsr ip	)
THUMB(	lsrmi	TMP, C_0, ip		)
THUMB(	orrmi	C_1, C_1, TMP		)
	movpl	C_1, C_0, lsl D_1
	mov	C_0, C_0, lsl D_0
L_done_shift:
	mov	D_0, #0
	mov	D_1, #0
	@ C: current bit; D: result
#else
	@ C: current bit; D: result
	mov	C_0, #1
	mov	C_1, #0
	mov	D_0, #0
	mov	D_1, #0
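	@ Without CLZ, align B with A by shifting: first in steps of 4 while
	@ B_1 stays small enough not to overflow and B is still below A,
	@ then one bit at a time, shifting the quotient bit C in step.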
L_lsl_4:
	cmp	B_1, #0x10000000
	cmpcc	B_1, A_1
	cmpeq	B_0, A_0
	bcs	L_lsl_1
	@ B <<= 4
	mov	B_1, B_1, lsl #4
	orr	B_1, B_1, B_0, lsr #28
	mov	B_0, B_0, lsl #4
	@ C <<= 4
	mov	C_1, C_1, lsl #4
	orr	C_1, C_1, C_0, lsr #28
	mov	C_0, C_0, lsl #4
	b	L_lsl_4
L_lsl_1:
	cmp	B_1, #0x80000000
	cmpcc	B_1, A_1
	cmpeq	B_0, A_0
	bcs	L_subtract
	@ B <<= 1
	mov	B_1, B_1, lsl #1
	orr	B_1, B_1, B_0, lsr #31
	mov	B_0, B_0, lsl #1
	@ C <<= 1
	mov	C_1, C_1, lsl #1
	orr	C_1, C_1, C_0, lsr #31
	mov	C_0, C_0, lsl #1
	b	L_lsl_1
#endif
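/*
 * Shift-and-subtract loop: whenever A >= B, subtract B from A and set
 * the current quotient bit C in the result D; then shift C and B right
 * by one. Stop once A reaches zero or the quotient bit has been shifted
 * out.
 */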
L_subtract:
	@ if A >= B
	cmp	A_1, B_1
	cmpeq	A_0, B_0
	bcc	L_update
	@ A -= B
	subs	A_0, A_0, B_0
	sbc	A_1, A_1, B_1
	@ D |= C
	orr	D_0, D_0, C_0
	orr	D_1, D_1, C_1
L_update:
	@ if A == 0: break
	orrs	ip, A_1, A_0
	beq	L_exit
	@ C >>= 1
	movs	C_1, C_1, lsr #1
	movs	C_0, C_0, rrx
	@ if C == 0: break
	orrs	ip, C_1, C_0
	beq	L_exit
	@ B >>= 1
	movs	B_1, B_1, lsr #1
	mov	B_0, B_0, rrx
	b	L_subtract
L_exit:
	@ Note: A, B and Q, R are aliases
	mov	R_0, A_0
	mov	R_1, A_1
	mov	Q_0, D_0
	mov	Q_1, D_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}

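/*
 * A and B both fit in 32 bits: let __aeabi_uidivmod do the work. Per the
 * EABI it takes the dividend in r0 and the divisor in r1 and returns the
 * quotient in r0 and the remainder in r1, so the quotient is already in
 * Q_0; the remainder just has to be moved into R_0 and the upper halves
 * of Q and R zeroed.
 */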
L_div_32_32:
	@ Note:	A_0 and r0 are aliases,
	@	Q_1 and r1 are aliases
	mov	r1, B_0
	bl	__aeabi_uidivmod
	mov	R_0, r1
	mov	R_1, #0
	mov	Q_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}

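/*
 * B is a power of two. The remainder is A & (B - 1), with B - 1 still
 * sitting in C from the power-of-two test at the top, and the quotient
 * is A shifted right by log2(B).
 */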
L_pow2:
#ifdef HAVE_CLZ
	@ Note: A, B and Q, R are aliases
	@ R = A & (B - 1)
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
	clz	D_0, B_0
	add	D_1, D_0, #1
	rsbs	D_0, D_0, #31
	bpl	L_1
	clz	D_0, B_1
	rsb	D_0, D_0, #31
	mov	A_0, A_1, lsr D_0
	add	D_0, D_0, #32
L_1:
	movpl	A_0, A_0, lsr D_0
ARM(	orrpl	A_0, A_0, A_1, lsl D_1	)
THUMB(	lslpl	TMP, A_1, D_1		)
THUMB(	orrpl	A_0, A_0, TMP		)
	mov	A_1, A_1, lsr D_0
	@ Move C back to R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
#else
	@ Note: A, B and Q, R are aliases
	@ R = A & (B - 1)
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
	@ Compute log2(B) by counting the trailing zeroes in B.
	mov	D_0, #0
	orrs	B_0, B_0, B_0
	@ If B is greater than 1 << 31, divide A and B by 1 << 32.
	moveq	A_0, A_1
	moveq	A_1, #0
	moveq	B_0, B_1
	@ Count the remaining trailing zeroes in B.
	movs	B_1, B_0, lsl #16
	addeq	D_0, #16
	moveq	B_0, B_0, lsr #16
	tst	B_0, #0xff
	addeq	D_0, #8
	moveq	B_0, B_0, lsr #8
	tst	B_0, #0xf
	addeq	D_0, #4
	moveq	B_0, B_0, lsr #4
	tst	B_0, #0x3
	addeq	D_0, #2
	moveq	B_0, B_0, lsr #2
	tst	B_0, #0x1
	addeq	D_0, #1
	@ Shift A to the right by the appropriate amount.
	rsb	D_1, D_0, #32
	mov	Q_0, A_0, lsr D_0
ARM(	orr	Q_0, Q_0, A_1, lsl D_1	)
THUMB(	lsl	A_1, D_1	)
THUMB(	orr	Q_0, A_1	)
	mov	Q_1, A_1, lsr D_0
	@ Move C to R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
#endif

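/*
 * Division by zero: report it through __div0. If __div0 returns, hand
 * back zero for both quotient and remainder, since nothing better is
 * defined.
 */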
L_div_by_0:
	bl	__div0
	@ As wrong as it could be
	mov	Q_0, #0
	mov	Q_1, #0
	mov	R_0, #0
	mov	R_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
ENDPROC(__aeabi_uldivmod)
.popsection