
; DIVISION ROUTINES with scaled reciprocals for constants
; (all functions optimized for speed, ~ <36 cycles w/o push/pop)
; Target: AVR MCUs with hardware multiplier ("mul" instruction)
; Author: Andreas Lenze (andreas.lenze@tonline.de)
; Feb. 2003
;div. by n: n: scaled reciprocal: shift count:
;Div16_3 3 1010101010101011 AAAB 17
;Div16_5 5 1100110011001101 CCCD 18
;Div16_6 6 1010101010101011 AAAB 18
;Div16_7 7 10010010010010011 19 > 17 bits req'd,(MSB=1,rest 2493h)
;Div16_7a 7 1001001001001001 9249 18 > needs correction for accurate result
;Div16_9 9 1110001110001111 E38F 19
;Div16_10 10 1100110011001101 CCCD 19
;Div16_11 11 1011101000101111 BA2F 19
;Div16_12 12 1010101010101011 AAAB 19
;Div16_13 13 1001110110001010 9D8A 19
;Div16_14 14 10010010010010011 20 > 17 bits req'd,(MSB=1,rest 2493h)
;Div16_15 15 1000100010001001 8889 19
;Div16_17 17 1111000011110001 F0F1 20
;Div16_18 18 1110001110001111 E38F 20
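;Div16_19 19 0110101111001011 6BCB 19 > only 15 significant bits: quotient is 1 too high for some A >= 30855; D795 with shift count 20 is exact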
;Div16_20 20 1100110011001101 CCCD 20
;Div16_21 21 1100001100001011 C30B 20 > needs correction for accurate result
;Div16_22 22 1011101000101111 BA2F 20
;Div16_23 23 1011001000010101 B215 20 > needs correction for accurate result
;D16_nn(by) 2-23 > "C-style" function with macro 'Div16by' to perform a
; constants division with all divisors from 2 to 23. Price
; tag for the comfort is ~50 cycles / ~50 words overhead
; NOTE: Other divisor constants like /24 etc. can easily be created by
; modifying the shift count in "Q = Q >> x": add 1 shift right for
; 'divisor x 2' (e.g. for "/24" we need a total of 20 instead of
; the 19 shifts needed for "/12")
; If the remainder of the division is not needed, the multiply/subtract
; operation after the comment
;
; r19:r18 now "Q" (= result >> xx)
; R = A - xx*Q (start removal)
; ldi r17,xx
; ....
; clr XH (end removal)
;
; may be omitted to save another 5 cycles / 4 words
; (* not applicable for 'Div16_7a', 'Div16_21' and 'Div16_23' - there the
; remainder is always needed for the correction step *)
;
; "Div16_7a", "Div16_21" and "Div16_23" demonstrate and use the 'approximate
; and correct' technique which may be necessary for some larger divisors
; (e.g. /21, /23)
;
;***************************************************************************
;*
;* Function "Div16_3"
;* Divides an unsigned 16 bit word (XH:XL) by 3
;* Returns quotient in YH:YL and remainder in XL
;*
;* Author: Andreas Lenze (andreas.lenze@tonline.de)
;* Equations by D: W. Jones:
;*
;* Reciprocal mul w. extra precision:
;* unsigned int A;
;* unsigned int scaled_reciprocal = 0xAAAB;
;* unsigned int Q; /* the quotient */
;*
;* Q = ((A * 0xAAAB) >> 17)
;*
;* Uses: high regs: 7 (r17, r18, r19, X, Y)
;* low regs: 3 (r0, r1, r2)
;*
;* words: 36 (w. push/pop = 10 words)
;* cycles: 48 (w. push/pop = 16 cycles)
;*
;* Note: Hardware multiplier required ("mul" instruction)
;*
;***************************************************************************
Div16_3:
    ; Stack the scratch registers; they are restored in reverse order below.
    push    r2
    push    r19
    push    r18
    push    r17

    ; r2 = 0 (used to add carries only), Y = 0xAAAB = ceil(2^17 / 3).
    clr     r2
    ldi     YL, low(0xAAAB)
    ldi     YH, high(0xAAAB)

    ; r19:r18:r17 = bits 31..8 of XH:XL * YH:YL (the lowest byte is not needed).
    mul     YL, XL              ; low * low: only the high byte is kept
    mov     r17, r1
    mul     YH, XH              ; high * high: top word of the product
    movw    r18, r0
    mul     YH, XL              ; first cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2
    mul     YL, XH              ; second cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2

    ; r19:r18 already is product >> 16; one more shift gives Q = product >> 17.
    lsr     r19
    ror     r18

    ; Quotient to YH:YL, then R = A - 3*Q on the low bytes only (R < 3).
    movw    YL, r18
    ldi     r17, 3
    mul     r17, r18
    sub     XL, r0              ; XL = remainder
    clr     XH

    pop     r17
    pop     r18
    pop     r19
    pop     r2
    ret
;**** End of function Div16_3 ****
;***************************************************************************
;*
;* Function "Div16_5"
;* Divides an unsigned 16 bit word (XH:XL) by 5
;* Returns quotient in YH:YL and remainder in XL
;*
;* Author: Andreas Lenze (andreas.lenze@tonline.de)
;* Equations by D: W. Jones:
;*
;* Reciprocal mul w. extra precision:
;* unsigned int A;
;* unsigned int scaled_reciprocal = 0xCCCD;
;* unsigned int Q; /* the quotient */
;*
;* Q = ((A * 0xCCCD) >> 18)
;*
;* Uses: high regs: 7 (r17, r18, r19, X, Y)
;* low regs: 3 (r0, r1, r2)
;*
;* words: 38 (w. push/pop = 10 words)
;* cycles: 54 (w. push/pop = 20 cycles)
;*
;* Note: Hardware multiplier required ("mul" instruction)
;*
;***************************************************************************
Div16_5:
    ; Stack the scratch registers; they are restored in reverse order below.
    push    r2
    push    r19
    push    r18
    push    r17

    ; r2 = 0 (used to add carries only), Y = 0xCCCD = ceil(2^18 / 5).
    clr     r2
    ldi     YL, low(0xCCCD)
    ldi     YH, high(0xCCCD)

    ; r19:r18:r17 = bits 31..8 of XH:XL * YH:YL (the lowest byte is not needed).
    mul     YL, XL              ; low * low: only the high byte is kept
    mov     r17, r1
    mul     YH, XH              ; high * high: top word of the product
    movw    r18, r0
    mul     YH, XL              ; first cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2
    mul     YL, XH              ; second cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2

    ; r19:r18 already is product >> 16; two more shifts give Q = product >> 18.
    lsr     r19
    ror     r18
    lsr     r19
    ror     r18

    ; Quotient to YH:YL, then R = A - 5*Q on the low bytes only (R < 5).
    movw    YL, r18
    ldi     r17, 5
    mul     r17, r18
    sub     XL, r0              ; XL = remainder
    clr     XH

    pop     r17
    pop     r18
    pop     r19
    pop     r2
    ret
;**** End of function Div16_5 ****
;***************************************************************************
;*
;* Function "Div16_6"
;* Divides an unsigned 16 bit word (XH:XL) by 6
;* Returns quotient in YH:YL and remainder in XL
;*
;* Author: Andreas Lenze (andreas.lenze@tonline.de)
;* Equations by D: W. Jones:
;*
;* Reciprocal mul w. extra precision:
;* unsigned int A;
;* unsigned int scaled_reciprocal = 0xAAAB;
;* unsigned int Q; /* the quotient */
;*
;* Q = ((A * 0xAAAB) >> 18)
;*
;* Uses: high regs: 7 (r17, r18, r19, X, Y)
;* low regs: 3 (r0, r1, r2)
;*
;* words: 38 (w. push/pop = 10 words)
;* cycles: 54 (w. push/pop = 20 cycles)
;*
;* Note: Hardware multiplier required ("mul" instruction)
;*
;***************************************************************************
Div16_6:
    ; Stack the scratch registers; they are restored in reverse order below.
    push    r2
    push    r19
    push    r18
    push    r17

    ; r2 = 0 (used to add carries only), Y = 0xAAAB = ceil(2^18 / 6).
    clr     r2
    ldi     YL, low(0xAAAB)
    ldi     YH, high(0xAAAB)

    ; r19:r18:r17 = bits 31..8 of XH:XL * YH:YL (the lowest byte is not needed).
    mul     YL, XL              ; low * low: only the high byte is kept
    mov     r17, r1
    mul     YH, XH              ; high * high: top word of the product
    movw    r18, r0
    mul     YH, XL              ; first cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2
    mul     YL, XH              ; second cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2

    ; r19:r18 already is product >> 16; two more shifts give Q = product >> 18.
    lsr     r19
    ror     r18
    lsr     r19
    ror     r18

    ; Quotient to YH:YL, then R = A - 6*Q on the low bytes only (R < 6).
    movw    YL, r18
    ldi     r17, 6
    mul     r17, r18
    sub     XL, r0              ; XL = remainder
    clr     XH

    pop     r17
    pop     r18
    pop     r19
    pop     r2
    ret
;**** End of function Div16_6 ****
;***************************************************************************
;*
;* Function "Div16_7"
;* Divides an unsigned 16 bit word (XH:XL) by 7
;* Returns quotient in YH:YL and remainder in XL
;*
;* Author: Andreas Lenze (andreas.lenze@tonline.de)
;* Equations by D: W. Jones:
;*
;* Reciprocal mul w. extra precision:
;* unsigned int A;
;* unsigned int scaled_reciprocal = 0x2493;
;* unsigned int Q; /* the quotient */
;*
;* Q = (((A * 0x2493) >> 16) + A) >> 3 > 17 bits reciprocal!
;*
;* Uses: high regs: 7 (r17, r18, r19, X, Y)
;* low regs: 3 (r0, r1, r2)
;*
;* words: 38 (w. push/pop = 8 words)
;* cycles: 46 (w. push/pop = 16 cycles)
;*
;* Note: Hardware multiplier required ("mul" instruction)
;*
;***************************************************************************
Div16_7:
    ; Stack the scratch registers; they are restored in reverse order below.
    push    r2
    push    r19
    push    r18
    push    r17

    ; The full reciprocal is the 17 bit value 0x12493 = ceil(2^19 / 7).
    ; Only its low 16 bits go into Y; the implicit top bit is accounted for
    ; by adding A to the high word of the product further down.
    clr     r2                  ; r2 = 0, used to add carries only
    ldi     YL, low(0x2493)
    ldi     YH, high(0x2493)

    ; r19:r18:r17 = bits 31..8 of XH:XL * YH:YL (the lowest byte is not needed).
    mul     YL, XL              ; low * low: only the high byte is kept
    mov     r17, r1
    mul     YH, XH              ; high * high: top word of the product
    movw    r18, r0
    mul     YH, XL              ; first cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2
    mul     YL, XH              ; second cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2

    ; (product >> 16) + A is a 17 bit value: bit 16 is left in the carry flag
    ; and must be rotated back in by the very next instruction.
    add     r18, XL
    adc     r19, XH
    ror     r19                 ; shift 1 of 3, pulls the carry into bit 15
    ror     r18
    lsr     r19                 ; shift 2 of 3
    ror     r18
    lsr     r19                 ; shift 3 of 3
    ror     r18

    ; Quotient to YH:YL, then R = A - 7*Q on the low bytes only (R < 7).
    movw    YL, r18
    ldi     r17, 7
    mul     r17, r18
    sub     XL, r0              ; XL = remainder
    clr     XH

    pop     r17
    pop     r18
    pop     r19
    pop     r2
    ret
;**** End of function Div16_7 ****
;***************************************************************************
;*
;* Function "Div16_7a"
;* Divides an unsigned 16 bit word (XH:XL) by 7
;* Call with 16 bit number in XH:XL
;* Returns quotient in YH:YL and remainder in XL
;*
;* Author: Andreas Lenze (andreas.lenze@tonline.de)
;* (Equations partly by D. W. Jones)
;*
;* Reciprocal multiplication w. extra precision:
;* (This version uses correction to achieve the required precision)
;* unsigned int R; /* remainder */
;* unsigned int long A; /* dividend */
;* unsigned int long Q; /* quotient */
;*
;* Q = ((A * 0x9249) >> 18)
;*
;* /* Q = A/7 or Q+1 = A/7 for all A <= 65535 */
;* /* correct Q and calculate remainder */
;* R = A  7*Q
;* if (R >= 7) {
;* R = R  7;
;* Q = Q + 1;
;* }
;* Uses: high regs: 7 (r17, r18, r19, X, Y)
;* low regs: 3 (r0, r1, r2)
;*
;* words: 36 (w. push/pop = 8 words)
;* cycles: 59 (w. push/pop = 20 cycles)
;*
;* Note: Hardware multiplier required ("mul" instruction)
;*
;***************************************************************************
Div16_7a:
    ; Stack the scratch registers; they are restored in reverse order below.
    push    r2
    push    r19
    push    r18
    push    r17

    ; r2 = 0 (used to add carries only), Y = 0x9249 = floor(2^18 / 7).
    ; This 16 bit constant can leave the quotient one too low, which the
    ; correction step at the end repairs.
    clr     r2
    ldi     YL, low(0x9249)
    ldi     YH, high(0x9249)

    ; r19:r18:r17 = bits 31..8 of XH:XL * YH:YL (the lowest byte is not needed).
    mul     YL, XL              ; low * low: only the high byte is kept
    mov     r17, r1
    mul     YH, XH              ; high * high: top word of the product
    movw    r18, r0
    mul     YH, XL              ; first cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2
    mul     YL, XH              ; second cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2

    ; r19:r18 already is product >> 16; two more shifts give Q = product >> 18.
    lsr     r19
    ror     r18
    lsr     r19
    ror     r18

    ; Provisional quotient to YH:YL, provisional R = A - 7*Q in XL.
    movw    YL, r18
    ldi     r17, 7
    mul     r17, r18
    sub     XL, r0
    clr     XH

    ; if (R >= 7) { R -= 7; Q += 1; }
    cpi     XL, 7
    brlo    Div16_7a_done
    subi    XL, 7
    adiw    YL, 1
Div16_7a_done:
    pop     r17
    pop     r18
    pop     r19
    pop     r2
    ret
;**** End of function Div16_7a ****
;***************************************************************************
;*
;* Function "Div16_9"
;* Divides an unsigned 16 bit word (XH:XL) by 9
;* Returns quotient in YH:YL and remainder in XL
;*
;* Author: Andreas Lenze (andreas.lenze@tonline.de)
;* Equations by D: W. Jones:
;*
;* Reciprocal mul w. extra precision:
;* unsigned int A;
;* unsigned int scaled_reciprocal = 0xE38F;
;* unsigned int Q; /* the quotient */
;*
;* Q = ((A * 0xE38F) >> 19)
;*
;* Uses: high regs: 7 (r17, r18, r19, X, Y)
;* low regs: 3 (r0, r1, r2)
;*
;* words: 36 (w. push/pop = 8 words)
;* cycles: 46 (w. push/pop = 16 cycles)
;*
;* Note: Hardware multiplier required ("mul" instruction)
;*
;***************************************************************************
Div16_9:
    ; Stack the scratch registers; they are restored in reverse order below.
    push    r2
    push    r19
    push    r18
    push    r17

    ; r2 = 0 (used to add carries only), Y = 0xE38F = ceil(2^19 / 9).
    clr     r2
    ldi     YL, low(0xE38F)
    ldi     YH, high(0xE38F)

    ; r19:r18:r17 = bits 31..8 of XH:XL * YH:YL (the lowest byte is not needed).
    mul     YL, XL              ; low * low: only the high byte is kept
    mov     r17, r1
    mul     YH, XH              ; high * high: top word of the product
    movw    r18, r0
    mul     YH, XL              ; first cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2
    mul     YL, XH              ; second cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2

    ; r19:r18 already is product >> 16; three more shifts give Q = product >> 19.
    lsr     r19
    ror     r18
    lsr     r19
    ror     r18
    lsr     r19
    ror     r18

    ; Quotient to YH:YL, then R = A - 9*Q on the low bytes only (R < 9).
    movw    YL, r18
    ldi     r17, 9
    mul     r17, r18
    sub     XL, r0              ; XL = remainder
    clr     XH

    pop     r17
    pop     r18
    pop     r19
    pop     r2
    ret
;**** End of function Div16_9 ****
;***************************************************************************
;*
;* Function "Div16_10"
;* Divides an unsigned 16 bit word (XH:XL) by 10
;* Returns quotient in YH:YL and remainder in XL
;*
;* Author: Andreas Lenze (andreas.lenze@tonline.de)
;* Equations by D: W. Jones:
;*
;* Reciprocal mul w. extra precision:
;* unsigned int A;
;* unsigned int scaled_reciprocal = 0xCCCD;
;* unsigned int Q; /* the quotient */
;*
;* Q = ((A * 0xCCCD) >> 19)
;*
;* Uses: high regs: 7 (r17, r18, r19, X, Y)
;* low regs: 3 (r0, r1, r2)
;*
;* words: 36 (w. push/pop = 8 words)
;* cycles: 46 (w. push/pop = 16 cycles)
;*
;* Note: Hardware multiplier required ("mul" instruction)
;*
;***************************************************************************
Div16_10:
    ; Stack the scratch registers; they are restored in reverse order below.
    push    r2
    push    r19
    push    r18
    push    r17

    ; r2 = 0 (used to add carries only), Y = 0xCCCD = ceil(2^19 / 10).
    clr     r2
    ldi     YL, low(0xCCCD)
    ldi     YH, high(0xCCCD)

    ; r19:r18:r17 = bits 31..8 of XH:XL * YH:YL (the lowest byte is not needed).
    mul     YL, XL              ; low * low: only the high byte is kept
    mov     r17, r1
    mul     YH, XH              ; high * high: top word of the product
    movw    r18, r0
    mul     YH, XL              ; first cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2
    mul     YL, XH              ; second cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2

    ; r19:r18 already is product >> 16; three more shifts give Q = product >> 19.
    lsr     r19
    ror     r18
    lsr     r19
    ror     r18
    lsr     r19
    ror     r18

    ; Quotient to YH:YL, then R = A - 10*Q on the low bytes only (R < 10).
    movw    YL, r18
    ldi     r17, 10
    mul     r17, r18
    sub     XL, r0              ; XL = remainder
    clr     XH

    pop     r17
    pop     r18
    pop     r19
    pop     r2
    ret
;**** End of function Div16_10 ****
;***************************************************************************
;*
;* Function "Div16_11"
;* Divides an unsigned 16 bit word (XH:XL) by 11
;* Returns quotient in YH:YL and remainder in XL
;*
;* Author: Andreas Lenze (andreas.lenze@tonline.de)
;* Equations by D: W. Jones:
;*
;* Reciprocal mul w. extra precision:
;* unsigned int A;
;* unsigned int scaled_reciprocal = 0xBA2F;
;* unsigned int Q; /* the quotient */
;*
;* Q = ((A * 0xBA2F) >> 19)
;*
;* Uses: high regs: 7 (r17, r18, r19, X, Y)
;* low regs: 3 (r0, r1, r2)
;*
;* words: 36 (w. push/pop = 8 words)
;* cycles: 46 (w. push/pop = 16 cycles)
;*
;* Note: Hardware multiplier required ("mul" instruction)
;*
;***************************************************************************
Div16_11:
    ; Stack the scratch registers; they are restored in reverse order below.
    push    r2
    push    r19
    push    r18
    push    r17

    ; r2 = 0 (used to add carries only), Y = 0xBA2F = ceil(2^19 / 11).
    clr     r2
    ldi     YL, low(0xBA2F)
    ldi     YH, high(0xBA2F)

    ; r19:r18:r17 = bits 31..8 of XH:XL * YH:YL (the lowest byte is not needed).
    mul     YL, XL              ; low * low: only the high byte is kept
    mov     r17, r1
    mul     YH, XH              ; high * high: top word of the product
    movw    r18, r0
    mul     YH, XL              ; first cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2
    mul     YL, XH              ; second cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2

    ; r19:r18 already is product >> 16; three more shifts give Q = product >> 19.
    lsr     r19
    ror     r18
    lsr     r19
    ror     r18
    lsr     r19
    ror     r18

    ; Quotient to YH:YL, then R = A - 11*Q on the low bytes only (R < 11).
    movw    YL, r18
    ldi     r17, 11
    mul     r17, r18
    sub     XL, r0              ; XL = remainder
    clr     XH

    pop     r17
    pop     r18
    pop     r19
    pop     r2
    ret
;**** End of function Div16_11 ****
;***************************************************************************
;*
;* Function "Div16_12"
;* Divides an unsigned 16 bit word (XH:XL) by 12
;* Returns quotient in YH:YL and remainder in XL
;*
;* Author: Andreas Lenze (andreas.lenze@tonline.de)
;* Equations by D: W. Jones:
;*
;* Reciprocal mul w. extra precision:
;* unsigned int A;
;* unsigned int scaled_reciprocal = 0xAAAB;
;* unsigned int Q; /* the quotient */
;*
;* Q = ((A * 0xAAAB) >> 19)
;*
;* Uses: high regs: 7 (r17, r18, r19, X, Y)
;* low regs: 3 (r0, r1, r2)
;*
;* words: 36 (w. push/pop = 8 words)
;* cycles: 46 (w. push/pop = 16 cycles)
;*
;* Note: Hardware multiplier required ("mul" instruction)
;*
;***************************************************************************
Div16_12:
    ; Stack the scratch registers; they are restored in reverse order below.
    push    r2
    push    r19
    push    r18
    push    r17

    ; r2 = 0 (used to add carries only), Y = 0xAAAB = ceil(2^19 / 12).
    clr     r2
    ldi     YL, low(0xAAAB)
    ldi     YH, high(0xAAAB)

    ; r19:r18:r17 = bits 31..8 of XH:XL * YH:YL (the lowest byte is not needed).
    mul     YL, XL              ; low * low: only the high byte is kept
    mov     r17, r1
    mul     YH, XH              ; high * high: top word of the product
    movw    r18, r0
    mul     YH, XL              ; first cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2
    mul     YL, XH              ; second cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2

    ; r19:r18 already is product >> 16; three more shifts give Q = product >> 19.
    lsr     r19
    ror     r18
    lsr     r19
    ror     r18
    lsr     r19
    ror     r18

    ; Quotient to YH:YL, then R = A - 12*Q on the low bytes only (R < 12).
    movw    YL, r18
    ldi     r17, 12
    mul     r17, r18
    sub     XL, r0              ; XL = remainder
    clr     XH

    pop     r17
    pop     r18
    pop     r19
    pop     r2
    ret
;**** End of function Div16_12 ****
;***************************************************************************
;*
;* Function "Div16_13"
;* Divides an unsigned 16 bit word (XH:XL) by 13
;* Returns quotient in YH:YL and remainder in XL
;*
;* Author: Andreas Lenze (andreas.lenze@tonline.de)
;* Equations by D: W. Jones:
;*
;* Reciprocal mul w. extra precision:
;* unsigned int A;
;* unsigned int scaled_reciprocal = 0x9D8A;
;* unsigned int Q; /* the quotient */
;*
;* Q = ((A * 0x9D8A) >> 19)
;*
;* Uses: high regs: 7 (r17, r18, r19, X, Y)
;* low regs: 3 (r0, r1, r2)
;*
;* words: 36 (w. push/pop = 8 words)
;* cycles: 46 (w. push/pop = 16 cycles)
;*
;* Note: Hardware multiplier required ("mul" instruction)
;*
;***************************************************************************
Div16_13:
    ; Stack the scratch registers; they are restored in reverse order below.
    push    r2
    push    r19
    push    r18
    push    r17

    ; r2 = 0 (used to add carries only), Y = 0x9D8A = ceil(2^19 / 13).
    clr     r2
    ldi     YL, low(0x9D8A)
    ldi     YH, high(0x9D8A)

    ; r19:r18:r17 = bits 31..8 of XH:XL * YH:YL (the lowest byte is not needed).
    mul     YL, XL              ; low * low: only the high byte is kept
    mov     r17, r1
    mul     YH, XH              ; high * high: top word of the product
    movw    r18, r0
    mul     YH, XL              ; first cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2
    mul     YL, XH              ; second cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2

    ; r19:r18 already is product >> 16; three more shifts give Q = product >> 19.
    lsr     r19
    ror     r18
    lsr     r19
    ror     r18
    lsr     r19
    ror     r18

    ; Quotient to YH:YL, then R = A - 13*Q on the low bytes only (R < 13).
    movw    YL, r18
    ldi     r17, 13
    mul     r17, r18
    sub     XL, r0              ; XL = remainder
    clr     XH

    pop     r17
    pop     r18
    pop     r19
    pop     r2
    ret
;**** End of function Div16_13 ****
;***************************************************************************
;*
;* Function "Div16_14"
;* Divides an unsigned 16 bit word (XH:XL) by 14
;* Returns quotient in YH:YL and remainder in XL
;*
;* Author: Andreas Lenze (andreas.lenze@tonline.de)
;* Equations by D: W. Jones:
;*
;* Reciprocal mul w. extra precision:
;* unsigned int A;
;* unsigned int scaled_reciprocal = 0x2493;
;* unsigned int Q; /* the quotient */
;*
;* Q = (((A * 0x2493) >> 16) + A) >> 4 > 17 bits reciprocal!
;*
;* Uses: high regs: 7 (r17, r18, r19, X, Y)
;* low regs: 3 (r0, r1, r2)
;*
;* words: 40 (w. push/pop = 8 words)
;* cycles: 44 (w. push/pop = 16 cycles)
;*
;* Note: Hardware multiplier required ("mul" instruction)
;*
;***************************************************************************
Div16_14:
    ; Stack the scratch registers; they are restored in reverse order below.
    push    r2
    push    r19
    push    r18
    push    r17

    ; The full reciprocal is the 17 bit value 0x12493 = ceil(2^20 / 14).
    ; Only its low 16 bits go into Y; the implicit top bit is accounted for
    ; by adding A to the high word of the product further down.
    clr     r2                  ; r2 = 0, used to add carries only
    ldi     YL, low(0x2493)
    ldi     YH, high(0x2493)

    ; r19:r18:r17 = bits 31..8 of XH:XL * YH:YL (the lowest byte is not needed).
    mul     YL, XL              ; low * low: only the high byte is kept
    mov     r17, r1
    mul     YH, XH              ; high * high: top word of the product
    movw    r18, r0
    mul     YH, XL              ; first cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2
    mul     YL, XH              ; second cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2

    ; (product >> 16) + A is a 17 bit value: bit 16 is left in the carry flag
    ; and must be rotated back in by the very next instruction.
    add     r18, XL
    adc     r19, XH
    ror     r19                 ; shift 1 of 4, pulls the carry into bit 15
    ror     r18
    lsr     r19                 ; shift 2 of 4
    ror     r18
    lsr     r19                 ; shift 3 of 4
    ror     r18
    lsr     r19                 ; shift 4 of 4
    ror     r18

    ; Quotient to YH:YL, then R = A - 14*Q on the low bytes only (R < 14).
    movw    YL, r18
    ldi     r17, 14
    mul     r17, r18
    sub     XL, r0              ; XL = remainder
    clr     XH

    pop     r17
    pop     r18
    pop     r19
    pop     r2
    ret
;**** End of function Div16_14 ****
;***************************************************************************
;*
;* Function "Div16_15"
;* Divides an unsigned 16 bit word (XH:XL) by 15
;* Returns quotient in YH:YL and remainder in XL
;*
;* Author: Andreas Lenze (andreas.lenze@tonline.de)
;* Equations by D: W. Jones:
;*
;* Reciprocal mul w. extra precision:
;* unsigned int A;
;* unsigned int scaled_reciprocal = 0x8889;
;* unsigned int Q; /* the quotient */
;*
;* Q = ((A * 0x8889) >> 19)
;*
;* Uses: high regs: 7 (r17, r18, r19, X, Y)
;* low regs: 3 (r0, r1, r2)
;*
;* words: 36 (w. push/pop = 8 words)
;* cycles: 46 (w. push/pop = 16 cycles)
;*
;* Note: Hardware multiplier required ("mul" instruction)
;*
;***************************************************************************
Div16_15:
    ; Stack the scratch registers; they are restored in reverse order below.
    push    r2
    push    r19
    push    r18
    push    r17

    ; r2 = 0 (used to add carries only), Y = 0x8889 = ceil(2^19 / 15).
    clr     r2
    ldi     YL, low(0x8889)
    ldi     YH, high(0x8889)

    ; r19:r18:r17 = bits 31..8 of XH:XL * YH:YL (the lowest byte is not needed).
    mul     YL, XL              ; low * low: only the high byte is kept
    mov     r17, r1
    mul     YH, XH              ; high * high: top word of the product
    movw    r18, r0
    mul     YH, XL              ; first cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2
    mul     YL, XH              ; second cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2

    ; r19:r18 already is product >> 16; three more shifts give Q = product >> 19.
    lsr     r19
    ror     r18
    lsr     r19
    ror     r18
    lsr     r19
    ror     r18

    ; Quotient to YH:YL, then R = A - 15*Q on the low bytes only (R < 15).
    movw    YL, r18
    ldi     r17, 15
    mul     r17, r18
    sub     XL, r0              ; XL = remainder
    clr     XH

    pop     r17
    pop     r18
    pop     r19
    pop     r2
    ret
;**** End of function Div16_15 ****
;***************************************************************************
;*
;* Function "Div16_17"
;* Divides an unsigned 16 bit word (XH:XL) by 17
;* Returns quotient in YH:YL and remainder in XL
;*
;* Author: Andreas Lenze (andreas.lenze@tonline.de)
;* Equations by D: W. Jones:
;*
;* Reciprocal mul w. extra precision:
;* unsigned int A;
;* unsigned int scaled_reciprocal = 0xF0F1;
;* unsigned int Q; /* the quotient */
;*
;* Q = ((A * 0xF0F1) >> 20)
;*
;* Uses: high regs: 7 (r17, r18, r19, X, Y)
;* low regs: 3 (r0, r1, r2)
;*
;* words: 38 (w. push/pop = 10 words)
;* cycles: 44 (w. push/pop = 16 cycles)
;*
;* Note: Hardware multiplier required ("mul" instruction)
;*
;***************************************************************************
Div16_17:
    ; Stack the scratch registers; they are restored in reverse order below.
    push    r2
    push    r19
    push    r18
    push    r17

    ; r2 = 0 (used to add carries only), Y = 0xF0F1 = ceil(2^20 / 17).
    clr     r2
    ldi     YL, low(0xF0F1)
    ldi     YH, high(0xF0F1)

    ; r19:r18:r17 = bits 31..8 of XH:XL * YH:YL (the lowest byte is not needed).
    mul     YL, XL              ; low * low: only the high byte is kept
    mov     r17, r1
    mul     YH, XH              ; high * high: top word of the product
    movw    r18, r0
    mul     YH, XL              ; first cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2
    mul     YL, XH              ; second cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2

    ; r19:r18 already is product >> 16. Shift the word right by 4 more bits
    ; with nibble swaps instead of four lsr/ror pairs: Q = product >> 20.
    swap    r19
    swap    r18
    andi    r18, 0x0F           ; old high nibble of r18, now in the low nibble
    eor     r18, r19            ; temporarily mix in both nibbles of r19
    andi    r19, 0x0F           ; r19 = its old high nibble (final high byte)
    eor     r18, r19            ; cancel that nibble out of r18 again

    ; Quotient to YH:YL, then R = A - 17*Q on the low bytes only (R < 17).
    movw    YL, r18
    ldi     r17, 17
    mul     r17, r18
    sub     XL, r0              ; XL = remainder
    clr     XH

    pop     r17
    pop     r18
    pop     r19
    pop     r2
    ret
;**** End of function Div16_17 ****
;***************************************************************************
;*
;* Function "Div16_18"
;* Divides an unsigned 16 bit word (XH:XL) by 18
;* Returns quotient in YH:YL and remainder in XL
;*
;* Author: Andreas Lenze (andreas.lenze@tonline.de)
;* Equations by D: W. Jones:
;*
;* Reciprocal mul w. extra precision:
;* unsigned int A;
;* unsigned int scaled_reciprocal = 0xE38F;
;* unsigned int Q; /* the quotient */
;*
;* Q = ((A * 0xE38F) >> 20)
;*
;* Uses: high regs: 7 (r17, r18, r19, X, Y)
;* low regs: 3 (r0, r1, r2)
;*
;* words: 38 (w. push/pop = 10 words)
;* cycles: 44 (w. push/pop = 16 cycles)
;*
;* Note: Hardware multiplier required ("mul" instruction)
;*
;***************************************************************************
Div16_18:
    ; Stack the scratch registers; they are restored in reverse order below.
    push    r2
    push    r19
    push    r18
    push    r17

    ; r2 = 0 (used to add carries only), Y = 0xE38F = ceil(2^20 / 18).
    clr     r2
    ldi     YL, low(0xE38F)
    ldi     YH, high(0xE38F)

    ; r19:r18:r17 = bits 31..8 of XH:XL * YH:YL (the lowest byte is not needed).
    mul     YL, XL              ; low * low: only the high byte is kept
    mov     r17, r1
    mul     YH, XH              ; high * high: top word of the product
    movw    r18, r0
    mul     YH, XL              ; first cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2
    mul     YL, XH              ; second cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2

    ; r19:r18 already is product >> 16. Shift the word right by 4 more bits
    ; with nibble swaps instead of four lsr/ror pairs: Q = product >> 20.
    swap    r19
    swap    r18
    andi    r18, 0x0F           ; old high nibble of r18, now in the low nibble
    eor     r18, r19            ; temporarily mix in both nibbles of r19
    andi    r19, 0x0F           ; r19 = its old high nibble (final high byte)
    eor     r18, r19            ; cancel that nibble out of r18 again

    ; Quotient to YH:YL, then R = A - 18*Q on the low bytes only (R < 18).
    movw    YL, r18
    ldi     r17, 18
    mul     r17, r18
    sub     XL, r0              ; XL = remainder
    clr     XH

    pop     r17
    pop     r18
    pop     r19
    pop     r2
    ret
;**** End of function Div16_18 ****
;***************************************************************************
;*
;* Function "Div16_19"
;* Divides an unsigned 16 bit word (XH:XL) by 19
;* Returns quotient in YH:YL and remainder in XL (XH is cleared)
;*
;* Author: Andreas Lenze (andreas.lenze@tonline.de)
;* Equations by D: W. Jones:
;*
;* Reciprocal mul w. extra precision:
;* unsigned int A;
;* unsigned int scaled_reciprocal = 0xD795; /* ceil(2^20 / 19) */
;* unsigned int Q; /* the quotient */
;*
;* Q = ((A * 0xD795) >> 20)
;*
;* Note: the shorter constant 0x6BCB with a shift count of 19 has only 15
;* significant bits and is NOT exact over the full 16 bit range
;* (e.g. A = 30855 yields 1624 instead of 1623), hence the full
;* 16 bit constant 0xD795 with shift count 20 is used here.
;*
;* Uses: high regs: 7 (r17, r18, r19, X, Y)
;* low regs: 3 (r0, r1, r2)
;* r17, r18, r19 and r2 are saved/restored; r0, r1 and Y are overwritten
;*
;* words: 35 (incl. push/pop = 8 words)
;* cycles: 51 incl. ret (push/pop = 16 cycles), classic AVR core timing
;*
;* Note: Hardware multiplier required ("mul" instruction)
;*
;***************************************************************************
Div16_19:
    ; Stack the scratch registers; they are restored in reverse order below.
    push    r2
    push    r19
    push    r18
    push    r17

    ; r2 = 0 (used to add carries only), Y = 0xD795 = ceil(2^20 / 19).
    clr     r2
    ldi     YH, high(0xD795)
    ldi     YL, low(0xD795)

    ; Q = A * 0xD795
    ; (r19:r18:r17[:rXX] = XH:XL * YH:YL)
    mul     XH, YH              ; ah * bh
    movw    r19:r18, r1:r0
    mul     XL, YL              ; al * bl
    mov     r17, r1             ; r0 to [rXX] is superfluous
    mul     XH, YL              ; ah * bl
    add     r17, r0
    adc     r18, r1
    adc     r19, r2
    mul     YH, XL              ; bh * al
    add     r17, r0
    adc     r18, r1
    adc     r19, r2

    ; Q = Q >> 16: use r19:r18 as word
    ; Q = Q >> 4 (nibble swaps instead of four lsr/ror pairs)
    swap    r18
    swap    r19
    andi    r18, 0x0F           ; old high nibble of r18, now in the low nibble
    eor     r18, r19            ; temporarily mix in both nibbles of r19
    andi    r19, 0x0F           ; r19 = its old high nibble (final high byte)
    eor     r18, r19            ; cancel that nibble out of r18 again

    ; r19:r18 now "Q" (= result >> 20)
    ; R = A - 19*Q; only the low bytes are needed because R < 19
    ldi     r17, 19             ; multiply Q (low byte) by the divisor 19
    mul     r18, r17
    sub     XL, r0
    clr     XH
    movw    YL, r18             ; make copy of Q

    ; XL holds "R"
    ; YH:YL holds "Q"
    pop     r17
    pop     r18
    pop     r19
    pop     r2
    ret
;**** End of function Div16_19 ****
;***************************************************************************
;*
;* Function "Div16_20"
;* Divides an unsigned 16 bit word (XH:XL) by 20
;* Returns quotient in YH:YL and remainder in XL
;*
;* Author: Andreas Lenze (andreas.lenze@tonline.de)
;* Equations by D: W. Jones:
;*
;* Reciprocal mul w. extra precision:
;* unsigned int A;
;* unsigned int scaled_reciprocal = 0xCCCD;
;* unsigned int Q; /* the quotient */
;*
;* Q = ((A * 0xCCCD) >> 20)
;*
;* Uses: high regs: 7 (r17, r18, r19, X, Y)
;* low regs: 3 (r0, r1, r2)
;*
;* words: 36 (w. push/pop = 8 words)
;* cycles: 46 (w. push/pop = 16 cycles)
;*
;* Note: Hardware multiplier required ("mul" instruction)
;*
;***************************************************************************
Div16_20:
    ; Stack the scratch registers; they are restored in reverse order below.
    push    r2
    push    r19
    push    r18
    push    r17

    ; r2 = 0 (used to add carries only), Y = 0xCCCD = ceil(2^20 / 20).
    clr     r2
    ldi     YL, low(0xCCCD)
    ldi     YH, high(0xCCCD)

    ; r19:r18:r17 = bits 31..8 of XH:XL * YH:YL (the lowest byte is not needed).
    mul     YL, XL              ; low * low: only the high byte is kept
    mov     r17, r1
    mul     YH, XH              ; high * high: top word of the product
    movw    r18, r0
    mul     YH, XL              ; first cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2
    mul     YL, XH              ; second cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2

    ; r19:r18 already is product >> 16. Shift the word right by 4 more bits
    ; with nibble swaps instead of four lsr/ror pairs: Q = product >> 20.
    swap    r19
    swap    r18
    andi    r18, 0x0F           ; old high nibble of r18, now in the low nibble
    eor     r18, r19            ; temporarily mix in both nibbles of r19
    andi    r19, 0x0F           ; r19 = its old high nibble (final high byte)
    eor     r18, r19            ; cancel that nibble out of r18 again

    ; Quotient to YH:YL, then R = A - 20*Q on the low bytes only (R < 20).
    movw    YL, r18
    ldi     r17, 20
    mul     r17, r18
    sub     XL, r0              ; XL = remainder
    clr     XH

    pop     r17
    pop     r18
    pop     r19
    pop     r2
    ret
;**** End of function Div16_20 ****
;***************************************************************************
;*
;* Function "Div16_21"
;* Divides an unsigned 16 bit word (XH:XL) by 21
;* Call with 16 bit number in XH:XL
;* Returns quotient in YH:YL and remainder in XL
;*
;* Author: Andreas Lenze (andreas.lenze@tonline.de)
;* (Equations partly by D. W. Jones)
;*
;* Reciprocal multiplication w. extra precision:
;* (uses correction to achieve the required precision)
;* unsigned int R; /* remainder */
;* unsigned int long A; /* dividend */
;* unsigned int long Q; /* quotient */
;*
;* Q = ((A * 0xC30B) >> 20)
;*
;* /* Q = A/21 or Q+1 = A/21 for all A <= 65535 */
;* /* correct Q and calculate remainder */
;* R = A  21*Q
;* if (R >= 21) {
;* R = R  21;
;* Q = Q + 1;
;* }
;* Uses: high regs: 7 (r17, r18, r19, X, Y)
;* low regs: 3 (r0, r1, r2)
;*
;* words: 40 (w. push/pop = 8 words)
;* cycles: 52 (w. push/pop = 16 cycles)
;*
;* Note: Hardware multiplier required ("mul" instruction)
;*
;***************************************************************************
Div16_21:
    ; Stack the scratch registers; they are restored in reverse order below.
    push    r2
    push    r19
    push    r18
    push    r17

    ; r2 = 0 (used to add carries only), Y = 0xC30B, slightly below 2^20 / 21.
    ; The resulting quotient can be one too low, which the correction step
    ; at the end repairs.
    clr     r2
    ldi     YL, low(0xC30B)
    ldi     YH, high(0xC30B)

    ; r19:r18:r17 = bits 31..8 of XH:XL * YH:YL (the lowest byte is not needed).
    mul     YL, XL              ; low * low: only the high byte is kept
    mov     r17, r1
    mul     YH, XH              ; high * high: top word of the product
    movw    r18, r0
    mul     YH, XL              ; first cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2
    mul     YL, XH              ; second cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2

    ; r19:r18 already is product >> 16. Shift the word right by 4 more bits
    ; with nibble swaps instead of four lsr/ror pairs: Q = product >> 20.
    swap    r19
    swap    r18
    andi    r18, 0x0F           ; old high nibble of r18, now in the low nibble
    eor     r18, r19            ; temporarily mix in both nibbles of r19
    andi    r19, 0x0F           ; r19 = its old high nibble (final high byte)
    eor     r18, r19            ; cancel that nibble out of r18 again

    ; Provisional quotient to YH:YL, provisional R = A - 21*Q in XL.
    movw    YL, r18
    ldi     r17, 21
    mul     r17, r18
    sub     XL, r0
    clr     XH

    ; if (R >= 21) { R -= 21; Q += 1; }
    cpi     XL, 21
    brlo    Div16_21_done
    subi    XL, 21
    adiw    YL, 1
Div16_21_done:
    pop     r17
    pop     r18
    pop     r19
    pop     r2
    ret
;**** End of function Div16_21 ****
;***************************************************************************
;*
;* Function "Div16_22"
;* Divides an unsigned 16 bit word (XH:XL) by 22
;* Returns quotient in YH:YL and remainder in XL
;*
;* Author: Andreas Lenze (andreas.lenze@tonline.de)
;* Equations by D: W. Jones:
;*
;* Reciprocal mul w. extra precision:
;* unsigned int A;
;* unsigned int scaled_reciprocal = 0xBA2F;
;* unsigned int Q; /* the quotient */
;*
;* Q = ((A * 0xBA2F) >> 20)
;*
;* Uses: high regs: 7 (r17, r18, r19, X, Y)
;* low regs: 3 (r0, r1, r2)
;*
;* words: 36 (w. push/pop = 8 words)
;* cycles: 46 (w. push/pop = 16 cycles)
;*
;* Note: Hardware multiplier required ("mul" instruction)
;*
;***************************************************************************
Div16_22:
    ; Stack the scratch registers; they are restored in reverse order below.
    push    r2
    push    r19
    push    r18
    push    r17

    ; r2 = 0 (used to add carries only), Y = 0xBA2F = ceil(2^20 / 22).
    clr     r2
    ldi     YL, low(0xBA2F)
    ldi     YH, high(0xBA2F)

    ; r19:r18:r17 = bits 31..8 of XH:XL * YH:YL (the lowest byte is not needed).
    mul     YL, XL              ; low * low: only the high byte is kept
    mov     r17, r1
    mul     YH, XH              ; high * high: top word of the product
    movw    r18, r0
    mul     YH, XL              ; first cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2
    mul     YL, XH              ; second cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2

    ; r19:r18 already is product >> 16. Shift the word right by 4 more bits
    ; with nibble swaps instead of four lsr/ror pairs: Q = product >> 20.
    swap    r19
    swap    r18
    andi    r18, 0x0F           ; old high nibble of r18, now in the low nibble
    eor     r18, r19            ; temporarily mix in both nibbles of r19
    andi    r19, 0x0F           ; r19 = its old high nibble (final high byte)
    eor     r18, r19            ; cancel that nibble out of r18 again

    ; Quotient to YH:YL, then R = A - 22*Q on the low bytes only (R < 22).
    movw    YL, r18
    ldi     r17, 22
    mul     r17, r18
    sub     XL, r0              ; XL = remainder
    clr     XH

    pop     r17
    pop     r18
    pop     r19
    pop     r2
    ret
;**** End of function Div16_22 ****
;***************************************************************************
;*
;* Function "Div16_23"
;* Divides an unsigned 16 bit word (XH:XL) by 23
;* Call with 16 bit number in XH:XL
;* Returns quotient in YH:YL and remainder in XL
;*
;* Author: Andreas Lenze (andreas.lenze@tonline.de)
;* (Equations partly by D. W. Jones)
;*
;* Reciprocal multiplication w. extra precision:
;* (uses correction to achieve the required precision)
;* unsigned int R; /* remainder */
;* unsigned int long A; /* dividend */
;* unsigned int long Q; /* quotient */
;*
;* Q = ((A * 0xB215) >> 20)
;*
;* /* Q = A/23 or Q+1 = A/23 for all A <= 65535 */
;* /* correct Q and calculate remainder */
;* R = A - 23*Q
;* if (R >= 23) {
;* R = R  23;
;* Q = Q + 1;
;* }
;* Uses: high regs: 7 (r17, r18, r19, X, Y)
;* low regs: 3 (r0, r1, r2)
;*
;* words: 36 (w. push/pop = 8 words)
;* cycles: 59 (w. push/pop = 20 cycles)
;*
;* Note: Hardware multiplier required ("mul" instruction)
;*
;***************************************************************************
Div16_23:
    ; Stack the scratch registers; they are restored in reverse order below.
    push    r2
    push    r19
    push    r18
    push    r17

    ; r2 = 0 (used to add carries only), Y = 0xB215, slightly below 2^20 / 23.
    ; The resulting quotient can be one too low, which the correction step
    ; at the end repairs.
    clr     r2
    ldi     YL, low(0xB215)
    ldi     YH, high(0xB215)

    ; r19:r18:r17 = bits 31..8 of XH:XL * YH:YL (the lowest byte is not needed).
    mul     YL, XL              ; low * low: only the high byte is kept
    mov     r17, r1
    mul     YH, XH              ; high * high: top word of the product
    movw    r18, r0
    mul     YH, XL              ; first cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2
    mul     YL, XH              ; second cross product
    add     r17, r0
    adc     r18, r1
    adc     r19, r2

    ; r19:r18 already is product >> 16. Shift the word right by 4 more bits
    ; with nibble swaps instead of four lsr/ror pairs: Q = product >> 20.
    swap    r19
    swap    r18
    andi    r18, 0x0F           ; old high nibble of r18, now in the low nibble
    eor     r18, r19            ; temporarily mix in both nibbles of r19
    andi    r19, 0x0F           ; r19 = its old high nibble (final high byte)
    eor     r18, r19            ; cancel that nibble out of r18 again

    ; Provisional quotient to YH:YL, provisional R = A - 23*Q in XL.
    movw    YL, r18
    ldi     r17, 23
    mul     r17, r18
    sub     XL, r0
    clr     XH

    ; if (R >= 23) { R -= 23; Q += 1; }
    cpi     XL, 23
    brlo    Div16_23_done
    subi    XL, 23
    adiw    YL, 1
Div16_23_done:
    pop     r17
    pop     r18
    pop     r19
    pop     r2
    ret
;**** End of function Div16_23 ****
;***************************************************************************
; macro definition to call/use "division by xx"  module (D16_nn)
; Div16by <divisor>
; Convenience wrapper around D16_nn: loads the constant divisor (@0) into
; r20, calls D16_nn and restores the caller's r20 afterwards.
; Call with the dividend in XH:XL, e.g. "Div16by 17".
.macro Div16by
    push r20                ; keep the caller's r20
    ldi r20,@0              ; macro argument 0 = divisor
    call D16_nn
    pop r20
.endm
;***************************************************************************
;***************************************************************************
;*
;* Function "D16_nn"
;* Divides an unsigned 16 bit word by [2] -> [23]
;* Note: divisor 2, 4, 8, 16 options are provided for remainder calculation
;* and (for ease-of-use) to cover the complete divisors range (2-23)
;*
;* Call with dividend loaded to XH:XL (high/low bytes)
;* Returns quotient in YH:YL and remainder in XL
;*
;* Usage: define the macro "Div16by" prior to using the function,
;* use macro with divisor as parameter, e.g. "Div16by 17" to
;* divide XH:XL by 17 decimal
;*
;* .macro Div16by
;* push r20
;* ldi r20,@0
;* call D16_nn
;* pop r20
;* .endm
;*
;* Author: Andreas Lenze (andreas.lenze@tonline.de)
;* Feb. 2003
;* Equations mostly by D. W. Jones
;*
;* Reciprocal mul w. extra precision:
;* unsigned int A;
;* unsigned int scaled_reciprocal = xxxx
;* unsigned int Q; /* the quotient */
;*
;* Q = ((A * scaled_reciprocal) >> 16) >> nn
;* or
;* Q = (((A * scaled_reciprocal) >> 16) + A) >> nn > for /7 and /14
;*
;* /* special case: use correction for Q (e.g. for /21, /23) */
;* if (R >= divisor)
;* R = R - divisor;
;* Q = Q + 1;
;*
;* div. by n: scaled reciprocal: shift count:
;*
;* 2  1
;* 3 1010101010101011 AAAB 17
;* 4  2
;* 5 1100110011001101 CCCD 18
;* 6 1010101010101011 AAAB 18
;* 7 10010010010010011 19 > 17 bits req'd,(MSB=1,rest 2493h)
;* 8  3
;* 9 1110001110001111 E38F 19
;* 10 1100110011001101 CCCD 19
;* 11 1011101000101111 BA2F 19
;* 12 1010101010101011 AAAB 19
;* 13 1001110110001010 9D8A 19
;* 14 10010010010010011 20 > 17 bits req'd,(MSB=1,rest 2493h)
;* 15 1000100010001001 8889 19
;* 16  4
;* 17 1111000011110001 F0F1 20
;* 18 1110001110001111 E38F 20
;* 19 0110101111001011 6BCB 19
;* 20 1100110011001101 CCCD 20
;* 21 1100001100001011 C30B 20 > needs correction for accurate result
;* 22 1011101000101111 BA2F 20
;* 23 1011001000010101 B215 20 > needs correction for accurate result
;*
;* Uses: high regs: 11 (r16, r17, r18, r19, r20, X, Y, Z)
;* low regs: 3 (r0, r1, r2)
;* regs 16-20 saved, all others destroyed
;* T-flag destroyed (cleared)
;*
;* words: 97 (incl. 5 words for macro)
;* cycles: 85-111 (w. call/ret & macro), typically 102
;* table bytes: 66
;*
;* Target: AVR MCUs with hardware multiplier ("mul" instruction and
;* "lpm rd,Z/Z+" functionality required)
;*
;***************************************************************************
; Lookup table for D16_nn: one 3-byte entry per divisor, "2" to "23",
; addressed by D16_nn as D16_nT*2 + 3*(divisor - 2).
;   byte 0, byte 1: scaled reciprocal (high byte, low byte); a low byte of
;                   0xFF marks divisors 2, 4, 8, 16 (shifts only, no "mul")
;   byte 2:         flags: 1..4    = number of plain right shifts
;                          0x13/14 = 3 / 4 "special" shifts for /7 and /14
;                                    (dividend is added before shifting)
; The label is placed after ".cseg" so that it is always bound to a code
; segment address, whatever segment was active before this point.
; NOTE(review): each .db line carries an even number of bytes (two entries);
; the AVR assembler is documented to pad odd-length .db lines in the code
; segment - keep the lines even when editing.
.cseg
D16_nT:
.db 0x00, 0xFF, 0x01, 0xAA, 0xAB, 0x01 ; /2,  /3
.db 0x00, 0xFF, 0x02, 0xCC, 0xCD, 0x02 ; /4,  /5
.db 0xAA, 0xAB, 0x02, 0x24, 0x93, 0x13 ; /6,  /7
.db 0x00, 0xFF, 0x03, 0xE3, 0x8F, 0x03 ; /8,  /9
.db 0xCC, 0xCD, 0x03, 0xBA, 0x2F, 0x03 ; /10, /11
.db 0xAA, 0xAB, 0x03, 0x9D, 0x8A, 0x03 ; /12, /13
.db 0x24, 0x93, 0x14, 0x88, 0x89, 0x03 ; /14, /15
.db 0x00, 0xFF, 0x04, 0xF0, 0xF1, 0x04 ; /16, /17
.db 0xE3, 0x8F, 0x04, 0x6B, 0xCB, 0x03 ; /18, /19
.db 0xCC, 0xCD, 0x04, 0xC3, 0x0B, 0x04 ; /20, /21
.db 0xBA, 0x2F, 0x04, 0xB2, 0x15, 0x04 ; /22, /23
D16_nn:
; Table driven division of an unsigned 16 bit word by a constant 2..23.
; In:  XH:XL = dividend A, r20 = divisor
; Out: YH:YL = quotient, XL = remainder (XH cleared)
; For a divisor outside 2..23 the routine returns without computing a result.
; r16..r19 are saved/restored here; r20, Z, r0, r1, r2 are overwritten and
; the T flag is left cleared.
    push r19                ; save scrap regs
    push r18
    push r17
    push r16
    clt                     ; T = 0; only the /7 path (D16_4) sets it
    push r20                ; park divisor on the stack (popped at D16_9)
    cpi r20,24              ; exit if divisor > 23
    brsh D16_0
    subi r20,2              ; r20 = table index (divisor - 2)
    brmi D16_0              ; exit if divisor <= 1
    ldi ZH,high(D16_nT*2)   ; Z = byte address of the table
    ldi ZL,low(D16_nT*2)
    clr r2                  ; r2 = 0 for carry propagation
    mov r1,r20
    lsl r20                 ; index x 2
    add r20,r1              ; + index = x 3 (3 bytes per entry)
    add ZL,r20              ; point Z to the divisor's table entry
    adc ZH,r2
    lpm YH,Z+               ; scaled reciprocal, high byte
    lpm YL,Z+               ; scaled reciprocal, low byte; Z now at flags
    cpi YL,0xFF             ; low byte 0xFF marks divisor 2, 4, 8, 16
    brne D16_1              ; != 0xFF: "mul" required
    mov r19,XH              ; shifts only: copy the dividend into the
    mov r18,XL              ; result registers and go to the shifts
    rjmp D16_2
; Error exit used while the divisor byte is still on the stack
D16_0:
    pop r20                 ; drop the parked divisor (restores r20)
    rjmp D16_Err
; Q = A * scaled_reciprocal
; (r19:r18:r17 = upper three bytes of XH:XL * YH:YL)
D16_1:
    clr r2
    mul XH, YH              ; ah * bh
    movw r19:r18, r1:r0
    mul XL, YL              ; al * bl
    mov r17,r1              ; low byte (r0) is not needed
    mul XH, YL              ; ah * bl
    add r17, r0
    adc r18, r1
    adc r19, r2
    mul YH, XL              ; bh * al
    add r17, r0
    adc r18, r1
    adc r19, r2
; Q = Q >> 16: use r19:r18 as word, then do the remaining shifts
D16_2:
    lpm r20,Z               ; fetch "flags" byte of the table entry
    cpi r20,3               ; 3 normal shifts
    breq D16_6
    cpi r20,4               ; 4 normal shifts
    breq D16_3
    cpi r20,2               ; 2 normal shifts
    breq D16_7
    cpi r20,1               ; 1 normal shift
    breq D16_8
    cpi r20,0x14            ; 4 special shifts (/14)
    breq D16_5
    cpi r20,0x13            ; 3 special shifts (/7)
    breq D16_4
; No valid flags: the divisor is still on the stack here, so leave through
; D16_0 (which pops it) and not directly through D16_Err; otherwise the
; register restore and the return address would be off by one byte.
    rjmp D16_0
; Q = Q >> 4
D16_3:
    swap r18                ; 4 normal shifts via nibble swap
    swap r19
    andi r18,0x0F
    eor r18,r19
    andi r19,0x0F
    eor r18,r19
    rjmp D16_9
; Q = (Q + A) >> 3 or >> 4
D16_4:
    set                     ; T = 1: one shift fewer (see brts below)
D16_5:
    add r18,XL              ; (Q + A)
    adc r19,XH
    ror r19                 ; 1st shift takes in the carry of the addition
    ror r18
D16_6:
    lsr r19
    ror r18
D16_7:
    lsr r19
    ror r18
    brts D16_9              ; T set: skip the last shift
D16_8:
    lsr r19
    ror r18
; r19:r18 now "Q"
; R = A - divisor*Q (low bytes only)
D16_9:
    pop r16                 ; r16 = divisor parked at function entry
    mul r18, r16            ; r0 = low byte of divisor * Q
    sub XL,r0
    clr XH
    movw YL,r18             ; YH:YL = Q
; correction: if (R >= divisor) { R = R - divisor; Q = Q + 1; }
    cp XL,r16
    brlo PC+3               ; skips the next two one-word instructions
    sub XL,r16
    adiw YL,1
; Common exit: expects only r16..r19 to be left on the stack
D16_Err:
    clt                     ; make sure the T flag is cleared
    pop r16                 ; restore regs
    pop r17
    pop r18
    pop r19
    ret
;**** End of function D16_nn ****

