|
This is an attempt to provide some FFT-math to the AVR community. Code has been tested with AVR-Studio4.
Works fine! FFT-Output is about 7 bits accurate.
******************
Known limitations and problems:
The code assumes input data to be in the range -127 ($81) ... +127 ($7F).
Input-data is organized in SRAM as
Data1LowByte,Data1HighByte,
Data2LowByte,Data2HighByte,
Data3LowByte,Data3HighByte,....
The FFT uses "in-place" operations and produces the following output:
Re(f0)LowByte,Re(f0)HighByte,
Im(f0)LowByte,Im(f0)HighByte,
Re(f1)LowByte,Re(f1)HighByte,
Im(f1)LowByte,Im(f1)HighByte,....
Thus for n=128 data points (at the moment this is the maximum), we get 64 frequency Data points.
CPU time for different FFT-lengths:
n=32 T=4,05ms
n=64 T=9,3ms
n=128 T=20,95ms
**************************
Things to do:
1) I hope to find some inspiration on the scaling issue (to allow greater input range).
2) The loop-counters are still 8bit only, so these would need to be 16bit to get longer FFTs working.
At the moment 64 frequency bins represent maximum resolution.
3) Include amplitude calculation, because most people just want to know amplitude vs. frequency and nothing more.
;*************************************************************
;* FFT program for up to 128 data points                     *
;* Real input data are transformed "in place"                *
;* 128 data points yield 64 real/imaginary data pairs        *
;* Written by niels laugwitz for AVR microcontrollers        *
;*************************************************************
;
;.device ATmega8
.include "m8def.inc" ;load the register definitions for the ATmega8
;--- Register aliases ------------------------------------------------
;Several names below share one physical register on purpose:
;  r2..r5   : temprL/temprH/tempiL/tempiH (butterfly in schleife_jj_body)
;             and i1..i4 (index registers of the for_schleife_i loop)
;  r18/r19  : jj/mmax (FFT loops) and tmp_lo/tmp_hi (for_schleife_i loop)
;The two uses never overlap in time in the code of this file.
.DEF temprL = r2 ;butterfly: real part of w*data[j], low byte
.def temprH = r3 ;butterfly: real part of w*data[j], high byte
.DEF tempiL = r4 ;butterfly: imaginary part of w*data[j], low byte
.def tempiH = r5 ;butterfly: imaginary part of w*data[j], high byte
.DEF i1 = r2 ;used in for_schleife_i
.def i2 = r3 ;used in for_schleife_i
.DEF i3 = r4 ;used in for_schleife_i
.def i4 = r5 ;used in for_schleife_i
.def sortierzeiger=r0; destination index read from the reorder table
.def nullwert=r6 ;constant zero, used as the "adc Rx,0" operand
.def jjende=r7 ;last value of jj in the innermost FFT loop
.def data1 = R8 ;data1..data4: 16-bit operands loaded from / stored to SRAM
.def data2 =R9
.def data3 =R10
.def data4 =R11
.def wrL=r12 ;cosine factor (real part of the twiddle), low byte
.def wrH=r13 ;cosine factor, high byte
.def wiL=r14 ;sine factor (imaginary part of the twiddle), low byte
.def wiH=r15 ;sine factor, high byte
.DEF temp =r16 ;general scratch register
.DEF ii =r17 ;middle FFT loop counter
.DEF jj =r18 ;innermost FFT loop counter
.DEF mmax =r19 ;current butterfly span
.DEF j =r20 ;index of the second butterfly element
.DEF istep=r21 ;2*mmax
.def i =r22 ;index of the first butterfly element / general loop counter
.DEF m =r23 ;2*ii-1
.def zaehler=r24 ;table stride for the sine/cosine lookup
.DEF tmp_lo =r18 ;used in for_schleife_i
.DEF tmp_hi =r19 ;used in for_schleife_i
.set nn2 =128 ;number of data points (FFT length)
;8 <= nn2 <= 128
;NOTE(review): the code refers to sinetab, cosinetab and
;speicherorte_128_werte by name, so presumably those references have to
;be switched to the _64/_32 tables when nn2 is changed - confirm.
;Scratch SRAM cells for the intermediate values of for_schleife_calculus
.equ h1r_lo =$60
.equ h1r_hi =$61
.equ h1i_lo =$62
.equ h1i_hi =$63
.equ h2r_lo =$64
.equ h2r_hi =$65
.equ h2i_lo =$66
.equ h2i_hi =$67
;Base address of the data array; element k (1-based, 16 bit) is stored
;at sramstart+2*k, so the first value is at sramstart+2.
.equ sramstart = $0080
.org $0000
rjmp main ;reset
;NOTE(review): avr201.asm is not visible here; whatever it contains is
;assembled directly behind the reset vector.
.include "avr201.asm"
schleife_jj_body:
;----------------------------------------------------------------------
; One FFT butterfly on the 16-bit data array in SRAM.
;
;   i       := m + jj*istep
;   j       := i + mmax
;   tempr   := (wr*data[j]   - wi*data[j+1]) >> 8
;   tempi   := (wr*data[j+1] + wi*data[j]  ) >> 8
;   data[j]   := data[i]   - tempr
;   data[j+1] := data[i+1] - tempi
;   data[i]   := data[i]   + tempr
;   data[i+1] := data[i+1] + tempi
;
; In:       jj, istep, m, mmax, wrH:wrL, wiH:wiL; nullwert must be 0
;           (it is the zero operand of adc here and inside muls16x16_32).
; Out:      the four array elements above are updated; i and j hold the
;           indices, temp holds a copy of j.
; Clobbers: r0, r1, temp, temprL/H, tempiL/H, data1..data4, X, SREG.
;
; The four elements are distinct because j-i = mmax >= 2, so the order
; of the stores is free. data[i] is therefore loaded only once and both
; results derived from it are written before data[i+1] is fetched.
;----------------------------------------------------------------------
;Compute the indices i and j
    mul jj,istep            ;r1:r0 := jj*istep (only the low byte is used)
    mov i,r0
    add i,m                 ;i := m + jj*istep
    mov temp,i
    add temp,mmax
    mov j,temp              ;j := i + mmax
;Fetch data[j] and data[j+1] from SRAM
    ldi XH,high(sramstart)
    ldi XL,low(sramstart)
    add XL,j
    adc XH,nullwert         ;the array holds 16-bit values,
    add XL,j                ;so the index is added twice
    adc XH,nullwert
    ld data1,X+             ;data[j] low byte
    ld data2,X+             ;data[j] high byte
    ld data3,X+             ;data[j+1] low byte
    ld data4,X              ;data[j+1] high byte
;muls16x16_32 needs r16..r23 for operands and result, but these hold
;the loop variables - park them on the stack during the multiplications.
    push r23
    push r22
    push r21
    push r20
    push r19
    push r18
    push r17
    push r16
;Compute tempr
    movw r21:r20,wrh:wrl
    movw r23:r22,data2:data1
    rcall muls16x16_32      ;wr*data[j] (16 bit signed)
    mov temprH,r18          ;keep only the two middle result bytes,
    mov temprL,r17          ;i.e. the product shifted right by 8
    movw r21:r20,wih:wil
    movw r23:r22,data4:data3
    rcall muls16x16_32      ;wi*data[j+1] (16 bit signed)
    mov tempiH,r18
    mov tempiL,r17
    sub temprL,tempiL
    sbc temprH,tempiH       ;tempr := wr*data[j] - wi*data[j+1]
;Compute tempi; tempr is saved because r3:r2 is reused as scratch
    push temprL
    push temprH
    movw r21:r20,wrh:wrl
    movw r23:r22,data4:data3
    rcall muls16x16_32      ;wr*data[j+1] (16 bit signed)
    mov temprH,r18
    mov temprL,r17
    movw r21:r20,wih:wil
    movw r23:r22,data2:data1
    rcall muls16x16_32      ;wi*data[j] (16 bit signed)
    mov tempiH,r18
    mov tempiL,r17
    add tempiL,temprL
    adc tempiH,temprH       ;tempi := wr*data[j+1] + wi*data[j]
    pop temprH
    pop temprL
    pop r16
    pop r17
    pop r18
    pop r19
    pop r20
    pop r21
    pop r22
    pop r23
;Multiplications finished, stack and registers are restored.
;Fetch data[i]
    ldi XH,high(sramstart)
    ldi XL,low(sramstart)
    add XL,i
    adc XH,nullwert
    add XL,i
    adc XH,nullwert
    ld data3,X+             ;data[i] low byte
    ld data4,X              ;data[i] high byte
;data[j] := data[i] - tempr
    movw data2:data1,data4:data3
    sub data1,temprL
    sbc data2,temprH
    ldi XH,high(sramstart)
    ldi XL,low(sramstart)
    add XL,j
    adc XH,nullwert
    add XL,j
    adc XH,nullwert
    st X+,data1
    st X,data2
;data[i] := data[i] + tempr
    add data3,temprL
    adc data4,temprH
    ldi XH,high(sramstart)
    ldi XL,low(sramstart)
    add XL,i
    adc XH,nullwert
    add XL,i
    adc XH,nullwert
    st X+,data3
    st X+,data4             ;X now points at data[i+1]
;Fetch data[i+1]
    ld data3,X+             ;data[i+1] low byte
    ld data4,X              ;data[i+1] high byte
;data[j+1] := data[i+1] - tempi
    movw data2:data1,data4:data3
    sub data1,tempiL
    sbc data2,tempiH
    ldi XH,high(sramstart)
    ldi XL,low(sramstart)
    add XL,j
    adc XH,nullwert
    add XL,j
    adc XH,nullwert
    adiw XH:XL,2            ;step from data[j] to data[j+1]
    st X+,data1
    st X,data2
;data[i+1] := data[i+1] + tempi
    add data3,tempiL
    adc data4,tempiH
    ldi XH,high(sramstart)
    ldi XL,low(sramstart)
    add XL,i
    adc XH,nullwert
    add XL,i
    adc XH,nullwert
    adiw XH:XL,2            ;step from data[i] to data[i+1]
    st X+,data3
    st X,data4
    ret                     ;back to the jj loop
;****************************
;****** HAUPTPROGRAMM *******
;****************************
main:
;Initialise the stack pointer to the top of SRAM
    ldi r16,high(RAMEND)
    out spH,r16
    ldi r16,low(RAMEND)
    out spl,r16
;nullwert (r6) serves as the constant-zero operand of every "adc" in
;this program and inside muls16x16_32. The general purpose registers are
;not cleared by a reset, so it has to be zeroed explicitly before its
;first use in the copy loop that follows.
    clr nullwert
;NOTE(review): interrupts are enabled although this file defines no
;vector other than reset - confirm that this is intended.
    sei
;Copy the test data from flash into SRAM.
;Each 8-bit signed sample is widened to a 16-bit signed value.
;The FFT also produces 16-bit data,
;of which only about 7 bits are significant.
;NOTE(review): the loop relies on nullwert (r6) being 0 when it is
;entered; the first "clr nullwert" inside the loop comes only after the
;first pass has already used the register.
ldi ZH,high(2*testdata)
ldi ZL, low(2*testdata)
clr i
blockkopieren: ;writing each value straight to its reordered position
;avoids a separate reordering pass over the array - simple and stupid!
LPM temp,Z+ ;read the next sample from the source table testdata
push ZH ;save the source pointer while Z addresses the reorder table
push ZL
ldi ZH,high(2*speicherorte_128_werte)
ldi ZL, low(2*speicherorte_128_werte)
add ZL,i
adc ZH,nullwert
lpm sortierzeiger,Z ;destination index for sample number i
pop ZL
pop ZH
ldi XH,0
ldi XL,sramstart
add XL,sortierzeiger
adc XH,nullwert
add XL,sortierzeiger ;index added twice: two bytes per array element
adc XH,nullwert
sbrc temp,7 ;if temp is negative, then
dec nullwert ;temporarily turn nullwert into $FF (sign extension)
st X+,temp ;store the low byte first
st X,nullwert ;high byte (for now it only carries the sign)
;all data are stored reordered in SRAM
clr nullwert ;set nullwert back to zero!
inc i
cpi i,nn2
brne blockkopieren
;Now the data for the FFT are reordered and located in SRAM starting at
;address (sramstart+2)=$0082. In your own application the data should be
;stored according to the reorder table "speicherorte_XX_werte" right from
;the start. The values are kept in SRAM as 16-bit signed integers!
;dw = LOW:HIGH = value:sign
;****************************************
;the actual FFT starts here
;Loop structure:
;  mmax := 2
;  while mmax < nn2:
;      istep := 2*mmax
;      for ii := 1 to mmax/2:
;          m := 2*ii-1 ; look up wr/wi for this ii
;          for jj := 0 to (nn2-m)/istep: schleife_jj_body
;      mmax := istep
ldi mmax,2
while_schleife_start:
cpi mmax,nn2 ;compare with the FFT length nn2
brlo while_schleife_body ;while nn2>mmax do while_schleife_body
rjmp while_schleife_ende ;end of the while loop reached
while_schleife_body:
mov istep,mmax
lsl istep ;istep:=2*mmax
clr ii ;ii:=0
for_schleife_ii_start: ;for ii=1 to (mmax div 2)
inc ii
mov m,ii
lsl m ;m:=2*ii
dec m ;m:=2*ii-1
ldi temp,nn2 ;FFT length
sub temp,m ;nn2-m
mov dd8u,temp
mov dv8u,istep
rcall div8u ;division routine; overwrites r15 (wiH), r24 (zaehler), r25
;.def drem8u =r15 ;remainder
;.def dres8u =r24 ;result
;.def dd8u =r24 ;dividend
;.def dv8u =r25 ;divisor
mov jjende,dres8u ;jjende:=(nn2-m) div istep
clr jj ;jj:=0, initialisation for the inner loop
;Now fetch the cosine value
ldi ZH,high(2*cosinetab)
ldi ZL, low(2*cosinetab)
ldi zaehler,nn2
mov temp,mmax
lsr temp
pp: lsr zaehler ;halve zaehler once per bit of mmax/2:
lsr temp ;mmax is a power of two, so zaehler ends as nn2/mmax
brne pp
mov temp,ii
dec temp
mul temp,zaehler ;r1:r0 := (ii-1)*nn2/mmax = table index
add ZL,r0
adc ZH,r1
add ZL,r0 ;the index is added twice because
adc ZH,r1 ;every table entry is 2 bytes wide
lpm wrL,Z+ ;now we have wrL(ow)
lpm wrH,Z ;now we have wrH(igh)
;Now fetch the sine value (same index computation as above)
ldi ZH,high(2*sinetab)
ldi ZL, low(2*sinetab)
ldi zaehler,nn2
mov temp,mmax
lsr temp
pq: lsr zaehler
lsr temp
brne pq
mov temp,ii
dec temp
mul temp,zaehler
add ZL,r0
adc ZH,r1
add ZL,r0 ;the index is added twice because
adc ZH,r1 ;every table entry is 2 bytes wide
lpm wiL,Z+ ;now we have wiL(ow)
lpm wiH,Z ;now we have wiH(igh)
for_schleife_jj_start:
rcall Schleife_jj_body ;the loop body is called as a subroutine so
;that the loop can be closed with a branch
;instruction.
;(the problem is the limited range of brsh)
inc jj
cp jjende,jj
brsh for_schleife_jj_start ;repeat while jj <= jjende
;end of the jj loop
mov temp,mmax
lsr temp ;iiende:=mmax div 2
cp ii,temp
brlo for_schleife_ii_start
;this is the end of for_schleife_ii
mov mmax,istep
rjmp while_schleife_start
While_schleife_ende:
; The FFT itself is done! The result still has to be recombined,
; because the input data were real values.
; For i = 2 .. (nn2 div 4)+1 the four elements data[i1..i4] are rebuilt
; by for_schleife_calculus using the cosine/sine values at table index i-1.
ldi i,1 ;initialisation of i
for_schleife_i_anfang:
inc i ;for i:=2 to ((nn2 div 4)+1)
;compute the indices i1..i4
mov i1,i
lsl i1
dec i1 ; i1:=i+i-1
mov i2,i1
inc i2 ; i2:= i1+1
ldi temp,nn2
sub temp,i2
subi temp,-3 ;adds 3 (there is no add-immediate instruction)
mov i3,temp ; i3:=nn2-i2+3
mov i4,i3
inc i4 ; i4:=i3+1
;Now fetch wr
ldi ZH,high(2*cosinetab)
ldi ZL, low(2*cosinetab)
mov temp,i
dec temp
lsl temp ;byte offset = 2*(i-1)
add ZL,temp
adc ZH,nullwert
lpm wrL,Z+ ;now we have wrL(ow)
lpm wrH,Z ;now we have wrH(igh)
;Now fetch wi
ldi ZH,high(2*sinetab)
ldi ZL, low(2*sinetab)
mov temp,i
dec temp
lsl temp ;byte offset = 2*(i-1)
add ZL,temp
adc ZH,nullwert
lpm wiL,Z+ ;now we have wiL(ow)
lpm wiH,Z ;now we have wiH(igh)
rcall for_schleife_calculus ;a subroutine here as well, so that the
;loop can be closed with brlo.
;test whether the FOR loop is finished
ldi temp,nn2
lsr temp
lsr temp
inc temp ;temp := (nn2 div 4)+1
cp i,temp
brlo for_schleife_i_anfang ;repeat while i < (nn2 div 4)+1
for_schleife_i_exit: ;the recombination is done.
nop
;here comes compensation for DC-Value.
; if DC is not important, then just skip the following routine
rcall DC_compensation
;***********************************************************
;now the fft is done.
;data contains real-part and imaginary-part as 16bit values
;arranged in pairs first real, then imaginary
;***********************************************************
ende: ;the work is done!
rjmp ende ;endless loop - the program stops here
for_schleife_calculus:
;----------------------------------------------------------------------
; Recombination step for one index quadruple i1..i4.
;   h1r :=   data[i1]/2 + data[i3]/2
;   h1i :=   data[i2]/2 - data[i4]/2
;   h2r :=   data[i2]/2 + data[i4]/2
;   h2i := -(data[i1]/2 - data[i3]/2)
;   data[i1] :=  h1r + wr*h2r - wi*h2i
;   data[i2] :=  h1i + wr*h2i + wi*h2r
;   data[i3] :=  h1r - wr*h2r + wi*h2i
;   data[i4] := -h1i + wr*h2i + wi*h2r
; Every product is taken from the two middle bytes of the 32-bit result
; of muls16x16_32, i.e. it is shifted right by 8.
; In:       i1..i4, wrH:wrL, wiH:wiL; nullwert must be 0.
; Out:      the four array elements are updated in SRAM.
; Uses:     SRAM cells h1r, h1i, h2r, h2i as intermediate storage.
; Clobbers: r0, r1, temp, tmp_lo, tmp_hi, data1..data4, X, SREG.
;           temp/tmp_lo/tmp_hi are changed before r16..r23 are pushed,
;           so the pops restore those already modified values.
;----------------------------------------------------------------------
;***** compute h1r and h2i
;load Data[i1] into data2:data1
ldi XH,0
ldi XL,sramstart
add XL,i1
adc XH,nullwert
add XL,i1
adc XH,nullwert
ld data1,X+ ;low byte first
ld data2,X ;high byte
;load Data[i3] into register pair data4:data3
ldi XH,0
ldi XL,sramstart
add XL,i3
adc XH,nullwert
add XL,i3
adc XH,nullwert
ld data3,X+ ;low byte first
ld data4,X ;high byte
asr data2
ror data1 ;data[i1]:=data[i1]/2
asr data4
ror data3 ;data[i3]:=data[i3]/2
movw tmp_hi:tmp_lo,data2:data1
add tmp_lo,data3
adc tmp_hi,data4 ;h1r:=data[i1]+data[i3]
sts h1r_lo,tmp_lo
sts h1r_hi,tmp_hi ;save h1r to sram-location
movw tmp_hi:tmp_lo,data2:data1
sub tmp_lo,data3
sbc tmp_hi,data4 ;h2i:=-1*(data[i1]-data[i3])
com tmp_lo ;two's complement negation: invert all bits ...
com tmp_hi
ldi temp,1
add tmp_lo,temp ;... and add 1
adc tmp_hi,nullwert
sts h2i_lo,tmp_lo
sts h2i_hi,tmp_hi ;save h2i to sram-location
;***** compute h1i and h2r
;load Data[i2] into data2:data1
ldi XH,0
ldi XL,sramstart
add XL,i2
adc XH,nullwert
add XL,i2
adc XH,nullwert
ld data1,X+ ;low byte first
ld data2,X ;high byte
;load Data[i4] into register pair data4:data3
ldi XH,0
ldi XL,sramstart
add XL,i4
adc XH,nullwert
add XL,i4
adc XH,nullwert
ld data3,X+ ;low byte first
ld data4,X ;high byte
asr data2
ror data1 ;data[i2]:=data[i2]/2
asr data4
ror data3 ;data[i4]:=data[i4]/2
movw tmp_hi:tmp_lo,data2:data1
sub tmp_lo,data3
sbc tmp_hi,data4 ;h1i:=data[i2]-data[i4]
sts h1i_lo,tmp_lo
sts h1i_hi,tmp_hi ;save h1i to sram-location
add data1,data3
adc data2,data4 ;h2r:=data[i2]+data[i4]
sts h2r_lo,data1
sts h2r_hi,data2 ;save h2r to sram-location
;r16..r23 are needed as operands/result of muls16x16_32: save them
push r23
push r22
push r21
push r20
push r19
push r18
push r17
push r16
; **** Data[i1]:=h1r+wr*h2r-wi*h2i
movw R21:R20,wrh:wrl
lds r23,h2r_hi
lds r22,h2r_lo
rcall muls16x16_32
mov data2,r18 ;data2:data1 := wr*h2r (middle bytes)
mov data1,r17
movw R21:R20,wih:wil
lds r23,h2i_hi
lds r22,h2i_lo
rcall muls16x16_32
sub data1,r17 ;- wi*h2i
sbc data2,r18
lds r18,h1r_hi
lds r17,h1r_lo
add data1,r17 ;+ h1r
adc data2,r18
ldi XH,0
ldi XL,sramstart
add XL,i1
adc XH,nullwert
add XL,i1
adc XH,nullwert
st X+,data1
st X ,data2
; **** Data[i2]:=h1i+wr*h2i+wi*h2r
movw R21:R20,wrh:wrl
lds r23,h2i_hi
lds r22,h2i_lo
rcall muls16x16_32
mov data2,r18 ;data2:data1 := wr*h2i (middle bytes)
mov data1,r17
movw R21:R20,wih:wil
lds r23,h2r_hi
lds r22,h2r_lo
rcall muls16x16_32
add data1,r17 ;+ wi*h2r
adc data2,r18
lds r18,h1i_hi
lds r17,h1i_lo
add data1,r17 ;+ h1i
adc data2,r18
ldi XH,0
ldi XL,sramstart
add XL,i2
adc XH,nullwert
add XL,i2
adc XH,nullwert
st X+,data1
st X ,data2
; **** Data[i3]:=h1r-wr*h2r+wi*h2i
movw R21:R20,wih:wil
lds r23,h2i_hi
lds r22,h2i_lo
rcall muls16x16_32
mov data2,r18 ;data2:data1 := wi*h2i (middle bytes)
mov data1,r17
movw R21:R20,wrh:wrl
lds r23,h2r_hi
lds r22,h2r_lo
rcall muls16x16_32
sub data1,r17 ;- wr*h2r
sbc data2,r18
lds r18,h1r_hi
lds r17,h1r_lo
add data1,r17 ;+ h1r
adc data2,r18
ldi XH,0
ldi XL,sramstart
add XL,i3
adc XH,nullwert
add XL,i3
adc XH,nullwert
st X+,data1
st X ,data2
; **** Data[i4]:=-h1i+wr*h2i+wi*h2r
movw R21:R20,wrh:wrl
lds r23,h2i_hi
lds r22,h2i_lo
rcall muls16x16_32
mov data2,r18 ;data2:data1 := wr*h2i (middle bytes)
mov data1,r17
movw R21:R20,wih:wil
lds r23,h2r_hi
lds r22,h2r_lo
rcall muls16x16_32
add data1,r17 ;+ wi*h2r
adc data2,r18
lds r18,h1i_hi
lds r17,h1i_lo
sub data1,r17 ;- h1i
sbc data2,r18
ldi XH,0
ldi XL,sramstart
add XL,i4
adc XH,nullwert
add XL,i4
adc XH,nullwert
st X+,data1
st X ,data2
;restore r16..r23 in reverse push order
pop r16
pop r17
pop r18
pop r19
pop r20
pop r21
pop r22
pop r23
ret ;end of the extensive calculations of for_schleife_i
DC_compensation:
;----------------------------------------------------------------------
; Combines the first two 16-bit array elements (stored at sramstart+2
; and sramstart+4) in place:
;   first  := first + second
;   second := first - second      (using the old value of first)
; On return data2:data1 holds the sum, tmp_hi:tmp_lo the difference and
; data4:data3 the old second element.
;----------------------------------------------------------------------
    lds tmp_lo,sramstart+2      ;old first element -> tmp
    lds tmp_hi,sramstart+3
    lds data3,sramstart+4       ;old second element -> data4:data3
    lds data4,sramstart+5
    movw data2:data1,tmp_hi:tmp_lo
    add data1,data3             ;sum
    adc data2,data4
    sts sramstart+2,data1       ;sts leaves the flags untouched
    sts sramstart+3,data2
    sub tmp_lo,data3            ;difference
    sbc tmp_hi,data4
    sts sramstart+4,tmp_lo
    sts sramstart+5,tmp_hi
    ret
;end of DC compensation
;Sine/cosine lookup tables, one 16-bit word per entry, values scaled so
;that 256 corresponds to 1.0. Each sine table consists of its own first
;quarter period and then simply continues into the cosine table behind it.
sinetab: ;tables for 128 data points
.dw 0, 13, 25, 38, 50, 62, 74, 86, 98, 109, 121, 132, 142, 152, 162, 172
.dw 181, 190, 198, 206, 213, 220, 226, 231, 237, 241, 245, 248, 251, 253, 255, 256
cosinetab: ;overlaps with the sine table to save memory
.dw 256, 256, 255, 253, 251, 248, 245, 241, 237, 231, 226, 220, 213, 206, 198, 190
.dw 181, 172, 162, 152, 142, 132, 121, 109, 98, 86, 74, 62, 50, 38, 25, 13
.dw 0, -13, -25, -38, -50, -62, -74, -86, -98,-109,-121,-132,-142,-152,-162,-172
.dw -181,-190,-198,-206,-213,-220,-226,-231,-237,-241,-245,-248,-251,-253,-255,-256
.dw -256,-256,-255,-253,-251,-248,-245,-241,-237,-231,-226,-220,-213,-206,-198,-190
.dw -181,-172,-162,-152,-142,-132,-121,-109, -98, -86, -74, -62, -50, -38, -25, -13
.dw 0, 13, 25, 38, 50, 62, 74, 86, 98, 109, 121, 132, 142, 152, 162, 172
.dw 181, 190, 198, 206, 213, 220, 226, 231, 237, 241, 245, 248, 251, 253, 255, 256
sinetab_64: ;tables for 64 data points (not referenced by the code in this file)
.dw 0, 25, 50, 74, 98, 121, 142,162
.dw 181,198,213,226,237,245,251,255
cosinetab_64: ;overlaps with the sine table to save memory
.dw 256, 255, 251, 245, 237, 226, 213, 198
.dw 181, 162, 142, 121, 98, 74, 50, 25
.dw 0, -25, -50, -74, -98,-121,-142,-162
.dw -181,-198,-213,-226,-237,-245,-251,-255
.dw -256,-255,-251,-245,-237,-226,-213,-198
.dw -181,-162,-142,-121, -98, -74, -50, -25
.dw 0, 25, 50, 74, 98, 121, 142,162
.dw 181,198,213,226,237,245,251,255
sinetab_32: ;tables for 32 data points (not referenced by the code in this file)
.dw 0, 50, 98, 142
.dw 181, 213, 237, 251
cosinetab_32: ;overlaps with the sine table to save memory
.dw 256, 251, 237, 213
.dw 181, 142, 98, 50
.dw 0, -50, -98,-142
.dw -181,-213,-237,-251
.dw -256,-251,-237,-213
.dw -181,-142, -98, -50
.dw 0, 50, 98, 142
.dw 181, 213, 237, 251
testdata: ;128 data points, 7 bit + sign each (one signed byte per sample)
;Test signal: a ramp 1..127 followed by a single 0.
;(The fourth row used to read 25,25,27,28 - the missing 26 is restored.)
.db 1,2,3,4,5,6,7,8
.db 9,10,11,12,13,14,15,16
.db 17,18,19,20,21,22,23,24
.db 25,26,27,28,29,30,31,32
.db 33,34,35,36,37,38,39,40
.db 41,42,43,44,45,46,47,48
.db 49,50,51,52,53,54,55,56
.db 57,58,59,60,61,62,63,64
.db 65,66,67,68,69,70,71,72
.db 73,74,75,76,77,78,79,80
.db 81,82,83,84,85,86,87,88
.db 89,90,91,92,93,94,95,96
.db 97,98,99,100,101,102,103,104
.db 105,106,107,108,109,110,111,112
.db 113,114,115,116,117,118,119,120
.db 121,122,123,124,125,126,127,0
;Reorder tables: entry n (0-based) is the 1-based array index at which
;input sample number n has to be stored before the FFT starts. Samples
;are moved in pairs, so every .db row holds two consecutive destinations.
speicherorte_128_werte: ;reorder table for 128 values
.db 1,2
.db 65,66
.db 33,34
.db 97,98
.db 17,18
.db 81,82
.db 49,50
.db 113,114
.db 9,10
.db 73,74
.db 41,42
.db 105,106
.db 25,26
.db 89,90
.db 57,58
.db 121,122
.db 5,6
.db 69,70
.db 37,38
.db 101,102
.db 21,22
.db 85,86
.db 53,54
.db 117,118
.db 13,14
.db 77,78
.db 45,46
.db 109,110
.db 29,30
.db 93,94
.db 61,62
.db 125,126
.db 3,4
.db 67,68
.db 35,36
.db 99,100
.db 19,20
.db 83,84
.db 51,52
.db 115,116
.db 11,12
.db 75,76
.db 43,44
.db 107,108
.db 27,28
.db 91,92
.db 59,60
.db 123,124
.db 7,8
.db 71,72
.db 39,40
.db 103,104
.db 23,24
.db 87,88
.db 55,56
.db 119,120
.db 15,16
.db 79,80
.db 47,48
.db 111,112
.db 31,32
.db 95,96
.db 63,64
.db 127,128
speicherorte_64_werte: ;reorder table for 64 values
.db 1, 2, 33, 34
.db 17, 18, 49, 50
.db 9, 10, 41, 42
.db 25, 26, 57, 58
.db 5, 6, 37, 38
.db 21, 22, 53, 54
.db 13, 14, 45, 46
.db 29, 30, 61, 62
.db 3, 4, 35, 36
.db 19, 20, 51, 52
.db 11, 12, 43, 44
.db 27, 28, 59, 60
.db 7, 8, 39, 40
.db 23, 24, 55, 56
.db 15, 16, 47, 48
.db 31, 32, 63, 64
;*********************
speicherorte_32_werte: ;reorder table for 32 values
.db 1, 2, 17, 18
.db 9, 10, 25, 26
.db 5, 6, 21, 22
.db 13, 14, 29, 30
.db 3, 4, 19, 20
.db 11, 12, 27, 28
.db 7, 8, 23, 24
.db 15, 16, 31, 32
;******************************************************************************
;*
;* FUNCTION
;* muls16x16_32
;* DESCRIPTION
;* Signed multiply of two 16bits numbers with 32bits result.
;* USAGE
;* r19:r18:r17:r16 = r23:r22 * r21:r20
;* PRECONDITION
;* r6 must contain 0 on entry. The "clr r6" at the top of the routine is
;* commented out, so the caller is responsible for it (r6 is the global
;* zero register "nullwert" of this program).
;* STATISTICS
;* Cycles : 19 + ret
;* Words : 15 + ret
;* (figures from the original header; with "clr r6" disabled the body
;* below consists of 14 one-word instructions)
;* Register usage: r0:r1 and r6 and r16 to r23 (11 registers)
;* NOTE
;* The routine is non-destructive to the operands.
;* r0 and r1 are overwritten by the multiply instructions.
;*
;******************************************************************************
muls16x16_32:
; clr r6 ;disabled: r6 is expected to be zero already
muls r23, r21 ; (signed)ah * (signed)bh
movw r19:r18, r1:r0 ; -> result bytes 3:2
mul r22, r20 ; al * bl
movw r17:r16, r1:r0 ; -> result bytes 1:0
mulsu r23, r20 ; (signed)ah * bl
sbc r19, r6 ; sign-extend the partial product (carry = its sign bit)
add r17, r0 ; add it at byte offset 1
adc r18, r1
adc r19, r6 ; propagate the carry into byte 3
mulsu r21, r22 ; (signed)bh * al
sbc r19, r6 ; sign-extend the partial product (carry = its sign bit)
add r17, r0 ; add it at byte offset 1
adc r18, r1
adc r19, r6 ; propagate the carry into byte 3
ret
;***************************************************************************
;*
;* "div8u" - 8/8 Bit Unsigned Division
;*
;* This subroutine divides the two register variables "dd8u" (dividend) and
;* "dv8u" (divisor). The result is placed in "dres8u" and the remainder in
;* "drem8u".
;*
;* The division is a shift-and-subtract loop, fully unrolled into eight
;* identical steps (one per quotient bit); there is no loop counter.
;*
;* Number of words :66 + return
;* Number of cycles :50/58/66 (Min/Avg/Max) + return
;* Low registers used :1 (drem8u)
;* High registers used :2 (dres8u/dd8u,dv8u)
;*
;* Register sharing within this file: drem8u is r15, which is also wiH,
;* and dd8u/dres8u is r24, which is also zaehler. Both are overwritten
;* by every call.
;* NOTE(review): these .def lines come after the first use of
;* dd8u/dv8u/dres8u earlier in the file - confirm that the assembler in
;* use accepts this ordering.
;*
;***************************************************************************
;***** Subroutine Register Variables
.def drem8u =r15 ;remainder
.def dres8u =r24 ;result
.def dd8u =r24 ;dividend
.def dv8u =r25 ;divisor
;***** Code
div8u: sub drem8u,drem8u ;clear remainder and carry
rol dd8u ;shift left dividend
rol drem8u ;shift dividend into remainder
sub drem8u,dv8u ;remainder = remainder - divisor
brcc d8u_1 ;if result negative
add drem8u,dv8u ; restore remainder
clc ; clear carry to be shifted into result
rjmp d8u_2 ;else
d8u_1: sec ; set carry to be shifted into result
d8u_2: rol dd8u ;shift left dividend
rol drem8u ;shift dividend into remainder
sub drem8u,dv8u ;remainder = remainder - divisor
brcc d8u_3 ;if result negative
add drem8u,dv8u ; restore remainder
clc ; clear carry to be shifted into result
rjmp d8u_4 ;else
d8u_3: sec ; set carry to be shifted into result
d8u_4: rol dd8u ;shift left dividend
rol drem8u ;shift dividend into remainder
sub drem8u,dv8u ;remainder = remainder - divisor
brcc d8u_5 ;if result negative
add drem8u,dv8u ; restore remainder
clc ; clear carry to be shifted into result
rjmp d8u_6 ;else
d8u_5: sec ; set carry to be shifted into result
d8u_6: rol dd8u ;shift left dividend
rol drem8u ;shift dividend into remainder
sub drem8u,dv8u ;remainder = remainder - divisor
brcc d8u_7 ;if result negative
add drem8u,dv8u ; restore remainder
clc ; clear carry to be shifted into result
rjmp d8u_8 ;else
d8u_7: sec ; set carry to be shifted into result
d8u_8: rol dd8u ;shift left dividend
rol drem8u ;shift dividend into remainder
sub drem8u,dv8u ;remainder = remainder - divisor
brcc d8u_9 ;if result negative
add drem8u,dv8u ; restore remainder
clc ; clear carry to be shifted into result
rjmp d8u_10 ;else
d8u_9: sec ; set carry to be shifted into result
d8u_10: rol dd8u ;shift left dividend
rol drem8u ;shift dividend into remainder
sub drem8u,dv8u ;remainder = remainder - divisor
brcc d8u_11 ;if result negative
add drem8u,dv8u ; restore remainder
clc ; clear carry to be shifted into result
rjmp d8u_12 ;else
d8u_11: sec ; set carry to be shifted into result
d8u_12: rol dd8u ;shift left dividend
rol drem8u ;shift dividend into remainder
sub drem8u,dv8u ;remainder = remainder - divisor
brcc d8u_13 ;if result negative
add drem8u,dv8u ; restore remainder
clc ; clear carry to be shifted into result
rjmp d8u_14 ;else
d8u_13: sec ; set carry to be shifted into result
d8u_14: rol dd8u ;shift left dividend
rol drem8u ;shift dividend into remainder
sub drem8u,dv8u ;remainder = remainder - divisor
brcc d8u_15 ;if result negative
add drem8u,dv8u ; restore remainder
clc ; clear carry to be shifted into result
rjmp d8u_16 ;else
d8u_15: sec ; set carry to be shifted into result
d8u_16: rol dd8u ;shift the last quotient bit into the result
ret
|
|