Vlnky by Řrřola [web]
; ~. ~.~. ~. ~.~. ~.~. ~. Vlnky
; ~. ~.~. ~~.~.~.~. ~.~. 256b intro
; ~.~. ~. ~.~~.~~.~. ~. by Rrrola
; ~. ~~~.~. ~.~. ~.~. rrrola@gmail.com
; greets to everyone who disassembles sourceless intros with delight
; use the keyboard to control speed and pattern parameters
; use NASM to compile
%define abs(x) bp-14h+x ; stack
%define adr(x) byte bp+si-116h+x ; constants in the code
org 100h ; assume ah=ch=bx=0 dx=cs si=0100 sp=di=FFFE
jmp short S
;Subroutine: fs:bx = address of cos(st0), st0 = [fs:bx]
fmul dword[adr(F2600_9)]
fistp dword[si] ; "dword" because we want mod 65536
imul bx,[si],byte 4 ;-) overwrites dithering buffer
fld dword[fs:bx]
ret
;Initialize mode 13h and segments (with intertwined constants)
S mov al,13h
W104 equ $
W160 equ $+2
push 0A000h ;-) pop later so that sp = -4
add dh,al
cmpsw ; si = 102h, di = 0
mov ds,dx ;<-[bp+si] ; ds = dithering buffer = cs+1300h
int 10h
F2600_9 equ $ ; about 16384/(2*PI) ~ 2607.6
xchg ax,bp
mov fs,[bp+si] ; fs = costable = [bp+si] = 8EA7h
inc bp ; bp = 0014h
;Precompute costable (16384 floats)
fninit
C mov [si],di
fild word[si]
fdiv dword[adr(F2600_9)]
fcos
fstp dword[fs:bx] ; [fs:bx] = cos(bx/4 * about 2*PI/16384)
inc di ;-) the inaccuracy makes it more interesting
sub bx,sp ; bx += 4
jnz C
pop es ; es = vram = A000h
;Main loop
;Store frame coefficients to [FF00..FFFC] (1st pass: [4000..FFFC])
M add al,15
push bp
F add [abs(-4)],ax ; [ss:-4] = 20 + 1..64*(key+15)
fild dword[bp+si] ; {T}
fidiv word[abs(-4)] ; {T / [ss:-4]}
call si
fstp dword[di] ; store T1..T64 = cos(T / [ss:-4])
sub di,sp ; di += 4
jnz F
pop ax ; di = 0, fs:bx = address of T64
;Wait for vertical retrace start (70 fps cap)
mov dx,3DAh
W in al,dx
test al,8
jz W
;Set palette: 4+4 bits, slowly changing opponent colors
pop ax ; ax = sp = 0
mov dl,0C8h
out dx,al
inc dx ; ax = ...xxxx....yyyy.
P or ax,1110000111100001b
mov [si],ax
fild word[si]
dec si
fisub word[si] ; {(X = xx:yy) - (Y = yy:??)}
fmul dword[fs:bx+di]; cos(q = T/(980+64*key) + rgb*2*PI/3)
fiadd word[si]
fiadd word[si+1] ; {color = X+Y + (X-Y)*cos(q)}
fistp word[si-1]
outsb ; out 3C9h,color>>8
add di,5558h
adc ax,sp ; ax:di += 5558h: cycle rgb, loop y and x
jnz P
;Loop initialization
xchg ax,di ; di = adr = 0, ah:al = prevErrors = 0800h
F5_837 equ $
mov cl,204 ; dx = X, cx = Y (ch is always 0)
Y mov dx,320
X pusha ;[ss:-2 -4 -6 -8 -A -C -E -10h]
I fild word[abs(-6)] ; err Y X - 0 20 258 adr; sp = -16
xor bp,byte 2
fisub word[adr(W104)]; p1:[-6]-[W160]=X-160 p2:[-4]-[W104]=Y-104
fdiv dword[byte ss:si-102h+F5_837]
jpo I
fldl2t ; {w=3.322 u=(Y-104)/5.837 v=(X-160)/5.837}
mov al,[di]
aam 10h ; ah:al = cummulative errors
;Sum 5 plasma iterations
mov cl,5
L fsub st2
W_23 equ $+1
fsubr st1
call si ; {W=cos(u+v-w) u v}
fstp st3 ; cycle {W u v} -> {u v W}
mov di,0FF34h ; di = 34 24 14 | 38 28 18 | 3C
K fld st2
fmul dword[di]
add di,sp ; V = W*cos(T6) + u*cos(T10) + v*cos(T14)
jpe K ; U = v*cos(T7) + W*cos(T11) + u*cos(T15)
faddp st1
fsubrp st1 ; p1:{V W u v} p2:{U v W u V}
fld st0
call si
fimul word[adr(W_23)]
fistp word[si] ; [si] = 23*cos(V) [si] = 23*cos(U)
sub al,[si] ; color += [si] COLOR += [si]
xchg al,ah
fxch st3 ; {v W u V} {u v W U V}
add di,byte 13*4
jpo K
fcompp ; {W U V}
loop L
fninit ; {}
;Compute final colors, draw tinted bands
add [abs(-2)],ax ; pushed ax = prevErr += cummErr + plasma
popa
add ax,118*101h ; center colors
cmp bp,dx ; if (X <= 20) color = 0
sbb bl,bl
cmp bp,cx ; if (Y <= 20) COLOR = 0
sbb bh,bh
and ax,bx ; ax = CCCCIJKLccccijkl
; cmp cx,byte 5 ; alternate version with no bands:
; cmc ; if (cx < 5) color = COLOR = 0
; sbb bx,bx
; and ax,bx ; ax = CCCCIJKLccccijkl
;Combine color planes, distribute error diffusion
; Frame coefficients get mangled in invisible bottom rows, but never
; reach "slow" values (like subnormals or NaNs) thanks to the horz band
mov bx,di ;-) for compact addressing
rol al,4
rol ax,4 ; IJKLijklccccCCCC
stosb
mov al,ah ; IJKLijklIJKLijkl
shr ax,1
shr ah,1 ; ..IJ..ij.IJK.ijk
and ax,0011001101110111b
mov [bx],al ; Sierra's "Filter Lite" error diffusion
add [bx+si-102h+320],ah ; (+) E/2
add [bx+si-102h+319],ah ; E/4 E/4 + E/2 to next frame
aam 10h ; .....IJK.....ijk
dec dx
jnz X
loop Y ; di = 204*320 = FF00h
push cx ; return 0 to stack
;Advance time, keycheck and exit
add dword[bp+si],byte 24 ;<-slow write (destroys code cache)
in al,60h ;-) ah=0 from the bands
dec ax
jnz M
ret
[ back to the prod ]
