;----------------------------------------------------------------------------
; Copyright (C) 2006 Arturo Ragozini and Daniel Vik
;
;  This software is provided 'as-is', without any express or implied
;  warranty.  In no event will the authors be held liable for any damages
;  arising from the use of this software.
;
;  Permission is granted to anyone to use this software for any purpose,
;  including commercial applications, and to alter it and redistribute it
;  freely, subject to the following restrictions:
;
;  1. The origin of this software must not be misrepresented; you must not
;     claim that you wrote the original software. If you use this software
;     in a product, an acknowledgment in the product documentation would be
;     appreciated but is not required.
;  2. Altered source versions must be plainly marked as such, and must not be
;     misrepresented as being the original software.
;  3. This notice may not be removed or altered from any source distribution.
;----------------------------------------------------------------------------
; 
; 2018 a.d., an attempt to 'port' this RLE AY PLAYER to ZX Spectrum environment.
; the original developers seem to forgot to mention which software they grant ;-)
; by Chris Born, Rotterdam
; timings are different, and the ports used by the ZX are different, as is their addressing. that's PROBABLY all.
; zx nop= 4t, msx nop= 5t
; ZX has ONLY 1 CPU SPEED AVAILABLE, although 48k and 128k differ SLIGHTLY, like (100% /3500000) *3546900 = 101.34 %

; MSX uses the fast 'out ($a0),a' and 'out ($a1),a' while 
; zx insists on two 16-bit ports: 0xfffd (select AY register) and 0xbffd (write AY register), used via 'out (c),a' with BC as the PORT address, which means rewriting the value of B_reg TWICE per register write (true for about 95% of routines, but 100% in this routine)!!!!!

; 1 tstate = 1/3,500,000 = 0.000,000,286 second on 48k spectrum, 1/3.500000=0.285714286 >>0.000000285714286
; 1 tstate = 1/3,546,900 = 0.000,000,282 second on 128k spectrum 1/3.546900=0.281936339 >>0.000000281936339

; average sample on ZX seems to be 112.67 tstates in this routine, with 'full assemble'
; 0.286 *  112.67 =  32.22362 microseconds per sample on 48k spectrum
; 3500000/112.67 = 31064.169699121 = 31.064 khz 

; 0.282 *  112.67 =  31.77294 microseconds per sample on 128k spectrum
; 3546900/112.67 = 31480.42957309  = 31.480 khz

;https://www.msx.org/news/software/en/pcm-encoder
;                           CPU frequency
;   replayed frequency  =  ---------------
;                          dt1 + dt2 + dt3
; pcmenc should use the following command line arguments:
;
;    pcmenc -dt1 A_dt1 -dt2 A_dt2 -dt3 A_dt3 file.wav

;----------------
; this is the 4 or 3 bit rle + 4 bit sample in 1 byte version  %1111 + %1111
;----------------
;4bit rle can hold 16 periods of timing, but is 7t slower per sample
;3bit rle can hold  8 periods of timing, bit 4 is kept at 0 to avoid switching the AY channels to 'envelope' mode

; Build switches (decisive markers). Each one is either 0 or 1.
rl4    equ 1   ; 1: 4-bit run length + 4-bit volume per byte (costs an extra 'and $0f', 7t per channel)
               ; 0: 3-bit run length; bit 4 of the data byte is kept at 0 so no 'and' is needed

delay  equ 1   ; 1: assemble the djnz delay loop; 15 tstates extra per sample trio, +13t per extra count in B
               ; it is a crude way to slow down with this RLE type, sounds may be distorted.

key    equ 1   ; 1: assemble the 'any key' trap that ends playback early

; Timing symbols derived from the switches.
; time_A, time_B, time_C and time_Work are defined next to the code they measure.
time_1 equ 15 * delay     ; tstates charged to the delay loop (7+8 minimum)
time_2 equ 32 * key       ; tstates charged to the key trap

; pcmenc arguments:  pcmenc -dt1 A_dt1 -dt2 A_dt2 -dt3 A_dt3 file.wav
A_dt1  equ time_A         ; in the symbol table 'A_' sorts first alphabetically.
A_dt2  equ time_B
A_dt3  equ time_C + time_1 + time_2 + time_Work

tstate equ A_dt1 + A_dt2 + A_dt3   ; tstates for one pass over channels A, B and C

; CPU clocks in Hz.
zx48hz  equ 3500000   ; 48k spectrum
zx128hz equ 3546900   ; 128k spectrum, 0x361F14 = 24 bits

; cpuhz was used below without ever being defined; pick the clock of the target machine here.
cpuhz   equ zx128hz   ; use zx48hz instead for a 48k machine

wavehz equ   cpuhz / tstate        ; CPU frequency / (dt1 + dt2 + dt3)

	org	8000h ; zx uncontended 'timing safe zone' starts at 32768 and has 16k length, 'highmem'

;-------------------------------------
; Entry point
;
; Saves HL of the other register bank for the caller (ZX BASIC), clears AY
; registers 0..6, writes $3f to AY register 7 (mixer) and plays the sample
; block found at SAMPLE_START.
; There is no RET here: execution continues into the code that follows.
;-------------------------------------
START:
        di                          ; playback is timed by counting tstates
        exx                         ; switch register bank ...
        push    hl                  ; ... and keep that bank's HL on the stack

        call    RESET_PSG           ; writes 0 to AY registers 0..6, returns with A = 6
        inc     a                   ; A = 7
        ld      d,a                 ; D = AY register to write; A = D makes xov1 return after one write
        ld      e,$3f               ; E = mixer value %00.111.111
        call    xov1                ; xov1 reloads BC with $fffd itself

        ld      de,(SAMPLE_START)   ; DE = length word stored in front of the data
        ld      hl,SAMPLE_START+2   ; HL = first encoded byte
        call    PLAY_SAMPLE

        ; PLAY_SAMPLE comes back with the register banks exchanged relative to
        ; the call, so this POP fills the HL of the bank that the EXX below
        ; puts away again.
        pop     hl
        exx


;        ret
;block player        ELSE
;.REWIND:
;        xor a
;.LOOP:
;        inc a
;        ld  (6800h),a
;        ld  hl, SAMPLE_START + 2
;        ld  de, (SAMPLE_START)
;        ex   af,af'
;        call PLAY_SAMPLE
;        ex   af,af'
;        cp  (SAMPLE_END - SAMPLE_START + #1FFF)/#2000
;        jr  nz,.LOOP
;        jr  .REWIND
;        ret
;        ENDIF       


;-------------------------------------
; Resets the PSG
;
; Entry points:
;   (fall-through)  ei, then same as 'silence'
;   silence         write 0 to AY registers 0..13
;   RESET_PSG/vox   write 0 to AY registers 0..6
;   d255            write 0 to AY registers 0..A
;   xov             write E to AY registers D+1..A
;   xov1            write E to AY registers D..A
; All of them return with D = A, E unchanged and BC = $bffd.
; (The author left a table-driven variant, loading E from (HL), commented out.)
;-------------------------------------
          ei                      ; interrupts back on once playback has finished
silence   ld      a,13            ; last AY register to write
          jr      d255

RESET_PSG:
vox       ld      a,6             ; last AY register to write
d255      ld      de,$ff00        ; D = 255 so the first 'inc d' yields register 0; E = 0 is the value written

xov       inc     d               ; step to the next AY register
xov1      ld      bc,$fffd        ; AY register-select port
          out     (c),d
          ld      b,$bf           ; BC = $bffd, AY data port
          out     (c),e
          cp      d               ; was that the last register?
          jr      nz,xov          ; no: do the next one
          ret

;silen     defb 0,0,0,0,0,0,0,0,0,0,0,0,0,0  ; set all AY channels to zero 'silence'
;AYreg	  defb 0,0,0,0,0,0,0,63,0,0,0,0,0,0 

;-------------------------------------
; Plays one sample block
; IN   HL - Encoded sample start address
;      DE - Sample length: number of passes through the A/B/C loop below
;           (the counter is decremented before it is tested, so 0 acts as 65536)
; OUT  returns with the two register banks exchanged relative to entry
;      (one EXX on the way in, all later EXX come in pairs).
;
; Register use inside the loop
;   nor_reg bank: C = $fd, D = $ff, E = $bf (port address parts), HL = down-counter,
;                 B = scratch for the port high byte
;   alt_reg bank: HL = read pointer, C / D / E = current data byte of channel A / B / C,
;                 B = delay counter (B is not initialised here; the padding
;                 'ld a,(bc)' reads use whatever it holds)
;
; Data byte: the upper bits are a run counter, the lower bits the AY volume.
; Each pass subtracts one run step ($10 for rl4 = 1, $20 otherwise) from the
; channel's byte; on borrow a new byte is fetched and written to the AY,
; otherwise an equally long padding sequence is executed.
;
; With key = 1 playback also ends as soon as any key is held down.
;-------------------------------------
PLAY_SAMPLE:
        push    hl      ; park the data pointer
        ld      c,$fd   ; low byte of both AY port addresses
        ex      de,hl   ; HL = down-counter
        ld      de,$ffbf; D = high byte of $fffd (select), E = high byte of $bffd (write)
        exx             ; alt_reg
        pop     hl      ; HL = data pointer
        ld      c,$00   ; channel A byte = 0: first pass borrows and fetches
        ld      de,$0000; same for channel B (D) and channel C (E)

PsgLoop:
; Calculate channel A volume
        ld      a,c         ; 4

if (rl4 = 1)
        sub     $10         ; 7   one run step, 4-bit run length
else    
        sub     $20         ; 7   one run step, 3-bit run length
endif

        jr      nc,PsgWaitA ; 7t not taken, +5t when taken
                            ;=4+7+7=18t
	ld a,8              ; 7t  AY register 8
        exx                 ; 4t  nor_reg
	ld b,d              ; 4t  BC = $fffd
	out (c),a           ; 12t select AY register
        exx                 ; 4t  alt_reg
        ld      a,(hl)      ; 7   next data byte
        inc     hl          ; 6
        ld      c,a         ; 4   becomes the new channel A state

if (rl4 = 1)
        and $0f             ; 7t  volume nibble only
endif

        exx                 ; 4t  nor_reg
	ld b,e              ; 4t  BC = $bffd
	out (c),a           ; 12t write volume
        exx                 ; 4t  alt_reg
                            ;=7+4+4+12+4+7+6+4(+7)+4+4+12+4=79
                            ; 18+79=97 tstates per channel with rl4 = 1
time_A equ 7+4+4+12+4+7+6+4+(7*rl4)+4+4+12+4+18
PsgDoneA:

; Calculate channel B volume (same sequence, state in D, AY register 9)
        ld      a,d         ; 4

if (rl4 = 1)
        sub     $10         ; 7
else    
        sub     $20         ; 7
endif

        jr      nc,PsgWaitB ; 7t not taken
        ld      a,9         ; 7t  AY register 9
        exx                 ; nor_reg
	ld      b,d         ; BC = $fffd
	out     (c),a       ; select AY register
        exx                 ; alt_reg
        ld      a,(hl)      ; next data byte
        inc     hl          ; 
        ld      d,a         ; new channel B state

if (rl4 = 1)
        and $0f             ; 7t
endif

        exx                 ; nor_reg
	ld      b,e         ; BC = $bffd
	out     (c),a       ; write volume
        exx                 ; alt_reg
time_B equ 7+4+4+12+4+7+6+4+(7*rl4)+4+4+12+4+18
PsgDoneB:

; Calculate channel C volume (same sequence, state in E, AY register 10)
        ld      a,e         ; 4

if (rl4 = 1)
        sub     $10         ; 7
else    
        sub     $20         ; 7
endif

        jr      nc,PsgWaitC ; 7t not taken
        ld      a,10        ; 7t  AY register 10
        exx                 ; nor_reg
	ld      b,d         ; BC = $fffd
	out     (c),a       ; select AY register
        exx                 ; alt_reg
        ld      a,(hl)      ; next data byte
        inc     hl          ; 
        ld      e,a         ; new channel C state

if (rl4 = 1)
        and $0f             ; 7t
endif

        exx                 ; nor_reg
	ld      b,e         ; BC = $bffd
	out     (c),a       ; write volume
        exx                 ; alt_reg
time_C equ 7+4+4+12+4+7+6+4+(7*rl4)+4+4+12+4+18
PsgDoneC:


; Optional slow-down: a crude delay, like a stick in a wheel.
if (delay = 1)
bb1     equ     $+1         ; address of the delay count inside the 'ld b,n' below
        ld      b,1         ; 7t
bb      djnz    bb          ; 8t on the last pass, 13t on every other pass
                            ; minimum 7+8=15 tstates; every extra count adds 13 (0 counts as 256)
endif


if (key = 1)
; Test for ANY key.
; 'in a,(n)' drives A8-A15 with the old contents of A, and the keyboard
; half-rows are selected by the 0 bits on those lines. A must therefore be 0
; to scan all eight half-rows; otherwise the rows looked at would depend on
; the last volume written or on the value left by the padding code.
          xor   a         ;  4t A = 0: select every half-row
          in    a,($fe)   ; 11t bits 0-4 = key columns, 0 = pressed (port is CONTENDED, may take longer)
          cpl             ;  4t pressed keys become 1 bits
          and   $1F       ;  7t keep the five column bits, NZ = a key is down
          jr   nz,ter     ;  7t not taken                       = 33t
endif

; Decrement length and return if zero
        exx                 ;  4   nor_reg
        dec     hl          ;  6
        ld a,l              ;  4
        or h                ;  4
        exx                 ;  4   alt_reg
        jp      nz,PsgLoop  ; 10   all options on: 97+97+97+15+33+32 = 371 tstates per pass
ter     ret                 ; normal end and key exit, both in the alt_reg bank
; time_2 (32 * key, defined with the other build symbols) covers 32 of the 33
; tstates of the key trap; the remaining one is added here so that A_dt3 is exact.
time_Work equ 4+6+4+4+4+10+(1*key)
        
; Padding paths: taken while a run is still counting down. Each one stores
; the decremented byte and then burns the same number of tstates as the
; fetch-and-write path it replaces. The loaded values are thrown away.
PsgWaitA:                   ; +5t for the taken jr
        ld      c,a         ; 4t  store decremented channel A byte
if (rl4 = 1)
        ld a,(bc)           ; 7t  stands in for the 'and $0f'
endif
        ld a,(bc)           ; 7t
        ld a,(bc)           ; 7t
        ld a,(bc)           ; 7t
        ld a,(bc)           ; 7t
        ld a,(bc)           ; 7t
        ld a,(bc)           ; 7t
        ld a,i              ; 9t  (7*rl4)+7+7+7+7+7+7 +9 =58
        jr      PsgDoneA    ; 12   ->5+4+58+12= 79 including branch
        
PsgWaitB:
        ld      d,a         ; 4t  store decremented channel B byte
if (rl4 = 1)
        ld a,(bc)           ; 7t
endif
        ld a,(bc)           ; 7t
        ld a,(bc)           ; 7t
        ld a,(bc)           ; 7t
        ld a,(bc)           ; 7t
        ld a,(bc)           ; 7t
        ld a,(bc)           ; 7t
        ld a,i              ; 9t  
        jr      PsgDoneB    ; 12
        
PsgWaitC:
        ld      e,a         ; 4t  store decremented channel C byte
if (rl4 = 1)
        ld a,(bc)           ; 7t
endif
        ld a,(bc)           ; 7t
        ld a,(bc)           ; 7t
        ld a,(bc)           ; 7t
        ld a,(bc)           ; 7t
        ld a,(bc)           ; 7t
        ld a,(bc)           ; 7t
        ld a,i              ; 9t
        jr      PsgDoneC    ; 12

; original, not used
;-------------------------------------
; Padding for rom player
;-------------------------------------
;        if (ExeType != 0)
;        DS (#6000 - $)
;        endif

;-------------------------------------
; Sample data
;
; SAMPLE_START is the address directly after the player code.
; START reads a 16-bit length from SAMPLE_START and passes SAMPLE_START+2 to
; PLAY_SAMPLE as the start of the encoded data.
; Nothing is assembled here (the incbin below is commented out), so the data
; has to be in memory at this address before START is called.
;-------------------------------------
SAMPLE_START equ $
;        incbin "plop.bin"
;SAMPLE_END:

; original, not used
;-------------------------------------
; Padding, align rom image to a power of two.
;-------------------------------------
;        IF (ExeType != 0)
;        
;SAMPLE_LENGTH   equ SAMPLE_END - SAMPLE_START

;        IF (SAMPLE_LENGTH <= #6000)
;        DS (#6000 - SAMPLE_LENGTH)
;        ELSE
;        IF (SAMPLE_LENGTH <= #E000)
;        DS (#E000 - SAMPLE_LENGTH)
;        ELSE
;        IF (SAMPLE_LENGTH <= #1E000)
;        DS (#1E000 - SAMPLE_LENGTH)
;        ELSE
;        IF (SAMPLE_LENGTH <= #3E000)
;        DS (#3E000 - SAMPLE_LENGTH)
;        ELSE
;        IF (SAMPLE_LENGTH <= #7E000)
;        DS (#7E000 - SAMPLE_LENGTH)
;        ELSE
;        DS (#FE000 - SAMPLE_LENGTH)
;        ENDIF
;        ENDIF
;        ENDIF
;        ENDIF
;        ENDIF
        
;        ENDIF

;FINISH:

; End of source; 'the' is only a label on the END directive.
the      end
