import type { Transcript } from '#/deepgram/types'
import type Voice from './Voice.js'
import Microphone from './microphone/index.js'
import VoiceError from './error.js'

import type {
  VoiceState,
  VoiceSession,
  CursorPosition,
} from './types.js'


/**
 * Options accepted by `useVoice`.
 */
type UseVoiceOptions = {
  /**
   * Returns the bounding rect that cursor coordinates are made relative to,
   * or `null` when it is unavailable (no cursor update is emitted then).
   */
  getContainerRect: () => DOMRect | null
  /**
   * Desired on/off state. The hook calls `toggle(isActive)` whenever this
   * value (or the memoized API) changes.
   */
  isActive?: boolean
  /**
   * Called with the dictation pointer's position, expressed as offsets from
   * the top/left of the rect returned by `getContainerRect`.
   */
  onCursorMove: (position: CursorPosition) => void
  /** Called with `true` on the `speaking` event and `false` on the `silent` event. */
  onSpeakingChange?: (isSpeaking: boolean) => void
  /** Called whenever the hook reports a new lifecycle state. */
  onStateChange?: (state: VoiceState) => void
  /**
   * Called with the space-joined transcript text (with a trailing space) on
   * every transcript event and once more when the session stops.
   */
  onTranscript?: (text: string) => void
  /** Passed through to the `Voice` constructor as its `url` option. */
  url: string
}

/**
 * Microphone support probe, started once when this module is evaluated.
 * `useVoice` consumes the result with `.then`/`.catch`, so all hook
 * instances share this single pending check.
 */
const isSupportedPending = Microphone.isSupported()

/**
 * React hook that wires a lazily loaded `Voice` instance to the current DOM
 * selection: while a session is active, transcripts are rendered into a
 * placeholder `<span>` inserted at the caret, and the caret position is
 * reported through `onCursorMove`.
 *
 * @param options See `UseVoiceOptions`. The callbacks are wrapped with
 *   `useEvent` (presumably to give them a stable identity — confirm against
 *   that helper) before being used in the memoized API.
 * @returns A memoized API object with `start`, `stop`, `toggle`, `destroy`
 *   and the current `isSupported` / `hasAccess` flags (`null` while unknown).
 *   The object is rebuilt, and the previous one destroyed, whenever `url`,
 *   either flag, or a wrapped callback changes.
 */
export default function useVoice(options: UseVoiceOptions) {
  const {
    isActive,
    url,
  } = options

  const getContainerRect = useEvent(options.getContainerRect)
  const onCursorMove = useEvent(options.onCursorMove)
  const onSpeakingChange = useEvent(options.onSpeakingChange)
  const onStateChange = useEvent(options.onStateChange)
  const onTranscript = useEvent(options.onTranscript)
  const [isSupported, setSupported] = useState<boolean | null>(null)
  const [hasAccess, setHasAccess] = useState<boolean | null>(null)

  // Resolve the module-level support probe; a rejected probe counts as unsupported.
  useEffect(() => {
    isSupportedPending
      .then(setSupported)
      .catch(() => setSupported(false))
  }, [])

  // Track microphone access for the lifetime of the component.
  useEffect(() => {
    let disposed = false
    let stopMonitoring: null | (() => void) = null

    Microphone
      .monitorAccess(setHasAccess)
      .then(fn => {
        stopMonitoring = fn

        // The cleanup below may already have run while monitorAccess was
        // still pending; release the monitor right away in that case so it
        // does not outlive the component.
        if (disposed) {
          stopMonitoring?.()
          stopMonitoring = null
        }
      })
      .catch(() => setHasAccess(false))

    return () => {
      disposed = true
      stopMonitoring?.()
      stopMonitoring = null
    }
  }, [])


  const api = useMemo(() => {
    // Pending/loaded Voice instance; created on first start().
    let initializer: Promise<Voice> | null = null
    // DOM handles for the dictation in progress; null when idle.
    let session: VoiceSession | null = null
    // Transcript still being revised (until a checkpoint arrives).
    let currentTranscript: Transcript | null = null
    // All transcripts of the current session, in arrival order.
    const transcripts: Transcript[] = []


    /**
     * Wraps `fn` so it only runs while a session exists, receiving that
     * session as its first argument. The wrapped function's result is
     * discarded.
     */
    const handle = <A extends unknown[], R>(fn: (session: VoiceSession, ...args: A) => R) => (...args: A) => {
      if (session) {
        fn(session, ...args)
      }
    }


    /**
     * Collapses the selection to its end and reports the pointer element's
     * position relative to the container rect.
     */
    const position = handle(({ pointer, selection }) => {
      try {
        selection.collapseToEnd()
      } catch (e) {
        // Best effort: a failed collapse must not prevent the cursor update.
      }

      const [pointerRect] = pointer.getClientRects()
      const containerRect = getContainerRect()

      if (pointerRect && containerRect) {
        onCursorMove({
          top: pointerRect.top - containerRect.top,
          left: pointerRect.right - containerRect.left,
        })
      }
    })


    /**
     * Awaits the Voice instance (if one was ever requested) and applies `fn`
     * to it. Resolves to `false` when there is no instance or `fn` yields
     * nothing.
     */
    const run = async (fn: (voice: Voice) => Promise<boolean> | void) => {
      const voice = await initializer

      return voice ?
        (await fn(voice)) ?? false :
        false
    }

    /**
     * Stops the running session through the Voice instance. Without a
     * session it only reports `'inactive'` and resolves to `false`.
     */
    const stop = async () => {
      if (session) {
        return run(voice => voice.stop())
      }

      onStateChange('inactive')
      return false
    }

    // Handlers bound to the Voice instance in initialize().
    const events = {
      error: (error: unknown) => {
        if (error instanceof VoiceError) {
          // Microphone failures flip the access flag; other VoiceErrors are
          // not reported.
          if (error.cause === 'microphone') {
            setHasAccess(false)
          }

        } else {
          dx.capture(error)
        }

        void stop()
      },

      finalize: () => {
        position()
        onStateChange('finalizing')
      },

      ready: () => onStateChange('ready'),
      silent: () => onSpeakingChange(false),
      speaking: () => onSpeakingChange(true),

      start() {
        transcripts.length = 0
        currentTranscript = null
        const selection = window.getSelection()

        // NOTE(review): with no selection we bail out without a session and
        // without reporting 'active'; nothing here stops the Voice instance
        // that emitted this event — confirm that is intended.
        if (!selection || selection.rangeCount === 0) {
          return
        }

        const node = document.createElement('span')
        const pointer = document.createElement('span')
        const range = selection.getRangeAt(0)
        range.deleteContents()
        range.collapse(false)
        // Range.insertNode inserts at the range start, so inserting `node`
        // second places it before `pointer`: the pointer trails the text.
        range.insertNode(pointer)
        range.insertNode(node)

        session = {
          range,
          selection,
          node,
          pointer,
        }

        position()
        onStateChange('active')
      },

      // TODO: this needs to be standalone
      stop: handle(({ node, pointer }) => {
        position()

        const text = `${transcripts.mapBy('text').join(' ')} `
        // Swap the placeholder for a plain text node. replaceWith() is a
        // no-op when the placeholder has been detached, so cleanup below
        // still runs instead of throwing on a missing parent.
        node.replaceWith(document.createTextNode(text))
        pointer.remove()
        session = null
        currentTranscript = null
        transcripts.length = 0

        onTranscript(text)
        onStateChange('inactive')
      }),

      transcript: handle((s, transcript: Transcript) => {
        // The first transcript after a checkpoint opens a new entry; later
        // ones overwrite that entry's text until the next checkpoint.
        if (!currentTranscript) {
          currentTranscript = transcript
          transcripts.push(currentTranscript)
        }

        currentTranscript.text = transcript.text
        const text = `${transcripts.mapBy('text').join(' ')} `
        s.node.innerText = text

        if (transcript.isCheckpoint) {
          currentTranscript = null
        }

        onTranscript(text)
        position()
      }),
    }

    /** Lazily imports `Voice`, creates an instance for `url` and binds `events`. */
    const initialize = async () => {
      onStateChange('initializing')
      const { default: Voice } = await import('./Voice.js')
      const voice = new Voice({ url })
      voice.bind(events)
      return voice
    }

    /**
     * Starts dictation. Resolves to `false` without doing anything unless
     * the microphone is both supported and accessible. On failure the cached
     * initializer is dropped (so the next call re-initializes) and the error
     * is rethrown.
     */
    const start = async () => {
      if (!isSupported || !hasAccess) {
        return false
      }

      try {
        initializer ??= initialize()
        return await run(voice => voice.start())

      } catch (e) {
        initializer = null
        throw e
      }
    }

    /**
     * Moves towards the requested state: stops when a session exists and
     * `nextActive` is falsy, starts when there is no session and
     * `nextActive` is truthy, otherwise resolves to `false`.
     */
    const toggle = async (nextActive?: boolean) => {
      if (session && !nextActive) {
        return stop()
      }

      if (!session && nextActive) {
        return start()
      }

      return false
    }

    /** Destroys the Voice instance, if one was ever requested. */
    const destroy = () => run(voice => voice.destroy())


    return {
      start,
      stop,
      toggle,
      destroy,
      isSupported,
      hasAccess,
    }
  }, [
    getContainerRect,
    onCursorMove,
    onSpeakingChange,
    onStateChange,
    onTranscript,
    isSupported,
    hasAccess,
    url,
  ])

  // Tear down the previous API whenever it is replaced, and on unmount.
  useEffect(() => () => {
    void api.destroy()
  }, [api])


  // Follow the externally controlled `isActive` flag.
  useEffect(() => {
    void api.toggle(isActive)
  }, [api, isActive])

  return api
}
