"""Erzeugt Trainings-Audio für das 'Plappi'-Wake-Word.
Positive = 'Plappi' in vielen Stimmen + Augmentierung (Pitch/Tempo).
Negative = verwechselbare + zufällige Wörter (gegen Fehlalarme).
"""
import asyncio, os, subprocess, sys
import edge_tts

# Output directories for the generated WAV clips (relative to the working directory).
POS_DIR = "wakeword/data/pos"
NEG_DIR = "wakeword/data/neg"

# edge-tts voice identifiers used for every text.
VOICES = [
    "de-DE-KatjaNeural",
    "de-DE-ConradNeural",
    "de-DE-AmalaNeural",
    "de-DE-KillianNeural",
    "de-AT-IngridNeural",
    "de-AT-JonasNeural",
    "de-CH-LeniNeural",
    "de-CH-JanNeural",
]

# Spoken variants of the wake word (positive class).
POS_TEXTS = [
    "Plappi",
    "Plappi.",
    "Hey Plappi",
    "Plappi!",
    "Plappi Plappi",
]

# Negative class: confusable words (hard negatives) followed by general
# words and short phrases.
NEG_TEXTS = [
    "Pappi", "Klappe", "Lappen", "Plapper", "Pappa", "Papa", "Lampe",
    "Klappi", "Blappi", "Krabbi", "Happy",
    "Apfel", "Banane", "Hund", "Katze", "Auto", "Bagger", "Hallo", "Danke",
    "ich mag dich", "wir spielen", "komm her", "schau mal",
    "guten Morgen", "gute Nacht", "eins zwei drei",
    "erzähl eine Geschichte", "lass uns spielen",
]

# Make sure both output directories exist before anything is written.
os.makedirs(POS_DIR, exist_ok=True)
os.makedirs(NEG_DIR, exist_ok=True)


async def synth(text, voice, path):
    """Speak *text* with the edge-tts *voice* and save the audio to *path*."""
    communicator = edge_tts.Communicate(text, voice)
    await communicator.save(path)


def aug(mp3, wav, semitones=0, tempo=1.0):
    """Convert *mp3* to a 16 kHz mono WAV, optionally pitch/tempo shifted.

    Args:
        mp3: Path of the input audio file handed to ffmpeg.
        wav: Path of the output file; an existing file is overwritten (``-y``).
        semitones: Pitch shift in semitones; 0 disables the shift.
        tempo: Speed factor for ffmpeg's ``atempo`` filter; 1.0 disables it.

    0.3 s of silence is always appended to the clip.

    A failing ffmpeg run does not raise: a warning is printed to stderr so a
    single bad clip cannot abort a batch. An ``OSError`` from starting the
    process (e.g. ffmpeg not installed) still propagates.
    """
    filters = []
    if semitones:
        ratio = 2 ** (semitones / 12.0)
        # asetrate only relabels the sample rate of the PCM data, so the
        # stream must really be at 16 kHz before it is applied. Without the
        # leading aresample the shift is relative to whatever rate the input
        # has (edge-tts MP3s are presumably 24 kHz -- confirm), giving a
        # pitch far off the requested one.
        # NOTE: per the ffmpeg docs an asetrate shift changes speed by the
        # same ratio as pitch.
        filters.append("aresample=16000")
        filters.append(f"asetrate=16000*{ratio:.4f},aresample=16000")
    if tempo != 1.0:
        filters.append(f"atempo={tempo:.2f}")
    filters.append("apad=pad_dur=0.3")

    cmd = ["ffmpeg", "-nostdin", "-y", "-i", mp3, "-af", ",".join(filters),
           "-ar", "16000", "-ac", "1", wav]
    result = subprocess.run(cmd, capture_output=True)
    if result.returncode != 0:
        # Surface the failure instead of silently producing no output file.
        tail = result.stderr.decode("utf-8", errors="replace").strip().splitlines()[-1:]
        print(f"ffmpeg failed ({result.returncode}) for {wav}:", *tail, file=sys.stderr)


async def _render_set(tag, prefix, out_dir, texts, variants, tmp_dir):
    """Synthesize every voice/text pair and write one WAV per variant.

    Args:
        tag: Label used in the "skip ..." message on stderr ("pos"/"neg").
        prefix: File-name prefix for the intermediate MP3 and output WAVs.
        out_dir: Directory receiving the WAV files.
        texts: Texts to synthesize with every entry of ``VOICES``.
        variants: Iterable of ``(suffix, semitones, tempo)`` tuples; each one
            yields ``{out_dir}/{prefix}_{clip}_{suffix}.wav``.
        tmp_dir: Directory for the intermediate MP3 files.
    """
    clip = 0  # counts successfully synthesized clips only
    for voice in VOICES:
        for text in texts:
            mp3 = os.path.join(tmp_dir, f"{prefix}_{clip}.mp3")
            try:
                await synth(text, voice, mp3)
            except Exception as e:
                # Best effort: a failed synthesis skips this voice/text pair
                # and leaves the clip index unchanged.
                print(f"skip {tag}", e, file=sys.stderr)
                continue
            for suffix, semitones, tempo in variants:
                aug(mp3, f"{out_dir}/{prefix}_{clip}_{suffix}.wav", semitones, tempo)
            clip += 1


async def main():
    """Generate the positive and negative wake-word clips.

    Positives: every voice x ``POS_TEXTS`` entry, rendered at pitch shifts of
    -2/0/+2 semitones combined with tempo factors 0.9/1.0/1.1, written to
    ``POS_DIR`` as ``p_{clip}_{semitones}_{tempo*10}.wav``.
    Negatives: every voice x ``NEG_TEXTS`` entry at 0 and +2 semitones,
    written to ``NEG_DIR`` as ``n_{clip}_{semitones}.wav``.

    After each set the number of files present in the output directory is
    printed (this includes files left over from earlier runs).
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    pos_variants = [
        # round() instead of int() so float noise in tp * 10 can never
        # truncate the tag; identical results for 0.9 / 1.0 / 1.1.
        (f"{st}_{round(tp * 10)}", st, tp)
        for st in (-2, 0, 2)
        for tp in (0.9, 1.0, 1.1)
    ]
    neg_variants = [(f"{st}", st, 1.0) for st in (0, 2)]

    # Intermediate MP3s live in a private temporary directory that is removed
    # on exit, instead of accumulating under a hard-coded /tmp.
    with tempfile.TemporaryDirectory(prefix="gen_samples_") as tmp_dir:
        await _render_set("pos", "p", POS_DIR, POS_TEXTS, pos_variants, tmp_dir)
        print("POS clips:", len(os.listdir(POS_DIR)), flush=True)
        await _render_set("neg", "n", NEG_DIR, NEG_TEXTS, neg_variants, tmp_dir)
        print("NEG clips:", len(os.listdir(NEG_DIR)), flush=True)
    print("DONE gen_samples", flush=True)


# Script entry point: run the async generation pipeline to completion.
# NOTE(review): there is no `if __name__ == "__main__"` guard, so importing
# this module also starts the generation run.
asyncio.run(main())