import torch
import soundfile as sf
from qwen_tts import Qwen3TTSModel
# Checkpoint identifier handed to from_pretrained.
MODEL_ID = "Qwen/Qwen3-TTS-12Hz-1.7B-Base"

# Place the model on CUDA device 0 and request bfloat16 as the dtype.
LOAD_OPTIONS = {
    "device_map": "cuda:0",
    "dtype": torch.bfloat16,
}

model = Qwen3TTSModel.from_pretrained(MODEL_ID, **LOAD_OPTIONS)
# Reference recording passed to the model as the voice to imitate.
ref_audio = "https://raw.githubusercontent.com/simonguest/CS-394/refs/heads/main/src/08/code/voice.wav"
# Text paired with the reference recording.
# NOTE(review): presumably a verbatim transcript of the clip -- confirm.
ref_text = "This is kind of nice. Er. It's a kinda nice overview. I can go into. These are the rows of data that I've uploaded here. I can go into this, er."

# Sentence to synthesize via the voice-clone call.
target_text = "Just to confirm. There's no more work to do this course, no more assignments. And everyone is getting an A."

# The call returns an indexable collection of waveforms and a second value
# that is passed to the writer as the sample rate.
wavs, sr = model.generate_voice_clone(
    text=target_text,
    language="English",
    ref_audio=ref_audio,
    ref_text=ref_text,
)

# Only the first returned waveform is saved.
first_wav = wavs[0]
sf.write(file="clone.wav", data=first_wav, samplerate=sr)