Skip to content

clients.openai.audio #

Quick Example: Transcribing Audio


import incubaid.herolib.clients.openai

// Obtain the default client; the API key is read from the `AIKEY`
// or `OPENROUTER_API_KEY` environment variable.
mut client := openai.get()!

// Assuming you have an audio file named 'audio.mp3' in the same directory.
// For a real application, handle file paths dynamically.
audio_file_path := 'audio.mp3'

// NOTE(review): the original example passed `file:` and `model:`, but
// `AudioArgs` declares the field `filepath` and has no `model` field,
// so that version would not compile. Corrected to match the struct.
resp := client.audio.create_transcription(
	filepath: audio_file_path
)!

fn (OpenAIAlias) create_transcription #

fn (mut f OpenAIAlias) create_transcription(args AudioArgs) !AudioResponse

Create a transcription from an audio file. Supported audio formats are mp3, mp4, mpeg, mpga, m4a, wav, or webm.

fn (OpenAIAlias) create_tranlation #

fn (mut f OpenAIAlias) create_tranlation(args AudioArgs) !AudioResponse

Create a translation to English from an audio file. Supported audio formats are mp3, mp4, mpeg, mpga, m4a, wav, or webm. (Note: the name `create_tranlation` is spelled this way in the library itself — a likely typo for `create_translation` — so calls must use the name as shown.)

fn (OpenAIAlias) create_speech #

fn (mut f OpenAIAlias) create_speech(args CreateSpeechArgs) !

enum AudioFormat #

// Output audio encoding for `create_speech` (maps to the API's
// `response_format` parameter; see CreateSpeechArgs below).
enum AudioFormat {
	mp3
	opus
	aac
	flac
	wav
	// raw PCM samples — presumably headerless; confirm sample rate/width
	// against the OpenAI speech API docs
	pcm
}

enum AudioRespType #

// Response format for transcription/translation results
// (the `response_format` field of AudioArgs below).
enum AudioRespType {
	json
	text
	srt          // SubRip subtitle format
	verbose_json // JSON with additional detail — presumably timestamps/segments; verify against API docs
	vtt          // WebVTT subtitle format
}

enum Voice #

// Voice preset for text-to-speech (`create_speech`); the default
// in CreateSpeechArgs below is `.alloy`.
enum Voice {
	alloy
	ash
	coral
	echo
	fable
	onyx
	nova
	sage
	shimmer
}

struct AudioArgs #

@[params]
// Arguments for `create_transcription` and `create_tranlation`.
struct AudioArgs {
pub mut:
	filepath        string        // path to the audio file (mp3, mp4, mpeg, mpga, m4a, wav, or webm)
	prompt          string        // optional text to guide the model's style or continue a prior segment
	response_format AudioRespType // defaults to the enum's first member, .json
	// NOTE(review): OpenAI's API documents temperature as a float in [0, 1];
	// `int` here only permits 0 or 1 — confirm this is intentional.
	temperature     int
	language        string // input language — presumably an ISO-639-1 code; verify against API docs
}

struct AudioResponse #

// Result of a transcription or translation request.
struct AudioResponse {
pub mut:
	text string // the transcribed (or translated-to-English) text
}

struct CreateSpeechArgs #

@[params]
// Arguments for `create_speech` (text-to-speech).
struct CreateSpeechArgs {
pub:
	// NOTE(review): OpenAI's published TTS model id is 'tts-1' (hyphen);
	// the default here uses an underscore — confirm the library maps this
	// before sending, otherwise the request will name an unknown model.
	model           string = 'tts_1'
	input           string @[required] // text to synthesize
	voice           Voice       = .alloy
	response_format AudioFormat = .mp3
	speed           f32         = 1.0  // playback speed multiplier
	output_path     string @[required] // file path where the generated audio is written
}

struct CreateSpeechRequest #

struct CreateSpeechRequest {
pub:
	model           string
	input           string
	voice           string
	response_format string
	speed           f32
}