AI — The definitive, open-source Swift framework for interfacing with generative AI.
Supports OpenAI, Anthropic, Mistral, Groq, and ElevenLabs.
Supports macOS, iOS, iPadOS, tvOS, and watchOS with a unified interface for multiple AI providers.
Swift Package Manager:
import AI
// OpenAI / GPT
import OpenAI
let client = OpenAI.Client(apiKey: "YOUR_API_KEY")
// Anthropic / Claude
import Anthropic
let client = Anthropic.Client(apiKey: "YOUR_API_KEY")
// Mistral
import Mistral
let client = Mistral.Client(apiKey: "YOUR_API_KEY")
// Groq
import Groq
let client = Groq.Client(apiKey: "YOUR_API_KEY")
// ElevenLabs
import ElevenLabs
let client = ElevenLabs.Client(apiKey: "YOUR_API_KEY")

Abstract out the LLM client to allow users to choose between providers:
import AI
import OpenAI
import Anthropic
// Use any LLMRequestHandling conformant client
let client: any LLMRequestHandling = OpenAI.Client(apiKey: "YOUR_KEY")
// Or switch to:
let client: any LLMRequestHandling = Anthropic.Client(apiKey: "YOUR_KEY")

// OpenAI GPT Models
let gpt_4o: OpenAI.Model = .gpt_4o
let gpt_4: OpenAI.Model = .gpt_4
let gpt_3_5: OpenAI.Model = .gpt_3_5
// OpenAI Text Embedding Models
let small: OpenAI.Model = .embedding(.text_embedding_3_small)
let large: OpenAI.Model = .embedding(.text_embedding_3_large)
// Anthropic Models
let haiku: Anthropic.Model = .haiku
let sonnet: Anthropic.Model = .sonnet
let opus: Anthropic.Model = .opus
// Mistral Models
let tiny: Mistral.Model = .mistral_tiny
let small: Mistral.Model = .mistral_small
let medium: Mistral.Model = .mistral_medium
// Groq Models
let llama3_8b: Groq.Model = .llama3_8b
let llama3_70b: Groq.Model = .llama3_70b
let mixtral: Groq.Model = .mixtral_8x7b
// ElevenLabs Models
let multiV2: ElevenLabs.Model = .MultilingualV2
let turboV2: ElevenLabs.Model = .TurboV2

Modern LLMs operate by receiving inputs (messages/prompts) and completing them with the next probable output based on their neural network architectures.
import AI
import OpenAI
let client: any LLMRequestHandling = OpenAI.Client(apiKey: "YOUR_KEY")
let messages: [AbstractLLM.ChatMessage] = [
.system("You are an extremely intelligent assistant."),
.user("What is the meaning of life?")
]
let parameters = AbstractLLM.ChatCompletionParameters(
tokenLimit: .fixed(200),
temperatureOrTopP: .temperature(1.2),
stops: ["END OF CHAPTER"],
functions: nil
)
let result: String = try await client.complete(
messages,
parameters: parameters,
model: .gpt_4o,
as: .string
)

LLMs are expanding into multimodal capabilities. With Vision, LLMs can analyze images and provide information about their content.
let systemPrompt: PromptLiteral = "You are a VisionExpertGPT. List all items in the image and write a poem about each."
let userPrompt: PromptLiteral = "List the items in this image and write a short poem about each."
let imageLiteral = try PromptLiteral(image: imageInput)
let messages: [AbstractLLM.ChatMessage] = [
.system(systemPrompt),
.user {
.concatenate(separator: nil) {
userPrompt
imageLiteral
}
}
]
let result: String = try await client.complete(
messages,
model: .gpt_4o,
as: .string
)

Function calling allows your app to receive structured JSON responses from an LLM, ensuring consistent data formats. It is ideal for apps that need to process AI responses programmatically.
/// Structured fields the LLM must produce when calling the
/// screenshot-analysis function. Used both to derive the JSON schema
/// (via `JSONSchema(type:)` below) and to decode the model's reply.
struct AddScreenshotFunctionParameters: Codable, Hashable, Sendable {
/// A concise title (3-5 words).
let title: String
/// A brief one-sentence summary.
let summary: String
/// Comprehensive description with keywords.
let description: String
/// Single-word category tag (music, art, movie, etc.).
let category: String
}
// JSON schema describing `AddScreenshotFunctionParameters`, with a
// human-readable description for each property so the model knows
// exactly what to put in every field.
let screenshotFunctionParameterSchema: JSONSchema = try JSONSchema(
type: AddScreenshotFunctionParameters.self,
description: "Detailed information about a mobile screenshot.",
propertyDescriptions: [
"title": "A concise title (3-5 words)",
"summary": "A brief one-sentence summary",
"description": "Comprehensive description with keywords",
"category": "Single-word tag (music, art, movie, etc.)"
],
required: true
)
// Function definition handed to the LLM. When the model decides to
// call "add_screenshot_analysis_to_db", it emits JSON arguments that
// conform to the nested schema above.
let addScreenshotAnalysisFunction = AbstractLLM.ChatFunctionDefinition(
name: "add_screenshot_analysis_to_db",
context: "Adds analysis of a mobile screenshot to the database",
parameters: JSONSchema(
type: .object,
description: "Screenshot Analysis",
// Parameters are nested under a single key; the decoding type
// (`ScreenshotAnalysisResult`) must mirror this key.
properties: ["screenshot_analysis_parameters": screenshotFunctionParameterSchema]
)
)
// Request a function call (rather than free-form text) by asking for
// the result `as: .functionCall`; decode its arguments afterwards.
let functionCall: AbstractLLM.ChatFunctionCall = try await client.complete(
messages,
functions: [addScreenshotAnalysisFunction],
as: .functionCall
)
/// Top-level wrapper mirroring the "screenshot_analysis_parameters"
/// key registered in the function definition's schema.
/// NOTE(review): the camelCase property decoding the snake_case JSON
/// key presumably relies on a snake_case key-decoding strategy inside
/// `ChatFunctionCall.decode` — confirm against the framework.
struct ScreenshotAnalysisResult: Codable {
let screenshotAnalysisParameters: AddScreenshotFunctionParameters
}
let result = try functionCall.decode(ScreenshotAnalysisResult.self)

Generate unique, personalized images for your applications instead of using generic stock images.
let imagePrompt = "A serene Japanese garden with cherry blossoms..."
let images = try await openAIClient.createImage(
prompt: imagePrompt,
quality: .standard, // or .hd
size: .w1024h1024, // 1024x1024, 1792x1024, or 1024x1792
style: .vivid // or .natural
)
if let imageURL = images.first?.url {
return URL(string: imageURL)
}

Whisper is an ASR system trained on 680,000 hours of audio, excelling at transcribing audio with background noise and varying accents.
let audioFile = URL(string: "YOUR_AUDIO_FILE_URL_PATH")
// Optional prompt for domain-specific vocabulary
let prompt = "ZyntriQix, Digique Plus, CynapseFive..."
let transcription = try await openAIClient.createTranscription(
audioFile: audioFile,
prompt: prompt,
language: .en,
temperature: 0,
timestampGranularities: [.segment, .word]
)
let fullTranscription = transcription.text
let segments = transcription.segments
let words = transcription.words

// tts-1: optimized for speed (real-time use cases)
// tts-1-hd: optimized for quality
let textInput = "In a quiet village nestled in a lush valley..."
// Voices: alloy, echo, fable, onyx, nova, shimmer
let speech = try await openAIClient.createSpeech(
model: .tts_1,
text: textInput,
voice: .alloy,
speed: 1.0 // 0.25 to 4.0
)
let audioData = speech.data

ElevenLabs provides speech generation in hundreds of voices across 29 languages, plus voice cloning with just 1 minute of audio.
import ElevenLabs
let client = ElevenLabs.Client(apiKey: "YOUR_API_KEY")
let voiceSettings = ElevenLabs.VoiceSettings(
stability: 0.5, // 0 (variable) to 1 (stable)
similarityBoost: 0.75, // 0 (low) to 1 (high)
styleExaggeration: 0.0, // 0 (low) to 1 (high)
speakerBoost: true
)
let speech = try await client.speech(
for: textInput,
voiceID: "4v7HtLWqY9rpQ7Cg2GT4",
voiceSettings: voiceSettings,
model: .MultilingualV2
)

Text embedding models convert text into numerical vectors for machine calculations. The primary use case is improving search functionality in your application.
let textInput = "Hello, Text Embeddings!"
let embeddings = try await client.textEmbeddings(
for: [textInput],
model: .embedding(.text_embedding_3_small)
)
return embeddings.data.first?.embedding.description

This package is licensed under the MIT License.