AI — The definitive, open-source Swift framework for interfacing with generative AI.
Supports OpenAI, Anthropic, Mistral, Groq, and ElevenLabs.
Supports macOS, iOS, iPadOS, tvOS, and watchOS with a unified interface for multiple AI providers.
Swift Package Manager:
import AI
// OpenAI / GPT
import OpenAI
let client = OpenAI.Client(apiKey: "YOUR_API_KEY")
// Anthropic / Claude
import Anthropic
let client = Anthropic.Client(apiKey: "YOUR_API_KEY")
// Mistral
import Mistral
let client = Mistral.Client(apiKey: "YOUR_API_KEY")
// Groq
import Groq
let client = Groq.Client(apiKey: "YOUR_API_KEY")
// ElevenLabs
import ElevenLabs
let client = ElevenLabs.Client(apiKey: "YOUR_API_KEY")

Abstract out the LLM client to allow users to choose between providers:
import AI
import OpenAI
import Anthropic
// Use any LLMRequestHandling conformant client
let client: any LLMRequestHandling = OpenAI.Client(apiKey: "YOUR_KEY")
// Or switch to:
let client: any LLMRequestHandling = Anthropic.Client(apiKey: "YOUR_KEY")

// OpenAI GPT Models
let gpt_4o: OpenAI.Model = .gpt_4o
let gpt_4: OpenAI.Model = .gpt_4
let gpt_3_5: OpenAI.Model = .gpt_3_5
// OpenAI Text Embedding Models
let small: OpenAI.Model = .embedding(.text_embedding_3_small)
let large: OpenAI.Model = .embedding(.text_embedding_3_large)
// Anthropic Models
let haiku: Anthropic.Model = .haiku
let sonnet: Anthropic.Model = .sonnet
let opus: Anthropic.Model = .opus
// Mistral Models
let tiny: Mistral.Model = .mistral_tiny
let small: Mistral.Model = .mistral_small
let medium: Mistral.Model = .mistral_medium
// Groq Models
let llama3_8b: Groq.Model = .llama3_8b
let llama3_70b: Groq.Model = .llama3_70b
let mixtral: Groq.Model = .mixtral_8x7b
// ElevenLabs Models
let multiV2: ElevenLabs.Model = .MultilingualV2
let turboV2: ElevenLabs.Model = .TurboV2

Modern LLMs operate by receiving inputs (messages/prompts) and completing them with the next probable output based on their neural network architectures.
import AI
import OpenAI
let client: any LLMRequestHandling = OpenAI.Client(apiKey: "YOUR_KEY")
let messages: [AbstractLLM.ChatMessage] = [
.system("You are an extremely intelligent assistant."),
.user("What is the meaning of life?")
]
let parameters = AbstractLLM.ChatCompletionParameters(
tokenLimit: .fixed(200),
temperatureOrTopP: .temperature(1.2),
stops: ["END OF CHAPTER"],
functions: nil
)
let result: String = try await client.complete(
messages,
parameters: parameters,
model: .gpt_4o,
as: .string
)

LLMs are expanding into multimodal capabilities. With Vision, LLMs can analyze images and provide information about their content.
let systemPrompt: PromptLiteral = "You are a VisionExpertGPT. List all items in the image and write a poem about each."
let userPrompt: PromptLiteral = "List the items in this image and write a short poem about each."
let imageLiteral = try PromptLiteral(image: imageInput)
let messages: [AbstractLLM.ChatMessage] = [
.system(systemPrompt),
.user {
.concatenate(separator: nil) {
userPrompt
imageLiteral
}
}
]
let result: String = try await client.complete(
messages,
model: .gpt_4o,
as: .string
)

Function calling allows your app to receive structured JSON responses from an LLM, ensuring consistent data formats. It is ideal for apps that need to process AI responses programmatically.
/// Structured fields the LLM must produce when calling the
/// screenshot-analysis function. Used both to derive the JSON schema
/// (via `JSONSchema(type:)` below) and to decode the model's reply.
struct AddScreenshotFunctionParameters: Codable, Hashable, Sendable {
/// A concise title (3-5 words).
let title: String
/// A brief one-sentence summary.
let summary: String
/// Comprehensive description with keywords.
let description: String
/// Single-word category tag (music, art, movie, etc.).
let category: String
}
// JSON schema describing `AddScreenshotFunctionParameters`, with a
// human-readable description for each property so the model knows
// exactly what to put in every field.
let screenshotFunctionParameterSchema: JSONSchema = try JSONSchema(
type: AddScreenshotFunctionParameters.self,
description: "Detailed information about a mobile screenshot.",
propertyDescriptions: [
"title": "A concise title (3-5 words)",
"summary": "A brief one-sentence summary",
"description": "Comprehensive description with keywords",
"category": "Single-word tag (music, art, movie, etc.)"
],
required: true
)
// Function definition handed to the LLM. When the model decides to
// call "add_screenshot_analysis_to_db", it emits JSON arguments that
// conform to the nested schema above.
let addScreenshotAnalysisFunction = AbstractLLM.ChatFunctionDefinition(
name: "add_screenshot_analysis_to_db",
context: "Adds analysis of a mobile screenshot to the database",
parameters: JSONSchema(
type: .object,
description: "Screenshot Analysis",
// Parameters are nested under a single key; the decoding type
// (`ScreenshotAnalysisResult`) must mirror this key.
properties: ["screenshot_analysis_parameters": screenshotFunctionParameterSchema]
)
)
// Request a function call (rather than free-form text) by asking for
// the result `as: .functionCall`; decode its arguments afterwards.
let functionCall: AbstractLLM.ChatFunctionCall = try await client.complete(
messages,
functions: [addScreenshotAnalysisFunction],
as: .functionCall
)
/// Top-level wrapper mirroring the "screenshot_analysis_parameters"
/// key registered in the function definition's schema.
/// NOTE(review): the camelCase property decoding the snake_case JSON
/// key presumably relies on a snake_case key-decoding strategy inside
/// `ChatFunctionCall.decode` — confirm against the framework.
struct ScreenshotAnalysisResult: Codable {
let screenshotAnalysisParameters: AddScreenshotFunctionParameters
}
let result = try functionCall.decode(ScreenshotAnalysisResult.self)

Generate unique, personalized images for your applications instead of using generic stock images.
let imagePrompt = "A serene Japanese garden with cherry blossoms..."
let images = try await openAIClient.createImage(
prompt: imagePrompt,
quality: .standard, // or .hd
size: .w1024h1024, // 1024x1024, 1792x1024, or 1024x1792
style: .vivid // or .natural
)
if let imageURL = images.first?.url {
return URL(string: imageURL)
}

Whisper is an ASR system trained on 680,000 hours of audio, excelling at transcribing audio with background noise and varying accents.
let audioFile = URL(string: "YOUR_AUDIO_FILE_URL_PATH")
// Optional prompt for domain-specific vocabulary
let prompt = "ZyntriQix, Digique Plus, CynapseFive..."
let transcription = try await openAIClient.createTranscription(
audioFile: audioFile,
prompt: prompt,
language: .en,
temperature: 0,
timestampGranularities: [.segment, .word]
)
let fullTranscription = transcription.text
let segments = transcription.segments
let words = transcription.words

// tts-1: optimized for speed (real-time use cases)
// tts-1-hd: optimized for quality
let textInput = "In a quiet village nestled in a lush valley..."
// Voices: alloy, echo, fable, onyx, nova, shimmer
let speech = try await openAIClient.createSpeech(
model: .tts_1,
text: textInput,
voice: .alloy,
speed: 1.0 // 0.25 to 4.0
)
let audioData = speech.data

ElevenLabs provides speech generation in hundreds of voices across 29 languages, plus voice cloning with just 1 minute of audio.
import ElevenLabs
let client = ElevenLabs.Client(apiKey: "YOUR_API_KEY")
let voiceSettings = ElevenLabs.VoiceSettings(
stability: 0.5, // 0 (variable) to 1 (stable)
similarityBoost: 0.75, // 0 (low) to 1 (high)
styleExaggeration: 0.0, // 0 (low) to 1 (high)
speakerBoost: true
)
let speech = try await client.speech(
for: textInput,
voiceID: "4v7HtLWqY9rpQ7Cg2GT4",
voiceSettings: voiceSettings,
model: .MultilingualV2
)

Text embedding models convert text into numerical vectors for machine calculations. The primary use case is improving search functionality in your application.
let textInput = "Hello, Text Embeddings!"
let embeddings = try await client.textEmbeddings(
for: [textInput],
model: .embedding(.text_embedding_3_small)
)
return embeddings.data.first?.embedding.description

This package is licensed under the MIT License.