Changes from all commits (41 commits)
d80d038
Update LLamaModelParams.cs
krisbiradar Sep 4, 2025
da01789
Add Flash Attention and diffusion model support
krisbiradar Sep 11, 2025
53c8c56
Update LLamaSharp.csproj
krisbiradar Sep 13, 2025
20bcf74
Update LLamaSharp.csproj
krisbiradar Sep 13, 2025
48f109a
bug fix: remove flash attention parameter from the model params
krisbiradar Sep 15, 2025
424a736
Fixed some failing tests, it looks like there's a min context size wh…
martindevans Oct 5, 2025
ff6ea95
Fix Reranker and Sampling Test Failures
krisbiradar Oct 13, 2025
5a0e7b8
Mtmd Implementation base
SignalRT Sep 27, 2025
ab0d42c
Update LLama/Native/NativeApi.cs
SignalRT Sep 29, 2025
f591985
Resolve comment: https://github.com/SciSharp/LLamaSharp/pull/1261#dis…
SignalRT Sep 29, 2025
03d4441
Remove duplicate code
SignalRT Oct 5, 2025
ea4ba82
Move common logic to LlamaExecutorBase
SignalRT Oct 5, 2025
385c62a
Rename SafeMtmdWeights
SignalRT Oct 25, 2025
2cdcc5a
Implement SafeHandle
SignalRT Oct 25, 2025
63d8ce4
Add IntPtrExtension
SignalRT Oct 25, 2025
32edd6f
Solve bad DLL naming in Windows with MTMD libraries
Oct 26, 2025
0990be3
Enable FlashAttention and clean up P/Invoke signatures
krisbiradar Oct 29, 2025
d6b10a0
Bump BenchmarkDotNet and BenchmarkDotNet.Diagnostics.Windows
dependabot[bot] Nov 1, 2025
ae845fd
Bump Microsoft.AspNetCore.Mvc.Razor.RuntimeCompilation from 8.0.20 to…
dependabot[bot] Nov 1, 2025
17bd6b8
Bump Microsoft.AspNetCore.OpenApi from 8.0.20 to 8.0.21
dependabot[bot] Nov 1, 2025
7a2dfd9
Bump Microsoft.Bcl.AsyncInterfaces from 9.0.9 to 9.0.10
dependabot[bot] Nov 1, 2025
a2014bb
Merge pull request #1284 from SciSharp/dependabot/nuget/LLama.Benchma…
martindevans Nov 1, 2025
eb0bffe
Merge pull request #1287 from SciSharp/dependabot/nuget/LLama/Microso…
martindevans Nov 1, 2025
78f2848
Bump Microsoft.Extensions.AI.Abstractions from 9.9.1 to 9.10.1
dependabot[bot] Nov 1, 2025
1b780a0
Merge pull request #1288 from SciSharp/dependabot/nuget/LLama/Microso…
martindevans Nov 1, 2025
2a891b9
Merge pull request #1286 from SciSharp/dependabot/nuget/LLama.WebAPI/…
martindevans Nov 1, 2025
8afd3eb
Merge pull request #1285 from SciSharp/dependabot/nuget/LLama.Web/Mic…
martindevans Nov 1, 2025
4355809
Enable FlashAttention and remove SeqMax param
krisbiradar Nov 1, 2025
ba5ca33
Update IContextParamsExtensions.cs
krisbiradar Nov 2, 2025
55a7aeb
fix : check-properties tests
krisbiradar Nov 11, 2025
4bc90f4
Merge pull request #1248 from krisbiradar/add-support-for-gemma-3n
martindevans Nov 13, 2025
070ff33
Mtmd Implementation base
SignalRT Sep 27, 2025
82c039c
Update LLama/Native/NativeApi.cs
SignalRT Sep 29, 2025
83d31f8
Resolve comment: https://github.com/SciSharp/LLamaSharp/pull/1261#dis…
SignalRT Sep 29, 2025
b65a6cf
Remove duplicate code
SignalRT Oct 5, 2025
3e36bb9
Move common logic to LlamaExecutorBase
SignalRT Oct 5, 2025
e58618c
Rename SafeMtmdWeights
SignalRT Oct 25, 2025
7e92b31
Implement SafeHandle
SignalRT Oct 25, 2025
f13f286
Add IntPtrExtension
SignalRT Oct 25, 2025
09fb90d
Solve bad DLL naming in Windows with MTMD libraries
Oct 26, 2025
8858196
Merge branch 'mtmd_implementation' of https://github.com/SignalRT/LLa…
SignalRT Dec 8, 2025
4 changes: 2 additions & 2 deletions LLama.Benchmark/LLama.Benchmark.csproj
@@ -10,8 +10,8 @@
   </PropertyGroup>

   <ItemGroup>
-    <PackageReference Include="BenchmarkDotNet" Version="0.15.4" />
-    <PackageReference Include="BenchmarkDotNet.Diagnostics.Windows" Version="0.15.4" />
+    <PackageReference Include="BenchmarkDotNet" Version="0.15.5" />
+    <PackageReference Include="BenchmarkDotNet.Diagnostics.Windows" Version="0.15.5" />
   </ItemGroup>

   <ItemGroup>
4 changes: 2 additions & 2 deletions LLama.Examples/ExampleRunner.cs
@@ -15,7 +15,7 @@ public class ExampleRunner
         { "Chat Session: Automatic conversation", TalkToYourself.Run },
         { "Chat Session: Chinese characters", ChatChineseGB2312.Run },
         { "Executor: Interactive mode chat", InteractiveModeExecute.Run },
-        { "Executor: Llava Interactive mode chat", LlavaInteractiveModeExecute.Run },
+        { "Executor: Mtmd Interactive mode chat", MtmdInteractiveModeExecute.Run },
         { "Executor: Instruct mode chat", InstructModeExecute.Run },
         { "Executor: Stateless mode chat", StatelessModeExecute.Run },
         { "Save and Load: chat session", SaveAndLoadSession.Run },
@@ -33,7 +33,7 @@ public class ExampleRunner
         { "Batched Executor: Save/Load", BatchedExecutorSaveAndLoad.Run },
         { "Batched Executor: Fork", BatchedExecutorFork.Run },
         { "Batched Executor: Rewind", BatchedExecutorRewind.Run },
-        { "Batched Executor: LLava", BatchedExecutorLLava.Run },
+        { "Batched Executor: Mtmd", BatchedExecutorMtmd.Run },
         { "Batched Executor: BoolQ Benchmark", BatchedExecutorBoolQ.Run },
         { "Batched Executor: Beam Search", BatchedExecutorBeamSearch.Run },
         { "Custom Sampling Pipeline", CustomSampler.Run },
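The hunks above suggest ExampleRunner registers each demo as a display name mapped to an async entry point, so the LLava-to-Mtmd rename only touches those two registrations. A minimal sketch of that pattern, using hypothetical stand-in delegates rather than the real example classes:

using System;
using System.Collections.Generic;
using System.Threading.Tasks;

// Hypothetical stand-ins for the entry points named in the hunks above.
static Task MtmdInteractiveDemo() => Task.CompletedTask;
static Task BatchedMtmdDemo() => Task.CompletedTask;

// The menu is just name -> delegate, so renaming an example is a one-entry edit.
var examples = new Dictionary<string, Func<Task>>
{
    { "Executor: Mtmd Interactive mode chat", MtmdInteractiveDemo },
    { "Batched Executor: Mtmd", BatchedMtmdDemo },
};

foreach (var name in examples.Keys)
    Console.WriteLine(name); // the real runner prompts for a choice and awaits the delegate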
91 changes: 0 additions & 91 deletions LLama.Examples/Examples/BatchedExecutorLLava.cs

This file was deleted.

126 changes: 126 additions & 0 deletions LLama.Examples/Examples/BatchedExecutorMtmd.cs
@@ -0,0 +1,126 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Threading.Tasks;
using LLama.Batched;
using LLama.Common;
using LLama.Exceptions;
using LLama.Native;
using LLama.Sampling;
using Spectre.Console;

namespace LLama.Examples.Examples;

/// <summary>
/// Demonstrates how to evaluate an image with MTMD helpers and continue generation by
/// manually scheduling batches, similar to what the batched executor does internally.
/// </summary>
public class BatchedExecutorMtmd
{
    /// <summary>
    /// Number of completion tokens to generate after sending the image prompt.
    /// </summary>
    public const int TokenCount = 10000;

    public static async Task Run()
    {
        // Load the base LLM and its clip/mtmd sidecar weights so the executor has everything it needs.
        var parameters = new ModelParams(UserSettings.GetModelPath());
        using var model = await LLamaWeights.LoadFromFileAsync(parameters);
        var mtmdParams = MtmdContextParams.Default(); // reuse llama.cpp defaults for helper settings
        mtmdParams.UseGpu = false;
        var marker = mtmdParams.MediaMarker ?? NativeApi.MtmdDefaultMarker() ?? "<media>";

        using var mtmd = await MtmdWeights.LoadFromFileAsync(UserSettings.GetMMProjPath(), model, mtmdParams); // multimodal helper weights

        using var executor = new BatchedExecutor(model, parameters, mtmd); // drives batched token + chunk evaluation

        // Prepend the media marker so the helper knows where to inject the encoded image tokens.
        var defaultPrompt = "\nUSER: Provide a full description of the image.\nASSISTANT: ";
        var promptSuffix = AnsiConsole.Ask("Prompt (or ENTER for default):", defaultPrompt);
        var promptText = string.Concat(marker, promptSuffix);

        var imagePath = UserSettings.GetImagePath();
        AnsiConsole.Write(new CanvasImage(imagePath));

        var vocab = executor.Context.NativeHandle.ModelHandle.Vocab;

        // Simple low-temperature sampler keeps the demo deterministic-ish.
        var sampler = new DefaultSamplingPipeline
        {
            Temperature = 0.1f
        };

        // Stream decoded text to the console as soon as tokens arrive.
        var decoder = new StreamingTokenDecoder(executor.Context)
        {
            DecodeSpecialTokens = false
        };

        try
        {
            // Each conversation tracks its own KV cache sequence IDs.
            var conversation = executor.Create();
            // enqueue the image so MtmdHelper sees it
            conversation.QueueMedia(imagePath);
            // schedule multimodal prompt
            conversation.Prompt(promptText, addBos: true, special: true);

            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine("Prompt queued with multimodal chunks. Generating response...\n");
            Console.ResetColor();

            var remaining = TokenCount;

            // Run one decode/sampling/prompt cycle – mirrors the batched executor inner loop.
            async Task<bool> ProcessNextAsync()
            {
                var decodeResult = await executor.Infer();
                if (decodeResult == DecodeResult.NoKvSlot) // KV cache exhausted – surface to the user
                {
                    Console.ForegroundColor = ConsoleColor.Red;
                    Console.WriteLine("Insufficient KV cache space for multimodal evaluation.");
                    Console.ResetColor();
                    return false;
                }

                if (decodeResult != DecodeResult.Ok)
                    throw new RuntimeError($"Failed to evaluate batch: {decodeResult}.");

                if (!conversation.RequiresSampling) // another conversation may still be queued
                    return true;

                var token = conversation.Sample(sampler); // pull logits (or -1 for mtmd chunk) and sample
                if (token.IsEndOfGeneration(vocab))
                    return false;

                decoder.Add(token);
                var delta = decoder.Read();
                if (!string.IsNullOrEmpty(delta))
                    Console.Write(delta);

                sampler.Accept(token); // keep sampler state in sync
                conversation.Prompt(token); // feed the accepted token back into the batch
                remaining--;
                return remaining > 0;
            }

            while (remaining > 0 && await ProcessNextAsync()) // continue until EOS or budget is reached
            {
            }

            Console.WriteLine();
        }
        catch (IOException ex)
        {
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine($"Could not load media '{imagePath}': {ex.Message}");
            Console.ResetColor();
        }
        catch (RuntimeError ex)
        {
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine($"MTMD processing failed: {ex.Message}");
            Console.ResetColor();
        }
    }
}
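Condensed, the flow this example drives is: load the LLM, load the mtmd projector weights beside it, build a BatchedExecutor, queue media on a conversation, prompt with the media marker, then repeat the Infer/Sample/Prompt cycle. A minimal top-level sketch of that loop, using only APIs that appear in the file above; the paths, the 128-token budget, and the exact using list are placeholder assumptions, not part of the PR:

// Minimal sketch condensed from BatchedExecutorMtmd above.
// Paths are placeholders; using list assumed, match the file above if it differs.
using LLama;
using LLama.Batched;
using LLama.Common;
using LLama.Native;
using LLama.Sampling;

var parameters = new ModelParams("model.gguf");
using var model = await LLamaWeights.LoadFromFileAsync(parameters);

var mtmdParams = MtmdContextParams.Default();
var marker = mtmdParams.MediaMarker ?? NativeApi.MtmdDefaultMarker() ?? "<media>";
using var mtmd = await MtmdWeights.LoadFromFileAsync("mmproj.gguf", model, mtmdParams);
using var executor = new BatchedExecutor(model, parameters, mtmd);

var conversation = executor.Create();
conversation.QueueMedia("image.jpg");        // the image is encoded where the marker sits
conversation.Prompt(marker + "\nDescribe the image.\n", addBos: true, special: true);

var sampler = new DefaultSamplingPipeline();
var vocab = executor.Context.NativeHandle.ModelHandle.Vocab;
for (var i = 0; i < 128; i++)
{
    if (await executor.Infer() != DecodeResult.Ok)
        break;                               // NoKvSlot or error: stop the sketch
    if (!conversation.RequiresSampling)
        continue;                            // an mtmd chunk was evaluated; nothing to sample yet
    var token = conversation.Sample(sampler);
    if (token.IsEndOfGeneration(vocab))
        break;
    sampler.Accept(token);                   // keep sampler state in sync
    conversation.Prompt(token);              // schedule the next decode step
}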